diff options
662 files changed, 15048 insertions, 8755 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index b939ebb6287..80d150458c8 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations int dma_supported(struct device *dev, u64 mask) int -pci_dma_supported(struct device *dev, u64 mask) +pci_dma_supported(struct pci_dev *hwdev, u64 mask) Checks to see if the device can support DMA to the memory described by mask. @@ -189,7 +189,7 @@ dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) dma_addr_t -pci_map_single(struct device *dev, void *cpu_addr, size_t size, +pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size, int direction) Maps a piece of processor virtual memory so it can be accessed by the @@ -395,6 +395,71 @@ Notes: You must do this: See also dma_map_single(). +dma_addr_t +dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) + +void +dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) + +int +dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) + +void +dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) + +The four functions above are just like the counterpart functions +without the _attrs suffixes, except that they pass an optional +struct dma_attrs*. + +struct dma_attrs encapsulates a set of "dma attributes". For the +definition of struct dma_attrs see linux/dma-attrs.h. + +The interpretation of dma attributes is architecture-specific, and +each attribute should be documented in Documentation/DMA-attributes.txt. 
+ +If struct dma_attrs* is NULL, the semantics of each of these +functions is identical to those of the corresponding function +without the _attrs suffix. As a result dma_map_single_attrs() +can generally replace dma_map_single(), etc. + +As an example of the use of the *_attrs functions, here's how +you could pass an attribute DMA_ATTR_FOO when mapping memory +for DMA: + +#include <linux/dma-attrs.h> +/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and + * documented in Documentation/DMA-attributes.txt */ +... + + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_FOO, &attrs); + .... + n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attrs); + .... + +Architectures that care about DMA_ATTR_FOO would check for its +presence in their implementations of the mapping and unmapping +routines, e.g.: + +void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + .... + int foo = dma_get_attr(DMA_ATTR_FOO, attrs); + .... + if (foo) + /* twizzle the frobnozzle */ + .... + Part II - Advanced dma_ usage ----------------------------- diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt new file mode 100644 index 00000000000..6d772f84b47 --- /dev/null +++ b/Documentation/DMA-attributes.txt @@ -0,0 +1,24 @@ + DMA attributes + ============== + +This document describes the semantics of the DMA attributes that are +defined in linux/dma-attrs.h. + +DMA_ATTR_WRITE_BARRIER +---------------------- + +DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA +to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces +all pending DMA writes to complete, and thus provides a mechanism to +strictly order DMA from a device across all intervening busses and +bridges.
This barrier is not specific to a particular type of +interconnect, it applies to the system as a whole, and so its +implementation must account for the idiosyncrasies of the system all +the way from the DMA device to memory. + +As an example of a situation where DMA_ATTR_WRITE_BARRIER would be +useful, suppose that a device does a DMA write to indicate that data is +ready and available in memory. The DMA of the "completion indication" +could race with data DMA. Mapping the memory used for completion +indications with DMA_ATTR_WRITE_BARRIER would prevent the race. + diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt index 31d12e21ff8..c298a6690e0 100644 --- a/Documentation/cgroups.txt +++ b/Documentation/cgroups.txt @@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking. void fork(struct cgroup_subsy *ss, struct task_struct *task) -Called when a task is forked into a cgroup. Also called during -registration for all existing tasks. +Called when a task is forked into a cgroup. void exit(struct cgroup_subsys *ss, struct task_struct *task) diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt new file mode 100644 index 00000000000..4dcea42432c --- /dev/null +++ b/Documentation/controllers/devices.txt @@ -0,0 +1,48 @@ +Device Whitelist Controller + +1. Description: + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries.
A child cgroup can +never receive a device access which is denied its parent. However +when a device access is removed from a parent it will not also be +removed from the child(ren). + +2. User Interface + +An entry is added using devices.allow, and removed using +devices.deny. For instance + + echo 'c 1:3 mr' > /cgroups/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing + + echo a > /cgroups/1/devices.deny + +will remove the default 'a *:* mrw' entry. + +3. Security + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendent of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt new file mode 100644 index 00000000000..f196ac1d7d2 --- /dev/null +++ b/Documentation/controllers/resource_counter.txt @@ -0,0 +1,181 @@ + + The Resource Counter + +The resource counter, declared at include/linux/res_counter.h, +is supposed to facilitate the resource management by controllers +by providing common stuff for accounting. + +This "stuff" includes the res_counter structure and routines +to work with it. + + + +1. Crucial parts of the res_counter structure + + a. unsigned long long usage + + The usage value shows the amount of a resource that is consumed + by a group at a given time. The units of measurement should be + determined by the controller that uses this counter. E.g. 
it can + be bytes, items or any other unit the controller operates on. + + b. unsigned long long max_usage + + The maximal value of the usage over time. + + This value is useful when gathering statistical information about + the particular group, as it shows the actual resource requirements + for a particular group, not just some usage snapshot. + + c. unsigned long long limit + + The maximal allowed amount of resource to consume by the group. In + case the group requests for more resources, so that the usage value + would exceed the limit, the resource allocation is rejected (see + the next section). + + d. unsigned long long failcnt + + The failcnt stands for "failures counter". This is the number of + resource allocation attempts that failed. + + e. spinlock_t lock + + Protects changes of the above values. + + + +2. Basic accounting routines + + a. void res_counter_init(struct res_counter *rc) + + Initializes the resource counter. As usual, should be the first + routine called for a new counter. + + b. int res_counter_charge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is about to be allocated it has to be accounted + with the appropriate resource counter (controller should determine + which one to use on its own). This operation is called "charging". + + It is not very important which operation - resource allocation + or charging - is performed first, but + * if the allocation is performed first, this may create a + temporary resource over-usage by the time resource counter is + charged; + * if the charging is performed first, then it should be uncharged + on error path (if the one is called). + + c. void res_counter_uncharge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is released (freed) it should be de-accounted + from the resource counter it was accounted to. This is called + "uncharging". + + The _locked routines imply that the res_counter->lock is taken.
+ + + 2.1 Other accounting routines + + There are more routines that may help you with common needs, like + checking whether the limit is reached or resetting the max_usage + value. They are all declared in include/linux/res_counter.h. + + + +3. Analyzing the resource counter registrations + + a. If the failcnt value constantly grows, this means that the counter's + limit is too tight. Either the group is misbehaving and consumes too + many resources, or the configuration is not suitable for the group + and the limit should be increased. + + b. The max_usage value can be used to quickly tune the group. One may + set the limits to maximal values and either load the container with + a common pattern or leave one for a while. After this the max_usage + value shows the amount of memory the container would require during + its common activity. + + Setting the limit a bit above this value gives a pretty good + configuration that works in most of the cases. + + c. If the max_usage is much less than the limit, but the failcnt value + is growing, then the group tries to allocate a big chunk of resource + at once. + + d. If the max_usage is much less than the limit, but the failcnt value + is 0, then this group is given too high limit, that it does not + require. It is better to lower the limit a bit leaving more resource + for other groups. + + + +4. Communication with the control groups subsystem (cgroups) + +All the resource controllers that are using cgroups and resource counters +should provide files (in the cgroup filesystem) to work with the resource +counter fields. They are recommended to adhere to the following rules: + + a. File names + + Field name File name + --------------------------------------------------- + usage usage_in_<unit_of_measurement> + max_usage max_usage_in_<unit_of_measurement> + limit limit_in_<unit_of_measurement> + failcnt failcnt + lock no file :) + + b. Reading from file should show the corresponding field value in the + appropriate format. 
+ + c. Writing to file + + Field Expected behavior + ---------------------------------- + usage prohibited + max_usage reset to usage + limit set the limit + failcnt reset to zero + + + +5. Usage example + + a. Declare a task group (take a look at cgroups subsystem for this) and + fold a res_counter into it + + struct my_group { + struct res_counter res; + + <other fields> + } + + b. Put hooks in resource allocation/release paths + + int alloc_something(...) + { + if (res_counter_charge(res_counter_ptr, amount) < 0) + return -ENOMEM; + + <allocate the resource and return to the caller> + } + + void release_something(...) + { + res_counter_uncharge(res_counter_ptr, amount); + + <release the resource> + } + + In order to keep the usage value self-consistent, both the + "res_counter_ptr" and the "amount" in release_something() should be + the same as they were in the alloc_something() when the releasing + resource was allocated. + + c. Provide the way to read res_counter values and set them (the cgroups + still can help with it). + + d. Compile and run :) diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index af3b925ece0..6c442d8426b 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another that some governors won't load - they only work on some specific architectures or processors. + +cpuinfo_cur_freq : Current speed of the CPU, in kHz. + +scaling_available_frequencies : List of available frequencies, in kHz. + scaling_min_freq and scaling_max_freq show the current "policy limits" (in kHz). By echoing new values into these @@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in first set scaling_max_freq, then scaling_min_freq. +affected_cpus : List of CPUs that require software coordination + of frequency.
+ +related_cpus : List of CPUs that need some sort of frequency + coordination, whether software or hardware. + +scaling_driver : Hardware driver for cpufreq. + +scaling_cur_freq : Current frequency of the CPU, in KHz. If you have selected the "userspace" governor which allows you to set the CPU operating frequency to a specific value, you can read out diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index aa854b9b18c..fb7b361e6ee 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -171,6 +171,7 @@ files describing that cpuset: - memory_migrate flag: if set, move pages to cpusets nodes - cpu_exclusive flag: is cpu placement exclusive? - mem_exclusive flag: is memory placement exclusive? + - mem_hardwall flag: is memory allocation hardwalled - memory_pressure: measure of how much paging pressure in cpuset In addition, the root cpuset only has the following file: @@ -222,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct ancestor or descendent, may share any of the same CPUs or Memory Nodes. -A cpuset that is mem_exclusive restricts kernel allocations for -page, buffer and other data commonly shared by the kernel across -multiple users. All cpusets, whether mem_exclusive or not, restrict -allocations of memory for user space. This enables configuring a -system so that several independent jobs can share common kernel data, -such as file system pages, while isolating each jobs user allocation in -its own cpuset. To do this, construct a large mem_exclusive cpuset to -hold all the jobs, and construct child, non-mem_exclusive cpusets for -each individual job. Only a small amount of typical kernel memory, -such as requests from interrupt handlers, is allowed to be taken -outside even a mem_exclusive cpuset. +A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", +i.e. it restricts kernel allocations for page, buffer and other data +commonly shared by the kernel across multiple users. 
All cpusets, +whether hardwalled or not, restrict allocations of memory for user +space. This enables configuring a system so that several independent +jobs can share common kernel data, such as file system pages, while +isolating each job's user allocation in its own cpuset. To do this, +construct a large mem_exclusive cpuset to hold all the jobs, and +construct child, non-mem_exclusive cpusets for each individual job. +Only a small amount of typical kernel memory, such as requests from +interrupt handlers, is allowed to be taken outside even a +mem_exclusive cpuset. 1.5 What is memory_pressure ? @@ -707,7 +709,7 @@ Now you want to do something with this cpuset. In this directory you can find several files: # ls -cpus cpu_exclusive mems mem_exclusive tasks +cpus cpu_exclusive mems mem_exclusive mem_hardwall tasks Reading them will give you information about the state of this cpuset: the CPUs and Memory Nodes it can use, the processes that are using diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e5f3d918316..3ce193f8656 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -627,8 +627,7 @@ and is between 256 and 4096 characters. It is defined in the file eata= [HW,SCSI] edd= [EDD] - Format: {"of[f]" | "sk[ipmbr]"} - See comment in arch/i386/boot/edd.S + Format: {"off" | "on" | "skip[mbr]"} eisa_irq_edge= [PARISC,HW] See header of drivers/parisc/eisa.c. @@ -1389,6 +1388,13 @@ and is between 256 and 4096 characters. It is defined in the file nr_uarts= [SERIAL] maximum number of UARTs to be registered. + olpc_ec_timeout= [OLPC] ms delay when issuing EC commands + Rather than timing out after 20 ms if an EC + command is not properly ACKed, override the length + of the timeout. We have interrupts disabled while + waiting for the ACK, so if this is set too high + interrupts *may* be lost! 
+ opl3= [HW,OSS] Format: <io> diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt index 266955d23ee..09b55e46174 100644 --- a/Documentation/keys-request-key.txt +++ b/Documentation/keys-request-key.txt @@ -11,26 +11,29 @@ request_key*(): struct key *request_key(const struct key_type *type, const char *description, - const char *callout_string); + const char *callout_info); or: struct key *request_key_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const char *callout_info, + size_t callout_len, void *aux); or: struct key *request_key_async(const struct key_type *type, const char *description, - const char *callout_string); + const char *callout_info, + size_t callout_len); or: struct key *request_key_async_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const char *callout_info, + size_t callout_len, void *aux); Or by userspace invoking the request_key system call: diff --git a/Documentation/keys.txt b/Documentation/keys.txt index 51652d39e61..d5c7a57d170 100644 --- a/Documentation/keys.txt +++ b/Documentation/keys.txt @@ -170,7 +170,8 @@ The key service provides a number of features besides keys: amount of description and payload space that can be consumed. The user can view information on this and other statistics through procfs - files. + files. The root user may also alter the quota limits through sysctl files + (see the section "New procfs files"). Process-specific and thread-specific keyrings are not counted towards a user's quota. 
@@ -329,6 +330,27 @@ about the status of the key service: <bytes>/<max> Key size quota +Four new sysctl files have been added also for the purpose of controlling the +quota limits on keys: + + (*) /proc/sys/kernel/keys/root_maxkeys + /proc/sys/kernel/keys/root_maxbytes + + These files hold the maximum number of keys that root may have and the + maximum total number of bytes of data that root may have stored in those + keys. + + (*) /proc/sys/kernel/keys/maxkeys + /proc/sys/kernel/keys/maxbytes + + These files hold the maximum number of keys that each non-root user may + have and the maximum total number of bytes of data that each of those + users may have stored in their keys. + +Root may alter these by writing each new limit as a decimal number string to +the appropriate file. + + =============================== USERSPACE SYSTEM CALL INTERFACE =============================== @@ -711,6 +733,27 @@ The keyctl syscall functions are: The assumed authoritative key is inherited across fork and exec. + (*) Get the LSM security context attached to a key. + + long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, + size_t buflen) + + This function returns a string that represents the LSM security context + attached to a key in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A NUL character is included at the end of the string if the buffer is + sufficiently big. This is included in the returned count. If no LSM is + in force then an empty string will be returned. + + A process must have view permission on the key for this function to be + successful. + + =============== KERNEL SERVICES =============== @@ -771,7 +814,7 @@ payload contents" for more information. 
struct key *request_key(const struct key_type *type, const char *description, - const char *callout_string); + const char *callout_info); This is used to request a key or keyring with a description that matches the description specified according to the key type's match function. This @@ -793,24 +836,28 @@ payload contents" for more information. struct key *request_key_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const void *callout_info, + size_t callout_len, void *aux); This is identical to request_key(), except that the auxiliary data is - passed to the key_type->request_key() op if it exists. + passed to the key_type->request_key() op if it exists, and the callout_info + is a blob of length callout_len, if given (the length may be 0). (*) A key can be requested asynchronously by calling one of: struct key *request_key_async(const struct key_type *type, const char *description, - const char *callout_string); + const void *callout_info, + size_t callout_len); or: struct key *request_key_async_with_auxdata(const struct key_type *type, const char *description, - const char *callout_string, + const char *callout_info, + size_t callout_len, void *aux); which are asynchronous equivalents of request_key() and diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt index 7f60dfe642c..b152e81da59 100644 --- a/Documentation/oops-tracing.txt +++ b/Documentation/oops-tracing.txt @@ -253,6 +253,10 @@ characters, each representing a particular tainted value. 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. + 9: 'A' if the ACPI table has been overridden. + + 10: 'W' if a warning has previously been issued by the kernel. + The primary reason for the 'Tainted: ' string is to tell kernel debuggers if this is a clean kernel or if anything unusual has occurred. 
Tainting is permanent: even if an offending module is diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 10c8f6922ef..5ce0952aa06 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -85,6 +85,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.: 'k' - Secure Access Key (SAK) Kills all programs on the current virtual console. NOTE: See important comments below in SAK section. +'l' - Shows a stack backtrace for all active CPUs. + 'm' - Will dump current memory info to your console. 'n' - Used to make RT tasks nice-able diff --git a/MAINTAINERS b/MAINTAINERS index c1dd1ae7b13..d103766f3b4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2694,7 +2694,7 @@ P: David Howells M: dhowells@redhat.com P: Koichi Yasutake M: yasutake.koichi@jp.panasonic.com -L: linux-am33-list@redhat.com +L: linux-am33-list@redhat.com (moderated for non-subscribers) W: ftp://ftp.redhat.com/pub/redhat/gnupro/AM33/ S: Maintained diff --git a/arch/Kconfig b/arch/Kconfig index 694c9af520b..3ea332b009e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -36,3 +36,6 @@ config HAVE_KPROBES config HAVE_KRETPROBES def_bool n + +config HAVE_DMA_ATTRS + def_bool n diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 6c56c754a0b..4b18cd94d59 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -8,13 +8,9 @@ #include <linux/stddef.h> #include <linux/sched.h> #include <linux/ptrace.h> +#include <linux/kbuild.h> #include <asm/io.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - void foo(void) { DEFINE(TI_TASK, offsetof(struct thread_info, task)); diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index baf57563b14..36ab22a7ea1 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -514,8 +514,8 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn) void __iomem *pci_iomap(struct 
pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 0a0d2479274..4a881258bb1 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -16,6 +16,7 @@ #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/procinfo.h> +#include <linux/kbuild.h> /* * Make sure that the compiler and target are compatible. @@ -35,13 +36,6 @@ #error Known good compilers: 3.3 #endif -/* Use marker if you need to separate the values later */ - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main(void) { DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c index e2e934c3808..64c420805e6 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags.c @@ -35,7 +35,7 @@ create_proc_entries(void) { struct proc_dir_entry* tags_entry; - tags_entry = create_proc_read_entry("atags", 0400, &proc_root, read_buffer, &tags_buffer); + tags_entry = create_proc_read_entry("atags", 0400, NULL, read_buffer, &tags_buffer); if (!tags_entry) return -ENOMEM; diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index f56d48c451e..a53c0aba5c1 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -37,6 +37,7 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/device.h> #include <linux/init.h> #include <linux/mutex.h> @@ -723,17 +724,14 @@ unsigned int __ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) return address; } -static int ecard_prints(char *buffer, 
ecard_t *ec) +static int ecard_prints(struct seq_file *m, ecard_t *ec) { - char *start = buffer; - - buffer += sprintf(buffer, " %d: %s ", ec->slot_no, - ec->easi ? "EASI" : " "); + seq_printf(m, " %d: %s ", ec->slot_no, ec->easi ? "EASI" : " "); if (ec->cid.id == 0) { struct in_chunk_dir incd; - buffer += sprintf(buffer, "[%04X:%04X] ", + seq_printf(m, "[%04X:%04X] ", ec->cid.manufacturer, ec->cid.product); if (!ec->card_desc && ec->cid.cd && @@ -744,43 +742,43 @@ static int ecard_prints(char *buffer, ecard_t *ec) strcpy((char *)ec->card_desc, incd.d.string); } - buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); + seq_printf(m, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); } else - buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id); + seq_printf(m, "Simple card %d\n", ec->cid.id); - return buffer - start; + return 0; } -static int get_ecard_dev_info(char *buf, char **start, off_t pos, int count) +static int ecard_devices_proc_show(struct seq_file *m, void *v) { ecard_t *ec = cards; - off_t at = 0; - int len, cnt; - - cnt = 0; - while (ec && count > cnt) { - len = ecard_prints(buf, ec); - at += len; - if (at >= pos) { - if (!*start) { - *start = buf + (pos - (at - len)); - cnt = at - pos; - } else - cnt += len; - buf += len; - } + + while (ec) { + ecard_prints(m, ec); ec = ec->next; } - return (count > cnt) ? 
cnt : count; + return 0; } +static int ecard_devices_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ecard_devices_proc_show, NULL); +} + +static const struct file_operations bus_ecard_proc_fops = { + .owner = THIS_MODULE, + .open = ecard_devices_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct proc_dir_entry *proc_bus_ecard_dir = NULL; static void ecard_proc_init(void) { - proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus); - create_proc_info_entry("devices", 0, proc_bus_ecard_dir, - get_ecard_dev_info); + proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL); + proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops); } #define ec_set_resource(ec,nr,st,sz) \ diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c index 4143828a968..c6b94f60e0b 100644 --- a/arch/arm/mach-davinci/clock.c +++ b/arch/arm/mach-davinci/clock.c @@ -311,11 +311,7 @@ static const struct file_operations proc_davinci_ck_operations = { static int __init davinci_ck_proc_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("davinci_clocks", 0, NULL); - if (entry) - entry->proc_fops = &proc_davinci_ck_operations; + proc_create("davinci_clocks", 0, NULL, &proc_davinci_ck_operations); return 0; } diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c index 62066f3020c..7429f8c0101 100644 --- a/arch/arm/mm/iomap.c +++ b/arch/arm/mm/iomap.c @@ -26,8 +26,8 @@ EXPORT_SYMBOL(ioport_unmap); #ifdef CONFIG_PCI void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c index 
078cd33f467..e4796c67a83 100644 --- a/arch/avr32/kernel/asm-offsets.c +++ b/arch/avr32/kernel/asm-offsets.c @@ -5,14 +5,7 @@ */ #include <linux/thread_info.h> - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); +#include <linux/kbuild.h> void foo(void) { diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c index b835257a8fa..cd12edbea9f 100644 --- a/arch/avr32/mm/tlb.c +++ b/arch/avr32/mm/tlb.c @@ -369,11 +369,7 @@ static const struct file_operations proc_tlb_operations = { static int __init proctlb_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("tlb", 0, NULL); - if (entry) - entry->proc_fops = &proc_tlb_operations; + proc_create("tlb", 0, NULL, &proc_tlb_operations); return 0; } late_initcall(proctlb_init); diff --git a/arch/blackfin/kernel/asm-offsets.c b/arch/blackfin/kernel/asm-offsets.c index b56b2741cde..721f15f3ceb 100644 --- a/arch/blackfin/kernel/asm-offsets.c +++ b/arch/blackfin/kernel/asm-offsets.c @@ -34,8 +34,7 @@ #include <linux/hardirq.h> #include <linux/irq.h> #include <linux/thread_info.h> - -#define DEFINE(sym, val) asm volatile("\n->" #sym " %0 " #val : : "i" (val)) +#include <linux/kbuild.h> int main(void) { diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index d1fa24401dc..cb9d883d493 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -212,7 +212,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t * info, /* Set up registers for signal handler */ wrusp((unsigned long)frame); - if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor *) ka->sa.sa_handler; __get_user(regs->pc, &funcptr->text); diff --git a/arch/cris/kernel/profile.c b/arch/cris/kernel/profile.c index aad0a9e5991..44f7b4f7947 
100644 --- a/arch/cris/kernel/profile.c +++ b/arch/cris/kernel/profile.c @@ -75,9 +75,9 @@ __init init_cris_profile(void) sample_buffer_pos = sample_buffer; - entry = create_proc_entry("system_profile", S_IWUSR | S_IRUGO, NULL); + entry = proc_create("system_profile", S_IWUSR | S_IRUGO, NULL, + &cris_proc_profile_operations); if (entry) { - entry->proc_fops = &cris_proc_profile_operations; entry->size = SAMPLE_BUFFER_SIZE; } prof_running = 1; diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c index fbb19fc1af4..9de96843a27 100644 --- a/arch/frv/kernel/asm-offsets.c +++ b/arch/frv/kernel/asm-offsets.c @@ -7,15 +7,13 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/personality.h> +#include <linux/kbuild.h> #include <asm/registers.h> #include <asm/ucontext.h> #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/gdb-stub.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - #define DEF_PTREG(sym, reg) \ asm volatile("\n->" #sym " %0 offsetof(struct pt_regs, " #reg ")" \ : : "i" (offsetof(struct pt_regs, reg))) @@ -32,11 +30,6 @@ asm volatile("\n->" #sym " %0 offsetof(struct frv_frame0, " #reg ")" \ : : "i" (offsetof(struct frv_frame0, reg))) -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - void foo(void) { /* offsets into the thread_info structure */ diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index d64bcaff54c..3bdb368292a 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -297,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set) __frame->lr = (unsigned long) &frame->retcode; __frame->gr8 = sig; - if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; __get_user(__frame->pc, &funcptr->text); @@ 
-396,7 +396,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, __frame->gr8 = sig; __frame->gr9 = (unsigned long) &frame->info; - if (get_personality & FDPIC_FUNCPTRS) { + if (current->personality & FDPIC_FUNCPTRS) { struct fdpic_func_descriptor __user *funcptr = (struct fdpic_func_descriptor __user *) ka->sa.sa_handler; __get_user(__frame->pc, &funcptr->text); diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index a40df80b2eb..1d2dfe67d44 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -362,11 +362,8 @@ asmlinkage void memory_access_exception(unsigned long esr0, #ifdef CONFIG_MMU unsigned long fixup; - if ((esr0 & ESRx_EC) == ESRx_EC_DATA_ACCESS) - if (handle_misalignment(esr0, ear0, epcr0) == 0) - return; - - if ((fixup = search_exception_table(__frame->pc)) != 0) { + fixup = search_exception_table(__frame->pc); + if (fixup) { __frame->pc = fixup; return; } diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c index 068fa04bd52..35f6df28351 100644 --- a/arch/frv/mb93090-mb00/pci-iomap.c +++ b/arch/frv/mb93090-mb00/pci-iomap.c @@ -13,8 +13,8 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/frv/mm/unaligned.c b/arch/frv/mm/unaligned.c deleted file mode 100644 index 8f0375fc15a..00000000000 --- a/arch/frv/mm/unaligned.c +++ /dev/null @@ -1,217 +0,0 @@ -/* unaligned.c: unalignment fixup handler for CPUs on which it is supported (FR451 only) - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
- * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/sched.h> -#include <linux/signal.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/types.h> -#include <linux/user.h> -#include <linux/string.h> -#include <linux/linkage.h> -#include <linux/init.h> - -#include <asm/setup.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#if 0 -#define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ ) -#else -#define kdebug(fmt, ...) do {} while(0) -#endif - -#define _MA_SIGNED 0x01 -#define _MA_HALF 0x02 -#define _MA_WORD 0x04 -#define _MA_DWORD 0x08 -#define _MA_SZ_MASK 0x0e -#define _MA_LOAD 0x10 -#define _MA_STORE 0x20 -#define _MA_UPDATE 0x40 -#define _MA_IMM 0x80 - -#define _MA_LDxU _MA_LOAD | _MA_UPDATE -#define _MA_LDxI _MA_LOAD | _MA_IMM -#define _MA_STxU _MA_STORE | _MA_UPDATE -#define _MA_STxI _MA_STORE | _MA_IMM - -static const uint8_t tbl_LDGRk_reg[0x40] = { - [0x02] = _MA_LOAD | _MA_HALF | _MA_SIGNED, /* LDSH @(GRi,GRj),GRk */ - [0x03] = _MA_LOAD | _MA_HALF, /* LDUH @(GRi,GRj),GRk */ - [0x04] = _MA_LOAD | _MA_WORD, /* LD @(GRi,GRj),GRk */ - [0x05] = _MA_LOAD | _MA_DWORD, /* LDD @(GRi,GRj),GRk */ - [0x12] = _MA_LDxU | _MA_HALF | _MA_SIGNED, /* LDSHU @(GRi,GRj),GRk */ - [0x13] = _MA_LDxU | _MA_HALF, /* LDUHU @(GRi,GRj),GRk */ - [0x14] = _MA_LDxU | _MA_WORD, /* LDU @(GRi,GRj),GRk */ - [0x15] = _MA_LDxU | _MA_DWORD, /* LDDU @(GRi,GRj),GRk */ -}; - -static const uint8_t tbl_STGRk_reg[0x40] = { - [0x01] = _MA_STORE | _MA_HALF, /* STH @(GRi,GRj),GRk */ - [0x02] = _MA_STORE | _MA_WORD, /* ST @(GRi,GRj),GRk */ - [0x03] = _MA_STORE | _MA_DWORD, /* STD @(GRi,GRj),GRk */ - [0x11] = _MA_STxU | _MA_HALF, /* STHU @(GRi,GRj),GRk */ - [0x12] = _MA_STxU | _MA_WORD, /* STU 
@(GRi,GRj),GRk */ - [0x13] = _MA_STxU | _MA_DWORD, /* STDU @(GRi,GRj),GRk */ -}; - -static const uint8_t tbl_LDSTGRk_imm[0x80] = { - [0x31] = _MA_LDxI | _MA_HALF | _MA_SIGNED, /* LDSHI @(GRi,d12),GRk */ - [0x32] = _MA_LDxI | _MA_WORD, /* LDI @(GRi,d12),GRk */ - [0x33] = _MA_LDxI | _MA_DWORD, /* LDDI @(GRi,d12),GRk */ - [0x36] = _MA_LDxI | _MA_HALF, /* LDUHI @(GRi,d12),GRk */ - [0x51] = _MA_STxI | _MA_HALF, /* STHI @(GRi,d12),GRk */ - [0x52] = _MA_STxI | _MA_WORD, /* STI @(GRi,d12),GRk */ - [0x53] = _MA_STxI | _MA_DWORD, /* STDI @(GRi,d12),GRk */ -}; - - -/*****************************************************************************/ -/* - * see if we can handle the exception by fixing up a misaligned memory access - */ -int handle_misalignment(unsigned long esr0, unsigned long ear0, unsigned long epcr0) -{ - unsigned long insn, addr, *greg; - int GRi, GRj, GRk, D12, op; - - union { - uint64_t _64; - uint32_t _32[2]; - uint16_t _16; - uint8_t _8[8]; - } x; - - if (!(esr0 & ESR0_EAV) || !(epcr0 & EPCR0_V) || !(ear0 & 7)) - return -EAGAIN; - - epcr0 &= EPCR0_PC; - - if (__frame->pc != epcr0) { - kdebug("MISALIGN: Execution not halted on excepting instruction\n"); - BUG(); - } - - if (__get_user(insn, (unsigned long *) epcr0) < 0) - return -EFAULT; - - /* determine the instruction type first */ - switch ((insn >> 18) & 0x7f) { - case 0x2: - /* LDx @(GRi,GRj),GRk */ - op = tbl_LDGRk_reg[(insn >> 6) & 0x3f]; - break; - - case 0x3: - /* STx GRk,@(GRi,GRj) */ - op = tbl_STGRk_reg[(insn >> 6) & 0x3f]; - break; - - default: - op = tbl_LDSTGRk_imm[(insn >> 18) & 0x7f]; - break; - } - - if (!op) - return -EAGAIN; - - kdebug("MISALIGN: pc=%08lx insn=%08lx ad=%08lx op=%02x\n", epcr0, insn, ear0, op); - - memset(&x, 0xba, 8); - - /* validate the instruction parameters */ - greg = (unsigned long *) &__frame->tbr; - - GRi = (insn >> 12) & 0x3f; - GRk = (insn >> 25) & 0x3f; - - if (GRi > 31 || GRk > 31) - return -ENOENT; - - if (op & _MA_DWORD && GRk & 1) - return -EINVAL; - - if 
(op & _MA_IMM) { - D12 = insn & 0xfff; - asm ("slli %0,#20,%0 ! srai %0,#20,%0" : "=r"(D12) : "0"(D12)); /* sign extend */ - addr = (GRi ? greg[GRi] : 0) + D12; - } - else { - GRj = (insn >> 0) & 0x3f; - if (GRj > 31) - return -ENOENT; - addr = (GRi ? greg[GRi] : 0) + (GRj ? greg[GRj] : 0); - } - - if (addr != ear0) { - kdebug("MISALIGN: Calculated addr (%08lx) does not match EAR0 (%08lx)\n", - addr, ear0); - return -EFAULT; - } - - /* check the address is okay */ - if (user_mode(__frame) && ___range_ok(ear0, 8) < 0) - return -EFAULT; - - /* perform the memory op */ - if (op & _MA_STORE) { - /* perform a store */ - x._32[0] = 0; - if (GRk != 0) { - if (op & _MA_HALF) { - x._16 = greg[GRk]; - } - else { - x._32[0] = greg[GRk]; - } - } - if (op & _MA_DWORD) - x._32[1] = greg[GRk + 1]; - - kdebug("MISALIGN: Store GR%d { %08x:%08x } -> %08lx (%dB)\n", - GRk, x._32[1], x._32[0], addr, op & _MA_SZ_MASK); - - if (__memcpy_user((void *) addr, &x, op & _MA_SZ_MASK) != 0) - return -EFAULT; - } - else { - /* perform a load */ - if (__memcpy_user(&x, (void *) addr, op & _MA_SZ_MASK) != 0) - return -EFAULT; - - if (op & _MA_HALF) { - if (op & _MA_SIGNED) - asm ("slli %0,#16,%0 ! srai %0,#16,%0" - : "=r"(x._32[0]) : "0"(x._16)); - else - asm ("sethi #0,%0" - : "=r"(x._32[0]) : "0"(x._16)); - } - - kdebug("MISALIGN: Load %08lx (%dB) -> GR%d, { %08x:%08x }\n", - addr, op & _MA_SZ_MASK, GRk, x._32[1], x._32[0]); - - if (GRk != 0) - greg[GRk] = x._32[0]; - if (op & _MA_DWORD) - greg[GRk + 1] = x._32[1]; - } - - /* update the base pointer if required */ - if (op & _MA_UPDATE) - greg[GRi] = addr; - - /* well... 
we've done that insn */ - __frame->pc = __frame->pc + 4; - - return 0; -} /* end handle_misalignment() */ diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index fc30b4fd091..2042552e087 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c @@ -13,15 +13,11 @@ #include <linux/kernel_stat.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/kbuild.h> #include <asm/bootinfo.h> #include <asm/irq.h> #include <asm/ptrace.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main(void) { /* offsets into the task struct */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3aa6c821449..0df5f6f75ed 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DMA_ATTRS select HAVE_KVM default y help @@ -47,6 +48,9 @@ config MMU config SWIOTLB bool +config IOMMU_HELPER + bool + config GENERIC_LOCKBREAK bool default y @@ -615,7 +619,7 @@ config IRQ_PER_CPU default y config IOMMU_HELPER - def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC) + def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) source "arch/ia64/hp/sim/Kconfig" diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 8f6bcfe1dad..1c44ec2a1d5 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -20,10 +20,10 @@ extern int swiotlb_late_init_with_default_size (size_t size); extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent; extern ia64_mv_dma_free_coherent swiotlb_free_coherent; -extern ia64_mv_dma_map_single swiotlb_map_single; -extern ia64_mv_dma_unmap_single swiotlb_unmap_single; -extern ia64_mv_dma_map_sg swiotlb_map_sg; -extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg; +extern ia64_mv_dma_map_single_attrs swiotlb_map_single_attrs; +extern 
ia64_mv_dma_unmap_single_attrs swiotlb_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs swiotlb_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs; extern ia64_mv_dma_supported swiotlb_dma_supported; extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error; @@ -31,19 +31,19 @@ extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error; extern ia64_mv_dma_alloc_coherent sba_alloc_coherent; extern ia64_mv_dma_free_coherent sba_free_coherent; -extern ia64_mv_dma_map_single sba_map_single; -extern ia64_mv_dma_unmap_single sba_unmap_single; -extern ia64_mv_dma_map_sg sba_map_sg; -extern ia64_mv_dma_unmap_sg sba_unmap_sg; +extern ia64_mv_dma_map_single_attrs sba_map_single_attrs; +extern ia64_mv_dma_unmap_single_attrs sba_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs sba_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs sba_unmap_sg_attrs; extern ia64_mv_dma_supported sba_dma_supported; extern ia64_mv_dma_mapping_error sba_dma_mapping_error; #define hwiommu_alloc_coherent sba_alloc_coherent #define hwiommu_free_coherent sba_free_coherent -#define hwiommu_map_single sba_map_single -#define hwiommu_unmap_single sba_unmap_single -#define hwiommu_map_sg sba_map_sg -#define hwiommu_unmap_sg sba_unmap_sg +#define hwiommu_map_single_attrs sba_map_single_attrs +#define hwiommu_unmap_single_attrs sba_unmap_single_attrs +#define hwiommu_map_sg_attrs sba_map_sg_attrs +#define hwiommu_unmap_sg_attrs sba_unmap_sg_attrs #define hwiommu_dma_supported sba_dma_supported #define hwiommu_dma_mapping_error sba_dma_mapping_error #define hwiommu_sync_single_for_cpu machvec_dma_sync_single @@ -98,41 +98,48 @@ hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma } dma_addr_t -hwsw_map_single (struct device *dev, void *addr, size_t size, int dir) +hwsw_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, + struct dma_attrs *attrs) { if (use_swiotlb(dev)) - return swiotlb_map_single(dev, addr, size, dir); + return 
swiotlb_map_single_attrs(dev, addr, size, dir, attrs); else - return hwiommu_map_single(dev, addr, size, dir); + return hwiommu_map_single_attrs(dev, addr, size, dir, attrs); } +EXPORT_SYMBOL(hwsw_map_single_attrs); void -hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir) +hwsw_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, + int dir, struct dma_attrs *attrs) { if (use_swiotlb(dev)) - return swiotlb_unmap_single(dev, iova, size, dir); + return swiotlb_unmap_single_attrs(dev, iova, size, dir, attrs); else - return hwiommu_unmap_single(dev, iova, size, dir); + return hwiommu_unmap_single_attrs(dev, iova, size, dir, attrs); } - +EXPORT_SYMBOL(hwsw_unmap_single_attrs); int -hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +hwsw_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, + int dir, struct dma_attrs *attrs) { if (use_swiotlb(dev)) - return swiotlb_map_sg(dev, sglist, nents, dir); + return swiotlb_map_sg_attrs(dev, sglist, nents, dir, attrs); else - return hwiommu_map_sg(dev, sglist, nents, dir); + return hwiommu_map_sg_attrs(dev, sglist, nents, dir, attrs); } +EXPORT_SYMBOL(hwsw_map_sg_attrs); void -hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, + int dir, struct dma_attrs *attrs) { if (use_swiotlb(dev)) - return swiotlb_unmap_sg(dev, sglist, nents, dir); + return swiotlb_unmap_sg_attrs(dev, sglist, nents, dir, attrs); else - return hwiommu_unmap_sg(dev, sglist, nents, dir); + return hwiommu_unmap_sg_attrs(dev, sglist, nents, dir, attrs); } +EXPORT_SYMBOL(hwsw_unmap_sg_attrs); void hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir) @@ -185,10 +192,6 @@ hwsw_dma_mapping_error (dma_addr_t dma_addr) } EXPORT_SYMBOL(hwsw_dma_mapping_error); -EXPORT_SYMBOL(hwsw_map_single); -EXPORT_SYMBOL(hwsw_unmap_single); 
-EXPORT_SYMBOL(hwsw_map_sg); -EXPORT_SYMBOL(hwsw_unmap_sg); EXPORT_SYMBOL(hwsw_dma_supported); EXPORT_SYMBOL(hwsw_alloc_coherent); EXPORT_SYMBOL(hwsw_free_coherent); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 9409de5c944..34421aed1e2 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -899,16 +899,18 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) } /** - * sba_map_single - map one buffer and return IOVA for DMA + * sba_map_single_attrs - map one buffer and return IOVA for DMA * @dev: instance of PCI owned by the driver that's asking. * @addr: driver buffer to map. * @size: number of bytes to map in driver buffer. * @dir: R/W or both. + * @attrs: optional dma attributes * * See Documentation/DMA-mapping.txt */ dma_addr_t -sba_map_single(struct device *dev, void *addr, size_t size, int dir) +sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, + struct dma_attrs *attrs) { struct ioc *ioc; dma_addr_t iovp; @@ -932,7 +934,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) ** Device is bit capable of DMA'ing to the buffer... 
** just return the PCI address of ptr */ - DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n", + DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: " + "0x%lx/0x%lx\n", to_pci_dev(dev)->dma_mask, pci_addr); return pci_addr; } @@ -953,7 +956,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_single()")) + if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()")) panic("Sanity check failed"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif @@ -982,11 +985,12 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir) /* form complete address */ #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_map_single()"); + sba_check_pdir(ioc,"Check after sba_map_single_attrs()"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif return SBA_IOVA(ioc, iovp, offset); } +EXPORT_SYMBOL(sba_map_single_attrs); #ifdef ENABLE_MARK_CLEAN static SBA_INLINE void @@ -1013,15 +1017,17 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) #endif /** - * sba_unmap_single - unmap one IOVA and free resources + * sba_unmap_single_attrs - unmap one IOVA and free resources * @dev: instance of PCI owned by the driver that's asking. * @iova: IOVA of driver buffer previously mapped. * @size: number of bytes mapped in driver buffer. * @dir: R/W or both. 
+ * @attrs: optional dma attributes * * See Documentation/DMA-mapping.txt */ -void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) +void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, + int dir, struct dma_attrs *attrs) { struct ioc *ioc; #if DELAYED_RESOURCE_CNT > 0 @@ -1038,7 +1044,8 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) /* ** Address does not fall w/in IOVA, must be bypassing */ - DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova); + DBG_BYPASS("sba_unmap_single_attrs() bypass addr: 0x%lx\n", + iova); #ifdef ENABLE_MARK_CLEAN if (dir == DMA_FROM_DEVICE) { @@ -1087,7 +1094,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir) spin_unlock_irqrestore(&ioc->res_lock, flags); #endif /* DELAYED_RESOURCE_CNT == 0 */ } - +EXPORT_SYMBOL(sba_unmap_single_attrs); /** * sba_alloc_coherent - allocate/map shared mem for DMA @@ -1144,7 +1151,8 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp * If device can't bypass or bypass is disabled, pass the 32bit fake * device to map single to get an iova mapping. */ - *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0); + *dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr, + size, 0, NULL); return addr; } @@ -1161,7 +1169,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp */ void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - sba_unmap_single(dev, dma_handle, size, 0); + sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL); free_pages((unsigned long) vaddr, get_order(size)); } @@ -1410,10 +1418,12 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev, * @sglist: array of buffer/length pairs * @nents: number of entries in list * @dir: R/W or both. 
+ * @attrs: optional dma attributes * * See Documentation/DMA-mapping.txt */ -int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir) +int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, + int dir, struct dma_attrs *attrs) { struct ioc *ioc; int coalesced, filled = 0; @@ -1441,16 +1451,16 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di /* Fast path single entry scatterlists. */ if (nents == 1) { sglist->dma_length = sglist->length; - sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir); + sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs); return 1; } #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check before sba_map_sg()")) + if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()")) { sba_dump_sg(ioc, sglist, nents); - panic("Check before sba_map_sg()"); + panic("Check before sba_map_sg_attrs()"); } spin_unlock_irqrestore(&ioc->res_lock, flags); #endif @@ -1479,10 +1489,10 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - if (sba_check_pdir(ioc,"Check after sba_map_sg()")) + if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()")) { sba_dump_sg(ioc, sglist, nents); - panic("Check after sba_map_sg()\n"); + panic("Check after sba_map_sg_attrs()\n"); } spin_unlock_irqrestore(&ioc->res_lock, flags); #endif @@ -1492,18 +1502,20 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di return filled; } - +EXPORT_SYMBOL(sba_map_sg_attrs); /** - * sba_unmap_sg - unmap Scatter/Gather list + * sba_unmap_sg_attrs - unmap Scatter/Gather list * @dev: instance of PCI owned by the driver that's asking. * @sglist: array of buffer/length pairs * @nents: number of entries in list * @dir: R/W or both. 
+ * @attrs: optional dma attributes * * See Documentation/DMA-mapping.txt */ -void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir) +void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, int dir, struct dma_attrs *attrs) { #ifdef ASSERT_PDIR_SANITY struct ioc *ioc; @@ -1518,13 +1530,14 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in ASSERT(ioc); spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check before sba_unmap_sg()"); + sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif while (nents && sglist->dma_length) { - sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir); + sba_unmap_single_attrs(dev, sglist->dma_address, + sglist->dma_length, dir, attrs); sglist = sg_next(sglist); nents--; } @@ -1533,11 +1546,12 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in #ifdef ASSERT_PDIR_SANITY spin_lock_irqsave(&ioc->res_lock, flags); - sba_check_pdir(ioc,"Check after sba_unmap_sg()"); + sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()"); spin_unlock_irqrestore(&ioc->res_lock, flags); #endif } +EXPORT_SYMBOL(sba_unmap_sg_attrs); /************************************************************** * @@ -1918,15 +1932,13 @@ static const struct file_operations ioc_fops = { static void __init ioc_proc_init(void) { - struct proc_dir_entry *dir, *entry; + struct proc_dir_entry *dir; dir = proc_mkdir("bus/mckinley", NULL); if (!dir) return; - entry = create_proc_entry(ioc_list->name, 0, dir); - if (entry) - entry->proc_fops = &ioc_fops; + proc_create(ioc_list->name, 0, dir, &ioc_fops); } #endif @@ -2166,10 +2178,6 @@ sba_page_override(char *str) __setup("sbapagesize=",sba_page_override); EXPORT_SYMBOL(sba_dma_mapping_error); -EXPORT_SYMBOL(sba_map_single); -EXPORT_SYMBOL(sba_unmap_single); -EXPORT_SYMBOL(sba_map_sg); -EXPORT_SYMBOL(sba_unmap_sg); 
EXPORT_SYMBOL(sba_dma_supported); EXPORT_SYMBOL(sba_alloc_coherent); EXPORT_SYMBOL(sba_free_coherent); diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 230a6f92367..c64a55af9b9 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -9,7 +9,7 @@ #include <linux/sched.h> #include <linux/pid.h> #include <linux/clocksource.h> - +#include <linux/kbuild.h> #include <asm-ia64/processor.h> #include <asm-ia64/ptrace.h> #include <asm-ia64/siginfo.h> @@ -19,11 +19,6 @@ #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - void foo(void) { DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index c8e403752a0..7fbb51e10bb 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6695,16 +6695,12 @@ pfm_init(void) /* * create /proc/perfmon (mostly for debugging purposes) */ - perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); + perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); pmu_conf = NULL; return -1; } - /* - * install customized file operations for /proc/perfmon entry - */ - perfmon_dir->proc_fops = &pfm_proc_fops; /* * create /proc/sys/kernel/perfmon (for debugging purposes) diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index b11bb50a197..ecb9eb78d68 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -648,18 +648,16 @@ salinfo_init(void) if (!dir) continue; - entry = create_proc_entry("event", S_IRUSR, dir); + entry = proc_create_data("event", S_IRUSR, dir, + &salinfo_event_fops, data); if (!entry) continue; - entry->data = data; - entry->proc_fops = &salinfo_event_fops; *sdir++ = entry; - entry = 
create_proc_entry("data", S_IRUSR | S_IWUSR, dir); + entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, + &salinfo_data_fops, data); if (!entry) continue; - entry->data = data; - entry->proc_fops = &salinfo_data_fops; *sdir++ = entry; /* we missed any events before now */ diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index dfc6bf1c7b4..49d3120415e 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -550,11 +550,12 @@ static int __init sn2_ptc_init(void) if (!ia64_platform_is("sn2")) return 0; - if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { + proc_sn2_ptc = proc_create(PTC_BASENAME, 0444, + NULL, &proc_sn2_ptc_operations); + if (!proc_sn2_ptc) { printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); return -EINVAL; } - proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; spin_lock_init(&sn2_global_ptc_lock); return 0; } diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 62b3e9a496a..2526e5c783a 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -139,30 +139,21 @@ static const struct file_operations proc_sn_topo_fops = { void register_sn_procfs(void) { static struct proc_dir_entry *sgi_proc_dir = NULL; - struct proc_dir_entry *pde; BUG_ON(sgi_proc_dir != NULL); if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) return; - pde = create_proc_entry("partition_id", 0444, sgi_proc_dir); - if (pde) - pde->proc_fops = &proc_partition_id_fops; - pde = create_proc_entry("system_serial_number", 0444, sgi_proc_dir); - if (pde) - pde->proc_fops = &proc_system_sn_fops; - pde = create_proc_entry("licenseID", 0444, sgi_proc_dir); - if (pde) - pde->proc_fops = &proc_license_id_fops; - pde = create_proc_entry("sn_force_interrupt", 0644, sgi_proc_dir); - if (pde) - pde->proc_fops = &proc_sn_force_intr_fops; - pde = create_proc_entry("coherence_id", 0444, sgi_proc_dir); - if 
(pde) - pde->proc_fops = &proc_coherence_id_fops; - pde = create_proc_entry("sn_topology", 0444, sgi_proc_dir); - if (pde) - pde->proc_fops = &proc_sn_topo_fops; + proc_create("partition_id", 0444, sgi_proc_dir, + &proc_partition_id_fops); + proc_create("system_serial_number", 0444, sgi_proc_dir, + &proc_system_sn_fops); + proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops); + proc_create("sn_force_interrupt", 0644, sgi_proc_dir, + &proc_sn_force_intr_fops); + proc_create("coherence_id", 0444, sgi_proc_dir, + &proc_coherence_id_fops); + proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); } #endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 18b94b792d5..52175af299a 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> +#include <linux/dma-attrs.h> #include <asm/dma.h> #include <asm/sn/intr.h> #include <asm/sn/pcibus_provider_defs.h> @@ -149,11 +150,12 @@ void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, EXPORT_SYMBOL(sn_dma_free_coherent); /** - * sn_dma_map_single - map a single page for DMA + * sn_dma_map_single_attrs - map a single page for DMA * @dev: device to map for * @cpu_addr: kernel virtual address of the region to map * @size: size of the region * @direction: DMA direction + * @attrs: optional dma attributes * * Map the region pointed to by @cpu_addr for DMA and return the * DMA address. @@ -163,42 +165,59 @@ EXPORT_SYMBOL(sn_dma_free_coherent); * no way of saving the dmamap handle from the alloc to later free * (which is pretty much unacceptable). * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) + * * TODO: simplify our interface; * figure out how to save dmamap handle so can use two step. 
*/ -dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size, - int direction) +dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, + size_t size, int direction, + struct dma_attrs *attrs) { dma_addr_t dma_addr; unsigned long phys_addr; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + int dmabarr; + + dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); BUG_ON(dev->bus != &pci_bus_type); phys_addr = __pa(cpu_addr); - dma_addr = provider->dma_map(pdev, phys_addr, size, SN_DMA_ADDR_PHYS); + if (dmabarr) + dma_addr = provider->dma_map_consistent(pdev, phys_addr, + size, SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, size, + SN_DMA_ADDR_PHYS); + if (!dma_addr) { printk(KERN_ERR "%s: out of ATEs\n", __func__); return 0; } return dma_addr; } -EXPORT_SYMBOL(sn_dma_map_single); +EXPORT_SYMBOL(sn_dma_map_single_attrs); /** - * sn_dma_unmap_single - unamp a DMA mapped page + * sn_dma_unmap_single_attrs - unmap a DMA mapped page * @dev: device to sync * @dma_addr: DMA address to sync * @size: size of region * @direction: DMA direction + * @attrs: optional dma attributes * * This routine is supposed to sync the DMA region specified * by @dma_handle into the coherence domain. On SN, we're always cache * coherent, so we just need to free any ATEs associated with this mapping. 
*/ -void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - int direction) +void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, + size_t size, int direction, + struct dma_attrs *attrs) { struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); @@ -207,19 +226,21 @@ void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, provider->dma_unmap(pdev, dma_addr, direction); } -EXPORT_SYMBOL(sn_dma_unmap_single); +EXPORT_SYMBOL(sn_dma_unmap_single_attrs); /** - * sn_dma_unmap_sg - unmap a DMA scatterlist + * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist * @dev: device to unmap * @sg: scatterlist to unmap * @nhwentries: number of scatterlist entries * @direction: DMA direction + * @attrs: optional dma attributes * * Unmap a set of streaming mode DMA translations. */ -void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nhwentries, int direction) +void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nhwentries, int direction, + struct dma_attrs *attrs) { int i; struct pci_dev *pdev = to_pci_dev(dev); @@ -234,25 +255,34 @@ void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, sg->dma_length = 0; } } -EXPORT_SYMBOL(sn_dma_unmap_sg); +EXPORT_SYMBOL(sn_dma_unmap_sg_attrs); /** - * sn_dma_map_sg - map a scatterlist for DMA + * sn_dma_map_sg_attrs - map a scatterlist for DMA * @dev: device to map for * @sg: scatterlist to map * @nhwentries: number of entries * @direction: direction of the DMA transaction + * @attrs: optional dma attributes + * + * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with + * dma_map_consistent() so that writes force a flush of pending DMA. + * (See "SGI Altix Architecture Considerations for Linux Device Drivers", + * Document Number: 007-4763-001) * * Maps each entry of @sg for DMA. 
*/ -int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, - int direction) +int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nhwentries, int direction, struct dma_attrs *attrs) { unsigned long phys_addr; struct scatterlist *saved_sg = sgl, *sg; struct pci_dev *pdev = to_pci_dev(dev); struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); int i; + int dmabarr; + + dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs); BUG_ON(dev->bus != &pci_bus_type); @@ -260,11 +290,19 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, * Setup a DMA address for each entry in the scatterlist. */ for_each_sg(sgl, sg, nhwentries, i) { + dma_addr_t dma_addr; phys_addr = SG_ENT_PHYS_ADDRESS(sg); - sg->dma_address = provider->dma_map(pdev, - phys_addr, sg->length, - SN_DMA_ADDR_PHYS); + if (dmabarr) + dma_addr = provider->dma_map_consistent(pdev, + phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + else + dma_addr = provider->dma_map(pdev, phys_addr, + sg->length, + SN_DMA_ADDR_PHYS); + sg->dma_address = dma_addr; if (!sg->dma_address) { printk(KERN_ERR "%s: out of ATEs\n", __func__); @@ -272,7 +310,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, * Free any successfully allocated entries. 
*/ if (i > 0) - sn_dma_unmap_sg(dev, saved_sg, i, direction); + sn_dma_unmap_sg_attrs(dev, saved_sg, i, + direction, attrs); return 0; } @@ -281,7 +320,7 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries, return nhwentries; } -EXPORT_SYMBOL(sn_dma_map_sg); +EXPORT_SYMBOL(sn_dma_map_sg_attrs); void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index 246a8820c22..b1f012f6c49 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -11,14 +11,12 @@ #include <linux/stddef.h> #include <linux/sched.h> #include <linux/kernel_stat.h> +#include <linux/kbuild.h> #include <asm/bootinfo.h> #include <asm/irq.h> #include <asm/amigahw.h> #include <linux/font.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - int main(void) { /* offsets into the task struct */ diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 5b2799eb96a..326fb997809 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -109,7 +109,6 @@ #include <linux/mm.h> #include <linux/delay.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <linux/interrupt.h> #include <asm/bootinfo.h> @@ -124,10 +123,6 @@ int iop_scc_present,iop_ism_present; -#ifdef CONFIG_PROC_FS -static int iop_get_proc_info(char *, char **, off_t, int); -#endif /* CONFIG_PROC_FS */ - /* structure for tracking channel listeners */ struct listener { @@ -299,12 +294,6 @@ void __init iop_init(void) iop_listeners[IOP_NUM_ISM][i].devname = NULL; iop_listeners[IOP_NUM_ISM][i].handler = NULL; } - -#if 0 /* Crashing in 2.4 now, not yet sure why. 
--jmt */ -#ifdef CONFIG_PROC_FS - create_proc_info_entry("mac_iop", 0, &proc_root, iop_get_proc_info); -#endif -#endif } /* @@ -637,77 +626,3 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id) } return IRQ_HANDLED; } - -#ifdef CONFIG_PROC_FS - -char *iop_chan_state(int state) -{ - switch(state) { - case IOP_MSG_IDLE : return "idle "; - case IOP_MSG_NEW : return "new "; - case IOP_MSG_RCVD : return "received "; - case IOP_MSG_COMPLETE : return "completed "; - default : return "unknown "; - } -} - -int iop_dump_one_iop(char *buf, int iop_num, char *iop_name) -{ - int i,len = 0; - volatile struct mac_iop *iop = iop_base[iop_num]; - - len += sprintf(buf+len, "%s IOP channel states:\n\n", iop_name); - len += sprintf(buf+len, "## send_state recv_state device\n"); - len += sprintf(buf+len, "------------------------------------------------\n"); - for (i = 0 ; i < NUM_IOP_CHAN ; i++) { - len += sprintf(buf+len, "%2d %10s %10s %s\n", i, - iop_chan_state(iop_readb(iop, IOP_ADDR_SEND_STATE+i)), - iop_chan_state(iop_readb(iop, IOP_ADDR_RECV_STATE+i)), - iop_listeners[iop_num][i].handler? - iop_listeners[iop_num][i].devname : ""); - - } - len += sprintf(buf+len, "\n"); - return len; -} - -static int iop_get_proc_info(char *buf, char **start, off_t pos, int count) -{ - int len, cnt; - - cnt = 0; - len = sprintf(buf, "IOPs detected:\n\n"); - - if (iop_scc_present) { - len += sprintf(buf+len, "SCC IOP (%p): status %02X\n", - iop_base[IOP_NUM_SCC], - (uint) iop_base[IOP_NUM_SCC]->status_ctrl); - } - if (iop_ism_present) { - len += sprintf(buf+len, "ISM IOP (%p): status %02X\n\n", - iop_base[IOP_NUM_ISM], - (uint) iop_base[IOP_NUM_ISM]->status_ctrl); - } - - if (iop_scc_present) { - len += iop_dump_one_iop(buf+len, IOP_NUM_SCC, "SCC"); - - } - - if (iop_ism_present) { - len += iop_dump_one_iop(buf+len, IOP_NUM_ISM, "ISM"); - - } - - if (len >= pos) { - if (!*start) { - *start = buf + pos; - cnt = len - pos; - } else { - cnt += len; - } - } - return (count > cnt) ? 
cnt : count; -} - -#endif /* CONFIG_PROC_FS */ diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c index d97b89bae53..fd0c685a7f1 100644 --- a/arch/m68knommu/kernel/asm-offsets.c +++ b/arch/m68knommu/kernel/asm-offsets.c @@ -13,15 +13,11 @@ #include <linux/kernel_stat.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/kbuild.h> #include <asm/bootinfo.h> #include <asm/irq.h> #include <asm/thread_info.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main(void) { /* offsets into the task struct */ diff --git a/arch/mips/basler/excite/excite_procfs.c b/arch/mips/basler/excite/excite_procfs.c index 9ee67a95f6b..08923e6825b 100644 --- a/arch/mips/basler/excite/excite_procfs.c +++ b/arch/mips/basler/excite/excite_procfs.c @@ -18,8 +18,9 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - +#include <linux/module.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/stat.h> #include <asm/page.h> #include <asm/io.h> @@ -28,14 +29,25 @@ #include <excite.h> -static int excite_get_unit_id(char *buf, char **addr, off_t offs, int size) +static int excite_unit_id_proc_show(struct seq_file *m, void *v) { - const int len = snprintf(buf, PAGE_SIZE, "%06x", unit_id); - const int w = len - offs; - *addr = buf + offs; - return w < size ? 
w : size; + seq_printf(m, "%06x", unit_id); + return 0; } +static int excite_unit_id_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, excite_unit_id_proc_show, NULL); +} + +static const struct file_operations excite_unit_id_proc_fops = { + .owner = THIS_MODULE, + .open = excite_unit_id_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int excite_bootrom_read(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -65,12 +77,12 @@ excite_bootrom_read(char *page, char **start, off_t off, int count, void excite_procfs_init(void) { /* Create & populate /proc/excite */ - struct proc_dir_entry * const pdir = proc_mkdir("excite", &proc_root); + struct proc_dir_entry * const pdir = proc_mkdir("excite", NULL); if (pdir) { struct proc_dir_entry * e; - e = create_proc_info_entry("unit_id", S_IRUGO, pdir, - excite_get_unit_id); + e = proc_create("unit_id", S_IRUGO, pdir, + &excite_unit_id_proc_fops); if (e) e->size = 6; e = create_proc_read_entry("bootrom", S_IRUGO, pdir, diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 5bf03b3c415..72942226fcd 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -13,327 +13,285 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/interrupt.h> - +#include <linux/kbuild.h> #include <asm/ptrace.h> #include <asm/processor.h> -#define text(t) __asm__("\n->#" t) -#define _offset(type, member) (&(((type *)NULL)->member)) -#define offset(string, ptr, member) \ - __asm__("\n->" string " %0" : : "i" (_offset(ptr, member))) -#define constant(string, member) \ - __asm__("\n->" string " %0" : : "ri" (member)) -#define size(string, size) \ - __asm__("\n->" string " %0" : : "i" (sizeof(size))) -#define linefeed text("") - void output_ptreg_defines(void) { - text("MIPS pt_regs offsets."); - offset("PT_R0", struct pt_regs, regs[0]); - offset("PT_R1", struct pt_regs, regs[1]); - 
offset("PT_R2", struct pt_regs, regs[2]); - offset("PT_R3", struct pt_regs, regs[3]); - offset("PT_R4", struct pt_regs, regs[4]); - offset("PT_R5", struct pt_regs, regs[5]); - offset("PT_R6", struct pt_regs, regs[6]); - offset("PT_R7", struct pt_regs, regs[7]); - offset("PT_R8", struct pt_regs, regs[8]); - offset("PT_R9", struct pt_regs, regs[9]); - offset("PT_R10", struct pt_regs, regs[10]); - offset("PT_R11", struct pt_regs, regs[11]); - offset("PT_R12", struct pt_regs, regs[12]); - offset("PT_R13", struct pt_regs, regs[13]); - offset("PT_R14", struct pt_regs, regs[14]); - offset("PT_R15", struct pt_regs, regs[15]); - offset("PT_R16", struct pt_regs, regs[16]); - offset("PT_R17", struct pt_regs, regs[17]); - offset("PT_R18", struct pt_regs, regs[18]); - offset("PT_R19", struct pt_regs, regs[19]); - offset("PT_R20", struct pt_regs, regs[20]); - offset("PT_R21", struct pt_regs, regs[21]); - offset("PT_R22", struct pt_regs, regs[22]); - offset("PT_R23", struct pt_regs, regs[23]); - offset("PT_R24", struct pt_regs, regs[24]); - offset("PT_R25", struct pt_regs, regs[25]); - offset("PT_R26", struct pt_regs, regs[26]); - offset("PT_R27", struct pt_regs, regs[27]); - offset("PT_R28", struct pt_regs, regs[28]); - offset("PT_R29", struct pt_regs, regs[29]); - offset("PT_R30", struct pt_regs, regs[30]); - offset("PT_R31", struct pt_regs, regs[31]); - offset("PT_LO", struct pt_regs, lo); - offset("PT_HI", struct pt_regs, hi); + COMMENT("MIPS pt_regs offsets."); + OFFSET(PT_R0, pt_regs, regs[0]); + OFFSET(PT_R1, pt_regs, regs[1]); + OFFSET(PT_R2, pt_regs, regs[2]); + OFFSET(PT_R3, pt_regs, regs[3]); + OFFSET(PT_R4, pt_regs, regs[4]); + OFFSET(PT_R5, pt_regs, regs[5]); + OFFSET(PT_R6, pt_regs, regs[6]); + OFFSET(PT_R7, pt_regs, regs[7]); + OFFSET(PT_R8, pt_regs, regs[8]); + OFFSET(PT_R9, pt_regs, regs[9]); + OFFSET(PT_R10, pt_regs, regs[10]); + OFFSET(PT_R11, pt_regs, regs[11]); + OFFSET(PT_R12, pt_regs, regs[12]); + OFFSET(PT_R13, pt_regs, regs[13]); + OFFSET(PT_R14, pt_regs, 
regs[14]); + OFFSET(PT_R15, pt_regs, regs[15]); + OFFSET(PT_R16, pt_regs, regs[16]); + OFFSET(PT_R17, pt_regs, regs[17]); + OFFSET(PT_R18, pt_regs, regs[18]); + OFFSET(PT_R19, pt_regs, regs[19]); + OFFSET(PT_R20, pt_regs, regs[20]); + OFFSET(PT_R21, pt_regs, regs[21]); + OFFSET(PT_R22, pt_regs, regs[22]); + OFFSET(PT_R23, pt_regs, regs[23]); + OFFSET(PT_R24, pt_regs, regs[24]); + OFFSET(PT_R25, pt_regs, regs[25]); + OFFSET(PT_R26, pt_regs, regs[26]); + OFFSET(PT_R27, pt_regs, regs[27]); + OFFSET(PT_R28, pt_regs, regs[28]); + OFFSET(PT_R29, pt_regs, regs[29]); + OFFSET(PT_R30, pt_regs, regs[30]); + OFFSET(PT_R31, pt_regs, regs[31]); + OFFSET(PT_LO, pt_regs, lo); + OFFSET(PT_HI, pt_regs, hi); #ifdef CONFIG_CPU_HAS_SMARTMIPS - offset("PT_ACX", struct pt_regs, acx); + OFFSET(PT_ACX, pt_regs, acx); #endif - offset("PT_EPC", struct pt_regs, cp0_epc); - offset("PT_BVADDR", struct pt_regs, cp0_badvaddr); - offset("PT_STATUS", struct pt_regs, cp0_status); - offset("PT_CAUSE", struct pt_regs, cp0_cause); + OFFSET(PT_EPC, pt_regs, cp0_epc); + OFFSET(PT_BVADDR, pt_regs, cp0_badvaddr); + OFFSET(PT_STATUS, pt_regs, cp0_status); + OFFSET(PT_CAUSE, pt_regs, cp0_cause); #ifdef CONFIG_MIPS_MT_SMTC - offset("PT_TCSTATUS", struct pt_regs, cp0_tcstatus); + OFFSET(PT_TCSTATUS, pt_regs, cp0_tcstatus); #endif /* CONFIG_MIPS_MT_SMTC */ - size("PT_SIZE", struct pt_regs); - linefeed; + DEFINE(PT_SIZE, sizeof(struct pt_regs)); + BLANK(); } void output_task_defines(void) { - text("MIPS task_struct offsets."); - offset("TASK_STATE", struct task_struct, state); - offset("TASK_THREAD_INFO", struct task_struct, stack); - offset("TASK_FLAGS", struct task_struct, flags); - offset("TASK_MM", struct task_struct, mm); - offset("TASK_PID", struct task_struct, pid); - size( "TASK_STRUCT_SIZE", struct task_struct); - linefeed; + COMMENT("MIPS task_struct offsets."); + OFFSET(TASK_STATE, task_struct, state); + OFFSET(TASK_THREAD_INFO, task_struct, stack); + OFFSET(TASK_FLAGS, task_struct, flags); + 
OFFSET(TASK_MM, task_struct, mm); + OFFSET(TASK_PID, task_struct, pid); + DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); + BLANK(); } void output_thread_info_defines(void) { - text("MIPS thread_info offsets."); - offset("TI_TASK", struct thread_info, task); - offset("TI_EXEC_DOMAIN", struct thread_info, exec_domain); - offset("TI_FLAGS", struct thread_info, flags); - offset("TI_TP_VALUE", struct thread_info, tp_value); - offset("TI_CPU", struct thread_info, cpu); - offset("TI_PRE_COUNT", struct thread_info, preempt_count); - offset("TI_ADDR_LIMIT", struct thread_info, addr_limit); - offset("TI_RESTART_BLOCK", struct thread_info, restart_block); - offset("TI_REGS", struct thread_info, regs); - constant("_THREAD_SIZE", THREAD_SIZE); - constant("_THREAD_MASK", THREAD_MASK); - linefeed; + COMMENT("MIPS thread_info offsets."); + OFFSET(TI_TASK, thread_info, task); + OFFSET(TI_EXEC_DOMAIN, thread_info, exec_domain); + OFFSET(TI_FLAGS, thread_info, flags); + OFFSET(TI_TP_VALUE, thread_info, tp_value); + OFFSET(TI_CPU, thread_info, cpu); + OFFSET(TI_PRE_COUNT, thread_info, preempt_count); + OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit); + OFFSET(TI_RESTART_BLOCK, thread_info, restart_block); + OFFSET(TI_REGS, thread_info, regs); + DEFINE(_THREAD_SIZE, THREAD_SIZE); + DEFINE(_THREAD_MASK, THREAD_MASK); + BLANK(); } void output_thread_defines(void) { - text("MIPS specific thread_struct offsets."); - offset("THREAD_REG16", struct task_struct, thread.reg16); - offset("THREAD_REG17", struct task_struct, thread.reg17); - offset("THREAD_REG18", struct task_struct, thread.reg18); - offset("THREAD_REG19", struct task_struct, thread.reg19); - offset("THREAD_REG20", struct task_struct, thread.reg20); - offset("THREAD_REG21", struct task_struct, thread.reg21); - offset("THREAD_REG22", struct task_struct, thread.reg22); - offset("THREAD_REG23", struct task_struct, thread.reg23); - offset("THREAD_REG29", struct task_struct, thread.reg29); - offset("THREAD_REG30", struct 
task_struct, thread.reg30); - offset("THREAD_REG31", struct task_struct, thread.reg31); - offset("THREAD_STATUS", struct task_struct, + COMMENT("MIPS specific thread_struct offsets."); + OFFSET(THREAD_REG16, task_struct, thread.reg16); + OFFSET(THREAD_REG17, task_struct, thread.reg17); + OFFSET(THREAD_REG18, task_struct, thread.reg18); + OFFSET(THREAD_REG19, task_struct, thread.reg19); + OFFSET(THREAD_REG20, task_struct, thread.reg20); + OFFSET(THREAD_REG21, task_struct, thread.reg21); + OFFSET(THREAD_REG22, task_struct, thread.reg22); + OFFSET(THREAD_REG23, task_struct, thread.reg23); + OFFSET(THREAD_REG29, task_struct, thread.reg29); + OFFSET(THREAD_REG30, task_struct, thread.reg30); + OFFSET(THREAD_REG31, task_struct, thread.reg31); + OFFSET(THREAD_STATUS, task_struct, thread.cp0_status); - offset("THREAD_FPU", struct task_struct, thread.fpu); + OFFSET(THREAD_FPU, task_struct, thread.fpu); - offset("THREAD_BVADDR", struct task_struct, \ + OFFSET(THREAD_BVADDR, task_struct, \ thread.cp0_badvaddr); - offset("THREAD_BUADDR", struct task_struct, \ + OFFSET(THREAD_BUADDR, task_struct, \ thread.cp0_baduaddr); - offset("THREAD_ECODE", struct task_struct, \ + OFFSET(THREAD_ECODE, task_struct, \ thread.error_code); - offset("THREAD_TRAPNO", struct task_struct, thread.trap_no); - offset("THREAD_TRAMP", struct task_struct, \ + OFFSET(THREAD_TRAPNO, task_struct, thread.trap_no); + OFFSET(THREAD_TRAMP, task_struct, \ thread.irix_trampoline); - offset("THREAD_OLDCTX", struct task_struct, \ + OFFSET(THREAD_OLDCTX, task_struct, \ thread.irix_oldctx); - linefeed; + BLANK(); } void output_thread_fpu_defines(void) { - offset("THREAD_FPR0", - struct task_struct, thread.fpu.fpr[0]); - offset("THREAD_FPR1", - struct task_struct, thread.fpu.fpr[1]); - offset("THREAD_FPR2", - struct task_struct, thread.fpu.fpr[2]); - offset("THREAD_FPR3", - struct task_struct, thread.fpu.fpr[3]); - offset("THREAD_FPR4", - struct task_struct, thread.fpu.fpr[4]); - offset("THREAD_FPR5", - struct 
task_struct, thread.fpu.fpr[5]); - offset("THREAD_FPR6", - struct task_struct, thread.fpu.fpr[6]); - offset("THREAD_FPR7", - struct task_struct, thread.fpu.fpr[7]); - offset("THREAD_FPR8", - struct task_struct, thread.fpu.fpr[8]); - offset("THREAD_FPR9", - struct task_struct, thread.fpu.fpr[9]); - offset("THREAD_FPR10", - struct task_struct, thread.fpu.fpr[10]); - offset("THREAD_FPR11", - struct task_struct, thread.fpu.fpr[11]); - offset("THREAD_FPR12", - struct task_struct, thread.fpu.fpr[12]); - offset("THREAD_FPR13", - struct task_struct, thread.fpu.fpr[13]); - offset("THREAD_FPR14", - struct task_struct, thread.fpu.fpr[14]); - offset("THREAD_FPR15", - struct task_struct, thread.fpu.fpr[15]); - offset("THREAD_FPR16", - struct task_struct, thread.fpu.fpr[16]); - offset("THREAD_FPR17", - struct task_struct, thread.fpu.fpr[17]); - offset("THREAD_FPR18", - struct task_struct, thread.fpu.fpr[18]); - offset("THREAD_FPR19", - struct task_struct, thread.fpu.fpr[19]); - offset("THREAD_FPR20", - struct task_struct, thread.fpu.fpr[20]); - offset("THREAD_FPR21", - struct task_struct, thread.fpu.fpr[21]); - offset("THREAD_FPR22", - struct task_struct, thread.fpu.fpr[22]); - offset("THREAD_FPR23", - struct task_struct, thread.fpu.fpr[23]); - offset("THREAD_FPR24", - struct task_struct, thread.fpu.fpr[24]); - offset("THREAD_FPR25", - struct task_struct, thread.fpu.fpr[25]); - offset("THREAD_FPR26", - struct task_struct, thread.fpu.fpr[26]); - offset("THREAD_FPR27", - struct task_struct, thread.fpu.fpr[27]); - offset("THREAD_FPR28", - struct task_struct, thread.fpu.fpr[28]); - offset("THREAD_FPR29", - struct task_struct, thread.fpu.fpr[29]); - offset("THREAD_FPR30", - struct task_struct, thread.fpu.fpr[30]); - offset("THREAD_FPR31", - struct task_struct, thread.fpu.fpr[31]); + OFFSET(THREAD_FPR0, task_struct, thread.fpu.fpr[0]); + OFFSET(THREAD_FPR1, task_struct, thread.fpu.fpr[1]); + OFFSET(THREAD_FPR2, task_struct, thread.fpu.fpr[2]); + OFFSET(THREAD_FPR3, task_struct, 
thread.fpu.fpr[3]); + OFFSET(THREAD_FPR4, task_struct, thread.fpu.fpr[4]); + OFFSET(THREAD_FPR5, task_struct, thread.fpu.fpr[5]); + OFFSET(THREAD_FPR6, task_struct, thread.fpu.fpr[6]); + OFFSET(THREAD_FPR7, task_struct, thread.fpu.fpr[7]); + OFFSET(THREAD_FPR8, task_struct, thread.fpu.fpr[8]); + OFFSET(THREAD_FPR9, task_struct, thread.fpu.fpr[9]); + OFFSET(THREAD_FPR10, task_struct, thread.fpu.fpr[10]); + OFFSET(THREAD_FPR11, task_struct, thread.fpu.fpr[11]); + OFFSET(THREAD_FPR12, task_struct, thread.fpu.fpr[12]); + OFFSET(THREAD_FPR13, task_struct, thread.fpu.fpr[13]); + OFFSET(THREAD_FPR14, task_struct, thread.fpu.fpr[14]); + OFFSET(THREAD_FPR15, task_struct, thread.fpu.fpr[15]); + OFFSET(THREAD_FPR16, task_struct, thread.fpu.fpr[16]); + OFFSET(THREAD_FPR17, task_struct, thread.fpu.fpr[17]); + OFFSET(THREAD_FPR18, task_struct, thread.fpu.fpr[18]); + OFFSET(THREAD_FPR19, task_struct, thread.fpu.fpr[19]); + OFFSET(THREAD_FPR20, task_struct, thread.fpu.fpr[20]); + OFFSET(THREAD_FPR21, task_struct, thread.fpu.fpr[21]); + OFFSET(THREAD_FPR22, task_struct, thread.fpu.fpr[22]); + OFFSET(THREAD_FPR23, task_struct, thread.fpu.fpr[23]); + OFFSET(THREAD_FPR24, task_struct, thread.fpu.fpr[24]); + OFFSET(THREAD_FPR25, task_struct, thread.fpu.fpr[25]); + OFFSET(THREAD_FPR26, task_struct, thread.fpu.fpr[26]); + OFFSET(THREAD_FPR27, task_struct, thread.fpu.fpr[27]); + OFFSET(THREAD_FPR28, task_struct, thread.fpu.fpr[28]); + OFFSET(THREAD_FPR29, task_struct, thread.fpu.fpr[29]); + OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]); + OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]); - offset("THREAD_FCR31", - struct task_struct, thread.fpu.fcr31); - linefeed; + OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31); + BLANK(); } void output_mm_defines(void) { - text("Size of struct page"); - size("STRUCT_PAGE_SIZE", struct page); - linefeed; - text("Linux mm_struct offsets."); - offset("MM_USERS", struct mm_struct, mm_users); - offset("MM_PGD", struct mm_struct, pgd); - 
offset("MM_CONTEXT", struct mm_struct, context); - linefeed; - constant("_PAGE_SIZE", PAGE_SIZE); - constant("_PAGE_SHIFT", PAGE_SHIFT); - linefeed; - constant("_PGD_T_SIZE", sizeof(pgd_t)); - constant("_PMD_T_SIZE", sizeof(pmd_t)); - constant("_PTE_T_SIZE", sizeof(pte_t)); - linefeed; - constant("_PGD_T_LOG2", PGD_T_LOG2); - constant("_PMD_T_LOG2", PMD_T_LOG2); - constant("_PTE_T_LOG2", PTE_T_LOG2); - linefeed; - constant("_PGD_ORDER", PGD_ORDER); - constant("_PMD_ORDER", PMD_ORDER); - constant("_PTE_ORDER", PTE_ORDER); - linefeed; - constant("_PMD_SHIFT", PMD_SHIFT); - constant("_PGDIR_SHIFT", PGDIR_SHIFT); - linefeed; - constant("_PTRS_PER_PGD", PTRS_PER_PGD); - constant("_PTRS_PER_PMD", PTRS_PER_PMD); - constant("_PTRS_PER_PTE", PTRS_PER_PTE); - linefeed; + COMMENT("Size of struct page"); + DEFINE(STRUCT_PAGE_SIZE, sizeof(struct page)); + BLANK(); + COMMENT("Linux mm_struct offsets."); + OFFSET(MM_USERS, mm_struct, mm_users); + OFFSET(MM_PGD, mm_struct, pgd); + OFFSET(MM_CONTEXT, mm_struct, context); + BLANK(); + DEFINE(_PAGE_SIZE, PAGE_SIZE); + DEFINE(_PAGE_SHIFT, PAGE_SHIFT); + BLANK(); + DEFINE(_PGD_T_SIZE, sizeof(pgd_t)); + DEFINE(_PMD_T_SIZE, sizeof(pmd_t)); + DEFINE(_PTE_T_SIZE, sizeof(pte_t)); + BLANK(); + DEFINE(_PGD_T_LOG2, PGD_T_LOG2); + DEFINE(_PMD_T_LOG2, PMD_T_LOG2); + DEFINE(_PTE_T_LOG2, PTE_T_LOG2); + BLANK(); + DEFINE(_PGD_ORDER, PGD_ORDER); + DEFINE(_PMD_ORDER, PMD_ORDER); + DEFINE(_PTE_ORDER, PTE_ORDER); + BLANK(); + DEFINE(_PMD_SHIFT, PMD_SHIFT); + DEFINE(_PGDIR_SHIFT, PGDIR_SHIFT); + BLANK(); + DEFINE(_PTRS_PER_PGD, PTRS_PER_PGD); + DEFINE(_PTRS_PER_PMD, PTRS_PER_PMD); + DEFINE(_PTRS_PER_PTE, PTRS_PER_PTE); + BLANK(); } #ifdef CONFIG_32BIT void output_sc_defines(void) { - text("Linux sigcontext offsets."); - offset("SC_REGS", struct sigcontext, sc_regs); - offset("SC_FPREGS", struct sigcontext, sc_fpregs); - offset("SC_ACX", struct sigcontext, sc_acx); - offset("SC_MDHI", struct sigcontext, sc_mdhi); - offset("SC_MDLO", struct sigcontext, 
sc_mdlo); - offset("SC_PC", struct sigcontext, sc_pc); - offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr); - offset("SC_FPC_EIR", struct sigcontext, sc_fpc_eir); - offset("SC_HI1", struct sigcontext, sc_hi1); - offset("SC_LO1", struct sigcontext, sc_lo1); - offset("SC_HI2", struct sigcontext, sc_hi2); - offset("SC_LO2", struct sigcontext, sc_lo2); - offset("SC_HI3", struct sigcontext, sc_hi3); - offset("SC_LO3", struct sigcontext, sc_lo3); - linefeed; + COMMENT("Linux sigcontext offsets."); + OFFSET(SC_REGS, sigcontext, sc_regs); + OFFSET(SC_FPREGS, sigcontext, sc_fpregs); + OFFSET(SC_ACX, sigcontext, sc_acx); + OFFSET(SC_MDHI, sigcontext, sc_mdhi); + OFFSET(SC_MDLO, sigcontext, sc_mdlo); + OFFSET(SC_PC, sigcontext, sc_pc); + OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr); + OFFSET(SC_FPC_EIR, sigcontext, sc_fpc_eir); + OFFSET(SC_HI1, sigcontext, sc_hi1); + OFFSET(SC_LO1, sigcontext, sc_lo1); + OFFSET(SC_HI2, sigcontext, sc_hi2); + OFFSET(SC_LO2, sigcontext, sc_lo2); + OFFSET(SC_HI3, sigcontext, sc_hi3); + OFFSET(SC_LO3, sigcontext, sc_lo3); + BLANK(); } #endif #ifdef CONFIG_64BIT void output_sc_defines(void) { - text("Linux sigcontext offsets."); - offset("SC_REGS", struct sigcontext, sc_regs); - offset("SC_FPREGS", struct sigcontext, sc_fpregs); - offset("SC_MDHI", struct sigcontext, sc_mdhi); - offset("SC_MDLO", struct sigcontext, sc_mdlo); - offset("SC_PC", struct sigcontext, sc_pc); - offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr); - linefeed; + COMMENT("Linux sigcontext offsets."); + OFFSET(SC_REGS, sigcontext, sc_regs); + OFFSET(SC_FPREGS, sigcontext, sc_fpregs); + OFFSET(SC_MDHI, sigcontext, sc_mdhi); + OFFSET(SC_MDLO, sigcontext, sc_mdlo); + OFFSET(SC_PC, sigcontext, sc_pc); + OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr); + BLANK(); } #endif #ifdef CONFIG_MIPS32_COMPAT void output_sc32_defines(void) { - text("Linux 32-bit sigcontext offsets."); - offset("SC32_FPREGS", struct sigcontext32, sc_fpregs); - offset("SC32_FPC_CSR", struct sigcontext32, 
sc_fpc_csr); - offset("SC32_FPC_EIR", struct sigcontext32, sc_fpc_eir); - linefeed; + COMMENT("Linux 32-bit sigcontext offsets."); + OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs); + OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr); + OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir); + BLANK(); } #endif void output_signal_defined(void) { - text("Linux signal numbers."); - constant("_SIGHUP", SIGHUP); - constant("_SIGINT", SIGINT); - constant("_SIGQUIT", SIGQUIT); - constant("_SIGILL", SIGILL); - constant("_SIGTRAP", SIGTRAP); - constant("_SIGIOT", SIGIOT); - constant("_SIGABRT", SIGABRT); - constant("_SIGEMT", SIGEMT); - constant("_SIGFPE", SIGFPE); - constant("_SIGKILL", SIGKILL); - constant("_SIGBUS", SIGBUS); - constant("_SIGSEGV", SIGSEGV); - constant("_SIGSYS", SIGSYS); - constant("_SIGPIPE", SIGPIPE); - constant("_SIGALRM", SIGALRM); - constant("_SIGTERM", SIGTERM); - constant("_SIGUSR1", SIGUSR1); - constant("_SIGUSR2", SIGUSR2); - constant("_SIGCHLD", SIGCHLD); - constant("_SIGPWR", SIGPWR); - constant("_SIGWINCH", SIGWINCH); - constant("_SIGURG", SIGURG); - constant("_SIGIO", SIGIO); - constant("_SIGSTOP", SIGSTOP); - constant("_SIGTSTP", SIGTSTP); - constant("_SIGCONT", SIGCONT); - constant("_SIGTTIN", SIGTTIN); - constant("_SIGTTOU", SIGTTOU); - constant("_SIGVTALRM", SIGVTALRM); - constant("_SIGPROF", SIGPROF); - constant("_SIGXCPU", SIGXCPU); - constant("_SIGXFSZ", SIGXFSZ); - linefeed; + COMMENT("Linux signal numbers."); + DEFINE(_SIGHUP, SIGHUP); + DEFINE(_SIGINT, SIGINT); + DEFINE(_SIGQUIT, SIGQUIT); + DEFINE(_SIGILL, SIGILL); + DEFINE(_SIGTRAP, SIGTRAP); + DEFINE(_SIGIOT, SIGIOT); + DEFINE(_SIGABRT, SIGABRT); + DEFINE(_SIGEMT, SIGEMT); + DEFINE(_SIGFPE, SIGFPE); + DEFINE(_SIGKILL, SIGKILL); + DEFINE(_SIGBUS, SIGBUS); + DEFINE(_SIGSEGV, SIGSEGV); + DEFINE(_SIGSYS, SIGSYS); + DEFINE(_SIGPIPE, SIGPIPE); + DEFINE(_SIGALRM, SIGALRM); + DEFINE(_SIGTERM, SIGTERM); + DEFINE(_SIGUSR1, SIGUSR1); + DEFINE(_SIGUSR2, SIGUSR2); + DEFINE(_SIGCHLD, SIGCHLD); + 
DEFINE(_SIGPWR, SIGPWR); + DEFINE(_SIGWINCH, SIGWINCH); + DEFINE(_SIGURG, SIGURG); + DEFINE(_SIGIO, SIGIO); + DEFINE(_SIGSTOP, SIGSTOP); + DEFINE(_SIGTSTP, SIGTSTP); + DEFINE(_SIGCONT, SIGCONT); + DEFINE(_SIGTTIN, SIGTTIN); + DEFINE(_SIGTTOU, SIGTTOU); + DEFINE(_SIGVTALRM, SIGVTALRM); + DEFINE(_SIGPROF, SIGPROF); + DEFINE(_SIGXCPU, SIGXCPU); + DEFINE(_SIGXFSZ, SIGXFSZ); + BLANK(); } void output_irq_cpustat_t_defines(void) { - text("Linux irq_cpustat_t offsets."); - offset("IC_SOFTIRQ_PENDING", irq_cpustat_t, __softirq_pending); - size("IC_IRQ_CPUSTAT_T", irq_cpustat_t); - linefeed; + COMMENT("Linux irq_cpustat_t offsets."); + DEFINE(IC_SOFTIRQ_PENDING, + offsetof(irq_cpustat_t, __softirq_pending)); + DEFINE(IC_IRQ_CPUSTAT_T, sizeof(irq_cpustat_t)); + BLANK(); } diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c index c11b2494bb6..2ab899c4b4c 100644 --- a/arch/mips/lib/iomap-pci.c +++ b/arch/mips/lib/iomap-pci.c @@ -45,8 +45,8 @@ static void __iomem *ioport_map_pci(struct pci_dev *dev, */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 855977ca51c..6537d90a25b 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -143,9 +143,6 @@ void __init plat_time_init(void) mips_hpt_frequency = 33000000 * 3 * 5; } -/* No other usable initialization hook than this ... 
*/ -extern void (*late_time_init)(void); - unsigned long ocd_base; EXPORT_SYMBOL(ocd_base); diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c index ee2d9f8af5a..2646fcbd7d8 100644 --- a/arch/mn10300/kernel/asm-offsets.c +++ b/arch/mn10300/kernel/asm-offsets.c @@ -7,6 +7,7 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/personality.h> +#include <linux/kbuild.h> #include <asm/ucontext.h> #include <asm/processor.h> #include <asm/thread_info.h> @@ -14,14 +15,6 @@ #include "sigframe.h" #include "mn10300-serial.h" -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->") - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - void foo(void) { OFFSET(SIGCONTEXT_d0, sigcontext, d0); diff --git a/arch/mn10300/unit-asb2305/pci-iomap.c b/arch/mn10300/unit-asb2305/pci-iomap.c index dbceae4307d..c1a8d8f941f 100644 --- a/arch/mn10300/unit-asb2305/pci-iomap.c +++ b/arch/mn10300/unit-asb2305/pci-iomap.c @@ -16,8 +16,8 @@ */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index eaa79bc14d9..3efc0b73e4f 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -32,6 +32,7 @@ #include <linux/thread_info.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/kbuild.h> #include <asm/pgtable.h> #include <asm/ptrace.h> @@ -39,11 +40,6 @@ #include <asm/pdc.h> #include <asm/uaccess.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - 
#ifdef CONFIG_64BIT #define FRAME_SIZE 128 #else diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 9448d4e9114..ccd61b9567a 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -397,10 +397,9 @@ pcxl_dma_init(void) "pcxl_dma_init: Unable to create gsc /proc dir entry\n"); else { struct proc_dir_entry* ent; - ent = create_proc_entry("pcxl_dma", 0, proc_gsc_root); - if (ent) - ent->proc_fops = &proc_pcxl_dma_ops; - else + ent = proc_create("pcxl_dma", 0, proc_gsc_root, + &proc_pcxl_dma_ops); + if (!ent) printk(KERN_WARNING "pci-dma.c: Unable to create pcxl_dma /proc entry.\n"); } diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index f4a811690ab..9abed07db7f 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -438,8 +438,8 @@ void ioport_unmap(void __iomem *addr) /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 4e40c122bf2..3934e265940 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -608,6 +608,19 @@ source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" +config HAS_RAPIDIO + bool + default n + +config RAPIDIO + bool "RapidIO support" + depends on HAS_RAPIDIO + help + If you say Y here, the kernel will include drivers and + infrastructure code to support RapidIO interconnect devices. 
+ +source "drivers/rapidio/Kconfig" + endmenu menu "Advanced setup" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 807a2dce626..a7d24e692ba 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -118,7 +118,6 @@ config XMON_DISASSEMBLY config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" - depends on PPC64 help If you say Y here the kernel will use separate kernel stacks for handling hard and soft interrupts. This can help avoid diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts index 7f9b999843c..1e4bfe9cadb 100644 --- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts +++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts @@ -26,6 +26,7 @@ serial1 = &serial1; pci0 = &pci0; pci1 = &pci1; + rapidio0 = &rapidio0; }; cpus { @@ -500,4 +501,15 @@ 0x0 0x00100000>; }; }; + rapidio0: rapidio@f80c0000 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "fsl,rapidio-delta"; + reg = <0xf80c0000 0x20000>; + ranges = <0 0 0xc0000000 0 0x20000000>; + interrupt-parent = <&mpic>; + /* err_irq bell_outb_irq bell_inb_irq + msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq */ + interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>; + }; }; diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index a20501f8947..88338a9f5e9 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -696,6 +696,7 @@ CONFIG_WINDFARM=y CONFIG_WINDFARM_PM81=y CONFIG_WINDFARM_PM91=y CONFIG_WINDFARM_PM112=y +CONFIG_WINDFARM_PM121=y # CONFIG_PMAC_RACKMETER is not set CONFIG_NETDEVICES=y # CONFIG_NETDEVICES_MULTIQUEUE is not set diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9177b21b1a9..d14cebf62bb 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -73,7 +73,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ pci-common.o 
obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_RAPIDIO) += rio.o obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ machine_kexec_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_AUDIT) += audit.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 62134845af0..ec9228d687b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -30,6 +30,7 @@ #include <linux/time.h> #include <linux/hardirq.h> #endif +#include <linux/kbuild.h> #include <asm/io.h> #include <asm/page.h> @@ -51,11 +52,6 @@ #include <asm/iseries/alpaca.h> #endif -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); @@ -67,6 +63,7 @@ int main(void) #endif /* CONFIG_PPC64 */ DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 84c86863306..0c8614d9875 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -137,11 +137,12 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - lwz r9,THREAD_INFO-THREAD(r12) - cmplw r1,r9 /* if r1 <= current->thread_info */ + lwz r9,KSP_LIMIT(r12) + cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ 5: #ifdef CONFIG_6xx + rlwinm r9,r1,0,0,31-THREAD_SHIFT tophys(r9,r9) /* check local flags */ lwz r12,TI_LOCAL_FLAGS(r9) mtcrf 0x01,r12 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 215973a2c8d..024805e1747 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -239,6 
+239,10 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM +BEGIN_FTR_SECTION + cmpdi r0,0x1ebe + beq- 1f +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) mr r9,r13 mfmsr r10 mfspr r13,SPRN_SPRG3 @@ -253,6 +257,13 @@ system_call_pSeries: rfid b . /* prevent speculative execution */ +/* Fast LE/BE switch system call */ +1: mfspr r12,SPRN_SRR1 + xori r12,r12,MSR_LE + mtspr SPRN_SRR1,r12 + rfid /* return to userspace */ + b . + STD_EXCEPTION_PSERIES(0xd00, single_step) STD_EXCEPTION_PSERIES(0xe00, trap_0e) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 425616f92d1..2f73f705d56 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -307,6 +307,7 @@ void do_IRQ(struct pt_regs *regs) if (curtp != irqtp) { struct irq_desc *desc = irq_desc + irq; void *handler = desc->handle_irq; + unsigned long saved_sp_limit = current->thread.ksp_limit; if (handler == NULL) handler = &__do_IRQ; irqtp->task = curtp->task; @@ -319,7 +320,10 @@ void do_IRQ(struct pt_regs *regs) (irqtp->preempt_count & ~SOFTIRQ_MASK) | (curtp->preempt_count & SOFTIRQ_MASK); + current->thread.ksp_limit = (unsigned long)irqtp + + _ALIGN_UP(sizeof(struct thread_info), 16); call_handle_irq(irq, desc, irqtp, handler); + current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; @@ -352,9 +356,7 @@ void __init init_IRQ(void) { if (ppc_md.init_IRQ) ppc_md.init_IRQ(); -#ifdef CONFIG_PPC64 irq_ctx_init(); -#endif } @@ -383,11 +385,15 @@ void irq_ctx_init(void) static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; + unsigned long saved_sp_limit = current->thread.ksp_limit; curtp = current_thread_info(); irqtp = softirq_ctx[smp_processor_id()]; irqtp->task = curtp->task; + current->thread.ksp_limit = (unsigned long)irqtp + + _ALIGN_UP(sizeof(struct thread_info), 16); call_do_softirq(irqtp); + current->thread.ksp_limit = saved_sp_limit; irqtp->task = NULL; } diff --git a/arch/powerpc/kernel/lparcfg.c 
b/arch/powerpc/kernel/lparcfg.c index 1ffacc698ff..1e656b43ad7 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -591,10 +591,8 @@ int __init lparcfg_init(void) !firmware_has_feature(FW_FEATURE_ISERIES)) mode |= S_IWUSR; - ent = create_proc_entry("ppc64/lparcfg", mode, NULL); - if (ent) { - ent->proc_fops = &lparcfg_fops; - } else { + ent = proc_create("ppc64/lparcfg", mode, NULL, &lparcfg_fops); + if (!ent) { printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); return -EIO; } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 92ccc6fcc5b..89aaaa6f356 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -32,6 +32,31 @@ .text +#ifdef CONFIG_IRQSTACKS +_GLOBAL(call_do_softirq) + mflr r0 + stw r0,4(r1) + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + mr r1,r3 + bl __do_softirq + lwz r1,0(r1) + lwz r0,4(r1) + mtlr r0 + blr + +_GLOBAL(call_handle_irq) + mflr r0 + stw r0,4(r1) + mtctr r6 + stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) + mr r1,r5 + bctrl + lwz r1,0(r1) + lwz r0,4(r1) + mtlr r0 + blr +#endif /* CONFIG_IRQSTACKS */ + /* * This returns the high 64 bits of the product of two 64-bit numbers. 
*/ diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c index f78dfce1b77..c647ddef40d 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -68,12 +68,11 @@ static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; - pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL); + pde = proc_create_data("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL, + &page_map_fops, vdso_data); if (!pde) return 1; - pde->data = vdso_data; pde->size = PAGE_SIZE; - pde->proc_fops = &page_map_fops; return 0; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6caad17ea72..7de41c3948e 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -589,6 +589,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, kregs = (struct pt_regs *) sp; sp -= STACK_FRAME_OVERHEAD; p->thread.ksp = sp; + p->thread.ksp_limit = (unsigned long)task_stack_page(p) + + _ALIGN_UP(sizeof(struct thread_info), 16); #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SLB)) { diff --git a/arch/powerpc/kernel/rio.c b/arch/powerpc/kernel/rio.c deleted file mode 100644 index 29487fedfc7..00000000000 --- a/arch/powerpc/kernel/rio.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * RapidIO PPC32 support - * - * Copyright 2005 MontaVista Software, Inc. - * Matt Porter <mporter@kernel.crashing.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/rio.h> - -#include <asm/rio.h> - -/** - * platform_rio_init - Do platform specific RIO init - * - * Any platform specific initialization of RapdIO - * hardware is done here as well as registration - * of any active master ports in the system. 
- */ -void __attribute__ ((weak)) - platform_rio_init(void) -{ - printk(KERN_WARNING "RIO: No platform_rio_init() present\n"); -} - -/** - * ppc_rio_init - Do PPC32 RIO init - * - * Calls platform-specific RIO init code and then calls - * rio_init_mports() to initialize any master ports that - * have been registered with the RIO subsystem. - */ -static int __init ppc_rio_init(void) -{ - printk(KERN_INFO "RIO: RapidIO init\n"); - - /* Platform specific initialization */ - platform_rio_init(); - - /* Enumerate all registered ports */ - rio_init_mports(); - - return 0; -} - -subsys_initcall(ppc_rio_init); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index f2e3bc714d7..f9c6abc84a9 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -255,8 +255,6 @@ static void check_location(struct seq_file *m, const char *c); static int __init proc_rtas_init(void) { - struct proc_dir_entry *entry; - if (!machine_is(pseries)) return -ENODEV; @@ -264,35 +262,20 @@ static int __init proc_rtas_init(void) if (rtas_node == NULL) return -ENODEV; - entry = create_proc_entry("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_progress_operations; - - entry = create_proc_entry("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_clock_operations; - - entry = create_proc_entry("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_poweron_operations; - - entry = create_proc_entry("ppc64/rtas/sensors", S_IRUGO, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_sensors_operations; - - entry = create_proc_entry("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, - NULL); - if (entry) - entry->proc_fops = &ppc_rtas_tone_freq_operations; - - entry = create_proc_entry("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_tone_volume_operations; - - entry = create_proc_entry("ppc64/rtas/rmo_buffer", 
S_IRUSR, NULL); - if (entry) - entry->proc_fops = &ppc_rtas_rmo_buf_ops; - + proc_create("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL, + &ppc_rtas_progress_operations); + proc_create("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL, + &ppc_rtas_clock_operations); + proc_create("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_poweron_operations); + proc_create("ppc64/rtas/sensors", S_IRUGO, NULL, + &ppc_rtas_sensors_operations); + proc_create("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_tone_freq_operations); + proc_create("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL, + &ppc_rtas_tone_volume_operations); + proc_create("ppc64/rtas/rmo_buffer", S_IRUSR, NULL, + &ppc_rtas_rmo_buf_ops); return 0; } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 627f126d184..0a5e22b2272 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -704,18 +704,11 @@ static int initialize_flash_pde_data(const char *rtas_call_name, static struct proc_dir_entry *create_flash_pde(const char *filename, const struct file_operations *fops) { - struct proc_dir_entry *ent = NULL; - - ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL); - if (ent != NULL) { - ent->proc_fops = fops; - ent->owner = THIS_MODULE; - } - - return ent; + return proc_create(filename, S_IRUSR | S_IWUSR, NULL, fops); } static const struct file_operations rtas_flash_operations = { + .owner = THIS_MODULE, .read = rtas_flash_read, .write = rtas_flash_write, .open = rtas_excl_open, @@ -723,6 +716,7 @@ static const struct file_operations rtas_flash_operations = { }; static const struct file_operations manage_flash_operations = { + .owner = THIS_MODULE, .read = manage_flash_read, .write = manage_flash_write, .open = rtas_excl_open, @@ -730,6 +724,7 @@ static const struct file_operations manage_flash_operations = { }; static const struct file_operations validate_flash_operations = { + .owner = THIS_MODULE, .read = validate_flash_read, .write = 
validate_flash_write, .open = rtas_excl_open, diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 36f6779c88d..5112a4aa801 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -16,6 +16,7 @@ #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/console.h> +#include <linux/lmb.h> #include <asm/io.h> #include <asm/prom.h> @@ -229,6 +230,24 @@ int __init ppc_init(void) arch_initcall(ppc_init); +#ifdef CONFIG_IRQSTACKS +static void __init irqstack_early_init(void) +{ + unsigned int i; + + /* interrupt stacks must be in lowmem, we get that for free on ppc32 + * as the lmb is limited to lowmem by LMB_REAL_LIMIT */ + for_each_possible_cpu(i) { + softirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + hardirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE)); + } +} +#else +#define irqstack_early_init() +#endif + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -286,6 +305,8 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; + irqstack_early_init(); + /* set up the bootmem stuff with available memory */ do_init_bootmem(); if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d9e37f365b5..f67e118116f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -154,19 +154,35 @@ out: /* * walk_memory_resource() needs to make sure there is no holes in a given - * memory range. On PPC64, since this range comes from /sysfs, the range - * is guaranteed to be valid, non-overlapping and can not contain any - * holes. By the time we get here (memory add or remove), /proc/device-tree - * is updated and correct. Only reason we need to check against device-tree - * would be if we allow user-land to specify a memory range through a - * system call/ioctl etc. 
instead of doing offline/online through /sysfs. + * memory range. PPC64 does not maintain the memory layout in /proc/iomem. + * Instead it maintains it in lmb.memory structures. Walk through the + * memory regions, find holes and callback for contiguous regions. */ int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - return (*func)(start_pfn, nr_pages, arg); + struct lmb_property res; + unsigned long pfn, len; + u64 end; + int ret = -1; + + res.base = (u64) start_pfn << PAGE_SHIFT; + res.size = (u64) nr_pages << PAGE_SHIFT; + + end = res.base + res.size - 1; + while ((res.base < end) && (lmb_find(&res) >= 0)) { + pfn = (unsigned long)(res.base >> PAGE_SHIFT); + len = (unsigned long)(res.size >> PAGE_SHIFT); + ret = (*func)(pfn, len, arg); + if (ret) + break; + res.base += (res.size + 1); + res.size = (end - res.base + 1); + } + return ret; } +EXPORT_SYMBOL_GPL(walk_memory_resource); #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 7442c58d44f..053f49a1dca 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -8,6 +8,7 @@ config MPC8641_HPCN select PPC_I8259 select DEFAULT_UIMAGE select FSL_ULI1575 + select HAS_RAPIDIO help This option enables support for the MPC8641 HPCN board. 
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index f947f555fd4..f13704aabbe 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -221,6 +221,7 @@ mpc86xx_time_init(void) static __initdata struct of_device_id of_bus_ids[] = { { .compatible = "simple-bus", }, + { .compatible = "fsl,rapidio-delta", }, {}, }; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 00528ef84ad..45dcd269350 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1063,10 +1063,9 @@ int __init spu_sched_init(void) mod_timer(&spuloadavg_timer, 0); - entry = create_proc_entry("spu_loadavg", 0, NULL); + entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); if (!entry) goto out_stop_kthread; - entry->proc_fops = &spu_loadavg_fops; pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c index 79aa773f3c9..aea5286f124 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.c +++ b/arch/powerpc/platforms/cell/spufs/sputrace.c @@ -201,10 +201,9 @@ static int __init sputrace_init(void) if (!sputrace_log) goto out; - entry = create_proc_entry("sputrace", S_IRUSR, NULL); + entry = proc_create("sputrace", S_IRUSR, NULL, &sputrace_fops); if (!entry) goto out_free_log; - entry->proc_fops = &sputrace_fops; for (i = 0; i < ARRAY_SIZE(spu_probes); i++) { struct spu_probe *p = &spu_probes[i]; diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c index e5b40e3e008..b0f8a857ec0 100644 --- a/arch/powerpc/platforms/iseries/lpevents.c +++ b/arch/powerpc/platforms/iseries/lpevents.c @@ -330,15 +330,11 @@ static const struct file_operations proc_lpevents_operations = { static int __init 
proc_lpevents_init(void) { - struct proc_dir_entry *e; - if (!firmware_has_feature(FW_FEATURE_ISERIES)) return 0; - e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL); - if (e) - e->proc_fops = &proc_lpevents_operations; - + proc_create("iSeries/lpevents", S_IFREG|S_IRUGO, NULL, + &proc_lpevents_operations); return 0; } __initcall(proc_lpevents_init); diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index c0f2433bc16..1dc7295746d 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -1255,11 +1255,11 @@ static int __init mf_proc_init(void) if (i == 3) /* no vmlinux entry for 'D' */ continue; - ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf); + ent = proc_create_data("vmlinux", S_IFREG|S_IWUSR, mf, + &proc_vmlinux_operations, + (void *)(long)i); if (!ent) return 1; - ent->data = (void *)(long)i; - ent->proc_fops = &proc_vmlinux_operations; } ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); diff --git a/arch/powerpc/platforms/iseries/proc.c b/arch/powerpc/platforms/iseries/proc.c index f2cde418020..91f4c6cd4b9 100644 --- a/arch/powerpc/platforms/iseries/proc.c +++ b/arch/powerpc/platforms/iseries/proc.c @@ -110,15 +110,11 @@ static const struct file_operations proc_titantod_operations = { static int __init iseries_proc_init(void) { - struct proc_dir_entry *e; - if (!firmware_has_feature(FW_FEATURE_ISERIES)) return 0; - e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL); - if (e) - e->proc_fops = &proc_titantod_operations; - + proc_create("iSeries/titanTod", S_IFREG|S_IRUGO, NULL, + &proc_titantod_operations); return 0; } __initcall(iseries_proc_init); diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index df23331eb25..49ff4dc422b 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -180,15 +180,10 @@ static const struct file_operations 
proc_viopath_operations = { static int __init vio_proc_init(void) { - struct proc_dir_entry *e; - if (!firmware_has_feature(FW_FEATURE_ISERIES)) return 0; - e = create_proc_entry("iSeries/config", 0, NULL); - if (e) - e->proc_fops = &proc_viopath_operations; - + proc_create("iSeries/config", 0, NULL, &proc_viopath_operations); return 0; } __initcall(vio_proc_init); diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 78093d7f97a..4d72c8f7215 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -6,7 +6,10 @@ obj-y += pic.o setup.o time.o feature.o pci.o \ obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o -obj-$(CONFIG_NVRAM) += nvram.o +# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really +# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really +# CONFIG_NVRAM=y +obj-$(CONFIG_NVRAM:m=y) += nvram.o # ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff obj-$(CONFIG_PPC64) += nvram.o obj-$(CONFIG_PPC32) += bootx_init.o diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index bf44c5441a3..00bd0166d07 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -337,7 +337,8 @@ static void __init pmac_setup_arch(void) find_via_pmu(); smu_init(); -#if defined(CONFIG_NVRAM) || defined(CONFIG_PPC64) +#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \ + defined(CONFIG_PPC64) pmac_nvram_init(); #endif diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index bd2593ed28d..554c6e42ef2 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_HOTPLUG_CPU) += 
hotplug-cpu.o +obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index a3fd56b186e..6f544ba4b37 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -1259,14 +1259,8 @@ static const struct file_operations proc_eeh_operations = { static int __init eeh_init_proc(void) { - struct proc_dir_entry *e; - - if (machine_is(pseries)) { - e = create_proc_entry("ppc64/eeh", 0, NULL); - if (e) - e->proc_fops = &proc_eeh_operations; - } - + if (machine_is(pseries)) + proc_create("ppc64/eeh", 0, NULL, &proc_eeh_operations); return 0; } __initcall(eeh_init_proc); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c new file mode 100644 index 00000000000..3c5727dd5aa --- /dev/null +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -0,0 +1,141 @@ +/* + * pseries Memory Hotplug infrastructure. + * + * Copyright (C) 2008 Badari Pulavarty, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/of.h> +#include <linux/lmb.h> +#include <asm/firmware.h> +#include <asm/machdep.h> +#include <asm/pSeries_reconfig.h> + +static int pseries_remove_memory(struct device_node *np) +{ + const char *type; + const unsigned int *my_index; + const unsigned int *regs; + u64 start_pfn, start; + struct zone *zone; + int ret = -EINVAL; + + /* + * Check to see if we are actually removing memory + */ + type = of_get_property(np, "device_type", NULL); + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* + * Find the memory index and size of the removing section + */ + my_index = of_get_property(np, "ibm,my-drc-index", NULL); + if (!my_index) + return ret; + + regs = of_get_property(np, "reg", NULL); + if (!regs) + return ret; + + start_pfn = section_nr_to_pfn(*my_index & 0xffff); + zone = page_zone(pfn_to_page(start_pfn)); + + /* + * Remove section mappings and sysfs entries for the + * section of the memory we are removing. + * + * NOTE: Ideally, this should be done in generic code like + * remove_memory(). But remove_memory() gets called by writing + * to sysfs "state" file and we can't remove sysfs entries + * while writing to it. So we have to defer it to here. 
+ */ + ret = __remove_pages(zone, start_pfn, regs[3] >> PAGE_SHIFT); + if (ret) + return ret; + + /* + * Update memory regions for memory remove + */ + lmb_remove(start_pfn << PAGE_SHIFT, regs[3]); + + /* + * Remove htab bolted mappings for this section of memory + */ + start = (unsigned long)__va(start_pfn << PAGE_SHIFT); + ret = remove_section_mapping(start, start + regs[3]); + return ret; +} + +static int pseries_add_memory(struct device_node *np) +{ + const char *type; + const unsigned int *my_index; + const unsigned int *regs; + u64 start_pfn; + int ret = -EINVAL; + + /* + * Check to see if we are actually adding memory + */ + type = of_get_property(np, "device_type", NULL); + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* + * Find the memory index and size of the added section + */ + my_index = of_get_property(np, "ibm,my-drc-index", NULL); + if (!my_index) + return ret; + + regs = of_get_property(np, "reg", NULL); + if (!regs) + return ret; + + start_pfn = section_nr_to_pfn(*my_index & 0xffff); + + /* + * Update memory region to represent the memory add + */ + lmb_add(start_pfn << PAGE_SHIFT, regs[3]); + return 0; +} + +static int pseries_memory_notifier(struct notifier_block *nb, + unsigned long action, void *node) +{ + int err = NOTIFY_OK; + + switch (action) { + case PSERIES_RECONFIG_ADD: + if (pseries_add_memory(node)) + err = NOTIFY_BAD; + break; + case PSERIES_RECONFIG_REMOVE: + if (pseries_remove_memory(node)) + err = NOTIFY_BAD; + break; + default: + err = NOTIFY_DONE; + break; + } + return err; +} + +static struct notifier_block pseries_mem_nb = { + .notifier_call = pseries_memory_notifier, +}; + +static int __init pseries_memory_hotplug_init(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + pSeries_reconfig_notifier_register(&pseries_mem_nb); + + return 0; +} +machine_device_initcall(pseries, pseries_memory_hotplug_init); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c 
index ac75c10de27..75769aae41d 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -512,12 +512,9 @@ static int proc_ppc64_create_ofdt(void) if (!machine_is(pseries)) return 0; - ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); - if (ent) { - ent->data = NULL; + ent = proc_create("ppc64/ofdt", S_IWUSR, NULL, &ofdt_fops); + if (ent) ent->size = 0; - ent->proc_fops = &ofdt_fops; - } return 0; } diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index befadd4f952..7d3e2b0bd4d 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -468,10 +468,9 @@ static int __init rtas_init(void) return -ENOMEM; } - entry = create_proc_entry("ppc64/rtas/error_log", S_IRUSR, NULL); - if (entry) - entry->proc_fops = &proc_rtas_log_operations; - else + entry = proc_create("ppc64/rtas/error_log", S_IRUSR, NULL, + &proc_rtas_log_operations); + if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); if (kernel_thread(rtasd, NULL, CLONE_FS) < 0) diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index af2425e4655..3d920376f58 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -1,5 +1,8 @@ /* - * MPC85xx RapidIO support + * Freescale MPC85xx/MPC86xx RapidIO support + * + * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. + * Zhang Wei <wei.zhang@freescale.com> * * Copyright 2005 MontaVista Software, Inc. 
* Matt Porter <mporter@kernel.crashing.org> @@ -17,12 +20,23 @@ #include <linux/interrupt.h> #include <linux/rio.h> #include <linux/rio_drv.h> +#include <linux/of_platform.h> +#include <linux/delay.h> #include <asm/io.h> -#define RIO_REGS_BASE (CCSRBAR + 0xc0000) +/* RapidIO IRQ definitions, which are read from the OF tree */ +#define IRQ_RIO_BELL(m) (((struct rio_priv *)(m->priv))->bellirq) +#define IRQ_RIO_TX(m) (((struct rio_priv *)(m->priv))->txirq) +#define IRQ_RIO_RX(m) (((struct rio_priv *)(m->priv))->rxirq) + #define RIO_ATMU_REGS_OFFSET 0x10c00 -#define RIO_MSG_REGS_OFFSET 0x11000 +#define RIO_P_MSG_REGS_OFFSET 0x11000 +#define RIO_S_MSG_REGS_OFFSET 0x13000 +#define RIO_ESCSR 0x158 +#define RIO_CCSR 0x15c +#define RIO_ISR_AACR 0x10120 +#define RIO_ISR_AACR_AA 0x1 /* Accept All ID */ #define RIO_MAINT_WIN_SIZE 0x400000 #define RIO_DBELL_WIN_SIZE 0x1000 @@ -50,18 +64,18 @@ #define DOORBELL_DSR_TE 0x00000080 #define DOORBELL_DSR_QFI 0x00000010 #define DOORBELL_DSR_DIQI 0x00000001 -#define DOORBELL_TID_OFFSET 0x03 -#define DOORBELL_SID_OFFSET 0x05 +#define DOORBELL_TID_OFFSET 0x02 +#define DOORBELL_SID_OFFSET 0x04 #define DOORBELL_INFO_OFFSET 0x06 #define DOORBELL_MESSAGE_SIZE 0x08 -#define DBELL_SID(x) (*(u8 *)(x + DOORBELL_SID_OFFSET)) -#define DBELL_TID(x) (*(u8 *)(x + DOORBELL_TID_OFFSET)) +#define DBELL_SID(x) (*(u16 *)(x + DOORBELL_SID_OFFSET)) +#define DBELL_TID(x) (*(u16 *)(x + DOORBELL_TID_OFFSET)) #define DBELL_INF(x) (*(u16 *)(x + DOORBELL_INFO_OFFSET)) struct rio_atmu_regs { u32 rowtar; - u32 pad1; + u32 rowtear; u32 rowbar; u32 pad2; u32 rowar; @@ -87,7 +101,15 @@ struct rio_msg_regs { u32 ifqdpar; u32 pad6; u32 ifqepar; - u32 pad7[250]; + u32 pad7[226]; + u32 odmr; + u32 odsr; + u32 res0[4]; + u32 oddpr; + u32 oddatr; + u32 res1[3]; + u32 odretcr; + u32 res2[12]; u32 dmr; u32 dsr; u32 pad8; @@ -112,20 +134,12 @@ struct rio_tx_desc { u32 res4; }; -static u32 regs_win; -static struct rio_atmu_regs *atmu_regs; -static struct rio_atmu_regs *maint_atmu_regs; 
-static struct rio_atmu_regs *dbell_atmu_regs; -static u32 dbell_win; -static u32 maint_win; -static struct rio_msg_regs *msg_regs; - -static struct rio_dbell_ring { +struct rio_dbell_ring { void *virt; dma_addr_t phys; -} dbell_ring; +}; -static struct rio_msg_tx_ring { +struct rio_msg_tx_ring { void *virt; dma_addr_t phys; void *virt_buffer[RIO_MAX_TX_RING_SIZE]; @@ -133,19 +147,35 @@ static struct rio_msg_tx_ring { int tx_slot; int size; void *dev_id; -} msg_tx_ring; +}; -static struct rio_msg_rx_ring { +struct rio_msg_rx_ring { void *virt; dma_addr_t phys; void *virt_buffer[RIO_MAX_RX_RING_SIZE]; int rx_slot; int size; void *dev_id; -} msg_rx_ring; +}; + +struct rio_priv { + void __iomem *regs_win; + struct rio_atmu_regs __iomem *atmu_regs; + struct rio_atmu_regs __iomem *maint_atmu_regs; + struct rio_atmu_regs __iomem *dbell_atmu_regs; + void __iomem *dbell_win; + void __iomem *maint_win; + struct rio_msg_regs __iomem *msg_regs; + struct rio_dbell_ring dbell_ring; + struct rio_msg_tx_ring msg_tx_ring; + struct rio_msg_rx_ring msg_rx_ring; + int bellirq; + int txirq; + int rxirq; +}; /** - * mpc85xx_rio_doorbell_send - Send a MPC85xx doorbell message + * fsl_rio_doorbell_send - Send a MPC85xx doorbell message * @index: ID of RapidIO interface * @destid: Destination ID of target device * @data: 16-bit info field of RapidIO doorbell message @@ -153,18 +183,34 @@ static struct rio_msg_rx_ring { * Sends a MPC85xx doorbell message. Returns %0 on success or * %-EINVAL on failure. 
*/ -static int mpc85xx_rio_doorbell_send(int index, u16 destid, u16 data) +static int fsl_rio_doorbell_send(struct rio_mport *mport, + int index, u16 destid, u16 data) { - pr_debug("mpc85xx_doorbell_send: index %d destid %4.4x data %4.4x\n", + struct rio_priv *priv = mport->priv; + pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n", index, destid, data); - out_be32((void *)&dbell_atmu_regs->rowtar, destid << 22); - out_be16((void *)(dbell_win), data); + switch (mport->phy_type) { + case RIO_PHY_PARALLEL: + out_be32(&priv->dbell_atmu_regs->rowtar, destid << 22); + out_be16(priv->dbell_win, data); + break; + case RIO_PHY_SERIAL: + /* In the serial version silicons, such as MPC8548, MPC8641, + * below operations is must be. + */ + out_be32(&priv->msg_regs->odmr, 0x00000000); + out_be32(&priv->msg_regs->odretcr, 0x00000004); + out_be32(&priv->msg_regs->oddpr, destid << 16); + out_be32(&priv->msg_regs->oddatr, data); + out_be32(&priv->msg_regs->odmr, 0x00000001); + break; + } return 0; } /** - * mpc85xx_local_config_read - Generate a MPC85xx local config space read + * fsl_local_config_read - Generate a MPC85xx local config space read * @index: ID of RapdiIO interface * @offset: Offset into configuration space * @len: Length (in bytes) of the maintenance transaction @@ -173,17 +219,19 @@ static int mpc85xx_rio_doorbell_send(int index, u16 destid, u16 data) * Generates a MPC85xx local configuration space read. Returns %0 on * success or %-EINVAL on failure. 
*/ -static int mpc85xx_local_config_read(int index, u32 offset, int len, u32 * data) +static int fsl_local_config_read(struct rio_mport *mport, + int index, u32 offset, int len, u32 *data) { - pr_debug("mpc85xx_local_config_read: index %d offset %8.8x\n", index, + struct rio_priv *priv = mport->priv; + pr_debug("fsl_local_config_read: index %d offset %8.8x\n", index, offset); - *data = in_be32((void *)(regs_win + offset)); + *data = in_be32(priv->regs_win + offset); return 0; } /** - * mpc85xx_local_config_write - Generate a MPC85xx local config space write + * fsl_local_config_write - Generate a MPC85xx local config space write * @index: ID of RapdiIO interface * @offset: Offset into configuration space * @len: Length (in bytes) of the maintenance transaction @@ -192,18 +240,20 @@ static int mpc85xx_local_config_read(int index, u32 offset, int len, u32 * data) * Generates a MPC85xx local configuration space write. Returns %0 on * success or %-EINVAL on failure. */ -static int mpc85xx_local_config_write(int index, u32 offset, int len, u32 data) +static int fsl_local_config_write(struct rio_mport *mport, + int index, u32 offset, int len, u32 data) { + struct rio_priv *priv = mport->priv; pr_debug - ("mpc85xx_local_config_write: index %d offset %8.8x data %8.8x\n", + ("fsl_local_config_write: index %d offset %8.8x data %8.8x\n", index, offset, data); - out_be32((void *)(regs_win + offset), data); + out_be32(priv->regs_win + offset, data); return 0; } /** - * mpc85xx_rio_config_read - Generate a MPC85xx read maintenance transaction + * fsl_rio_config_read - Generate a MPC85xx read maintenance transaction * @index: ID of RapdiIO interface * @destid: Destination ID of transaction * @hopcount: Number of hops to target device @@ -215,18 +265,19 @@ static int mpc85xx_local_config_write(int index, u32 offset, int len, u32 data) * success or %-EINVAL on failure. 
*/ static int -mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 * val) +fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 *val) { + struct rio_priv *priv = mport->priv; u8 *data; pr_debug - ("mpc85xx_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n", + ("fsl_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n", index, destid, hopcount, offset, len); - out_be32((void *)&maint_atmu_regs->rowtar, + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9)); - data = (u8 *) maint_win + offset; + data = (u8 *) priv->maint_win + offset; switch (len) { case 1: *val = in_8((u8 *) data); @@ -243,7 +294,7 @@ mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len, } /** - * mpc85xx_rio_config_write - Generate a MPC85xx write maintenance transaction + * fsl_rio_config_write - Generate a MPC85xx write maintenance transaction * @index: ID of RapdiIO interface * @destid: Destination ID of transaction * @hopcount: Number of hops to target device @@ -255,17 +306,18 @@ mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len, * success or %-EINVAL on failure. 
*/ static int -mpc85xx_rio_config_write(int index, u16 destid, u8 hopcount, u32 offset, - int len, u32 val) +fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 val) { + struct rio_priv *priv = mport->priv; u8 *data; pr_debug - ("mpc85xx_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n", + ("fsl_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n", index, destid, hopcount, offset, len, val); - out_be32((void *)&maint_atmu_regs->rowtar, + out_be32(&priv->maint_atmu_regs->rowtar, (destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9)); - data = (u8 *) maint_win + offset; + data = (u8 *) priv->maint_win + offset; switch (len) { case 1: out_8((u8 *) data, val); @@ -296,9 +348,10 @@ int rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, void *buffer, size_t len) { + struct rio_priv *priv = mport->priv; u32 omr; - struct rio_tx_desc *desc = - (struct rio_tx_desc *)msg_tx_ring.virt + msg_tx_ring.tx_slot; + struct rio_tx_desc *desc = (struct rio_tx_desc *)priv->msg_tx_ring.virt + + priv->msg_tx_ring.tx_slot; int ret = 0; pr_debug @@ -311,31 +364,43 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, } /* Copy and clear rest of buffer */ - memcpy(msg_tx_ring.virt_buffer[msg_tx_ring.tx_slot], buffer, len); + memcpy(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot], buffer, + len); if (len < (RIO_MAX_MSG_SIZE - 4)) - memset((void *)((u32) msg_tx_ring. 
- virt_buffer[msg_tx_ring.tx_slot] + len), 0, - RIO_MAX_MSG_SIZE - len); + memset(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot] + + len, 0, RIO_MAX_MSG_SIZE - len); - /* Set mbox field for message */ - desc->dport = mbox & 0x3; + switch (mport->phy_type) { + case RIO_PHY_PARALLEL: + /* Set mbox field for message */ + desc->dport = mbox & 0x3; - /* Enable EOMI interrupt, set priority, and set destid */ - desc->dattr = 0x28000000 | (rdev->destid << 2); + /* Enable EOMI interrupt, set priority, and set destid */ + desc->dattr = 0x28000000 | (rdev->destid << 2); + break; + case RIO_PHY_SERIAL: + /* Set mbox field for message, and set destid */ + desc->dport = (rdev->destid << 16) | (mbox & 0x3); + + /* Enable EOMI interrupt and priority */ + desc->dattr = 0x28000000; + break; + } /* Set transfer size aligned to next power of 2 (in double words) */ desc->dwcnt = is_power_of_2(len) ? len : 1 << get_bitmask_order(len); /* Set snooping and source buffer address */ - desc->saddr = 0x00000004 | msg_tx_ring.phys_buffer[msg_tx_ring.tx_slot]; + desc->saddr = 0x00000004 + | priv->msg_tx_ring.phys_buffer[priv->msg_tx_ring.tx_slot]; /* Increment enqueue pointer */ - omr = in_be32((void *)&msg_regs->omr); - out_be32((void *)&msg_regs->omr, omr | RIO_MSG_OMR_MUI); + omr = in_be32(&priv->msg_regs->omr); + out_be32(&priv->msg_regs->omr, omr | RIO_MSG_OMR_MUI); /* Go to next descriptor */ - if (++msg_tx_ring.tx_slot == msg_tx_ring.size) - msg_tx_ring.tx_slot = 0; + if (++priv->msg_tx_ring.tx_slot == priv->msg_tx_ring.size) + priv->msg_tx_ring.tx_slot = 0; out: return ret; @@ -344,7 +409,7 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox, EXPORT_SYMBOL_GPL(rio_hw_add_outb_message); /** - * mpc85xx_rio_tx_handler - MPC85xx outbound message interrupt handler + * fsl_rio_tx_handler - MPC85xx outbound message interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data * @@ -352,32 +417,34 @@ 
EXPORT_SYMBOL_GPL(rio_hw_add_outb_message); * mailbox event handler and acks the interrupt occurrence. */ static irqreturn_t -mpc85xx_rio_tx_handler(int irq, void *dev_instance) +fsl_rio_tx_handler(int irq, void *dev_instance) { int osr; struct rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - osr = in_be32((void *)&msg_regs->osr); + osr = in_be32(&priv->msg_regs->osr); if (osr & RIO_MSG_OSR_TE) { pr_info("RIO: outbound message transmission error\n"); - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_TE); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_TE); goto out; } if (osr & RIO_MSG_OSR_QOI) { pr_info("RIO: outbound message queue overflow\n"); - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_QOI); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_QOI); goto out; } if (osr & RIO_MSG_OSR_EOMI) { - u32 dqp = in_be32((void *)&msg_regs->odqdpar); - int slot = (dqp - msg_tx_ring.phys) >> 5; - port->outb_msg[0].mcback(port, msg_tx_ring.dev_id, -1, slot); + u32 dqp = in_be32(&priv->msg_regs->odqdpar); + int slot = (dqp - priv->msg_tx_ring.phys) >> 5; + port->outb_msg[0].mcback(port, priv->msg_tx_ring.dev_id, -1, + slot); /* Ack the end-of-message interrupt */ - out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_EOMI); + out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_EOMI); } out: @@ -398,6 +465,7 @@ mpc85xx_rio_tx_handler(int irq, void *dev_instance) int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries) { int i, j, rc = 0; + struct rio_priv *priv = mport->priv; if ((entries < RIO_MIN_TX_RING_SIZE) || (entries > RIO_MAX_TX_RING_SIZE) || (!is_power_of_2(entries))) { @@ -406,54 +474,53 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr } /* Initialize shadow copy ring */ - msg_tx_ring.dev_id = dev_id; - msg_tx_ring.size = entries; - - for (i = 0; i < msg_tx_ring.size; i++) { - if (! 
- (msg_tx_ring.virt_buffer[i] = - dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE, - &msg_tx_ring.phys_buffer[i], - GFP_KERNEL))) { + priv->msg_tx_ring.dev_id = dev_id; + priv->msg_tx_ring.size = entries; + + for (i = 0; i < priv->msg_tx_ring.size; i++) { + priv->msg_tx_ring.virt_buffer[i] = + dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE, + &priv->msg_tx_ring.phys_buffer[i], GFP_KERNEL); + if (!priv->msg_tx_ring.virt_buffer[i]) { rc = -ENOMEM; - for (j = 0; j < msg_tx_ring.size; j++) - if (msg_tx_ring.virt_buffer[j]) + for (j = 0; j < priv->msg_tx_ring.size; j++) + if (priv->msg_tx_ring.virt_buffer[j]) dma_free_coherent(NULL, - RIO_MSG_BUFFER_SIZE, - msg_tx_ring. - virt_buffer[j], - msg_tx_ring. - phys_buffer[j]); + RIO_MSG_BUFFER_SIZE, + priv->msg_tx_ring. + virt_buffer[j], + priv->msg_tx_ring. + phys_buffer[j]); goto out; } } /* Initialize outbound message descriptor ring */ - if (!(msg_tx_ring.virt = dma_alloc_coherent(NULL, - msg_tx_ring.size * - RIO_MSG_DESC_SIZE, - &msg_tx_ring.phys, - GFP_KERNEL))) { + priv->msg_tx_ring.virt = dma_alloc_coherent(NULL, + priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + &priv->msg_tx_ring.phys, GFP_KERNEL); + if (!priv->msg_tx_ring.virt) { rc = -ENOMEM; goto out_dma; } - memset(msg_tx_ring.virt, 0, msg_tx_ring.size * RIO_MSG_DESC_SIZE); - msg_tx_ring.tx_slot = 0; + memset(priv->msg_tx_ring.virt, 0, + priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE); + priv->msg_tx_ring.tx_slot = 0; /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->odqdpar, msg_tx_ring.phys); - out_be32((void *)&msg_regs->odqepar, msg_tx_ring.phys); + out_be32(&priv->msg_regs->odqdpar, priv->msg_tx_ring.phys); + out_be32(&priv->msg_regs->odqepar, priv->msg_tx_ring.phys); /* Configure for snooping */ - out_be32((void *)&msg_regs->osar, 0x00000004); + out_be32(&priv->msg_regs->osar, 0x00000004); /* Clear interrupt status */ - out_be32((void *)&msg_regs->osr, 0x000000b3); + out_be32(&priv->msg_regs->osr, 0x000000b3); /* Hook up 
outbound message handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_TX, mpc85xx_rio_tx_handler, 0, - "msg_tx", (void *)mport)) < 0) + rc = request_irq(IRQ_RIO_TX(mport), fsl_rio_tx_handler, 0, + "msg_tx", (void *)mport); + if (rc < 0) goto out_irq; /* @@ -463,28 +530,28 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr * Chaining mode * Disable */ - out_be32((void *)&msg_regs->omr, 0x00100220); + out_be32(&priv->msg_regs->omr, 0x00100220); /* Set number of entries */ - out_be32((void *)&msg_regs->omr, - in_be32((void *)&msg_regs->omr) | + out_be32(&priv->msg_regs->omr, + in_be32(&priv->msg_regs->omr) | ((get_bitmask_order(entries) - 2) << 12)); /* Now enable the unit */ - out_be32((void *)&msg_regs->omr, in_be32((void *)&msg_regs->omr) | 0x1); + out_be32(&priv->msg_regs->omr, in_be32(&priv->msg_regs->omr) | 0x1); out: return rc; out_irq: - dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE, - msg_tx_ring.virt, msg_tx_ring.phys); + dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + priv->msg_tx_ring.virt, priv->msg_tx_ring.phys); out_dma: - for (i = 0; i < msg_tx_ring.size; i++) + for (i = 0; i < priv->msg_tx_ring.size; i++) dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE, - msg_tx_ring.virt_buffer[i], - msg_tx_ring.phys_buffer[i]); + priv->msg_tx_ring.virt_buffer[i], + priv->msg_tx_ring.phys_buffer[i]); return rc; } @@ -499,19 +566,20 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr */ void rio_close_outb_mbox(struct rio_mport *mport, int mbox) { + struct rio_priv *priv = mport->priv; /* Disable inbound message unit */ - out_be32((void *)&msg_regs->omr, 0); + out_be32(&priv->msg_regs->omr, 0); /* Free ring */ - dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE, - msg_tx_ring.virt, msg_tx_ring.phys); + dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE, + priv->msg_tx_ring.virt, priv->msg_tx_ring.phys); /* Free interrupt */ - 
free_irq(MPC85xx_IRQ_RIO_TX, (void *)mport); + free_irq(IRQ_RIO_TX(mport), (void *)mport); } /** - * mpc85xx_rio_rx_handler - MPC85xx inbound message interrupt handler + * fsl_rio_rx_handler - MPC85xx inbound message interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data * @@ -519,16 +587,17 @@ void rio_close_outb_mbox(struct rio_mport *mport, int mbox) * mailbox event handler and acks the interrupt occurrence. */ static irqreturn_t -mpc85xx_rio_rx_handler(int irq, void *dev_instance) +fsl_rio_rx_handler(int irq, void *dev_instance) { int isr; struct rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - isr = in_be32((void *)&msg_regs->isr); + isr = in_be32(&priv->msg_regs->isr); if (isr & RIO_MSG_ISR_TE) { pr_info("RIO: inbound message reception error\n"); - out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_TE); + out_be32((void *)&priv->msg_regs->isr, RIO_MSG_ISR_TE); goto out; } @@ -540,10 +609,10 @@ mpc85xx_rio_rx_handler(int irq, void *dev_instance) * make the callback with an unknown/invalid mailbox number * argument. 
*/ - port->inb_msg[0].mcback(port, msg_rx_ring.dev_id, -1, -1); + port->inb_msg[0].mcback(port, priv->msg_rx_ring.dev_id, -1, -1); /* Ack the queueing interrupt */ - out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_DIQI); + out_be32(&priv->msg_regs->isr, RIO_MSG_ISR_DIQI); } out: @@ -564,6 +633,7 @@ mpc85xx_rio_rx_handler(int irq, void *dev_instance) int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries) { int i, rc = 0; + struct rio_priv *priv = mport->priv; if ((entries < RIO_MIN_RX_RING_SIZE) || (entries > RIO_MAX_RX_RING_SIZE) || (!is_power_of_2(entries))) { @@ -572,36 +642,35 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri } /* Initialize client buffer ring */ - msg_rx_ring.dev_id = dev_id; - msg_rx_ring.size = entries; - msg_rx_ring.rx_slot = 0; - for (i = 0; i < msg_rx_ring.size; i++) - msg_rx_ring.virt_buffer[i] = NULL; + priv->msg_rx_ring.dev_id = dev_id; + priv->msg_rx_ring.size = entries; + priv->msg_rx_ring.rx_slot = 0; + for (i = 0; i < priv->msg_rx_ring.size; i++) + priv->msg_rx_ring.virt_buffer[i] = NULL; /* Initialize inbound message ring */ - if (!(msg_rx_ring.virt = dma_alloc_coherent(NULL, - msg_rx_ring.size * - RIO_MAX_MSG_SIZE, - &msg_rx_ring.phys, - GFP_KERNEL))) { + priv->msg_rx_ring.virt = dma_alloc_coherent(NULL, + priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE, + &priv->msg_rx_ring.phys, GFP_KERNEL); + if (!priv->msg_rx_ring.virt) { rc = -ENOMEM; goto out; } /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->ifqdpar, (u32) msg_rx_ring.phys); - out_be32((void *)&msg_regs->ifqepar, (u32) msg_rx_ring.phys); + out_be32(&priv->msg_regs->ifqdpar, (u32) priv->msg_rx_ring.phys); + out_be32(&priv->msg_regs->ifqepar, (u32) priv->msg_rx_ring.phys); /* Clear interrupt status */ - out_be32((void *)&msg_regs->isr, 0x00000091); + out_be32(&priv->msg_regs->isr, 0x00000091); /* Hook up inbound message handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_RX, 
mpc85xx_rio_rx_handler, 0, - "msg_rx", (void *)mport)) < 0) { + rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0, + "msg_rx", (void *)mport); + if (rc < 0) { dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE, - msg_tx_ring.virt_buffer[i], - msg_tx_ring.phys_buffer[i]); + priv->msg_tx_ring.virt_buffer[i], + priv->msg_tx_ring.phys_buffer[i]); goto out; } @@ -612,15 +681,13 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri * Unmask all interrupt sources * Disable */ - out_be32((void *)&msg_regs->imr, 0x001b0060); + out_be32(&priv->msg_regs->imr, 0x001b0060); /* Set number of queue entries */ - out_be32((void *)&msg_regs->imr, - in_be32((void *)&msg_regs->imr) | - ((get_bitmask_order(entries) - 2) << 12)); + setbits32(&priv->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12); /* Now enable the unit */ - out_be32((void *)&msg_regs->imr, in_be32((void *)&msg_regs->imr) | 0x1); + setbits32(&priv->msg_regs->imr, 0x1); out: return rc; @@ -636,15 +703,16 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri */ void rio_close_inb_mbox(struct rio_mport *mport, int mbox) { + struct rio_priv *priv = mport->priv; /* Disable inbound message unit */ - out_be32((void *)&msg_regs->imr, 0); + out_be32(&priv->msg_regs->imr, 0); /* Free ring */ - dma_free_coherent(NULL, msg_rx_ring.size * RIO_MAX_MSG_SIZE, - msg_rx_ring.virt, msg_rx_ring.phys); + dma_free_coherent(NULL, priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE, + priv->msg_rx_ring.virt, priv->msg_rx_ring.phys); /* Free interrupt */ - free_irq(MPC85xx_IRQ_RIO_RX, (void *)mport); + free_irq(IRQ_RIO_RX(mport), (void *)mport); } /** @@ -659,21 +727,22 @@ void rio_close_inb_mbox(struct rio_mport *mport, int mbox) int rio_hw_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf) { int rc = 0; + struct rio_priv *priv = mport->priv; pr_debug("RIO: rio_hw_add_inb_buffer(), msg_rx_ring.rx_slot %d\n", - msg_rx_ring.rx_slot); + priv->msg_rx_ring.rx_slot); - if 
(msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot]) { + if (priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot]) { printk(KERN_ERR "RIO: error adding inbound buffer %d, buffer exists\n", - msg_rx_ring.rx_slot); + priv->msg_rx_ring.rx_slot); rc = -EINVAL; goto out; } - msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot] = buf; - if (++msg_rx_ring.rx_slot == msg_rx_ring.size) - msg_rx_ring.rx_slot = 0; + priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot] = buf; + if (++priv->msg_rx_ring.rx_slot == priv->msg_rx_ring.size) + priv->msg_rx_ring.rx_slot = 0; out: return rc; @@ -691,20 +760,21 @@ EXPORT_SYMBOL_GPL(rio_hw_add_inb_buffer); */ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox) { - u32 imr; + struct rio_priv *priv = mport->priv; u32 phys_buf, virt_buf; void *buf = NULL; int buf_idx; - phys_buf = in_be32((void *)&msg_regs->ifqdpar); + phys_buf = in_be32(&priv->msg_regs->ifqdpar); /* If no more messages, then bail out */ - if (phys_buf == in_be32((void *)&msg_regs->ifqepar)) + if (phys_buf == in_be32(&priv->msg_regs->ifqepar)) goto out2; - virt_buf = (u32) msg_rx_ring.virt + (phys_buf - msg_rx_ring.phys); - buf_idx = (phys_buf - msg_rx_ring.phys) / RIO_MAX_MSG_SIZE; - buf = msg_rx_ring.virt_buffer[buf_idx]; + virt_buf = (u32) priv->msg_rx_ring.virt + (phys_buf + - priv->msg_rx_ring.phys); + buf_idx = (phys_buf - priv->msg_rx_ring.phys) / RIO_MAX_MSG_SIZE; + buf = priv->msg_rx_ring.virt_buffer[buf_idx]; if (!buf) { printk(KERN_ERR @@ -716,11 +786,10 @@ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox) memcpy(buf, (void *)virt_buf, RIO_MAX_MSG_SIZE); /* Clear the available buffer */ - msg_rx_ring.virt_buffer[buf_idx] = NULL; + priv->msg_rx_ring.virt_buffer[buf_idx] = NULL; out1: - imr = in_be32((void *)&msg_regs->imr); - out_be32((void *)&msg_regs->imr, imr | RIO_MSG_IMR_MI); + setbits32(&priv->msg_regs->imr, RIO_MSG_IMR_MI); out2: return buf; @@ -729,7 +798,7 @@ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox) 
EXPORT_SYMBOL_GPL(rio_hw_get_inb_message); /** - * mpc85xx_rio_dbell_handler - MPC85xx doorbell interrupt handler + * fsl_rio_dbell_handler - MPC85xx doorbell interrupt handler * @irq: Linux interrupt number * @dev_instance: Pointer to interrupt-specific data * @@ -737,31 +806,31 @@ EXPORT_SYMBOL_GPL(rio_hw_get_inb_message); * doorbell event handlers and executes a matching event handler. */ static irqreturn_t -mpc85xx_rio_dbell_handler(int irq, void *dev_instance) +fsl_rio_dbell_handler(int irq, void *dev_instance) { int dsr; struct rio_mport *port = (struct rio_mport *)dev_instance; + struct rio_priv *priv = port->priv; - dsr = in_be32((void *)&msg_regs->dsr); + dsr = in_be32(&priv->msg_regs->dsr); if (dsr & DOORBELL_DSR_TE) { pr_info("RIO: doorbell reception error\n"); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_TE); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_TE); goto out; } if (dsr & DOORBELL_DSR_QFI) { pr_info("RIO: doorbell queue full\n"); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_QFI); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_QFI); goto out; } /* XXX Need to check/dispatch until queue empty */ if (dsr & DOORBELL_DSR_DIQI) { u32 dmsg = - (u32) dbell_ring.virt + - (in_be32((void *)&msg_regs->dqdpar) & 0xfff); - u32 dmr; + (u32) priv->dbell_ring.virt + + (in_be32(&priv->msg_regs->dqdpar) & 0xfff); struct rio_dbell *dbell; int found = 0; @@ -784,9 +853,8 @@ mpc85xx_rio_dbell_handler(int irq, void *dev_instance) ("RIO: spurious doorbell, sid %2.2x tid %2.2x info %4.4x\n", DBELL_SID(dmsg), DBELL_TID(dmsg), DBELL_INF(dmsg)); } - dmr = in_be32((void *)&msg_regs->dmr); - out_be32((void *)&msg_regs->dmr, dmr | DOORBELL_DMR_DI); - out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_DIQI); + setbits32(&priv->msg_regs->dmr, DOORBELL_DMR_DI); + out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_DIQI); } out: @@ -794,21 +862,22 @@ mpc85xx_rio_dbell_handler(int irq, void *dev_instance) } /** - * mpc85xx_rio_doorbell_init - MPC85xx doorbell interface init + * 
fsl_rio_doorbell_init - MPC85xx doorbell interface init * @mport: Master port implementing the inbound doorbell unit * * Initializes doorbell unit hardware and inbound DMA buffer - * ring. Called from mpc85xx_rio_setup(). Returns %0 on success + * ring. Called from fsl_rio_setup(). Returns %0 on success * or %-ENOMEM on failure. */ -static int mpc85xx_rio_doorbell_init(struct rio_mport *mport) +static int fsl_rio_doorbell_init(struct rio_mport *mport) { + struct rio_priv *priv = mport->priv; int rc = 0; /* Map outbound doorbell window immediately after maintenance window */ - if (!(dbell_win = - (u32) ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE, - RIO_DBELL_WIN_SIZE))) { + priv->dbell_win = ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE, + RIO_DBELL_WIN_SIZE); + if (!priv->dbell_win) { printk(KERN_ERR "RIO: unable to map outbound doorbell window\n"); rc = -ENOMEM; @@ -816,37 +885,36 @@ static int mpc85xx_rio_doorbell_init(struct rio_mport *mport) } /* Initialize inbound doorbells */ - if (!(dbell_ring.virt = dma_alloc_coherent(NULL, - 512 * DOORBELL_MESSAGE_SIZE, - &dbell_ring.phys, - GFP_KERNEL))) { + priv->dbell_ring.virt = dma_alloc_coherent(NULL, 512 * + DOORBELL_MESSAGE_SIZE, &priv->dbell_ring.phys, GFP_KERNEL); + if (!priv->dbell_ring.virt) { printk(KERN_ERR "RIO: unable allocate inbound doorbell ring\n"); rc = -ENOMEM; - iounmap((void *)dbell_win); + iounmap(priv->dbell_win); goto out; } /* Point dequeue/enqueue pointers at first entry in ring */ - out_be32((void *)&msg_regs->dqdpar, (u32) dbell_ring.phys); - out_be32((void *)&msg_regs->dqepar, (u32) dbell_ring.phys); + out_be32(&priv->msg_regs->dqdpar, (u32) priv->dbell_ring.phys); + out_be32(&priv->msg_regs->dqepar, (u32) priv->dbell_ring.phys); /* Clear interrupt status */ - out_be32((void *)&msg_regs->dsr, 0x00000091); + out_be32(&priv->msg_regs->dsr, 0x00000091); /* Hook up doorbell handler */ - if ((rc = - request_irq(MPC85xx_IRQ_RIO_BELL, mpc85xx_rio_dbell_handler, 0, - "dbell_rx", (void *)mport) 
< 0)) { - iounmap((void *)dbell_win); + rc = request_irq(IRQ_RIO_BELL(mport), fsl_rio_dbell_handler, 0, + "dbell_rx", (void *)mport); + if (rc < 0) { + iounmap(priv->dbell_win); dma_free_coherent(NULL, 512 * DOORBELL_MESSAGE_SIZE, - dbell_ring.virt, dbell_ring.phys); + priv->dbell_ring.virt, priv->dbell_ring.phys); printk(KERN_ERR "MPC85xx RIO: unable to request inbound doorbell irq"); goto out; } /* Configure doorbells for snooping, 512 entries, and enable */ - out_be32((void *)&msg_regs->dmr, 0x00108161); + out_be32(&priv->msg_regs->dmr, 0x00108161); out: return rc; @@ -854,7 +922,7 @@ static int mpc85xx_rio_doorbell_init(struct rio_mport *mport) static char *cmdline = NULL; -static int mpc85xx_rio_get_hdid(int index) +static int fsl_rio_get_hdid(int index) { /* XXX Need to parse multiple entries in some format */ if (!cmdline) @@ -863,7 +931,7 @@ static int mpc85xx_rio_get_hdid(int index) return simple_strtol(cmdline, NULL, 0); } -static int mpc85xx_rio_get_cmdline(char *s) +static int fsl_rio_get_cmdline(char *s) { if (!s) return 0; @@ -872,61 +940,266 @@ static int mpc85xx_rio_get_cmdline(char *s) return 1; } -__setup("riohdid=", mpc85xx_rio_get_cmdline); +__setup("riohdid=", fsl_rio_get_cmdline); + +static inline void fsl_rio_info(struct device *dev, u32 ccsr) +{ + const char *str; + if (ccsr & 1) { + /* Serial phy */ + switch (ccsr >> 30) { + case 0: + str = "1"; + break; + case 1: + str = "4"; + break; + default: + str = "Unknown"; + break;; + } + dev_info(dev, "Hardware port width: %s\n", str); + + switch ((ccsr >> 27) & 7) { + case 0: + str = "Single-lane 0"; + break; + case 1: + str = "Single-lane 2"; + break; + case 2: + str = "Four-lane"; + break; + default: + str = "Unknown"; + break; + } + dev_info(dev, "Training connection status: %s\n", str); + } else { + /* Parallel phy */ + if (!(ccsr & 0x80000000)) + dev_info(dev, "Output port operating in 8-bit mode\n"); + if (!(ccsr & 0x08000000)) + dev_info(dev, "Input port operating in 8-bit mode\n"); + } +} 
/** - * mpc85xx_rio_setup - Setup MPC85xx RapidIO interface - * @law_start: Starting physical address of RapidIO LAW - * @law_size: Size of RapidIO LAW + * fsl_rio_setup - Setup MPC85xx RapidIO interface + * @fsl_rio_setup - Setup Freescale PowerPC RapidIO interface * * Initializes MPC85xx RapidIO hardware interface, configures * master port with system-specific info, and registers the * master port with the RapidIO subsystem. */ -void mpc85xx_rio_setup(int law_start, int law_size) +int fsl_rio_setup(struct of_device *dev) { struct rio_ops *ops; struct rio_mport *port; + struct rio_priv *priv; + int rc = 0; + const u32 *dt_range, *cell; + struct resource regs; + int rlen; + u32 ccsr; + u64 law_start, law_size; + int paw, aw, sw; + + if (!dev->node) { + dev_err(&dev->dev, "Device OF-Node is NULL"); + return -EFAULT; + } + + rc = of_address_to_resource(dev->node, 0, ®s); + if (rc) { + dev_err(&dev->dev, "Can't get %s property 'reg'\n", + dev->node->full_name); + return -EFAULT; + } + dev_info(&dev->dev, "Of-device full name %s\n", dev->node->full_name); + dev_info(&dev->dev, "Regs start 0x%08x size 0x%08x\n", regs.start, + regs.end - regs.start + 1); + + dt_range = of_get_property(dev->node, "ranges", &rlen); + if (!dt_range) { + dev_err(&dev->dev, "Can't get %s property 'ranges'\n", + dev->node->full_name); + return -EFAULT; + } + + /* Get node address wide */ + cell = of_get_property(dev->node, "#address-cells", NULL); + if (cell) + aw = *cell; + else + aw = of_n_addr_cells(dev->node); + /* Get node size wide */ + cell = of_get_property(dev->node, "#size-cells", NULL); + if (cell) + sw = *cell; + else + sw = of_n_size_cells(dev->node); + /* Get parent address wide wide */ + paw = of_n_addr_cells(dev->node); + + law_start = of_read_number(dt_range + aw, paw); + law_size = of_read_number(dt_range + aw + paw, sw); + + dev_info(&dev->dev, "LAW start 0x%016llx, size 0x%016llx.\n", + law_start, law_size); ops = kmalloc(sizeof(struct rio_ops), GFP_KERNEL); - ops->lcread = 
mpc85xx_local_config_read; - ops->lcwrite = mpc85xx_local_config_write; - ops->cread = mpc85xx_rio_config_read; - ops->cwrite = mpc85xx_rio_config_write; - ops->dsend = mpc85xx_rio_doorbell_send; + ops->lcread = fsl_local_config_read; + ops->lcwrite = fsl_local_config_write; + ops->cread = fsl_rio_config_read; + ops->cwrite = fsl_rio_config_write; + ops->dsend = fsl_rio_doorbell_send; - port = kmalloc(sizeof(struct rio_mport), GFP_KERNEL); + port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL); port->id = 0; port->index = 0; + + priv = kzalloc(sizeof(struct rio_priv), GFP_KERNEL); + if (!priv) { + printk(KERN_ERR "Can't alloc memory for 'priv'\n"); + rc = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&port->dbells); port->iores.start = law_start; port->iores.end = law_start + law_size; port->iores.flags = IORESOURCE_MEM; + priv->bellirq = irq_of_parse_and_map(dev->node, 2); + priv->txirq = irq_of_parse_and_map(dev->node, 3); + priv->rxirq = irq_of_parse_and_map(dev->node, 4); + dev_info(&dev->dev, "bellirq: %d, txirq: %d, rxirq %d\n", priv->bellirq, + priv->txirq, priv->rxirq); + rio_init_dbell_res(&port->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff); rio_init_mbox_res(&port->riores[RIO_INB_MBOX_RESOURCE], 0, 0); rio_init_mbox_res(&port->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0); strcpy(port->name, "RIO0 mport"); port->ops = ops; - port->host_deviceid = mpc85xx_rio_get_hdid(port->id); + port->host_deviceid = fsl_rio_get_hdid(port->id); + port->priv = priv; rio_register_mport(port); - regs_win = (u32) ioremap(RIO_REGS_BASE, 0x20000); - atmu_regs = (struct rio_atmu_regs *)(regs_win + RIO_ATMU_REGS_OFFSET); - maint_atmu_regs = atmu_regs + 1; - dbell_atmu_regs = atmu_regs + 2; - msg_regs = (struct rio_msg_regs *)(regs_win + RIO_MSG_REGS_OFFSET); + priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1); + + /* Probe the master port phy type */ + ccsr = in_be32(priv->regs_win + RIO_CCSR); + port->phy_type = (ccsr & 1) ? 
RIO_PHY_SERIAL : RIO_PHY_PARALLEL; + dev_info(&dev->dev, "RapidIO PHY type: %s\n", + (port->phy_type == RIO_PHY_PARALLEL) ? "parallel" : + ((port->phy_type == RIO_PHY_SERIAL) ? "serial" : + "unknown")); + /* Checking the port training status */ + if (in_be32((priv->regs_win + RIO_ESCSR)) & 1) { + dev_err(&dev->dev, "Port is not ready. " + "Try to restart connection...\n"); + switch (port->phy_type) { + case RIO_PHY_SERIAL: + /* Disable ports */ + out_be32(priv->regs_win + RIO_CCSR, 0); + /* Set 1x lane */ + setbits32(priv->regs_win + RIO_CCSR, 0x02000000); + /* Enable ports */ + setbits32(priv->regs_win + RIO_CCSR, 0x00600000); + break; + case RIO_PHY_PARALLEL: + /* Disable ports */ + out_be32(priv->regs_win + RIO_CCSR, 0x22000000); + /* Enable ports */ + out_be32(priv->regs_win + RIO_CCSR, 0x44000000); + break; + } + msleep(100); + if (in_be32((priv->regs_win + RIO_ESCSR)) & 1) { + dev_err(&dev->dev, "Port restart failed.\n"); + rc = -ENOLINK; + goto err; + } + dev_info(&dev->dev, "Port restart success!\n"); + } + fsl_rio_info(&dev->dev, ccsr); + + port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR)) + & RIO_PEF_CTLS) >> 4; + dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n", + port->sys_size ? 65536 : 256); + + priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win + + RIO_ATMU_REGS_OFFSET); + priv->maint_atmu_regs = priv->atmu_regs + 1; + priv->dbell_atmu_regs = priv->atmu_regs + 2; + priv->msg_regs = (struct rio_msg_regs *)(priv->regs_win + + ((port->phy_type == RIO_PHY_SERIAL) ? + RIO_S_MSG_REGS_OFFSET : RIO_P_MSG_REGS_OFFSET)); + + /* Set to receive any dist ID for serial RapidIO controller. 
*/ + if (port->phy_type == RIO_PHY_SERIAL) + out_be32((priv->regs_win + RIO_ISR_AACR), RIO_ISR_AACR_AA); /* Configure maintenance transaction window */ - out_be32((void *)&maint_atmu_regs->rowbar, 0x000c0000); - out_be32((void *)&maint_atmu_regs->rowar, 0x80077015); + out_be32(&priv->maint_atmu_regs->rowbar, 0x000c0000); + out_be32(&priv->maint_atmu_regs->rowar, 0x80077015); - maint_win = (u32) ioremap(law_start, RIO_MAINT_WIN_SIZE); + priv->maint_win = ioremap(law_start, RIO_MAINT_WIN_SIZE); /* Configure outbound doorbell window */ - out_be32((void *)&dbell_atmu_regs->rowbar, 0x000c0400); - out_be32((void *)&dbell_atmu_regs->rowar, 0x8004200b); - mpc85xx_rio_doorbell_init(port); + out_be32(&priv->dbell_atmu_regs->rowbar, 0x000c0400); + out_be32(&priv->dbell_atmu_regs->rowar, 0x8004200b); + fsl_rio_doorbell_init(port); + + return 0; +err: + if (priv) + iounmap(priv->regs_win); + kfree(ops); + kfree(priv); + kfree(port); + return rc; +} + +/* The probe function for RapidIO peer-to-peer network. 
+ */ +static int __devinit fsl_of_rio_rpn_probe(struct of_device *dev, + const struct of_device_id *match) +{ + int rc; + printk(KERN_INFO "Setting up RapidIO peer-to-peer network %s\n", + dev->node->full_name); + + rc = fsl_rio_setup(dev); + if (rc) + goto out; + + /* Enumerate all registered ports */ + rc = rio_init_mports(); +out: + return rc; +}; + +static const struct of_device_id fsl_of_rio_rpn_ids[] = { + { + .compatible = "fsl,rapidio-delta", + }, + {}, +}; + +static struct of_platform_driver fsl_of_rio_rpn_driver = { + .name = "fsl-of-rio", + .match_table = fsl_of_rio_rpn_ids, + .probe = fsl_of_rio_rpn_probe, +}; + +static __init int fsl_of_rio_rpn_init(void) +{ + return of_register_platform_driver(&fsl_of_rio_rpn_driver); } + +subsys_initcall(fsl_of_rio_rpn_init); diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h deleted file mode 100644 index 6d3ff30b157..00000000000 --- a/arch/powerpc/sysdev/fsl_rio.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * MPC85xx RapidIO definitions - * - * Copyright 2005 MontaVista Software, Inc. - * Matt Porter <mporter@kernel.crashing.org> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. 
- */ - -#ifndef __PPC_SYSLIB_PPC85XX_RIO_H -#define __PPC_SYSLIB_PPC85XX_RIO_H - -#include <linux/init.h> - -extern void mpc85xx_rio_setup(int law_start, int law_size); - -#endif /* __PPC_SYSLIB_PPC85XX_RIO_H */ diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c index a51a1771423..8dcbdd6c2d2 100644 --- a/arch/ppc/kernel/asm-offsets.c +++ b/arch/ppc/kernel/asm-offsets.c @@ -18,6 +18,8 @@ #include <linux/suspend.h> #include <linux/mman.h> #include <linux/mm.h> +#include <linux/kbuild.h> + #include <asm/io.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -26,11 +28,6 @@ #include <asm/thread_info.h> #include <asm/vdso_datapage.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main(void) { diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c index 50ce83f20ad..df3ef6db072 100644 --- a/arch/ppc/kernel/pci.c +++ b/arch/ppc/kernel/pci.c @@ -1121,8 +1121,8 @@ void __init pci_init_resource(struct resource *res, resource_size_t start, void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len) diff --git a/arch/ppc/platforms/sbc82xx.c b/arch/ppc/platforms/sbc82xx.c index 0df6aacb823..24f6e0694ac 100644 --- a/arch/ppc/platforms/sbc82xx.c +++ b/arch/ppc/platforms/sbc82xx.c @@ -30,8 +30,6 @@ static void (*callback_init_IRQ)(void); extern unsigned char __res[sizeof(bd_t)]; -extern void (*late_time_init)(void); - #ifdef CONFIG_GEN_RTC TODC_ALLOC(); diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1375f8a4469..fa28ecae636 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -5,44 +5,38 @@ */ #include <linux/sched.h> 
- -/* Use marker if you need to separate the values later */ - -#define DEFINE(sym, val, marker) \ - asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) +#include <linux/kbuild.h> int main(void) { - DEFINE(__THREAD_info, offsetof(struct task_struct, stack),); - DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),); - DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),); + DEFINE(__THREAD_info, offsetof(struct task_struct, stack)); + DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp)); + DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info)); DEFINE(__THREAD_mm_segment, - offsetof(struct task_struct, thread.mm_segment),); + offsetof(struct task_struct, thread.mm_segment)); BLANK(); - DEFINE(__TASK_pid, offsetof(struct task_struct, pid),); + DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); - DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),); - DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),); - DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),); + DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid)); + DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address)); + DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id)); BLANK(); - DEFINE(__TI_task, offsetof(struct thread_info, task),); - DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),); - DEFINE(__TI_flags, offsetof(struct thread_info, flags),); - DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),); - DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),); + DEFINE(__TI_task, offsetof(struct thread_info, task)); + DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain)); + DEFINE(__TI_flags, offsetof(struct thread_info, flags)); + DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); + DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); BLANK(); - 
DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),); - DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),); - DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),); - DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),); - DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),); - DEFINE(__PT_SIZE, sizeof(struct pt_regs),); + DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); + DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); + DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); + DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); + DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); + DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap)); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),); - DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),); - DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),); + DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); + DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs)); + DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1)); return 0; } diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index c36d8123ca1..c59a86dca58 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -60,8 +60,6 @@ init_IRQ(void) /* * Switch to the asynchronous interrupt stack for softirq execution. 
*/ -extern void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags, old, new; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 49b435c3a57..08d2e732525 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -191,8 +191,8 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (unlikely(!len || !start)) diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index dc6725c51a8..57cf0e0680f 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -11,12 +11,9 @@ #include <linux/stddef.h> #include <linux/types.h> #include <linux/mm.h> -#include <asm/thread_info.h> - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) +#include <linux/kbuild.h> -#define BLANK() asm volatile("\n->" : : ) +#include <asm/thread_info.h> int main(void) { diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 9bf19b00696..a2a99e487e3 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -200,8 +200,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -extern asmlinkage void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index 6773ed76e41..cd3f7694e9b 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -12,11 +12,7 @@ #include <linux/sched.h> // #include <linux/mm.h> - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) +#include <linux/kbuild.h> int foo(void) { diff --git 
a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index 54501c1ca78..9ef37e13a92 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -21,8 +21,8 @@ EXPORT_SYMBOL(ioport_unmap); /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/sparc64/lib/iomap.c b/arch/sparc64/lib/iomap.c index ac556db0697..7120ebbd4d0 100644 --- a/arch/sparc64/lib/iomap.c +++ b/arch/sparc64/lib/iomap.c @@ -21,8 +21,8 @@ EXPORT_SYMBOL(ioport_unmap); /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { - unsigned long start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); if (!len || !start) diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 984f80e668c..6540d2c9fbb 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -59,7 +59,7 @@ static int make_proc_exitcode(void) { struct proc_dir_entry *ent; - ent = create_proc_entry("exitcode", 0600, &proc_root); + ent = create_proc_entry("exitcode", 0600, NULL); if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e8cb9ff183e..83603cfbde8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -364,7 +364,7 @@ int __init make_proc_sysemu(void) if (!sysemu_supported) return 0; - ent = 
create_proc_entry("sysemu", 0600, &proc_root); + ent = create_proc_entry("sysemu", 0600, NULL); if (ent == NULL) { diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index e066e84493b..0d0cea2ac98 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -4,6 +4,7 @@ */ #include <linux/clockchips.h> +#include <linux/init.h> #include <linux/interrupt.h> #include <linux/jiffies.h> #include <linux/threads.h> @@ -109,8 +110,6 @@ static void __init setup_itimer(void) clockevents_register_device(&itimer_clockevent); } -extern void (*late_time_init)(void); - void __init time_init(void) { long long nsecs; diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c index cee5c3142d4..581e6986a77 100644 --- a/arch/v850/kernel/asm-offsets.c +++ b/arch/v850/kernel/asm-offsets.c @@ -13,14 +13,11 @@ #include <linux/kernel_stat.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/kbuild.h> + #include <asm/irq.h> #include <asm/errno.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - int main (void) { /* offsets into the task struct */ diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c index 7165478824e..687e367d8b6 100644 --- a/arch/v850/kernel/rte_mb_a_pci.c +++ b/arch/v850/kernel/rte_mb_a_pci.c @@ -790,8 +790,8 @@ pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr, void __iomem *pci_iomap (struct pci_dev *dev, int bar, unsigned long max) { - unsigned long start = pci_resource_start (dev, bar); - unsigned long len = pci_resource_len (dev, bar); + resource_size_t start = pci_resource_start (dev, bar); + resource_size_t len = pci_resource_len (dev, bar); if (!start || len == 0) return 0; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a12dbb2b93f..f70e3e3a9fa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -537,9 +537,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 
'iommu=calgary' on the kernel command line. If unsure, say Y. -config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU) - # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool @@ -550,6 +547,8 @@ config SWIOTLB access 32-bits of memory can be used on systems with more than 3 GB of memory. If unsure, say Y. +config IOMMU_HELPER + def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB) config NR_CPUS int "Maximum number of CPUs (2-255)" @@ -1505,6 +1504,10 @@ config PCI_GODIRECT config PCI_GOANY bool "Any" +config PCI_GOOLPC + bool "OLPC" + depends on OLPC + endchoice config PCI_BIOS @@ -1514,12 +1517,17 @@ config PCI_BIOS # x86-64 doesn't support PCI BIOS access from long mode so always go direct. config PCI_DIRECT def_bool y - depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS) + depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS) config PCI_MMCONFIG def_bool y depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) +config PCI_OLPC + bool + depends on PCI && PCI_GOOLPC + default y + config PCI_DOMAINS def_bool y depends on PCI @@ -1639,6 +1647,13 @@ config GEODE_MFGPT_TIMER MFGPTs have a better resolution and max interval than the generic PIT, and are suitable for use as high-res timers. +config OLPC + bool "One Laptop Per Child support" + default n + help + Add support for detecting the unique features of the OLPC + XO hardware. 
+ endif # X86_32 config K8_NB diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index d84a48ece78..03399d64013 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -126,17 +126,25 @@ void query_edd(void) { char eddarg[8]; int do_mbr = 1; +#ifdef CONFIG_EDD_OFF + int do_edd = 0; +#else int do_edd = 1; +#endif int be_quiet; int devno; struct edd_info ei, *edp; u32 *mbrptr; if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) { - if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) + if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) { + do_edd = 1; do_mbr = 0; + } else if (!strcmp(eddarg, "off")) do_edd = 0; + else if (!strcmp(eddarg, "on")) + do_edd = 1; } be_quiet = cmdline_find_option_bool("quiet"); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fa19c381954..30d54ed27e5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -91,6 +91,8 @@ endif obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o +obj-$(CONFIG_OLPC) += olpc.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) @@ -101,4 +103,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o + + obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 977ed5cdeaa..c49ebcc6c41 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -771,6 +771,32 @@ static void __init acpi_register_lapic_address(unsigned long address) boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); } +static int __init early_acpi_parse_madt_lapic_addr_ovr(void) +{ + int count; + + if (!cpu_has_apic) + return -ENODEV; + + /* + * Note that the LAPIC address is obtained from the MADT (32-bit value) + * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). 
+ */ + + count = + acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, + acpi_parse_lapic_addr_ovr, 0); + if (count < 0) { + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + return count; + } + + acpi_register_lapic_address(acpi_lapic_addr); + + return count; +} + static int __init acpi_parse_madt_lapic_entries(void) { int count; @@ -901,6 +927,33 @@ static inline int acpi_parse_madt_ioapic_entries(void) } #endif /* !CONFIG_X86_IO_APIC */ +static void __init early_acpi_process_madt(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int error; + + if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + + /* + * Parse MADT LAPIC entries + */ + error = early_acpi_parse_madt_lapic_addr_ovr(); + if (!error) { + acpi_lapic = 1; + smp_found_config = 1; + } + if (error == -EINVAL) { + /* + * Dell Precision Workstation 410, 610 come here. + */ + printk(KERN_ERR PREFIX + "Invalid BIOS MADT, disabling ACPI\n"); + disable_acpi(); + } + } +#endif +} + static void __init acpi_process_madt(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -1233,6 +1286,23 @@ int __init acpi_boot_table_init(void) return 0; } +int __init early_acpi_boot_init(void) +{ + /* + * If acpi_disabled, bail out + * One exception: acpi=ht continues far enough to enumerate LAPICs + */ + if (acpi_disabled && !acpi_ht) + return 1; + + /* + * Process the Multiple APIC Description Table (MADT), if present + */ + early_acpi_process_madt(); + + return 0; +} + int __init acpi_boot_init(void) { /* diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 670c3c31128..92588083950 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -9,6 +9,7 @@ #include <linux/signal.h> #include <linux/personality.h> #include <linux/suspend.h> +#include <linux/kbuild.h> #include <asm/ucontext.h> #include "sigframe.h" #include <asm/pgtable.h> @@ -23,14 +24,6 @@ #include <linux/lguest.h> #include "../../../drivers/lguest/lg.h" -#define DEFINE(sym, val) \ - 
asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)); - /* workaround for a warning with -Wmissing-prototypes */ void foo(void); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 494e1e096ee..f126c05d617 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -10,6 +10,7 @@ #include <linux/errno.h> #include <linux/hardirq.h> #include <linux/suspend.h> +#include <linux/kbuild.h> #include <asm/pda.h> #include <asm/processor.h> #include <asm/segment.h> @@ -17,14 +18,6 @@ #include <asm/ia32.h> #include <asm/bootparam.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) - -#define OFFSET(sym, str, mem) \ - DEFINE(sym, offsetof(struct str, mem)) - #define __NO_STUBS 1 #undef __SYSCALL #undef _ASM_X86_64_UNISTD_H_ diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8db8f73503b..b0c8208df9f 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -601,6 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { policy->cpus = perf->shared_cpu_map; } + policy->related_cpus = perf->shared_cpu_map; #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 1960f1985e5..84c480bb371 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -424,7 +424,7 @@ static int __init mtrr_if_init(void) return -ENODEV; proc_root_mtrr = - proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops); + proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); if (proc_root_mtrr) proc_root_mtrr->owner = THIS_MODULE; diff --git a/arch/x86/kernel/irq_32.c 
b/arch/x86/kernel/irq_32.c index 00bda7bcda6..147352df28b 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -extern asmlinkage void __do_softirq(void); - asmlinkage void do_softirq(void) { unsigned long flags; diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c new file mode 100644 index 00000000000..edc5fbfe85c --- /dev/null +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -0,0 +1,243 @@ +/* + * AMD Family 10h mmconfig enablement + */ + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> +#include <linux/dmi.h> +#include <asm/pci-direct.h> +#include <linux/sort.h> +#include <asm/io.h> +#include <asm/msr.h> +#include <asm/acpi.h> + +#include "../pci/pci.h" + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static u64 __cpuinitdata fam10h_pci_mmconf_base; +static int __cpuinitdata fam10h_pci_mmconf_base_status; + +static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, +}; + +struct range { + u64 start; + u64 end; +}; + +static int __cpuinit cmp_range(const void *x1, const void *x2) +{ + const struct range *r1 = x1; + const struct range *r2 = x2; + int start1, start2; + + start1 = r1->start >> 32; + start2 = r2->start >> 32; + + return start1 - start2; +} + +/*[47:0] */ +/* need to avoid (0xfd<<32) and (0xfe<<32), ht used space */ +#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32) +#define BASE_VALID(b) ((b != (0xfdULL << 32)) && (b != (0xfeULL << 32))) +static void __cpuinit get_fam10h_pci_mmconf_base(void) +{ + int i; + unsigned bus; + unsigned slot; + int found; + + u64 val; + u32 address; + u64 tom2; + u64 base = FAM10H_PCI_MMCONF_BASE; + + int hi_mmio_num; + struct range range[8]; + + /* only try to get setting from BSP */ + /* -1 or 1 */ + if 
(fam10h_pci_mmconf_base_status) + return; + + if (!early_pci_allowed()) + goto fail; + + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; + + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + goto fail; + + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + + /* TOP_MEM2 is not enabled? */ + if (!(val & (1<<21))) { + tom2 = 0; + } else { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + tom2 = val & (0xffffULL<<32); + } + + if (base <= tom2) + base = tom2 + (1ULL<<32); + + /* + * need to check if the range is in the high mmio range that is + * above 4G + */ + hi_mmio_num = 0; + for (i = 0; i < 8; i++) { + u32 reg; + u64 start; + u64 end; + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + end = (((u64)reg) << 8) & (0xffULL << 32); /* 39:16 on 31:8*/ + + if (!end) + continue; + + range[hi_mmio_num].start = start; + range[hi_mmio_num].end = end; + hi_mmio_num++; + } + + if (!hi_mmio_num) + goto out; + + /* sort the range */ + sort(range, hi_mmio_num, sizeof(struct range), cmp_range, NULL); + + if (range[hi_mmio_num - 1].end < base) + goto out; + if (range[0].start > base) + goto out; + + /* need to find one window */ + base = range[0].start - (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + base = range[hi_mmio_num - 1].end + (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + /* need to find window between ranges */ + if (hi_mmio_num > 1) + for (i = 0; i < hi_mmio_num - 1; i++) { + if (range[i + 1].start > (range[i].end + (1ULL << 32))) { + base = 
range[i].end + (1ULL << 32); + if ((base > tom2) && BASE_VALID(base)) + goto out; + } + } + +fail: + fam10h_pci_mmconf_base_status = -1; + return; +out: + fam10h_pci_mmconf_base = base; + fam10h_pci_mmconf_base_status = 1; +} + +void __cpuinit fam10h_check_enable_mmcfg(void) +{ + u64 val; + u32 address; + + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, val); + + /* try to make sure that AP's setting is identical to BSP setting */ + if (val & FAM10H_MMIO_CONF_ENABLE) { + unsigned busnbits; + busnbits = (val >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* only trust the one handle 256 buses, if acpi=off */ + if (!acpi_pci_disabled || busnbits >= 8) { + u64 base; + base = val & (0xffffULL << 32); + if (fam10h_pci_mmconf_base_status <= 0) { + fam10h_pci_mmconf_base = base; + fam10h_pci_mmconf_base_status = 1; + return; + } else if (fam10h_pci_mmconf_base == base) + return; + } + } + + /* + * if it is not enabled, try to enable it and assume only one segment + * with 256 buses + */ + get_fam10h_pci_mmconf_base(); + if (fam10h_pci_mmconf_base_status <= 0) + return; + + printk(KERN_INFO "Enable MMCONFIG on AMD Family 10h\n"); + val &= ~((FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT) | + (FAM10H_MMIO_CONF_BUSRANGE_MASK<<FAM10H_MMIO_CONF_BUSRANGE_SHIFT)); + val |= fam10h_pci_mmconf_base | (8 << FAM10H_MMIO_CONF_BUSRANGE_SHIFT) | + FAM10H_MMIO_CONF_ENABLE; + wrmsrl(address, val); +} + +static int __devinit set_check_enable_amd_mmconf(const struct dmi_system_id *d) +{ + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return 0; +} + +static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { + { + .callback = set_check_enable_amd_mmconf, + .ident = "Sun Microsystems Machine", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sun Microsystems"), + }, + }, + {} +}; + +void __init check_enable_amd_mmconf_dmi(void) +{ + dmi_check_system(mmconf_dmi_table); +} diff --git 
a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c new file mode 100644 index 00000000000..3e667227480 --- /dev/null +++ b/arch/x86/kernel/olpc.c @@ -0,0 +1,260 @@ +/* + * Support for the OLPC DCON and OLPC EC access + * + * Copyright © 2006 Advanced Micro Devices, Inc. + * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/string.h> +#include <asm/geode.h> +#include <asm/olpc.h> + +#ifdef CONFIG_OPEN_FIRMWARE +#include <asm/ofw.h> +#endif + +struct olpc_platform_t olpc_platform_info; +EXPORT_SYMBOL_GPL(olpc_platform_info); + +static DEFINE_SPINLOCK(ec_lock); + +/* what the timeout *should* be (in ms) */ +#define EC_BASE_TIMEOUT 20 + +/* the timeout that bugs in the EC might force us to actually use */ +static int ec_timeout = EC_BASE_TIMEOUT; + +static int __init olpc_ec_timeout_set(char *str) +{ + if (get_option(&str, &ec_timeout) != 1) { + ec_timeout = EC_BASE_TIMEOUT; + printk(KERN_ERR "olpc-ec: invalid argument to " + "'olpc_ec_timeout=', ignoring!\n"); + } + printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n", + ec_timeout); + return 1; +} +__setup("olpc_ec_timeout=", olpc_ec_timeout_set); + +/* + * These {i,o}bf_status functions return whether the buffers are full or not. 
+ */ + +static inline unsigned int ibf_status(unsigned int port) +{ + return !!(inb(port) & 0x02); +} + +static inline unsigned int obf_status(unsigned int port) +{ + return inb(port) & 0x01; +} + +#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d)) +static int __wait_on_ibf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = ibf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = ibf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d)) +static int __wait_on_obf(unsigned int line, unsigned int port, int desired) +{ + unsigned int timeo; + int state = obf_status(port); + + for (timeo = ec_timeout; state != desired && timeo; timeo--) { + mdelay(1); + state = obf_status(port); + } + + if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) && + timeo < (ec_timeout - EC_BASE_TIMEOUT)) { + printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n", + line, ec_timeout - timeo); + } + + return !(state == desired); +} + +/* + * This allows the kernel to run Embedded Controller commands. The EC is + * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the + * available EC commands are here: + * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while + * OpenFirmware's source is available, the EC's is not. 
+ */ +int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen) +{ + unsigned long flags; + int ret = -EIO; + int i; + + spin_lock_irqsave(&ec_lock, flags); + + /* Clear OBF */ + for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++) + inb(0x68); + if (i == 10) { + printk(KERN_ERR "olpc-ec: timeout while attempting to " + "clear OBF flag!\n"); + goto err; + } + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to " + "quiesce!\n"); + goto err; + } + +restart: + /* + * Note that if we time out during any IBF checks, that's a failure; + * we have to return. There's no way for the kernel to clear that. + * + * If we time out during an OBF check, we can restart the command; + * reissuing it will clear the OBF flag, and we should be alright. + * The OBF flag will sometimes misbehave due to what we believe + * is a hardware quirk.. + */ + printk(KERN_DEBUG "olpc-ec: running cmd 0x%x\n", cmd); + outb(cmd, 0x6c); + + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for EC to read " + "command!\n"); + goto err; + } + + if (inbuf && inlen) { + /* write data to EC */ + for (i = 0; i < inlen; i++) { + if (wait_on_ibf(0x6c, 0)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC accept data!\n"); + goto err; + } + printk(KERN_DEBUG "olpc-ec: sending cmd arg 0x%x\n", + inbuf[i]); + outb(inbuf[i], 0x68); + } + } + if (outbuf && outlen) { + /* read data from EC */ + for (i = 0; i < outlen; i++) { + if (wait_on_obf(0x6c, 1)) { + printk(KERN_ERR "olpc-ec: timeout waiting for" + " EC to provide data!\n"); + goto restart; + } + outbuf[i] = inb(0x68); + printk(KERN_DEBUG "olpc-ec: received 0x%x\n", + outbuf[i]); + } + } + + ret = 0; +err: + spin_unlock_irqrestore(&ec_lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(olpc_ec_cmd); + +#ifdef CONFIG_OPEN_FIRMWARE +static void __init platform_detect(void) +{ + size_t propsize; + u32 rev; + + if (ofw("getprop", 4, 1, NULL, 
"board-revision-int", &rev, 4, + &propsize) || propsize != 4) { + printk(KERN_ERR "ofw: getprop call failed!\n"); + rev = 0; + } + olpc_platform_info.boardrev = be32_to_cpu(rev); +} +#else +static void __init platform_detect(void) +{ + /* stopgap until OFW support is added to the kernel */ + olpc_platform_info.boardrev = be32_to_cpu(0xc2); +} +#endif + +static int __init olpc_init(void) +{ + unsigned char *romsig; + + /* The ioremap check is dangerous; limit what we run it on */ + if (!is_geode() || geode_has_vsa2()) + return 0; + + spin_lock_init(&ec_lock); + + romsig = ioremap(0xffffffc0, 16); + if (!romsig) + return 0; + + if (strncmp(romsig, "CL1 Q", 7)) + goto unmap; + if (strncmp(romsig+6, romsig+13, 3)) { + printk(KERN_INFO "OLPC BIOS signature looks invalid. " + "Assuming not OLPC\n"); + goto unmap; + } + + printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig); + olpc_platform_info.flags |= OLPC_F_PRESENT; + + /* get the platform revision */ + platform_detect(); + + /* assume B1 and above models always have a DCON */ + if (olpc_board_at_least(olpc_board(0xb1))) + olpc_platform_info.flags |= OLPC_F_DCON; + + /* get the EC revision */ + olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, + (unsigned char *) &olpc_platform_info.ecver, 1); + + /* check to see if the VSA exists */ + if (geode_has_vsa2()) + olpc_platform_info.flags |= OLPC_F_VSA; + + printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? 
"pre" : "", + olpc_platform_info.boardrev >> 4, + olpc_platform_info.ecver); + +unmap: + iounmap(romsig); + return 0; +} + +postcore_initcall(olpc_init); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index a94fb959a87..22c14e21c97 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -29,6 +29,7 @@ #include <linux/crash_dump.h> #include <linux/root_dev.h> #include <linux/pci.h> +#include <asm/pci-direct.h> #include <linux/efi.h> #include <linux/acpi.h> #include <linux/kallsyms.h> @@ -40,6 +41,7 @@ #include <linux/dmi.h> #include <linux/dma-mapping.h> #include <linux/ctype.h> +#include <linux/sort.h> #include <linux/uaccess.h> #include <linux/init_ohci1394_dma.h> #include <linux/kvm_para.h> @@ -288,6 +290,18 @@ static void __init parse_setup_data(void) } } +#ifdef CONFIG_PCI_MMCONFIG +extern void __cpuinit fam10h_check_enable_mmcfg(void); +extern void __init check_enable_amd_mmconf_dmi(void); +#else +void __cpuinit fam10h_check_enable_mmcfg(void) +{ +} +void __init check_enable_amd_mmconf_dmi(void) +{ +} +#endif + /* * setup_arch - architecture-specific boot-time initializations * @@ -515,6 +529,9 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + + /* do this before identify_cpu for boot cpu */ + check_enable_amd_mmconf_dmi(); } static int __cpuinit get_model_name(struct cpuinfo_x86 *c) @@ -767,6 +784,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) /* MFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); + if (c->x86 == 0x10) + fam10h_check_enable_mmcfg(); + if (amd_apic_timer_broken()) disable_apic_timer = 1; diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 1a89e93f3f1..2ff21f39893 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -115,7 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable 
part added */ void __init hpet_time_init(void) { diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 86808e666f9..1f476e47784 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -13,12 +13,15 @@ #include <linux/nodemask.h> #include <asm/io.h> #include <linux/pci_ids.h> +#include <linux/acpi.h> #include <asm/types.h> #include <asm/mmzone.h> #include <asm/proto.h> #include <asm/e820.h> #include <asm/pci-direct.h> #include <asm/numa.h> +#include <asm/mpspec.h> +#include <asm/apic.h> static __init int find_northbridge(void) { @@ -44,6 +47,30 @@ static __init int find_northbridge(void) return -1; } +static __init void early_get_boot_cpu_id(void) +{ + /* + * need to get boot_cpu_id so can use that to create apicid_to_node + * in k8_scan_nodes() + */ + /* + * Find possible boot-time SMP configuration: + */ + early_find_smp_config(); +#ifdef CONFIG_ACPI + /* + * Read APIC information from ACPI tables. + */ + early_acpi_boot_init(); +#endif + /* + * get boot-time SMP configuration: + */ + if (smp_found_config) + early_get_smp_config(); + early_init_lapic_mapping(); +} + int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; @@ -56,6 +83,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned cores; unsigned bits; int j; + unsigned apicid_base; if (!early_pci_allowed()) return -1; @@ -174,11 +202,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) /* use the coreid bits from early_identify_cpu */ bits = boot_cpu_data.x86_coreid_bits; cores = (1<<bits); + apicid_base = 0; + /* need to get boot_cpu_id early for system with apicid lifting */ + early_get_boot_cpu_id(); + if (boot_cpu_physical_apicid > 0) { + printk(KERN_INFO "BSP APIC ID: %02x\n", + boot_cpu_physical_apicid); + apicid_base = boot_cpu_physical_apicid; + } for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { nodeid = nodeids[i]; - for (j = 0; j < cores; j++) + for (j = 
apicid_base; j < cores + apicid_base; j++) apicid_to_node[(nodeid << bits) + j] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 index cdd6828b5ab..2a1516efb54 100644 --- a/arch/x86/pci/Makefile_32 +++ b/arch/x86/pci/Makefile_32 @@ -3,6 +3,7 @@ obj-y := i386.o init.o obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_PCI_OLPC) += olpc.o pci-y := fixup.o pci-$(CONFIG_ACPI) += acpi.o @@ -10,5 +11,6 @@ pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o pci-$(CONFIG_X86_NUMAQ) := numa.o irq.o +pci-$(CONFIG_NUMA) += mp_bus_to_node.o obj-y += $(pci-y) common.o early.o diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf14..8fbd19832cf 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2664cb3fc96..1a9c0c6a1a1 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -191,7 +191,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do { struct pci_bus *bus; struct pci_sysdata *sd; + int node; +#ifdef CONFIG_ACPI_NUMA int pxm; +#endif dmi_check_system(acpi_pciprobe_dmi_table); @@ -201,6 +204,17 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do return NULL; } + node = -1; +#ifdef CONFIG_ACPI_NUMA + pxm = acpi_get_pxm(device->handle); + if (pxm >= 0) + node = pxm_to_node(pxm); + if (node != -1) + set_mp_bus_to_node(busnum, node); + else + node = get_mp_bus_to_node(busnum); +#endif + /* Allocate per-root-bus (not per bus) arch-specific data. * TODO: leak; this memory is never freed. 
* It's arguable whether it's worth the trouble to care. @@ -212,13 +226,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } sd->domain = domain; - sd->node = -1; - - pxm = acpi_get_pxm(device->handle); -#ifdef CONFIG_ACPI_NUMA - if (pxm >= 0) - sd->node = pxm_to_node(pxm); -#endif + sd->node = node; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum. @@ -238,9 +246,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do kfree(sd); #ifdef CONFIG_ACPI_NUMA - if (bus != NULL) { + if (bus) { if (pxm >= 0) { - printk("bus %d -> pxm %d -> node %d\n", + printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", busnum, pxm, pxm_to_node(pxm)); } } @@ -248,7 +256,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do if (bus && (pci_probe & PCI_USE__CRS)) get_current_resources(device, busnum, domain, bus); - return bus; } diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 75fcc29ecf5..2a4d751818b 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -342,9 +342,14 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return NULL; } + sd->node = get_mp_bus_to_node(busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); + bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + if (!bus) + kfree(sd); - return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd); + return bus; } extern u8 pci_cache_line_size; @@ -420,6 +425,10 @@ char * __devinit pcibios_setup(char *str) pci_probe &= ~PCI_PROBE_MMCONF; return NULL; } + else if (!strcmp(str, "check_enable_amd_mmconf")) { + pci_probe |= PCI_CHECK_ENABLE_AMD_MMCONF; + return NULL; + } #endif else if (!strcmp(str, "noacpi")) { acpi_noirq_set(); @@ -480,7 +489,7 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } -struct pci_bus *__devinit 
pci_scan_bus_with_sysdata(int busno) +struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) { struct pci_bus *bus = NULL; struct pci_sysdata *sd; @@ -495,10 +504,15 @@ struct pci_bus *__devinit pci_scan_bus_with_sysdata(int busno) printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); return NULL; } - sd->node = -1; - bus = pci_scan_bus(busno, &pci_root_ops, sd); + sd->node = node; + bus = pci_scan_bus(busno, ops, sd); if (!bus) kfree(sd); return bus; } + +struct pci_bus *pci_scan_bus_with_sysdata(int busno) +{ + return pci_scan_bus_on_node(busno, &pci_root_ops, -1); +} diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 42f3e4cad17..21d1e0e0d53 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -258,7 +258,8 @@ void __init pci_direct_init(int type) { if (type == 0) return; - printk(KERN_INFO "PCI: Using configuration type %d\n", type); + printk(KERN_INFO "PCI: Using configuration type %d for base access\n", + type); if (type == 1) raw_pci_ops = &pci_direct_conf1; else @@ -275,8 +276,10 @@ int __init pci_direct_probe(void) if (!region) goto type2; - if (pci_check_type1()) + if (pci_check_type1()) { + raw_pci_ops = &pci_direct_conf1; return 1; + } release_resource(region); type2: @@ -290,7 +293,6 @@ int __init pci_direct_probe(void) goto fail2; if (pci_check_type2()) { - printk(KERN_INFO "PCI: Using configuration type 2\n"); raw_pci_ops = &pci_direct_conf2; return 2; } diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index a5ef5f55137..b60b2abd480 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -493,3 +493,20 @@ static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); + +/* + * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. 
Maybe we don't have a way to generate extended config space + * accesses. So check it + */ +static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +{ + dev->cfg_size = pci_cfg_space_size_ext(dev, 0); +} + +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index 3de9f9ba2da..dd30c6076b5 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -6,16 +6,17 @@ in the right sequence from here. */ static __init int pci_access_init(void) { - int type __maybe_unused = 0; - #ifdef CONFIG_PCI_DIRECT + int type = 0; + type = pci_direct_probe(); #endif -#ifdef CONFIG_PCI_MMCONFIG - pci_mmcfg_init(type); + + pci_mmcfg_early_init(); + +#ifdef CONFIG_PCI_OLPC + pci_olpc_init(); #endif - if (raw_pci_ops) - return 0; #ifdef CONFIG_PCI_BIOS pci_pcbios_init(); #endif @@ -28,7 +29,7 @@ static __init int pci_access_init(void) #ifdef CONFIG_PCI_DIRECT pci_direct_init(type); #endif - if (!raw_pci_ops) + if (!raw_pci_ops && !raw_pci_ext_ops) printk(KERN_ERR "PCI: Fatal: No config space access function found\n"); diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 579745ca6b6..0908fca901b 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -136,9 +136,11 @@ static void __init pirq_peer_trick(void) busmap[e->bus] = 1; } for(i = 1; i < 256; i++) { + int node; if (!busmap[i] || pci_find_bus(0, i)) continue; - if (pci_scan_bus_with_sysdata(i)) + node = get_mp_bus_to_node(i); + if (pci_scan_bus_on_node(i, &pci_root_ops, node)) printk(KERN_INFO "PCI: Discovered primary peer " "bus %02x [IRQ]\n", i); } diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index 
9cc813e2970..ab6d4b18a88 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -1,83 +1,536 @@ #include <linux/init.h> #include <linux/pci.h> +#include <asm/pci-direct.h> #include <asm/mpspec.h> #include <linux/cpumask.h> +#include <linux/topology.h> /* * This discovers the pcibus <-> node mapping on AMD K8. - * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID_REGISTER 0x60 -#define NODE_ID(dword) (dword & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0x94 -#define LDT_BUS_NUMBER_REGISTER_1 0xB4 -#define LDT_BUS_NUMBER_REGISTER_2 0xD4 -#define NR_LDT_BUS_NUMBER_REGISTERS 3 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 + +/* + * sub bus (transparent) will use entries from 3 to store extra from root, + * so need to make sure there are enough slots there, increase PCI_BUS_NUM_RESOURCES? 
+ */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; + +#ifdef CONFIG_NUMA + +#define BUS_NR 256 + +static int mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node = -1; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return node; + + node = mp_bus_to_node[busnum]; + + /* + * let numa_node_id to decide it later in dma_alloc_pages + * if there is no ram on that node + */ + if (node != -1 && !node_online(node)) + node = -1; + + return node; +} +#endif + +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* if only one root bus, don't need to anything */ + if (pci_root_num < 2) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + + if (start <= range[j].start && end >= range[j].end) { + range[j].start = 0; + range[j].end = 0; + continue; + } + + if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { + range[j].start = 
end + 1; + continue; + } + + + if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { + range[j].end = start - 1; + continue; + } + + if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + continue; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + +static u64 __initdata fam10h_mmconf_start; +static u64 __initdata fam10h_mmconf_end; +static void __init get_pci_mmcfg_amd_fam10h_range(void) +{ + u32 address; + u64 base, msr; + unsigned segn_busn_bits; + + /* 
assume all cpus from fam10h have mmconf */ + if (boot_cpu_data.x86 < 0x10) + return; + + address = MSR_FAM10H_MMIO_CONF_BASE; + rdmsrl(address, msr); + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); + + segn_busn_bits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + fam10h_mmconf_start = base; + fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; +} /** - * fill_mp_bus_to_cpumask() + * early_fill_mp_bus_to_node() + * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init static int -fill_mp_bus_to_cpumask(void) +static int __init early_fill_mp_bus_info(void) { - struct pci_dev *nb_dev = NULL; - int i, j; - u32 ldtbus, nid; - static int lbnr[3] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2 - }; - - while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { - pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); - - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - pci_read_config_dword(nb_dev, lbnr[i], &ldtbus); - /* - * if there are no busses hanging off of the current - * ldt link then both the secondary and subordinate - * bus number fields are set to 0. - * - * RED-PEN - * This is slightly broken because it assumes - * HT node IDs == Linux node ids, which is not always - * true. However it is probably mostly true. 
- */ - if (!(SECONDARY_LDT_BUS_NUMBER(ldtbus) == 0 - && SUBORDINATE_LDT_BUS_NUMBER(ldtbus) == 0)) { - for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); - j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - j++) { - struct pci_bus *bus; - struct pci_sysdata *sd; - - long node = NODE_ID(nid); - /* Algorithm a bit dumb, but - it shouldn't matter here */ - bus = pci_find_bus(0, j); - if (!bus) - continue; - if (!node_online(node)) - node = 0; - - sd = bus->sysdata; - sd->node = node; - } + int i; + int j; + unsigned bus; + unsigned slot; + int found; + int node; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; + +#ifdef CONFIG_NUMA + for (i = 0; i < BUS_NR; i++) + mp_bus_to_node[i] = -1; +#endif + + if (!early_pci_allowed()) + return -1; + + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; + + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); + + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; + + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); + + /* Check if that register is enabled for bus range */ + if ((reg & 7) != 3) + continue; + + min_bus = (reg >> 16) & 0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA + for (j = min_bus; j <= max_bus; j++) + mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; + } + + /* get the default node and link for left 
over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", + node, link, (u64)start, (u64)end); + + /* kernel only handle 16 bit only */ + if (end > 0xffff) + end = 0xffff; + update_res(info, start, end, IORESOURCE_IO, 1); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + range[0].end = (0xfdULL<<32) - 1; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* get mmconfig */ + get_pci_mmcfg_amd_fam10h_range(); + /* need to take out mmconf range */ + if (fam10h_mmconf_end) { + printk(KERN_DEBUG "Fam 10h mmconf 
[%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); + update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); + } + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + + printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", + node, link, (u64)start, (u64)end); + /* + * some sick allocation would have range overlap with fam10h + * mmconf range, so need to update start and end. + */ + if (fam10h_mmconf_end) { + int changed = 0; + u64 endx = 0; + if (start >= fam10h_mmconf_start && + start <= fam10h_mmconf_end) { + start = fam10h_mmconf_end + 1; + changed = 1; + } + + if (end >= fam10h_mmconf_start && + end <= fam10h_mmconf_end) { + end = fam10h_mmconf_start - 1; + changed = 1; + } + + if (start < fam10h_mmconf_start && + end > fam10h_mmconf_end) { + /* we got a hole */ + endx = fam10h_mmconf_start - 1; + update_res(info, start, endx, IORESOURCE_MEM, 0); + update_range(range, start, endx); + printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); + start = fam10h_mmconf_end + 1; + changed = 1; + } + if (changed) { + if (start <= end) { + printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); + } else { + printk(KERN_CONT "%s\n", endx?"":" ==> none"); + continue; + } } } + + update_res(info, start, end, IORESOURCE_MEM, 1); + update_range(range, start, end); + printk(KERN_CONT "\n"); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, 
val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? + * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA + for (i = 0; i < BUS_NR; i++) { + node = mp_bus_to_node[i]; + if (node >= 0) + printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); + } +#endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } } return 0; } -fs_initcall(fill_mp_bus_to_cpumask); +postcore_initcall(early_fill_mp_bus_info); diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index e041ced0ce1..a67921ce60a 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -12,6 +12,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) { int n, devfn; + long node; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; @@ -21,12 +22,13 @@ static void __devinit pcibios_fixup_peer_bridges(void) u32 l; if (pci_find_bus(0, n)) continue; + node = get_mp_bus_to_node(n); for (devfn = 0; devfn < 256; devfn += 8) { if 
(!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus_with_sysdata(n); + pci_scan_bus_on_node(n, &pci_root_ops, node); break; } } diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8d54df4dfaa..0cfebecf2a8 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -28,7 +28,7 @@ static int __initdata pci_mmcfg_resources_inserted; static const char __init *pci_mmcfg_e7520(void) { u32 win; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0xce, 2, &win); win = win & 0xf000; if(win == 0x0000 || win == 0xf000) @@ -53,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void) pci_mmcfg_config_num = 1; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); + raw_pci_ops->read(0, 0, PCI_DEVFN(0, 0), 0x48, 4, &pciexbar); /* Enable bit */ if (!(pciexbar & 1)) @@ -100,33 +100,102 @@ static const char __init *pci_mmcfg_intel_945(void) return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } +static const char __init *pci_mmcfg_amd_fam10h(void) +{ + u32 low, high, address; + u64 base, msr; + int i; + unsigned segnbits = 0, busnbits; + + if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) + return NULL; + + address = MSR_FAM10H_MMIO_CONF_BASE; + if (rdmsr_safe(address, &low, &high)) + return NULL; + + msr = high; + msr <<= 32; + msr |= low; + + /* mmconfig is not enable */ + if (!(msr & FAM10H_MMIO_CONF_ENABLE)) + return NULL; + + base = msr & (FAM10H_MMIO_CONF_BASE_MASK<<FAM10H_MMIO_CONF_BASE_SHIFT); + + busnbits = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT) & + FAM10H_MMIO_CONF_BUSRANGE_MASK; + + /* + * only handle bus 0 ? 
+ * need to skip it + */ + if (!busnbits) + return NULL; + + if (busnbits > 8) { + segnbits = busnbits - 8; + busnbits = 8; + } + + pci_mmcfg_config_num = (1 << segnbits); + pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]) * + pci_mmcfg_config_num, GFP_KERNEL); + if (!pci_mmcfg_config) + return NULL; + + for (i = 0; i < (1 << segnbits); i++) { + pci_mmcfg_config[i].address = base + (1<<28) * i; + pci_mmcfg_config[i].pci_segment = i; + pci_mmcfg_config[i].start_bus_number = 0; + pci_mmcfg_config[i].end_bus_number = (1 << busnbits) - 1; + } + + return "AMD Family 10h NB"; +} + struct pci_mmcfg_hostbridge_probe { + u32 bus; + u32 devfn; u32 vendor; u32 device; const char *(*probe)(void); }; static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, - { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, + { 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, + { 0, PCI_DEVFN(0x18, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, + { 0xff, PCI_DEVFN(0, 0), PCI_VENDOR_ID_AMD, + 0x1200, pci_mmcfg_amd_fam10h }, }; static int __init pci_mmcfg_check_hostbridge(void) { u32 l; + u32 bus, devfn; u16 vendor, device; int i; const char *name; - pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); - vendor = l & 0xffff; - device = (l >> 16) & 0xffff; + if (!raw_pci_ops) + return 0; pci_mmcfg_config_num = 0; pci_mmcfg_config = NULL; name = NULL; for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { + bus = pci_mmcfg_probes[i].bus; + devfn = pci_mmcfg_probes[i].devfn; + raw_pci_ops->read(0, bus, devfn, 0, 4, &l); + vendor = l & 0xffff; + device = (l >> 16) & 0xffff; + if (pci_mmcfg_probes[i].vendor == vendor && pci_mmcfg_probes[i].device == device) name = pci_mmcfg_probes[i].probe(); @@ -173,9 +242,78 @@ 
static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) pci_mmcfg_resources_inserted = 1; } -static void __init pci_mmcfg_reject_broken(int type) +static acpi_status __init check_mcfg_resource(struct acpi_resource *res, + void *data) +{ + struct resource *mcfg_res = data; + struct acpi_resource_address64 address; + acpi_status status; + + if (res->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32) { + struct acpi_resource_fixed_memory32 *fixmem32 = + &res->data.fixed_memory32; + if (!fixmem32) + return AE_OK; + if ((mcfg_res->start >= fixmem32->address) && + (mcfg_res->end < (fixmem32->address + + fixmem32->address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + } + if ((res->type != ACPI_RESOURCE_TYPE_ADDRESS32) && + (res->type != ACPI_RESOURCE_TYPE_ADDRESS64)) + return AE_OK; + + status = acpi_resource_to_address64(res, &address); + if (ACPI_FAILURE(status) || + (address.address_length <= 0) || + (address.resource_type != ACPI_MEMORY_RANGE)) + return AE_OK; + + if ((mcfg_res->start >= address.minimum) && + (mcfg_res->end < (address.minimum + address.address_length))) { + mcfg_res->flags = 1; + return AE_CTRL_TERMINATE; + } + return AE_OK; +} + +static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, + void *context, void **rv) +{ + struct resource *mcfg_res = context; + + acpi_walk_resources(handle, METHOD_NAME__CRS, + check_mcfg_resource, context); + + if (mcfg_res->flags) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static int __init is_acpi_reserved(unsigned long start, unsigned long end) +{ + struct resource mcfg_res; + + mcfg_res.start = start; + mcfg_res.end = end; + mcfg_res.flags = 0; + + acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL); + + if (!mcfg_res.flags) + acpi_get_devices("PNP0C02", find_mboard_resource, &mcfg_res, + NULL); + + return mcfg_res.flags; +} + +static void __init pci_mmcfg_reject_broken(int early) { typeof(pci_mmcfg_config[0]) *cfg; + int i; if 
((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || @@ -184,51 +322,80 @@ static void __init pci_mmcfg_reject_broken(int type) cfg = &pci_mmcfg_config[0]; - /* - * Handle more broken MCFG tables on Asus etc. - * They only contain a single entry for bus 0-0. - */ - if (pci_mmcfg_config_num == 1 && - cfg->pci_segment == 0 && - (cfg->start_bus_number | cfg->end_bus_number) == 0) { - printk(KERN_ERR "PCI: start and end of bus number is 0. " - "Rejected as broken MCFG.\n"); - goto reject; + for (i = 0; i < pci_mmcfg_config_num; i++) { + int valid = 0; + u32 size = (cfg->end_bus_number + 1) << 20; + cfg = &pci_mmcfg_config[i]; + printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " + "segment %hu buses %u - %u\n", + i, (unsigned long)cfg->address, cfg->pci_segment, + (unsigned int)cfg->start_bus_number, + (unsigned int)cfg->end_bus_number); + + if (!early && + is_acpi_reserved(cfg->address, cfg->address + size - 1)) { + printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " + "in ACPI motherboard resources\n", + cfg->address); + valid = 1; + } + + if (valid) + continue; + + if (!early) + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" + " reserved in ACPI motherboard resources\n", + cfg->address); + /* Don't try to do this check unless configuration + type 1 is available. how about type 2 ?*/ + if (raw_pci_ops && e820_all_mapped(cfg->address, + cfg->address + size - 1, + E820_RESERVED)) { + printk(KERN_NOTICE + "PCI: MCFG area at %Lx reserved in E820\n", + cfg->address); + valid = 1; + } + + if (!valid) + goto reject; } - /* - * Only do this check when type 1 works. 
If it doesn't work - * assume we run on a Mac and always use MCFG - */ - if (type == 1 && !e820_all_mapped(cfg->address, - cfg->address + MMCONFIG_APER_MIN, - E820_RESERVED)) { - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " E820-reserved\n", cfg->address); - goto reject; - } return; reject: printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + pci_mmcfg_arch_free(); kfree(pci_mmcfg_config); pci_mmcfg_config = NULL; pci_mmcfg_config_num = 0; } -void __init pci_mmcfg_init(int type) -{ - int known_bridge = 0; +static int __initdata known_bridge; +void __init __pci_mmcfg_init(int early) +{ + /* MMCONFIG disabled */ if ((pci_probe & PCI_PROBE_MMCONF) == 0) return; - if (type == 1 && pci_mmcfg_check_hostbridge()) - known_bridge = 1; + /* MMCONFIG already enabled */ + if (!early && !(pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF)) + return; + + /* for late to exit */ + if (known_bridge) + return; + + if (early) { + if (pci_mmcfg_check_hostbridge()) + known_bridge = 1; + } if (!known_bridge) { acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); - pci_mmcfg_reject_broken(type); + pci_mmcfg_reject_broken(early); } if ((pci_mmcfg_config_num == 0) || @@ -249,6 +416,16 @@ void __init pci_mmcfg_init(int type) } } +void __init pci_mmcfg_early_init(void) +{ + __pci_mmcfg_init(1); +} + +void __init pci_mmcfg_late_init(void) +{ + __pci_mmcfg_init(0); +} + static int __init pci_mmcfg_late_insert_resources(void) { /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 081816ada05..f3c761dce69 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -136,3 +136,7 @@ int __init pci_mmcfg_arch_init(void) raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ +} diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 9207fd49233..a1994163c99 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -127,7 +127,7 @@ static void __iomem * __init mcfg_ioremap(struct 
acpi_mcfg_allocation *cfg) int __init pci_mmcfg_arch_init(void) { int i; - pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * + pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); @@ -141,9 +141,29 @@ int __init pci_mmcfg_arch_init(void) printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " "segment %d\n", pci_mmcfg_config[i].pci_segment); + pci_mmcfg_arch_free(); return 0; } } raw_pci_ext_ops = &pci_mmcfg; return 1; } + +void __init pci_mmcfg_arch_free(void) +{ + int i; + + if (pci_mmcfg_virt == NULL) + return; + + for (i = 0; i < pci_mmcfg_config_num; ++i) { + if (pci_mmcfg_virt[i].virt) { + iounmap(pci_mmcfg_virt[i].virt); + pci_mmcfg_virt[i].virt = NULL; + pci_mmcfg_virt[i].cfg = NULL; + } + } + + kfree(pci_mmcfg_virt); + pci_mmcfg_virt = NULL; +} diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c new file mode 100644 index 00000000000..022943999b8 --- /dev/null +++ b/arch/x86/pci/mp_bus_to_node.c @@ -0,0 +1,23 @@ +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/topology.h> + +#define BUS_NR 256 + +static unsigned char mp_bus_to_node[BUS_NR]; + +void set_mp_bus_to_node(int busnum, int node) +{ + if (busnum >= 0 && busnum < BUS_NR) + mp_bus_to_node[busnum] = (unsigned char) node; +} + +int get_mp_bus_to_node(int busnum) +{ + int node; + + if (busnum < 0 || busnum > (BUS_NR - 1)) + return 0; + node = mp_bus_to_node[busnum]; + return node; +} diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c new file mode 100644 index 00000000000..5e7636558c0 --- /dev/null +++ b/arch/x86/pci/olpc.c @@ -0,0 +1,313 @@ +/* + * Low-level PCI config space access for OLPC systems who lack the VSA + * PCI virtualization software. + * + * Copyright © 2006 Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * The AMD Geode chipset (ie: GX2 processor, cs5536 I/O companion device) + * has some I/O functions (display, southbridge, sound, USB HCIs, etc) + * that more or less behave like PCI devices, but the hardware doesn't + * directly implement the PCI configuration space headers. AMD provides + * "VSA" (Virtual System Architecture) software that emulates PCI config + * space for these devices, by trapping I/O accesses to PCI config register + * (CF8/CFC) and running some code in System Management Mode interrupt state. + * On the OLPC platform, we don't want to use that VSA code because + * (a) it slows down suspend/resume, and (b) recompiling it requires special + * compilers that are hard to get. So instead of letting the complex VSA + * code simulate the PCI config registers for the on-chip devices, we + * just simulate them the easy way, by inserting the code into the + * pci_write_config and pci_read_config path. Most of the config registers + * are read-only anyway, so the bulk of the simulation is just table lookup. + */ + +#include <linux/pci.h> +#include <linux/init.h> +#include <asm/olpc.h> +#include <asm/geode.h> +#include "pci.h" + +/* + * In the tables below, the first two line (8 longwords) are the + * size masks that are used when the higher level PCI code determines + * the size of the region by writing ~0 to a base address register + * and reading back the result. + * + * The following lines are the values that are read during normal + * PCI config access cycles, i.e. not after just having written + * ~0 to a base address register. 
+ */ + +static const uint32_t lxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x281022, 0x2200005, 0x6000021, 0x80f808, /* AMD Vendor ID */ + 0x0, 0x0, 0x0, 0x0, /* No virtual registers, hence no BAR */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */ + 0xfffffffd, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x28100b, 0x2200005, 0x6000021, 0x80f808, /* NSC Vendor ID */ + 0xac1d, 0x0, 0x0, 0x0, /* I/O BAR - base of virtual registers */ + 0x0, 0x0, 0x0, 0x28100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t lxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff000008, 0xffffc000, 0xffffc000, 0xffffc000, + 0xffffc000, 0x0, 0x0, 0x0, + + 0x20811022, 0x2200003, 0x3000000, 0x0, /* AMD Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0xfe00c000, 0x0, 0x0, 0x30100b, /* VIP */ + 0x0, 0x0, 0x0, 0x10e, /* INTA, IRQ14 for graphics accel */ + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t gxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */ + 0xff800008, 0xffffc000, 0xffffc000, 0xffffc000, + 0x0, 0x0, 0x0, 0x0, + + 0x30100b, 0x2200003, 0x3000000, 0x0, /* NSC Vendor ID */ + 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */ + 0x0, 0x0, 0x0, 0x30100b, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t aes_hdr[] = { /* dev 1 function 2 - devfn = 0xa */ + 0xffffc000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20821022, 0x2a00006, 0x10100000, 0x8, /* NSC Vendor ID */ + 0xfe010000, 0x0, 0x0, 0x0, /* AES registers */ + 0x0, 0x0, 0x0, 0x20821022, + 0x0, 0x0, 0x0, 0x0, + 0x0, 
0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + + +static const uint32_t isa_hdr[] = { /* dev f function 0 - devfn = 78 */ + 0xfffffff9, 0xffffff01, 0xffffffc1, 0xffffffe1, + 0xffffff81, 0xffffffc1, 0x0, 0x0, + + 0x20901022, 0x2a00049, 0x6010003, 0x802000, + 0x18b1, 0x1001, 0x1801, 0x1881, /* SMB-8 GPIO-256 MFGPT-64 IRQ-32 */ + 0x1401, 0x1841, 0x0, 0x20901022, /* PMS-128 ACPI-64 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0xaa5b, /* IRQ steering */ + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ac97_hdr[] = { /* dev f function 3 - devfn = 7b */ + 0xffffff81, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20931022, 0x2a00041, 0x4010001, 0x0, + 0x1481, 0x0, 0x0, 0x0, /* I/O BAR-128 */ + 0x0, 0x0, 0x0, 0x20931022, + 0x0, 0x0, 0x0, 0x205, /* IntB, IRQ5 */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ohci_hdr[] = { /* dev f function 4 - devfn = 7c */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20941022, 0x2300006, 0xc031002, 0x0, + 0xfe01a000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20941022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, + 44 is mask 8103 (power control) */ + 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, +}; + +static const uint32_t ehci_hdr[] = { /* dev f function 4 - devfn = 7d */ + 0xfffff000, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, + + 0x20951022, 0x2300006, 0xc032002, 0x0, + 0xfe01b000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */ + 0x0, 0x0, 0x0, 0x20951022, + 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */ + 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, 44 is + mask 8103 (power control) */ +#if 0 + 0x1, 0x40080000, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ +#endif + 0x01000001, 0x0, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */ + 0x2020, 0x0, 0x0, 0x0, /* (EHCI page 8) 60 SBRN (R/O), + 61 FLADJ (R/W), PORTWAKECAP */ +}; + +static uint32_t ff_loc = ~0; +static uint32_t 
zero_loc; +static int bar_probing; /* Set after a write of ~0 to a BAR */ +static int is_lx; + +#define NB_SLOT 0x1 /* Northbridge - GX chip - Device 1 */ +#define SB_SLOT 0xf /* Southbridge - CS5536 chip - Device F */ + +static int is_simulated(unsigned int bus, unsigned int devfn) +{ + return (!bus && ((PCI_SLOT(devfn) == NB_SLOT) || + (PCI_SLOT(devfn) == SB_SLOT))); +} + +static uint32_t *hdr_addr(const uint32_t *hdr, int reg) +{ + uint32_t addr; + + /* + * This is a little bit tricky. The header maps consist of + * 0x20 bytes of size masks, followed by 0x70 bytes of header data. + * In the normal case, when not probing a BAR's size, we want + * to access the header data, so we add 0x20 to the reg offset, + * thus skipping the size mask area. + * In the BAR probing case, we want to access the size mask for + * the BAR, so we subtract 0x10 (the config header offset for + * BAR0), and don't skip the size mask area. + */ + + addr = (uint32_t)hdr + reg + (bar_probing ? -0x10 : 0x20); + + bar_probing = 0; + return (uint32_t *)addr; +} + +static int pci_olpc_read(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t *value) +{ + uint32_t *addr; + + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.read(seg, bus, devfn, reg, len, value); + + /* + * No device has config registers past 0x70, so we save table space + * by not storing entries for the nonexistent registers + */ + if (reg >= 0x70) + addr = &zero_loc; + else { + switch (devfn) { + case 0x8: + addr = hdr_addr(is_lx ? lxnb_hdr : gxnb_hdr, reg); + break; + case 0x9: + addr = hdr_addr(is_lx ? lxfb_hdr : gxfb_hdr, reg); + break; + case 0xa: + addr = is_lx ? 
hdr_addr(aes_hdr, reg) : &ff_loc; + break; + case 0x78: + addr = hdr_addr(isa_hdr, reg); + break; + case 0x7b: + addr = hdr_addr(ac97_hdr, reg); + break; + case 0x7c: + addr = hdr_addr(ohci_hdr, reg); + break; + case 0x7d: + addr = hdr_addr(ehci_hdr, reg); + break; + default: + addr = &ff_loc; + break; + } + } + switch (len) { + case 1: + *value = *(uint8_t *)addr; + break; + case 2: + *value = *(uint16_t *)addr; + break; + case 4: + *value = *addr; + break; + default: + BUG(); + } + + return 0; +} + +static int pci_olpc_write(unsigned int seg, unsigned int bus, + unsigned int devfn, int reg, int len, uint32_t value) +{ + /* Use the hardware mechanism for non-simulated devices */ + if (!is_simulated(bus, devfn)) + return pci_direct_conf1.write(seg, bus, devfn, reg, len, value); + + /* XXX we may want to extend this to simulate EHCI power management */ + + /* + * Mostly we just discard writes, but if the write is a size probe + * (i.e. writing ~0 to a BAR), we remember it and arrange to return + * the appropriate size mask on the next read. This is cheating + * to some extent, because it depends on the fact that the next + * access after such a write will always be a read to the same BAR. + */ + + if ((reg >= 0x10) && (reg < 0x2c)) { + /* write is to a BAR */ + if (value == ~0) + bar_probing = 1; + } else { + /* + * No warning on writes to ROM BAR, CMD, LATENCY_TIMER, + * CACHE_LINE_SIZE, or PM registers. 
+ */ + if ((reg != PCI_ROM_ADDRESS) && (reg != PCI_COMMAND_MASTER) && + (reg != PCI_LATENCY_TIMER) && + (reg != PCI_CACHE_LINE_SIZE) && (reg != 0x44)) + printk(KERN_WARNING "OLPC PCI: Config write to devfn" + " %x reg %x value %x\n", devfn, reg, value); + } + + return 0; +} + +static struct pci_raw_ops pci_olpc_conf = { + .read = pci_olpc_read, + .write = pci_olpc_write, +}; + +void __init pci_olpc_init(void) +{ + if (!machine_is_olpc() || olpc_has_vsa()) + return; + + printk(KERN_INFO "PCI: Using configuration type OLPC\n"); + raw_pci_ops = &pci_olpc_conf; + is_lx = is_geode_lx(); +} diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index c4bddaeff61..c58805a92db 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -26,6 +26,7 @@ #define PCI_ASSIGN_ALL_BUSSES 0x4000 #define PCI_CAN_SKIP_ISA_ALIGN 0x8000 #define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; @@ -97,11 +98,12 @@ extern struct pci_raw_ops pci_direct_conf1; extern int pci_direct_probe(void); extern void pci_direct_init(int type); extern void pci_pcbios_init(void); -extern void pci_mmcfg_init(int type); +extern void pci_olpc_init(void); /* pci-mmconfig.c */ extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index ef63adadf7f..070ff8af3a2 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -19,12 +19,11 @@ #include <linux/thread_info.h> #include <linux/ptrace.h> #include <linux/mm.h> +#include <linux/kbuild.h> #include <asm/ptrace.h> #include <asm/uaccess.h> -#define DEFINE(sym, val) asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - int main(void) { /* struct pt_regs */ diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 55c5f1fc4f1..66e55288178 100644 --- 
a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -53,7 +53,7 @@ EXPORT_SYMBOL(blk_queue_ordered); /* * Cache flushing for ordered writes handling */ -inline unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_ordered_cur_seq(struct request_queue *q) { if (!q->ordseq) return 0; @@ -143,10 +143,8 @@ static void queue_flush(struct request_queue *q, unsigned which) end_io = post_flush_end_io; } + blk_rq_init(q, rq); rq->cmd_flags = REQ_HARDBARRIER; - rq_init(q, rq); - rq->elevator_private = NULL; - rq->elevator_private2 = NULL; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; q->prepare_flush_fn(q, rq); @@ -167,14 +165,11 @@ static inline struct request *start_ordered(struct request_queue *q, blkdev_dequeue_request(rq); q->orig_bar_rq = rq; rq = &q->bar_rq; - rq->cmd_flags = 0; - rq_init(q, rq); + blk_rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) rq->cmd_flags |= REQ_RW; if (q->ordered & QUEUE_ORDERED_FUA) rq->cmd_flags |= REQ_FUA; - rq->elevator_private = NULL; - rq->elevator_private2 = NULL; init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; diff --git a/block/blk-core.c b/block/blk-core.c index 2a438a93f72..5d09f8c5602 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -107,41 +107,21 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) } EXPORT_SYMBOL(blk_get_backing_dev_info); -/* - * We can't just memset() the structure, since the allocation path - * already stored some information in the request. 
- */ -void rq_init(struct request_queue *q, struct request *rq) +void blk_rq_init(struct request_queue *q, struct request *rq) { + memset(rq, 0, sizeof(*rq)); + INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); rq->q = q; rq->sector = rq->hard_sector = (sector_t) -1; - rq->nr_sectors = rq->hard_nr_sectors = 0; - rq->current_nr_sectors = rq->hard_cur_sectors = 0; - rq->bio = rq->biotail = NULL; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); - rq->rq_disk = NULL; - rq->nr_phys_segments = 0; - rq->nr_hw_segments = 0; - rq->ioprio = 0; - rq->special = NULL; - rq->buffer = NULL; + rq->cmd = rq->__cmd; rq->tag = -1; - rq->errors = 0; rq->ref_count = 1; - rq->cmd_len = 0; - memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->data_len = 0; - rq->extra_len = 0; - rq->sense_len = 0; - rq->data = NULL; - rq->sense = NULL; - rq->end_io = NULL; - rq->end_io_data = NULL; - rq->next_rq = NULL; } +EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) @@ -194,7 +174,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); - for (bit = 0; bit < sizeof(rq->cmd); bit++) + for (bit = 0; bit < BLK_MAX_CDB; bit++) printk("%02x ", rq->cmd[bit]); printk("\n"); } @@ -220,7 +200,8 @@ void blk_plug_device(struct request_queue *q) if (blk_queue_stopped(q)) return; - if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); } @@ -235,9 +216,10 @@ int blk_remove_plug(struct request_queue *q) { WARN_ON(!irqs_disabled()); - if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; + queue_flag_clear(QUEUE_FLAG_PLUGGED, q); del_timer(&q->unplug_timer); return 1; } @@ -333,15 +315,16 @@ 
void blk_start_queue(struct request_queue *q) { WARN_ON(!irqs_disabled()); - clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_STOPPED, q); /* * one level of recursion is ok and is much faster than kicking * the unplug handling */ - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); @@ -366,7 +349,7 @@ EXPORT_SYMBOL(blk_start_queue); void blk_stop_queue(struct request_queue *q) { blk_remove_plug(q); - set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_STOPPED, q); } EXPORT_SYMBOL(blk_stop_queue); @@ -395,11 +378,8 @@ EXPORT_SYMBOL(blk_sync_queue); * blk_run_queue - run a single device queue * @q: The queue to run */ -void blk_run_queue(struct request_queue *q) +void __blk_run_queue(struct request_queue *q) { - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); /* @@ -407,15 +387,28 @@ void blk_run_queue(struct request_queue *q) * handling reinvoke the handler shortly if we already got there. 
*/ if (!elv_queue_empty(q)) { - if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + queue_flag_set(QUEUE_FLAG_REENTER, q); q->request_fn(q); - clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { blk_plug_device(q); kblockd_schedule_work(&q->unplug_work); } } +} +EXPORT_SYMBOL(__blk_run_queue); + +/** + * blk_run_queue - run a single device queue + * @q: The queue to run + */ +void blk_run_queue(struct request_queue *q) +{ + unsigned long flags; + spin_lock_irqsave(q->queue_lock, flags); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -428,7 +421,7 @@ void blk_put_queue(struct request_queue *q) void blk_cleanup_queue(struct request_queue *q) { mutex_lock(&q->sysfs_lock); - set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); if (q->elevator) @@ -607,6 +600,8 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) if (!rq) return NULL; + blk_rq_init(q, rq); + /* * first three bits are identical in rq->cmd_flags and bio->bi_rw, * see bio.h and blkdev.h @@ -789,8 +784,6 @@ rq_starved: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - rq_init(q, rq); - blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); out: return rq; diff --git a/block/blk-map.c b/block/blk-map.c index 3c942bd6422..0b1af5a3537 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -255,10 +255,18 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @kbuf: the kernel buffer * @len: length of user data * @gfp_mask: memory allocation flags + * + * Description: + * Data will be mapped directly if possible. Otherwise a bounce + * buffer is used. 
*/ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { + unsigned long kaddr; + unsigned int alignment; + int reading = rq_data_dir(rq) == READ; + int do_copy = 0; struct bio *bio; if (len > (q->max_hw_sectors << 9)) @@ -266,13 +274,24 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - bio = bio_map_kern(q, kbuf, len, gfp_mask); + kaddr = (unsigned long)kbuf; + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + do_copy = ((kaddr & alignment) || (len & alignment)); + + if (do_copy) + bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); + else + bio = bio_map_kern(q, kbuf, len, gfp_mask); + if (IS_ERR(bio)) return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) bio->bi_rw |= (1 << BIO_RW); + if (do_copy) + rq->cmd_flags |= REQ_COPY_USER; + blk_rq_bio_prep(q, rq, bio); blk_queue_bounce(q, &rq->bio); rq->buffer = rq->data = NULL; diff --git a/block/blk-merge.c b/block/blk-merge.c index b5c5c4a9e3f..73b23562af2 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -55,7 +55,7 @@ void blk_recalc_rq_segments(struct request *rq) if (!rq->bio) return; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); hw_seg_size = seg_size = 0; phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; rq_for_each_segment(bv, rq, iter) { @@ -128,7 +128,7 @@ EXPORT_SYMBOL(blk_recount_segments); static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, struct bio *nxt) { - if (!(q->queue_flags & (1 << QUEUE_FLAG_CLUSTER))) + if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) @@ -175,7 +175,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, int nsegs, cluster; nsegs = 0; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); /* * for 
each bio in rq diff --git a/block/blk-settings.c b/block/blk-settings.c index 5713f7e5cbd..6089384ab06 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -14,7 +14,6 @@ unsigned long blk_max_low_pfn; EXPORT_SYMBOL(blk_max_low_pfn); unsigned long blk_max_pfn; -EXPORT_SYMBOL(blk_max_pfn); /** * blk_queue_prep_rq - set a prepare_request function for queue @@ -288,7 +287,7 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) t->max_segment_size = min(t->max_segment_size, b->max_segment_size); t->hardsect_size = max(t->hardsect_size, b->hardsect_size); if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) - clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); + queue_flag_clear(QUEUE_FLAG_CLUSTER, t); } EXPORT_SYMBOL(blk_queue_stack_limits); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index fc41d83be22..e85c4013e8a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -135,6 +135,25 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } +static ssize_t queue_nomerges_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_nomerges(q), page); +} + +static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long nm; + ssize_t ret = queue_var_store(&nm, page, count); + + if (nm) + set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags); + else + clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags); + + return ret; +} + static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, @@ -170,6 +189,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_hw_sector_size_show, }; +static struct queue_sysfs_entry queue_nomerges_entry = { + .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR }, + .show = queue_nomerges_show, + .store = queue_nomerges_store, +}; + static struct attribute *default_attrs[] = { 
&queue_requests_entry.attr, &queue_ra_entry.attr, @@ -177,6 +202,7 @@ static struct attribute *default_attrs[] = { &queue_max_sectors_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, + &queue_nomerges_entry.attr, NULL, }; diff --git a/block/blk-tag.c b/block/blk-tag.c index 4780a46ce23..e176ddbe599 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q) __blk_free_tags(bqt); q->queue_tags = NULL; - q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } /** @@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags); **/ void blk_queue_free_tags(struct request_queue *q) { - clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_QUEUED, q); } EXPORT_SYMBOL(blk_queue_free_tags); @@ -188,7 +188,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, rc = blk_queue_resize_tags(q, depth); if (rc) return rc; - set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_QUEUED, q); return 0; } else atomic_inc(&tags->refcnt); @@ -197,7 +197,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth, * assign it, all done */ q->queue_tags = tags; - q->queue_flags |= (1 << QUEUE_FLAG_QUEUED); + queue_flag_set(QUEUE_FLAG_QUEUED, q); INIT_LIST_HEAD(&q->tag_busy_list); return 0; fail: diff --git a/block/blk.h b/block/blk.h index ec9120fb789..59776ab4742 100644 --- a/block/blk.h +++ b/block/blk.h @@ -10,7 +10,6 @@ extern struct kmem_cache *blk_requestq_cachep; extern struct kobj_type blk_queue_ktype; -void rq_init(struct request_queue *q, struct request *rq); void init_request_from_bio(struct request *req, struct bio *bio); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); diff --git a/block/elevator.c b/block/elevator.c index 88318c38360..ac5310ef827 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -69,7 +69,7 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio 
*bio) /* * can we safely merge with this request? */ -inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) +int elv_rq_merge_ok(struct request *rq, struct bio *bio) { if (!rq_mergeable(rq)) return 0; @@ -488,6 +488,9 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) } } + if (blk_queue_nomerges(q)) + return ELEVATOR_NO_MERGE; + /* * See if our hash lookup can find a potential backmerge. */ @@ -1070,7 +1073,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ spin_lock_irq(q->queue_lock); - set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); elv_drain_elevator(q); @@ -1104,7 +1107,10 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. */ elevator_exit(old_elevator); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 1; fail_register: @@ -1115,7 +1121,11 @@ fail_register: elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); + + spin_lock_irq(q->queue_lock); + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); + return 0; } diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index a2c3a936ebf..ffa3720e6ca 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -217,8 +217,6 @@ EXPORT_SYMBOL_GPL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, int has_write_perm) { - memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ - if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; if (blk_verify_command(rq->cmd, has_write_perm)) @@ -531,7 +529,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq->data_len = 0; rq->extra_len = 0; 
rq->timeout = BLK_DEFAULT_SG_TIMEOUT; - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; rq->cmd[4] = data; rq->cmd_len = 6; diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 43a95e5640d..5b73f6a2cd8 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -92,6 +92,7 @@ struct acpi_ac { #ifdef CONFIG_ACPI_PROCFS_POWER static const struct file_operations acpi_ac_fops = { + .owner = THIS_MODULE, .open = acpi_ac_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -195,16 +196,11 @@ static int acpi_ac_add_fs(struct acpi_device *device) } /* 'state' [R] */ - entry = create_proc_entry(ACPI_AC_FILE_STATE, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_AC_FILE_STATE, + S_IRUGO, acpi_device_dir(device), + &acpi_ac_fops, acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_ac_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index d5729d5dc19..b1c723f9f58 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -741,15 +741,13 @@ static int acpi_battery_add_fs(struct acpi_device *device) } for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { - entry = create_proc_entry(acpi_battery_file[i].name, - acpi_battery_file[i].mode, acpi_device_dir(device)); + entry = proc_create_data(acpi_battery_file[i].name, + acpi_battery_file[i].mode, + acpi_device_dir(device), + &acpi_battery_file[i].ops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_battery_file[i].ops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } } return 0; } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2d1955c1183..a6dbcf4d9ef 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -35,6 +35,7 @@ #ifdef CONFIG_X86 #include <asm/mpspec.h> #endif +#include <linux/pci.h> #include <acpi/acpi_bus.h> #include <acpi/acpi_drivers.h> @@ -784,6 +785,7 @@ 
static int __init acpi_init(void) result = acpi_bus_init(); if (!result) { + pci_mmcfg_late_init(); if (!(pm_flags & PM_APM)) pm_flags |= PM_ACPI; else { diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 6c5da83cdb6..1dfec413588 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -102,6 +102,7 @@ struct acpi_button { }; static const struct file_operations acpi_button_info_fops = { + .owner = THIS_MODULE, .open = acpi_button_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -109,6 +110,7 @@ static const struct file_operations acpi_button_info_fops = { }; static const struct file_operations acpi_button_state_fops = { + .owner = THIS_MODULE, .open = acpi_button_state_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -207,27 +209,21 @@ static int acpi_button_add_fs(struct acpi_device *device) acpi_device_dir(device)->owner = THIS_MODULE; /* 'info' [R] */ - entry = create_proc_entry(ACPI_BUTTON_FILE_INFO, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_BUTTON_FILE_INFO, + S_IRUGO, acpi_device_dir(device), + &acpi_button_info_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_button_info_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* show lid state [R] */ if (button->type == ACPI_BUTTON_TYPE_LID) { - entry = create_proc_entry(ACPI_BUTTON_FILE_STATE, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_BUTTON_FILE_STATE, + S_IRUGO, acpi_device_dir(device), + &acpi_button_state_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_button_state_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } } return 0; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 7222a18a031..e3f04b272f3 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -669,16 +669,11 @@ static int acpi_ec_add_fs(struct acpi_device *device) return -ENODEV; } - entry = 
create_proc_entry(ACPI_EC_FILE_INFO, S_IRUGO, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_EC_FILE_INFO, S_IRUGO, + acpi_device_dir(device), + &acpi_ec_info_ops, acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_ec_info_ops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index abec1ca94cf..0c24bd4d656 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -102,6 +102,7 @@ static unsigned int acpi_system_poll_event(struct file *file, poll_table * wait) } static const struct file_operations acpi_system_event_ops = { + .owner = THIS_MODULE, .open = acpi_system_open_event, .read = acpi_system_read_event, .release = acpi_system_close_event, @@ -294,10 +295,9 @@ static int __init acpi_event_init(void) #ifdef CONFIG_ACPI_PROC_EVENT /* 'event' [R] */ - entry = create_proc_entry("event", S_IRUSR, acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_event_ops; - else + entry = proc_create("event", S_IRUSR, acpi_root_dir, + &acpi_system_event_ops); + if (!entry) return -ENODEV; #endif diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index c8e3cba423e..194077ab9b8 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -192,17 +192,13 @@ static int acpi_fan_add_fs(struct acpi_device *device) } /* 'status' [R/W] */ - entry = create_proc_entry(ACPI_FAN_FILE_STATE, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_FAN_FILE_STATE, + S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_fan_state_ops, + device); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_fan_state_ops; - entry->data = device; - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 76bf6d90c70..21fc8bf0d31 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -93,6 +93,7 @@ struct acpi_power_resource { 
static struct list_head acpi_power_resource_list; static const struct file_operations acpi_power_fops = { + .owner = THIS_MODULE, .open = acpi_power_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -543,15 +544,11 @@ static int acpi_power_add_fs(struct acpi_device *device) } /* 'status' [R] */ - entry = create_proc_entry(ACPI_POWER_FILE_STATUS, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_POWER_FILE_STATUS, + S_IRUGO, acpi_device_dir(device), + &acpi_power_fops, acpi_driver_data(device)); if (!entry) return -EIO; - else { - entry->proc_fops = &acpi_power_fops; - entry->data = acpi_driver_data(device); - } - return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a825b431b64..dd28c912e84 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -112,6 +112,7 @@ static struct acpi_driver acpi_processor_driver = { #define UNINSTALL_NOTIFY_HANDLER 2 static const struct file_operations acpi_processor_info_fops = { + .owner = THIS_MODULE, .open = acpi_processor_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -326,40 +327,30 @@ static int acpi_processor_add_fs(struct acpi_device *device) acpi_device_dir(device)->owner = THIS_MODULE; /* 'info' [R] */ - entry = create_proc_entry(ACPI_PROCESSOR_FILE_INFO, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_PROCESSOR_FILE_INFO, + S_IRUGO, acpi_device_dir(device), + &acpi_processor_info_fops, + acpi_driver_data(device)); if (!entry) return -EIO; - else { - entry->proc_fops = &acpi_processor_info_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'throttling' [R/W] */ - entry = create_proc_entry(ACPI_PROCESSOR_FILE_THROTTLING, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_PROCESSOR_FILE_THROTTLING, + S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_processor_throttling_fops, + acpi_driver_data(device)); if (!entry) return 
-EIO; - else { - entry->proc_fops = &acpi_processor_throttling_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'limit' [R/W] */ - entry = create_proc_entry(ACPI_PROCESSOR_FILE_LIMIT, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_PROCESSOR_FILE_LIMIT, + S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_processor_limit_fops, + acpi_driver_data(device)); if (!entry) return -EIO; - else { - entry->proc_fops = &acpi_processor_limit_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0d90ff5fd11..789d4947ed3 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1282,6 +1282,7 @@ static int acpi_processor_power_open_fs(struct inode *inode, struct file *file) } static const struct file_operations acpi_processor_power_fops = { + .owner = THIS_MODULE, .open = acpi_processor_power_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -1822,16 +1823,12 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, } /* 'power' [R] */ - entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_PROCESSOR_FILE_POWER, + S_IRUGO, acpi_device_dir(device), + &acpi_processor_power_fops, + acpi_driver_data(device)); if (!entry) return -EIO; - else { - entry->proc_fops = &acpi_processor_power_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index b477a4be8a6..d80b2d1441a 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -411,6 +411,7 @@ EXPORT_SYMBOL(acpi_processor_notify_smm); static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_processor_perf_fops = 
{ + .owner = THIS_MODULE, .open = acpi_processor_perf_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -456,7 +457,6 @@ static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file) static void acpi_cpufreq_add_file(struct acpi_processor *pr) { - struct proc_dir_entry *entry = NULL; struct acpi_device *device = NULL; @@ -464,14 +464,9 @@ static void acpi_cpufreq_add_file(struct acpi_processor *pr) return; /* add file 'performance' [R/W] */ - entry = create_proc_entry(ACPI_PROCESSOR_FILE_PERFORMANCE, - S_IFREG | S_IRUGO, - acpi_device_dir(device)); - if (entry){ - entry->proc_fops = &acpi_processor_perf_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } + proc_create_data(ACPI_PROCESSOR_FILE_PERFORMANCE, S_IFREG | S_IRUGO, + acpi_device_dir(device), + &acpi_processor_perf_fops, acpi_driver_data(device)); return; } diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 649ae99b921..ef34b18f95c 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -509,6 +509,7 @@ static ssize_t acpi_processor_write_limit(struct file * file, } struct file_operations acpi_processor_limit_fops = { + .owner = THIS_MODULE, .open = acpi_processor_limit_open_fs, .read = seq_read, .write = acpi_processor_write_limit, diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 0bba3a914e8..bb06738860c 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -1252,6 +1252,7 @@ static ssize_t acpi_processor_write_throttling(struct file *file, } struct file_operations acpi_processor_throttling_fops = { + .owner = THIS_MODULE, .open = acpi_processor_throttling_open_fs, .read = seq_read, .write = acpi_processor_write_throttling, diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 585ae3c9c8e..10a36512647 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -483,8 +483,6 @@ acpi_sbs_add_fs(struct 
proc_dir_entry **dir, struct file_operations *state_fops, struct file_operations *alarm_fops, void *data) { - struct proc_dir_entry *entry = NULL; - if (!*dir) { *dir = proc_mkdir(dir_name, parent_dir); if (!*dir) { @@ -494,34 +492,19 @@ acpi_sbs_add_fs(struct proc_dir_entry **dir, } /* 'info' [R] */ - if (info_fops) { - entry = create_proc_entry(ACPI_SBS_FILE_INFO, S_IRUGO, *dir); - if (entry) { - entry->proc_fops = info_fops; - entry->data = data; - entry->owner = THIS_MODULE; - } - } + if (info_fops) + proc_create_data(ACPI_SBS_FILE_INFO, S_IRUGO, *dir, + info_fops, data); /* 'state' [R] */ - if (state_fops) { - entry = create_proc_entry(ACPI_SBS_FILE_STATE, S_IRUGO, *dir); - if (entry) { - entry->proc_fops = state_fops; - entry->data = data; - entry->owner = THIS_MODULE; - } - } + if (state_fops) + proc_create_data(ACPI_SBS_FILE_STATE, S_IRUGO, *dir, + state_fops, data); /* 'alarm' [R/W] */ - if (alarm_fops) { - entry = create_proc_entry(ACPI_SBS_FILE_ALARM, S_IRUGO, *dir); - if (entry) { - entry->proc_fops = alarm_fops; - entry->data = data; - entry->owner = THIS_MODULE; - } - } + if (alarm_fops) + proc_create_data(ACPI_SBS_FILE_ALARM, S_IRUGO, *dir, + alarm_fops, data); return 0; } diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index f8df5217d47..8a5fe871051 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -440,6 +440,7 @@ acpi_system_wakeup_device_open_fs(struct inode *inode, struct file *file) } static const struct file_operations acpi_system_wakeup_device_fops = { + .owner = THIS_MODULE, .open = acpi_system_wakeup_device_open_fs, .read = seq_read, .write = acpi_system_write_wakeup_device, @@ -449,6 +450,7 @@ static const struct file_operations acpi_system_wakeup_device_fops = { #ifdef CONFIG_ACPI_PROCFS static const struct file_operations acpi_system_sleep_fops = { + .owner = THIS_MODULE, .open = acpi_system_sleep_open_fs, .read = seq_read, .write = acpi_system_write_sleep, @@ -459,6 +461,7 @@ static const 
struct file_operations acpi_system_sleep_fops = { #ifdef HAVE_ACPI_LEGACY_ALARM static const struct file_operations acpi_system_alarm_fops = { + .owner = THIS_MODULE, .open = acpi_system_alarm_open_fs, .read = seq_read, .write = acpi_system_write_alarm, @@ -477,37 +480,26 @@ static u32 rtc_handler(void *context) static int __init acpi_sleep_proc_init(void) { - struct proc_dir_entry *entry = NULL; - if (acpi_disabled) return 0; #ifdef CONFIG_ACPI_PROCFS /* 'sleep' [R/W] */ - entry = - create_proc_entry("sleep", S_IFREG | S_IRUGO | S_IWUSR, - acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_sleep_fops; + proc_create("sleep", S_IFREG | S_IRUGO | S_IWUSR, + acpi_root_dir, &acpi_system_sleep_fops); #endif /* CONFIG_ACPI_PROCFS */ #ifdef HAVE_ACPI_LEGACY_ALARM /* 'alarm' [R/W] */ - entry = - create_proc_entry("alarm", S_IFREG | S_IRUGO | S_IWUSR, - acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_alarm_fops; + proc_create("alarm", S_IFREG | S_IRUGO | S_IWUSR, + acpi_root_dir, &acpi_system_alarm_fops); acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, NULL); #endif /* HAVE_ACPI_LEGACY_ALARM */ /* 'wakeup device' [R/W] */ - entry = - create_proc_entry("wakeup", S_IFREG | S_IRUGO | S_IWUSR, - acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_wakeup_device_fops; + proc_create("wakeup", S_IFREG | S_IRUGO | S_IWUSR, + acpi_root_dir, &acpi_system_wakeup_device_fops); return 0; } diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 4749f379a91..769f24855eb 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -396,6 +396,7 @@ static int acpi_system_info_open_fs(struct inode *inode, struct file *file) } static const struct file_operations acpi_system_info_ops = { + .owner = THIS_MODULE, .open = acpi_system_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -406,6 +407,7 @@ static ssize_t acpi_system_read_dsdt(struct file *, char __user *, size_t, loff_t *); static const struct file_operations 
acpi_system_dsdt_ops = { + .owner = THIS_MODULE, .read = acpi_system_read_dsdt, }; @@ -430,6 +432,7 @@ static ssize_t acpi_system_read_fadt(struct file *, char __user *, size_t, loff_t *); static const struct file_operations acpi_system_fadt_ops = { + .owner = THIS_MODULE, .read = acpi_system_read_fadt, }; @@ -454,31 +457,23 @@ static int acpi_system_procfs_init(void) { struct proc_dir_entry *entry; int error = 0; - char *name; /* 'info' [R] */ - name = ACPI_SYSTEM_FILE_INFO; - entry = create_proc_entry(name, S_IRUGO, acpi_root_dir); + entry = proc_create(ACPI_SYSTEM_FILE_INFO, S_IRUGO, acpi_root_dir, + &acpi_system_info_ops); if (!entry) goto Error; - else { - entry->proc_fops = &acpi_system_info_ops; - } /* 'dsdt' [R] */ - name = ACPI_SYSTEM_FILE_DSDT; - entry = create_proc_entry(name, S_IRUSR, acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_dsdt_ops; - else + entry = proc_create(ACPI_SYSTEM_FILE_DSDT, S_IRUSR, acpi_root_dir, + &acpi_system_dsdt_ops); + if (!entry) goto Error; /* 'fadt' [R] */ - name = ACPI_SYSTEM_FILE_FADT; - entry = create_proc_entry(name, S_IRUSR, acpi_root_dir); - if (entry) - entry->proc_fops = &acpi_system_fadt_ops; - else + entry = proc_create(ACPI_SYSTEM_FILE_FADT, S_IRUSR, acpi_root_dir, + &acpi_system_fadt_ops); + if (!entry) goto Error; Done: diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 766bd25d337..0815ac3ae3d 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -198,6 +198,7 @@ struct acpi_thermal { }; static const struct file_operations acpi_thermal_state_fops = { + .owner = THIS_MODULE, .open = acpi_thermal_state_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -205,6 +206,7 @@ static const struct file_operations acpi_thermal_state_fops = { }; static const struct file_operations acpi_thermal_temp_fops = { + .owner = THIS_MODULE, .open = acpi_thermal_temp_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -212,6 +214,7 @@ static const struct file_operations acpi_thermal_temp_fops 
= { }; static const struct file_operations acpi_thermal_trip_fops = { + .owner = THIS_MODULE, .open = acpi_thermal_trip_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -219,6 +222,7 @@ static const struct file_operations acpi_thermal_trip_fops = { }; static const struct file_operations acpi_thermal_cooling_fops = { + .owner = THIS_MODULE, .open = acpi_thermal_cooling_open_fs, .read = seq_read, .write = acpi_thermal_write_cooling_mode, @@ -227,6 +231,7 @@ static const struct file_operations acpi_thermal_cooling_fops = { }; static const struct file_operations acpi_thermal_polling_fops = { + .owner = THIS_MODULE, .open = acpi_thermal_polling_open_fs, .read = seq_read, .write = acpi_thermal_write_polling, @@ -1419,63 +1424,47 @@ static int acpi_thermal_add_fs(struct acpi_device *device) } /* 'state' [R] */ - entry = create_proc_entry(ACPI_THERMAL_FILE_STATE, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_THERMAL_FILE_STATE, + S_IRUGO, acpi_device_dir(device), + &acpi_thermal_state_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_thermal_state_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'temperature' [R] */ - entry = create_proc_entry(ACPI_THERMAL_FILE_TEMPERATURE, - S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data(ACPI_THERMAL_FILE_TEMPERATURE, + S_IRUGO, acpi_device_dir(device), + &acpi_thermal_temp_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_thermal_temp_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'trip_points' [R] */ - entry = create_proc_entry(ACPI_THERMAL_FILE_TRIP_POINTS, - S_IRUGO, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_THERMAL_FILE_TRIP_POINTS, + S_IRUGO, + acpi_device_dir(device), + &acpi_thermal_trip_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_thermal_trip_fops; - 
entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'cooling_mode' [R/W] */ - entry = create_proc_entry(ACPI_THERMAL_FILE_COOLING_MODE, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_THERMAL_FILE_COOLING_MODE, + S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_thermal_cooling_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_thermal_cooling_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'polling_frequency' [R/W] */ - entry = create_proc_entry(ACPI_THERMAL_FILE_POLLING_FREQ, - S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + entry = proc_create_data(ACPI_THERMAL_FILE_POLLING_FREQ, + S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_thermal_polling_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_thermal_polling_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 980a7418878..43b228314a8 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -192,6 +192,7 @@ struct acpi_video_device { /* bus */ static int acpi_video_bus_info_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_bus_info_fops = { + .owner = THIS_MODULE, .open = acpi_video_bus_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -200,6 +201,7 @@ static struct file_operations acpi_video_bus_info_fops = { static int acpi_video_bus_ROM_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_bus_ROM_fops = { + .owner = THIS_MODULE, .open = acpi_video_bus_ROM_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -209,6 +211,7 @@ static struct file_operations acpi_video_bus_ROM_fops = { static int acpi_video_bus_POST_info_open_fs(struct inode *inode, struct file *file); static struct 
file_operations acpi_video_bus_POST_info_fops = { + .owner = THIS_MODULE, .open = acpi_video_bus_POST_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -217,6 +220,7 @@ static struct file_operations acpi_video_bus_POST_info_fops = { static int acpi_video_bus_POST_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_bus_POST_fops = { + .owner = THIS_MODULE, .open = acpi_video_bus_POST_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -225,6 +229,7 @@ static struct file_operations acpi_video_bus_POST_fops = { static int acpi_video_bus_DOS_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_bus_DOS_fops = { + .owner = THIS_MODULE, .open = acpi_video_bus_DOS_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -235,6 +240,7 @@ static struct file_operations acpi_video_bus_DOS_fops = { static int acpi_video_device_info_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_device_info_fops = { + .owner = THIS_MODULE, .open = acpi_video_device_info_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -244,6 +250,7 @@ static struct file_operations acpi_video_device_info_fops = { static int acpi_video_device_state_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_device_state_fops = { + .owner = THIS_MODULE, .open = acpi_video_device_state_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -253,6 +260,7 @@ static struct file_operations acpi_video_device_state_fops = { static int acpi_video_device_brightness_open_fs(struct inode *inode, struct file *file); static struct file_operations acpi_video_device_brightness_fops = { + .owner = THIS_MODULE, .open = acpi_video_device_brightness_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -262,6 +270,7 @@ static struct file_operations acpi_video_device_brightness_fops = { static int acpi_video_device_EDID_open_fs(struct inode *inode, struct file *file); static struct 
file_operations acpi_video_device_EDID_fops = { + .owner = THIS_MODULE, .open = acpi_video_device_EDID_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -1070,51 +1079,36 @@ static int acpi_video_device_add_fs(struct acpi_device *device) } /* 'info' [R] */ - entry = create_proc_entry("info", S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data("info", S_IRUGO, acpi_device_dir(device), + &acpi_video_device_info_fops, acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_video_device_info_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'state' [R/W] */ - entry = - create_proc_entry("state", S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + acpi_video_device_state_fops.write = acpi_video_device_write_state; + entry = proc_create_data("state", S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_video_device_state_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - acpi_video_device_state_fops.write = acpi_video_device_write_state; - entry->proc_fops = &acpi_video_device_state_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'brightness' [R/W] */ - entry = - create_proc_entry("brightness", S_IFREG | S_IRUGO | S_IWUSR, - acpi_device_dir(device)); + acpi_video_device_brightness_fops.write = + acpi_video_device_write_brightness; + entry = proc_create_data("brightness", S_IFREG | S_IRUGO | S_IWUSR, + acpi_device_dir(device), + &acpi_video_device_brightness_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - acpi_video_device_brightness_fops.write = acpi_video_device_write_brightness; - entry->proc_fops = &acpi_video_device_brightness_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'EDID' [R] */ - entry = create_proc_entry("EDID", S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data("EDID", S_IRUGO, acpi_device_dir(device), + 
&acpi_video_device_EDID_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_video_device_EDID_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } - return 0; } @@ -1353,61 +1347,43 @@ static int acpi_video_bus_add_fs(struct acpi_device *device) } /* 'info' [R] */ - entry = create_proc_entry("info", S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data("info", S_IRUGO, acpi_device_dir(device), + &acpi_video_bus_info_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_video_bus_info_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'ROM' [R] */ - entry = create_proc_entry("ROM", S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data("ROM", S_IRUGO, acpi_device_dir(device), + &acpi_video_bus_ROM_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_video_bus_ROM_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'POST_info' [R] */ - entry = - create_proc_entry("POST_info", S_IRUGO, acpi_device_dir(device)); + entry = proc_create_data("POST_info", S_IRUGO, acpi_device_dir(device), + &acpi_video_bus_POST_info_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - entry->proc_fops = &acpi_video_bus_POST_info_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } /* 'POST' [R/W] */ - entry = - create_proc_entry("POST", S_IFREG | S_IRUGO | S_IRUSR, - acpi_device_dir(device)); + acpi_video_bus_POST_fops.write = acpi_video_bus_write_POST; + entry = proc_create_data("POST", S_IFREG | S_IRUGO | S_IRUSR, + acpi_device_dir(device), + &acpi_video_bus_POST_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - acpi_video_bus_POST_fops.write = acpi_video_bus_write_POST; - entry->proc_fops = &acpi_video_bus_POST_fops; - entry->data = acpi_driver_data(device); - 
entry->owner = THIS_MODULE; - } /* 'DOS' [R/W] */ - entry = - create_proc_entry("DOS", S_IFREG | S_IRUGO | S_IRUSR, - acpi_device_dir(device)); + acpi_video_bus_DOS_fops.write = acpi_video_bus_write_DOS; + entry = proc_create_data("DOS", S_IFREG | S_IRUGO | S_IRUSR, + acpi_device_dir(device), + &acpi_video_bus_DOS_fops, + acpi_driver_data(device)); if (!entry) return -ENODEV; - else { - acpi_video_bus_DOS_fops.write = acpi_video_bus_write_DOS; - entry->proc_fops = &acpi_video_bus_DOS_fops; - entry->data = acpi_driver_data(device); - entry->owner = THIS_MODULE; - } return 0; } diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 292aa9a0f02..1c11df9a5f3 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -566,11 +566,11 @@ config PATA_RADISYS If unsure, say N. -config PATA_RB500 - tristate "RouterBoard 500 PATA CompactFlash support" - depends on MIKROTIK_RB500 +config PATA_RB532 + tristate "RouterBoard 532 PATA CompactFlash support" + depends on MIKROTIK_RB532 help - This option enables support for the RouterBoard 500 + This option enables support for the RouterBoard 532 PATA CompactFlash controller. If unsure, say N. 
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 1fbc2aa648b..b693d829383 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -55,7 +55,7 @@ obj-$(CONFIG_PATA_PDC2027X) += pata_pdc2027x.o obj-$(CONFIG_PATA_PDC_OLD) += pata_pdc202xx_old.o obj-$(CONFIG_PATA_QDI) += pata_qdi.o obj-$(CONFIG_PATA_RADISYS) += pata_radisys.o -obj-$(CONFIG_PATA_RB500) += pata_rb500_cf.o +obj-$(CONFIG_PATA_RB532) += pata_rb532_cf.o obj-$(CONFIG_PATA_RZ1000) += pata_rz1000.o obj-$(CONFIG_PATA_SC1200) += pata_sc1200.o obj-$(CONFIG_PATA_SERVERWORKS) += pata_serverworks.o diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a34f32442ed..3ce43920e45 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -49,7 +49,11 @@ #include "libata.h" -#define SECTOR_SIZE 512 +#define SECTOR_SIZE 512 +#define ATA_SCSI_RBUF_SIZE 4096 + +static DEFINE_SPINLOCK(ata_scsi_rbuf_lock); +static u8 ata_scsi_rbuf[ATA_SCSI_RBUF_SIZE]; typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc); @@ -179,6 +183,13 @@ DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, ata_scsi_lpm_show, ata_scsi_lpm_put); EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy); +static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) +{ + cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; + + scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); +} + static void ata_scsi_invalid_field(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { @@ -1632,53 +1643,48 @@ defer: /** * ata_scsi_rbuf_get - Map response buffer. - * @cmd: SCSI command containing buffer to be mapped. - * @buf_out: Pointer to mapped area. + * @flags: unsigned long variable to store irq enable status + * @copy_in: copy in from user buffer * - * Maps buffer contained within SCSI command @cmd. + * Prepare buffer for simulated SCSI commands. 
* * LOCKING: - * spin_lock_irqsave(host lock) + * spin_lock_irqsave(ata_scsi_rbuf_lock) on success * * RETURNS: - * Length of response buffer. + * Pointer to response buffer. */ - -static unsigned int ata_scsi_rbuf_get(struct scsi_cmnd *cmd, u8 **buf_out) +static void *ata_scsi_rbuf_get(struct scsi_cmnd *cmd, bool copy_in, + unsigned long *flags) { - u8 *buf; - unsigned int buflen; - - struct scatterlist *sg = scsi_sglist(cmd); + spin_lock_irqsave(&ata_scsi_rbuf_lock, *flags); - if (sg) { - buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; - buflen = sg->length; - } else { - buf = NULL; - buflen = 0; - } - - *buf_out = buf; - return buflen; + memset(ata_scsi_rbuf, 0, ATA_SCSI_RBUF_SIZE); + if (copy_in) + sg_copy_to_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), + ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE); + return ata_scsi_rbuf; } /** * ata_scsi_rbuf_put - Unmap response buffer. * @cmd: SCSI command containing buffer to be unmapped. - * @buf: buffer to unmap + * @copy_out: copy out result + * @flags: @flags passed to ata_scsi_rbuf_get() * - * Unmaps response buffer contained within @cmd. + * Returns rbuf buffer. The result is copied to @cmd's buffer if + * @copy_out is true. * * LOCKING: - * spin_lock_irqsave(host lock) + * Unlocks ata_scsi_rbuf_lock. 
*/ - -static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) +static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, bool copy_out, + unsigned long *flags) { - struct scatterlist *sg = scsi_sglist(cmd); - if (sg) - kunmap_atomic(buf - sg->offset, KM_IRQ0); + if (copy_out) + sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), + ata_scsi_rbuf, ATA_SCSI_RBUF_SIZE); + spin_unlock_irqrestore(&ata_scsi_rbuf_lock, *flags); } /** @@ -1696,24 +1702,17 @@ static inline void ata_scsi_rbuf_put(struct scsi_cmnd *cmd, u8 *buf) * LOCKING: * spin_lock_irqsave(host lock) */ - -void ata_scsi_rbuf_fill(struct ata_scsi_args *args, - unsigned int (*actor) (struct ata_scsi_args *args, - u8 *rbuf, unsigned int buflen)) +static void ata_scsi_rbuf_fill(struct ata_scsi_args *args, + unsigned int (*actor)(struct ata_scsi_args *args, u8 *rbuf)) { u8 *rbuf; - unsigned int buflen, rc; + unsigned int rc; struct scsi_cmnd *cmd = args->cmd; unsigned long flags; - local_irq_save(flags); - - buflen = ata_scsi_rbuf_get(cmd, &rbuf); - memset(rbuf, 0, buflen); - rc = actor(args, rbuf, buflen); - ata_scsi_rbuf_put(cmd, rbuf); - - local_irq_restore(flags); + rbuf = ata_scsi_rbuf_get(cmd, false, &flags); + rc = actor(args, rbuf); + ata_scsi_rbuf_put(cmd, rc == 0, &flags); if (rc == 0) cmd->result = SAM_STAT_GOOD; @@ -1721,26 +1720,9 @@ void ata_scsi_rbuf_fill(struct ata_scsi_args *args, } /** - * ATA_SCSI_RBUF_SET - helper to set values in SCSI response buffer - * @idx: byte index into SCSI response buffer - * @val: value to set - * - * To be used by SCSI command simulator functions. This macros - * expects two local variables, u8 *rbuf and unsigned int buflen, - * are in scope. - * - * LOCKING: - * None. - */ -#define ATA_SCSI_RBUF_SET(idx, val) do { \ - if ((idx) < buflen) rbuf[(idx)] = (u8)(val); \ - } while (0) - -/** * ata_scsiop_inq_std - Simulate INQUIRY command * @args: device IDENTIFY data / SCSI command of interest. 
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Returns standard device identification data associated * with non-VPD INQUIRY command output. @@ -1748,10 +1730,17 @@ void ata_scsi_rbuf_fill(struct ata_scsi_args *args, * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf) { + const u8 versions[] = { + 0x60, /* SAM-3 (no version claimed) */ + + 0x03, + 0x20, /* SBC-2 (no version claimed) */ + + 0x02, + 0x60 /* SPC-3 (no version claimed) */ + }; u8 hdr[] = { TYPE_DISK, 0, @@ -1760,35 +1749,21 @@ unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, 95 - 4 }; + VPRINTK("ENTER\n"); + /* set scsi removeable (RMB) bit per ata bit */ if (ata_id_removeable(args->id)) hdr[1] |= (1 << 7); - VPRINTK("ENTER\n"); - memcpy(rbuf, hdr, sizeof(hdr)); + memcpy(&rbuf[8], "ATA ", 8); + ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); + ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); - if (buflen > 35) { - memcpy(&rbuf[8], "ATA ", 8); - ata_id_string(args->id, &rbuf[16], ATA_ID_PROD, 16); - ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); - if (rbuf[32] == 0 || rbuf[32] == ' ') - memcpy(&rbuf[32], "n/a ", 4); - } - - if (buflen > 63) { - const u8 versions[] = { - 0x60, /* SAM-3 (no version claimed) */ - - 0x03, - 0x20, /* SBC-2 (no version claimed) */ + if (rbuf[32] == 0 || rbuf[32] == ' ') + memcpy(&rbuf[32], "n/a ", 4); - 0x02, - 0x60 /* SPC-3 (no version claimed) */ - }; - - memcpy(rbuf + 59, versions, sizeof(versions)); - } + memcpy(rbuf + 59, versions, sizeof(versions)); return 0; } @@ -1797,27 +1772,22 @@ unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, * ata_scsiop_inq_00 - Simulate INQUIRY VPD page 0, list of pages * @args: device IDENTIFY data / SCSI command of interest. 
* @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Returns list of inquiry VPD pages available. * * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) { const u8 pages[] = { 0x00, /* page 0x00, this page */ 0x80, /* page 0x80, unit serial no page */ 0x83 /* page 0x83, device ident page */ }; - rbuf[3] = sizeof(pages); /* number of supported VPD pages */ - - if (buflen > 6) - memcpy(rbuf + 4, pages, sizeof(pages)); + rbuf[3] = sizeof(pages); /* number of supported VPD pages */ + memcpy(rbuf + 4, pages, sizeof(pages)); return 0; } @@ -1825,16 +1795,13 @@ unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf, * ata_scsiop_inq_80 - Simulate INQUIRY VPD page 80, device serial number * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Returns ATA device serial number. 
* * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf) { const u8 hdr[] = { 0, @@ -1842,12 +1809,10 @@ unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, 0, ATA_ID_SERNO_LEN, /* page len */ }; - memcpy(rbuf, hdr, sizeof(hdr)); - - if (buflen > (ATA_ID_SERNO_LEN + 4 - 1)) - ata_id_string(args->id, (unsigned char *) &rbuf[4], - ATA_ID_SERNO, ATA_ID_SERNO_LEN); + memcpy(rbuf, hdr, sizeof(hdr)); + ata_id_string(args->id, (unsigned char *) &rbuf[4], + ATA_ID_SERNO, ATA_ID_SERNO_LEN); return 0; } @@ -1855,7 +1820,6 @@ unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, * ata_scsiop_inq_83 - Simulate INQUIRY VPD page 83, device identity * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. 
* * Yields two logical unit device identification designators: * - vendor specific ASCII containing the ATA serial number @@ -1865,41 +1829,37 @@ unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf) { - int num; const int sat_model_serial_desc_len = 68; + int num; rbuf[1] = 0x83; /* this page code */ num = 4; - if (buflen > (ATA_ID_SERNO_LEN + num + 3)) { - /* piv=0, assoc=lu, code_set=ACSII, designator=vendor */ - rbuf[num + 0] = 2; - rbuf[num + 3] = ATA_ID_SERNO_LEN; - num += 4; - ata_id_string(args->id, (unsigned char *) rbuf + num, - ATA_ID_SERNO, ATA_ID_SERNO_LEN); - num += ATA_ID_SERNO_LEN; - } - if (buflen > (sat_model_serial_desc_len + num + 3)) { - /* SAT defined lu model and serial numbers descriptor */ - /* piv=0, assoc=lu, code_set=ACSII, designator=t10 vendor id */ - rbuf[num + 0] = 2; - rbuf[num + 1] = 1; - rbuf[num + 3] = sat_model_serial_desc_len; - num += 4; - memcpy(rbuf + num, "ATA ", 8); - num += 8; - ata_id_string(args->id, (unsigned char *) rbuf + num, - ATA_ID_PROD, ATA_ID_PROD_LEN); - num += ATA_ID_PROD_LEN; - ata_id_string(args->id, (unsigned char *) rbuf + num, - ATA_ID_SERNO, ATA_ID_SERNO_LEN); - num += ATA_ID_SERNO_LEN; - } + /* piv=0, assoc=lu, code_set=ACSII, designator=vendor */ + rbuf[num + 0] = 2; + rbuf[num + 3] = ATA_ID_SERNO_LEN; + num += 4; + ata_id_string(args->id, (unsigned char *) rbuf + num, + ATA_ID_SERNO, ATA_ID_SERNO_LEN); + num += ATA_ID_SERNO_LEN; + + /* SAT defined lu model and serial numbers descriptor */ + /* piv=0, assoc=lu, code_set=ACSII, designator=t10 vendor id */ + rbuf[num + 0] = 2; + rbuf[num + 1] = 1; + rbuf[num + 3] = sat_model_serial_desc_len; + num += 4; + memcpy(rbuf + num, "ATA ", 8); + num += 8; + ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_PROD, + 
ATA_ID_PROD_LEN); + num += ATA_ID_PROD_LEN; + ata_id_string(args->id, (unsigned char *) rbuf + num, ATA_ID_SERNO, + ATA_ID_SERNO_LEN); + num += ATA_ID_SERNO_LEN; + rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */ return 0; } @@ -1908,35 +1868,26 @@ unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf, * ata_scsiop_inq_89 - Simulate INQUIRY VPD page 89, ATA info * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Yields SAT-specified ATA VPD page. * * LOCKING: * spin_lock_irqsave(host lock) */ - -static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) { - u8 pbuf[60]; struct ata_taskfile tf; - unsigned int i; - if (!buflen) - return 0; - - memset(&pbuf, 0, sizeof(pbuf)); memset(&tf, 0, sizeof(tf)); - pbuf[1] = 0x89; /* our page code */ - pbuf[2] = (0x238 >> 8); /* page size fixed at 238h */ - pbuf[3] = (0x238 & 0xff); + rbuf[1] = 0x89; /* our page code */ + rbuf[2] = (0x238 >> 8); /* page size fixed at 238h */ + rbuf[3] = (0x238 & 0xff); - memcpy(&pbuf[8], "linux ", 8); - memcpy(&pbuf[16], "libata ", 16); - memcpy(&pbuf[32], DRV_VERSION, 4); - ata_id_string(args->id, &pbuf[32], ATA_ID_FW_REV, 4); + memcpy(&rbuf[8], "linux ", 8); + memcpy(&rbuf[16], "libata ", 16); + memcpy(&rbuf[32], DRV_VERSION, 4); + ata_id_string(args->id, &rbuf[32], ATA_ID_FW_REV, 4); /* we don't store the ATA device signature, so we fake it */ @@ -1944,19 +1895,12 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf, tf.lbal = 0x1; tf.nsect = 0x1; - ata_tf_to_fis(&tf, 0, 1, &pbuf[36]); /* TODO: PMP? */ - pbuf[36] = 0x34; /* force D2H Reg FIS (34h) */ + ata_tf_to_fis(&tf, 0, 1, &rbuf[36]); /* TODO: PMP? 
*/ + rbuf[36] = 0x34; /* force D2H Reg FIS (34h) */ - pbuf[56] = ATA_CMD_ID_ATA; + rbuf[56] = ATA_CMD_ID_ATA; - i = min(buflen, 60U); - memcpy(rbuf, &pbuf[0], i); - buflen -= i; - - if (!buflen) - return 0; - - memcpy(&rbuf[60], &args->id[0], min(buflen, 512U)); + memcpy(&rbuf[60], &args->id[0], 512); return 0; } @@ -1964,7 +1908,6 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf, * ata_scsiop_noop - Command handler that simply returns success. * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * No operation. Simply returns success to caller, to indicate * that the caller should successfully complete this SCSI command. @@ -1972,47 +1915,16 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf, * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_noop(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_noop(struct ata_scsi_args *args, u8 *rbuf) { VPRINTK("ENTER\n"); return 0; } /** - * ata_msense_push - Push data onto MODE SENSE data output buffer - * @ptr_io: (input/output) Location to store more output data - * @last: End of output data buffer - * @buf: Pointer to BLOB being added to output buffer - * @buflen: Length of BLOB - * - * Store MODE SENSE data on an output buffer. - * - * LOCKING: - * None. 
- */ - -static void ata_msense_push(u8 **ptr_io, const u8 *last, - const u8 *buf, unsigned int buflen) -{ - u8 *ptr = *ptr_io; - - if ((ptr + buflen - 1) > last) - return; - - memcpy(ptr, buf, buflen); - - ptr += buflen; - - *ptr_io = ptr; -} - -/** * ata_msense_caching - Simulate MODE SENSE caching info page * @id: device IDENTIFY data - * @ptr_io: (input/output) Location to store more output data - * @last: End of output data buffer + * @buf: output buffer * * Generate a caching info page, which conditionally indicates * write caching to the SCSI layer, depending on device @@ -2021,58 +1933,43 @@ static void ata_msense_push(u8 **ptr_io, const u8 *last, * LOCKING: * None. */ - -static unsigned int ata_msense_caching(u16 *id, u8 **ptr_io, - const u8 *last) +static unsigned int ata_msense_caching(u16 *id, u8 *buf) { - u8 page[CACHE_MPAGE_LEN]; - - memcpy(page, def_cache_mpage, sizeof(page)); + memcpy(buf, def_cache_mpage, sizeof(def_cache_mpage)); if (ata_id_wcache_enabled(id)) - page[2] |= (1 << 2); /* write cache enable */ + buf[2] |= (1 << 2); /* write cache enable */ if (!ata_id_rahead_enabled(id)) - page[12] |= (1 << 5); /* disable read ahead */ - - ata_msense_push(ptr_io, last, page, sizeof(page)); - return sizeof(page); + buf[12] |= (1 << 5); /* disable read ahead */ + return sizeof(def_cache_mpage); } /** * ata_msense_ctl_mode - Simulate MODE SENSE control mode page - * @dev: Device associated with this MODE SENSE command - * @ptr_io: (input/output) Location to store more output data - * @last: End of output data buffer + * @buf: output buffer * * Generate a generic MODE SENSE control mode page. * * LOCKING: * None. 
*/ - -static unsigned int ata_msense_ctl_mode(u8 **ptr_io, const u8 *last) +static unsigned int ata_msense_ctl_mode(u8 *buf) { - ata_msense_push(ptr_io, last, def_control_mpage, - sizeof(def_control_mpage)); + memcpy(buf, def_control_mpage, sizeof(def_control_mpage)); return sizeof(def_control_mpage); } /** * ata_msense_rw_recovery - Simulate MODE SENSE r/w error recovery page - * @dev: Device associated with this MODE SENSE command - * @ptr_io: (input/output) Location to store more output data - * @last: End of output data buffer + * @bufp: output buffer * * Generate a generic MODE SENSE r/w error recovery page. * * LOCKING: * None. */ - -static unsigned int ata_msense_rw_recovery(u8 **ptr_io, const u8 *last) +static unsigned int ata_msense_rw_recovery(u8 *buf) { - - ata_msense_push(ptr_io, last, def_rw_recovery_mpage, - sizeof(def_rw_recovery_mpage)); + memcpy(buf, def_rw_recovery_mpage, sizeof(def_rw_recovery_mpage)); return sizeof(def_rw_recovery_mpage); } @@ -2104,7 +2001,6 @@ static int ata_dev_supports_fua(u16 *id) * ata_scsiop_mode_sense - Simulate MODE SENSE 6, 10 commands * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Simulate MODE SENSE commands. Assume this is invoked for direct * access devices (e.g. disks) only. 
There should be no block @@ -2113,19 +2009,17 @@ static int ata_dev_supports_fua(u16 *id) * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; - u8 *scsicmd = args->cmd->cmnd, *p, *last; + u8 *scsicmd = args->cmd->cmnd, *p = rbuf; const u8 sat_blk_desc[] = { 0, 0, 0, 0, /* number of blocks: sat unspecified */ 0, 0, 0x2, 0x0 /* block length: 512 bytes */ }; u8 pg, spg; - unsigned int ebd, page_control, six_byte, output_len, alloc_len, minlen; + unsigned int ebd, page_control, six_byte; u8 dpofua; VPRINTK("ENTER\n"); @@ -2148,17 +2042,10 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, goto invalid_fld; } - if (six_byte) { - output_len = 4 + (ebd ? 8 : 0); - alloc_len = scsicmd[4]; - } else { - output_len = 8 + (ebd ? 8 : 0); - alloc_len = (scsicmd[7] << 8) + scsicmd[8]; - } - minlen = (alloc_len < buflen) ? alloc_len : buflen; - - p = rbuf + output_len; - last = rbuf + minlen - 1; + if (six_byte) + p += 4 + (ebd ? 8 : 0); + else + p += 8 + (ebd ? 
8 : 0); pg = scsicmd[2] & 0x3f; spg = scsicmd[3]; @@ -2171,61 +2058,48 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, switch(pg) { case RW_RECOVERY_MPAGE: - output_len += ata_msense_rw_recovery(&p, last); + p += ata_msense_rw_recovery(p); break; case CACHE_MPAGE: - output_len += ata_msense_caching(args->id, &p, last); + p += ata_msense_caching(args->id, p); break; - case CONTROL_MPAGE: { - output_len += ata_msense_ctl_mode(&p, last); + case CONTROL_MPAGE: + p += ata_msense_ctl_mode(p); break; - } case ALL_MPAGES: - output_len += ata_msense_rw_recovery(&p, last); - output_len += ata_msense_caching(args->id, &p, last); - output_len += ata_msense_ctl_mode(&p, last); + p += ata_msense_rw_recovery(p); + p += ata_msense_caching(args->id, p); + p += ata_msense_ctl_mode(p); break; default: /* invalid page code */ goto invalid_fld; } - if (minlen < 1) - return 0; - dpofua = 0; if (ata_dev_supports_fua(args->id) && (dev->flags & ATA_DFLAG_LBA48) && (!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count)) dpofua = 1 << 4; if (six_byte) { - output_len--; - rbuf[0] = output_len; - if (minlen > 2) - rbuf[2] |= dpofua; + rbuf[0] = p - rbuf - 1; + rbuf[2] |= dpofua; if (ebd) { - if (minlen > 3) - rbuf[3] = sizeof(sat_blk_desc); - if (minlen > 11) - memcpy(rbuf + 4, sat_blk_desc, - sizeof(sat_blk_desc)); + rbuf[3] = sizeof(sat_blk_desc); + memcpy(rbuf + 4, sat_blk_desc, sizeof(sat_blk_desc)); } } else { - output_len -= 2; + unsigned int output_len = p - rbuf - 2; + rbuf[0] = output_len >> 8; - if (minlen > 1) - rbuf[1] = output_len; - if (minlen > 3) - rbuf[3] |= dpofua; + rbuf[1] = output_len; + rbuf[3] |= dpofua; if (ebd) { - if (minlen > 7) - rbuf[7] = sizeof(sat_blk_desc); - if (minlen > 15) - memcpy(rbuf + 8, sat_blk_desc, - sizeof(sat_blk_desc)); + rbuf[7] = sizeof(sat_blk_desc); + memcpy(rbuf + 8, sat_blk_desc, sizeof(sat_blk_desc)); } } return 0; @@ -2245,15 +2119,13 @@ saving_not_supp: * ata_scsiop_read_cap - Simulate READ CAPACITY[ 16] commands * 
@args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Simulate READ CAPACITY commands. * * LOCKING: * None. */ -unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) { u64 last_lba = args->dev->n_sectors - 1; /* LBA of the last block */ @@ -2264,28 +2136,28 @@ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf, last_lba = 0xffffffff; /* sector count, 32-bit */ - ATA_SCSI_RBUF_SET(0, last_lba >> (8 * 3)); - ATA_SCSI_RBUF_SET(1, last_lba >> (8 * 2)); - ATA_SCSI_RBUF_SET(2, last_lba >> (8 * 1)); - ATA_SCSI_RBUF_SET(3, last_lba); + rbuf[0] = last_lba >> (8 * 3); + rbuf[1] = last_lba >> (8 * 2); + rbuf[2] = last_lba >> (8 * 1); + rbuf[3] = last_lba; /* sector size */ - ATA_SCSI_RBUF_SET(6, ATA_SECT_SIZE >> 8); - ATA_SCSI_RBUF_SET(7, ATA_SECT_SIZE & 0xff); + rbuf[6] = ATA_SECT_SIZE >> 8; + rbuf[7] = ATA_SECT_SIZE & 0xff; } else { /* sector count, 64-bit */ - ATA_SCSI_RBUF_SET(0, last_lba >> (8 * 7)); - ATA_SCSI_RBUF_SET(1, last_lba >> (8 * 6)); - ATA_SCSI_RBUF_SET(2, last_lba >> (8 * 5)); - ATA_SCSI_RBUF_SET(3, last_lba >> (8 * 4)); - ATA_SCSI_RBUF_SET(4, last_lba >> (8 * 3)); - ATA_SCSI_RBUF_SET(5, last_lba >> (8 * 2)); - ATA_SCSI_RBUF_SET(6, last_lba >> (8 * 1)); - ATA_SCSI_RBUF_SET(7, last_lba); + rbuf[0] = last_lba >> (8 * 7); + rbuf[1] = last_lba >> (8 * 6); + rbuf[2] = last_lba >> (8 * 5); + rbuf[3] = last_lba >> (8 * 4); + rbuf[4] = last_lba >> (8 * 3); + rbuf[5] = last_lba >> (8 * 2); + rbuf[6] = last_lba >> (8 * 1); + rbuf[7] = last_lba; /* sector size */ - ATA_SCSI_RBUF_SET(10, ATA_SECT_SIZE >> 8); - ATA_SCSI_RBUF_SET(11, ATA_SECT_SIZE & 0xff); + rbuf[10] = ATA_SECT_SIZE >> 8; + rbuf[11] = ATA_SECT_SIZE & 0xff; } return 0; @@ -2295,16 +2167,13 @@ unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 
*rbuf, * ata_scsiop_report_luns - Simulate REPORT LUNS command * @args: device IDENTIFY data / SCSI command of interest. * @rbuf: Response buffer, to which simulated SCSI cmd output is sent. - * @buflen: Response buffer length. * * Simulate REPORT LUNS command. * * LOCKING: * spin_lock_irqsave(host lock) */ - -unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen) +static unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf) { VPRINTK("ENTER\n"); rbuf[3] = 8; /* just one lun, LUN 0, size 8 bytes */ @@ -2312,53 +2181,6 @@ unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, return 0; } -/** - * ata_scsi_set_sense - Set SCSI sense data and status - * @cmd: SCSI request to be handled - * @sk: SCSI-defined sense key - * @asc: SCSI-defined additional sense code - * @ascq: SCSI-defined additional sense code qualifier - * - * Helper function that builds a valid fixed format, current - * response code and the given sense key (sk), additional sense - * code (asc) and additional sense code qualifier (ascq) with - * a SCSI command status of %SAM_STAT_CHECK_CONDITION and - * DRIVER_SENSE set in the upper bits of scsi_cmnd::result . - * - * LOCKING: - * Not required - */ - -void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) -{ - cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; - - scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); -} - -/** - * ata_scsi_badcmd - End a SCSI request with an error - * @cmd: SCSI request to be handled - * @done: SCSI command completion function - * @asc: SCSI-defined additional sense code - * @ascq: SCSI-defined additional sense code qualifier - * - * Helper function that completes a SCSI command with - * %SAM_STAT_CHECK_CONDITION, with a sense key %ILLEGAL_REQUEST - * and the specified additional sense codes. 
- * - * LOCKING: - * spin_lock_irqsave(host lock) - */ - -void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq) -{ - DPRINTK("ENTER\n"); - ata_scsi_set_sense(cmd, ILLEGAL_REQUEST, asc, ascq); - - done(cmd); -} - static void atapi_sense_complete(struct ata_queued_cmd *qc) { if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) { @@ -2485,13 +2307,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) u8 *scsicmd = cmd->cmnd; if ((scsicmd[0] == INQUIRY) && ((scsicmd[1] & 0x03) == 0)) { - u8 *buf = NULL; - unsigned int buflen; unsigned long flags; + u8 *buf; - local_irq_save(flags); - - buflen = ata_scsi_rbuf_get(cmd, &buf); + buf = ata_scsi_rbuf_get(cmd, true, &flags); /* ATAPI devices typically report zero for their SCSI version, * and sometimes deviate from the spec WRT response data @@ -2506,9 +2325,7 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc) buf[3] = 0x32; } - ata_scsi_rbuf_put(cmd, buf); - - local_irq_restore(flags); + ata_scsi_rbuf_put(cmd, true, &flags); } cmd->result = SAM_STAT_GOOD; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index ae2cfd95d43..4514283937e 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -146,34 +146,6 @@ extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); -extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); - -extern unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); - -extern unsigned int ata_scsiop_inq_80(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern unsigned int ata_scsiop_noop(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); 
-extern unsigned int ata_scsiop_sync_cache(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, - unsigned int buflen); -extern void ata_scsi_badcmd(struct scsi_cmnd *cmd, - void (*done)(struct scsi_cmnd *), - u8 asc, u8 ascq); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, - u8 sk, u8 asc, u8 ascq); -extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, - unsigned int (*actor) (struct ata_scsi_args *args, - u8 *rbuf, unsigned int buflen)); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); extern void ata_scsi_dev_rescan(struct work_struct *work); extern int ata_bus_probe(struct ata_port *ap); diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c index a75de0684c1..9ab89732cf9 100644 --- a/drivers/ata/pata_bf54x.c +++ b/drivers/ata/pata_bf54x.c @@ -1272,8 +1272,8 @@ static void bfin_freeze(struct ata_port *ap) void bfin_thaw(struct ata_port *ap) { + dev_dbg(ap->dev, "in atapi dma thaw\n"); bfin_check_status(ap); - bfin_irq_clear(ap); bfin_irq_on(ap); } @@ -1339,13 +1339,130 @@ static int bfin_port_start(struct ata_port *ap) return 0; } +static unsigned int bfin_ata_host_intr(struct ata_port *ap, + struct ata_queued_cmd *qc) +{ + struct ata_eh_info *ehi = &ap->link.eh_info; + u8 status, host_stat = 0; + + VPRINTK("ata%u: protocol %d task_state %d\n", + ap->print_id, qc->tf.protocol, ap->hsm_task_state); + + /* Check whether we are expecting interrupt in this state */ + switch (ap->hsm_task_state) { + case HSM_ST_FIRST: + /* Some pre-ATAPI-4 devices assert INTRQ + * at this state when ready to receive CDB. + */ + + /* Check the ATA_DFLAG_CDB_INTR flag is enough here. + * The flag was turned on only for atapi devices. 
+ * No need to check is_atapi_taskfile(&qc->tf) again. + */ + if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) + goto idle_irq; + break; + case HSM_ST_LAST: + if (qc->tf.protocol == ATA_PROT_DMA || + qc->tf.protocol == ATAPI_PROT_DMA) { + /* check status of DMA engine */ + host_stat = ap->ops->bmdma_status(ap); + VPRINTK("ata%u: host_stat 0x%X\n", + ap->print_id, host_stat); + + /* if it's not our irq... */ + if (!(host_stat & ATA_DMA_INTR)) + goto idle_irq; + + /* before we do anything else, clear DMA-Start bit */ + ap->ops->bmdma_stop(qc); + + if (unlikely(host_stat & ATA_DMA_ERR)) { + /* error when transfering data to/from memory */ + qc->err_mask |= AC_ERR_HOST_BUS; + ap->hsm_task_state = HSM_ST_ERR; + } + } + break; + case HSM_ST: + break; + default: + goto idle_irq; + } + + /* check altstatus */ + status = ap->ops->sff_check_altstatus(ap); + if (status & ATA_BUSY) + goto busy_ata; + + /* check main status, clearing INTRQ */ + status = ap->ops->sff_check_status(ap); + if (unlikely(status & ATA_BUSY)) + goto busy_ata; + + /* ack bmdma irq events */ + ap->ops->sff_irq_clear(ap); + + ata_sff_hsm_move(ap, qc, status, 0); + + if (unlikely(qc->err_mask) && (qc->tf.protocol == ATA_PROT_DMA || + qc->tf.protocol == ATAPI_PROT_DMA)) + ata_ehi_push_desc(ehi, "BMDMA stat 0x%x", host_stat); + +busy_ata: + return 1; /* irq handled */ + +idle_irq: + ap->stats.idle_irq++; + +#ifdef ATA_IRQ_TRAP + if ((ap->stats.idle_irq % 1000) == 0) { + ap->ops->irq_ack(ap, 0); /* debug trap */ + ata_port_printk(ap, KERN_WARNING, "irq trap\n"); + return 1; + } +#endif + return 0; /* irq not handled */ +} + +static irqreturn_t bfin_ata_interrupt(int irq, void *dev_instance) +{ + struct ata_host *host = dev_instance; + unsigned int i; + unsigned int handled = 0; + unsigned long flags; + + /* TODO: make _irqsave conditional on x86 PCI IDE legacy mode */ + spin_lock_irqsave(&host->lock, flags); + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap; + + ap = host->ports[i]; + if (ap && + 
!(ap->flags & ATA_FLAG_DISABLED)) { + struct ata_queued_cmd *qc; + + qc = ata_qc_from_tag(ap, ap->link.active_tag); + if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) && + (qc->flags & ATA_QCFLAG_ACTIVE)) + handled |= bfin_ata_host_intr(ap, qc); + } + } + + spin_unlock_irqrestore(&host->lock, flags); + + return IRQ_RETVAL(handled); +} + + static struct scsi_host_template bfin_sht = { ATA_BASE_SHT(DRV_NAME), .sg_tablesize = SG_NONE, .dma_boundary = ATA_DMA_BOUNDARY, }; -static const struct ata_port_operations bfin_pata_ops = { +static struct ata_port_operations bfin_pata_ops = { .inherits = &ata_sff_port_ops, .set_piomode = bfin_set_piomode, @@ -1370,7 +1487,6 @@ static const struct ata_port_operations bfin_pata_ops = { .thaw = bfin_thaw, .softreset = bfin_softreset, .postreset = bfin_postreset, - .post_internal_cmd = bfin_bmdma_stop, .sff_irq_clear = bfin_irq_clear, .sff_irq_on = bfin_irq_on, @@ -1507,7 +1623,7 @@ static int __devinit bfin_atapi_probe(struct platform_device *pdev) } if (ata_host_activate(host, platform_get_irq(pdev, 0), - ata_sff_interrupt, IRQF_SHARED, &bfin_sht) != 0) { + bfin_ata_interrupt, IRQF_SHARED, &bfin_sht) != 0) { peripheral_free_list(atapi_io_port); dev_err(&pdev->dev, "Fail to attach ATAPI device\n"); return -ENODEV; diff --git a/drivers/ata/pata_rb500_cf.c b/drivers/ata/pata_rb532_cf.c index 4345174aaee..a108d259f19 100644 --- a/drivers/ata/pata_rb500_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -32,7 +32,7 @@ #include <asm/gpio.h> -#define DRV_NAME "pata-rb500-cf" +#define DRV_NAME "pata-rb532-cf" #define DRV_VERSION "0.1.0" #define DRV_DESC "PATA driver for RouterBOARD 532 Compact Flash" @@ -43,7 +43,7 @@ #define RB500_CF_REG_CTRL 0x080E #define RB500_CF_REG_DATA 0x0C00 -struct rb500_cf_info { +struct rb532_cf_info { void __iomem *iobase; unsigned int gpio_line; int frozen; @@ -52,10 +52,10 @@ struct rb500_cf_info { /* ------------------------------------------------------------------------ */ -static inline void 
rb500_pata_finish_io(struct ata_port *ap) +static inline void rb532_pata_finish_io(struct ata_port *ap) { struct ata_host *ah = ap->host; - struct rb500_cf_info *info = ah->private_data; + struct rb532_cf_info *info = ah->private_data; ata_sff_altstatus(ap); ndelay(RB500_CF_IO_DELAY); @@ -63,14 +63,14 @@ static inline void rb500_pata_finish_io(struct ata_port *ap) set_irq_type(info->irq, IRQ_TYPE_LEVEL_HIGH); } -static void rb500_pata_exec_command(struct ata_port *ap, +static void rb532_pata_exec_command(struct ata_port *ap, const struct ata_taskfile *tf) { writeb(tf->command, ap->ioaddr.command_addr); - rb500_pata_finish_io(ap); + rb532_pata_finish_io(ap); } -static void rb500_pata_data_xfer(struct ata_device *adev, unsigned char *buf, +static void rb532_pata_data_xfer(struct ata_device *adev, unsigned char *buf, unsigned int buflen, int write_data) { struct ata_port *ap = adev->link->ap; @@ -84,27 +84,27 @@ static void rb500_pata_data_xfer(struct ata_device *adev, unsigned char *buf, *buf = readb(ioaddr); } - rb500_pata_finish_io(adev->link->ap); + rb532_pata_finish_io(adev->link->ap); } -static void rb500_pata_freeze(struct ata_port *ap) +static void rb532_pata_freeze(struct ata_port *ap) { - struct rb500_cf_info *info = ap->host->private_data; + struct rb532_cf_info *info = ap->host->private_data; info->frozen = 1; } -static void rb500_pata_thaw(struct ata_port *ap) +static void rb532_pata_thaw(struct ata_port *ap) { - struct rb500_cf_info *info = ap->host->private_data; + struct rb532_cf_info *info = ap->host->private_data; info->frozen = 0; } -static irqreturn_t rb500_pata_irq_handler(int irq, void *dev_instance) +static irqreturn_t rb532_pata_irq_handler(int irq, void *dev_instance) { struct ata_host *ah = dev_instance; - struct rb500_cf_info *info = ah->private_data; + struct rb532_cf_info *info = ah->private_data; if (gpio_get_value(info->gpio_line)) { set_irq_type(info->irq, IRQ_TYPE_LEVEL_LOW); @@ -117,30 +117,30 @@ static irqreturn_t 
rb500_pata_irq_handler(int irq, void *dev_instance) return IRQ_HANDLED; } -static struct ata_port_operations rb500_pata_port_ops = { +static struct ata_port_operations rb532_pata_port_ops = { .inherits = &ata_sff_port_ops, - .sff_exec_command = rb500_pata_exec_command, - .sff_data_xfer = rb500_pata_data_xfer, - .freeze = rb500_pata_freeze, - .thaw = rb500_pata_thaw, + .sff_exec_command = rb532_pata_exec_command, + .sff_data_xfer = rb532_pata_data_xfer, + .freeze = rb532_pata_freeze, + .thaw = rb532_pata_thaw, }; /* ------------------------------------------------------------------------ */ -static struct scsi_host_template rb500_pata_sht = { +static struct scsi_host_template rb532_pata_sht = { ATA_PIO_SHT(DRV_NAME), }; /* ------------------------------------------------------------------------ */ -static void rb500_pata_setup_ports(struct ata_host *ah) +static void rb532_pata_setup_ports(struct ata_host *ah) { - struct rb500_cf_info *info = ah->private_data; + struct rb532_cf_info *info = ah->private_data; struct ata_port *ap; ap = ah->ports[0]; - ap->ops = &rb500_pata_port_ops; + ap->ops = &rb532_pata_port_ops; ap->pio_mask = 0x1f; /* PIO4 */ ap->flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO; @@ -153,13 +153,13 @@ static void rb500_pata_setup_ports(struct ata_host *ah) ap->ioaddr.data_addr = info->iobase + RB500_CF_REG_DATA; } -static __devinit int rb500_pata_driver_probe(struct platform_device *pdev) +static __devinit int rb532_pata_driver_probe(struct platform_device *pdev) { unsigned int irq; int gpio; struct resource *res; struct ata_host *ah; - struct rb500_cf_info *info; + struct rb532_cf_info *info; int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -213,10 +213,10 @@ static __devinit int rb500_pata_driver_probe(struct platform_device *pdev) goto err_free_gpio; } - rb500_pata_setup_ports(ah); + rb532_pata_setup_ports(ah); - ret = ata_host_activate(ah, irq, rb500_pata_irq_handler, - IRQF_TRIGGER_LOW, &rb500_pata_sht); + ret = 
ata_host_activate(ah, irq, rb532_pata_irq_handler, + IRQF_TRIGGER_LOW, &rb532_pata_sht); if (ret) goto err_free_gpio; @@ -228,10 +228,10 @@ err_free_gpio: return ret; } -static __devexit int rb500_pata_driver_remove(struct platform_device *pdev) +static __devexit int rb532_pata_driver_remove(struct platform_device *pdev) { struct ata_host *ah = platform_get_drvdata(pdev); - struct rb500_cf_info *info = ah->private_data; + struct rb532_cf_info *info = ah->private_data; ata_host_detach(ah); gpio_free(info->gpio_line); @@ -242,9 +242,9 @@ static __devexit int rb500_pata_driver_remove(struct platform_device *pdev) /* work with hotplug and coldplug */ MODULE_ALIAS("platform:" DRV_NAME); -static struct platform_driver rb500_pata_platform_driver = { - .probe = rb500_pata_driver_probe, - .remove = __devexit_p(rb500_pata_driver_remove), +static struct platform_driver rb532_pata_platform_driver = { + .probe = rb532_pata_driver_probe, + .remove = __devexit_p(rb532_pata_driver_remove), .driver = { .name = DRV_NAME, .owner = THIS_MODULE, @@ -255,16 +255,16 @@ static struct platform_driver rb500_pata_platform_driver = { #define DRV_INFO DRV_DESC " version " DRV_VERSION -static int __init rb500_pata_module_init(void) +static int __init rb532_pata_module_init(void) { printk(KERN_INFO DRV_INFO "\n"); - return platform_driver_register(&rb500_pata_platform_driver); + return platform_driver_register(&rb532_pata_platform_driver); } -static void __exit rb500_pata_module_exit(void) +static void __exit rb532_pata_module_exit(void) { - platform_driver_unregister(&rb500_pata_platform_driver); + platform_driver_unregister(&rb532_pata_platform_driver); } MODULE_AUTHOR("Gabor Juhos <juhosg at openwrt.org>"); @@ -273,5 +273,5 @@ MODULE_DESCRIPTION(DRV_DESC); MODULE_VERSION(DRV_VERSION); MODULE_LICENSE("GPL"); -module_init(rb500_pata_module_init); -module_exit(rb500_pata_module_exit); +module_init(rb532_pata_module_init); +module_exit(rb532_pata_module_exit); diff --git a/drivers/ata/sata_mv.c 
b/drivers/ata/sata_mv.c index 26a6337195b..842b1a15b78 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -172,10 +172,11 @@ enum { PCIE_IRQ_MASK_OFS = 0x1910, PCIE_UNMASK_ALL_IRQS = 0x40a, /* assorted bits */ - HC_MAIN_IRQ_CAUSE_OFS = 0x1d60, - HC_MAIN_IRQ_MASK_OFS = 0x1d64, - HC_SOC_MAIN_IRQ_CAUSE_OFS = 0x20020, - HC_SOC_MAIN_IRQ_MASK_OFS = 0x20024, + /* Host Controller Main Interrupt Cause/Mask registers (1 per-chip) */ + PCI_HC_MAIN_IRQ_CAUSE_OFS = 0x1d60, + PCI_HC_MAIN_IRQ_MASK_OFS = 0x1d64, + SOC_HC_MAIN_IRQ_CAUSE_OFS = 0x20020, + SOC_HC_MAIN_IRQ_MASK_OFS = 0x20024, ERR_IRQ = (1 << 0), /* shift by port # */ DONE_IRQ = (1 << 1), /* shift by port # */ HC0_IRQ_PEND = 0x1ff, /* bits 0-8 = HC0's ports */ @@ -445,8 +446,8 @@ struct mv_host_priv { const struct mv_hw_ops *ops; int n_ports; void __iomem *base; - void __iomem *main_cause_reg_addr; - void __iomem *main_mask_reg_addr; + void __iomem *main_irq_cause_addr; + void __iomem *main_irq_mask_addr; u32 irq_cause_ofs; u32 irq_mask_ofs; u32 unmask_all_irqs; @@ -727,8 +728,8 @@ static inline unsigned int mv_hardport_from_port(unsigned int port) * Simple code, with two return values, so macro rather than inline. * * port is the sole input, in range 0..7. - * shift is one output, for use with the main_cause and main_mask registers. - * hardport is the other output, in range 0..3 + * shift is one output, for use with main_irq_cause / main_irq_mask registers. + * hardport is the other output, in range 0..3. * * Note that port and hardport may be the same variable in some cases. */ @@ -1679,12 +1680,12 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp /** * mv_host_intr - Handle all interrupts on the given host controller * @host: host specific structure - * @main_cause: Main interrupt cause register for the chip. + * @main_irq_cause: Main interrupt cause register for the chip. * * LOCKING: * Inherited from caller. 
*/ -static int mv_host_intr(struct ata_host *host, u32 main_cause) +static int mv_host_intr(struct ata_host *host, u32 main_irq_cause) { struct mv_host_priv *hpriv = host->private_data; void __iomem *mmio = hpriv->base, *hc_mmio = NULL; @@ -1705,7 +1706,7 @@ static int mv_host_intr(struct ata_host *host, u32 main_cause) * Do nothing if port is not interrupting or is disabled: */ MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport); - port_cause = (main_cause >> shift) & (DONE_IRQ | ERR_IRQ); + port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ); if (!port_cause || !ap || (ap->flags & ATA_FLAG_DISABLED)) continue; /* @@ -1811,20 +1812,20 @@ static irqreturn_t mv_interrupt(int irq, void *dev_instance) struct ata_host *host = dev_instance; struct mv_host_priv *hpriv = host->private_data; unsigned int handled = 0; - u32 main_cause, main_mask; + u32 main_irq_cause, main_irq_mask; spin_lock(&host->lock); - main_cause = readl(hpriv->main_cause_reg_addr); - main_mask = readl(hpriv->main_mask_reg_addr); + main_irq_cause = readl(hpriv->main_irq_cause_addr); + main_irq_mask = readl(hpriv->main_irq_mask_addr); /* * Deal with cases where we either have nothing pending, or have read * a bogus register value which can indicate HW removal or PCI fault. 
*/ - if ((main_cause & main_mask) && (main_cause != 0xffffffffU)) { - if (unlikely((main_cause & PCI_ERR) && HAS_PCI(host))) + if ((main_irq_cause & main_irq_mask) && (main_irq_cause != 0xffffffffU)) { + if (unlikely((main_irq_cause & PCI_ERR) && HAS_PCI(host))) handled = mv_pci_error(host, hpriv->base); else - handled = mv_host_intr(host, main_cause); + handled = mv_host_intr(host, main_irq_cause); } spin_unlock(&host->lock); return IRQ_RETVAL(handled); @@ -2027,7 +2028,7 @@ static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio) ZERO(MV_PCI_DISC_TIMER); ZERO(MV_PCI_MSI_TRIGGER); writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT); - ZERO(HC_MAIN_IRQ_MASK_OFS); + ZERO(PCI_HC_MAIN_IRQ_MASK_OFS); ZERO(MV_PCI_SERR_MASK); ZERO(hpriv->irq_cause_ofs); ZERO(hpriv->irq_mask_ofs); @@ -2404,7 +2405,7 @@ static void mv_eh_freeze(struct ata_port *ap) { struct mv_host_priv *hpriv = ap->host->private_data; unsigned int shift, hardport, port = ap->port_no; - u32 main_mask; + u32 main_irq_mask; /* FIXME: handle coalescing completion events properly */ @@ -2412,9 +2413,9 @@ static void mv_eh_freeze(struct ata_port *ap) MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport); /* disable assertion of portN err, done events */ - main_mask = readl(hpriv->main_mask_reg_addr); - main_mask &= ~((DONE_IRQ | ERR_IRQ) << shift); - writelfl(main_mask, hpriv->main_mask_reg_addr); + main_irq_mask = readl(hpriv->main_irq_mask_addr); + main_irq_mask &= ~((DONE_IRQ | ERR_IRQ) << shift); + writelfl(main_irq_mask, hpriv->main_irq_mask_addr); } static void mv_eh_thaw(struct ata_port *ap) @@ -2423,7 +2424,7 @@ static void mv_eh_thaw(struct ata_port *ap) unsigned int shift, hardport, port = ap->port_no; void __iomem *hc_mmio = mv_hc_base_from_port(hpriv->base, port); void __iomem *port_mmio = mv_ap_base(ap); - u32 main_mask, hc_irq_cause; + u32 main_irq_mask, hc_irq_cause; /* FIXME: handle coalescing completion events properly */ @@ -2438,9 +2439,9 @@ static void mv_eh_thaw(struct ata_port *ap) 
writelfl(hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS); /* enable assertion of portN err, done events */ - main_mask = readl(hpriv->main_mask_reg_addr); - main_mask |= ((DONE_IRQ | ERR_IRQ) << shift); - writelfl(main_mask, hpriv->main_mask_reg_addr); + main_irq_mask = readl(hpriv->main_irq_mask_addr); + main_irq_mask |= ((DONE_IRQ | ERR_IRQ) << shift); + writelfl(main_irq_mask, hpriv->main_irq_mask_addr); } /** @@ -2654,15 +2655,15 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) goto done; if (HAS_PCI(host)) { - hpriv->main_cause_reg_addr = mmio + HC_MAIN_IRQ_CAUSE_OFS; - hpriv->main_mask_reg_addr = mmio + HC_MAIN_IRQ_MASK_OFS; + hpriv->main_irq_cause_addr = mmio + PCI_HC_MAIN_IRQ_CAUSE_OFS; + hpriv->main_irq_mask_addr = mmio + PCI_HC_MAIN_IRQ_MASK_OFS; } else { - hpriv->main_cause_reg_addr = mmio + HC_SOC_MAIN_IRQ_CAUSE_OFS; - hpriv->main_mask_reg_addr = mmio + HC_SOC_MAIN_IRQ_MASK_OFS; + hpriv->main_irq_cause_addr = mmio + SOC_HC_MAIN_IRQ_CAUSE_OFS; + hpriv->main_irq_mask_addr = mmio + SOC_HC_MAIN_IRQ_MASK_OFS; } /* global interrupt mask: 0 == mask everything */ - writel(0, hpriv->main_mask_reg_addr); + writel(0, hpriv->main_irq_mask_addr); n_hc = mv_get_hc_count(host->ports[0]->flags); @@ -2712,23 +2713,23 @@ static int mv_init_host(struct ata_host *host, unsigned int board_idx) writelfl(hpriv->unmask_all_irqs, mmio + hpriv->irq_mask_ofs); if (IS_GEN_I(hpriv)) writelfl(~HC_MAIN_MASKED_IRQS_5, - hpriv->main_mask_reg_addr); + hpriv->main_irq_mask_addr); else writelfl(~HC_MAIN_MASKED_IRQS, - hpriv->main_mask_reg_addr); + hpriv->main_irq_mask_addr); VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x " "PCI int cause/mask=0x%08x/0x%08x\n", - readl(hpriv->main_cause_reg_addr), - readl(hpriv->main_mask_reg_addr), + readl(hpriv->main_irq_cause_addr), + readl(hpriv->main_irq_mask_addr), readl(mmio + hpriv->irq_cause_ofs), readl(mmio + hpriv->irq_mask_ofs)); } else { writelfl(~HC_MAIN_MASKED_IRQS_SOC, - hpriv->main_mask_reg_addr); + 
hpriv->main_irq_mask_addr); VPRINTK("HC MAIN IRQ cause/mask=0x%08x/0x%08x\n", - readl(hpriv->main_cause_reg_addr), - readl(hpriv->main_mask_reg_addr)); + readl(hpriv->main_irq_cause_addr), + readl(hpriv->main_irq_mask_addr)); } done: return rc; diff --git a/drivers/base/core.c b/drivers/base/core.c index 9248e0927d0..be288b5e418 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -787,6 +787,10 @@ int device_add(struct device *dev) parent = get_device(dev->parent); setup_parent(dev, parent); + /* use parent numa_node */ + if (parent) + set_dev_node(dev, dev_to_node(parent)); + /* first, register with generic layer. */ error = kobject_add(&dev->kobj, dev->kobj.parent, "%s", dev->bus_id); if (error) @@ -1306,8 +1310,11 @@ int device_move(struct device *dev, struct device *new_parent) dev->parent = new_parent; if (old_parent) klist_remove(&dev->knode_parent); - if (new_parent) + if (new_parent) { klist_add_tail(&dev->knode_parent, &new_parent->klist_children); + set_dev_node(dev, dev_to_node(new_parent)); + } + if (!dev->class) goto out_put; error = device_move_class_links(dev, old_parent, new_parent); @@ -1317,9 +1324,12 @@ int device_move(struct device *dev, struct device *new_parent) if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->knode_parent); - if (old_parent) + dev->parent = old_parent; + if (old_parent) { klist_add_tail(&dev->knode_parent, &old_parent->klist_children); + set_dev_node(dev, dev_to_node(old_parent)); + } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 1fef7df8c9d..9fd4a853414 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -396,6 +396,8 @@ _request_firmware(const struct firmware **firmware_p, const char *name, if (!firmware_p) return -EINVAL; + printk(KERN_INFO "firmware: requesting %s\n", name); + *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if 
(!firmware) { printk(KERN_ERR "%s: kmalloc(struct firmware) failed\n", diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 280e71ee744..5b4c6e649c1 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -195,7 +195,6 @@ void aoedev_exit(void); struct aoedev *aoedev_by_aoeaddr(int maj, int min); struct aoedev *aoedev_by_sysminor_m(ulong sysminor); void aoedev_downdev(struct aoedev *d); -int aoedev_isbusy(struct aoedev *d); int aoedev_flush(const char __user *str, size_t size); int aoenet_init(void); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index d00293ba3b4..8fc429cf82b 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -668,16 +668,16 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) u16 n; /* word 83: command set supported */ - n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1])); + n = get_unaligned_le16(&id[83 << 1]); /* word 86: command set/feature enabled */ - n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1])); + n |= get_unaligned_le16(&id[86 << 1]); if (n & (1<<10)) { /* bit 10: LBA 48 */ d->flags |= DEVFL_EXT; /* word 100: number lba48 sectors */ - ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1])); + ssize = get_unaligned_le64(&id[100 << 1]); /* set as in ide-disk.c:init_idedisk_capacity */ d->geo.cylinders = ssize; @@ -688,12 +688,12 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->flags &= ~DEVFL_EXT; /* number lba28 sectors */ - ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1])); + ssize = get_unaligned_le32(&id[60 << 1]); /* NOTE: obsolete in ATA 6 */ - d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1])); - d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1])); - d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1])); + d->geo.cylinders = get_unaligned_le16(&id[54 << 1]); + d->geo.heads = get_unaligned_le16(&id[55 << 1]); + d->geo.sectors = 
get_unaligned_le16(&id[56 << 1]); } if (d->ssize != ssize) @@ -779,7 +779,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) u16 aoemajor; hin = (struct aoe_hdr *) skb_mac_header(skb); - aoemajor = be16_to_cpu(get_unaligned(&hin->major)); + aoemajor = get_unaligned_be16(&hin->major); d = aoedev_by_aoeaddr(aoemajor, hin->minor); if (d == NULL) { snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " @@ -791,7 +791,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); - n = be32_to_cpu(get_unaligned(&hin->tag)); + n = get_unaligned_be32(&hin->tag); t = gettgt(d, hin->src); if (t == NULL) { printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n", @@ -806,9 +806,9 @@ aoecmd_ata_rsp(struct sk_buff *skb) snprintf(ebuf, sizeof ebuf, "%15s e%d.%d tag=%08x@%08lx\n", "unexpected rsp", - be16_to_cpu(get_unaligned(&hin->major)), + get_unaligned_be16(&hin->major), hin->minor, - be32_to_cpu(get_unaligned(&hin->tag)), + get_unaligned_be32(&hin->tag), jiffies); aoechr_error(ebuf); return; @@ -873,7 +873,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) printk(KERN_INFO "aoe: unrecognized ata command %2.2Xh for %d.%d\n", ahout->cmdstat, - be16_to_cpu(get_unaligned(&hin->major)), + get_unaligned_be16(&hin->major), hin->minor); } } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index f9a1cd9edb7..a1d813ab0d6 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -18,24 +18,6 @@ static void skbpoolfree(struct aoedev *d); static struct aoedev *devlist; static DEFINE_SPINLOCK(devlist_lock); -int -aoedev_isbusy(struct aoedev *d) -{ - struct aoetgt **t, **te; - struct frame *f, *e; - - t = d->targets; - te = t + NTARGETS; - for (; t < te && *t; t++) { - f = (*t)->frames; - e = f + (*t)->nframes; - for (; f < e; f++) - if (f->tag != FREETAG) - return 1; - } - return 0; -} - struct aoedev * aoedev_by_aoeaddr(int maj, int min) { diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 18d243c73ee..d625169c8e4 100644 
--- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -128,7 +128,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, skb_push(skb, ETH_HLEN); /* (1) */ h = (struct aoe_hdr *) skb_mac_header(skb); - n = be32_to_cpu(get_unaligned(&h->tag)); + n = get_unaligned_be32(&h->tag); if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) goto exit; @@ -140,7 +140,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, printk(KERN_ERR "%s%d.%d@%s; ecode=%d '%s'\n", "aoe: error packet from ", - be16_to_cpu(get_unaligned(&h->major)), + get_unaligned_be16(&h->major), h->minor, skb->dev->name, h->err, aoe_errlist[n]); goto exit; diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index cf6083a1f92..e539be5750d 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -425,7 +425,7 @@ static void __devinit cciss_procinit(int i) struct proc_dir_entry *pde; if (proc_cciss == NULL) - proc_cciss = proc_mkdir("cciss", proc_root_driver); + proc_cciss = proc_mkdir("driver/cciss", NULL); if (!proc_cciss) return; pde = proc_create(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP | @@ -3700,7 +3700,7 @@ static void __exit cciss_cleanup(void) cciss_remove_one(hba[i]->pdev); } } - remove_proc_entry("cciss", proc_root_driver); + remove_proc_entry("driver/cciss", NULL); } static void fail_all_cmds(unsigned long ctlr) diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 69199185ff4..09c14341e6e 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -214,7 +214,7 @@ static struct proc_dir_entry *proc_array; static void __init ida_procinit(int i) { if (proc_array == NULL) { - proc_array = proc_mkdir("cpqarray", proc_root_driver); + proc_array = proc_mkdir("driver/cpqarray", NULL); if (!proc_array) return; } @@ -1796,7 +1796,7 @@ static void __exit cpqarray_exit(void) } } - remove_proc_entry("cpqarray", proc_root_driver); + remove_proc_entry("driver/cpqarray", NULL); } 
module_init(cpqarray_init) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7652e87d60c..395f8ea7981 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4526,14 +4526,15 @@ static void __init parse_floppy_cfg_string(char *cfg) } } -int __init init_module(void) +static int __init floppy_module_init(void) { if (floppy) parse_floppy_cfg_string(floppy); return floppy_init(); } +module_init(floppy_module_init); -void cleanup_module(void) +static void __exit floppy_module_exit(void) { int drive; @@ -4562,6 +4563,7 @@ void cleanup_module(void) /* eject disk, if any */ fd_eject(0); } +module_exit(floppy_module_exit); module_param(floppy, charp, 0); module_param(FLOPPY_IRQ, int, 0); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f7f163557aa..d3a25b027ff 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -546,7 +546,7 @@ static void loop_unplug(struct request_queue *q) { struct loop_device *lo = q->queuedata; - clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q); blk_run_address_space(lo->lo_backing_file->f_mapping); } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 60cc54368b6..ad98dda6037 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -29,6 +29,7 @@ #include <linux/kernel.h> #include <net/sock.h> #include <linux/net.h> +#include <linux/kthread.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -55,6 +56,7 @@ static unsigned int debugflags; static unsigned int nbds_max = 16; static struct nbd_device *nbd_dev; +static int max_part; /* * Use just one lock (or at most 1 per NIC). 
Two arguments for this: @@ -337,7 +339,7 @@ static struct request *nbd_read_stat(struct nbd_device *lo) } req = nbd_find_request(lo, *(struct request **)reply.handle); - if (unlikely(IS_ERR(req))) { + if (IS_ERR(req)) { result = PTR_ERR(req); if (result != -ENOENT) goto harderror; @@ -441,6 +443,85 @@ static void nbd_clear_que(struct nbd_device *lo) } +static void nbd_handle_req(struct nbd_device *lo, struct request *req) +{ + if (!blk_fs_request(req)) + goto error_out; + + nbd_cmd(req) = NBD_CMD_READ; + if (rq_data_dir(req) == WRITE) { + nbd_cmd(req) = NBD_CMD_WRITE; + if (lo->flags & NBD_READ_ONLY) { + printk(KERN_ERR "%s: Write on read-only\n", + lo->disk->disk_name); + goto error_out; + } + } + + req->errors = 0; + + mutex_lock(&lo->tx_lock); + if (unlikely(!lo->sock)) { + mutex_unlock(&lo->tx_lock); + printk(KERN_ERR "%s: Attempted send on closed socket\n", + lo->disk->disk_name); + req->errors++; + nbd_end_request(req); + return; + } + + lo->active_req = req; + + if (nbd_send_req(lo, req) != 0) { + printk(KERN_ERR "%s: Request send failed\n", + lo->disk->disk_name); + req->errors++; + nbd_end_request(req); + } else { + spin_lock(&lo->queue_lock); + list_add(&req->queuelist, &lo->queue_head); + spin_unlock(&lo->queue_lock); + } + + lo->active_req = NULL; + mutex_unlock(&lo->tx_lock); + wake_up_all(&lo->active_wq); + + return; + +error_out: + req->errors++; + nbd_end_request(req); +} + +static int nbd_thread(void *data) +{ + struct nbd_device *lo = data; + struct request *req; + + set_user_nice(current, -20); + while (!kthread_should_stop() || !list_empty(&lo->waiting_queue)) { + /* wait for something to do */ + wait_event_interruptible(lo->waiting_wq, + kthread_should_stop() || + !list_empty(&lo->waiting_queue)); + + /* extract request */ + if (list_empty(&lo->waiting_queue)) + continue; + + spin_lock_irq(&lo->queue_lock); + req = list_entry(lo->waiting_queue.next, struct request, + queuelist); + list_del_init(&req->queuelist); + 
spin_unlock_irq(&lo->queue_lock); + + /* handle request */ + nbd_handle_req(lo, req); + } + return 0; +} + /* * We always wait for result of write, for now. It would be nice to make it optional * in future @@ -456,65 +537,23 @@ static void do_nbd_request(struct request_queue * q) struct nbd_device *lo; blkdev_dequeue_request(req); + + spin_unlock_irq(q->queue_lock); + dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%x)\n", req->rq_disk->disk_name, req, req->cmd_type); - if (!blk_fs_request(req)) - goto error_out; - lo = req->rq_disk->private_data; BUG_ON(lo->magic != LO_MAGIC); - nbd_cmd(req) = NBD_CMD_READ; - if (rq_data_dir(req) == WRITE) { - nbd_cmd(req) = NBD_CMD_WRITE; - if (lo->flags & NBD_READ_ONLY) { - printk(KERN_ERR "%s: Write on read-only\n", - lo->disk->disk_name); - goto error_out; - } - } - - req->errors = 0; - spin_unlock_irq(q->queue_lock); - - mutex_lock(&lo->tx_lock); - if (unlikely(!lo->sock)) { - mutex_unlock(&lo->tx_lock); - printk(KERN_ERR "%s: Attempted send on closed socket\n", - lo->disk->disk_name); - req->errors++; - nbd_end_request(req); - spin_lock_irq(q->queue_lock); - continue; - } - - lo->active_req = req; + spin_lock_irq(&lo->queue_lock); + list_add_tail(&req->queuelist, &lo->waiting_queue); + spin_unlock_irq(&lo->queue_lock); - if (nbd_send_req(lo, req) != 0) { - printk(KERN_ERR "%s: Request send failed\n", - lo->disk->disk_name); - req->errors++; - nbd_end_request(req); - } else { - spin_lock(&lo->queue_lock); - list_add(&req->queuelist, &lo->queue_head); - spin_unlock(&lo->queue_lock); - } - - lo->active_req = NULL; - mutex_unlock(&lo->tx_lock); - wake_up_all(&lo->active_wq); + wake_up(&lo->waiting_wq); spin_lock_irq(q->queue_lock); - continue; - -error_out: - req->errors++; - spin_unlock(q->queue_lock); - nbd_end_request(req); - spin_lock(q->queue_lock); } } @@ -524,6 +563,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, struct nbd_device *lo = inode->i_bdev->bd_disk->private_data; int error; struct request 
sreq ; + struct task_struct *thread; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -537,6 +577,7 @@ static int nbd_ioctl(struct inode *inode, struct file *file, switch (cmd) { case NBD_DISCONNECT: printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name); + blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_SPECIAL; nbd_cmd(&sreq) = NBD_CMD_DISC; /* @@ -571,10 +612,13 @@ static int nbd_ioctl(struct inode *inode, struct file *file, error = -EINVAL; file = fget(arg); if (file) { + struct block_device *bdev = inode->i_bdev; inode = file->f_path.dentry->d_inode; if (S_ISSOCK(inode->i_mode)) { lo->file = file; lo->sock = SOCKET_I(inode); + if (max_part > 0) + bdev->bd_invalidated = 1; error = 0; } else { fput(file); @@ -606,7 +650,12 @@ static int nbd_ioctl(struct inode *inode, struct file *file, case NBD_DO_IT: if (!lo->file) return -EINVAL; + thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); + if (IS_ERR(thread)) + return PTR_ERR(thread); + wake_up_process(thread); error = nbd_do_it(lo); + kthread_stop(thread); if (error) return error; sock_shutdown(lo, 1); @@ -619,6 +668,8 @@ static int nbd_ioctl(struct inode *inode, struct file *file, lo->bytesize = 0; inode->i_bdev->bd_inode->i_size = 0; set_capacity(lo->disk, 0); + if (max_part > 0) + ioctl_by_bdev(inode->i_bdev, BLKRRPART, 0); return lo->harderror; case NBD_CLEAR_QUE: /* @@ -652,6 +703,7 @@ static int __init nbd_init(void) { int err = -ENOMEM; int i; + int part_shift; BUILD_BUG_ON(sizeof(struct nbd_request) != 28); @@ -659,8 +711,17 @@ static int __init nbd_init(void) if (!nbd_dev) return -ENOMEM; + if (max_part < 0) { + printk(KERN_CRIT "nbd: max_part must be >= 0\n"); + return -EINVAL; + } + + part_shift = 0; + if (max_part > 0) + part_shift = fls(max_part); + for (i = 0; i < nbds_max; i++) { - struct gendisk *disk = alloc_disk(1); + struct gendisk *disk = alloc_disk(1 << part_shift); elevator_t *old_e; if (!disk) goto out; @@ -695,17 +756,18 @@ static int __init nbd_init(void) nbd_dev[i].file 
= NULL; nbd_dev[i].magic = LO_MAGIC; nbd_dev[i].flags = 0; + INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_waitqueue_head(&nbd_dev[i].active_wq); + init_waitqueue_head(&nbd_dev[i].waiting_wq); nbd_dev[i].blksize = 1024; nbd_dev[i].bytesize = 0; disk->major = NBD_MAJOR; - disk->first_minor = i; + disk->first_minor = i << part_shift; disk->fops = &nbd_fops; disk->private_data = &nbd_dev[i]; - disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; sprintf(disk->disk_name, "nbd%d", i); set_capacity(disk, 0); add_disk(disk); @@ -743,7 +805,9 @@ MODULE_DESCRIPTION("Network Block Device"); MODULE_LICENSE("GPL"); module_param(nbds_max, int, 0444); -MODULE_PARM_DESC(nbds_max, "How many network block devices to initialize."); +MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)"); +module_param(max_part, int, 0444); +MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)"); #ifndef NDEBUG module_param(debugflags, int, 0644); MODULE_PARM_DESC(debugflags, "flags for controlling debug output"); diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index df819f8a95a..570f3b70dce 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -716,10 +716,8 @@ static int pd_special_command(struct pd_unit *disk, struct request rq; int err = 0; - memset(&rq, 0, sizeof(rq)); - rq.errors = 0; + blk_rq_init(NULL, &rq); rq.rq_disk = disk->gd; - rq.ref_count = 1; rq.end_io_data = &wait; rq.end_io = blk_end_sync_rq; blk_insert_request(disk->gd->queue, &rq, 0, func); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 18feb1c7c33..3ba1df93e9e 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -776,8 +776,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->cmd_len = COMMAND_SIZE(cgc->cmd[0]); memcpy(rq->cmd, cgc->cmd, 
CDROM_PACKET_SIZE); - if (sizeof(rq->cmd) > CDROM_PACKET_SIZE) - memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE); rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; @@ -2744,7 +2742,6 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) int i; int ret = 0; char b[BDEVNAME_SIZE]; - struct proc_dir_entry *proc; struct block_device *bdev; if (pd->pkt_dev == dev) { @@ -2788,11 +2785,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) goto out_mem; } - proc = create_proc_entry(pd->name, 0, pkt_proc); - if (proc) { - proc->data = pd; - proc->proc_fops = &pkt_proc_fops; - } + proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd); DPRINTK(DRIVER_NAME": writer %s mapped to %s\n", pd->name, bdevname(bdev, b)); return 0; @@ -3101,7 +3094,7 @@ static int __init pkt_init(void) goto out_misc; } - pkt_proc = proc_mkdir(DRIVER_NAME, proc_root_driver); + pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL); return 0; @@ -3117,7 +3110,7 @@ out2: static void __exit pkt_exit(void) { - remove_proc_entry(DRIVER_NAME, proc_root_driver); + remove_proc_entry("driver/"DRIVER_NAME, NULL); misc_deregister(&pkt_misc); pkt_debugfs_cleanup(); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 7483f947f0e..d797e209951 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -102,8 +102,7 @@ static void ps3disk_scatter_gather(struct ps3_storage_device *dev, dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u segs %u sectors from %lu\n", __func__, __LINE__, i, bio_segments(iter.bio), - bio_sectors(iter.bio), - (unsigned long)iter.bio->bi_sector); + bio_sectors(iter.bio), iter.bio->bi_sector); size = bvec->bv_len; buf = bvec_kmap_irq(bvec, &flags); @@ -406,7 +405,6 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - memset(req->cmd, 0, sizeof(req->cmd)); req->cmd_type = REQ_TYPE_FLUSH; } diff --git a/drivers/block/ub.c 
b/drivers/block/ub.c index 27bfe72aab5..e322cce8c12 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -2399,7 +2399,7 @@ static void ub_disconnect(struct usb_interface *intf) del_gendisk(lun->disk); /* * I wish I could do: - * set_bit(QUEUE_FLAG_DEAD, &q->queue_flags); + * queue_flag_set(QUEUE_FLAG_DEAD, q); * As it is, we rely on our internal poisoning and let * the upper levels to spin furiously failing all the I/O. */ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d771da816d9..f2fff5799dd 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -137,7 +137,7 @@ static void blkif_restart_queue_callback(void *arg) schedule_work(&info->work); } -int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) +static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg) { /* We don't have real geometry info, but let's at least return values consistent with the size of the device */ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index ac3829030ac..69f26eb6415 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2194,7 +2194,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (ret) break; - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = GPCMD_READ_CD; rq->cmd[1] = 1 << 2; rq->cmd[2] = (lba >> 24) & 0xff; diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index b74b6c2768a..5245a4a0ba7 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -144,6 +144,7 @@ static int proc_viocd_open(struct inode *inode, struct file *file) } static const struct file_operations proc_viocd_operations = { + .owner = THIS_MODULE, .open = proc_viocd_open, .read = seq_read, .llseek = seq_lseek, @@ -679,7 +680,6 @@ static struct vio_driver viocd_driver = { static int __init viocd_init(void) { - struct proc_dir_entry *e; int ret = 0; if (!firmware_has_feature(FW_FEATURE_ISERIES)) @@ -719,12 +719,8 @@ static int __init 
viocd_init(void) if (ret) goto out_free_info; - e = create_proc_entry("iSeries/viocd", S_IFREG|S_IRUGO, NULL); - if (e) { - e->owner = THIS_MODULE; - e->proc_fops = &proc_viocd_operations; - } - + proc_create("iSeries/viocd", S_IFREG|S_IRUGO, NULL, + &proc_viocd_operations); return 0; out_free_info: diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 929d4fa73fd..5dce3877eee 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -80,6 +80,15 @@ config VT_HW_CONSOLE_BINDING information. For framebuffer console users, please refer to <file:Documentation/fb/fbcon.txt>. +config DEVKMEM + bool "/dev/kmem virtual device support" + default y + help + Say Y here if you want to support the /dev/kmem device. The + /dev/kmem device is rarely used, but can be used for certain + kind of kernel debugging operations. + When in doubt, say "N". + config SERIAL_NONSTANDARD bool "Non-standard serial port support" depends on HAS_IOMEM diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c index 17d54315e14..cdd876dbb2b 100644 --- a/drivers/char/apm-emulation.c +++ b/drivers/char/apm-emulation.c @@ -14,6 +14,7 @@ #include <linux/poll.h> #include <linux/slab.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/miscdevice.h> #include <linux/apm_bios.h> #include <linux/capability.h> @@ -493,11 +494,10 @@ static struct miscdevice apm_device = { * -1: Unknown * 8) min = minutes; sec = seconds */ -static int apm_get_info(char *buf, char **start, off_t fpos, int length) +static int proc_apm_show(struct seq_file *m, void *v) { struct apm_power_info info; char *units; - int ret; info.ac_line_status = 0xff; info.battery_status = 0xff; @@ -515,14 +515,27 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) case 1: units = "sec"; break; } - ret = sprintf(buf, "%s 1.2 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", + seq_printf(m, "%s 1.2 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", driver_version, APM_32_BIT_SUPPORT, 
info.ac_line_status, info.battery_status, info.battery_flag, info.battery_life, info.time, units); - return ret; + return 0; } + +static int proc_apm_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_apm_show, NULL); +} + +static const struct file_operations apm_proc_fops = { + .owner = THIS_MODULE, + .open = proc_apm_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif static int kapmd(void *arg) @@ -593,7 +606,7 @@ static int __init apm_init(void) wake_up_process(kapmd_tsk); #ifdef CONFIG_PROC_FS - create_proc_info_entry("apm", 0, NULL, apm_get_info); + proc_create("apm", 0, NULL, &apm_proc_fops); #endif ret = misc_register(&apm_device); diff --git a/drivers/char/i8k.c b/drivers/char/i8k.c index 8609b8236c6..f49037b744f 100644 --- a/drivers/char/i8k.c +++ b/drivers/char/i8k.c @@ -82,6 +82,7 @@ static int i8k_ioctl(struct inode *, struct file *, unsigned int, unsigned long); static const struct file_operations i8k_fops = { + .owner = THIS_MODULE, .open = i8k_open_fs, .read = seq_read, .llseek = seq_lseek, @@ -554,13 +555,10 @@ static int __init i8k_init(void) return -ENODEV; /* Register the proc entry */ - proc_i8k = create_proc_entry("i8k", 0, NULL); + proc_i8k = proc_create("i8k", 0, NULL, &i8k_fops); if (!proc_i8k) return -ENOENT; - proc_i8k->proc_fops = &i8k_fops; - proc_i8k->owner = THIS_MODULE; - printk(KERN_INFO "Dell laptop SMM driver v%s Massimo Dal Zotto (dz@debian.org)\n", I8K_VERSION); diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index b1d6cad8428..0a61856c631 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -133,8 +133,9 @@ *****************/ #include <linux/proc_fs.h> +#include <linux/seq_file.h> -static int ip2_read_procmem(char *, char **, off_t, int); +static const struct file_operations ip2mem_proc_fops; static int ip2_read_proc(char *, char **, off_t, int, int *, void * ); /********************/ @@ -423,7 +424,7 @@ 
cleanup_module(void) } put_tty_driver(ip2_tty_driver); unregister_chrdev(IP2_IPL_MAJOR, pcIpl); - remove_proc_entry("ip2mem", &proc_root); + remove_proc_entry("ip2mem", NULL); // free memory for (i = 0; i < IP2_MAX_BOARDS; i++) { @@ -695,7 +696,7 @@ ip2_loadmain(int *iop, int *irqp, unsigned char *firmware, int firmsize) } } /* Register the read_procmem thing */ - if (!create_proc_info_entry("ip2mem",0,&proc_root,ip2_read_procmem)) { + if (!proc_create("ip2mem",0,NULL,&ip2mem_proc_fops)) { printk(KERN_ERR "IP2: failed to register read_procmem\n"); } else { @@ -2967,65 +2968,61 @@ ip2_ipl_open( struct inode *pInode, struct file *pFile ) } return 0; } -/******************************************************************************/ -/* Function: ip2_read_procmem */ -/* Parameters: */ -/* */ -/* Returns: Length of output */ -/* */ -/* Description: */ -/* Supplies some driver operating parameters */ -/* Not real useful unless your debugging the fifo */ -/* */ -/******************************************************************************/ - -#define LIMIT (PAGE_SIZE - 120) static int -ip2_read_procmem(char *buf, char **start, off_t offset, int len) +proc_ip2mem_show(struct seq_file *m, void *v) { i2eBordStrPtr pB; i2ChanStrPtr pCh; PTTY tty; int i; - len = 0; - #define FMTLINE "%3d: 0x%08x 0x%08x 0%011o 0%011o\n" #define FMTLIN2 " 0x%04x 0x%04x tx flow 0x%x\n" #define FMTLIN3 " 0x%04x 0x%04x rc flow\n" - len += sprintf(buf+len,"\n"); + seq_printf(m,"\n"); for( i = 0; i < IP2_MAX_BOARDS; ++i ) { pB = i2BoardPtrTable[i]; if ( pB ) { - len += sprintf(buf+len,"board %d:\n",i); - len += sprintf(buf+len,"\tFifo rem: %d mty: %x outM %x\n", + seq_printf(m,"board %d:\n",i); + seq_printf(m,"\tFifo rem: %d mty: %x outM %x\n", pB->i2eFifoRemains,pB->i2eWaitingForEmptyFifo,pB->i2eOutMailWaiting); } } - len += sprintf(buf+len,"#: tty flags, port flags, cflags, iflags\n"); + seq_printf(m,"#: tty flags, port flags, cflags, iflags\n"); for (i=0; i < IP2_MAX_PORTS; i++) { - if (len > 
LIMIT) - break; pCh = DevTable[i]; if (pCh) { tty = pCh->pTTY; if (tty && tty->count) { - len += sprintf(buf+len,FMTLINE,i,(int)tty->flags,pCh->flags, + seq_printf(m,FMTLINE,i,(int)tty->flags,pCh->flags, tty->termios->c_cflag,tty->termios->c_iflag); - len += sprintf(buf+len,FMTLIN2, + seq_printf(m,FMTLIN2, pCh->outfl.asof,pCh->outfl.room,pCh->channelNeeds); - len += sprintf(buf+len,FMTLIN3,pCh->infl.asof,pCh->infl.room); + seq_printf(m,FMTLIN3,pCh->infl.asof,pCh->infl.room); } } } - return len; + return 0; +} + +static int proc_ip2mem_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_ip2mem_show, NULL); } +static const struct file_operations ip2mem_proc_fops = { + .owner = THIS_MODULE, + .open = proc_ip2mem_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * This is the handler for /proc/tty/driver/ip2 * diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile index 553f0a408ed..eb8a1a8c188 100644 --- a/drivers/char/ipmi/Makefile +++ b/drivers/char/ipmi/Makefile @@ -9,7 +9,3 @@ obj-$(CONFIG_IPMI_DEVICE_INTERFACE) += ipmi_devintf.o obj-$(CONFIG_IPMI_SI) += ipmi_si.o obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o - -ipmi_si.o: $(ipmi_si-objs) - $(LD) -r -o $@ $(ipmi_si-objs) - diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index e736119b649..7b98c067190 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -37,26 +37,32 @@ #define BT_DEBUG_ENABLE 1 /* Generic messages */ #define BT_DEBUG_MSG 2 /* Prints all request/response buffers */ #define BT_DEBUG_STATES 4 /* Verbose look at state changes */ -/* BT_DEBUG_OFF must be zero to correspond to the default uninitialized - value */ +/* + * BT_DEBUG_OFF must be zero to correspond to the default uninitialized + * value + */ static int bt_debug; /* 0 == BT_DEBUG_OFF */ module_param(bt_debug, int, 0644); MODULE_PARM_DESC(bt_debug, "debug 
bitmask, 1=enable, 2=messages, 4=states"); -/* Typical "Get BT Capabilities" values are 2-3 retries, 5-10 seconds, - and 64 byte buffers. However, one HP implementation wants 255 bytes of - buffer (with a documented message of 160 bytes) so go for the max. - Since the Open IPMI architecture is single-message oriented at this - stage, the queue depth of BT is of no concern. */ +/* + * Typical "Get BT Capabilities" values are 2-3 retries, 5-10 seconds, + * and 64 byte buffers. However, one HP implementation wants 255 bytes of + * buffer (with a documented message of 160 bytes) so go for the max. + * Since the Open IPMI architecture is single-message oriented at this + * stage, the queue depth of BT is of no concern. + */ #define BT_NORMAL_TIMEOUT 5 /* seconds */ #define BT_NORMAL_RETRY_LIMIT 2 #define BT_RESET_DELAY 6 /* seconds after warm reset */ -/* States are written in chronological order and usually cover - multiple rows of the state table discussion in the IPMI spec. */ +/* + * States are written in chronological order and usually cover + * multiple rows of the state table discussion in the IPMI spec. + */ enum bt_states { BT_STATE_IDLE = 0, /* Order is critical in this list */ @@ -76,10 +82,12 @@ enum bt_states { BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */ }; -/* Macros seen at the end of state "case" blocks. They help with legibility - and debugging. */ +/* + * Macros seen at the end of state "case" blocks. They help with legibility + * and debugging. + */ -#define BT_STATE_CHANGE(X,Y) { bt->state = X; return Y; } +#define BT_STATE_CHANGE(X, Y) { bt->state = X; return Y; } #define BT_SI_SM_RETURN(Y) { last_printed = BT_STATE_PRINTME; return Y; } @@ -110,11 +118,13 @@ struct si_sm_data { #define BT_H_BUSY 0x40 #define BT_B_BUSY 0x80 -/* Some bits are toggled on each write: write once to set it, once - more to clear it; writing a zero does nothing. To absolutely - clear it, check its state and write if set. 
This avoids the "get - current then use as mask" scheme to modify one bit. Note that the - variable "bt" is hardcoded into these macros. */ +/* + * Some bits are toggled on each write: write once to set it, once + * more to clear it; writing a zero does nothing. To absolutely + * clear it, check its state and write if set. This avoids the "get + * current then use as mask" scheme to modify one bit. Note that the + * variable "bt" is hardcoded into these macros. + */ #define BT_STATUS bt->io->inputb(bt->io, 0) #define BT_CONTROL(x) bt->io->outputb(bt->io, 0, x) @@ -125,8 +135,10 @@ struct si_sm_data { #define BT_INTMASK_R bt->io->inputb(bt->io, 2) #define BT_INTMASK_W(x) bt->io->outputb(bt->io, 2, x) -/* Convenience routines for debugging. These are not multi-open safe! - Note the macros have hardcoded variables in them. */ +/* + * Convenience routines for debugging. These are not multi-open safe! + * Note the macros have hardcoded variables in them. + */ static char *state2txt(unsigned char state) { @@ -182,7 +194,8 @@ static char *status2txt(unsigned char status) static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) { memset(bt, 0, sizeof(struct si_sm_data)); - if (bt->io != io) { /* external: one-time only things */ + if (bt->io != io) { + /* external: one-time only things */ bt->io = io; bt->seq = 0; } @@ -229,7 +242,7 @@ static int bt_start_transaction(struct si_sm_data *bt, printk(KERN_WARNING "BT: +++++++++++++++++ New command\n"); printk(KERN_WARNING "BT: NetFn/LUN CMD [%d data]:", size - 2); for (i = 0; i < size; i ++) - printk (" %02x", data[i]); + printk(" %02x", data[i]); printk("\n"); } bt->write_data[0] = size + 1; /* all data plus seq byte */ @@ -246,8 +259,10 @@ static int bt_start_transaction(struct si_sm_data *bt, return 0; } -/* After the upper state machine has been told SI_SM_TRANSACTION_COMPLETE - it calls this. Strip out the length and seq bytes. 
*/ +/* + * After the upper state machine has been told SI_SM_TRANSACTION_COMPLETE + * it calls this. Strip out the length and seq bytes. + */ static int bt_get_result(struct si_sm_data *bt, unsigned char *data, @@ -269,10 +284,10 @@ static int bt_get_result(struct si_sm_data *bt, memcpy(data + 2, bt->read_data + 4, msg_len - 2); if (bt_debug & BT_DEBUG_MSG) { - printk (KERN_WARNING "BT: result %d bytes:", msg_len); + printk(KERN_WARNING "BT: result %d bytes:", msg_len); for (i = 0; i < msg_len; i++) printk(" %02x", data[i]); - printk ("\n"); + printk("\n"); } return msg_len; } @@ -292,8 +307,10 @@ static void reset_flags(struct si_sm_data *bt) BT_INTMASK_W(BT_BMC_HWRST); } -/* Get rid of an unwanted/stale response. This should only be needed for - BMCs that support multiple outstanding requests. */ +/* + * Get rid of an unwanted/stale response. This should only be needed for + * BMCs that support multiple outstanding requests. + */ static void drain_BMC2HOST(struct si_sm_data *bt) { @@ -326,8 +343,8 @@ static inline void write_all_bytes(struct si_sm_data *bt) printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", bt->write_count, bt->seq); for (i = 0; i < bt->write_count; i++) - printk (" %02x", bt->write_data[i]); - printk ("\n"); + printk(" %02x", bt->write_data[i]); + printk("\n"); } for (i = 0; i < bt->write_count; i++) HOST2BMC(bt->write_data[i]); @@ -337,8 +354,10 @@ static inline int read_all_bytes(struct si_sm_data *bt) { unsigned char i; - /* length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. - Keep layout of first four bytes aligned with write_data[] */ + /* + * length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. 
+ * Keep layout of first four bytes aligned with write_data[] + */ bt->read_data[0] = BMC2HOST; bt->read_count = bt->read_data[0]; @@ -362,8 +381,8 @@ static inline int read_all_bytes(struct si_sm_data *bt) if (max > 16) max = 16; for (i = 0; i < max; i++) - printk (" %02x", bt->read_data[i]); - printk ("%s\n", bt->read_count == max ? "" : " ..."); + printk(KERN_CONT " %02x", bt->read_data[i]); + printk(KERN_CONT "%s\n", bt->read_count == max ? "" : " ..."); } /* per the spec, the (NetFn[1], Seq[2], Cmd[3]) tuples must match */ @@ -402,8 +421,10 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt, printk(KERN_WARNING "IPMI BT: %s in %s %s ", /* open-ended line */ reason, STATE2TXT, STATUS2TXT); - /* Per the IPMI spec, retries are based on the sequence number - known only to this module, so manage a restart here. */ + /* + * Per the IPMI spec, retries are based on the sequence number + * known only to this module, so manage a restart here. + */ (bt->error_retries)++; if (bt->error_retries < bt->BT_CAP_retries) { printk("%d retries left\n", @@ -412,8 +433,8 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt, return SI_SM_CALL_WITHOUT_DELAY; } - printk("failed %d retries, sending error response\n", - bt->BT_CAP_retries); + printk(KERN_WARNING "failed %d retries, sending error response\n", + bt->BT_CAP_retries); if (!bt->nonzero_status) printk(KERN_ERR "IPMI BT: stuck, try power cycle\n"); @@ -424,8 +445,10 @@ static enum si_sm_result error_recovery(struct si_sm_data *bt, return SI_SM_CALL_WITHOUT_DELAY; } - /* Concoct a useful error message, set up the next state, and - be done with this sequence. */ + /* + * Concoct a useful error message, set up the next state, and + * be done with this sequence. + */ bt->state = BT_STATE_IDLE; switch (cCode) { @@ -461,10 +484,12 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) last_printed = bt->state; } - /* Commands that time out may still (eventually) provide a response. 
- This stale response will get in the way of a new response so remove - it if possible (hopefully during IDLE). Even if it comes up later - it will be rejected by its (now-forgotten) seq number. */ + /* + * Commands that time out may still (eventually) provide a response. + * This stale response will get in the way of a new response so remove + * it if possible (hopefully during IDLE). Even if it comes up later + * it will be rejected by its (now-forgotten) seq number. + */ if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN)) { drain_BMC2HOST(bt); @@ -472,7 +497,8 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) } if ((bt->state != BT_STATE_IDLE) && - (bt->state < BT_STATE_PRINTME)) { /* check timeout */ + (bt->state < BT_STATE_PRINTME)) { + /* check timeout */ bt->timeout -= time; if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) return error_recovery(bt, @@ -482,8 +508,10 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) switch (bt->state) { - /* Idle state first checks for asynchronous messages from another - channel, then does some opportunistic housekeeping. */ + /* + * Idle state first checks for asynchronous messages from another + * channel, then does some opportunistic housekeeping. + */ case BT_STATE_IDLE: if (status & BT_SMS_ATN) { @@ -531,16 +559,19 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); BT_CONTROL(BT_H_BUSY); /* set */ - /* Uncached, ordered writes should just proceeed serially but - some BMCs don't clear B2H_ATN with one hit. Fast-path a - workaround without too much penalty to the general case. */ + /* + * Uncached, ordered writes should just proceeed serially but + * some BMCs don't clear B2H_ATN with one hit. Fast-path a + * workaround without too much penalty to the general case. 
+ */ BT_CONTROL(BT_B2H_ATN); /* clear it to ACK the BMC */ BT_STATE_CHANGE(BT_STATE_CLEAR_B2H, SI_SM_CALL_WITHOUT_DELAY); case BT_STATE_CLEAR_B2H: - if (status & BT_B2H_ATN) { /* keep hitting it */ + if (status & BT_B2H_ATN) { + /* keep hitting it */ BT_CONTROL(BT_B2H_ATN); BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); } @@ -548,7 +579,8 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) SI_SM_CALL_WITHOUT_DELAY); case BT_STATE_READ_BYTES: - if (!(status & BT_H_BUSY)) /* check in case of retry */ + if (!(status & BT_H_BUSY)) + /* check in case of retry */ BT_CONTROL(BT_H_BUSY); BT_CONTROL(BT_CLR_RD_PTR); /* start of BMC2HOST buffer */ i = read_all_bytes(bt); /* true == packet seq match */ @@ -599,8 +631,10 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) BT_STATE_CHANGE(BT_STATE_XACTION_START, SI_SM_CALL_WITH_DELAY); - /* Get BT Capabilities, using timing of upper level state machine. - Set outreqs to prevent infinite loop on timeout. */ + /* + * Get BT Capabilities, using timing of upper level state machine. + * Set outreqs to prevent infinite loop on timeout. + */ case BT_STATE_CAPABILITIES_BEGIN: bt->BT_CAP_outreqs = 1; { @@ -638,10 +672,12 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) static int bt_detect(struct si_sm_data *bt) { - /* It's impossible for the BT status and interrupt registers to be - all 1's, (assuming a properly functioning, self-initialized BMC) - but that's what you get from reading a bogus address, so we - test that first. The calling routine uses negative logic. */ + /* + * It's impossible for the BT status and interrupt registers to be + * all 1's, (assuming a properly functioning, self-initialized BMC) + * but that's what you get from reading a bogus address, so we + * test that first. The calling routine uses negative logic. 
+ */ if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF)) return 1; @@ -658,8 +694,7 @@ static int bt_size(void) return sizeof(struct si_sm_data); } -struct si_sm_handlers bt_smi_handlers = -{ +struct si_sm_handlers bt_smi_handlers = { .init_data = bt_init_data, .start_transaction = bt_start_transaction, .get_result = bt_get_result, diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c index c1b8228cb7b..80704875794 100644 --- a/drivers/char/ipmi/ipmi_kcs_sm.c +++ b/drivers/char/ipmi/ipmi_kcs_sm.c @@ -60,37 +60,58 @@ MODULE_PARM_DESC(kcs_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); /* The states the KCS driver may be in. */ enum kcs_states { - KCS_IDLE, /* The KCS interface is currently - doing nothing. */ - KCS_START_OP, /* We are starting an operation. The - data is in the output buffer, but - nothing has been done to the - interface yet. This was added to - the state machine in the spec to - wait for the initial IBF. */ - KCS_WAIT_WRITE_START, /* We have written a write cmd to the - interface. */ - KCS_WAIT_WRITE, /* We are writing bytes to the - interface. */ - KCS_WAIT_WRITE_END, /* We have written the write end cmd - to the interface, and still need to - write the last byte. */ - KCS_WAIT_READ, /* We are waiting to read data from - the interface. */ - KCS_ERROR0, /* State to transition to the error - handler, this was added to the - state machine in the spec to be - sure IBF was there. */ - KCS_ERROR1, /* First stage error handler, wait for - the interface to respond. */ - KCS_ERROR2, /* The abort cmd has been written, - wait for the interface to - respond. */ - KCS_ERROR3, /* We wrote some data to the - interface, wait for it to switch to - read mode. */ - KCS_HOSED /* The hardware failed to follow the - state machine. */ + /* The KCS interface is currently doing nothing. */ + KCS_IDLE, + + /* + * We are starting an operation. The data is in the output + * buffer, but nothing has been done to the interface yet. 
This + * was added to the state machine in the spec to wait for the + * initial IBF. + */ + KCS_START_OP, + + /* We have written a write cmd to the interface. */ + KCS_WAIT_WRITE_START, + + /* We are writing bytes to the interface. */ + KCS_WAIT_WRITE, + + /* + * We have written the write end cmd to the interface, and + * still need to write the last byte. + */ + KCS_WAIT_WRITE_END, + + /* We are waiting to read data from the interface. */ + KCS_WAIT_READ, + + /* + * State to transition to the error handler, this was added to + * the state machine in the spec to be sure IBF was there. + */ + KCS_ERROR0, + + /* + * First stage error handler, wait for the interface to + * respond. + */ + KCS_ERROR1, + + /* + * The abort cmd has been written, wait for the interface to + * respond. + */ + KCS_ERROR2, + + /* + * We wrote some data to the interface, wait for it to switch + * to read mode. + */ + KCS_ERROR3, + + /* The hardware failed to follow the state machine. */ + KCS_HOSED }; #define MAX_KCS_READ_SIZE IPMI_MAX_MSG_LENGTH @@ -102,8 +123,7 @@ enum kcs_states { #define MAX_ERROR_RETRIES 10 #define ERROR0_OBF_WAIT_JIFFIES (2*HZ) -struct si_sm_data -{ +struct si_sm_data { enum kcs_states state; struct si_sm_io *io; unsigned char write_data[MAX_KCS_WRITE_SIZE]; @@ -187,7 +207,8 @@ static inline void start_error_recovery(struct si_sm_data *kcs, char *reason) (kcs->error_retries)++; if (kcs->error_retries > MAX_ERROR_RETRIES) { if (kcs_debug & KCS_DEBUG_ENABLE) - printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", reason); + printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", + reason); kcs->state = KCS_HOSED; } else { kcs->error0_timeout = jiffies + ERROR0_OBF_WAIT_JIFFIES; @@ -271,10 +292,9 @@ static int start_kcs_transaction(struct si_sm_data *kcs, unsigned char *data, if (kcs_debug & KCS_DEBUG_MSG) { printk(KERN_DEBUG "start_kcs_transaction -"); - for (i = 0; i < size; i ++) { + for (i = 0; i < size; i++) printk(" %02x", (unsigned char) (data [i])); - } - printk ("\n"); + 
printk("\n"); } kcs->error_retries = 0; memcpy(kcs->write_data, data, size); @@ -305,9 +325,11 @@ static int get_kcs_result(struct si_sm_data *kcs, unsigned char *data, kcs->read_pos = 3; } if (kcs->truncated) { - /* Report a truncated error. We might overwrite - another error, but that's too bad, the user needs - to know it was truncated. */ + /* + * Report a truncated error. We might overwrite + * another error, but that's too bad, the user needs + * to know it was truncated. + */ data[2] = IPMI_ERR_MSG_TRUNCATED; kcs->truncated = 0; } @@ -315,9 +337,11 @@ static int get_kcs_result(struct si_sm_data *kcs, unsigned char *data, return kcs->read_pos; } -/* This implements the state machine defined in the IPMI manual, see - that for details on how this works. Divide that flowchart into - sections delimited by "Wait for IBF" and this will become clear. */ +/* + * This implements the state machine defined in the IPMI manual, see + * that for details on how this works. Divide that flowchart into + * sections delimited by "Wait for IBF" and this will become clear. + */ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) { unsigned char status; @@ -388,11 +412,12 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) write_next_byte(kcs); } break; - + case KCS_WAIT_WRITE_END: if (state != KCS_WRITE_STATE) { start_error_recovery(kcs, - "Not in write state for write end"); + "Not in write state" + " for write end"); break; } clear_obf(kcs, status); @@ -413,13 +438,15 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) return SI_SM_CALL_WITH_DELAY; read_next_byte(kcs); } else { - /* We don't implement this exactly like the state - machine in the spec. Some broken hardware - does not write the final dummy byte to the - read register. Thus obf will never go high - here. We just go straight to idle, and we - handle clearing out obf in idle state if it - happens to come in. 
*/ + /* + * We don't implement this exactly like the state + * machine in the spec. Some broken hardware + * does not write the final dummy byte to the + * read register. Thus obf will never go high + * here. We just go straight to idle, and we + * handle clearing out obf in idle state if it + * happens to come in. + */ clear_obf(kcs, status); kcs->orig_write_count = 0; kcs->state = KCS_IDLE; @@ -430,7 +457,8 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) case KCS_ERROR0: clear_obf(kcs, status); status = read_status(kcs); - if (GET_STATUS_OBF(status)) /* controller isn't responding */ + if (GET_STATUS_OBF(status)) + /* controller isn't responding */ if (time_before(jiffies, kcs->error0_timeout)) return SI_SM_CALL_WITH_TICK_DELAY; write_cmd(kcs, KCS_GET_STATUS_ABORT); @@ -442,7 +470,7 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) write_data(kcs, 0); kcs->state = KCS_ERROR2; break; - + case KCS_ERROR2: if (state != KCS_READ_STATE) { start_error_recovery(kcs, @@ -456,7 +484,7 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) write_data(kcs, KCS_READ_BYTE); kcs->state = KCS_ERROR3; break; - + case KCS_ERROR3: if (state != KCS_IDLE_STATE) { start_error_recovery(kcs, @@ -475,7 +503,7 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time) return SI_SM_TRANSACTION_COMPLETE; } break; - + case KCS_HOSED: break; } @@ -495,10 +523,12 @@ static int kcs_size(void) static int kcs_detect(struct si_sm_data *kcs) { - /* It's impossible for the KCS status register to be all 1's, - (assuming a properly functioning, self-initialized BMC) - but that's what you get from reading a bogus address, so we - test that first. */ + /* + * It's impossible for the KCS status register to be all 1's, + * (assuming a properly functioning, self-initialized BMC) + * but that's what you get from reading a bogus address, so we + * test that first. 
+ */ if (read_status(kcs) == 0xff) return 1; @@ -509,8 +539,7 @@ static void kcs_cleanup(struct si_sm_data *kcs) { } -struct si_sm_handlers kcs_smi_handlers = -{ +struct si_sm_handlers kcs_smi_handlers = { .init_data = init_kcs_data, .start_transaction = start_kcs_transaction, .get_result = get_kcs_result, diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 32b2b22996d..8a59aaa21be 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -47,7 +47,7 @@ #define PFX "IPMI message handler: " -#define IPMI_DRIVER_VERSION "39.1" +#define IPMI_DRIVER_VERSION "39.2" static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); @@ -63,16 +63,16 @@ static struct proc_dir_entry *proc_ipmi_root; #define MAX_EVENTS_IN_QUEUE 25 -/* Don't let a message sit in a queue forever, always time it with at lest - the max message timer. This is in milliseconds. */ +/* + * Don't let a message sit in a queue forever, always time it with at lest + * the max message timer. This is in milliseconds. + */ #define MAX_MSG_TIMEOUT 60000 - /* * The main "user" data structure. */ -struct ipmi_user -{ +struct ipmi_user { struct list_head link; /* Set to "0" when the user is destroyed. */ @@ -91,8 +91,7 @@ struct ipmi_user int gets_events; }; -struct cmd_rcvr -{ +struct cmd_rcvr { struct list_head link; ipmi_user_t user; @@ -106,12 +105,12 @@ struct cmd_rcvr * or change any data until the RCU period completes. So we * use this next variable during mass deletion so we can have * a list and don't have to wait and restart the search on - * every individual deletion of a command. */ + * every individual deletion of a command. 
+ */ struct cmd_rcvr *next; }; -struct seq_table -{ +struct seq_table { unsigned int inuse : 1; unsigned int broadcast : 1; @@ -119,53 +118,60 @@ struct seq_table unsigned long orig_timeout; unsigned int retries_left; - /* To verify on an incoming send message response that this is - the message that the response is for, we keep a sequence id - and increment it every time we send a message. */ + /* + * To verify on an incoming send message response that this is + * the message that the response is for, we keep a sequence id + * and increment it every time we send a message. + */ long seqid; - /* This is held so we can properly respond to the message on a - timeout, and it is used to hold the temporary data for - retransmission, too. */ + /* + * This is held so we can properly respond to the message on a + * timeout, and it is used to hold the temporary data for + * retransmission, too. + */ struct ipmi_recv_msg *recv_msg; }; -/* Store the information in a msgid (long) to allow us to find a - sequence table entry from the msgid. */ +/* + * Store the information in a msgid (long) to allow us to find a + * sequence table entry from the msgid. + */ #define STORE_SEQ_IN_MSGID(seq, seqid) (((seq&0xff)<<26) | (seqid&0x3ffffff)) #define GET_SEQ_FROM_MSGID(msgid, seq, seqid) \ do { \ seq = ((msgid >> 26) & 0x3f); \ seqid = (msgid & 0x3fffff); \ - } while (0) + } while (0) #define NEXT_SEQID(seqid) (((seqid) + 1) & 0x3fffff) -struct ipmi_channel -{ +struct ipmi_channel { unsigned char medium; unsigned char protocol; - /* My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, - but may be changed by the user. */ + /* + * My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, + * but may be changed by the user. + */ unsigned char address; - /* My LUN. This should generally stay the SMS LUN, but just in - case... */ + /* + * My LUN. This should generally stay the SMS LUN, but just in + * case... 
+ */ unsigned char lun; }; #ifdef CONFIG_PROC_FS -struct ipmi_proc_entry -{ +struct ipmi_proc_entry { char *name; struct ipmi_proc_entry *next; }; #endif -struct bmc_device -{ +struct bmc_device { struct platform_device *dev; struct ipmi_device_id id; unsigned char guid[16]; @@ -186,10 +192,108 @@ struct bmc_device struct device_attribute aux_firmware_rev_attr; }; +/* + * Various statistics for IPMI, these index stats[] in the ipmi_smi + * structure. + */ +enum ipmi_stat_indexes { + /* Commands we got from the user that were invalid. */ + IPMI_STAT_sent_invalid_commands = 0, + + /* Commands we sent to the MC. */ + IPMI_STAT_sent_local_commands, + + /* Responses from the MC that were delivered to a user. */ + IPMI_STAT_handled_local_responses, + + /* Responses from the MC that were not delivered to a user. */ + IPMI_STAT_unhandled_local_responses, + + /* Commands we sent out to the IPMB bus. */ + IPMI_STAT_sent_ipmb_commands, + + /* Commands sent on the IPMB that had errors on the SEND CMD */ + IPMI_STAT_sent_ipmb_command_errs, + + /* Each retransmit increments this count. */ + IPMI_STAT_retransmitted_ipmb_commands, + + /* + * When a message times out (runs out of retransmits) this is + * incremented. + */ + IPMI_STAT_timed_out_ipmb_commands, + + /* + * This is like above, but for broadcasts. Broadcasts are + * *not* included in the above count (they are expected to + * time out). + */ + IPMI_STAT_timed_out_ipmb_broadcasts, + + /* Responses I have sent to the IPMB bus. */ + IPMI_STAT_sent_ipmb_responses, + + /* The response was delivered to the user. */ + IPMI_STAT_handled_ipmb_responses, + + /* The response had invalid data in it. */ + IPMI_STAT_invalid_ipmb_responses, + + /* The response didn't have anyone waiting for it. */ + IPMI_STAT_unhandled_ipmb_responses, + + /* Commands we sent out to the IPMB bus. 
*/ + IPMI_STAT_sent_lan_commands, + + /* Commands sent on the IPMB that had errors on the SEND CMD */ + IPMI_STAT_sent_lan_command_errs, + + /* Each retransmit increments this count. */ + IPMI_STAT_retransmitted_lan_commands, + + /* + * When a message times out (runs out of retransmits) this is + * incremented. + */ + IPMI_STAT_timed_out_lan_commands, + + /* Responses I have sent to the IPMB bus. */ + IPMI_STAT_sent_lan_responses, + + /* The response was delivered to the user. */ + IPMI_STAT_handled_lan_responses, + + /* The response had invalid data in it. */ + IPMI_STAT_invalid_lan_responses, + + /* The response didn't have anyone waiting for it. */ + IPMI_STAT_unhandled_lan_responses, + + /* The command was delivered to the user. */ + IPMI_STAT_handled_commands, + + /* The command had invalid data in it. */ + IPMI_STAT_invalid_commands, + + /* The command didn't have anyone waiting for it. */ + IPMI_STAT_unhandled_commands, + + /* Invalid data in an event. */ + IPMI_STAT_invalid_events, + + /* Events that were received with the proper format. */ + IPMI_STAT_events, + + + /* This *must* remain last, add new values above this. */ + IPMI_NUM_STATS +}; + + #define IPMI_IPMB_NUM_SEQ 64 #define IPMI_MAX_CHANNELS 16 -struct ipmi_smi -{ +struct ipmi_smi { /* What interface number are we? */ int intf_num; @@ -198,8 +302,10 @@ struct ipmi_smi /* Used for a list of interfaces. */ struct list_head link; - /* The list of upper layers that are using me. seq_lock - * protects this. */ + /* + * The list of upper layers that are using me. seq_lock + * protects this. + */ struct list_head users; /* Information to supply to users. */ @@ -213,10 +319,12 @@ struct ipmi_smi char *my_dev_name; char *sysfs_name; - /* This is the lower-layer's sender routine. Note that you + /* + * This is the lower-layer's sender routine. Note that you * must either be holding the ipmi_interfaces_mutex or be in * an umpreemptible region to use this. 
You must fetch the - * value into a local variable and make sure it is not NULL. */ + * value into a local variable and make sure it is not NULL. + */ struct ipmi_smi_handlers *handlers; void *send_info; @@ -229,34 +337,45 @@ struct ipmi_smi /* Driver-model device for the system interface. */ struct device *si_dev; - /* A table of sequence numbers for this interface. We use the - sequence numbers for IPMB messages that go out of the - interface to match them up with their responses. A routine - is called periodically to time the items in this list. */ + /* + * A table of sequence numbers for this interface. We use the + * sequence numbers for IPMB messages that go out of the + * interface to match them up with their responses. A routine + * is called periodically to time the items in this list. + */ spinlock_t seq_lock; struct seq_table seq_table[IPMI_IPMB_NUM_SEQ]; int curr_seq; - /* Messages that were delayed for some reason (out of memory, - for instance), will go in here to be processed later in a - periodic timer interrupt. */ + /* + * Messages that were delayed for some reason (out of memory, + * for instance), will go in here to be processed later in a + * periodic timer interrupt. + */ spinlock_t waiting_msgs_lock; struct list_head waiting_msgs; - /* The list of command receivers that are registered for commands - on this interface. */ + /* + * The list of command receivers that are registered for commands + * on this interface. + */ struct mutex cmd_rcvrs_mutex; struct list_head cmd_rcvrs; - /* Events that were queues because no one was there to receive - them. */ + /* + * Events that were queues because no one was there to receive + * them. + */ spinlock_t events_lock; /* For dealing with event stuff. */ struct list_head waiting_events; unsigned int waiting_events_count; /* How many events in queue? 
*/ - int delivering_events; + char delivering_events; + char event_msg_printed; - /* The event receiver for my BMC, only really used at panic - shutdown as a place to store this. */ + /* + * The event receiver for my BMC, only really used at panic + * shutdown as a place to store this. + */ unsigned char event_receiver; unsigned char event_receiver_lun; unsigned char local_sel_device; @@ -268,14 +387,18 @@ struct ipmi_smi int auto_maintenance_timeout; spinlock_t maintenance_mode_lock; /* Used in a timer... */ - /* A cheap hack, if this is non-null and a message to an - interface comes in with a NULL user, call this routine with - it. Note that the message will still be freed by the - caller. This only works on the system interface. */ + /* + * A cheap hack, if this is non-null and a message to an + * interface comes in with a NULL user, call this routine with + * it. Note that the message will still be freed by the + * caller. This only works on the system interface. + */ void (*null_user_handler)(ipmi_smi_t intf, struct ipmi_recv_msg *msg); - /* When we are scanning the channels for an SMI, this will - tell which channel we are scanning. */ + /* + * When we are scanning the channels for an SMI, this will + * tell which channel we are scanning. + */ int curr_channel; /* Channel information */ @@ -285,74 +408,14 @@ struct ipmi_smi struct proc_dir_entry *proc_dir; char proc_dir_name[10]; - spinlock_t counter_lock; /* For making counters atomic. */ - - /* Commands we got that were invalid. */ - unsigned int sent_invalid_commands; - - /* Commands we sent to the MC. */ - unsigned int sent_local_commands; - /* Responses from the MC that were delivered to a user. */ - unsigned int handled_local_responses; - /* Responses from the MC that were not delivered to a user. */ - unsigned int unhandled_local_responses; - - /* Commands we sent out to the IPMB bus. 
*/ - unsigned int sent_ipmb_commands; - /* Commands sent on the IPMB that had errors on the SEND CMD */ - unsigned int sent_ipmb_command_errs; - /* Each retransmit increments this count. */ - unsigned int retransmitted_ipmb_commands; - /* When a message times out (runs out of retransmits) this is - incremented. */ - unsigned int timed_out_ipmb_commands; - - /* This is like above, but for broadcasts. Broadcasts are - *not* included in the above count (they are expected to - time out). */ - unsigned int timed_out_ipmb_broadcasts; + atomic_t stats[IPMI_NUM_STATS]; - /* Responses I have sent to the IPMB bus. */ - unsigned int sent_ipmb_responses; - - /* The response was delivered to the user. */ - unsigned int handled_ipmb_responses; - /* The response had invalid data in it. */ - unsigned int invalid_ipmb_responses; - /* The response didn't have anyone waiting for it. */ - unsigned int unhandled_ipmb_responses; - - /* Commands we sent out to the IPMB bus. */ - unsigned int sent_lan_commands; - /* Commands sent on the IPMB that had errors on the SEND CMD */ - unsigned int sent_lan_command_errs; - /* Each retransmit increments this count. */ - unsigned int retransmitted_lan_commands; - /* When a message times out (runs out of retransmits) this is - incremented. */ - unsigned int timed_out_lan_commands; - - /* Responses I have sent to the IPMB bus. */ - unsigned int sent_lan_responses; - - /* The response was delivered to the user. */ - unsigned int handled_lan_responses; - /* The response had invalid data in it. */ - unsigned int invalid_lan_responses; - /* The response didn't have anyone waiting for it. */ - unsigned int unhandled_lan_responses; - - /* The command was delivered to the user. */ - unsigned int handled_commands; - /* The command had invalid data in it. */ - unsigned int invalid_commands; - /* The command didn't have anyone waiting for it. */ - unsigned int unhandled_commands; - - /* Invalid data in an event. 
*/ - unsigned int invalid_events; - /* Events that were received with the proper format. */ - unsigned int events; + /* + * run_to_completion duplicate of smb_info, smi_info + * and ipmi_serial_info structures. Used to decrease numbers of + * parameters passed by "low" level IPMI code. + */ + int run_to_completion; }; #define to_si_intf_from_dev(device) container_of(device, struct ipmi_smi, dev) @@ -368,12 +431,19 @@ static DEFINE_MUTEX(ipmidriver_mutex); static LIST_HEAD(ipmi_interfaces); static DEFINE_MUTEX(ipmi_interfaces_mutex); -/* List of watchers that want to know when smi's are added and - deleted. */ +/* + * List of watchers that want to know when smi's are added and deleted. + */ static LIST_HEAD(smi_watchers); static DEFINE_MUTEX(smi_watchers_mutex); +#define ipmi_inc_stat(intf, stat) \ + atomic_inc(&(intf)->stats[IPMI_STAT_ ## stat]) +#define ipmi_get_stat(intf, stat) \ + ((unsigned int) atomic_read(&(intf)->stats[IPMI_STAT_ ## stat])) + + static void free_recv_msg_list(struct list_head *q) { struct ipmi_recv_msg *msg, *msg2; @@ -417,10 +487,8 @@ static void clean_up_interface_data(ipmi_smi_t intf) for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { if ((intf->seq_table[i].inuse) - && (intf->seq_table[i].recv_msg)) - { + && (intf->seq_table[i].recv_msg)) ipmi_free_recv_msg(intf->seq_table[i].recv_msg); - } } } @@ -487,6 +555,7 @@ int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher) } return -ENOMEM; } +EXPORT_SYMBOL(ipmi_smi_watcher_register); int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher) { @@ -495,6 +564,7 @@ int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher) mutex_unlock(&smi_watchers_mutex); return 0; } +EXPORT_SYMBOL(ipmi_smi_watcher_unregister); /* * Must be called with smi_watchers_mutex held. 
@@ -530,8 +600,7 @@ ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2) } if ((addr1->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr1->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr1->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { struct ipmi_ipmb_addr *ipmb_addr1 = (struct ipmi_ipmb_addr *) addr1; struct ipmi_ipmb_addr *ipmb_addr2 @@ -559,9 +628,8 @@ ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2) int ipmi_validate_addr(struct ipmi_addr *addr, int len) { - if (len < sizeof(struct ipmi_system_interface_addr)) { + if (len < sizeof(struct ipmi_system_interface_addr)) return -EINVAL; - } if (addr->addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) { if (addr->channel != IPMI_BMC_CHANNEL) @@ -575,23 +643,21 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len) return -EINVAL; if ((addr->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { - if (len < sizeof(struct ipmi_ipmb_addr)) { + || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { + if (len < sizeof(struct ipmi_ipmb_addr)) return -EINVAL; - } return 0; } if (addr->addr_type == IPMI_LAN_ADDR_TYPE) { - if (len < sizeof(struct ipmi_lan_addr)) { + if (len < sizeof(struct ipmi_lan_addr)) return -EINVAL; - } return 0; } return -EINVAL; } +EXPORT_SYMBOL(ipmi_validate_addr); unsigned int ipmi_addr_length(int addr_type) { @@ -599,34 +665,28 @@ unsigned int ipmi_addr_length(int addr_type) return sizeof(struct ipmi_system_interface_addr); if ((addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) return sizeof(struct ipmi_ipmb_addr); - } if (addr_type == IPMI_LAN_ADDR_TYPE) return sizeof(struct ipmi_lan_addr); return 0; } +EXPORT_SYMBOL(ipmi_addr_length); static void deliver_response(struct ipmi_recv_msg *msg) { if (!msg->user) { ipmi_smi_t intf = msg->user_msg_data; - unsigned long flags; /* Special handling for NULL users. 
*/ if (intf->null_user_handler) { intf->null_user_handler(intf, msg); - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_local_responses); } else { /* No handler, so give up. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, unhandled_local_responses); } ipmi_free_recv_msg(msg); } else { @@ -646,9 +706,11 @@ deliver_err_response(struct ipmi_recv_msg *msg, int err) deliver_response(msg); } -/* Find the next sequence number not being used and add the given - message with the given timeout to the sequence table. This must be - called with the interface's seq_lock held. */ +/* + * Find the next sequence number not being used and add the given + * message with the given timeout to the sequence table. This must be + * called with the interface's seq_lock held. + */ static int intf_next_seq(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, unsigned long timeout, @@ -660,10 +722,8 @@ static int intf_next_seq(ipmi_smi_t intf, int rv = 0; unsigned int i; - for (i = intf->curr_seq; - (i+1)%IPMI_IPMB_NUM_SEQ != intf->curr_seq; - i = (i+1)%IPMI_IPMB_NUM_SEQ) - { + for (i = intf->curr_seq; (i+1)%IPMI_IPMB_NUM_SEQ != intf->curr_seq; + i = (i+1)%IPMI_IPMB_NUM_SEQ) { if (!intf->seq_table[i].inuse) break; } @@ -671,8 +731,10 @@ static int intf_next_seq(ipmi_smi_t intf, if (!intf->seq_table[i].inuse) { intf->seq_table[i].recv_msg = recv_msg; - /* Start with the maximum timeout, when the send response - comes in we will start the real timer. */ + /* + * Start with the maximum timeout, when the send response + * comes in we will start the real timer. 
+ */ intf->seq_table[i].timeout = MAX_MSG_TIMEOUT; intf->seq_table[i].orig_timeout = timeout; intf->seq_table[i].retries_left = retries; @@ -685,15 +747,17 @@ static int intf_next_seq(ipmi_smi_t intf, } else { rv = -EAGAIN; } - + return rv; } -/* Return the receive message for the given sequence number and - release the sequence number so it can be reused. Some other data - is passed in to be sure the message matches up correctly (to help - guard against message coming in after their timeout and the - sequence number being reused). */ +/* + * Return the receive message for the given sequence number and + * release the sequence number so it can be reused. Some other data + * is passed in to be sure the message matches up correctly (to help + * guard against message coming in after their timeout and the + * sequence number being reused). + */ static int intf_find_seq(ipmi_smi_t intf, unsigned char seq, short channel, @@ -712,11 +776,9 @@ static int intf_find_seq(ipmi_smi_t intf, if (intf->seq_table[seq].inuse) { struct ipmi_recv_msg *msg = intf->seq_table[seq].recv_msg; - if ((msg->addr.channel == channel) - && (msg->msg.cmd == cmd) - && (msg->msg.netfn == netfn) - && (ipmi_addr_equal(addr, &(msg->addr)))) - { + if ((msg->addr.channel == channel) && (msg->msg.cmd == cmd) + && (msg->msg.netfn == netfn) + && (ipmi_addr_equal(addr, &(msg->addr)))) { *recv_msg = msg; intf->seq_table[seq].inuse = 0; rv = 0; @@ -741,11 +803,12 @@ static int intf_start_seq_timer(ipmi_smi_t intf, GET_SEQ_FROM_MSGID(msgid, seq, seqid); spin_lock_irqsave(&(intf->seq_lock), flags); - /* We do this verification because the user can be deleted - while a message is outstanding. */ + /* + * We do this verification because the user can be deleted + * while a message is outstanding. 
+ */ if ((intf->seq_table[seq].inuse) - && (intf->seq_table[seq].seqid == seqid)) - { + && (intf->seq_table[seq].seqid == seqid)) { struct seq_table *ent = &(intf->seq_table[seq]); ent->timeout = ent->orig_timeout; rv = 0; @@ -770,11 +833,12 @@ static int intf_err_seq(ipmi_smi_t intf, GET_SEQ_FROM_MSGID(msgid, seq, seqid); spin_lock_irqsave(&(intf->seq_lock), flags); - /* We do this verification because the user can be deleted - while a message is outstanding. */ + /* + * We do this verification because the user can be deleted + * while a message is outstanding. + */ if ((intf->seq_table[seq].inuse) - && (intf->seq_table[seq].seqid == seqid)) - { + && (intf->seq_table[seq].seqid == seqid)) { struct seq_table *ent = &(intf->seq_table[seq]); ent->inuse = 0; @@ -800,24 +864,30 @@ int ipmi_create_user(unsigned int if_num, int rv = 0; ipmi_smi_t intf; - /* There is no module usecount here, because it's not - required. Since this can only be used by and called from - other modules, they will implicitly use this module, and - thus this can't be removed unless the other modules are - removed. */ + /* + * There is no module usecount here, because it's not + * required. Since this can only be used by and called from + * other modules, they will implicitly use this module, and + * thus this can't be removed unless the other modules are + * removed. + */ if (handler == NULL) return -EINVAL; - /* Make sure the driver is actually initialized, this handles - problems with initialization order. */ + /* + * Make sure the driver is actually initialized, this handles + * problems with initialization order. + */ if (!initialized) { rv = ipmi_init_msghandler(); if (rv) return rv; - /* The init code doesn't return an error if it was turned - off, but it won't initialize. Check that. */ + /* + * The init code doesn't return an error if it was turned + * off, but it won't initialize. Check that. 
+ */ if (!initialized) return -ENODEV; } @@ -858,8 +928,10 @@ int ipmi_create_user(unsigned int if_num, } } - /* Hold the lock so intf->handlers is guaranteed to be good - * until now */ + /* + * Hold the lock so intf->handlers is guaranteed to be good + * until now + */ mutex_unlock(&ipmi_interfaces_mutex); new_user->valid = 1; @@ -876,6 +948,7 @@ out_kfree: kfree(new_user); return rv; } +EXPORT_SYMBOL(ipmi_create_user); static void free_user(struct kref *ref) { @@ -899,8 +972,7 @@ int ipmi_destroy_user(ipmi_user_t user) for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { if (intf->seq_table[i].inuse - && (intf->seq_table[i].recv_msg->user == user)) - { + && (intf->seq_table[i].recv_msg->user == user)) { intf->seq_table[i].inuse = 0; ipmi_free_recv_msg(intf->seq_table[i].recv_msg); } @@ -943,6 +1015,7 @@ int ipmi_destroy_user(ipmi_user_t user) return 0; } +EXPORT_SYMBOL(ipmi_destroy_user); void ipmi_get_version(ipmi_user_t user, unsigned char *major, @@ -951,6 +1024,7 @@ void ipmi_get_version(ipmi_user_t user, *major = user->intf->ipmi_version_major; *minor = user->intf->ipmi_version_minor; } +EXPORT_SYMBOL(ipmi_get_version); int ipmi_set_my_address(ipmi_user_t user, unsigned int channel, @@ -961,6 +1035,7 @@ int ipmi_set_my_address(ipmi_user_t user, user->intf->channels[channel].address = address; return 0; } +EXPORT_SYMBOL(ipmi_set_my_address); int ipmi_get_my_address(ipmi_user_t user, unsigned int channel, @@ -971,6 +1046,7 @@ int ipmi_get_my_address(ipmi_user_t user, *address = user->intf->channels[channel].address; return 0; } +EXPORT_SYMBOL(ipmi_get_my_address); int ipmi_set_my_LUN(ipmi_user_t user, unsigned int channel, @@ -981,6 +1057,7 @@ int ipmi_set_my_LUN(ipmi_user_t user, user->intf->channels[channel].lun = LUN & 0x3; return 0; } +EXPORT_SYMBOL(ipmi_set_my_LUN); int ipmi_get_my_LUN(ipmi_user_t user, unsigned int channel, @@ -991,6 +1068,7 @@ int ipmi_get_my_LUN(ipmi_user_t user, *address = user->intf->channels[channel].lun; return 0; } 
+EXPORT_SYMBOL(ipmi_get_my_LUN); int ipmi_get_maintenance_mode(ipmi_user_t user) { @@ -1075,6 +1153,11 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) list_for_each_entry_safe(msg, msg2, &intf->waiting_events, link) list_move_tail(&msg->link, &msgs); intf->waiting_events_count = 0; + if (intf->event_msg_printed) { + printk(KERN_WARNING PFX "Event queue no longer" + " full\n"); + intf->event_msg_printed = 0; + } intf->delivering_events = 1; spin_unlock_irqrestore(&intf->events_lock, flags); @@ -1094,6 +1177,7 @@ int ipmi_set_gets_events(ipmi_user_t user, int val) return 0; } +EXPORT_SYMBOL(ipmi_set_gets_events); static struct cmd_rcvr *find_cmd_rcvr(ipmi_smi_t intf, unsigned char netfn, @@ -1159,6 +1243,7 @@ int ipmi_register_for_cmd(ipmi_user_t user, return rv; } +EXPORT_SYMBOL(ipmi_register_for_cmd); int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned char netfn, @@ -1196,19 +1281,13 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, } return rv; } - -void ipmi_user_set_run_to_completion(ipmi_user_t user, int val) -{ - ipmi_smi_t intf = user->intf; - if (intf->handlers) - intf->handlers->set_run_to_completion(intf->send_info, val); -} +EXPORT_SYMBOL(ipmi_unregister_for_cmd); static unsigned char ipmb_checksum(unsigned char *data, int size) { unsigned char csum = 0; - + for (; size > 0; size--, data++) csum += *data; @@ -1250,8 +1329,10 @@ static inline void format_ipmb_msg(struct ipmi_smi_msg *smi_msg, = ipmb_checksum(&(smi_msg->data[i+6]), smi_msg->data_size-6); - /* Add on the checksum size and the offset from the - broadcast. */ + /* + * Add on the checksum size and the offset from the + * broadcast. + */ smi_msg->data_size += 1 + i; smi_msg->msgid = msgid; @@ -1287,17 +1368,21 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg, = ipmb_checksum(&(smi_msg->data[7]), smi_msg->data_size-7); - /* Add on the checksum size and the offset from the - broadcast. */ + /* + * Add on the checksum size and the offset from the + * broadcast. 
+ */ smi_msg->data_size += 1; smi_msg->msgid = msgid; } -/* Separate from ipmi_request so that the user does not have to be - supplied in certain circumstances (mainly at panic time). If - messages are supplied, they will be freed, even if an error - occurs. */ +/* + * Separate from ipmi_request so that the user does not have to be + * supplied in certain circumstances (mainly at panic time). If + * messages are supplied, they will be freed, even if an error + * occurs. + */ static int i_ipmi_request(ipmi_user_t user, ipmi_smi_t intf, struct ipmi_addr *addr, @@ -1319,19 +1404,18 @@ static int i_ipmi_request(ipmi_user_t user, struct ipmi_smi_handlers *handlers; - if (supplied_recv) { + if (supplied_recv) recv_msg = supplied_recv; - } else { + else { recv_msg = ipmi_alloc_recv_msg(); - if (recv_msg == NULL) { + if (recv_msg == NULL) return -ENOMEM; - } } recv_msg->user_msg_data = user_msg_data; - if (supplied_smi) { + if (supplied_smi) smi_msg = (struct ipmi_smi_msg *) supplied_smi; - } else { + else { smi_msg = ipmi_alloc_smi_msg(); if (smi_msg == NULL) { ipmi_free_recv_msg(recv_msg); @@ -1350,8 +1434,10 @@ static int i_ipmi_request(ipmi_user_t user, if (user) kref_get(&user->refcount); recv_msg->msgid = msgid; - /* Store the message to send in the receive message so timeout - responses can get the proper response data. */ + /* + * Store the message to send in the receive message so timeout + * responses can get the proper response data. 
+ */ recv_msg->msg = *msg; if (addr->addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) { @@ -1365,9 +1451,7 @@ static int i_ipmi_request(ipmi_user_t user, smi_addr = (struct ipmi_system_interface_addr *) addr; if (smi_addr->lun > 3) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1377,13 +1461,12 @@ static int i_ipmi_request(ipmi_user_t user, if ((msg->netfn == IPMI_NETFN_APP_REQUEST) && ((msg->cmd == IPMI_SEND_MSG_CMD) || (msg->cmd == IPMI_GET_MSG_CMD) - || (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) - { - /* We don't let the user do these, since we manage - the sequence numbers. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + || (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) { + /* + * We don't let the user do these, since we manage + * the sequence numbers. 
+ */ + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1391,14 +1474,12 @@ static int i_ipmi_request(ipmi_user_t user, if (((msg->netfn == IPMI_NETFN_APP_REQUEST) && ((msg->cmd == IPMI_COLD_RESET_CMD) || (msg->cmd == IPMI_WARM_RESET_CMD))) - || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) - { + || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) { spin_lock_irqsave(&intf->maintenance_mode_lock, flags); intf->auto_maintenance_timeout = IPMI_MAINTENANCE_MODE_TIMEOUT; if (!intf->maintenance_mode - && !intf->maintenance_mode_enable) - { + && !intf->maintenance_mode_enable) { intf->maintenance_mode_enable = 1; maintenance_mode_update(intf); } @@ -1407,9 +1488,7 @@ static int i_ipmi_request(ipmi_user_t user, } if ((msg->data_len + 2) > IPMI_MAX_MSG_LENGTH) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EMSGSIZE; goto out_err; } @@ -1421,31 +1500,23 @@ static int i_ipmi_request(ipmi_user_t user, if (msg->data_len > 0) memcpy(&(smi_msg->data[2]), msg->data, msg->data_len); smi_msg->data_size = msg->data_len + 2; - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_local_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_local_commands); } else if ((addr->addr_type == IPMI_IPMB_ADDR_TYPE) - || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) - { + || (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)) { struct ipmi_ipmb_addr *ipmb_addr; unsigned char ipmb_seq; long seqid; int broadcast = 0; if (addr->channel >= IPMI_MAX_CHANNELS) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } if (intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_IPMB) - { - spin_lock_irqsave(&intf->counter_lock, 
flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + != IPMI_CHANNEL_MEDIUM_IPMB) { + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1457,9 +1528,11 @@ static int i_ipmi_request(ipmi_user_t user, retries = 4; } if (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE) { - /* Broadcasts add a zero at the beginning of the - message, but otherwise is the same as an IPMB - address. */ + /* + * Broadcasts add a zero at the beginning of the + * message, but otherwise is the same as an IPMB + * address. + */ addr->addr_type = IPMI_IPMB_ADDR_TYPE; broadcast = 1; } @@ -1469,21 +1542,19 @@ static int i_ipmi_request(ipmi_user_t user, if (retry_time_ms == 0) retry_time_ms = 1000; - /* 9 for the header and 1 for the checksum, plus - possibly one for the broadcast. */ + /* + * 9 for the header and 1 for the checksum, plus + * possibly one for the broadcast. + */ if ((msg->data_len + 10 + broadcast) > IPMI_MAX_MSG_LENGTH) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EMSGSIZE; goto out_err; } ipmb_addr = (struct ipmi_ipmb_addr *) addr; if (ipmb_addr->lun > 3) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1491,29 +1562,31 @@ static int i_ipmi_request(ipmi_user_t user, memcpy(&recv_msg->addr, ipmb_addr, sizeof(*ipmb_addr)); if (recv_msg->msg.netfn & 0x1) { - /* It's a response, so use the user's sequence - from msgid. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_ipmb_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + /* + * It's a response, so use the user's sequence + * from msgid. 
+ */ + ipmi_inc_stat(intf, sent_ipmb_responses); format_ipmb_msg(smi_msg, msg, ipmb_addr, msgid, msgid, broadcast, source_address, source_lun); - /* Save the receive message so we can use it - to deliver the response. */ + /* + * Save the receive message so we can use it + * to deliver the response. + */ smi_msg->user_data = recv_msg; } else { /* It's a command, so get a sequence for it. */ spin_lock_irqsave(&(intf->seq_lock), flags); - spin_lock(&intf->counter_lock); - intf->sent_ipmb_commands++; - spin_unlock(&intf->counter_lock); + ipmi_inc_stat(intf, sent_ipmb_commands); - /* Create a sequence number with a 1 second - timeout and 4 retries. */ + /* + * Create a sequence number with a 1 second + * timeout and 4 retries. + */ rv = intf_next_seq(intf, recv_msg, retry_time_ms, @@ -1522,34 +1595,42 @@ static int i_ipmi_request(ipmi_user_t user, &ipmb_seq, &seqid); if (rv) { - /* We have used up all the sequence numbers, - probably, so abort. */ + /* + * We have used up all the sequence numbers, + * probably, so abort. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); goto out_err; } - /* Store the sequence number in the message, - so that when the send message response - comes back we can start the timer. */ + /* + * Store the sequence number in the message, + * so that when the send message response + * comes back we can start the timer. + */ format_ipmb_msg(smi_msg, msg, ipmb_addr, STORE_SEQ_IN_MSGID(ipmb_seq, seqid), ipmb_seq, broadcast, source_address, source_lun); - /* Copy the message into the recv message data, so we - can retransmit it later if necessary. */ + /* + * Copy the message into the recv message data, so we + * can retransmit it later if necessary. + */ memcpy(recv_msg->msg_data, smi_msg->data, smi_msg->data_size); recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = smi_msg->data_size; - /* We don't unlock until here, because we need - to copy the completed message into the - recv_msg before we release the lock. 
- Otherwise, race conditions may bite us. I - know that's pretty paranoid, but I prefer - to be correct. */ + /* + * We don't unlock until here, because we need + * to copy the completed message into the + * recv_msg before we release the lock. + * Otherwise, race conditions may bite us. I + * know that's pretty paranoid, but I prefer + * to be correct. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); } } else if (addr->addr_type == IPMI_LAN_ADDR_TYPE) { @@ -1558,21 +1639,16 @@ static int i_ipmi_request(ipmi_user_t user, long seqid; if (addr->channel >= IPMI_MAX_CHANNELS) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } if ((intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_8023LAN) + != IPMI_CHANNEL_MEDIUM_8023LAN) && (intf->channels[addr->channel].medium - != IPMI_CHANNEL_MEDIUM_ASYNC)) - { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + != IPMI_CHANNEL_MEDIUM_ASYNC)) { + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1585,18 +1661,14 @@ static int i_ipmi_request(ipmi_user_t user, /* 11 for the header and 1 for the checksum. 
*/ if ((msg->data_len + 12) > IPMI_MAX_MSG_LENGTH) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EMSGSIZE; goto out_err; } lan_addr = (struct ipmi_lan_addr *) addr; if (lan_addr->lun > 3) { - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1604,28 +1676,30 @@ static int i_ipmi_request(ipmi_user_t user, memcpy(&recv_msg->addr, lan_addr, sizeof(*lan_addr)); if (recv_msg->msg.netfn & 0x1) { - /* It's a response, so use the user's sequence - from msgid. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_lan_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + /* + * It's a response, so use the user's sequence + * from msgid. + */ + ipmi_inc_stat(intf, sent_lan_responses); format_lan_msg(smi_msg, msg, lan_addr, msgid, msgid, source_lun); - /* Save the receive message so we can use it - to deliver the response. */ + /* + * Save the receive message so we can use it + * to deliver the response. + */ smi_msg->user_data = recv_msg; } else { /* It's a command, so get a sequence for it. */ spin_lock_irqsave(&(intf->seq_lock), flags); - spin_lock(&intf->counter_lock); - intf->sent_lan_commands++; - spin_unlock(&intf->counter_lock); + ipmi_inc_stat(intf, sent_lan_commands); - /* Create a sequence number with a 1 second - timeout and 4 retries. */ + /* + * Create a sequence number with a 1 second + * timeout and 4 retries. + */ rv = intf_next_seq(intf, recv_msg, retry_time_ms, @@ -1634,40 +1708,46 @@ static int i_ipmi_request(ipmi_user_t user, &ipmb_seq, &seqid); if (rv) { - /* We have used up all the sequence numbers, - probably, so abort. */ + /* + * We have used up all the sequence numbers, + * probably, so abort. 
+ */ spin_unlock_irqrestore(&(intf->seq_lock), flags); goto out_err; } - /* Store the sequence number in the message, - so that when the send message response - comes back we can start the timer. */ + /* + * Store the sequence number in the message, + * so that when the send message response + * comes back we can start the timer. + */ format_lan_msg(smi_msg, msg, lan_addr, STORE_SEQ_IN_MSGID(ipmb_seq, seqid), ipmb_seq, source_lun); - /* Copy the message into the recv message data, so we - can retransmit it later if necessary. */ + /* + * Copy the message into the recv message data, so we + * can retransmit it later if necessary. + */ memcpy(recv_msg->msg_data, smi_msg->data, smi_msg->data_size); recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = smi_msg->data_size; - /* We don't unlock until here, because we need - to copy the completed message into the - recv_msg before we release the lock. - Otherwise, race conditions may bite us. I - know that's pretty paranoid, but I prefer - to be correct. */ + /* + * We don't unlock until here, because we need + * to copy the completed message into the + * recv_msg before we release the lock. + * Otherwise, race conditions may bite us. I + * know that's pretty paranoid, but I prefer + * to be correct. + */ spin_unlock_irqrestore(&(intf->seq_lock), flags); } } else { /* Unknown address type. 
*/ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->sent_invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_invalid_commands); rv = -EINVAL; goto out_err; } @@ -1735,6 +1815,7 @@ int ipmi_request_settime(ipmi_user_t user, retries, retry_time_ms); } +EXPORT_SYMBOL(ipmi_request_settime); int ipmi_request_supply_msgs(ipmi_user_t user, struct ipmi_addr *addr, @@ -1766,6 +1847,7 @@ int ipmi_request_supply_msgs(ipmi_user_t user, lun, -1, 0); } +EXPORT_SYMBOL(ipmi_request_supply_msgs); #ifdef CONFIG_PROC_FS static int ipmb_file_read_proc(char *page, char **start, off_t off, @@ -1790,7 +1872,7 @@ static int version_file_read_proc(char *page, char **start, off_t off, char *out = (char *) page; ipmi_smi_t intf = data; - return sprintf(out, "%d.%d\n", + return sprintf(out, "%u.%u\n", ipmi_version_major(&intf->bmc->id), ipmi_version_minor(&intf->bmc->id)); } @@ -1801,65 +1883,65 @@ static int stat_file_read_proc(char *page, char **start, off_t off, char *out = (char *) page; ipmi_smi_t intf = data; - out += sprintf(out, "sent_invalid_commands: %d\n", - intf->sent_invalid_commands); - out += sprintf(out, "sent_local_commands: %d\n", - intf->sent_local_commands); - out += sprintf(out, "handled_local_responses: %d\n", - intf->handled_local_responses); - out += sprintf(out, "unhandled_local_responses: %d\n", - intf->unhandled_local_responses); - out += sprintf(out, "sent_ipmb_commands: %d\n", - intf->sent_ipmb_commands); - out += sprintf(out, "sent_ipmb_command_errs: %d\n", - intf->sent_ipmb_command_errs); - out += sprintf(out, "retransmitted_ipmb_commands: %d\n", - intf->retransmitted_ipmb_commands); - out += sprintf(out, "timed_out_ipmb_commands: %d\n", - intf->timed_out_ipmb_commands); - out += sprintf(out, "timed_out_ipmb_broadcasts: %d\n", - intf->timed_out_ipmb_broadcasts); - out += sprintf(out, "sent_ipmb_responses: %d\n", - intf->sent_ipmb_responses); - out += sprintf(out, "handled_ipmb_responses: %d\n", - 
intf->handled_ipmb_responses); - out += sprintf(out, "invalid_ipmb_responses: %d\n", - intf->invalid_ipmb_responses); - out += sprintf(out, "unhandled_ipmb_responses: %d\n", - intf->unhandled_ipmb_responses); - out += sprintf(out, "sent_lan_commands: %d\n", - intf->sent_lan_commands); - out += sprintf(out, "sent_lan_command_errs: %d\n", - intf->sent_lan_command_errs); - out += sprintf(out, "retransmitted_lan_commands: %d\n", - intf->retransmitted_lan_commands); - out += sprintf(out, "timed_out_lan_commands: %d\n", - intf->timed_out_lan_commands); - out += sprintf(out, "sent_lan_responses: %d\n", - intf->sent_lan_responses); - out += sprintf(out, "handled_lan_responses: %d\n", - intf->handled_lan_responses); - out += sprintf(out, "invalid_lan_responses: %d\n", - intf->invalid_lan_responses); - out += sprintf(out, "unhandled_lan_responses: %d\n", - intf->unhandled_lan_responses); - out += sprintf(out, "handled_commands: %d\n", - intf->handled_commands); - out += sprintf(out, "invalid_commands: %d\n", - intf->invalid_commands); - out += sprintf(out, "unhandled_commands: %d\n", - intf->unhandled_commands); - out += sprintf(out, "invalid_events: %d\n", - intf->invalid_events); - out += sprintf(out, "events: %d\n", - intf->events); + out += sprintf(out, "sent_invalid_commands: %u\n", + ipmi_get_stat(intf, sent_invalid_commands)); + out += sprintf(out, "sent_local_commands: %u\n", + ipmi_get_stat(intf, sent_local_commands)); + out += sprintf(out, "handled_local_responses: %u\n", + ipmi_get_stat(intf, handled_local_responses)); + out += sprintf(out, "unhandled_local_responses: %u\n", + ipmi_get_stat(intf, unhandled_local_responses)); + out += sprintf(out, "sent_ipmb_commands: %u\n", + ipmi_get_stat(intf, sent_ipmb_commands)); + out += sprintf(out, "sent_ipmb_command_errs: %u\n", + ipmi_get_stat(intf, sent_ipmb_command_errs)); + out += sprintf(out, "retransmitted_ipmb_commands: %u\n", + ipmi_get_stat(intf, retransmitted_ipmb_commands)); + out += sprintf(out, 
"timed_out_ipmb_commands: %u\n", + ipmi_get_stat(intf, timed_out_ipmb_commands)); + out += sprintf(out, "timed_out_ipmb_broadcasts: %u\n", + ipmi_get_stat(intf, timed_out_ipmb_broadcasts)); + out += sprintf(out, "sent_ipmb_responses: %u\n", + ipmi_get_stat(intf, sent_ipmb_responses)); + out += sprintf(out, "handled_ipmb_responses: %u\n", + ipmi_get_stat(intf, handled_ipmb_responses)); + out += sprintf(out, "invalid_ipmb_responses: %u\n", + ipmi_get_stat(intf, invalid_ipmb_responses)); + out += sprintf(out, "unhandled_ipmb_responses: %u\n", + ipmi_get_stat(intf, unhandled_ipmb_responses)); + out += sprintf(out, "sent_lan_commands: %u\n", + ipmi_get_stat(intf, sent_lan_commands)); + out += sprintf(out, "sent_lan_command_errs: %u\n", + ipmi_get_stat(intf, sent_lan_command_errs)); + out += sprintf(out, "retransmitted_lan_commands: %u\n", + ipmi_get_stat(intf, retransmitted_lan_commands)); + out += sprintf(out, "timed_out_lan_commands: %u\n", + ipmi_get_stat(intf, timed_out_lan_commands)); + out += sprintf(out, "sent_lan_responses: %u\n", + ipmi_get_stat(intf, sent_lan_responses)); + out += sprintf(out, "handled_lan_responses: %u\n", + ipmi_get_stat(intf, handled_lan_responses)); + out += sprintf(out, "invalid_lan_responses: %u\n", + ipmi_get_stat(intf, invalid_lan_responses)); + out += sprintf(out, "unhandled_lan_responses: %u\n", + ipmi_get_stat(intf, unhandled_lan_responses)); + out += sprintf(out, "handled_commands: %u\n", + ipmi_get_stat(intf, handled_commands)); + out += sprintf(out, "invalid_commands: %u\n", + ipmi_get_stat(intf, invalid_commands)); + out += sprintf(out, "unhandled_commands: %u\n", + ipmi_get_stat(intf, unhandled_commands)); + out += sprintf(out, "invalid_events: %u\n", + ipmi_get_stat(intf, invalid_events)); + out += sprintf(out, "events: %u\n", + ipmi_get_stat(intf, events)); return (out - ((char *) page)); } #endif /* CONFIG_PROC_FS */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, write_proc_t *write_proc, + 
read_proc_t *read_proc, void *data, struct module *owner) { int rv = 0; @@ -1886,7 +1968,6 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, } else { file->data = data; file->read_proc = read_proc; - file->write_proc = write_proc; file->owner = owner; mutex_lock(&smi->proc_entry_lock); @@ -1899,6 +1980,7 @@ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, return rv; } +EXPORT_SYMBOL(ipmi_smi_add_proc_entry); static int add_proc_entries(ipmi_smi_t smi, int num) { @@ -1909,23 +1991,22 @@ static int add_proc_entries(ipmi_smi_t smi, int num) smi->proc_dir = proc_mkdir(smi->proc_dir_name, proc_ipmi_root); if (!smi->proc_dir) rv = -ENOMEM; - else { + else smi->proc_dir->owner = THIS_MODULE; - } if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "stats", - stat_file_read_proc, NULL, + stat_file_read_proc, smi, THIS_MODULE); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "ipmb", - ipmb_file_read_proc, NULL, + ipmb_file_read_proc, smi, THIS_MODULE); if (rv == 0) rv = ipmi_smi_add_proc_entry(smi, "version", - version_file_read_proc, NULL, + version_file_read_proc, smi, THIS_MODULE); #endif /* CONFIG_PROC_FS */ @@ -2210,37 +2291,47 @@ static int create_files(struct bmc_device *bmc) err = device_create_file(&bmc->dev->dev, &bmc->device_id_attr); - if (err) goto out; + if (err) + goto out; err = device_create_file(&bmc->dev->dev, &bmc->provides_dev_sdrs_attr); - if (err) goto out_devid; + if (err) + goto out_devid; err = device_create_file(&bmc->dev->dev, &bmc->revision_attr); - if (err) goto out_sdrs; + if (err) + goto out_sdrs; err = device_create_file(&bmc->dev->dev, &bmc->firmware_rev_attr); - if (err) goto out_rev; + if (err) + goto out_rev; err = device_create_file(&bmc->dev->dev, &bmc->version_attr); - if (err) goto out_firm; + if (err) + goto out_firm; err = device_create_file(&bmc->dev->dev, &bmc->add_dev_support_attr); - if (err) goto out_version; + if (err) + goto out_version; err = device_create_file(&bmc->dev->dev, &bmc->manufacturer_id_attr); - if 
(err) goto out_add_dev; + if (err) + goto out_add_dev; err = device_create_file(&bmc->dev->dev, &bmc->product_id_attr); - if (err) goto out_manu; + if (err) + goto out_manu; if (bmc->id.aux_firmware_revision_set) { err = device_create_file(&bmc->dev->dev, &bmc->aux_firmware_rev_attr); - if (err) goto out_prod_id; + if (err) + goto out_prod_id; } if (bmc->guid_set) { err = device_create_file(&bmc->dev->dev, &bmc->guid_attr); - if (err) goto out_aux_firm; + if (err) + goto out_aux_firm; } return 0; @@ -2368,8 +2459,10 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, "ipmi_msghandler:" " Unable to register bmc device: %d\n", rv); - /* Don't go to out_err, you can only do that if - the device is registered already. */ + /* + * Don't go to out_err, you can only do that if + * the device is registered already. + */ return rv; } @@ -2560,17 +2653,18 @@ channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) - && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) - { + && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) { /* It's the one we want */ if (msg->msg.data[0] != 0) { /* Got an error from the channel, just go on. */ if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) { - /* If the MC does not support this - command, that is legal. We just - assume it has one IPMB at channel - zero. */ + /* + * If the MC does not support this + * command, that is legal. We just + * assume it has one IPMB at channel + * zero. 
+ */ intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB; intf->channels[0].protocol @@ -2591,7 +2685,7 @@ channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) intf->channels[chan].medium = msg->msg.data[2] & 0x7f; intf->channels[chan].protocol = msg->msg.data[3] & 0x1f; - next_channel: + next_channel: intf->curr_channel++; if (intf->curr_channel >= IPMI_MAX_CHANNELS) wake_up(&intf->waitq); @@ -2619,6 +2713,7 @@ void ipmi_poll_interface(ipmi_user_t user) if (intf->handlers->poll) intf->handlers->poll(intf->send_info); } +EXPORT_SYMBOL(ipmi_poll_interface); int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, @@ -2633,14 +2728,18 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, ipmi_smi_t tintf; struct list_head *link; - /* Make sure the driver is actually initialized, this handles - problems with initialization order. */ + /* + * Make sure the driver is actually initialized, this handles + * problems with initialization order. + */ if (!initialized) { rv = ipmi_init_msghandler(); if (rv) return rv; - /* The init code doesn't return an error if it was turned - off, but it won't initialize. Check that. */ + /* + * The init code doesn't return an error if it was turned + * off, but it won't initialize. Check that. + */ if (!initialized) return -ENODEV; } @@ -2688,8 +2787,9 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, spin_lock_init(&intf->maintenance_mode_lock); INIT_LIST_HEAD(&intf->cmd_rcvrs); init_waitqueue_head(&intf->waitq); + for (i = 0; i < IPMI_NUM_STATS; i++) + atomic_set(&intf->stats[i], 0); - spin_lock_init(&intf->counter_lock); intf->proc_dir = NULL; mutex_lock(&smi_watchers_mutex); @@ -2717,11 +2817,12 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, get_guid(intf); if ((intf->ipmi_version_major > 1) - || ((intf->ipmi_version_major == 1) - && (intf->ipmi_version_minor >= 5))) - { - /* Start scanning the channels to see what is - available. 
*/ + || ((intf->ipmi_version_major == 1) + && (intf->ipmi_version_minor >= 5))) { + /* + * Start scanning the channels to see what is + * available. + */ intf->null_user_handler = channel_handler; intf->curr_channel = 0; rv = send_channel_info_cmd(intf, 0); @@ -2769,6 +2870,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, return rv; } +EXPORT_SYMBOL(ipmi_register_smi); static void cleanup_smi_msgs(ipmi_smi_t intf) { @@ -2803,8 +2905,10 @@ int ipmi_unregister_smi(ipmi_smi_t intf) remove_proc_entries(intf); - /* Call all the watcher interfaces to tell them that - an interface is gone. */ + /* + * Call all the watcher interfaces to tell them that + * an interface is gone. + */ list_for_each_entry(w, &smi_watchers, link) w->smi_gone(intf_num); mutex_unlock(&smi_watchers_mutex); @@ -2812,22 +2916,21 @@ int ipmi_unregister_smi(ipmi_smi_t intf) kref_put(&intf->refcount, intf_free); return 0; } +EXPORT_SYMBOL(ipmi_unregister_smi); static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, struct ipmi_smi_msg *msg) { struct ipmi_ipmb_addr ipmb_addr; struct ipmi_recv_msg *recv_msg; - unsigned long flags; - - /* This is 11, not 10, because the response must contain a - * completion code. */ + /* + * This is 11, not 10, because the response must contain a + * completion code. + */ if (msg->rsp_size < 11) { /* Message not big enough, just ignore it. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->invalid_ipmb_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, invalid_ipmb_responses); return 0; } @@ -2841,37 +2944,38 @@ static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, ipmb_addr.channel = msg->rsp[3] & 0x0f; ipmb_addr.lun = msg->rsp[7] & 3; - /* It's a response from a remote entity. Look up the sequence - number and handle the response. */ + /* + * It's a response from a remote entity. Look up the sequence + * number and handle the response. 
+ */ if (intf_find_seq(intf, msg->rsp[7] >> 2, msg->rsp[3] & 0x0f, msg->rsp[8], (msg->rsp[4] >> 2) & (~1), (struct ipmi_addr *) &(ipmb_addr), - &recv_msg)) - { - /* We were unable to find the sequence number, - so just nuke the message. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_ipmb_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + &recv_msg)) { + /* + * We were unable to find the sequence number, + * so just nuke the message. + */ + ipmi_inc_stat(intf, unhandled_ipmb_responses); return 0; } memcpy(recv_msg->msg_data, &(msg->rsp[9]), msg->rsp_size - 9); - /* THe other fields matched, so no need to set them, except - for netfn, which needs to be the response that was - returned, not the request value. */ + /* + * The other fields matched, so no need to set them, except + * for netfn, which needs to be the response that was + * returned, not the request value. + */ recv_msg->msg.netfn = msg->rsp[4] >> 2; recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = msg->rsp_size - 10; recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_ipmb_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_ipmb_responses); deliver_response(recv_msg); return 0; @@ -2888,14 +2992,11 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, ipmi_user_t user = NULL; struct ipmi_ipmb_addr *ipmb_addr; struct ipmi_recv_msg *recv_msg; - unsigned long flags; struct ipmi_smi_handlers *handlers; if (msg->rsp_size < 10) { /* Message not big enough, just ignore it. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, invalid_commands); return 0; } @@ -2919,19 +3020,17 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, if (user == NULL) { /* We didn't find a user, deliver an error response. 
*/ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, unhandled_commands); msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); msg->data[1] = IPMI_SEND_MSG_CMD; msg->data[2] = msg->rsp[3]; msg->data[3] = msg->rsp[6]; - msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); + msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); msg->data[5] = ipmb_checksum(&(msg->data[3]), 2); msg->data[6] = intf->channels[msg->rsp[3] & 0xf].address; - /* rqseq/lun */ - msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); + /* rqseq/lun */ + msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); msg->data[8] = msg->rsp[8]; /* cmd */ msg->data[9] = IPMI_INVALID_CMD_COMPLETION_CODE; msg->data[10] = ipmb_checksum(&(msg->data[6]), 4); @@ -2950,23 +3049,25 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, handlers = intf->handlers; if (handlers) { handlers->sender(intf->send_info, msg, 0); - /* We used the message, so return the value - that causes it to not be freed or - queued. */ + /* + * We used the message, so return the value + * that causes it to not be freed or + * queued. + */ rv = -1; } rcu_read_unlock(); } else { /* Deliver the message to the user. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_commands); recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. 
+ */ rv = 1; kref_put(&user->refcount, free_user); } else { @@ -2977,8 +3078,10 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, ipmb_addr->lun = msg->rsp[7] & 3; ipmb_addr->channel = msg->rsp[3] & 0xf; - /* Extract the rest of the message information - from the IPMB header.*/ + /* + * Extract the rest of the message information + * from the IPMB header. + */ recv_msg->user = user; recv_msg->recv_type = IPMI_CMD_RECV_TYPE; recv_msg->msgid = msg->rsp[7] >> 2; @@ -2986,8 +3089,10 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, recv_msg->msg.cmd = msg->rsp[8]; recv_msg->msg.data = recv_msg->msg_data; - /* We chop off 10, not 9 bytes because the checksum - at the end also needs to be removed. */ + /* + * We chop off 10, not 9 bytes because the checksum + * at the end also needs to be removed. + */ recv_msg->msg.data_len = msg->rsp_size - 10; memcpy(recv_msg->msg_data, &(msg->rsp[9]), @@ -3004,16 +3109,15 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t intf, { struct ipmi_lan_addr lan_addr; struct ipmi_recv_msg *recv_msg; - unsigned long flags; - /* This is 13, not 12, because the response must contain a - * completion code. */ + /* + * This is 13, not 12, because the response must contain a + * completion code. + */ if (msg->rsp_size < 13) { /* Message not big enough, just ignore it. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->invalid_lan_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, invalid_lan_responses); return 0; } @@ -3030,37 +3134,38 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t intf, lan_addr.privilege = msg->rsp[3] >> 4; lan_addr.lun = msg->rsp[9] & 3; - /* It's a response from a remote entity. Look up the sequence - number and handle the response. */ + /* + * It's a response from a remote entity. Look up the sequence + * number and handle the response. 
+ */ if (intf_find_seq(intf, msg->rsp[9] >> 2, msg->rsp[3] & 0x0f, msg->rsp[10], (msg->rsp[6] >> 2) & (~1), (struct ipmi_addr *) &(lan_addr), - &recv_msg)) - { - /* We were unable to find the sequence number, - so just nuke the message. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_lan_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + &recv_msg)) { + /* + * We were unable to find the sequence number, + * so just nuke the message. + */ + ipmi_inc_stat(intf, unhandled_lan_responses); return 0; } memcpy(recv_msg->msg_data, &(msg->rsp[11]), msg->rsp_size - 11); - /* The other fields matched, so no need to set them, except - for netfn, which needs to be the response that was - returned, not the request value. */ + /* + * The other fields matched, so no need to set them, except + * for netfn, which needs to be the response that was + * returned, not the request value. + */ recv_msg->msg.netfn = msg->rsp[6] >> 2; recv_msg->msg.data = recv_msg->msg_data; recv_msg->msg.data_len = msg->rsp_size - 12; recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_lan_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_lan_responses); deliver_response(recv_msg); return 0; @@ -3077,13 +3182,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, ipmi_user_t user = NULL; struct ipmi_lan_addr *lan_addr; struct ipmi_recv_msg *recv_msg; - unsigned long flags; if (msg->rsp_size < 12) { /* Message not big enough, just ignore it. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->invalid_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, invalid_commands); return 0; } @@ -3107,23 +3209,23 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, if (user == NULL) { /* We didn't find a user, just give up. 
*/ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, unhandled_commands); - rv = 0; /* Don't do anything with these messages, just - allow them to be freed. */ + /* + * Don't do anything with these messages, just allow + * them to be freed. + */ + rv = 0; } else { /* Deliver the message to the user. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_commands++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_commands); recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling later. + */ rv = 1; kref_put(&user->refcount, free_user); } else { @@ -3137,8 +3239,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, lan_addr->channel = msg->rsp[3] & 0xf; lan_addr->privilege = msg->rsp[3] >> 4; - /* Extract the rest of the message information - from the IPMB header.*/ + /* + * Extract the rest of the message information + * from the IPMB header. + */ recv_msg->user = user; recv_msg->recv_type = IPMI_CMD_RECV_TYPE; recv_msg->msgid = msg->rsp[9] >> 2; @@ -3146,8 +3250,10 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t intf, recv_msg->msg.cmd = msg->rsp[10]; recv_msg->msg.data = recv_msg->msg_data; - /* We chop off 12, not 11 bytes because the checksum - at the end also needs to be removed. */ + /* + * We chop off 12, not 11 bytes because the checksum + * at the end also needs to be removed. 
+ */ recv_msg->msg.data_len = msg->rsp_size - 12; memcpy(recv_msg->msg_data, &(msg->rsp[11]), @@ -3163,7 +3269,7 @@ static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg, struct ipmi_smi_msg *msg) { struct ipmi_system_interface_addr *smi_addr; - + recv_msg->msgid = 0; smi_addr = (struct ipmi_system_interface_addr *) &(recv_msg->addr); smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; @@ -3189,9 +3295,7 @@ static int handle_read_event_rsp(ipmi_smi_t intf, if (msg->rsp_size < 19) { /* Message is too small to be an IPMB event. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->invalid_events++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, invalid_events); return 0; } @@ -3204,12 +3308,12 @@ static int handle_read_event_rsp(ipmi_smi_t intf, spin_lock_irqsave(&intf->events_lock, flags); - spin_lock(&intf->counter_lock); - intf->events++; - spin_unlock(&intf->counter_lock); + ipmi_inc_stat(intf, events); - /* Allocate and fill in one message for every user that is getting - events. */ + /* + * Allocate and fill in one message for every user that is + * getting events. + */ rcu_read_lock(); list_for_each_entry_rcu(user, &intf->users, link) { if (!user->gets_events) @@ -3223,9 +3327,11 @@ static int handle_read_event_rsp(ipmi_smi_t intf, list_del(&recv_msg->link); ipmi_free_recv_msg(recv_msg); } - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ rv = 1; goto out; } @@ -3246,13 +3352,17 @@ static int handle_read_event_rsp(ipmi_smi_t intf, deliver_response(recv_msg); } } else if (intf->waiting_events_count < MAX_EVENTS_IN_QUEUE) { - /* No one to receive the message, put it in queue if there's - not already too many things in the queue. */ + /* + * No one to receive the message, put it in queue if there's + * not already too many things in the queue. 
+ */ recv_msg = ipmi_alloc_recv_msg(); if (!recv_msg) { - /* We couldn't allocate memory for the - message, so requeue it for handling - later. */ + /* + * We couldn't allocate memory for the + * message, so requeue it for handling + * later. + */ rv = 1; goto out; } @@ -3260,11 +3370,14 @@ static int handle_read_event_rsp(ipmi_smi_t intf, copy_event_into_recv_msg(recv_msg, msg); list_add_tail(&(recv_msg->link), &(intf->waiting_events)); intf->waiting_events_count++; - } else { - /* There's too many things in the queue, discard this - message. */ - printk(KERN_WARNING PFX "Event queue full, discarding an" - " incoming event\n"); + } else if (!intf->event_msg_printed) { + /* + * There's too many things in the queue, discard this + * message. + */ + printk(KERN_WARNING PFX "Event queue full, discarding" + " incoming events\n"); + intf->event_msg_printed = 1; } out: @@ -3277,16 +3390,15 @@ static int handle_bmc_rsp(ipmi_smi_t intf, struct ipmi_smi_msg *msg) { struct ipmi_recv_msg *recv_msg; - unsigned long flags; struct ipmi_user *user; recv_msg = (struct ipmi_recv_msg *) msg->user_data; - if (recv_msg == NULL) - { - printk(KERN_WARNING"IPMI message received with no owner. This\n" - "could be because of a malformed message, or\n" - "because of a hardware error. Contact your\n" - "hardware vender for assistance\n"); + if (recv_msg == NULL) { + printk(KERN_WARNING + "IPMI message received with no owner. This\n" + "could be because of a malformed message, or\n" + "because of a hardware error. Contact your\n" + "hardware vender for assistance\n"); return 0; } @@ -3294,16 +3406,12 @@ static int handle_bmc_rsp(ipmi_smi_t intf, /* Make sure the user still exists. */ if (user && !user->valid) { /* The user for the message went away, so give up. 
*/ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, unhandled_local_responses); ipmi_free_recv_msg(recv_msg); } else { struct ipmi_system_interface_addr *smi_addr; - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, handled_local_responses); recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; recv_msg->msgid = msg->msgid; smi_addr = ((struct ipmi_system_interface_addr *) @@ -3324,9 +3432,11 @@ static int handle_bmc_rsp(ipmi_smi_t intf, return 0; } -/* Handle a new message. Return 1 if the message should be requeued, - 0 if the message should be freed, or -1 if the message should not - be freed or requeued. */ +/* + * Handle a new message. Return 1 if the message should be requeued, + * 0 if the message should be freed, or -1 if the message should not + * be freed or requeued. + */ static int handle_new_recv_msg(ipmi_smi_t intf, struct ipmi_smi_msg *msg) { @@ -3351,10 +3461,12 @@ static int handle_new_recv_msg(ipmi_smi_t intf, msg->rsp[1] = msg->data[1]; msg->rsp[2] = IPMI_ERR_UNSPECIFIED; msg->rsp_size = 3; - } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))/* Netfn */ - || (msg->rsp[1] != msg->data[1])) /* Command */ - { - /* The response is not even marginally correct. */ + } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1)) + || (msg->rsp[1] != msg->data[1])) { + /* + * The NetFN and Command in the response is not even + * marginally correct. 
+ */ printk(KERN_WARNING PFX "BMC returned incorrect response," " expected netfn %x cmd %x, got netfn %x cmd %x\n", (msg->data[0] >> 2) | 1, msg->data[1], @@ -3369,10 +3481,11 @@ static int handle_new_recv_msg(ipmi_smi_t intf, if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) && (msg->rsp[1] == IPMI_SEND_MSG_CMD) - && (msg->user_data != NULL)) - { - /* It's a response to a response we sent. For this we - deliver a send message response to the user. */ + && (msg->user_data != NULL)) { + /* + * It's a response to a response we sent. For this we + * deliver a send message response to the user. + */ struct ipmi_recv_msg *recv_msg = msg->user_data; requeue = 0; @@ -3398,8 +3511,7 @@ static int handle_new_recv_msg(ipmi_smi_t intf, recv_msg->msg_data[0] = msg->rsp[2]; deliver_response(recv_msg); } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) - && (msg->rsp[1] == IPMI_GET_MSG_CMD)) - { + && (msg->rsp[1] == IPMI_GET_MSG_CMD)) { /* It's from the receive queue. */ chan = msg->rsp[3] & 0xf; if (chan >= IPMI_MAX_CHANNELS) { @@ -3411,12 +3523,16 @@ static int handle_new_recv_msg(ipmi_smi_t intf, switch (intf->channels[chan].medium) { case IPMI_CHANNEL_MEDIUM_IPMB: if (msg->rsp[4] & 0x04) { - /* It's a response, so find the - requesting message and send it up. */ + /* + * It's a response, so find the + * requesting message and send it up. + */ requeue = handle_ipmb_get_msg_rsp(intf, msg); } else { - /* It's a command to the SMS from some other - entity. Handle that. */ + /* + * It's a command to the SMS from some other + * entity. Handle that. + */ requeue = handle_ipmb_get_msg_cmd(intf, msg); } break; @@ -3424,25 +3540,30 @@ static int handle_new_recv_msg(ipmi_smi_t intf, case IPMI_CHANNEL_MEDIUM_8023LAN: case IPMI_CHANNEL_MEDIUM_ASYNC: if (msg->rsp[6] & 0x04) { - /* It's a response, so find the - requesting message and send it up. */ + /* + * It's a response, so find the + * requesting message and send it up. 
+ */ requeue = handle_lan_get_msg_rsp(intf, msg); } else { - /* It's a command to the SMS from some other - entity. Handle that. */ + /* + * It's a command to the SMS from some other + * entity. Handle that. + */ requeue = handle_lan_get_msg_cmd(intf, msg); } break; default: - /* We don't handle the channel type, so just - * free the message. */ + /* + * We don't handle the channel type, so just + * free the message. + */ requeue = 0; } } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) - && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) - { + && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) { /* It's an asyncronous event. */ requeue = handle_read_event_rsp(intf, msg); } else { @@ -3458,71 +3579,82 @@ static int handle_new_recv_msg(ipmi_smi_t intf, void ipmi_smi_msg_received(ipmi_smi_t intf, struct ipmi_smi_msg *msg) { - unsigned long flags; + unsigned long flags = 0; /* keep us warning-free. */ int rv; + int run_to_completion; if ((msg->data_size >= 2) && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2)) && (msg->data[1] == IPMI_SEND_MSG_CMD) - && (msg->user_data == NULL)) - { - /* This is the local response to a command send, start - the timer for these. The user_data will not be - NULL if this is a response send, and we will let - response sends just go through. */ - - /* Check for errors, if we get certain errors (ones - that mean basically we can try again later), we - ignore them and start the timer. Otherwise we - report the error immediately. */ + && (msg->user_data == NULL)) { + /* + * This is the local response to a command send, start + * the timer for these. The user_data will not be + * NULL if this is a response send, and we will let + * response sends just go through. + */ + + /* + * Check for errors, if we get certain errors (ones + * that mean basically we can try again later), we + * ignore them and start the timer. Otherwise we + * report the error immediately. 
+ */ if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0) && (msg->rsp[2] != IPMI_NODE_BUSY_ERR) && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR) && (msg->rsp[2] != IPMI_BUS_ERR) - && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) - { + && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) { int chan = msg->rsp[3] & 0xf; /* Got an error sending the message, handle it. */ - spin_lock_irqsave(&intf->counter_lock, flags); if (chan >= IPMI_MAX_CHANNELS) ; /* This shouldn't happen */ else if ((intf->channels[chan].medium == IPMI_CHANNEL_MEDIUM_8023LAN) || (intf->channels[chan].medium == IPMI_CHANNEL_MEDIUM_ASYNC)) - intf->sent_lan_command_errs++; + ipmi_inc_stat(intf, sent_lan_command_errs); else - intf->sent_ipmb_command_errs++; - spin_unlock_irqrestore(&intf->counter_lock, flags); + ipmi_inc_stat(intf, sent_ipmb_command_errs); intf_err_seq(intf, msg->msgid, msg->rsp[2]); - } else { + } else /* The message was sent, start the timer. */ intf_start_seq_timer(intf, msg->msgid); - } ipmi_free_smi_msg(msg); goto out; } - /* To preserve message order, if the list is not empty, we - tack this message onto the end of the list. */ - spin_lock_irqsave(&intf->waiting_msgs_lock, flags); + /* + * To preserve message order, if the list is not empty, we + * tack this message onto the end of the list. + */ + run_to_completion = intf->run_to_completion; + if (!run_to_completion) + spin_lock_irqsave(&intf->waiting_msgs_lock, flags); if (!list_empty(&intf->waiting_msgs)) { list_add_tail(&msg->link, &intf->waiting_msgs); - spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); + if (!run_to_completion) + spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); goto out; } - spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); - + if (!run_to_completion) + spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); + rv = handle_new_recv_msg(intf, msg); if (rv > 0) { - /* Could not handle the message now, just add it to a - list to handle later. 
*/ - spin_lock_irqsave(&intf->waiting_msgs_lock, flags); + /* + * Could not handle the message now, just add it to a + * list to handle later. + */ + run_to_completion = intf->run_to_completion; + if (!run_to_completion) + spin_lock_irqsave(&intf->waiting_msgs_lock, flags); list_add_tail(&msg->link, &intf->waiting_msgs); - spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); + if (!run_to_completion) + spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); } else if (rv == 0) { ipmi_free_smi_msg(msg); } @@ -3530,6 +3662,7 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, out: return; } +EXPORT_SYMBOL(ipmi_smi_msg_received); void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) { @@ -3544,7 +3677,7 @@ void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) } rcu_read_unlock(); } - +EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); static struct ipmi_smi_msg * smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, @@ -3552,14 +3685,16 @@ smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, { struct ipmi_smi_msg *smi_msg = ipmi_alloc_smi_msg(); if (!smi_msg) - /* If we can't allocate the message, then just return, we - get 4 retries, so this should be ok. */ + /* + * If we can't allocate the message, then just return, we + * get 4 retries, so this should be ok. 
+ */ return NULL; memcpy(smi_msg->data, recv_msg->msg.data, recv_msg->msg.data_len); smi_msg->data_size = recv_msg->msg.data_len; smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid); - + #ifdef DEBUG_MSGING { int m; @@ -3594,28 +3729,26 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, ent->inuse = 0; msg = ent->recv_msg; list_add_tail(&msg->link, timeouts); - spin_lock(&intf->counter_lock); if (ent->broadcast) - intf->timed_out_ipmb_broadcasts++; + ipmi_inc_stat(intf, timed_out_ipmb_broadcasts); else if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE) - intf->timed_out_lan_commands++; + ipmi_inc_stat(intf, timed_out_lan_commands); else - intf->timed_out_ipmb_commands++; - spin_unlock(&intf->counter_lock); + ipmi_inc_stat(intf, timed_out_ipmb_commands); } else { struct ipmi_smi_msg *smi_msg; /* More retries, send again. */ - /* Start with the max timer, set to normal - timer after the message is sent. */ + /* + * Start with the max timer, set to normal timer after + * the message is sent. + */ ent->timeout = MAX_MSG_TIMEOUT; ent->retries_left--; - spin_lock(&intf->counter_lock); if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE) - intf->retransmitted_lan_commands++; + ipmi_inc_stat(intf, retransmitted_lan_commands); else - intf->retransmitted_ipmb_commands++; - spin_unlock(&intf->counter_lock); + ipmi_inc_stat(intf, retransmitted_ipmb_commands); smi_msg = smi_from_recv_msg(intf, ent->recv_msg, slot, ent->seqid); @@ -3624,11 +3757,13 @@ static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, spin_unlock_irqrestore(&intf->seq_lock, *flags); - /* Send the new message. We send with a zero - * priority. It timed out, I doubt time is - * that critical now, and high priority - * messages are really only for messages to the - * local MC, which don't get resent. */ + /* + * Send the new message. We send with a zero + * priority. 
It timed out, I doubt time is that + * critical now, and high priority messages are really + * only for messages to the local MC, which don't get + * resent. + */ handlers = intf->handlers; if (handlers) intf->handlers->sender(intf->send_info, @@ -3659,16 +3794,20 @@ static void ipmi_timeout_handler(long timeout_period) list_del(&smi_msg->link); ipmi_free_smi_msg(smi_msg); } else { - /* To preserve message order, quit if we - can't handle a message. */ + /* + * To preserve message order, quit if we + * can't handle a message. + */ break; } } spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); - /* Go through the seq table and find any messages that - have timed out, putting them in the timeouts - list. */ + /* + * Go through the seq table and find any messages that + * have timed out, putting them in the timeouts + * list. + */ INIT_LIST_HEAD(&timeouts); spin_lock_irqsave(&intf->seq_lock, flags); for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) @@ -3694,8 +3833,7 @@ static void ipmi_timeout_handler(long timeout_period) intf->auto_maintenance_timeout -= timeout_period; if (!intf->maintenance_mode - && (intf->auto_maintenance_timeout <= 0)) - { + && (intf->auto_maintenance_timeout <= 0)) { intf->maintenance_mode_enable = 0; maintenance_mode_update(intf); } @@ -3713,8 +3851,10 @@ static void ipmi_request_event(void) struct ipmi_smi_handlers *handlers; rcu_read_lock(); - /* Called from the timer, no need to check if handlers is - * valid. */ + /* + * Called from the timer, no need to check if handlers is + * valid. + */ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { /* No event requests when in maintenance mode. */ if (intf->maintenance_mode_enable) @@ -3735,10 +3875,12 @@ static struct timer_list ipmi_timer; /* How many jiffies does it take to get to the timeout time. */ #define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000) -/* Request events from the queue every second (this is the number of - IPMI_TIMEOUT_TIMES between event requests). 
Hopefully, in the - future, IPMI will add a way to know immediately if an event is in - the queue and this silliness can go away. */ +/* + * Request events from the queue every second (this is the number of + * IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the + * future, IPMI will add a way to know immediately if an event is in + * the queue and this silliness can go away. + */ #define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME)) static atomic_t stop_operation; @@ -3782,6 +3924,7 @@ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) } return rv; } +EXPORT_SYMBOL(ipmi_alloc_smi_msg); static void free_recv_msg(struct ipmi_recv_msg *msg) { @@ -3789,7 +3932,7 @@ static void free_recv_msg(struct ipmi_recv_msg *msg) kfree(msg); } -struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) +static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) { struct ipmi_recv_msg *rv; @@ -3808,6 +3951,7 @@ void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) kref_put(&msg->user->refcount, free_user); msg->done(msg); } +EXPORT_SYMBOL(ipmi_free_recv_msg); #ifdef CONFIG_IPMI_PANIC_EVENT @@ -3825,8 +3969,7 @@ static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_SENSOR_EVENT_RESPONSE) && (msg->msg.cmd == IPMI_GET_EVENT_RECEIVER_CMD) - && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) - { + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { /* A get event receiver command, save it. */ intf->event_receiver = msg->msg.data[1]; intf->event_receiver_lun = msg->msg.data[2] & 0x3; @@ -3838,10 +3981,11 @@ static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) && (msg->msg.cmd == IPMI_GET_DEVICE_ID_CMD) - && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) - { - /* A get device id command, save if we are an event - receiver or generator. 
*/ + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { + /* + * A get device id command, save if we are an event + * receiver or generator. + */ intf->local_sel_device = (msg->msg.data[6] >> 2) & 1; intf->local_event_generator = (msg->msg.data[6] >> 5) & 1; } @@ -3874,8 +4018,10 @@ static void send_panic_events(char *str) data[4] = 0x6f; /* Sensor specific, IPMI table 36-1 */ data[5] = 0xa1; /* Runtime stop OEM bytes 2 & 3. */ - /* Put a few breadcrumbs in. Hopefully later we can add more things - to make the panic events more useful. */ + /* + * Put a few breadcrumbs in. Hopefully later we can add more things + * to make the panic events more useful. + */ if (str) { data[3] = str[0]; data[6] = str[1]; @@ -3891,6 +4037,7 @@ static void send_panic_events(char *str) /* Interface is not ready. */ continue; + intf->run_to_completion = 1; /* Send the event announcing the panic. */ intf->handlers->set_run_to_completion(intf->send_info, 1); i_ipmi_request(NULL, @@ -3908,9 +4055,11 @@ static void send_panic_events(char *str) } #ifdef CONFIG_IPMI_PANIC_STRING - /* On every interface, dump a bunch of OEM event holding the - string. */ - if (!str) + /* + * On every interface, dump a bunch of OEM event holding the + * string. + */ + if (!str) return; /* For every registered interface, send the event. */ @@ -3931,11 +4080,13 @@ static void send_panic_events(char *str) */ smp_rmb(); - /* First job here is to figure out where to send the - OEM events. There's no way in IPMI to send OEM - events using an event send command, so we have to - find the SEL to put them in and stick them in - there. */ + /* + * First job here is to figure out where to send the + * OEM events. There's no way in IPMI to send OEM + * events using an event send command, so we have to + * find the SEL to put them in and stick them in + * there. + */ /* Get capabilities from the get device id. 
*/ intf->local_sel_device = 0; @@ -3983,24 +4134,29 @@ static void send_panic_events(char *str) } intf->null_user_handler = NULL; - /* Validate the event receiver. The low bit must not - be 1 (it must be a valid IPMB address), it cannot - be zero, and it must not be my address. */ - if (((intf->event_receiver & 1) == 0) + /* + * Validate the event receiver. The low bit must not + * be 1 (it must be a valid IPMB address), it cannot + * be zero, and it must not be my address. + */ + if (((intf->event_receiver & 1) == 0) && (intf->event_receiver != 0) - && (intf->event_receiver != intf->channels[0].address)) - { - /* The event receiver is valid, send an IPMB - message. */ + && (intf->event_receiver != intf->channels[0].address)) { + /* + * The event receiver is valid, send an IPMB + * message. + */ ipmb = (struct ipmi_ipmb_addr *) &addr; ipmb->addr_type = IPMI_IPMB_ADDR_TYPE; ipmb->channel = 0; /* FIXME - is this right? */ ipmb->lun = intf->event_receiver_lun; ipmb->slave_addr = intf->event_receiver; } else if (intf->local_sel_device) { - /* The event receiver was not valid (or was - me), but I am an SEL device, just dump it - in my SEL. */ + /* + * The event receiver was not valid (or was + * me), but I am an SEL device, just dump it + * in my SEL. + */ si = (struct ipmi_system_interface_addr *) &addr; si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; si->channel = IPMI_BMC_CHANNEL; @@ -4008,7 +4164,6 @@ static void send_panic_events(char *str) } else continue; /* No where to send the event. */ - msg.netfn = IPMI_NETFN_STORAGE_REQUEST; /* Storage. */ msg.cmd = IPMI_ADD_SEL_ENTRY_CMD; msg.data = data; @@ -4025,8 +4180,10 @@ static void send_panic_events(char *str) data[2] = 0xf0; /* OEM event without timestamp. */ data[3] = intf->channels[0].address; data[4] = j++; /* sequence # */ - /* Always give 11 bytes, so strncpy will fill - it with zeroes for me. */ + /* + * Always give 11 bytes, so strncpy will fill + * it with zeroes for me. 
+ */ strncpy(data+5, p, 11); p += size; @@ -4043,7 +4200,7 @@ static void send_panic_events(char *str) intf->channels[0].lun, 0, 1); /* no retry, and no wait. */ } - } + } #endif /* CONFIG_IPMI_PANIC_STRING */ } #endif /* CONFIG_IPMI_PANIC_EVENT */ @@ -4052,7 +4209,7 @@ static int has_panicked; static int panic_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { ipmi_smi_t intf; @@ -4066,6 +4223,7 @@ static int panic_event(struct notifier_block *this, /* Interface is not ready. */ continue; + intf->run_to_completion = 1; intf->handlers->set_run_to_completion(intf->send_info, 1); } @@ -4133,11 +4291,16 @@ static __exit void cleanup_ipmi(void) atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); - /* This can't be called if any interfaces exist, so no worry about - shutting down the interfaces. */ + /* + * This can't be called if any interfaces exist, so no worry + * about shutting down the interfaces. + */ - /* Tell the timer to stop, then wait for it to stop. This avoids - problems with race conditions removing the timer here. */ + /* + * Tell the timer to stop, then wait for it to stop. This + * avoids problems with race conditions removing the timer + * here. 
+ */ atomic_inc(&stop_operation); del_timer_sync(&ipmi_timer); @@ -4164,31 +4327,6 @@ module_exit(cleanup_ipmi); module_init(ipmi_init_msghandler_mod); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>"); -MODULE_DESCRIPTION("Incoming and outgoing message routing for an IPMI interface."); +MODULE_DESCRIPTION("Incoming and outgoing message routing for an IPMI" + " interface."); MODULE_VERSION(IPMI_DRIVER_VERSION); - -EXPORT_SYMBOL(ipmi_create_user); -EXPORT_SYMBOL(ipmi_destroy_user); -EXPORT_SYMBOL(ipmi_get_version); -EXPORT_SYMBOL(ipmi_request_settime); -EXPORT_SYMBOL(ipmi_request_supply_msgs); -EXPORT_SYMBOL(ipmi_poll_interface); -EXPORT_SYMBOL(ipmi_register_smi); -EXPORT_SYMBOL(ipmi_unregister_smi); -EXPORT_SYMBOL(ipmi_register_for_cmd); -EXPORT_SYMBOL(ipmi_unregister_for_cmd); -EXPORT_SYMBOL(ipmi_smi_msg_received); -EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); -EXPORT_SYMBOL(ipmi_alloc_smi_msg); -EXPORT_SYMBOL(ipmi_addr_length); -EXPORT_SYMBOL(ipmi_validate_addr); -EXPORT_SYMBOL(ipmi_set_gets_events); -EXPORT_SYMBOL(ipmi_smi_watcher_register); -EXPORT_SYMBOL(ipmi_smi_watcher_unregister); -EXPORT_SYMBOL(ipmi_set_my_address); -EXPORT_SYMBOL(ipmi_get_my_address); -EXPORT_SYMBOL(ipmi_set_my_LUN); -EXPORT_SYMBOL(ipmi_get_my_LUN); -EXPORT_SYMBOL(ipmi_smi_add_proc_entry); -EXPORT_SYMBOL(ipmi_user_set_run_to_completion); -EXPORT_SYMBOL(ipmi_free_recv_msg); diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index b86186de7f0..a261bd735df 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -87,7 +87,10 @@ MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog " /* parameter definition to allow user to flag power cycle */ module_param(poweroff_powercycle, int, 0644); -MODULE_PARM_DESC(poweroff_powercycle, " Set to non-zero to enable power cycle instead of power down. 
Power cycle is contingent on hardware support, otherwise it defaults back to power down."); +MODULE_PARM_DESC(poweroff_powercycle, + " Set to non-zero to enable power cycle instead of power" + " down. Power cycle is contingent on hardware support," + " otherwise it defaults back to power down."); /* Stuff from the get device id command. */ static unsigned int mfg_id; @@ -95,22 +98,25 @@ static unsigned int prod_id; static unsigned char capabilities; static unsigned char ipmi_version; -/* We use our own messages for this operation, we don't let the system - allocate them, since we may be in a panic situation. The whole - thing is single-threaded, anyway, so multiple messages are not - required. */ +/* + * We use our own messages for this operation, we don't let the system + * allocate them, since we may be in a panic situation. The whole + * thing is single-threaded, anyway, so multiple messages are not + * required. + */ +static atomic_t dummy_count = ATOMIC_INIT(0); static void dummy_smi_free(struct ipmi_smi_msg *msg) { + atomic_dec(&dummy_count); } static void dummy_recv_free(struct ipmi_recv_msg *msg) { + atomic_dec(&dummy_count); } -static struct ipmi_smi_msg halt_smi_msg = -{ +static struct ipmi_smi_msg halt_smi_msg = { .done = dummy_smi_free }; -static struct ipmi_recv_msg halt_recv_msg = -{ +static struct ipmi_recv_msg halt_recv_msg = { .done = dummy_recv_free }; @@ -127,8 +133,7 @@ static void receive_handler(struct ipmi_recv_msg *recv_msg, void *handler_data) complete(comp); } -static struct ipmi_user_hndl ipmi_poweroff_handler = -{ +static struct ipmi_user_hndl ipmi_poweroff_handler = { .ipmi_recv_hndl = receive_handler }; @@ -152,17 +157,28 @@ static int ipmi_request_wait_for_response(ipmi_user_t user, return halt_recv_msg.msg.data[0]; } -/* We are in run-to-completion mode, no completion is desired. */ +/* Wait for message to complete, spinning. 
*/ static int ipmi_request_in_rc_mode(ipmi_user_t user, struct ipmi_addr *addr, struct kernel_ipmi_msg *send_msg) { int rv; + atomic_set(&dummy_count, 2); rv = ipmi_request_supply_msgs(user, addr, 0, send_msg, NULL, &halt_smi_msg, &halt_recv_msg, 0); - if (rv) + if (rv) { + atomic_set(&dummy_count, 0); return rv; + } + + /* + * Spin until our message is done. + */ + while (atomic_read(&dummy_count) > 0) { + ipmi_poll_interface(user); + cpu_relax(); + } return halt_recv_msg.msg.data[0]; } @@ -184,47 +200,47 @@ static int ipmi_request_in_rc_mode(ipmi_user_t user, static void (*atca_oem_poweroff_hook)(ipmi_user_t user); -static void pps_poweroff_atca (ipmi_user_t user) +static void pps_poweroff_atca(ipmi_user_t user) { - struct ipmi_system_interface_addr smi_addr; - struct kernel_ipmi_msg send_msg; - int rv; - /* - * Configure IPMI address for local access - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; - - printk(KERN_INFO PFX "PPS powerdown hook used"); - - send_msg.netfn = IPMI_NETFN_OEM; - send_msg.cmd = IPMI_ATCA_PPS_GRACEFUL_RESTART; - send_msg.data = IPMI_ATCA_PPS_IANA; - send_msg.data_len = 3; - rv = ipmi_request_in_rc_mode(user, - (struct ipmi_addr *) &smi_addr, - &send_msg); - if (rv && rv != IPMI_UNKNOWN_ERR_COMPLETION_CODE) { - printk(KERN_ERR PFX "Unable to send ATCA ," - " IPMI error 0x%x\n", rv); - } + struct ipmi_system_interface_addr smi_addr; + struct kernel_ipmi_msg send_msg; + int rv; + /* + * Configure IPMI address for local access + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; + + printk(KERN_INFO PFX "PPS powerdown hook used"); + + send_msg.netfn = IPMI_NETFN_OEM; + send_msg.cmd = IPMI_ATCA_PPS_GRACEFUL_RESTART; + send_msg.data = IPMI_ATCA_PPS_IANA; + send_msg.data_len = 3; + rv = ipmi_request_in_rc_mode(user, + (struct ipmi_addr *) &smi_addr, + &send_msg); + if (rv && rv != 
IPMI_UNKNOWN_ERR_COMPLETION_CODE) { + printk(KERN_ERR PFX "Unable to send ATCA ," + " IPMI error 0x%x\n", rv); + } return; } -static int ipmi_atca_detect (ipmi_user_t user) +static int ipmi_atca_detect(ipmi_user_t user) { struct ipmi_system_interface_addr smi_addr; struct kernel_ipmi_msg send_msg; int rv; unsigned char data[1]; - /* - * Configure IPMI address for local access - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; + /* + * Configure IPMI address for local access + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; /* * Use get address info to check and see if we are ATCA @@ -238,28 +254,30 @@ static int ipmi_atca_detect (ipmi_user_t user) (struct ipmi_addr *) &smi_addr, &send_msg); - printk(KERN_INFO PFX "ATCA Detect mfg 0x%X prod 0x%X\n", mfg_id, prod_id); - if((mfg_id == IPMI_MOTOROLA_MANUFACTURER_ID) - && (prod_id == IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID)) { - printk(KERN_INFO PFX "Installing Pigeon Point Systems Poweroff Hook\n"); + printk(KERN_INFO PFX "ATCA Detect mfg 0x%X prod 0x%X\n", + mfg_id, prod_id); + if ((mfg_id == IPMI_MOTOROLA_MANUFACTURER_ID) + && (prod_id == IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID)) { + printk(KERN_INFO PFX + "Installing Pigeon Point Systems Poweroff Hook\n"); atca_oem_poweroff_hook = pps_poweroff_atca; } return !rv; } -static void ipmi_poweroff_atca (ipmi_user_t user) +static void ipmi_poweroff_atca(ipmi_user_t user) { struct ipmi_system_interface_addr smi_addr; struct kernel_ipmi_msg send_msg; int rv; unsigned char data[4]; - /* - * Configure IPMI address for local access - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; + /* + * Configure IPMI address for local access + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; printk(KERN_INFO PFX "Powering down 
via ATCA power command\n"); @@ -273,23 +291,24 @@ static void ipmi_poweroff_atca (ipmi_user_t user) data[2] = 0; /* Power Level */ data[3] = 0; /* Don't change saved presets */ send_msg.data = data; - send_msg.data_len = sizeof (data); + send_msg.data_len = sizeof(data); rv = ipmi_request_in_rc_mode(user, (struct ipmi_addr *) &smi_addr, &send_msg); - /** At this point, the system may be shutting down, and most - ** serial drivers (if used) will have interrupts turned off - ** it may be better to ignore IPMI_UNKNOWN_ERR_COMPLETION_CODE - ** return code - **/ - if (rv && rv != IPMI_UNKNOWN_ERR_COMPLETION_CODE) { + /* + * At this point, the system may be shutting down, and most + * serial drivers (if used) will have interrupts turned off + * it may be better to ignore IPMI_UNKNOWN_ERR_COMPLETION_CODE + * return code + */ + if (rv && rv != IPMI_UNKNOWN_ERR_COMPLETION_CODE) { printk(KERN_ERR PFX "Unable to send ATCA powerdown message," " IPMI error 0x%x\n", rv); goto out; } - if(atca_oem_poweroff_hook) - return atca_oem_poweroff_hook(user); + if (atca_oem_poweroff_hook) + atca_oem_poweroff_hook(user); out: return; } @@ -310,13 +329,13 @@ static void ipmi_poweroff_atca (ipmi_user_t user) #define IPMI_CPI1_PRODUCT_ID 0x000157 #define IPMI_CPI1_MANUFACTURER_ID 0x0108 -static int ipmi_cpi1_detect (ipmi_user_t user) +static int ipmi_cpi1_detect(ipmi_user_t user) { return ((mfg_id == IPMI_CPI1_MANUFACTURER_ID) && (prod_id == IPMI_CPI1_PRODUCT_ID)); } -static void ipmi_poweroff_cpi1 (ipmi_user_t user) +static void ipmi_poweroff_cpi1(ipmi_user_t user) { struct ipmi_system_interface_addr smi_addr; struct ipmi_ipmb_addr ipmb_addr; @@ -328,12 +347,12 @@ static void ipmi_poweroff_cpi1 (ipmi_user_t user) unsigned char aer_addr; unsigned char aer_lun; - /* - * Configure IPMI address for local access - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; + /* + * Configure IPMI address for local access + */ + 
smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; printk(KERN_INFO PFX "Powering down via CPI1 power command\n"); @@ -425,7 +444,7 @@ static void ipmi_poweroff_cpi1 (ipmi_user_t user) */ #define DELL_IANA_MFR_ID {0xA2, 0x02, 0x00} -static int ipmi_dell_chassis_detect (ipmi_user_t user) +static int ipmi_dell_chassis_detect(ipmi_user_t user) { const char ipmi_version_major = ipmi_version & 0xF; const char ipmi_version_minor = (ipmi_version >> 4) & 0xF; @@ -444,25 +463,25 @@ static int ipmi_dell_chassis_detect (ipmi_user_t user) #define IPMI_NETFN_CHASSIS_REQUEST 0 #define IPMI_CHASSIS_CONTROL_CMD 0x02 -static int ipmi_chassis_detect (ipmi_user_t user) +static int ipmi_chassis_detect(ipmi_user_t user) { /* Chassis support, use it. */ return (capabilities & 0x80); } -static void ipmi_poweroff_chassis (ipmi_user_t user) +static void ipmi_poweroff_chassis(ipmi_user_t user) { struct ipmi_system_interface_addr smi_addr; struct kernel_ipmi_msg send_msg; int rv; unsigned char data[1]; - /* - * Configure IPMI address for local access - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; + /* + * Configure IPMI address for local access + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; powercyclefailed: printk(KERN_INFO PFX "Powering %s via IPMI chassis control command\n", @@ -525,15 +544,13 @@ static struct poweroff_function poweroff_functions[] = { /* Called on a powerdown request. */ -static void ipmi_poweroff_function (void) +static void ipmi_poweroff_function(void) { if (!ready) return; /* Use run-to-completion mode, since interrupts may be off. 
*/ - ipmi_user_set_run_to_completion(ipmi_user, 1); specific_poweroff_func(ipmi_user); - ipmi_user_set_run_to_completion(ipmi_user, 0); } /* Wait for an IPMI interface to be installed, the first one installed @@ -561,13 +578,13 @@ static void ipmi_po_new_smi(int if_num, struct device *device) ipmi_ifnum = if_num; - /* - * Do a get device ide and store some results, since this is + /* + * Do a get device ide and store some results, since this is * used by several functions. - */ - smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; - smi_addr.channel = IPMI_BMC_CHANNEL; - smi_addr.lun = 0; + */ + smi_addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; + smi_addr.channel = IPMI_BMC_CHANNEL; + smi_addr.lun = 0; send_msg.netfn = IPMI_NETFN_APP_REQUEST; send_msg.cmd = IPMI_GET_DEVICE_ID_CMD; @@ -632,8 +649,7 @@ static void ipmi_po_smi_gone(int if_num) pm_power_off = old_poweroff_func; } -static struct ipmi_smi_watcher smi_watcher = -{ +static struct ipmi_smi_watcher smi_watcher = { .owner = THIS_MODULE, .new_smi = ipmi_po_new_smi, .smi_gone = ipmi_po_smi_gone @@ -675,12 +691,12 @@ static struct ctl_table_header *ipmi_table_header; /* * Startup and shutdown functions. 
*/ -static int ipmi_poweroff_init (void) +static int ipmi_poweroff_init(void) { int rv; - printk (KERN_INFO "Copyright (C) 2004 MontaVista Software -" - " IPMI Powerdown via sys_reboot.\n"); + printk(KERN_INFO "Copyright (C) 2004 MontaVista Software -" + " IPMI Powerdown via sys_reboot.\n"); if (poweroff_powercycle) printk(KERN_INFO PFX "Power cycle is enabled.\n"); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 4f560d0bb80..5a5455585c1 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -80,7 +80,7 @@ #define SI_USEC_PER_JIFFY (1000000/HZ) #define SI_TIMEOUT_JIFFIES (SI_TIMEOUT_TIME_USEC/SI_USEC_PER_JIFFY) #define SI_SHORT_TIMEOUT_USEC 250 /* .25ms when the SM request a - short timeout */ + short timeout */ /* Bit for BMC global enables. */ #define IPMI_BMC_RCV_MSG_INTR 0x01 @@ -114,14 +114,61 @@ static char *si_to_str[] = { "kcs", "smic", "bt" }; #define DEVICE_NAME "ipmi_si" -static struct device_driver ipmi_driver = -{ +static struct device_driver ipmi_driver = { .name = DEVICE_NAME, .bus = &platform_bus_type }; -struct smi_info -{ + +/* + * Indexes into stats[] in smi_info below. + */ +enum si_stat_indexes { + /* + * Number of times the driver requested a timer while an operation + * was in progress. + */ + SI_STAT_short_timeouts = 0, + + /* + * Number of times the driver requested a timer while nothing was in + * progress. + */ + SI_STAT_long_timeouts, + + /* Number of times the interface was idle while being polled. */ + SI_STAT_idles, + + /* Number of interrupts the driver handled. */ + SI_STAT_interrupts, + + /* Number of time the driver got an ATTN from the hardware. */ + SI_STAT_attentions, + + /* Number of times the driver requested flags from the hardware. */ + SI_STAT_flag_fetches, + + /* Number of times the hardware didn't follow the state machine. */ + SI_STAT_hosed_count, + + /* Number of completed messages. 
*/ + SI_STAT_complete_transactions, + + /* Number of IPMI events received from the hardware. */ + SI_STAT_events, + + /* Number of watchdog pretimeouts. */ + SI_STAT_watchdog_pretimeouts, + + /* Number of asyncronous messages received. */ + SI_STAT_incoming_messages, + + + /* This *must* remain last, add new values above this. */ + SI_NUM_STATS +}; + +struct smi_info { int intf_num; ipmi_smi_t intf; struct si_sm_data *si_sm; @@ -134,8 +181,10 @@ struct smi_info struct ipmi_smi_msg *curr_msg; enum si_intf_state si_state; - /* Used to handle the various types of I/O that can occur with - IPMI */ + /* + * Used to handle the various types of I/O that can occur with + * IPMI + */ struct si_sm_io io; int (*io_setup)(struct smi_info *info); void (*io_cleanup)(struct smi_info *info); @@ -146,15 +195,18 @@ struct smi_info void (*addr_source_cleanup)(struct smi_info *info); void *addr_source_data; - /* Per-OEM handler, called from handle_flags(). - Returns 1 when handle_flags() needs to be re-run - or 0 indicating it set si_state itself. - */ + /* + * Per-OEM handler, called from handle_flags(). Returns 1 + * when handle_flags() needs to be re-run or 0 indicating it + * set si_state itself. + */ int (*oem_data_avail_handler)(struct smi_info *smi_info); - /* Flags from the last GET_MSG_FLAGS command, used when an ATTN - is set to hold the flags until we are done handling everything - from the flags. */ + /* + * Flags from the last GET_MSG_FLAGS command, used when an ATTN + * is set to hold the flags until we are done handling everything + * from the flags. 
+ */ #define RECEIVE_MSG_AVAIL 0x01 #define EVENT_MSG_BUFFER_FULL 0x02 #define WDT_PRE_TIMEOUT_INT 0x08 @@ -162,25 +214,31 @@ struct smi_info #define OEM1_DATA_AVAIL 0x40 #define OEM2_DATA_AVAIL 0x80 #define OEM_DATA_AVAIL (OEM0_DATA_AVAIL | \ - OEM1_DATA_AVAIL | \ - OEM2_DATA_AVAIL) + OEM1_DATA_AVAIL | \ + OEM2_DATA_AVAIL) unsigned char msg_flags; - /* If set to true, this will request events the next time the - state machine is idle. */ + /* + * If set to true, this will request events the next time the + * state machine is idle. + */ atomic_t req_events; - /* If true, run the state machine to completion on every send - call. Generally used after a panic to make sure stuff goes - out. */ + /* + * If true, run the state machine to completion on every send + * call. Generally used after a panic to make sure stuff goes + * out. + */ int run_to_completion; /* The I/O port of an SI interface. */ int port; - /* The space between start addresses of the two ports. For - instance, if the first port is 0xca2 and the spacing is 4, then - the second port is 0xca6. */ + /* + * The space between start addresses of the two ports. For + * instance, if the first port is 0xca2 and the spacing is 4, then + * the second port is 0xca6. + */ unsigned int spacing; /* zero if no irq; */ @@ -195,10 +253,12 @@ struct smi_info /* Used to gracefully stop the timer without race conditions. */ atomic_t stop_operation; - /* The driver will disable interrupts when it gets into a - situation where it cannot handle messages due to lack of - memory. Once that situation clears up, it will re-enable - interrupts. */ + /* + * The driver will disable interrupts when it gets into a + * situation where it cannot handle messages due to lack of + * memory. Once that situation clears up, it will re-enable + * interrupts. + */ int interrupt_disabled; /* From the get device id response... 
*/ @@ -208,33 +268,28 @@ struct smi_info struct device *dev; struct platform_device *pdev; - /* True if we allocated the device, false if it came from - * someplace else (like PCI). */ + /* + * True if we allocated the device, false if it came from + * someplace else (like PCI). + */ int dev_registered; /* Slave address, could be reported from DMI. */ unsigned char slave_addr; /* Counters and things for the proc filesystem. */ - spinlock_t count_lock; - unsigned long short_timeouts; - unsigned long long_timeouts; - unsigned long timeout_restarts; - unsigned long idles; - unsigned long interrupts; - unsigned long attentions; - unsigned long flag_fetches; - unsigned long hosed_count; - unsigned long complete_transactions; - unsigned long events; - unsigned long watchdog_pretimeouts; - unsigned long incoming_messages; - - struct task_struct *thread; + atomic_t stats[SI_NUM_STATS]; + + struct task_struct *thread; struct list_head link; }; +#define smi_inc_stat(smi, stat) \ + atomic_inc(&(smi)->stats[SI_STAT_ ## stat]) +#define smi_get_stat(smi, stat) \ + ((unsigned int) atomic_read(&(smi)->stats[SI_STAT_ ## stat])) + #define SI_MAX_PARMS 4 static int force_kipmid[SI_MAX_PARMS]; @@ -246,7 +301,7 @@ static int try_smi_init(struct smi_info *smi); static void cleanup_one_si(struct smi_info *to_clean); static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list); -static int register_xaction_notifier(struct notifier_block * nb) +static int register_xaction_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&xaction_notifier_list, nb); } @@ -255,7 +310,7 @@ static void deliver_recv_msg(struct smi_info *smi_info, struct ipmi_smi_msg *msg) { /* Deliver the message to the upper layer with the lock - released. */ + released. 
*/ spin_unlock(&(smi_info->si_lock)); ipmi_smi_msg_received(smi_info->intf, msg); spin_lock(&(smi_info->si_lock)); @@ -287,9 +342,12 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) struct timeval t; #endif - /* No need to save flags, we aleady have interrupts off and we - already hold the SMI lock. */ - spin_lock(&(smi_info->msg_lock)); + /* + * No need to save flags, we aleady have interrupts off and we + * already hold the SMI lock. + */ + if (!smi_info->run_to_completion) + spin_lock(&(smi_info->msg_lock)); /* Pick the high priority queue first. */ if (!list_empty(&(smi_info->hp_xmit_msgs))) { @@ -310,7 +368,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) link); #ifdef DEBUG_TIMING do_gettimeofday(&t); - printk("**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec); + printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif err = atomic_notifier_call_chain(&xaction_notifier_list, 0, smi_info); @@ -322,14 +380,14 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) smi_info->si_sm, smi_info->curr_msg->data, smi_info->curr_msg->data_size); - if (err) { + if (err) return_hosed_msg(smi_info, err); - } rv = SI_SM_CALL_WITHOUT_DELAY; } - out: - spin_unlock(&(smi_info->msg_lock)); + out: + if (!smi_info->run_to_completion) + spin_unlock(&(smi_info->msg_lock)); return rv; } @@ -338,8 +396,10 @@ static void start_enable_irq(struct smi_info *smi_info) { unsigned char msg[2]; - /* If we are enabling interrupts, we have to tell the - BMC to use them. */ + /* + * If we are enabling interrupts, we have to tell the + * BMC to use them. 
+ */ msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; @@ -371,10 +431,12 @@ static void start_clear_flags(struct smi_info *smi_info) smi_info->si_state = SI_CLEARING_FLAGS; } -/* When we have a situtaion where we run out of memory and cannot - allocate messages, we just leave them in the BMC and run the system - polled until we can allocate some memory. Once we have some - memory, we will re-enable the interrupt. */ +/* + * When we have a situtaion where we run out of memory and cannot + * allocate messages, we just leave them in the BMC and run the system + * polled until we can allocate some memory. Once we have some + * memory, we will re-enable the interrupt. + */ static inline void disable_si_irq(struct smi_info *smi_info) { if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { @@ -396,9 +458,7 @@ static void handle_flags(struct smi_info *smi_info) retry: if (smi_info->msg_flags & WDT_PRE_TIMEOUT_INT) { /* Watchdog pre-timeout */ - spin_lock(&smi_info->count_lock); - smi_info->watchdog_pretimeouts++; - spin_unlock(&smi_info->count_lock); + smi_inc_stat(smi_info, watchdog_pretimeouts); start_clear_flags(smi_info); smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT; @@ -444,12 +504,11 @@ static void handle_flags(struct smi_info *smi_info) smi_info->curr_msg->data_size); smi_info->si_state = SI_GETTING_EVENTS; } else if (smi_info->msg_flags & OEM_DATA_AVAIL && - smi_info->oem_data_avail_handler) { + smi_info->oem_data_avail_handler) { if (smi_info->oem_data_avail_handler(smi_info)) goto retry; - } else { + } else smi_info->si_state = SI_NORMAL; - } } static void handle_transaction_done(struct smi_info *smi_info) @@ -459,7 +518,7 @@ static void handle_transaction_done(struct smi_info *smi_info) struct timeval t; do_gettimeofday(&t); - printk("**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec); + printk(KERN_DEBUG "**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif switch (smi_info->si_state) { case SI_NORMAL: @@ -472,9 +531,11 @@ static void 
handle_transaction_done(struct smi_info *smi_info) smi_info->curr_msg->rsp, IPMI_MAX_MSG_LENGTH); - /* Do this here becase deliver_recv_msg() releases the - lock, and a new message can be put in during the - time the lock is released. */ + /* + * Do this here becase deliver_recv_msg() releases the + * lock, and a new message can be put in during the + * time the lock is released. + */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; deliver_recv_msg(smi_info, msg); @@ -488,12 +549,13 @@ static void handle_transaction_done(struct smi_info *smi_info) /* We got the flags from the SMI, now handle them. */ len = smi_info->handlers->get_result(smi_info->si_sm, msg, 4); if (msg[2] != 0) { - /* Error fetching flags, just give up for - now. */ + /* Error fetching flags, just give up for now. */ smi_info->si_state = SI_NORMAL; } else if (len < 4) { - /* Hmm, no flags. That's technically illegal, but - don't use uninitialized data. */ + /* + * Hmm, no flags. That's technically illegal, but + * don't use uninitialized data. + */ smi_info->si_state = SI_NORMAL; } else { smi_info->msg_flags = msg[3]; @@ -530,9 +592,11 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->curr_msg->rsp, IPMI_MAX_MSG_LENGTH); - /* Do this here becase deliver_recv_msg() releases the - lock, and a new message can be put in during the - time the lock is released. */ + /* + * Do this here becase deliver_recv_msg() releases the + * lock, and a new message can be put in during the + * time the lock is released. 
+ */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; if (msg->rsp[2] != 0) { @@ -543,14 +607,14 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->msg_flags &= ~EVENT_MSG_BUFFER_FULL; handle_flags(smi_info); } else { - spin_lock(&smi_info->count_lock); - smi_info->events++; - spin_unlock(&smi_info->count_lock); - - /* Do this before we deliver the message - because delivering the message releases the - lock and something else can mess with the - state. */ + smi_inc_stat(smi_info, events); + + /* + * Do this before we deliver the message + * because delivering the message releases the + * lock and something else can mess with the + * state. + */ handle_flags(smi_info); deliver_recv_msg(smi_info, msg); @@ -566,9 +630,11 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->curr_msg->rsp, IPMI_MAX_MSG_LENGTH); - /* Do this here becase deliver_recv_msg() releases the - lock, and a new message can be put in during the - time the lock is released. */ + /* + * Do this here becase deliver_recv_msg() releases the + * lock, and a new message can be put in during the + * time the lock is released. + */ msg = smi_info->curr_msg; smi_info->curr_msg = NULL; if (msg->rsp[2] != 0) { @@ -579,14 +645,14 @@ static void handle_transaction_done(struct smi_info *smi_info) smi_info->msg_flags &= ~RECEIVE_MSG_AVAIL; handle_flags(smi_info); } else { - spin_lock(&smi_info->count_lock); - smi_info->incoming_messages++; - spin_unlock(&smi_info->count_lock); - - /* Do this before we deliver the message - because delivering the message releases the - lock and something else can mess with the - state. */ + smi_inc_stat(smi_info, incoming_messages); + + /* + * Do this before we deliver the message + * because delivering the message releases the + * lock and something else can mess with the + * state. 
+ */ handle_flags(smi_info); deliver_recv_msg(smi_info, msg); @@ -674,69 +740,70 @@ static void handle_transaction_done(struct smi_info *smi_info) } } -/* Called on timeouts and events. Timeouts should pass the elapsed - time, interrupts should pass in zero. Must be called with - si_lock held and interrupts disabled. */ +/* + * Called on timeouts and events. Timeouts should pass the elapsed + * time, interrupts should pass in zero. Must be called with + * si_lock held and interrupts disabled. + */ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, int time) { enum si_sm_result si_sm_result; restart: - /* There used to be a loop here that waited a little while - (around 25us) before giving up. That turned out to be - pointless, the minimum delays I was seeing were in the 300us - range, which is far too long to wait in an interrupt. So - we just run until the state machine tells us something - happened or it needs a delay. */ + /* + * There used to be a loop here that waited a little while + * (around 25us) before giving up. That turned out to be + * pointless, the minimum delays I was seeing were in the 300us + * range, which is far too long to wait in an interrupt. So + * we just run until the state machine tells us something + * happened or it needs a delay. 
+ */ si_sm_result = smi_info->handlers->event(smi_info->si_sm, time); time = 0; while (si_sm_result == SI_SM_CALL_WITHOUT_DELAY) - { si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); - } - if (si_sm_result == SI_SM_TRANSACTION_COMPLETE) - { - spin_lock(&smi_info->count_lock); - smi_info->complete_transactions++; - spin_unlock(&smi_info->count_lock); + if (si_sm_result == SI_SM_TRANSACTION_COMPLETE) { + smi_inc_stat(smi_info, complete_transactions); handle_transaction_done(smi_info); si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); - } - else if (si_sm_result == SI_SM_HOSED) - { - spin_lock(&smi_info->count_lock); - smi_info->hosed_count++; - spin_unlock(&smi_info->count_lock); + } else if (si_sm_result == SI_SM_HOSED) { + smi_inc_stat(smi_info, hosed_count); - /* Do the before return_hosed_msg, because that - releases the lock. */ + /* + * Do the before return_hosed_msg, because that + * releases the lock. + */ smi_info->si_state = SI_NORMAL; if (smi_info->curr_msg != NULL) { - /* If we were handling a user message, format - a response to send to the upper layer to - tell it about the error. */ + /* + * If we were handling a user message, format + * a response to send to the upper layer to + * tell it about the error. + */ return_hosed_msg(smi_info, IPMI_ERR_UNSPECIFIED); } si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); } - /* We prefer handling attn over new messages. */ - if (si_sm_result == SI_SM_ATTN) - { + /* + * We prefer handling attn over new messages. But don't do + * this if there is not yet an upper layer to handle anything. + */ + if (likely(smi_info->intf) && si_sm_result == SI_SM_ATTN) { unsigned char msg[2]; - spin_lock(&smi_info->count_lock); - smi_info->attentions++; - spin_unlock(&smi_info->count_lock); + smi_inc_stat(smi_info, attentions); - /* Got a attn, send down a get message flags to see - what's causing it. 
It would be better to handle - this in the upper layer, but due to the way - interrupts work with the SMI, that's not really - possible. */ + /* + * Got a attn, send down a get message flags to see + * what's causing it. It would be better to handle + * this in the upper layer, but due to the way + * interrupts work with the SMI, that's not really + * possible. + */ msg[0] = (IPMI_NETFN_APP_REQUEST << 2); msg[1] = IPMI_GET_MSG_FLAGS_CMD; @@ -748,20 +815,19 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, /* If we are currently idle, try to start the next message. */ if (si_sm_result == SI_SM_IDLE) { - spin_lock(&smi_info->count_lock); - smi_info->idles++; - spin_unlock(&smi_info->count_lock); + smi_inc_stat(smi_info, idles); si_sm_result = start_next_msg(smi_info); if (si_sm_result != SI_SM_IDLE) goto restart; - } + } if ((si_sm_result == SI_SM_IDLE) - && (atomic_read(&smi_info->req_events))) - { - /* We are idle and the upper layer requested that I fetch - events, so do so. */ + && (atomic_read(&smi_info->req_events))) { + /* + * We are idle and the upper layer requested that I fetch + * events, so do so. + */ atomic_set(&smi_info->req_events, 0); smi_info->curr_msg = ipmi_alloc_smi_msg(); @@ -803,56 +869,50 @@ static void sender(void *send_info, return; } - spin_lock_irqsave(&(smi_info->msg_lock), flags); #ifdef DEBUG_TIMING do_gettimeofday(&t); printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif if (smi_info->run_to_completion) { - /* If we are running to completion, then throw it in - the list and run transactions until everything is - clear. Priority doesn't matter here. */ + /* + * If we are running to completion, then throw it in + * the list and run transactions until everything is + * clear. Priority doesn't matter here. + */ + + /* + * Run to completion means we are single-threaded, no + * need for locks. 
+ */ list_add_tail(&(msg->link), &(smi_info->xmit_msgs)); - /* We have to release the msg lock and claim the smi - lock in this case, because of race conditions. */ - spin_unlock_irqrestore(&(smi_info->msg_lock), flags); - - spin_lock_irqsave(&(smi_info->si_lock), flags); result = smi_event_handler(smi_info, 0); while (result != SI_SM_IDLE) { udelay(SI_SHORT_TIMEOUT_USEC); result = smi_event_handler(smi_info, SI_SHORT_TIMEOUT_USEC); } - spin_unlock_irqrestore(&(smi_info->si_lock), flags); return; - } else { - if (priority > 0) { - list_add_tail(&(msg->link), &(smi_info->hp_xmit_msgs)); - } else { - list_add_tail(&(msg->link), &(smi_info->xmit_msgs)); - } } - spin_unlock_irqrestore(&(smi_info->msg_lock), flags); - spin_lock_irqsave(&(smi_info->si_lock), flags); - if ((smi_info->si_state == SI_NORMAL) - && (smi_info->curr_msg == NULL)) - { + spin_lock_irqsave(&smi_info->msg_lock, flags); + if (priority > 0) + list_add_tail(&msg->link, &smi_info->hp_xmit_msgs); + else + list_add_tail(&msg->link, &smi_info->xmit_msgs); + spin_unlock_irqrestore(&smi_info->msg_lock, flags); + + spin_lock_irqsave(&smi_info->si_lock, flags); + if (smi_info->si_state == SI_NORMAL && smi_info->curr_msg == NULL) start_next_msg(smi_info); - } - spin_unlock_irqrestore(&(smi_info->si_lock), flags); + spin_unlock_irqrestore(&smi_info->si_lock, flags); } static void set_run_to_completion(void *send_info, int i_run_to_completion) { struct smi_info *smi_info = send_info; enum si_sm_result result; - unsigned long flags; - - spin_lock_irqsave(&(smi_info->si_lock), flags); smi_info->run_to_completion = i_run_to_completion; if (i_run_to_completion) { @@ -863,8 +923,6 @@ static void set_run_to_completion(void *send_info, int i_run_to_completion) SI_SHORT_TIMEOUT_USEC); } } - - spin_unlock_irqrestore(&(smi_info->si_lock), flags); } static int ipmi_thread(void *data) @@ -878,9 +936,8 @@ static int ipmi_thread(void *data) spin_lock_irqsave(&(smi_info->si_lock), flags); smi_result = 
smi_event_handler(smi_info, 0); spin_unlock_irqrestore(&(smi_info->si_lock), flags); - if (smi_result == SI_SM_CALL_WITHOUT_DELAY) { - /* do nothing */ - } + if (smi_result == SI_SM_CALL_WITHOUT_DELAY) + ; /* do nothing */ else if (smi_result == SI_SM_CALL_WITH_DELAY) schedule(); else @@ -931,7 +988,7 @@ static void smi_timeout(unsigned long data) spin_lock_irqsave(&(smi_info->si_lock), flags); #ifdef DEBUG_TIMING do_gettimeofday(&t); - printk("**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec); + printk(KERN_DEBUG "**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif jiffies_now = jiffies; time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies) @@ -945,23 +1002,19 @@ static void smi_timeout(unsigned long data) if ((smi_info->irq) && (!smi_info->interrupt_disabled)) { /* Running with interrupts, only do long timeouts. */ smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; - spin_lock_irqsave(&smi_info->count_lock, flags); - smi_info->long_timeouts++; - spin_unlock_irqrestore(&smi_info->count_lock, flags); + smi_inc_stat(smi_info, long_timeouts); goto do_add_timer; } - /* If the state machine asks for a short delay, then shorten - the timer timeout. */ + /* + * If the state machine asks for a short delay, then shorten + * the timer timeout. 
+ */ if (smi_result == SI_SM_CALL_WITH_DELAY) { - spin_lock_irqsave(&smi_info->count_lock, flags); - smi_info->short_timeouts++; - spin_unlock_irqrestore(&smi_info->count_lock, flags); + smi_inc_stat(smi_info, short_timeouts); smi_info->si_timer.expires = jiffies + 1; } else { - spin_lock_irqsave(&smi_info->count_lock, flags); - smi_info->long_timeouts++; - spin_unlock_irqrestore(&smi_info->count_lock, flags); + smi_inc_stat(smi_info, long_timeouts); smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; } @@ -979,13 +1032,11 @@ static irqreturn_t si_irq_handler(int irq, void *data) spin_lock_irqsave(&(smi_info->si_lock), flags); - spin_lock(&smi_info->count_lock); - smi_info->interrupts++; - spin_unlock(&smi_info->count_lock); + smi_inc_stat(smi_info, interrupts); #ifdef DEBUG_TIMING do_gettimeofday(&t); - printk("**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec); + printk(KERN_DEBUG "**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec); #endif smi_event_handler(smi_info, 0); spin_unlock_irqrestore(&(smi_info->si_lock), flags); @@ -1028,7 +1079,7 @@ static int smi_start_processing(void *send_info, * The BT interface is efficient enough to not need a thread, * and there is no need for a thread if we have interrupts. */ - else if ((new_smi->si_type != SI_BT) && (!new_smi->irq)) + else if ((new_smi->si_type != SI_BT) && (!new_smi->irq)) enable = 1; if (enable) { @@ -1054,8 +1105,7 @@ static void set_maintenance_mode(void *send_info, int enable) atomic_set(&smi_info->req_events, 0); } -static struct ipmi_smi_handlers handlers = -{ +static struct ipmi_smi_handlers handlers = { .owner = THIS_MODULE, .start_processing = smi_start_processing, .sender = sender, @@ -1065,8 +1115,10 @@ static struct ipmi_smi_handlers handlers = .poll = poll, }; -/* There can be 4 IO ports passed in (with or without IRQs), 4 addresses, - a default IO port, and 1 ACPI/SPMI address. 
That sets SI_MAX_DRIVERS */ +/* + * There can be 4 IO ports passed in (with or without IRQs), 4 addresses, + * a default IO port, and 1 ACPI/SPMI address. That sets SI_MAX_DRIVERS. + */ static LIST_HEAD(smi_infos); static DEFINE_MUTEX(smi_infos_lock); @@ -1257,10 +1309,9 @@ static void port_cleanup(struct smi_info *info) int idx; if (addr) { - for (idx = 0; idx < info->io_size; idx++) { + for (idx = 0; idx < info->io_size; idx++) release_region(addr + idx * info->io.regspacing, info->io.regsize); - } } } @@ -1274,8 +1325,10 @@ static int port_setup(struct smi_info *info) info->io_cleanup = port_cleanup; - /* Figure out the actual inb/inw/inl/etc routine to use based - upon the register size. */ + /* + * Figure out the actual inb/inw/inl/etc routine to use based + * upon the register size. + */ switch (info->io.regsize) { case 1: info->io.inputb = port_inb; @@ -1290,17 +1343,18 @@ static int port_setup(struct smi_info *info) info->io.outputb = port_outl; break; default: - printk("ipmi_si: Invalid register size: %d\n", + printk(KERN_WARNING "ipmi_si: Invalid register size: %d\n", info->io.regsize); return -EINVAL; } - /* Some BIOSes reserve disjoint I/O regions in their ACPI + /* + * Some BIOSes reserve disjoint I/O regions in their ACPI * tables. This causes problems when trying to register the * entire I/O region. Therefore we must register each I/O * port separately. */ - for (idx = 0; idx < info->io_size; idx++) { + for (idx = 0; idx < info->io_size; idx++) { if (request_region(addr + idx * info->io.regspacing, info->io.regsize, DEVICE_NAME) == NULL) { /* Undo allocations */ @@ -1388,8 +1442,10 @@ static int mem_setup(struct smi_info *info) info->io_cleanup = mem_cleanup; - /* Figure out the actual readb/readw/readl/etc routine to use based - upon the register size. */ + /* + * Figure out the actual readb/readw/readl/etc routine to use based + * upon the register size. 
+ */ switch (info->io.regsize) { case 1: info->io.inputb = intf_mem_inb; @@ -1410,16 +1466,18 @@ static int mem_setup(struct smi_info *info) break; #endif default: - printk("ipmi_si: Invalid register size: %d\n", + printk(KERN_WARNING "ipmi_si: Invalid register size: %d\n", info->io.regsize); return -EINVAL; } - /* Calculate the total amount of memory to claim. This is an + /* + * Calculate the total amount of memory to claim. This is an * unusual looking calculation, but it avoids claiming any * more memory than it has to. It will claim everything * between the first address to the end of the last full - * register. */ + * register. + */ mapsize = ((info->io_size * info->io.regspacing) - (info->io.regspacing - info->io.regsize)); @@ -1749,9 +1807,11 @@ static __devinit void hardcode_find_bmc(void) #include <linux/acpi.h> -/* Once we get an ACPI failure, we don't try any more, because we go - through the tables sequentially. Once we don't find a table, there - are no more. */ +/* + * Once we get an ACPI failure, we don't try any more, because we go + * through the tables sequentially. Once we don't find a table, there + * are no more. + */ static int acpi_failure; /* For GPE-type interrupts. */ @@ -1765,9 +1825,7 @@ static u32 ipmi_acpi_gpe(void *context) spin_lock_irqsave(&(smi_info->si_lock), flags); - spin_lock(&smi_info->count_lock); - smi_info->interrupts++; - spin_unlock(&smi_info->count_lock); + smi_inc_stat(smi_info, interrupts); #ifdef DEBUG_TIMING do_gettimeofday(&t); @@ -1816,7 +1874,8 @@ static int acpi_gpe_irq_setup(struct smi_info *info) /* * Defined at - * http://h21007.www2.hp.com/dspp/files/unprotected/devresource/Docs/TechPapers/IA64/hpspmi.pdf + * http://h21007.www2.hp.com/dspp/files/unprotected/devresource/ + * Docs/TechPapers/IA64/hpspmi.pdf */ struct SPMITable { s8 Signature[4]; @@ -1838,14 +1897,18 @@ struct SPMITable { */ u8 InterruptType; - /* If bit 0 of InterruptType is set, then this is the SCI - interrupt in the GPEx_STS register. 
*/ + /* + * If bit 0 of InterruptType is set, then this is the SCI + * interrupt in the GPEx_STS register. + */ u8 GPE; s16 Reserved; - /* If bit 1 of InterruptType is set, then this is the I/O - APIC/SAPIC interrupt. */ + /* + * If bit 1 of InterruptType is set, then this is the I/O + * APIC/SAPIC interrupt. + */ u32 GlobalSystemInterrupt; /* The actual register address. */ @@ -1863,7 +1926,7 @@ static __devinit int try_init_acpi(struct SPMITable *spmi) if (spmi->IPMIlegacy != 1) { printk(KERN_INFO "IPMI: Bad SPMI legacy %d\n", spmi->IPMIlegacy); - return -ENODEV; + return -ENODEV; } if (spmi->addr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) @@ -1880,8 +1943,7 @@ static __devinit int try_init_acpi(struct SPMITable *spmi) info->addr_source = "ACPI"; /* Figure out the interface type. */ - switch (spmi->InterfaceType) - { + switch (spmi->InterfaceType) { case 1: /* KCS */ info->si_type = SI_KCS; break; @@ -1929,7 +1991,8 @@ static __devinit int try_init_acpi(struct SPMITable *spmi) info->io.addr_type = IPMI_IO_ADDR_SPACE; } else { kfree(info); - printk("ipmi_si: Unknown ACPI I/O Address type\n"); + printk(KERN_WARNING + "ipmi_si: Unknown ACPI I/O Address type\n"); return -EIO; } info->io.addr_data = spmi->addr.address; @@ -1963,8 +2026,7 @@ static __devinit void acpi_find_bmc(void) #endif #ifdef CONFIG_DMI -struct dmi_ipmi_data -{ +struct dmi_ipmi_data { u8 type; u8 addr_space; unsigned long base_addr; @@ -1989,11 +2051,10 @@ static int __devinit decode_dmi(const struct dmi_header *dm, /* I/O */ base_addr &= 0xFFFE; dmi->addr_space = IPMI_IO_ADDR_SPACE; - } - else { + } else /* Memory */ dmi->addr_space = IPMI_MEM_ADDR_SPACE; - } + /* If bit 4 of byte 0x10 is set, then the lsb for the address is odd. */ dmi->base_addr = base_addr | ((data[0x10] & 0x10) >> 4); @@ -2002,7 +2063,7 @@ static int __devinit decode_dmi(const struct dmi_header *dm, /* The top two bits of byte 0x10 hold the register spacing. 
*/ reg_spacing = (data[0x10] & 0xC0) >> 6; - switch(reg_spacing){ + switch (reg_spacing) { case 0x00: /* Byte boundaries */ dmi->offset = 1; break; @@ -2018,12 +2079,14 @@ static int __devinit decode_dmi(const struct dmi_header *dm, } } else { /* Old DMI spec. */ - /* Note that technically, the lower bit of the base + /* + * Note that technically, the lower bit of the base * address should be 1 if the address is I/O and 0 if * the address is in memory. So many systems get that * wrong (and all that I have seen are I/O) so we just * ignore that bit and assume I/O. Systems that use - * memory should use the newer spec, anyway. */ + * memory should use the newer spec, anyway. + */ dmi->base_addr = base_addr & 0xfffe; dmi->addr_space = IPMI_IO_ADDR_SPACE; dmi->offset = 1; @@ -2230,13 +2293,13 @@ static struct pci_device_id ipmi_pci_devices[] = { MODULE_DEVICE_TABLE(pci, ipmi_pci_devices); static struct pci_driver ipmi_pci_driver = { - .name = DEVICE_NAME, - .id_table = ipmi_pci_devices, - .probe = ipmi_pci_probe, - .remove = __devexit_p(ipmi_pci_remove), + .name = DEVICE_NAME, + .id_table = ipmi_pci_devices, + .probe = ipmi_pci_probe, + .remove = __devexit_p(ipmi_pci_remove), #ifdef CONFIG_PM - .suspend = ipmi_pci_suspend, - .resume = ipmi_pci_resume, + .suspend = ipmi_pci_suspend, + .resume = ipmi_pci_resume, #endif }; #endif /* CONFIG_PCI */ @@ -2306,7 +2369,7 @@ static int __devinit ipmi_of_probe(struct of_device *dev, info->io.addr_data, info->io.regsize, info->io.regspacing, info->irq); - dev->dev.driver_data = (void*) info; + dev->dev.driver_data = (void *) info; return try_smi_init(info); } @@ -2319,14 +2382,16 @@ static int __devexit ipmi_of_remove(struct of_device *dev) static struct of_device_id ipmi_match[] = { - { .type = "ipmi", .compatible = "ipmi-kcs", .data = (void *)(unsigned long) SI_KCS }, - { .type = "ipmi", .compatible = "ipmi-smic", .data = (void *)(unsigned long) SI_SMIC }, - { .type = "ipmi", .compatible = "ipmi-bt", .data = (void *)(unsigned 
long) SI_BT }, + { .type = "ipmi", .compatible = "ipmi-kcs", + .data = (void *)(unsigned long) SI_KCS }, + { .type = "ipmi", .compatible = "ipmi-smic", + .data = (void *)(unsigned long) SI_SMIC }, + { .type = "ipmi", .compatible = "ipmi-bt", + .data = (void *)(unsigned long) SI_BT }, {}, }; -static struct of_platform_driver ipmi_of_platform_driver = -{ +static struct of_platform_driver ipmi_of_platform_driver = { .name = "ipmi", .match_table = ipmi_match, .probe = ipmi_of_probe, @@ -2347,32 +2412,32 @@ static int try_get_dev_id(struct smi_info *smi_info) if (!resp) return -ENOMEM; - /* Do a Get Device ID command, since it comes back with some - useful info. */ + /* + * Do a Get Device ID command, since it comes back with some + * useful info. + */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_DEVICE_ID_CMD; smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); smi_result = smi_info->handlers->event(smi_info->si_sm, 0); - for (;;) - { + for (;;) { if (smi_result == SI_SM_CALL_WITH_DELAY || smi_result == SI_SM_CALL_WITH_TICK_DELAY) { schedule_timeout_uninterruptible(1); smi_result = smi_info->handlers->event( smi_info->si_sm, 100); - } - else if (smi_result == SI_SM_CALL_WITHOUT_DELAY) - { + } else if (smi_result == SI_SM_CALL_WITHOUT_DELAY) { smi_result = smi_info->handlers->event( smi_info->si_sm, 0); - } - else + } else break; } if (smi_result == SI_SM_HOSED) { - /* We couldn't get the state machine to run, so whatever's at - the port is probably not an IPMI SMI interface. */ + /* + * We couldn't get the state machine to run, so whatever's at + * the port is probably not an IPMI SMI interface. 
+ */ rv = -ENODEV; goto out; } @@ -2405,30 +2470,28 @@ static int stat_file_read_proc(char *page, char **start, off_t off, out += sprintf(out, "interrupts_enabled: %d\n", smi->irq && !smi->interrupt_disabled); - out += sprintf(out, "short_timeouts: %ld\n", - smi->short_timeouts); - out += sprintf(out, "long_timeouts: %ld\n", - smi->long_timeouts); - out += sprintf(out, "timeout_restarts: %ld\n", - smi->timeout_restarts); - out += sprintf(out, "idles: %ld\n", - smi->idles); - out += sprintf(out, "interrupts: %ld\n", - smi->interrupts); - out += sprintf(out, "attentions: %ld\n", - smi->attentions); - out += sprintf(out, "flag_fetches: %ld\n", - smi->flag_fetches); - out += sprintf(out, "hosed_count: %ld\n", - smi->hosed_count); - out += sprintf(out, "complete_transactions: %ld\n", - smi->complete_transactions); - out += sprintf(out, "events: %ld\n", - smi->events); - out += sprintf(out, "watchdog_pretimeouts: %ld\n", - smi->watchdog_pretimeouts); - out += sprintf(out, "incoming_messages: %ld\n", - smi->incoming_messages); + out += sprintf(out, "short_timeouts: %u\n", + smi_get_stat(smi, short_timeouts)); + out += sprintf(out, "long_timeouts: %u\n", + smi_get_stat(smi, long_timeouts)); + out += sprintf(out, "idles: %u\n", + smi_get_stat(smi, idles)); + out += sprintf(out, "interrupts: %u\n", + smi_get_stat(smi, interrupts)); + out += sprintf(out, "attentions: %u\n", + smi_get_stat(smi, attentions)); + out += sprintf(out, "flag_fetches: %u\n", + smi_get_stat(smi, flag_fetches)); + out += sprintf(out, "hosed_count: %u\n", + smi_get_stat(smi, hosed_count)); + out += sprintf(out, "complete_transactions: %u\n", + smi_get_stat(smi, complete_transactions)); + out += sprintf(out, "events: %u\n", + smi_get_stat(smi, events)); + out += sprintf(out, "watchdog_pretimeouts: %u\n", + smi_get_stat(smi, watchdog_pretimeouts)); + out += sprintf(out, "incoming_messages: %u\n", + smi_get_stat(smi, incoming_messages)); return out - page; } @@ -2460,7 +2523,7 @@ static int 
param_read_proc(char *page, char **start, off_t off, static int oem_data_avail_to_receive_msg_avail(struct smi_info *smi_info) { smi_info->msg_flags = ((smi_info->msg_flags & ~OEM_DATA_AVAIL) | - RECEIVE_MSG_AVAIL); + RECEIVE_MSG_AVAIL); return 1; } @@ -2502,10 +2565,9 @@ static void setup_dell_poweredge_oem_data_handler(struct smi_info *smi_info) id->ipmi_version == DELL_POWEREDGE_8G_BMC_IPMI_VERSION) { smi_info->oem_data_avail_handler = oem_data_avail_to_receive_msg_avail; - } - else if (ipmi_version_major(id) < 1 || - (ipmi_version_major(id) == 1 && - ipmi_version_minor(id) < 5)) { + } else if (ipmi_version_major(id) < 1 || + (ipmi_version_major(id) == 1 && + ipmi_version_minor(id) < 5)) { smi_info->oem_data_avail_handler = oem_data_avail_to_receive_msg_avail; } @@ -2597,8 +2659,10 @@ static void setup_xaction_handlers(struct smi_info *smi_info) static inline void wait_for_timer_and_thread(struct smi_info *smi_info) { if (smi_info->intf) { - /* The timer and thread are only running if the - interface has been started up and registered. */ + /* + * The timer and thread are only running if the + * interface has been started up and registered. + */ if (smi_info->thread != NULL) kthread_stop(smi_info->thread); del_timer_sync(&smi_info->si_timer); @@ -2676,6 +2740,7 @@ static int is_new_interface(struct smi_info *info) static int try_smi_init(struct smi_info *new_smi) { int rv; + int i; if (new_smi->addr_source) { printk(KERN_INFO "ipmi_si: Trying %s-specified %s state" @@ -2722,7 +2787,7 @@ static int try_smi_init(struct smi_info *new_smi) /* Allocate the state machine's data and initialize it. */ new_smi->si_sm = kmalloc(new_smi->handlers->size(), GFP_KERNEL); if (!new_smi->si_sm) { - printk(" Could not allocate state machine memory\n"); + printk(KERN_ERR "Could not allocate state machine memory\n"); rv = -ENOMEM; goto out_err; } @@ -2732,13 +2797,12 @@ static int try_smi_init(struct smi_info *new_smi) /* Now that we know the I/O size, we can set up the I/O. 
*/ rv = new_smi->io_setup(new_smi); if (rv) { - printk(" Could not set up I/O space\n"); + printk(KERN_ERR "Could not set up I/O space\n"); goto out_err; } spin_lock_init(&(new_smi->si_lock)); spin_lock_init(&(new_smi->msg_lock)); - spin_lock_init(&(new_smi->count_lock)); /* Do low-level detection first. */ if (new_smi->handlers->detect(new_smi->si_sm)) { @@ -2749,8 +2813,10 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err; } - /* Attempt a get device id command. If it fails, we probably - don't have a BMC here. */ + /* + * Attempt a get device id command. If it fails, we probably + * don't have a BMC here. + */ rv = try_get_dev_id(new_smi); if (rv) { if (new_smi->addr_source) @@ -2767,22 +2833,28 @@ static int try_smi_init(struct smi_info *new_smi) new_smi->curr_msg = NULL; atomic_set(&new_smi->req_events, 0); new_smi->run_to_completion = 0; + for (i = 0; i < SI_NUM_STATS; i++) + atomic_set(&new_smi->stats[i], 0); new_smi->interrupt_disabled = 0; atomic_set(&new_smi->stop_operation, 0); new_smi->intf_num = smi_num; smi_num++; - /* Start clearing the flags before we enable interrupts or the - timer to avoid racing with the timer. */ + /* + * Start clearing the flags before we enable interrupts or the + * timer to avoid racing with the timer. + */ start_clear_flags(new_smi); /* IRQ is defined to be set when non-zero. */ if (new_smi->irq) new_smi->si_state = SI_CLEARING_FLAGS_THEN_SET_IRQ; if (!new_smi->dev) { - /* If we don't already have a device from something - * else (like PCI), then register a new one. */ + /* + * If we don't already have a device from something + * else (like PCI), then register a new one. 
+ */ new_smi->pdev = platform_device_alloc("ipmi_si", new_smi->intf_num); if (rv) { @@ -2820,7 +2892,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "type", - type_file_read_proc, NULL, + type_file_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR @@ -2830,7 +2902,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "si_stats", - stat_file_read_proc, NULL, + stat_file_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR @@ -2840,7 +2912,7 @@ static int try_smi_init(struct smi_info *new_smi) } rv = ipmi_smi_add_proc_entry(new_smi->intf, "params", - param_read_proc, NULL, + param_read_proc, new_smi, THIS_MODULE); if (rv) { printk(KERN_ERR @@ -2853,7 +2925,8 @@ static int try_smi_init(struct smi_info *new_smi) mutex_unlock(&smi_infos_lock); - printk(KERN_INFO "IPMI %s interface initialized\n",si_to_str[new_smi->si_type]); + printk(KERN_INFO "IPMI %s interface initialized\n", + si_to_str[new_smi->si_type]); return 0; @@ -2868,9 +2941,11 @@ static int try_smi_init(struct smi_info *new_smi) if (new_smi->irq_cleanup) new_smi->irq_cleanup(new_smi); - /* Wait until we know that we are out of any interrupt - handlers might have been running before we freed the - interrupt. */ + /* + * Wait until we know that we are out of any interrupt + * handlers might have been running before we freed the + * interrupt. 
+ */ synchronize_sched(); if (new_smi->si_sm) { @@ -2942,11 +3017,10 @@ static __devinit int init_ipmi_si(void) #ifdef CONFIG_PCI rv = pci_register_driver(&ipmi_pci_driver); - if (rv){ + if (rv) printk(KERN_ERR "init_ipmi_si: Unable to register PCI driver: %d\n", rv); - } #endif #ifdef CONFIG_PPC_OF @@ -2975,7 +3049,8 @@ static __devinit int init_ipmi_si(void) of_unregister_platform_driver(&ipmi_of_platform_driver); #endif driver_unregister(&ipmi_driver); - printk("ipmi_si: Unable to find any System Interface(s)\n"); + printk(KERN_WARNING + "ipmi_si: Unable to find any System Interface(s)\n"); return -ENODEV; } else { mutex_unlock(&smi_infos_lock); @@ -2997,13 +3072,17 @@ static void cleanup_one_si(struct smi_info *to_clean) /* Tell the driver that we are shutting down. */ atomic_inc(&to_clean->stop_operation); - /* Make sure the timer and thread are stopped and will not run - again. */ + /* + * Make sure the timer and thread are stopped and will not run + * again. + */ wait_for_timer_and_thread(to_clean); - /* Timeouts are stopped, now make sure the interrupts are off - for the device. A little tricky with locks to make sure - there are no races. */ + /* + * Timeouts are stopped, now make sure the interrupts are off + * for the device. A little tricky with locks to make sure + * there are no races. 
+ */ spin_lock_irqsave(&to_clean->si_lock, flags); while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) { spin_unlock_irqrestore(&to_clean->si_lock, flags); @@ -3074,4 +3153,5 @@ module_exit(cleanup_ipmi_si); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>"); -MODULE_DESCRIPTION("Interface to the IPMI driver for the KCS, SMIC, and BT system interfaces."); +MODULE_DESCRIPTION("Interface to the IPMI driver for the KCS, SMIC, and BT" + " system interfaces."); diff --git a/drivers/char/ipmi/ipmi_si_sm.h b/drivers/char/ipmi/ipmi_si_sm.h index 4b731b24dc1..df89f73475f 100644 --- a/drivers/char/ipmi/ipmi_si_sm.h +++ b/drivers/char/ipmi/ipmi_si_sm.h @@ -34,22 +34,27 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* This is defined by the state machines themselves, it is an opaque - data type for them to use. */ +/* + * This is defined by the state machines themselves, it is an opaque + * data type for them to use. + */ struct si_sm_data; -/* The structure for doing I/O in the state machine. The state - machine doesn't have the actual I/O routines, they are done through - this interface. */ -struct si_sm_io -{ +/* + * The structure for doing I/O in the state machine. The state + * machine doesn't have the actual I/O routines, they are done through + * this interface. + */ +struct si_sm_io { unsigned char (*inputb)(struct si_sm_io *io, unsigned int offset); void (*outputb)(struct si_sm_io *io, unsigned int offset, unsigned char b); - /* Generic info used by the actual handling routines, the - state machine shouldn't touch these. */ + /* + * Generic info used by the actual handling routines, the + * state machine shouldn't touch these. + */ void __iomem *addr; int regspacing; int regsize; @@ -59,53 +64,67 @@ struct si_sm_io }; /* Results of SMI events. */ -enum si_sm_result -{ +enum si_sm_result { SI_SM_CALL_WITHOUT_DELAY, /* Call the driver again immediately */ SI_SM_CALL_WITH_DELAY, /* Delay some before calling again. 
*/ - SI_SM_CALL_WITH_TICK_DELAY, /* Delay at least 1 tick before calling again. */ + SI_SM_CALL_WITH_TICK_DELAY,/* Delay >=1 tick before calling again. */ SI_SM_TRANSACTION_COMPLETE, /* A transaction is finished. */ SI_SM_IDLE, /* The SM is in idle state. */ SI_SM_HOSED, /* The hardware violated the state machine. */ - SI_SM_ATTN /* The hardware is asserting attn and the - state machine is idle. */ + + /* + * The hardware is asserting attn and the state machine is + * idle. + */ + SI_SM_ATTN }; /* Handlers for the SMI state machine. */ -struct si_sm_handlers -{ - /* Put the version number of the state machine here so the - upper layer can print it. */ +struct si_sm_handlers { + /* + * Put the version number of the state machine here so the + * upper layer can print it. + */ char *version; - /* Initialize the data and return the amount of I/O space to - reserve for the space. */ + /* + * Initialize the data and return the amount of I/O space to + * reserve for the space. + */ unsigned int (*init_data)(struct si_sm_data *smi, struct si_sm_io *io); - /* Start a new transaction in the state machine. This will - return -2 if the state machine is not idle, -1 if the size - is invalid (to large or too small), or 0 if the transaction - is successfully completed. */ + /* + * Start a new transaction in the state machine. This will + * return -2 if the state machine is not idle, -1 if the size + * is invalid (to large or too small), or 0 if the transaction + * is successfully completed. + */ int (*start_transaction)(struct si_sm_data *smi, unsigned char *data, unsigned int size); - /* Return the results after the transaction. This will return - -1 if the buffer is too small, zero if no transaction is - present, or the actual length of the result data. */ + /* + * Return the results after the transaction. This will return + * -1 if the buffer is too small, zero if no transaction is + * present, or the actual length of the result data. 
+ */ int (*get_result)(struct si_sm_data *smi, unsigned char *data, unsigned int length); - /* Call this periodically (for a polled interface) or upon - receiving an interrupt (for a interrupt-driven interface). - If interrupt driven, you should probably poll this - periodically when not in idle state. This should be called - with the time that passed since the last call, if it is - significant. Time is in microseconds. */ + /* + * Call this periodically (for a polled interface) or upon + * receiving an interrupt (for a interrupt-driven interface). + * If interrupt driven, you should probably poll this + * periodically when not in idle state. This should be called + * with the time that passed since the last call, if it is + * significant. Time is in microseconds. + */ enum si_sm_result (*event)(struct si_sm_data *smi, long time); - /* Attempt to detect an SMI. Returns 0 on success or nonzero - on failure. */ + /* + * Attempt to detect an SMI. Returns 0 on success or nonzero + * on failure. + */ int (*detect)(struct si_sm_data *smi); /* The interface is shutting down, so clean it up. 
*/ diff --git a/drivers/char/ipmi/ipmi_smic_sm.c b/drivers/char/ipmi/ipmi_smic_sm.c index e64ea7d25d2..faed9297190 100644 --- a/drivers/char/ipmi/ipmi_smic_sm.c +++ b/drivers/char/ipmi/ipmi_smic_sm.c @@ -85,6 +85,7 @@ enum smic_states { /* SMIC Flags Register Bits */ #define SMIC_RX_DATA_READY 0x80 #define SMIC_TX_DATA_READY 0x40 + /* * SMIC_SMI and SMIC_EVM_DATA_AVAIL are only used by * a few systems, and then only by Systems Management @@ -104,23 +105,22 @@ enum smic_states { #define EC_ILLEGAL_COMMAND 0x04 #define EC_BUFFER_FULL 0x05 -struct si_sm_data -{ +struct si_sm_data { enum smic_states state; struct si_sm_io *io; - unsigned char write_data[MAX_SMIC_WRITE_SIZE]; - int write_pos; - int write_count; - int orig_write_count; - unsigned char read_data[MAX_SMIC_READ_SIZE]; - int read_pos; - int truncated; - unsigned int error_retries; - long smic_timeout; + unsigned char write_data[MAX_SMIC_WRITE_SIZE]; + int write_pos; + int write_count; + int orig_write_count; + unsigned char read_data[MAX_SMIC_READ_SIZE]; + int read_pos; + int truncated; + unsigned int error_retries; + long smic_timeout; }; -static unsigned int init_smic_data (struct si_sm_data *smic, - struct si_sm_io *io) +static unsigned int init_smic_data(struct si_sm_data *smic, + struct si_sm_io *io) { smic->state = SMIC_IDLE; smic->io = io; @@ -150,11 +150,10 @@ static int start_smic_transaction(struct si_sm_data *smic, return IPMI_NOT_IN_MY_STATE_ERR; if (smic_debug & SMIC_DEBUG_MSG) { - printk(KERN_INFO "start_smic_transaction -"); - for (i = 0; i < size; i ++) { - printk (" %02x", (unsigned char) (data [i])); - } - printk ("\n"); + printk(KERN_DEBUG "start_smic_transaction -"); + for (i = 0; i < size; i++) + printk(" %02x", (unsigned char) data[i]); + printk("\n"); } smic->error_retries = 0; memcpy(smic->write_data, data, size); @@ -173,11 +172,10 @@ static int smic_get_result(struct si_sm_data *smic, int i; if (smic_debug & SMIC_DEBUG_MSG) { - printk (KERN_INFO "smic_get result -"); - for (i = 0; i 
< smic->read_pos; i ++) { - printk (" %02x", (smic->read_data [i])); - } - printk ("\n"); + printk(KERN_DEBUG "smic_get result -"); + for (i = 0; i < smic->read_pos; i++) + printk(" %02x", smic->read_data[i]); + printk("\n"); } if (length < smic->read_pos) { smic->read_pos = length; @@ -223,8 +221,8 @@ static inline void write_smic_control(struct si_sm_data *smic, smic->io->outputb(smic->io, 1, control); } -static inline void write_si_sm_data (struct si_sm_data *smic, - unsigned char data) +static inline void write_si_sm_data(struct si_sm_data *smic, + unsigned char data) { smic->io->outputb(smic->io, 0, data); } @@ -233,10 +231,9 @@ static inline void start_error_recovery(struct si_sm_data *smic, char *reason) { (smic->error_retries)++; if (smic->error_retries > SMIC_MAX_ERROR_RETRIES) { - if (smic_debug & SMIC_DEBUG_ENABLE) { + if (smic_debug & SMIC_DEBUG_ENABLE) printk(KERN_WARNING "ipmi_smic_drv: smic hosed: %s\n", reason); - } smic->state = SMIC_HOSED; } else { smic->write_count = smic->orig_write_count; @@ -254,14 +251,14 @@ static inline void write_next_byte(struct si_sm_data *smic) (smic->write_count)--; } -static inline void read_next_byte (struct si_sm_data *smic) +static inline void read_next_byte(struct si_sm_data *smic) { if (smic->read_pos >= MAX_SMIC_READ_SIZE) { - read_smic_data (smic); + read_smic_data(smic); smic->truncated = 1; } else { smic->read_data[smic->read_pos] = read_smic_data(smic); - (smic->read_pos)++; + smic->read_pos++; } } @@ -336,7 +333,7 @@ static inline void read_next_byte (struct si_sm_data *smic) SMIC_SC_SMS_RD_END 0xC6 */ -static enum si_sm_result smic_event (struct si_sm_data *smic, long time) +static enum si_sm_result smic_event(struct si_sm_data *smic, long time) { unsigned char status; unsigned char flags; @@ -347,13 +344,15 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) return SI_SM_HOSED; } if (smic->state != SMIC_IDLE) { - if (smic_debug & SMIC_DEBUG_STATES) { - printk(KERN_INFO + if 
(smic_debug & SMIC_DEBUG_STATES) + printk(KERN_DEBUG "smic_event - smic->smic_timeout = %ld," " time = %ld\n", smic->smic_timeout, time); - } -/* FIXME: smic_event is sometimes called with time > SMIC_RETRY_TIMEOUT */ + /* + * FIXME: smic_event is sometimes called with time > + * SMIC_RETRY_TIMEOUT + */ if (time < SMIC_RETRY_TIMEOUT) { smic->smic_timeout -= time; if (smic->smic_timeout < 0) { @@ -366,9 +365,9 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) if (flags & SMIC_FLAG_BSY) return SI_SM_CALL_WITH_DELAY; - status = read_smic_status (smic); + status = read_smic_status(smic); if (smic_debug & SMIC_DEBUG_STATES) - printk(KERN_INFO + printk(KERN_DEBUG "smic_event - state = %d, flags = 0x%02x," " status = 0x%02x\n", smic->state, flags, status); @@ -377,9 +376,7 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) case SMIC_IDLE: /* in IDLE we check for available messages */ if (flags & SMIC_SMS_DATA_AVAIL) - { return SI_SM_ATTN; - } return SI_SM_IDLE; case SMIC_START_OP: @@ -391,7 +388,7 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) case SMIC_OP_OK: if (status != SMIC_SC_SMS_READY) { - /* this should not happen */ + /* this should not happen */ start_error_recovery(smic, "state = SMIC_OP_OK," " status != SMIC_SC_SMS_READY"); @@ -411,8 +408,10 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) "status != SMIC_SC_SMS_WR_START"); return SI_SM_CALL_WITH_DELAY; } - /* we must not issue WR_(NEXT|END) unless - TX_DATA_READY is set */ + /* + * we must not issue WR_(NEXT|END) unless + * TX_DATA_READY is set + * */ if (flags & SMIC_TX_DATA_READY) { if (smic->write_count == 1) { /* last byte */ @@ -424,10 +423,8 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) } write_next_byte(smic); write_smic_flags(smic, flags | SMIC_FLAG_BSY); - } - else { + } else return SI_SM_CALL_WITH_DELAY; - } break; case SMIC_WRITE_NEXT: @@ -442,52 +439,48 @@ static 
enum si_sm_result smic_event (struct si_sm_data *smic, long time) if (smic->write_count == 1) { write_smic_control(smic, SMIC_CC_SMS_WR_END); smic->state = SMIC_WRITE_END; - } - else { + } else { write_smic_control(smic, SMIC_CC_SMS_WR_NEXT); smic->state = SMIC_WRITE_NEXT; } write_next_byte(smic); write_smic_flags(smic, flags | SMIC_FLAG_BSY); - } - else { + } else return SI_SM_CALL_WITH_DELAY; - } break; case SMIC_WRITE_END: if (status != SMIC_SC_SMS_WR_END) { - start_error_recovery (smic, - "state = SMIC_WRITE_END, " - "status != SMIC_SC_SMS_WR_END"); + start_error_recovery(smic, + "state = SMIC_WRITE_END, " + "status != SMIC_SC_SMS_WR_END"); return SI_SM_CALL_WITH_DELAY; } /* data register holds an error code */ data = read_smic_data(smic); if (data != 0) { - if (smic_debug & SMIC_DEBUG_ENABLE) { - printk(KERN_INFO + if (smic_debug & SMIC_DEBUG_ENABLE) + printk(KERN_DEBUG "SMIC_WRITE_END: data = %02x\n", data); - } start_error_recovery(smic, "state = SMIC_WRITE_END, " "data != SUCCESS"); return SI_SM_CALL_WITH_DELAY; - } else { + } else smic->state = SMIC_WRITE2READ; - } break; case SMIC_WRITE2READ: - /* we must wait for RX_DATA_READY to be set before we - can continue */ + /* + * we must wait for RX_DATA_READY to be set before we + * can continue + */ if (flags & SMIC_RX_DATA_READY) { write_smic_control(smic, SMIC_CC_SMS_RD_START); write_smic_flags(smic, flags | SMIC_FLAG_BSY); smic->state = SMIC_READ_START; - } else { + } else return SI_SM_CALL_WITH_DELAY; - } break; case SMIC_READ_START: @@ -502,15 +495,16 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) write_smic_control(smic, SMIC_CC_SMS_RD_NEXT); write_smic_flags(smic, flags | SMIC_FLAG_BSY); smic->state = SMIC_READ_NEXT; - } else { + } else return SI_SM_CALL_WITH_DELAY; - } break; case SMIC_READ_NEXT: switch (status) { - /* smic tells us that this is the last byte to be read - --> clean up */ + /* + * smic tells us that this is the last byte to be read + * --> clean up + */ 
case SMIC_SC_SMS_RD_END: read_next_byte(smic); write_smic_control(smic, SMIC_CC_SMS_RD_END); @@ -523,9 +517,8 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) write_smic_control(smic, SMIC_CC_SMS_RD_NEXT); write_smic_flags(smic, flags | SMIC_FLAG_BSY); smic->state = SMIC_READ_NEXT; - } else { + } else return SI_SM_CALL_WITH_DELAY; - } break; default: start_error_recovery( @@ -546,10 +539,9 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) data = read_smic_data(smic); /* data register holds an error code */ if (data != 0) { - if (smic_debug & SMIC_DEBUG_ENABLE) { - printk(KERN_INFO + if (smic_debug & SMIC_DEBUG_ENABLE) + printk(KERN_DEBUG "SMIC_READ_END: data = %02x\n", data); - } start_error_recovery(smic, "state = SMIC_READ_END, " "data != SUCCESS"); @@ -565,7 +557,7 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) default: if (smic_debug & SMIC_DEBUG_ENABLE) { - printk(KERN_WARNING "smic->state = %d\n", smic->state); + printk(KERN_DEBUG "smic->state = %d\n", smic->state); start_error_recovery(smic, "state = UNKNOWN"); return SI_SM_CALL_WITH_DELAY; } @@ -576,10 +568,12 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time) static int smic_detect(struct si_sm_data *smic) { - /* It's impossible for the SMIC fnags register to be all 1's, - (assuming a properly functioning, self-initialized BMC) - but that's what you get from reading a bogus address, so we - test that first. */ + /* + * It's impossible for the SMIC fnags register to be all 1's, + * (assuming a properly functioning, self-initialized BMC) + * but that's what you get from reading a bogus address, so we + * test that first. 
+ */ if (read_smic_flags(smic) == 0xff) return 1; @@ -595,8 +589,7 @@ static int smic_size(void) return sizeof(struct si_sm_data); } -struct si_sm_handlers smic_smi_handlers = -{ +struct si_sm_handlers smic_smi_handlers = { .init_data = init_smic_data, .start_transaction = start_smic_transaction, .get_result = smic_get_result, diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 8f45ca9235a..1b9a8704781 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -54,13 +54,15 @@ #include <asm/atomic.h> #ifdef CONFIG_X86 -/* This is ugly, but I've determined that x86 is the only architecture - that can reasonably support the IPMI NMI watchdog timeout at this - time. If another architecture adds this capability somehow, it - will have to be a somewhat different mechanism and I have no idea - how it will work. So in the unlikely event that another - architecture supports this, we can figure out a good generic - mechanism for it at that time. */ +/* + * This is ugly, but I've determined that x86 is the only architecture + * that can reasonably support the IPMI NMI watchdog timeout at this + * time. If another architecture adds this capability somehow, it + * will have to be a somewhat different mechanism and I have no idea + * how it will work. So in the unlikely event that another + * architecture supports this, we can figure out a good generic + * mechanism for it at that time. + */ #include <asm/kdebug.h> #define HAVE_DIE_NMI #endif @@ -95,9 +97,8 @@ /* Operations that can be performed on a pretimout. */ #define WDOG_PREOP_NONE 0 #define WDOG_PREOP_PANIC 1 -#define WDOG_PREOP_GIVE_DATA 2 /* Cause data to be available to - read. Doesn't work in NMI - mode. */ +/* Cause data to be available to read. Doesn't work in NMI mode. */ +#define WDOG_PREOP_GIVE_DATA 2 /* Actions to perform on a full timeout. 
*/ #define WDOG_SET_TIMEOUT_ACT(byte, use) \ @@ -108,8 +109,10 @@ #define WDOG_TIMEOUT_POWER_DOWN 2 #define WDOG_TIMEOUT_POWER_CYCLE 3 -/* Byte 3 of the get command, byte 4 of the get response is the - pre-timeout in seconds. */ +/* + * Byte 3 of the get command, byte 4 of the get response is the + * pre-timeout in seconds. + */ /* Bits for setting byte 4 of the set command, byte 5 of the get response. */ #define WDOG_EXPIRE_CLEAR_BIOS_FRB2 (1 << 1) @@ -118,11 +121,13 @@ #define WDOG_EXPIRE_CLEAR_SMS_OS (1 << 4) #define WDOG_EXPIRE_CLEAR_OEM (1 << 5) -/* Setting/getting the watchdog timer value. This is for bytes 5 and - 6 (the timeout time) of the set command, and bytes 6 and 7 (the - timeout time) and 8 and 9 (the current countdown value) of the - response. The timeout value is given in seconds (in the command it - is 100ms intervals). */ +/* + * Setting/getting the watchdog timer value. This is for bytes 5 and + * 6 (the timeout time) of the set command, and bytes 6 and 7 (the + * timeout time) and 8 and 9 (the current countdown value) of the + * response. The timeout value is given in seconds (in the command it + * is 100ms intervals). + */ #define WDOG_SET_TIMEOUT(byte1, byte2, val) \ (byte1) = (((val) * 10) & 0xff), (byte2) = (((val) * 10) >> 8) #define WDOG_GET_TIMEOUT(byte1, byte2) \ @@ -184,8 +189,10 @@ static int ipmi_set_timeout(int do_heartbeat); static void ipmi_register_watchdog(int ipmi_intf); static void ipmi_unregister_watchdog(int ipmi_intf); -/* If true, the driver will start running as soon as it is configured - and ready. */ +/* + * If true, the driver will start running as soon as it is configured + * and ready. + */ static int start_now; static int set_param_int(const char *val, struct kernel_param *kp) @@ -309,10 +316,12 @@ static int ipmi_ignore_heartbeat; /* Is someone using the watchdog? Only one user is allowed. 
*/ static unsigned long ipmi_wdog_open; -/* If set to 1, the heartbeat command will set the state to reset and - start the timer. The timer doesn't normally run when the driver is - first opened until the heartbeat is set the first time, this - variable is used to accomplish this. */ +/* + * If set to 1, the heartbeat command will set the state to reset and + * start the timer. The timer doesn't normally run when the driver is + * first opened until the heartbeat is set the first time, this + * variable is used to accomplish this. + */ static int ipmi_start_timer_on_heartbeat; /* IPMI version of the BMC. */ @@ -329,10 +338,12 @@ static int nmi_handler_registered; static int ipmi_heartbeat(void); -/* We use a mutex to make sure that only one thing can send a set - timeout at one time, because we only have one copy of the data. - The mutex is claimed when the set_timeout is sent and freed - when both messages are free. */ +/* + * We use a mutex to make sure that only one thing can send a set + * timeout at one time, because we only have one copy of the data. + * The mutex is claimed when the set_timeout is sent and freed + * when both messages are free. 
+ */ static atomic_t set_timeout_tofree = ATOMIC_INIT(0); static DEFINE_MUTEX(set_timeout_lock); static DECLARE_COMPLETION(set_timeout_wait); @@ -346,15 +357,13 @@ static void set_timeout_free_recv(struct ipmi_recv_msg *msg) if (atomic_dec_and_test(&set_timeout_tofree)) complete(&set_timeout_wait); } -static struct ipmi_smi_msg set_timeout_smi_msg = -{ +static struct ipmi_smi_msg set_timeout_smi_msg = { .done = set_timeout_free_smi }; -static struct ipmi_recv_msg set_timeout_recv_msg = -{ +static struct ipmi_recv_msg set_timeout_recv_msg = { .done = set_timeout_free_recv }; - + static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg, struct ipmi_recv_msg *recv_msg, int *send_heartbeat_now) @@ -373,13 +382,14 @@ static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg, WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); if ((ipmi_version_major > 1) - || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) - { + || ((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) { /* This is an IPMI 1.5-only feature. */ data[0] |= WDOG_DONT_STOP_ON_SET; } else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { - /* In ipmi 1.0, setting the timer stops the watchdog, we - need to start it back up again. */ + /* + * In ipmi 1.0, setting the timer stops the watchdog, we + * need to start it back up again. + */ hbnow = 1; } @@ -465,12 +475,10 @@ static void panic_recv_free(struct ipmi_recv_msg *msg) atomic_dec(&panic_done_count); } -static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = -{ +static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = { .done = panic_smi_free }; -static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg = -{ +static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg = { .done = panic_recv_free }; @@ -480,8 +488,10 @@ static void panic_halt_ipmi_heartbeat(void) struct ipmi_system_interface_addr addr; int rv; - /* Don't reset the timer if we have the timer turned off, that - re-enables the watchdog. 
*/ + /* + * Don't reset the timer if we have the timer turned off, that + * re-enables the watchdog. + */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) return; @@ -505,19 +515,19 @@ static void panic_halt_ipmi_heartbeat(void) atomic_add(2, &panic_done_count); } -static struct ipmi_smi_msg panic_halt_smi_msg = -{ +static struct ipmi_smi_msg panic_halt_smi_msg = { .done = panic_smi_free }; -static struct ipmi_recv_msg panic_halt_recv_msg = -{ +static struct ipmi_recv_msg panic_halt_recv_msg = { .done = panic_recv_free }; -/* Special call, doesn't claim any locks. This is only to be called - at panic or halt time, in run-to-completion mode, when the caller - is the only CPU and the only thing that will be going is these IPMI - calls. */ +/* + * Special call, doesn't claim any locks. This is only to be called + * at panic or halt time, in run-to-completion mode, when the caller + * is the only CPU and the only thing that will be going is these IPMI + * calls. + */ static void panic_halt_ipmi_set_timeout(void) { int send_heartbeat_now; @@ -540,10 +550,12 @@ static void panic_halt_ipmi_set_timeout(void) ipmi_poll_interface(watchdog_user); } -/* We use a semaphore to make sure that only one thing can send a - heartbeat at one time, because we only have one copy of the data. - The semaphore is claimed when the set_timeout is sent and freed - when both messages are free. */ +/* + * We use a mutex to make sure that only one thing can send a + * heartbeat at one time, because we only have one copy of the data. + * The semaphore is claimed when the set_timeout is sent and freed + * when both messages are free. 
+ */ static atomic_t heartbeat_tofree = ATOMIC_INIT(0); static DEFINE_MUTEX(heartbeat_lock); static DECLARE_COMPLETION(heartbeat_wait); @@ -557,15 +569,13 @@ static void heartbeat_free_recv(struct ipmi_recv_msg *msg) if (atomic_dec_and_test(&heartbeat_tofree)) complete(&heartbeat_wait); } -static struct ipmi_smi_msg heartbeat_smi_msg = -{ +static struct ipmi_smi_msg heartbeat_smi_msg = { .done = heartbeat_free_smi }; -static struct ipmi_recv_msg heartbeat_recv_msg = -{ +static struct ipmi_recv_msg heartbeat_recv_msg = { .done = heartbeat_free_recv }; - + static int ipmi_heartbeat(void) { struct kernel_ipmi_msg msg; @@ -580,10 +590,12 @@ static int ipmi_heartbeat(void) ipmi_watchdog_state = action_val; return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); } else if (pretimeout_since_last_heartbeat) { - /* A pretimeout occurred, make sure we set the timeout. - We don't want to set the action, though, we want to - leave that alone (thus it can't be combined with the - above operation. */ + /* + * A pretimeout occurred, make sure we set the timeout. + * We don't want to set the action, though, we want to + * leave that alone (thus it can't be combined with the + * above operation. + */ return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); } @@ -591,8 +603,10 @@ static int ipmi_heartbeat(void) atomic_set(&heartbeat_tofree, 2); - /* Don't reset the timer if we have the timer turned off, that - re-enables the watchdog. */ + /* + * Don't reset the timer if we have the timer turned off, that + * re-enables the watchdog. + */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) { mutex_unlock(&heartbeat_lock); return 0; @@ -625,10 +639,12 @@ static int ipmi_heartbeat(void) wait_for_completion(&heartbeat_wait); if (heartbeat_recv_msg.msg.data[0] != 0) { - /* Got an error in the heartbeat response. It was already - reported in ipmi_wdog_msg_handler, but we should return - an error here. */ - rv = -EINVAL; + /* + * Got an error in the heartbeat response. 
It was already + * reported in ipmi_wdog_msg_handler, but we should return + * an error here. + */ + rv = -EINVAL; } mutex_unlock(&heartbeat_lock); @@ -636,8 +652,7 @@ static int ipmi_heartbeat(void) return rv; } -static struct watchdog_info ident = -{ +static struct watchdog_info ident = { .options = 0, /* WDIOF_SETTIMEOUT, */ .firmware_version = 1, .identity = "IPMI" @@ -650,7 +665,7 @@ static int ipmi_ioctl(struct inode *inode, struct file *file, int i; int val; - switch(cmd) { + switch (cmd) { case WDIOC_GETSUPPORT: i = copy_to_user(argp, &ident, sizeof(ident)); return i ? -EFAULT : 0; @@ -690,15 +705,13 @@ static int ipmi_ioctl(struct inode *inode, struct file *file, i = copy_from_user(&val, argp, sizeof(int)); if (i) return -EFAULT; - if (val & WDIOS_DISABLECARD) - { + if (val & WDIOS_DISABLECARD) { ipmi_watchdog_state = WDOG_TIMEOUT_NONE; ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); ipmi_start_timer_on_heartbeat = 0; } - if (val & WDIOS_ENABLECARD) - { + if (val & WDIOS_ENABLECARD) { ipmi_watchdog_state = action_val; ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); } @@ -724,13 +737,13 @@ static ssize_t ipmi_write(struct file *file, int rv; if (len) { - if (!nowayout) { - size_t i; + if (!nowayout) { + size_t i; /* In case it was set long ago */ expect_close = 0; - for (i = 0; i != len; i++) { + for (i = 0; i != len; i++) { char c; if (get_user(c, buf + i)) @@ -758,15 +771,17 @@ static ssize_t ipmi_read(struct file *file, if (count <= 0) return 0; - /* Reading returns if the pretimeout has gone off, and it only does - it once per pretimeout. */ + /* + * Reading returns if the pretimeout has gone off, and it only does + * it once per pretimeout. 
+ */ spin_lock(&ipmi_read_lock); if (!data_to_read) { if (file->f_flags & O_NONBLOCK) { rv = -EAGAIN; goto out; } - + init_waitqueue_entry(&wait, current); add_wait_queue(&read_q, &wait); while (!data_to_read) { @@ -776,7 +791,7 @@ static ssize_t ipmi_read(struct file *file, spin_lock(&ipmi_read_lock); } remove_wait_queue(&read_q, &wait); - + if (signal_pending(current)) { rv = -ERESTARTSYS; goto out; @@ -799,25 +814,27 @@ static ssize_t ipmi_read(struct file *file, static int ipmi_open(struct inode *ino, struct file *filep) { - switch (iminor(ino)) { - case WATCHDOG_MINOR: + switch (iminor(ino)) { + case WATCHDOG_MINOR: if (test_and_set_bit(0, &ipmi_wdog_open)) - return -EBUSY; + return -EBUSY; - /* Don't start the timer now, let it start on the - first heartbeat. */ + /* + * Don't start the timer now, let it start on the + * first heartbeat. + */ ipmi_start_timer_on_heartbeat = 1; return nonseekable_open(ino, filep); default: return (-ENODEV); - } + } } static unsigned int ipmi_poll(struct file *file, poll_table *wait) { unsigned int mask = 0; - + poll_wait(file, &read_q, wait); spin_lock(&ipmi_read_lock); @@ -851,7 +868,7 @@ static int ipmi_close(struct inode *ino, struct file *filep) clear_bit(0, &ipmi_wdog_open); } - ipmi_fasync (-1, filep, 0); + ipmi_fasync(-1, filep, 0); expect_close = 0; return 0; @@ -882,7 +899,7 @@ static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg, msg->msg.data[0], msg->msg.cmd); } - + ipmi_free_recv_msg(msg); } @@ -902,14 +919,14 @@ static void ipmi_wdog_pretimeout_handler(void *handler_data) } } - /* On some machines, the heartbeat will give - an error and not work unless we re-enable - the timer. So do so. */ + /* + * On some machines, the heartbeat will give an error and not + * work unless we re-enable the timer. So do so. 
+ */ pretimeout_since_last_heartbeat = 1; } -static struct ipmi_user_hndl ipmi_hndlrs = -{ +static struct ipmi_user_hndl ipmi_hndlrs = { .ipmi_recv_hndl = ipmi_wdog_msg_handler, .ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler }; @@ -949,8 +966,10 @@ static void ipmi_register_watchdog(int ipmi_intf) int old_timeout = timeout; int old_preop_val = preop_val; - /* Set the pretimeout to go off in a second and give - ourselves plenty of time to stop the timer. */ + /* + * Set the pretimeout to go off in a second and give + * ourselves plenty of time to stop the timer. + */ ipmi_watchdog_state = WDOG_TIMEOUT_RESET; preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ pretimeout = 99; @@ -974,7 +993,7 @@ static void ipmi_register_watchdog(int ipmi_intf) " occur. The NMI pretimeout will" " likely not work\n"); } - out_restore: + out_restore: testing_nmi = 0; preop_val = old_preop_val; pretimeout = old_pretimeout; @@ -1009,9 +1028,11 @@ static void ipmi_unregister_watchdog(int ipmi_intf) /* Make sure no one can call us any more. */ misc_deregister(&ipmi_wdog_miscdev); - /* Wait to make sure the message makes it out. The lower layer has - pointers to our buffers, we want to make sure they are done before - we release our memory. */ + /* + * Wait to make sure the message makes it out. The lower layer has + * pointers to our buffers, we want to make sure they are done before + * we release our memory. + */ while (atomic_read(&set_timeout_tofree)) schedule_timeout_uninterruptible(1); @@ -1052,15 +1073,17 @@ ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) return NOTIFY_STOP; } - /* If we are not expecting a timeout, ignore it. */ + /* If we are not expecting a timeout, ignore it. */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) return NOTIFY_OK; if (preaction_val != WDOG_PRETIMEOUT_NMI) return NOTIFY_OK; - /* If no one else handled the NMI, we assume it was the IPMI - watchdog. 
*/ + /* + * If no one else handled the NMI, we assume it was the IPMI + * watchdog. + */ if (preop_val == WDOG_PREOP_PANIC) { /* On some machines, the heartbeat will give an error and not work unless we re-enable @@ -1082,7 +1105,7 @@ static int wdog_reboot_handler(struct notifier_block *this, unsigned long code, void *unused) { - static int reboot_event_handled = 0; + static int reboot_event_handled; if ((watchdog_user) && (!reboot_event_handled)) { /* Make sure we only do this once. */ @@ -1115,7 +1138,7 @@ static int wdog_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - static int panic_event_handled = 0; + static int panic_event_handled; /* On a panic, if we have a panic timeout, make sure to extend the watchdog timer to a reasonable value to complete the @@ -1125,7 +1148,7 @@ static int wdog_panic_handler(struct notifier_block *this, ipmi_watchdog_state != WDOG_TIMEOUT_NONE) { /* Make sure we do this only once. */ panic_event_handled = 1; - + timeout = 255; pretimeout = 0; panic_halt_ipmi_set_timeout(); @@ -1151,8 +1174,7 @@ static void ipmi_smi_gone(int if_num) ipmi_unregister_watchdog(if_num); } -static struct ipmi_smi_watcher smi_watcher = -{ +static struct ipmi_smi_watcher smi_watcher = { .owner = THIS_MODULE, .new_smi = ipmi_new_smi, .smi_gone = ipmi_smi_gone diff --git a/drivers/char/mem.c b/drivers/char/mem.c index e83623ead44..934ffafedae 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -364,6 +364,7 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) return 0; } +#ifdef CONFIG_DEVKMEM static int mmap_kmem(struct file * file, struct vm_area_struct * vma) { unsigned long pfn; @@ -384,6 +385,7 @@ static int mmap_kmem(struct file * file, struct vm_area_struct * vma) vma->vm_pgoff = pfn; return mmap_mem(file, vma); } +#endif #ifdef CONFIG_CRASH_DUMP /* @@ -422,6 +424,7 @@ static ssize_t read_oldmem(struct file *file, char __user *buf, extern long vread(char *buf, char *addr, unsigned long 
count); extern long vwrite(char *buf, char *addr, unsigned long count); +#ifdef CONFIG_DEVKMEM /* * This function reads the *virtual* memory as seen by the kernel. */ @@ -626,6 +629,7 @@ static ssize_t write_kmem(struct file * file, const char __user * buf, *ppos = p; return virtr + wrote; } +#endif #ifdef CONFIG_DEVPORT static ssize_t read_port(struct file * file, char __user * buf, @@ -803,6 +807,7 @@ static const struct file_operations mem_fops = { .get_unmapped_area = get_unmapped_area_mem, }; +#ifdef CONFIG_DEVKMEM static const struct file_operations kmem_fops = { .llseek = memory_lseek, .read = read_kmem, @@ -811,6 +816,7 @@ static const struct file_operations kmem_fops = { .open = open_kmem, .get_unmapped_area = get_unmapped_area_mem, }; +#endif static const struct file_operations null_fops = { .llseek = null_lseek, @@ -889,11 +895,13 @@ static int memory_open(struct inode * inode, struct file * filp) filp->f_mapping->backing_dev_info = &directly_mappable_cdev_bdi; break; +#ifdef CONFIG_DEVKMEM case 2: filp->f_op = &kmem_fops; filp->f_mapping->backing_dev_info = &directly_mappable_cdev_bdi; break; +#endif case 3: filp->f_op = &null_fops; break; @@ -942,7 +950,9 @@ static const struct { const struct file_operations *fops; } devlist[] = { /* list of minor devices */ {1, "mem", S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops}, +#ifdef CONFIG_DEVKMEM {2, "kmem", S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops}, +#endif {3, "null", S_IRUGO | S_IWUGO, &null_fops}, #ifdef CONFIG_DEVPORT {4, "port", S_IRUSR | S_IWUSR | S_IRGRP, &port_fops}, diff --git a/drivers/char/misc.c b/drivers/char/misc.c index 4d058dadbfc..eaace0db0ff 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -263,23 +263,26 @@ EXPORT_SYMBOL(misc_deregister); static int __init misc_init(void) { -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *ent; + int err; - ent = create_proc_entry("misc", 0, NULL); - if (ent) - ent->proc_fops = &misc_proc_fops; +#ifdef CONFIG_PROC_FS + proc_create("misc", 0, NULL, 
&misc_proc_fops); #endif misc_class = class_create(THIS_MODULE, "misc"); + err = PTR_ERR(misc_class); if (IS_ERR(misc_class)) - return PTR_ERR(misc_class); + goto fail_remove; - if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) { - printk("unable to get major %d for misc devices\n", - MISC_MAJOR); - class_destroy(misc_class); - return -EIO; - } + err = -EIO; + if (register_chrdev(MISC_MAJOR,"misc",&misc_fops)) + goto fail_printk; return 0; + +fail_printk: + printk("unable to get major %d for misc devices\n", MISC_MAJOR); + class_destroy(misc_class); +fail_remove: + remove_proc_entry("misc", NULL); + return err; } subsys_initcall(misc_init); diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index 1f978ff87fa..fa9d3c945f3 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c @@ -354,32 +354,6 @@ struct ipw_rx_packet { unsigned int channel_idx; }; -#ifdef IPWIRELESS_STATE_DEBUG -int ipwireless_dump_hardware_state(char *p, size_t limit, - struct ipw_hardware *hw) -{ - return snprintf(p, limit, - "debug: initializing=%d\n" - "debug: tx_ready=%d\n" - "debug: tx_queued=%d\n" - "debug: rx_ready=%d\n" - "debug: rx_bytes_queued=%d\n" - "debug: blocking_rx=%d\n" - "debug: removed=%d\n" - "debug: hardware.shutting_down=%d\n" - "debug: to_setup=%d\n", - hw->initializing, - hw->tx_ready, - hw->tx_queued, - hw->rx_ready, - hw->rx_bytes_queued, - hw->blocking_rx, - hw->removed, - hw->shutting_down, - hw->to_setup); -} -#endif - static char *data_type(const unsigned char *buf, unsigned length) { struct nl_packet_header *hdr = (struct nl_packet_header *) buf; diff --git a/drivers/char/pcmcia/ipwireless/hardware.h b/drivers/char/pcmcia/ipwireless/hardware.h index c83190ffb0e..19ce5eb266b 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.h +++ b/drivers/char/pcmcia/ipwireless/hardware.h @@ -58,7 +58,5 @@ void ipwireless_init_hardware_v1(struct ipw_hardware *hw, void *reboot_cb_data); 
void ipwireless_init_hardware_v2_v3(struct ipw_hardware *hw); void ipwireless_sleep(unsigned int tenths); -int ipwireless_dump_hardware_state(char *p, size_t limit, - struct ipw_hardware *hw); #endif diff --git a/drivers/char/pcmcia/ipwireless/network.c b/drivers/char/pcmcia/ipwireless/network.c index d793e68b3e0..fe914d34f7f 100644 --- a/drivers/char/pcmcia/ipwireless/network.c +++ b/drivers/char/pcmcia/ipwireless/network.c @@ -63,21 +63,6 @@ struct ipw_network { struct work_struct work_go_offline; }; - -#ifdef IPWIRELESS_STATE_DEBUG -int ipwireless_dump_network_state(char *p, size_t limit, - struct ipw_network *network) -{ - return snprintf(p, limit, - "debug: ppp_blocked=%d\n" - "debug: outgoing_packets_queued=%d\n" - "debug: network.shutting_down=%d\n", - network->ppp_blocked, - network->outgoing_packets_queued, - network->shutting_down); -} -#endif - static void notify_packet_sent(void *callback_data, unsigned int packet_length) { struct ipw_network *network = callback_data; diff --git a/drivers/char/pcmcia/ipwireless/network.h b/drivers/char/pcmcia/ipwireless/network.h index b0e1e952fd1..ccacd26fc7e 100644 --- a/drivers/char/pcmcia/ipwireless/network.h +++ b/drivers/char/pcmcia/ipwireless/network.h @@ -49,7 +49,4 @@ void ipwireless_ppp_close(struct ipw_network *net); int ipwireless_ppp_channel_index(struct ipw_network *net); int ipwireless_ppp_unit_number(struct ipw_network *net); -int ipwireless_dump_network_state(char *p, size_t limit, - struct ipw_network *net); - #endif diff --git a/drivers/char/random.c b/drivers/char/random.c index f43c89f7c44..0cf98bd4f2d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -272,7 +272,7 @@ static int random_write_wakeup_thresh = 128; static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28; -static DEFINE_PER_CPU(int, trickle_count) = 0; +static DEFINE_PER_CPU(int, trickle_count); /* * A pool of size .poolwords is stirred with a primitive polynomial @@ -370,17 +370,19 @@ static struct poolinfo { */ 
static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); +static struct fasync_struct *fasync; #if 0 -static int debug = 0; +static int debug; module_param(debug, bool, 0644); -#define DEBUG_ENT(fmt, arg...) do { if (debug) \ - printk(KERN_DEBUG "random %04d %04d %04d: " \ - fmt,\ - input_pool.entropy_count,\ - blocking_pool.entropy_count,\ - nonblocking_pool.entropy_count,\ - ## arg); } while (0) +#define DEBUG_ENT(fmt, arg...) do { \ + if (debug) \ + printk(KERN_DEBUG "random %04d %04d %04d: " \ + fmt,\ + input_pool.entropy_count,\ + blocking_pool.entropy_count,\ + nonblocking_pool.entropy_count,\ + ## arg); } while (0) #else #define DEBUG_ENT(fmt, arg...) do {} while (0) #endif @@ -394,7 +396,7 @@ module_param(debug, bool, 0644); struct entropy_store; struct entropy_store { - /* mostly-read data: */ + /* read-only data: */ struct poolinfo *poolinfo; __u32 *pool; const char *name; @@ -402,7 +404,7 @@ struct entropy_store { struct entropy_store *pull; /* read-write data: */ - spinlock_t lock ____cacheline_aligned_in_smp; + spinlock_t lock; unsigned add_ptr; int entropy_count; int input_rotate; @@ -438,25 +440,26 @@ static struct entropy_store nonblocking_pool = { }; /* - * This function adds a byte into the entropy "pool". It does not + * This function adds bytes into the entropy "pool". It does not * update the entropy estimate. The caller should call - * credit_entropy_store if this is appropriate. + * credit_entropy_bits if this is appropriate. * * The pool is stirred with a primitive polynomial of the appropriate * degree, and then twisted. We twist by three bits at a time because * it's cheap to do so and helps slightly in the expected case where * the entropy is concentrated in the low-order bits. 
*/ -static void __add_entropy_words(struct entropy_store *r, const __u32 *in, - int nwords, __u32 out[16]) +static void mix_pool_bytes_extract(struct entropy_store *r, const void *in, + int nbytes, __u8 out[64]) { static __u32 const twist_table[8] = { 0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158, 0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 }; - unsigned long i, add_ptr, tap1, tap2, tap3, tap4, tap5; - int new_rotate, input_rotate; + unsigned long i, j, tap1, tap2, tap3, tap4, tap5; + int input_rotate; int wordmask = r->poolinfo->poolwords - 1; - __u32 w, next_w; + const char *bytes = in; + __u32 w; unsigned long flags; /* Taps are constant, so we can load them without holding r->lock. */ @@ -465,78 +468,76 @@ static void __add_entropy_words(struct entropy_store *r, const __u32 *in, tap3 = r->poolinfo->tap3; tap4 = r->poolinfo->tap4; tap5 = r->poolinfo->tap5; - next_w = *in++; spin_lock_irqsave(&r->lock, flags); - prefetch_range(r->pool, wordmask); input_rotate = r->input_rotate; - add_ptr = r->add_ptr; + i = r->add_ptr; - while (nwords--) { - w = rol32(next_w, input_rotate); - if (nwords > 0) - next_w = *in++; - i = add_ptr = (add_ptr - 1) & wordmask; - /* - * Normally, we add 7 bits of rotation to the pool. - * At the beginning of the pool, add an extra 7 bits - * rotation, so that successive passes spread the - * input bits across the pool evenly. 
- */ - new_rotate = input_rotate + 14; - if (i) - new_rotate = input_rotate + 7; - input_rotate = new_rotate & 31; + /* mix one byte at a time to simplify size handling and churn faster */ + while (nbytes--) { + w = rol32(*bytes++, input_rotate & 31); + i = (i - 1) & wordmask; /* XOR in the various taps */ + w ^= r->pool[i]; w ^= r->pool[(i + tap1) & wordmask]; w ^= r->pool[(i + tap2) & wordmask]; w ^= r->pool[(i + tap3) & wordmask]; w ^= r->pool[(i + tap4) & wordmask]; w ^= r->pool[(i + tap5) & wordmask]; - w ^= r->pool[i]; + + /* Mix the result back in with a twist */ r->pool[i] = (w >> 3) ^ twist_table[w & 7]; + + /* + * Normally, we add 7 bits of rotation to the pool. + * At the beginning of the pool, add an extra 7 bits + * rotation, so that successive passes spread the + * input bits across the pool evenly. + */ + input_rotate += i ? 7 : 14; } r->input_rotate = input_rotate; - r->add_ptr = add_ptr; + r->add_ptr = i; - if (out) { - for (i = 0; i < 16; i++) { - out[i] = r->pool[add_ptr]; - add_ptr = (add_ptr - 1) & wordmask; - } - } + if (out) + for (j = 0; j < 16; j++) + ((__u32 *)out)[j] = r->pool[(i - j) & wordmask]; spin_unlock_irqrestore(&r->lock, flags); } -static inline void add_entropy_words(struct entropy_store *r, const __u32 *in, - int nwords) +static void mix_pool_bytes(struct entropy_store *r, const void *in, int bytes) { - __add_entropy_words(r, in, nwords, NULL); + mix_pool_bytes_extract(r, in, bytes, NULL); } /* * Credit (or debit) the entropy store with n bits of entropy */ -static void credit_entropy_store(struct entropy_store *r, int nbits) +static void credit_entropy_bits(struct entropy_store *r, int nbits) { unsigned long flags; + if (!nbits) + return; + spin_lock_irqsave(&r->lock, flags); - if (r->entropy_count + nbits < 0) { - DEBUG_ENT("negative entropy/overflow (%d+%d)\n", - r->entropy_count, nbits); + DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name); + r->entropy_count += nbits; + if (r->entropy_count < 0) { + 
DEBUG_ENT("negative entropy/overflow\n"); r->entropy_count = 0; - } else if (r->entropy_count + nbits > r->poolinfo->POOLBITS) { + } else if (r->entropy_count > r->poolinfo->POOLBITS) r->entropy_count = r->poolinfo->POOLBITS; - } else { - r->entropy_count += nbits; - if (nbits) - DEBUG_ENT("added %d entropy credits to %s\n", - nbits, r->name); + + /* should we wake readers? */ + if (r == &input_pool && + r->entropy_count >= random_read_wakeup_thresh) { + wake_up_interruptible(&random_read_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); } spin_unlock_irqrestore(&r->lock, flags); @@ -551,7 +552,7 @@ static void credit_entropy_store(struct entropy_store *r, int nbits) /* There is one of these per entropy source */ struct timer_rand_state { cycles_t last_time; - long last_delta,last_delta2; + long last_delta, last_delta2; unsigned dont_count_entropy:1; }; @@ -586,7 +587,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) sample.jiffies = jiffies; sample.cycles = get_cycles(); sample.num = num; - add_entropy_words(&input_pool, (u32 *)&sample, sizeof(sample)/4); + mix_pool_bytes(&input_pool, &sample, sizeof(sample)); /* * Calculate number of bits of randomness we probably added. @@ -620,13 +621,9 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) * Round down by 1 bit on general principles, * and limit entropy entimate to 12 bits. 
*/ - credit_entropy_store(&input_pool, - min_t(int, fls(delta>>1), 11)); + credit_entropy_bits(&input_pool, + min_t(int, fls(delta>>1), 11)); } - - if(input_pool.entropy_count >= random_read_wakeup_thresh) - wake_up_interruptible(&random_read_wait); - out: preempt_enable(); } @@ -677,7 +674,7 @@ void add_disk_randomness(struct gendisk *disk) * *********************************************************************/ -static ssize_t extract_entropy(struct entropy_store *r, void * buf, +static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int rsvd); /* @@ -704,10 +701,10 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) "(%d of %d requested)\n", r->name, bytes * 8, nbytes * 8, r->entropy_count); - bytes=extract_entropy(r->pull, tmp, bytes, - random_read_wakeup_thresh / 8, rsvd); - add_entropy_words(r, tmp, (bytes + 3) / 4); - credit_entropy_store(r, bytes*8); + bytes = extract_entropy(r->pull, tmp, bytes, + random_read_wakeup_thresh / 8, rsvd); + mix_pool_bytes(r, tmp, bytes); + credit_entropy_bits(r, bytes*8); } } @@ -744,13 +741,15 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, if (r->limit && nbytes + reserved >= r->entropy_count / 8) nbytes = r->entropy_count/8 - reserved; - if(r->entropy_count / 8 >= nbytes + reserved) + if (r->entropy_count / 8 >= nbytes + reserved) r->entropy_count -= nbytes*8; else r->entropy_count = reserved; - if (r->entropy_count < random_write_wakeup_thresh) + if (r->entropy_count < random_write_wakeup_thresh) { wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } } DEBUG_ENT("debiting %d entropy credits from %s%s\n", @@ -764,45 +763,46 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, static void extract_buf(struct entropy_store *r, __u8 *out) { int i; - __u32 data[16], buf[5 + SHA_WORKSPACE_WORDS]; + __u32 hash[5], workspace[SHA_WORKSPACE_WORDS]; + __u8 extract[64]; + + /* Generate a hash 
across the pool, 16 words (512 bits) at a time */ + sha_init(hash); + for (i = 0; i < r->poolinfo->poolwords; i += 16) + sha_transform(hash, (__u8 *)(r->pool + i), workspace); - sha_init(buf); /* - * As we hash the pool, we mix intermediate values of - * the hash back into the pool. This eliminates - * backtracking attacks (where the attacker knows - * the state of the pool plus the current outputs, and - * attempts to find previous ouputs), unless the hash - * function can be inverted. + * We mix the hash back into the pool to prevent backtracking + * attacks (where the attacker knows the state of the pool + * plus the current outputs, and attempts to find previous + * ouputs), unless the hash function can be inverted. By + * mixing at least a SHA1 worth of hash data back, we make + * brute-forcing the feedback as hard as brute-forcing the + * hash. */ - for (i = 0; i < r->poolinfo->poolwords; i += 16) { - /* hash blocks of 16 words = 512 bits */ - sha_transform(buf, (__u8 *)(r->pool + i), buf + 5); - /* feed back portion of the resulting hash */ - add_entropy_words(r, &buf[i % 5], 1); - } + mix_pool_bytes_extract(r, hash, sizeof(hash), extract); /* - * To avoid duplicates, we atomically extract a - * portion of the pool while mixing, and hash one - * final time. + * To avoid duplicates, we atomically extract a portion of the + * pool while mixing, and hash one final time. */ - __add_entropy_words(r, &buf[i % 5], 1, data); - sha_transform(buf, (__u8 *)data, buf + 5); + sha_transform(hash, extract, workspace); + memset(extract, 0, sizeof(extract)); + memset(workspace, 0, sizeof(workspace)); /* - * In case the hash function has some recognizable - * output pattern, we fold it in half. + * In case the hash function has some recognizable output + * pattern, we fold it in half. Thus, we always feed back + * twice as much data as we output. 
*/ - - buf[0] ^= buf[3]; - buf[1] ^= buf[4]; - buf[2] ^= rol32(buf[2], 16); - memcpy(out, buf, EXTRACT_SIZE); - memset(buf, 0, sizeof(buf)); + hash[0] ^= hash[3]; + hash[1] ^= hash[4]; + hash[2] ^= rol32(hash[2], 16); + memcpy(out, hash, EXTRACT_SIZE); + memset(hash, 0, sizeof(hash)); } -static ssize_t extract_entropy(struct entropy_store *r, void * buf, +static ssize_t extract_entropy(struct entropy_store *r, void *buf, size_t nbytes, int min, int reserved) { ssize_t ret = 0, i; @@ -872,7 +872,6 @@ void get_random_bytes(void *buf, int nbytes) { extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0); } - EXPORT_SYMBOL(get_random_bytes); /* @@ -894,12 +893,11 @@ static void init_std_data(struct entropy_store *r) spin_unlock_irqrestore(&r->lock, flags); now = ktime_get_real(); - add_entropy_words(r, (__u32 *)&now, sizeof(now)/4); - add_entropy_words(r, (__u32 *)utsname(), - sizeof(*(utsname()))/4); + mix_pool_bytes(r, &now, sizeof(now)); + mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); } -static int __init rand_initialize(void) +static int rand_initialize(void) { init_std_data(&input_pool); init_std_data(&blocking_pool); @@ -940,7 +938,7 @@ void rand_initialize_disk(struct gendisk *disk) #endif static ssize_t -random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) +random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { ssize_t n, retval = 0, count = 0; @@ -1002,8 +1000,7 @@ random_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) } static ssize_t -urandom_read(struct file * file, char __user * buf, - size_t nbytes, loff_t *ppos) +urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { return extract_entropy_user(&nonblocking_pool, buf, nbytes); } @@ -1038,16 +1035,15 @@ write_pool(struct entropy_store *r, const char __user *buffer, size_t count) count -= bytes; p += bytes; - add_entropy_words(r, buf, (bytes + 3) / 4); + mix_pool_bytes(r, buf, bytes); cond_resched(); 
} return 0; } -static ssize_t -random_write(struct file * file, const char __user * buffer, - size_t count, loff_t *ppos) +static ssize_t random_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { size_t ret; struct inode *inode = file->f_path.dentry->d_inode; @@ -1064,9 +1060,7 @@ random_write(struct file * file, const char __user * buffer, return (ssize_t)count; } -static int -random_ioctl(struct inode * inode, struct file * file, - unsigned int cmd, unsigned long arg) +static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { int size, ent_count; int __user *p = (int __user *)arg; @@ -1074,8 +1068,8 @@ random_ioctl(struct inode * inode, struct file * file, switch (cmd) { case RNDGETENTCNT: - ent_count = input_pool.entropy_count; - if (put_user(ent_count, p)) + /* inherently racy, no point locking */ + if (put_user(input_pool.entropy_count, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1083,13 +1077,7 @@ random_ioctl(struct inode * inode, struct file * file, return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - credit_entropy_store(&input_pool, ent_count); - /* - * Wake up waiting processes if we have enough - * entropy. - */ - if (input_pool.entropy_count >= random_read_wakeup_thresh) - wake_up_interruptible(&random_read_wait); + credit_entropy_bits(&input_pool, ent_count); return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) @@ -1104,39 +1092,45 @@ random_ioctl(struct inode * inode, struct file * file, size); if (retval < 0) return retval; - credit_entropy_store(&input_pool, ent_count); - /* - * Wake up waiting processes if we have enough - * entropy. - */ - if (input_pool.entropy_count >= random_read_wakeup_thresh) - wake_up_interruptible(&random_read_wait); + credit_entropy_bits(&input_pool, ent_count); return 0; case RNDZAPENTCNT: case RNDCLEARPOOL: /* Clear the entropy pool counters. 
*/ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - init_std_data(&input_pool); - init_std_data(&blocking_pool); - init_std_data(&nonblocking_pool); + rand_initialize(); return 0; default: return -EINVAL; } } +static int random_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &fasync); +} + +static int random_release(struct inode *inode, struct file *filp) +{ + return fasync_helper(-1, filp, 0, &fasync); +} + const struct file_operations random_fops = { .read = random_read, .write = random_write, .poll = random_poll, - .ioctl = random_ioctl, + .unlocked_ioctl = random_ioctl, + .fasync = random_fasync, + .release = random_release, }; const struct file_operations urandom_fops = { .read = urandom_read, .write = random_write, - .ioctl = random_ioctl, + .unlocked_ioctl = random_ioctl, + .fasync = random_fasync, + .release = random_release, }; /*************************************************************** @@ -1157,7 +1151,6 @@ void generate_random_uuid(unsigned char uuid_out[16]) /* Set the UUID variant to DCE */ uuid_out[8] = (uuid_out[8] & 0x3F) | 0x80; } - EXPORT_SYMBOL(generate_random_uuid); /******************************************************************** @@ -1339,7 +1332,7 @@ ctl_table random_table[] = { #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static __u32 twothirdsMD4Transform (__u32 const buf[4], __u32 const in[12]) +static __u32 twothirdsMD4Transform(__u32 const buf[4], __u32 const in[12]) { __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; @@ -1487,8 +1480,8 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, */ memcpy(hash, saddr, 16); - hash[4]=((__force u16)sport << 16) + (__force u16)dport; - memcpy(&hash[5],keyptr->secret,sizeof(__u32) * 7); + hash[4] = ((__force u16)sport << 16) + (__force u16)dport; + memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK; seq += keyptr->count; @@ -1538,10 +1531,10 @@ __u32 
secure_tcp_sequence_number(__be32 saddr, __be32 daddr, * Note that the words are placed into the starting vector, which is * then mixed with a partial MD4 over random data. */ - hash[0]=(__force u32)saddr; - hash[1]=(__force u32)daddr; - hash[2]=((__force u16)sport << 16) + (__force u16)dport; - hash[3]=keyptr->secret[11]; + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = keyptr->secret[11]; seq = half_md4_transform(hash, keyptr->secret) & HASH_MASK; seq += keyptr->count; @@ -1556,10 +1549,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, * Choosing a clock of 64 ns period is OK. (period of 274 s) */ seq += ktime_to_ns(ktime_get_real()) >> 6; -#if 0 - printk("init_seq(%lx, %lx, %d, %d) = %d\n", - saddr, daddr, sport, dport, seq); -#endif + return seq; } @@ -1582,14 +1572,15 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) { struct keydata *keyptr = get_keyptr(); u32 hash[12]; memcpy(hash, saddr, 16); hash[4] = (__force u32)dport; - memcpy(&hash[5],keyptr->secret,sizeof(__u32) * 7); + memcpy(&hash[5], keyptr->secret, sizeof(__u32) * 7); return twothirdsMD4Transform((const __u32 *)daddr, hash); } @@ -1617,13 +1608,9 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, seq += ktime_to_ns(ktime_get_real()); seq &= (1ull << 48) - 1; -#if 0 - printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", - saddr, daddr, sport, dport, seq); -#endif + return seq; } - EXPORT_SYMBOL(secure_dccp_sequence_number); #endif diff --git a/drivers/char/rocket_int.h b/drivers/char/rocket_int.h index b01d38125a8..143cc432fdb 100644 --- a/drivers/char/rocket_int.h +++ b/drivers/char/rocket_int.h @@ -55,7 +55,7 @@ static inline 
void sOutW(unsigned short port, unsigned short value) static inline void out32(unsigned short port, Byte_t *p) { - u32 value = le32_to_cpu(get_unaligned((__le32 *)p)); + u32 value = get_unaligned_le32(p); #ifdef ROCKET_DEBUG_IO printk(KERN_DEBUG "out32(%x, %lx)...\n", port, value); #endif diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index e2ec2ee4cf7..5f80a9dff57 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -1069,10 +1069,8 @@ no_irq: } #ifdef CONFIG_PROC_FS - ent = create_proc_entry("driver/rtc", 0, NULL); - if (ent) - ent->proc_fops = &rtc_proc_fops; - else + ent = proc_create("driver/rtc", 0, NULL, &rtc_proc_fops); + if (!ent) printk(KERN_WARNING "rtc: Failed to register with procfs.\n"); #endif diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index 1b75b0b7d54..31a7765eaf7 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -63,16 +63,13 @@ static int scdrv_parse_event(char *event, int *src, int *code, int *esp_code, char *desc) { char *desc_end; - __be32 from_buf; /* record event source address */ - from_buf = get_unaligned((__be32 *)event); - *src = be32_to_cpup(&from_buf); + *src = get_unaligned_be32(event); event += 4; /* move on to event code */ /* record the system controller's event code */ - from_buf = get_unaligned((__be32 *)event); - *code = be32_to_cpup(&from_buf); + *code = get_unaligned_be32(event); event += 4; /* move on to event arguments */ /* how many arguments are in the packet? 
*/ @@ -86,8 +83,7 @@ scdrv_parse_event(char *event, int *src, int *code, int *esp_code, char *desc) /* not an integer argument, so give up */ return -1; } - from_buf = get_unaligned((__be32 *)event); - *esp_code = be32_to_cpup(&from_buf); + *esp_code = get_unaligned_be32(event); event += 4; /* parse out the event description */ diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 1ade193c912..9e9bad8bdcf 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -196,6 +196,48 @@ static struct sysrq_key_op sysrq_showlocks_op = { #define sysrq_showlocks_op (*(struct sysrq_key_op *)0) #endif +#ifdef CONFIG_SMP +static DEFINE_SPINLOCK(show_lock); + +static void showacpu(void *dummy) +{ + unsigned long flags; + + /* Idle CPUs have no interesting backtrace. */ + if (idle_cpu(smp_processor_id())) + return; + + spin_lock_irqsave(&show_lock, flags); + printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + show_stack(NULL, NULL); + spin_unlock_irqrestore(&show_lock, flags); +} + +static void sysrq_showregs_othercpus(struct work_struct *dummy) +{ + smp_call_function(showacpu, NULL, 0, 0); +} + +static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); + +static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) +{ + struct pt_regs *regs = get_irq_regs(); + if (regs) { + printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + show_regs(regs); + } + schedule_work(&sysrq_showallcpus); +} + +static struct sysrq_key_op sysrq_showallcpus_op = { + .handler = sysrq_handle_showallcpus, + .help_msg = "aLlcpus", + .action_msg = "Show backtrace of all active CPUs", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; +#endif + static void sysrq_handle_showregs(int key, struct tty_struct *tty) { struct pt_regs *regs = get_irq_regs(); @@ -340,7 +382,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = { &sysrq_kill_op, /* i */ NULL, /* j */ &sysrq_SAK_op, /* k */ +#ifdef CONFIG_SMP + &sysrq_showallcpus_op, /* l */ +#else NULL, /* l */ +#endif &sysrq_showmem_op, /* m */ 
&sysrq_unrt_op, /* n */ /* o: This will often be registered as 'Off' at init time */ diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c index ce5ebe3b168..64f1ceed0b2 100644 --- a/drivers/char/toshiba.c +++ b/drivers/char/toshiba.c @@ -520,12 +520,11 @@ static int __init toshiba_init(void) { struct proc_dir_entry *pde; - pde = create_proc_entry("toshiba", 0, NULL); + pde = proc_create("toshiba", 0, NULL, &proc_toshiba_fops); if (!pde) { misc_deregister(&tosh_device); return -ENOMEM; } - pde->proc_fops = &proc_toshiba_fops; } #endif diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 8f3f7620f95..3738cfa209f 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -23,7 +23,7 @@ if TCG_TPM config TCG_TIS tristate "TPM Interface Specification 1.2 Interface" - depends on PNPACPI + depends on PNP ---help--- If you have a TPM security chip that is compliant with the TCG TIS 1.2 TPM specification say Yes and it will be accessible @@ -32,7 +32,6 @@ config TCG_TIS config TCG_NSC tristate "National Semiconductor TPM Interface" - depends on PNPACPI ---help--- If you have a TPM security chip from National Semiconductor say Yes and it will be accessible from within Linux. 
To @@ -48,7 +47,7 @@ config TCG_ATMEL config TCG_INFINEON tristate "Infineon Technologies TPM Interface" - depends on PNPACPI + depends on PNP ---help--- If you have a TPM security chip from Infineon Technologies (either SLD 9630 TT 1.1 or SLB 9635 TT 1.2) say Yes and it diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 6313326bc41..ab18c1e7b11 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -264,7 +264,7 @@ static const struct tpm_vendor_specific tpm_nsc = { static struct platform_device *pdev = NULL; -static void __devexit tpm_nsc_remove(struct device *dev) +static void tpm_nsc_remove(struct device *dev) { struct tpm_chip *chip = dev_get_drvdata(dev); if ( chip ) { diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index db7a731e236..58aad63831f 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -249,6 +249,7 @@ static int proc_viotape_open(struct inode *inode, struct file *file) } static const struct file_operations proc_viotape_operations = { + .owner = THIS_MODULE, .open = proc_viotape_open, .read = seq_read, .llseek = seq_lseek, @@ -915,7 +916,6 @@ static struct vio_driver viotape_driver = { int __init viotap_init(void) { int ret; - struct proc_dir_entry *e; if (!firmware_has_feature(FW_FEATURE_ISERIES)) return -ENODEV; @@ -968,11 +968,8 @@ int __init viotap_init(void) if (ret) goto unreg_class; - e = create_proc_entry("iSeries/viotape", S_IFREG|S_IRUGO, NULL); - if (e) { - e->owner = THIS_MODULE; - e->proc_fops = &proc_viotape_operations; - } + proc_create("iSeries/viotape", S_IFREG|S_IRUGO, NULL, + &proc_viotape_operations); return 0; diff --git a/drivers/char/vt.c b/drivers/char/vt.c index df4c3ead9e2..1c266047713 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -301,7 +301,7 @@ static void scrup(struct vc_data *vc, unsigned int t, unsigned int b, int nr) d = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t); s = (unsigned short *)(vc->vc_origin + vc->vc_size_row 
* (t + nr)); scr_memmovew(d, s, (b - t - nr) * vc->vc_size_row); - scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_video_erase_char, + scr_memsetw(d + (b - t - nr) * vc->vc_cols, vc->vc_scrl_erase_char, vc->vc_size_row * nr); } @@ -319,7 +319,7 @@ static void scrdown(struct vc_data *vc, unsigned int t, unsigned int b, int nr) s = (unsigned short *)(vc->vc_origin + vc->vc_size_row * t); step = vc->vc_cols * nr; scr_memmovew(s + step, s, (b - t - nr) * vc->vc_size_row); - scr_memsetw(s, vc->vc_video_erase_char, 2 * step); + scr_memsetw(s, vc->vc_scrl_erase_char, 2 * step); } static void do_update_region(struct vc_data *vc, unsigned long start, int count) @@ -400,7 +400,7 @@ static u8 build_attr(struct vc_data *vc, u8 _color, u8 _intensity, u8 _blink, * Bit 7 : blink */ { - u8 a = vc->vc_color; + u8 a = _color; if (!vc->vc_can_do_color) return _intensity | (_italic ? 2 : 0) | @@ -434,6 +434,7 @@ static void update_attr(struct vc_data *vc) vc->vc_blink, vc->vc_underline, vc->vc_reverse ^ vc->vc_decscnm, vc->vc_italic); vc->vc_video_erase_char = (build_attr(vc, vc->vc_color, 1, vc->vc_blink, 0, vc->vc_decscnm, 0) << 8) | ' '; + vc->vc_scrl_erase_char = (build_attr(vc, vc->vc_def_color, 1, false, false, false, false) << 8) | ' '; } /* Note: inverting the screen twice should revert to the original state */ diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index c159ae64eeb..5f076aef74f 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -69,6 +69,15 @@ config CPU_FREQ_DEFAULT_GOV_PERFORMANCE the frequency statically to the highest frequency supported by the CPU. +config CPU_FREQ_DEFAULT_GOV_POWERSAVE + bool "powersave" + depends on EMBEDDED + select CPU_FREQ_GOV_POWERSAVE + help + Use the CPUFreq governor 'powersave' as default. This sets + the frequency statically to the lowest frequency supported by + the CPU. 
+ config CPU_FREQ_DEFAULT_GOV_USERSPACE bool "userspace" select CPU_FREQ_GOV_USERSPACE diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d3575f5ec6d..7fce038fa57 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -583,15 +583,13 @@ out: i += sprintf(&buf[i], "\n"); return i; } -/** - * show_affected_cpus - show the CPUs affected by each transition - */ -static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) + +static ssize_t show_cpus(cpumask_t mask, char *buf) { ssize_t i = 0; unsigned int cpu; - for_each_cpu_mask(cpu, policy->cpus) { + for_each_cpu_mask(cpu, mask) { if (i) i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); @@ -602,6 +600,25 @@ static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) return i; } +/** + * show_related_cpus - show the CPUs affected by each transition even if + * hw coordination is in use + */ +static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) +{ + if (cpus_empty(policy->related_cpus)) + return show_cpus(policy->cpus, buf); + return show_cpus(policy->related_cpus, buf); +} + +/** + * show_affected_cpus - show the CPUs affected by each transition + */ +static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) +{ + return show_cpus(policy->cpus, buf); +} + static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, const char *buf, size_t count) { @@ -646,6 +663,7 @@ define_one_ro(cpuinfo_max_freq); define_one_ro(scaling_available_governors); define_one_ro(scaling_driver); define_one_ro(scaling_cur_freq); +define_one_ro(related_cpus); define_one_ro(affected_cpus); define_one_rw(scaling_min_freq); define_one_rw(scaling_max_freq); @@ -658,6 +676,7 @@ static struct attribute *default_attrs[] = { &scaling_min_freq.attr, &scaling_max_freq.attr, &affected_cpus.attr, + &related_cpus.attr, &scaling_governor.attr, &scaling_driver.attr, 
&scaling_available_governors.attr, diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index 13fe06b94b0..88d2f44fba4 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -35,12 +35,12 @@ static int cpufreq_governor_powersave(struct cpufreq_policy *policy, return 0; } -static struct cpufreq_governor cpufreq_gov_powersave = { +struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", .governor = cpufreq_governor_powersave, .owner = THIS_MODULE, }; - +EXPORT_SYMBOL(cpufreq_gov_powersave); static int __init cpufreq_gov_powersave_init(void) { @@ -58,5 +58,9 @@ MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); MODULE_LICENSE("GPL"); +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +fs_initcall(cpufreq_gov_powersave_init); +#else module_init(cpufreq_gov_powersave_init); +#endif module_exit(cpufreq_gov_powersave_exit); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index ef09e069433..ae70d63a8b2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -288,7 +288,7 @@ cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, if (!stat) return 0; - old_index = freq_table_get_index(stat, freq->old); + old_index = stat->last_index; new_index = freq_table_get_index(stat, freq->new); cpufreq_stats_update(freq->cpu); diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2b382990fe5..6e6c3c4aea6 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -67,7 +67,7 @@ config EDAC_E7XXX E7205, E7500, E7501 and E7505 server chipsets. 
config EDAC_E752X - tristate "Intel e752x (e7520, e7525, e7320)" + tristate "Intel e752x (e7520, e7525, e7320) and 3100" depends on EDAC_MM_EDAC && PCI && X86 && HOTPLUG help Support for error detection and correction on the Intel diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index f2207541059..2b95f1a3edf 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -17,6 +17,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> +#include <linux/edac.h> #include "edac_core.h" #define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ @@ -344,6 +345,9 @@ static struct pci_driver amd76x_driver = { static int __init amd76x_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&amd76x_driver); } @@ -358,3 +362,6 @@ module_exit(amd76x_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); MODULE_DESCRIPTION("MC support for AMD 76x memory controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index 6eb434749cd..c94a0eb492c 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -29,6 +29,7 @@ #define EDAC_MOD_STR "e752x_edac" static int force_function_unhide; +static int sysbus_parity = -1; static struct edac_pci_ctl_info *e752x_pci; @@ -62,6 +63,14 @@ static struct edac_pci_ctl_info *e752x_pci; #define PCI_DEVICE_ID_INTEL_7320_1_ERR 0x3593 #endif /* PCI_DEVICE_ID_INTEL_7320_1_ERR */ +#ifndef PCI_DEVICE_ID_INTEL_3100_0 +#define PCI_DEVICE_ID_INTEL_3100_0 0x35B0 +#endif /* PCI_DEVICE_ID_INTEL_3100_0 */ + +#ifndef PCI_DEVICE_ID_INTEL_3100_1_ERR +#define PCI_DEVICE_ID_INTEL_3100_1_ERR 0x35B1 +#endif /* PCI_DEVICE_ID_INTEL_3100_1_ERR */ + #define E752X_NR_CSROWS 8 /* number of csrows */ /* E752X register addresses - device 0 function 0 */ @@ -152,6 
+161,12 @@ static struct edac_pci_ctl_info *e752x_pci; /* error syndrome register (16b) */ #define E752X_DEVPRES1 0xF4 /* Device Present 1 register (8b) */ +/* 3100 IMCH specific register addresses - device 0 function 1 */ +#define I3100_NSI_FERR 0x48 /* NSI first error reg (32b) */ +#define I3100_NSI_NERR 0x4C /* NSI next error reg (32b) */ +#define I3100_NSI_SMICMD 0x54 /* NSI SMI command register (32b) */ +#define I3100_NSI_EMASK 0x90 /* NSI error mask register (32b) */ + /* ICH5R register addresses - device 30 function 0 */ #define ICH5R_PCI_STAT 0x06 /* PCI status register (16b) */ #define ICH5R_PCI_2ND_STAT 0x1E /* PCI status secondary reg (16b) */ @@ -160,7 +175,8 @@ static struct edac_pci_ctl_info *e752x_pci; enum e752x_chips { E7520 = 0, E7525 = 1, - E7320 = 2 + E7320 = 2, + I3100 = 3 }; struct e752x_pvt { @@ -185,8 +201,10 @@ struct e752x_dev_info { struct e752x_error_info { u32 ferr_global; u32 nerr_global; - u8 hi_ferr; - u8 hi_nerr; + u32 nsi_ferr; /* 3100 only */ + u32 nsi_nerr; /* 3100 only */ + u8 hi_ferr; /* all but 3100 */ + u8 hi_nerr; /* all but 3100 */ u16 sysbus_ferr; u16 sysbus_nerr; u8 buf_ferr; @@ -215,6 +233,10 @@ static const struct e752x_dev_info e752x_devs[] = { .err_dev = PCI_DEVICE_ID_INTEL_7320_1_ERR, .ctl_dev = PCI_DEVICE_ID_INTEL_7320_0, .ctl_name = "E7320"}, + [I3100] = { + .err_dev = PCI_DEVICE_ID_INTEL_3100_1_ERR, + .ctl_dev = PCI_DEVICE_ID_INTEL_3100_0, + .ctl_name = "3100"}, }; static unsigned long ctl_page_to_phys(struct mem_ctl_info *mci, @@ -402,7 +424,7 @@ static inline void process_threshold_ce(struct mem_ctl_info *mci, u16 error, static char *global_message[11] = { "PCI Express C1", "PCI Express C", "PCI Express B1", "PCI Express B", "PCI Express A1", "PCI Express A", - "DMA Controler", "HUB Interface", "System Bus", + "DMA Controler", "HUB or NS Interface", "System Bus", "DRAM Controler", "Internal Buffer" }; @@ -455,6 +477,63 @@ static inline void hub_error(int fatal, u8 errors, int *error_found, do_hub_error(fatal, 
errors); } +#define NSI_FATAL_MASK 0x0c080081 +#define NSI_NON_FATAL_MASK 0x23a0ba64 +#define NSI_ERR_MASK (NSI_FATAL_MASK | NSI_NON_FATAL_MASK) + +static char *nsi_message[30] = { + "NSI Link Down", /* NSI_FERR/NSI_NERR bit 0, fatal error */ + "", /* reserved */ + "NSI Parity Error", /* bit 2, non-fatal */ + "", /* reserved */ + "", /* reserved */ + "Correctable Error Message", /* bit 5, non-fatal */ + "Non-Fatal Error Message", /* bit 6, non-fatal */ + "Fatal Error Message", /* bit 7, fatal */ + "", /* reserved */ + "Receiver Error", /* bit 9, non-fatal */ + "", /* reserved */ + "Bad TLP", /* bit 11, non-fatal */ + "Bad DLLP", /* bit 12, non-fatal */ + "REPLAY_NUM Rollover", /* bit 13, non-fatal */ + "", /* reserved */ + "Replay Timer Timeout", /* bit 15, non-fatal */ + "", /* reserved */ + "", /* reserved */ + "", /* reserved */ + "Data Link Protocol Error", /* bit 19, fatal */ + "", /* reserved */ + "Poisoned TLP", /* bit 21, non-fatal */ + "", /* reserved */ + "Completion Timeout", /* bit 23, non-fatal */ + "Completer Abort", /* bit 24, non-fatal */ + "Unexpected Completion", /* bit 25, non-fatal */ + "Receiver Overflow", /* bit 26, fatal */ + "Malformed TLP", /* bit 27, fatal */ + "", /* reserved */ + "Unsupported Request" /* bit 29, non-fatal */ +}; + +static void do_nsi_error(int fatal, u32 errors) +{ + int i; + + for (i = 0; i < 30; i++) { + if (errors & (1 << i)) + printk(KERN_WARNING "%sError %s\n", + fatal_message[fatal], nsi_message[i]); + } +} + +static inline void nsi_error(int fatal, u32 errors, int *error_found, + int handle_error) +{ + *error_found = 1; + + if (handle_error) + do_nsi_error(fatal, errors); +} + static char *membuf_message[4] = { "Internal PMWB to DRAM parity", "Internal PMWB to System Bus Parity", @@ -546,6 +625,31 @@ static void e752x_check_hub_interface(struct e752x_error_info *info, } } +static void e752x_check_ns_interface(struct e752x_error_info *info, + int *error_found, int handle_error) +{ + u32 stat32; + + stat32 = 
info->nsi_ferr; + if (stat32 & NSI_ERR_MASK) { /* Error, so process */ + if (stat32 & NSI_FATAL_MASK) /* check for fatal errors */ + nsi_error(1, stat32 & NSI_FATAL_MASK, error_found, + handle_error); + if (stat32 & NSI_NON_FATAL_MASK) /* check for non-fatal ones */ + nsi_error(0, stat32 & NSI_NON_FATAL_MASK, error_found, + handle_error); + } + stat32 = info->nsi_nerr; + if (stat32 & NSI_ERR_MASK) { + if (stat32 & NSI_FATAL_MASK) + nsi_error(1, stat32 & NSI_FATAL_MASK, error_found, + handle_error); + if (stat32 & NSI_NON_FATAL_MASK) + nsi_error(0, stat32 & NSI_NON_FATAL_MASK, error_found, + handle_error); + } +} + static void e752x_check_sysbus(struct e752x_error_info *info, int *error_found, int handle_error) { @@ -653,7 +757,15 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, pci_read_config_dword(dev, E752X_FERR_GLOBAL, &info->ferr_global); if (info->ferr_global) { - pci_read_config_byte(dev, E752X_HI_FERR, &info->hi_ferr); + if (pvt->dev_info->err_dev == PCI_DEVICE_ID_INTEL_3100_1_ERR) { + pci_read_config_dword(dev, I3100_NSI_FERR, + &info->nsi_ferr); + info->hi_ferr = 0; + } else { + pci_read_config_byte(dev, E752X_HI_FERR, + &info->hi_ferr); + info->nsi_ferr = 0; + } pci_read_config_word(dev, E752X_SYSBUS_FERR, &info->sysbus_ferr); pci_read_config_byte(dev, E752X_BUF_FERR, &info->buf_ferr); @@ -669,10 +781,15 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, pci_read_config_dword(dev, E752X_DRAM_RETR_ADD, &info->dram_retr_add); + /* ignore the reserved bits just in case */ if (info->hi_ferr & 0x7f) pci_write_config_byte(dev, E752X_HI_FERR, info->hi_ferr); + if (info->nsi_ferr & NSI_ERR_MASK) + pci_write_config_dword(dev, I3100_NSI_FERR, + info->nsi_ferr); + if (info->sysbus_ferr) pci_write_config_word(dev, E752X_SYSBUS_FERR, info->sysbus_ferr); @@ -692,7 +809,15 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, pci_read_config_dword(dev, E752X_NERR_GLOBAL, &info->nerr_global); if (info->nerr_global) { - 
pci_read_config_byte(dev, E752X_HI_NERR, &info->hi_nerr); + if (pvt->dev_info->err_dev == PCI_DEVICE_ID_INTEL_3100_1_ERR) { + pci_read_config_dword(dev, I3100_NSI_NERR, + &info->nsi_nerr); + info->hi_nerr = 0; + } else { + pci_read_config_byte(dev, E752X_HI_NERR, + &info->hi_nerr); + info->nsi_nerr = 0; + } pci_read_config_word(dev, E752X_SYSBUS_NERR, &info->sysbus_nerr); pci_read_config_byte(dev, E752X_BUF_NERR, &info->buf_nerr); @@ -706,6 +831,10 @@ static void e752x_get_error_info(struct mem_ctl_info *mci, pci_write_config_byte(dev, E752X_HI_NERR, info->hi_nerr); + if (info->nsi_nerr & NSI_ERR_MASK) + pci_write_config_dword(dev, I3100_NSI_NERR, + info->nsi_nerr); + if (info->sysbus_nerr) pci_write_config_word(dev, E752X_SYSBUS_NERR, info->sysbus_nerr); @@ -750,6 +879,7 @@ static int e752x_process_error_info(struct mem_ctl_info *mci, global_error(0, stat32, &error_found, handle_errors); e752x_check_hub_interface(info, &error_found, handle_errors); + e752x_check_ns_interface(info, &error_found, handle_errors); e752x_check_sysbus(info, &error_found, handle_errors); e752x_check_membuf(info, &error_found, handle_errors); e752x_check_dram(mci, info, &error_found, handle_errors); @@ -920,15 +1050,53 @@ fail: return 1; } +/* Setup system bus parity mask register. 
+ * Sysbus parity supported on: + * e7320/e7520/e7525 + Xeon + * i3100 + Xeon/Celeron + * Sysbus parity not supported on: + * i3100 + Pentium M/Celeron M/Core Duo/Core2 Duo + */ +static void e752x_init_sysbus_parity_mask(struct e752x_pvt *pvt) +{ + char *cpu_id = cpu_data(0).x86_model_id; + struct pci_dev *dev = pvt->dev_d0f1; + int enable = 1; + + /* Allow module parameter override, else see if CPU supports parity */ + if (sysbus_parity != -1) { + enable = sysbus_parity; + } else if (cpu_id[0] && + ((strstr(cpu_id, "Pentium") && strstr(cpu_id, " M ")) || + (strstr(cpu_id, "Celeron") && strstr(cpu_id, " M ")) || + (strstr(cpu_id, "Core") && strstr(cpu_id, "Duo")))) { + e752x_printk(KERN_INFO, "System Bus Parity not " + "supported by CPU, disabling\n"); + enable = 0; + } + + if (enable) + pci_write_config_word(dev, E752X_SYSBUS_ERRMASK, 0x0000); + else + pci_write_config_word(dev, E752X_SYSBUS_ERRMASK, 0x0309); +} + static void e752x_init_error_reporting_regs(struct e752x_pvt *pvt) { struct pci_dev *dev; dev = pvt->dev_d0f1; /* Turn off error disable & SMI in case the BIOS turned it on */ - pci_write_config_byte(dev, E752X_HI_ERRMASK, 0x00); - pci_write_config_byte(dev, E752X_HI_SMICMD, 0x00); - pci_write_config_word(dev, E752X_SYSBUS_ERRMASK, 0x00); + if (pvt->dev_info->err_dev == PCI_DEVICE_ID_INTEL_3100_1_ERR) { + pci_write_config_dword(dev, I3100_NSI_EMASK, 0); + pci_write_config_dword(dev, I3100_NSI_SMICMD, 0); + } else { + pci_write_config_byte(dev, E752X_HI_ERRMASK, 0x00); + pci_write_config_byte(dev, E752X_HI_SMICMD, 0x00); + } + + e752x_init_sysbus_parity_mask(pvt); + pci_write_config_word(dev, E752X_SYSBUS_SMICMD, 0x00); pci_write_config_byte(dev, E752X_BUF_ERRMASK, 0x00); pci_write_config_byte(dev, E752X_BUF_SMICMD, 0x00); @@ -949,16 +1117,6 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s(): mci\n", __func__); debugf0("Starting Probe1\n"); - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case 
EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - /* check to see if device 0 function 1 is enabled; if it isn't, we * assume the BIOS has reserved it for a reason and is expecting * exclusive access, we take care not to violate that assumption and @@ -985,8 +1143,9 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) debugf3("%s(): init mci\n", __func__); mci->mtype_cap = MEM_FLAG_RDDR; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED | - EDAC_FLAG_S4ECD4ED; + /* 3100 IMCH supports SECDED only */ + mci->edac_ctl_cap = (dev_idx == I3100) ? EDAC_FLAG_SECDED : + (EDAC_FLAG_NONE | EDAC_FLAG_SECDED | EDAC_FLAG_S4ECD4ED); /* FIXME - what if different memory types are in different csrows? */ mci->mod_name = EDAC_MOD_STR; mci->mod_ver = E752X_REVISION; @@ -1018,7 +1177,10 @@ static int e752x_probe1(struct pci_dev *pdev, int dev_idx) e752x_init_csrows(mci, pdev, ddrcsr); e752x_init_mem_map_table(pdev, pvt); - mci->edac_cap |= EDAC_FLAG_NONE; + if (dev_idx == I3100) + mci->edac_cap = EDAC_FLAG_SECDED; /* the only mode supported */ + else + mci->edac_cap |= EDAC_FLAG_NONE; debugf3("%s(): tolm, remapbase, remaplimit\n", __func__); /* load the top of low memory, remap base, and remap limit vars */ @@ -1110,6 +1272,9 @@ static const struct pci_device_id e752x_pci_tbl[] __devinitdata = { PCI_VEND_DEV(INTEL, 7320_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, E7320}, { + PCI_VEND_DEV(INTEL, 3100_0), PCI_ANY_ID, PCI_ANY_ID, 0, 0, + I3100}, + { 0, } /* 0 terminated list. */ }; @@ -1128,6 +1293,10 @@ static int __init e752x_init(void) int pci_rc; debugf3("%s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&e752x_driver); return (pci_rc < 0) ? 
pci_rc : 0; } @@ -1143,10 +1312,15 @@ module_exit(e752x_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); -MODULE_DESCRIPTION("MC support for Intel e752x memory controllers"); +MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); MODULE_PARM_DESC(force_function_unhide, "if BIOS sets Dev0:Fun1 up as hidden:" " 1=force unhide and hope BIOS doesn't fight driver for Dev0:Fun1 access"); + module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +module_param(sysbus_parity, int, 0444); +MODULE_PARM_DESC(sysbus_parity, "0=disable system bus parity checking," + " 1=enable system bus parity checking, default=auto-detect"); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 96ecc492664..c7d11cc4e21 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -414,16 +414,6 @@ static int e7xxx_probe1(struct pci_dev *pdev, int dev_idx) debugf0("%s(): mci\n", __func__); - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - pci_read_config_dword(pdev, E7XXX_DRC, &drc); drc_chan = dual_channel_active(drc, dev_idx); @@ -565,6 +555,9 @@ static struct pci_driver e7xxx_driver = { static int __init e7xxx_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&e7xxx_driver); } diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index b9552bc03de..63372fa7ecf 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -36,7 +36,7 @@ * is protected by the 'device_ctls_mutex' lock */ static DEFINE_MUTEX(device_ctls_mutex); -static struct list_head edac_device_list = LIST_HEAD_INIT(edac_device_list); +static LIST_HEAD(edac_device_list); 
#ifdef CONFIG_EDAC_DEBUG static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) @@ -375,37 +375,6 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info wait_for_completion(&edac_device->removal_complete); } -/** - * edac_device_find - * Search for a edac_device_ctl_info structure whose index is 'idx'. - * - * If found, return a pointer to the structure. - * Else return NULL. - * - * Caller must hold device_ctls_mutex. - */ -struct edac_device_ctl_info *edac_device_find(int idx) -{ - struct list_head *item; - struct edac_device_ctl_info *edac_dev; - - /* Iterate over list, looking for exact match of ID */ - list_for_each(item, &edac_device_list) { - edac_dev = list_entry(item, struct edac_device_ctl_info, link); - - if (edac_dev->dev_idx >= idx) { - if (edac_dev->dev_idx == idx) - return edac_dev; - - /* not on list, so terminate early */ - break; - } - } - - return NULL; -} -EXPORT_SYMBOL_GPL(edac_device_find); - /* * edac_device_workq_function * performs the operation scheduled by a workq request diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 063a1bffe38..a4cf1645f58 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -36,7 +36,7 @@ /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); -static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); +static LIST_HEAD(mc_devices); #ifdef CONFIG_EDAC_DEBUG @@ -886,24 +886,3 @@ void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, mci->csrows[csrow].channels[channel].ce_count++; } EXPORT_SYMBOL(edac_mc_handle_fbd_ce); - -/* - * Iterate over all MC instances and check for ECC, et al, errors - */ -void edac_check_mc_devices(void) -{ - struct list_head *item; - struct mem_ctl_info *mci; - - debugf3("%s()\n", __func__); - mutex_lock(&mem_ctls_mutex); - - list_for_each(item, &mc_devices) { - mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->edac_check != NULL) - mci->edac_check(mci); - } - - 
mutex_unlock(&mem_ctls_mutex); -} diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index cbc419c8ebc..233d4798c3a 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -27,7 +27,6 @@ extern int edac_mc_register_sysfs_main_kobj(struct mem_ctl_info *mci); extern void edac_mc_unregister_sysfs_main_kobj(struct mem_ctl_info *mci); extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci); extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci); -extern void edac_check_mc_devices(void); extern int edac_get_log_ue(void); extern int edac_get_log_ce(void); extern int edac_get_panic_on_ue(void); diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 32be43576a8..9b24340b52e 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -29,7 +29,7 @@ #include "edac_module.h" static DEFINE_MUTEX(edac_pci_ctls_mutex); -static struct list_head edac_pci_list = LIST_HEAD_INIT(edac_pci_list); +static LIST_HEAD(edac_pci_list); /* * edac_pci_alloc_ctl_info @@ -189,6 +189,9 @@ static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci) wait_for_completion(&pci->complete); } +#if 0 +/* Older code, but might use in the future */ + /* * edac_pci_find() * Search for an edac_pci_ctl_info structure whose index is 'idx' @@ -219,6 +222,7 @@ struct edac_pci_ctl_info *edac_pci_find(int idx) return NULL; } EXPORT_SYMBOL_GPL(edac_pci_find); +#endif /* * edac_pci_workq_function() @@ -422,7 +426,7 @@ EXPORT_SYMBOL_GPL(edac_pci_del_device); * * a Generic parity check API */ -void edac_pci_generic_check(struct edac_pci_ctl_info *pci) +static void edac_pci_generic_check(struct edac_pci_ctl_info *pci) { debugf4("%s()\n", __func__); edac_pci_do_parity_check(); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 71c3195d370..2c1fa1bb6df 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -37,17 +37,17 @@ int edac_pci_get_check_errors(void) return 
check_pci_errors; } -int edac_pci_get_log_pe(void) +static int edac_pci_get_log_pe(void) { return edac_pci_log_pe; } -int edac_pci_get_log_npe(void) +static int edac_pci_get_log_npe(void) { return edac_pci_log_npe; } -int edac_pci_get_panic_on_pe(void) +static int edac_pci_get_panic_on_pe(void) { return edac_pci_panic_on_pe; } @@ -197,7 +197,8 @@ error_out: * * unregister the kobj for the EDAC PCI instance */ -void edac_pci_unregister_sysfs_instance_kobj(struct edac_pci_ctl_info *pci) +static void edac_pci_unregister_sysfs_instance_kobj( + struct edac_pci_ctl_info *pci) { debugf0("%s()\n", __func__); @@ -337,7 +338,7 @@ static struct kobj_type ktype_edac_pci_main_kobj = { * setup the sysfs for EDAC PCI attributes * assumes edac_class has already been initialized */ -int edac_pci_main_kobj_setup(void) +static int edac_pci_main_kobj_setup(void) { int err; struct sysdev_class *edac_class; diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index 5d4292811c1..6c9a0f2a593 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -326,15 +326,6 @@ static int i3000_probe1(struct pci_dev *pdev, int dev_idx) return -ENODEV; } - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - c0dra[0] = readb(window + I3000_C0DRA + 0); /* ranks 0,1 */ c0dra[1] = readb(window + I3000_C0DRA + 1); /* ranks 2,3 */ c1dra[0] = readb(window + I3000_C1DRA + 0); /* ranks 0,1 */ @@ -503,6 +494,10 @@ static int __init i3000_init(void) int pci_rc; debugf3("MC: %s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i3000_driver); if (pci_rc < 0) goto fail0; diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 5a852017c17..4a16b5b61cf 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1286,16 +1286,6 @@ static int i5000_probe1(struct pci_dev *pdev, int 
dev_idx) if (PCI_FUNC(pdev->devfn) != 0) return -ENODEV; - /* make sure error reporting method is sane */ - switch (edac_op_state) { - case EDAC_OPSTATE_POLL: - case EDAC_OPSTATE_NMI: - break; - default: - edac_op_state = EDAC_OPSTATE_POLL; - break; - } - /* Ask the devices for the number of CSROWS and CHANNELS so * that we can calculate the memory resources, etc * @@ -1478,6 +1468,9 @@ static int __init i5000_init(void) debugf2("MC: " __FILE__ ": %s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i5000_driver); return (pci_rc < 0) ? pci_rc : 0; @@ -1501,5 +1494,6 @@ MODULE_AUTHOR ("Linux Networx (http://lnxi.com) Doug Thompson <norsk5@xmission.com>"); MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION); + module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index 83bfe37c4bb..c5305e3ee43 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -29,6 +29,7 @@ #include <linux/slab.h> +#include <linux/edac.h> #include "edac_core.h" #define I82443_REVISION "0.1" @@ -386,6 +387,9 @@ static struct pci_driver i82443bxgx_edacmc_driver = { static int __init i82443bxgx_edacmc_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&i82443bxgx_edacmc_driver); } @@ -400,3 +404,6 @@ module_exit(i82443bxgx_edacmc_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD"); MODULE_DESCRIPTION("EDAC MC support for Intel 82443BX/GX memory controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index f5ecd2c4d81..c0088ba9672 100644 --- 
a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -14,6 +14,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> +#include <linux/edac.h> #include "edac_core.h" #define I82860_REVISION " Ver: 2.0.2 " __DATE__ @@ -294,6 +295,9 @@ static int __init i82860_init(void) debugf3("%s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + if ((pci_rc = pci_register_driver(&i82860_driver)) < 0) goto fail0; @@ -345,3 +349,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " "Ben Woodard <woodard@redhat.com>"); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index 031abadc439..e43bdc43a1b 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -18,6 +18,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> +#include <linux/edac.h> #include "edac_core.h" #define I82875P_REVISION " Ver: 2.0.2 " __DATE__ @@ -393,6 +394,7 @@ static int i82875p_probe1(struct pci_dev *pdev, int dev_idx) struct i82875p_error_info discard; debugf0("%s()\n", __func__); + ovrfl_pdev = pci_get_device(PCI_VEND_DEV(INTEL, 82875_6), NULL); if (i82875p_setup_overfl_dev(pdev, &ovrfl_pdev, &ovrfl_window)) @@ -532,6 +534,10 @@ static int __init i82875p_init(void) int pci_rc; debugf3("%s()\n", __func__); + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i82875p_driver); if (pci_rc < 0) @@ -586,3 +592,6 @@ module_exit(i82875p_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh"); MODULE_DESCRIPTION("MC support for Intel 82875 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC 
Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 0ee88845693..2eed3ea2cf6 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -14,7 +14,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> - +#include <linux/edac.h> #include "edac_core.h" #define I82975X_REVISION " Ver: 1.0.0 " __DATE__ @@ -611,6 +611,9 @@ static int __init i82975x_init(void) debugf3("%s()\n", __func__); + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + pci_rc = pci_register_driver(&i82975x_driver); if (pci_rc < 0) goto fail0; @@ -664,3 +667,6 @@ module_exit(i82975x_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arvind R. <arvind@acarlab.com>"); MODULE_DESCRIPTION("MC support for Intel 82975 memory hub controllers"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c index 90320917be2..3fd65a56384 100644 --- a/drivers/edac/pasemi_edac.c +++ b/drivers/edac/pasemi_edac.c @@ -284,6 +284,9 @@ static struct pci_driver pasemi_edac_driver = { static int __init pasemi_edac_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&pasemi_edac_driver); } @@ -298,3 +301,6 @@ module_exit(pasemi_edac_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>"); MODULE_DESCRIPTION("MC support for PA Semi PWRficient memory controller"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index e25f712f2dc..9900675e959 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -20,6 +20,7 @@ #include <linux/pci.h> #include <linux/pci_ids.h> #include <linux/slab.h> +#include <linux/edac.h> 
#include "edac_core.h" #define R82600_REVISION " Ver: 2.0.2 " __DATE__ @@ -393,6 +394,9 @@ static struct pci_driver r82600_driver = { static int __init r82600_init(void) { + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + return pci_register_driver(&r82600_driver); } @@ -412,3 +416,6 @@ MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); MODULE_PARM_DESC(disable_hardware_scrub, "If set, disable the chipset's automatic scrub for CEs"); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 40ffd767647..dc2cec6127d 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -17,6 +17,15 @@ config EDD obscure configurations. Most disk controller BIOS vendors do not yet implement this feature. +config EDD_OFF + bool "Sets default behavior for EDD detection to off" + depends on EDD + default n + help + Say Y if you want EDD disabled by default, even though it is compiled into the + kernel. Say N if you want EDD enabled by default. EDD can be dynamically set + using the kernel parameter 'edd={on|skipmbr|off}'. 
+ config EFI_VARS tristate "EFI Variable Support via sysfs" depends on EFI diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index f235940719e..25918f7dfd0 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -63,7 +63,7 @@ static void smi_data_buf_free(void) return; dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n", - __FUNCTION__, smi_data_buf_phys_addr, smi_data_buf_size); + __func__, smi_data_buf_phys_addr, smi_data_buf_size); dma_free_coherent(&dcdbas_pdev->dev, smi_data_buf_size, smi_data_buf, smi_data_buf_handle); @@ -92,7 +92,7 @@ static int smi_data_buf_realloc(unsigned long size) if (!buf) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to allocate memory size %lu\n", - __FUNCTION__, size); + __func__, size); return -ENOMEM; } /* memory zeroed by dma_alloc_coherent */ @@ -110,7 +110,7 @@ static int smi_data_buf_realloc(unsigned long size) smi_data_buf_size = size; dev_dbg(&dcdbas_pdev->dev, "%s: phys: %x size: %lu\n", - __FUNCTION__, smi_data_buf_phys_addr, smi_data_buf_size); + __func__, smi_data_buf_phys_addr, smi_data_buf_size); return 0; } @@ -258,7 +258,7 @@ static int smi_request(struct smi_cmd *smi_cmd) if (smi_cmd->magic != SMI_CMD_MAGIC) { dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n", - __FUNCTION__); + __func__); return -EBADR; } @@ -267,7 +267,7 @@ static int smi_request(struct smi_cmd *smi_cmd) set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); if (smp_processor_id() != 0) { dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", - __FUNCTION__); + __func__); ret = -EBUSY; goto out; } @@ -428,7 +428,7 @@ static int host_control_smi(void) default: dev_dbg(&dcdbas_pdev->dev, "%s: invalid SMI type %u\n", - __FUNCTION__, host_control_smi_type); + __func__, host_control_smi_type); return -ENOSYS; } @@ -456,13 +456,13 @@ static void dcdbas_host_control(void) host_control_action = HC_ACTION_NONE; if (!smi_data_buf) { - dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __FUNCTION__); + 
dev_dbg(&dcdbas_pdev->dev, "%s: no SMI buffer\n", __func__); return; } if (smi_data_buf_size < sizeof(struct apm_cmd)) { dev_dbg(&dcdbas_pdev->dev, "%s: SMI buffer too small\n", - __FUNCTION__); + __func__); return; } diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index 477a3d0e3ca..6a8b1e037e0 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -123,7 +123,7 @@ static int create_packet(void *data, size_t length) if (!newpacket) { printk(KERN_WARNING "dell_rbu:%s: failed to allocate new " - "packet\n", __FUNCTION__); + "packet\n", __func__); retval = -ENOMEM; spin_lock(&rbu_data.lock); goto out_noalloc; @@ -152,7 +152,7 @@ static int create_packet(void *data, size_t length) printk(KERN_WARNING "dell_rbu:%s: failed to allocate " "invalid_addr_packet_array \n", - __FUNCTION__); + __func__); retval = -ENOMEM; spin_lock(&rbu_data.lock); goto out_alloc_packet; @@ -164,7 +164,7 @@ static int create_packet(void *data, size_t length) if (!packet_data_temp_buf) { printk(KERN_WARNING "dell_rbu:%s: failed to allocate new " - "packet\n", __FUNCTION__); + "packet\n", __func__); retval = -ENOMEM; spin_lock(&rbu_data.lock); goto out_alloc_packet_array; @@ -416,7 +416,7 @@ static int img_update_realloc(unsigned long size) */ if ((size != 0) && (rbu_data.image_update_buffer == NULL)) { printk(KERN_ERR "dell_rbu:%s: corruption " - "check failed\n", __FUNCTION__); + "check failed\n", __func__); return -EINVAL; } /* @@ -642,7 +642,7 @@ static ssize_t write_rbu_image_type(struct kobject *kobj, if (req_firm_rc) { printk(KERN_ERR "dell_rbu:%s request_firmware_nowait" - " failed %d\n", __FUNCTION__, rc); + " failed %d\n", __func__, rc); rc = -EIO; } else rbu_data.entry_created = 1; @@ -718,7 +718,7 @@ static int __init dcdrbu_init(void) if (IS_ERR(rbu_device)) { printk(KERN_ERR "dell_rbu:%s:platform_device_register_simple " - "failed\n", __FUNCTION__); + "failed\n", __func__); return PTR_ERR(rbu_device); } diff --git 
a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e03c67dd3e6..f43d6d3cf2f 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -606,7 +606,7 @@ static u8 *fetch_item(__u8 *start, __u8 *end, struct hid_item *item) case 2: if ((end - start) < 2) return NULL; - item->data.u16 = le16_to_cpu(get_unaligned((__le16*)start)); + item->data.u16 = get_unaligned_le16(start); start = (__u8 *)((__le16 *)start + 1); return start; @@ -614,7 +614,7 @@ static u8 *fetch_item(__u8 *start, __u8 *end, struct hid_item *item) item->size++; if ((end - start) < 4) return NULL; - item->data.u32 = le32_to_cpu(get_unaligned((__le32*)start)); + item->data.u32 = get_unaligned_le32(start); start = (__u8 *)((__le32 *)start + 1); return start; } @@ -765,7 +765,7 @@ static __inline__ __u32 extract(__u8 *report, unsigned offset, unsigned n) report += offset >> 3; /* adjust byte index */ offset &= 7; /* now only need bit offset into one byte */ - x = le64_to_cpu(get_unaligned((__le64 *) report)); + x = get_unaligned_le64(report); x = (x >> offset) & ((1ULL << n) - 1); /* extract bit field */ return (u32) x; } diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index fe9df38f62c..68e7f19dc03 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -782,7 +782,7 @@ static ide_startstop_t cdrom_start_seek_continuation(ide_drive_t *drive) sector_div(frame, queue_hardsect_size(drive->queue) >> SECTOR_BITS); - memset(rq->cmd, 0, sizeof(rq->cmd)); + memset(rq->cmd, 0, BLK_MAX_CDB); rq->cmd[0] = GPCMD_SEEK; put_unaligned(cpu_to_be32(frame), (unsigned int *) &rq->cmd[2]); @@ -1694,7 +1694,7 @@ static int ide_cdrom_prep_fs(struct request_queue *q, struct request *rq) long block = (long)rq->hard_sector / (hard_sect >> 9); unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9); - memset(rq->cmd, 0, sizeof(rq->cmd)); + memset(rq->cmd, 0, BLK_MAX_CDB); if (rq_data_dir(rq) == READ) rq->cmd[0] = GPCMD_READ_10; diff --git a/drivers/ide/ide-cd_verbose.c 
b/drivers/ide/ide-cd_verbose.c index 6ed7ca07133..6490a2dea96 100644 --- a/drivers/ide/ide-cd_verbose.c +++ b/drivers/ide/ide-cd_verbose.c @@ -326,7 +326,7 @@ void ide_cd_log_error(const char *name, struct request *failed_command, printk(KERN_ERR " The failed \"%s\" packet command " "was: \n \"", s); - for (i = 0; i < sizeof(failed_command->cmd); i++) + for (i = 0; i < BLK_MAX_CDB; i++) printk(KERN_CONT "%02x ", failed_command->cmd[i]); printk(KERN_CONT "\"\n"); } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 788783da902..696525342e9 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -1550,8 +1550,7 @@ irqreturn_t ide_intr (int irq, void *dev_id) void ide_init_drive_cmd (struct request *rq) { - memset(rq, 0, sizeof(*rq)); - rq->ref_count = 1; + blk_rq_init(NULL, rq); } EXPORT_SYMBOL(ide_init_drive_cmd); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 7b2f3815a83..8d6ad812a01 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -822,6 +822,7 @@ static int ide_drivers_open(struct inode *inode, struct file *file) } static const struct file_operations ide_drivers_operations = { + .owner = THIS_MODULE, .open = ide_drivers_open, .read = seq_read, .llseek = seq_lseek, @@ -830,16 +831,12 @@ static const struct file_operations ide_drivers_operations = { void proc_ide_create(void) { - struct proc_dir_entry *entry; - proc_ide_root = proc_mkdir("ide", NULL); if (!proc_ide_root) return; - entry = create_proc_entry("drivers", 0, proc_ide_root); - if (entry) - entry->proc_fops = &ide_drivers_operations; + proc_create("drivers", 0, proc_ide_root, &ide_drivers_operations); } void proc_ide_destroy(void) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 54a43b04460..1e1f26331a2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -662,7 +662,7 @@ static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) static void idetape_init_rq(struct request *rq, u8 cmd) { - memset(rq, 0, 
sizeof(*rq)); + blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[0] = cmd; } diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 9a846a0cd5a..0c908ca3ff7 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -494,8 +494,7 @@ int ide_raw_taskfile(ide_drive_t *drive, ide_task_t *task, u8 *buf, u16 nsect) { struct request rq; - memset(&rq, 0, sizeof(rq)); - rq.ref_count = 1; + blk_rq_init(NULL, &rq); rq.cmd_type = REQ_TYPE_ATA_TASKFILE; rq.buffer = buf; diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 999584c03d9..c758dcb13b1 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -564,7 +564,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t mesg) if (!(drive->dn % 2)) ide_acpi_get_timing(hwif); - memset(&rq, 0, sizeof(rq)); + blk_rq_init(NULL, &rq); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_SUSPEND; @@ -602,7 +602,7 @@ static int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); - memset(&rq, 0, sizeof(rq)); + blk_rq_init(NULL, &rq); memset(&rqpm, 0, sizeof(rqpm)); memset(&args, 0, sizeof(args)); rq.cmd_type = REQ_TYPE_PM_RESUME; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 4e3128ff73c..fe78f7d2509 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -38,6 +38,7 @@ #include <linux/dma-mapping.h> #include <linux/sched.h> #include <linux/hugetlb.h> +#include <linux/dma-attrs.h> #include "uverbs.h" @@ -72,9 +73,10 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d * @addr: userspace virtual address to start at * @size: length of region to pin * @access: IB_ACCESS_xxx flags for memory being pinned + * @dmasync: flush in-flight DMA when the memory region is written */ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, - size_t size, int access) + size_t size, int access, int dmasync) { struct 
ib_umem *umem; struct page **page_list; @@ -87,6 +89,10 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, int ret; int off; int i; + DEFINE_DMA_ATTRS(attrs); + + if (dmasync) + dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); if (!can_do_mlock()) return ERR_PTR(-EPERM); @@ -174,10 +180,11 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); } - chunk->nmap = ib_dma_map_sg(context->device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL); + chunk->nmap = ib_dma_map_sg_attrs(context->device, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL, + &attrs); if (chunk->nmap <= 0) { for (i = 0; i < chunk->nents; ++i) put_page(sg_page(&chunk->page_list[i])); diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 6af2c0f79a6..2acf9b62cf9 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -452,7 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(-ENOMEM); c2mr->pd = c2pd; - c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); + c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(c2mr->umem)) { err = PTR_ERR(c2mr->umem); kfree(c2mr); diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ab4695c1dd5..e343e9e6484 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -602,7 +602,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mhp) return ERR_PTR(-ENOMEM); - mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc); + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(mhp->umem)) { err = PTR_ERR(mhp->umem); kfree(mhp); diff --git 
a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 46ae4eb2c4e..f974367cad4 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -323,7 +323,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags); + mr_access_flags, 0); if (IS_ERR(e_mr->umem)) { ib_mr = (void *)e_mr->umem; goto reg_user_mr_exit1; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index db4ba92f79f..9d343b7c2f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -195,7 +195,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto bail; } - umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); + umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); if (IS_ERR(umem)) return (void *) umem; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5e570bb0bb6..e3dddfc687f 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -137,7 +137,7 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont int err; *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe), - IB_ACCESS_LOCAL_WRITE); + IB_ACCESS_LOCAL_WRITE, 1); if (IS_ERR(*umem)) return PTR_ERR(*umem); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8e342cc9bae..8aee4233b38 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -63,7 +63,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, page->user_virt = (virt & PAGE_MASK); page->refcnt = 0; page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, - PAGE_SIZE, 0); + PAGE_SIZE, 0, 0); if (IS_ERR(page->umem)) { err = 
PTR_ERR(page->umem); kfree(page); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index fe2c2e94a5f..68e92485fc7 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -132,7 +132,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags); + mr->umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 80ea8b9e776..8e02ecfec18 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -482,7 +482,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err; qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - qp->buf_size, 0); + qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 204619702f9..12d6bc6f800 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -109,7 +109,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, } srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, - buf_size, 0); + buf_size, 0, 0); if (IS_ERR(srq->umem)) { err = PTR_ERR(srq->umem); goto err_srq; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 696e1f30233..2a9f460cf06 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1006,17 +1006,23 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct mthca_dev *dev = to_mdev(pd->device); struct ib_umem_chunk *chunk; struct mthca_mr *mr; + struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; int i, j, k; int err = 0; int 
write_mtt_size; + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + mr = kmalloc(sizeof *mr, GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); + mr->umem = ib_umem_get(pd->uobject->context, start, length, acc, + ucmd.mr_attrs & MTHCA_MR_DMASYNC); + if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err; diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h index 02cc0a766f3..f8cb3b664d3 100644 --- a/drivers/infiniband/hw/mthca/mthca_user.h +++ b/drivers/infiniband/hw/mthca/mthca_user.h @@ -41,7 +41,7 @@ * Increment this value if any changes that break userspace ABI * compatibility are made. */ -#define MTHCA_UVERBS_ABI_VERSION 1 +#define MTHCA_UVERBS_ABI_VERSION 2 /* * Make sure that all structs defined in this file remain laid out so @@ -61,6 +61,14 @@ struct mthca_alloc_pd_resp { __u32 reserved; }; +struct mthca_reg_mr { + __u32 mr_attrs; +#define MTHCA_MR_DMASYNC 0x1 +/* mark the memory region with a DMA attribute that causes + * in-flight DMA to be flushed when the region is written to */ + __u32 reserved; +}; + struct mthca_create_cq { __u32 lkey; __u32 pdn; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index ee74f7c7a6d..9ae397a0ff7 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -2377,7 +2377,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u8 single_page = 1; u8 stag_key; - region = ib_umem_get(pd->uobject->context, start, length, acc); + region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { return (struct ib_mr *)region; } diff --git a/drivers/input/input.c b/drivers/input/input.c index f02c242c311..27006fc1830 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -898,30 +898,26 @@ static int __init input_proc_init(void) { struct proc_dir_entry 
*entry; - proc_bus_input_dir = proc_mkdir("input", proc_bus); + proc_bus_input_dir = proc_mkdir("bus/input", NULL); if (!proc_bus_input_dir) return -ENOMEM; proc_bus_input_dir->owner = THIS_MODULE; - entry = create_proc_entry("devices", 0, proc_bus_input_dir); + entry = proc_create("devices", 0, proc_bus_input_dir, + &input_devices_fileops); if (!entry) goto fail1; - entry->owner = THIS_MODULE; - entry->proc_fops = &input_devices_fileops; - - entry = create_proc_entry("handlers", 0, proc_bus_input_dir); + entry = proc_create("handlers", 0, proc_bus_input_dir, + &input_handlers_fileops); if (!entry) goto fail2; - entry->owner = THIS_MODULE; - entry->proc_fops = &input_handlers_fileops; - return 0; fail2: remove_proc_entry("devices", proc_bus_input_dir); - fail1: remove_proc_entry("input", proc_bus); + fail1: remove_proc_entry("bus/input", NULL); return -ENOMEM; } @@ -929,7 +925,7 @@ static void input_proc_exit(void) { remove_proc_entry("devices", proc_bus_input_dir); remove_proc_entry("handlers", proc_bus_input_dir); - remove_proc_entry("input", proc_bus); + remove_proc_entry("bus/input", NULL); } #else /* !CONFIG_PROC_FS */ diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 1d759f6f807..55c1134d613 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -528,9 +528,9 @@ static void aiptek_irq(struct urb *urb) (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { - x = le16_to_cpu(get_unaligned((__le16 *) (data + 1))); - y = le16_to_cpu(get_unaligned((__le16 *) (data + 3))); - z = le16_to_cpu(get_unaligned((__le16 *) (data + 6))); + x = get_unaligned_le16(data + 1); + y = get_unaligned_le16(data + 3); + z = get_unaligned_le16(data + 6); dv = (data[5] & 0x01) != 0 ? 1 : 0; p = (data[5] & 0x02) != 0 ? 
1 : 0; @@ -613,8 +613,8 @@ static void aiptek_irq(struct urb *urb) (aiptek->curSetting.pointerMode)) { aiptek->diagnostic = AIPTEK_DIAGNOSTIC_TOOL_DISALLOWED; } else { - x = le16_to_cpu(get_unaligned((__le16 *) (data + 1))); - y = le16_to_cpu(get_unaligned((__le16 *) (data + 3))); + x = get_unaligned_le16(data + 1); + y = get_unaligned_le16(data + 3); jitterable = data[5] & 0x1c; @@ -679,7 +679,7 @@ static void aiptek_irq(struct urb *urb) pck = (data[1] & aiptek->curSetting.stylusButtonUpper) != 0 ? 1 : 0; macro = dv && p && tip && !(data[3] & 1) ? (data[3] >> 1) : -1; - z = le16_to_cpu(get_unaligned((__le16 *) (data + 4))); + z = get_unaligned_le16(data + 4); if (dv) { /* If the selected tool changed, reset the old @@ -757,7 +757,7 @@ static void aiptek_irq(struct urb *urb) * hat switches (which just so happen to be the macroKeys.) */ else if (data[0] == 6) { - macro = le16_to_cpu(get_unaligned((__le16 *) (data + 1))); + macro = get_unaligned_le16(data + 1); if (macro > 0) { input_report_key(inputdev, macroKeyEvents[macro - 1], 0); @@ -952,7 +952,7 @@ aiptek_query(struct aiptek *aiptek, unsigned char command, unsigned char data) buf[0], buf[1], buf[2]); ret = -EIO; } else { - ret = le16_to_cpu(get_unaligned((__le16 *) (buf + 1))); + ret = get_unaligned_le16(buf + 1); } kfree(buf); return ret; diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index f66ca215cde..c5a8661a1ba 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -245,11 +245,11 @@ static void parse_hid_report_descriptor(struct gtco *device, char * report, data = report[i]; break; case 2: - data16 = le16_to_cpu(get_unaligned((__le16 *)&report[i])); + data16 = get_unaligned_le16(&report[i]); break; case 3: size = 4; - data32 = le32_to_cpu(get_unaligned((__le32 *)&report[i])); + data32 = get_unaligned_le32(&report[i]); break; } @@ -695,10 +695,10 @@ static void gtco_urb_callback(struct urb *urbinfo) /* Fall thru */ case 1: /* All reports have X and Y coords in 
the same place */ - val = le16_to_cpu(get_unaligned((__le16 *)&device->buffer[1])); + val = get_unaligned_le16(&device->buffer[1]); input_report_abs(inputdev, ABS_X, val); - val = le16_to_cpu(get_unaligned((__le16 *)&device->buffer[3])); + val = get_unaligned_le16(&device->buffer[3]); input_report_abs(inputdev, ABS_Y, val); /* Ditto for proximity bit */ @@ -762,7 +762,7 @@ static void gtco_urb_callback(struct urb *urbinfo) le_buffer[1] = (u8)(device->buffer[4] >> 1); le_buffer[1] |= (u8)((device->buffer[5] & 0x1) << 7); - val = le16_to_cpu(get_unaligned((__le16 *)le_buffer)); + val = get_unaligned_le16(le_buffer); input_report_abs(inputdev, ABS_Y, val); /* @@ -772,10 +772,10 @@ static void gtco_urb_callback(struct urb *urbinfo) buttonbyte = device->buffer[5] >> 1; } else { - val = le16_to_cpu(get_unaligned((__le16 *)&device->buffer[1])); + val = get_unaligned_le16(&device->buffer[1]); input_report_abs(inputdev, ABS_X, val); - val = le16_to_cpu(get_unaligned((__le16 *)&device->buffer[3])); + val = get_unaligned_le16(&device->buffer[3]); input_report_abs(inputdev, ABS_Y, val); buttonbyte = device->buffer[5]; diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c index 1182fc13316..f23f5a97fb3 100644 --- a/drivers/input/tablet/kbtab.c +++ b/drivers/input/tablet/kbtab.c @@ -63,8 +63,8 @@ static void kbtab_irq(struct urb *urb) goto exit; } - kbtab->x = le16_to_cpu(get_unaligned((__le16 *) &data[1])); - kbtab->y = le16_to_cpu(get_unaligned((__le16 *) &data[3])); + kbtab->x = get_unaligned_le16(&data[1]); + kbtab->y = get_unaligned_le16(&data[3]); kbtab->pressure = (data[5]); diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c index 845a797b003..c29208bd752 100644 --- a/drivers/isdn/capi/kcapi_proc.c +++ b/drivers/isdn/capi/kcapi_proc.c @@ -114,6 +114,7 @@ static int seq_contrstats_open(struct inode *inode, struct file *file) } static const struct file_operations proc_controller_ops = { + .owner = THIS_MODULE, .open = 
seq_controller_open, .read = seq_read, .llseek = seq_lseek, @@ -121,6 +122,7 @@ static const struct file_operations proc_controller_ops = { }; static const struct file_operations proc_contrstats_ops = { + .owner = THIS_MODULE, .open = seq_contrstats_open, .read = seq_read, .llseek = seq_lseek, @@ -219,6 +221,7 @@ seq_applstats_open(struct inode *inode, struct file *file) } static const struct file_operations proc_applications_ops = { + .owner = THIS_MODULE, .open = seq_applications_open, .read = seq_read, .llseek = seq_lseek, @@ -226,21 +229,13 @@ static const struct file_operations proc_applications_ops = { }; static const struct file_operations proc_applstats_ops = { + .owner = THIS_MODULE, .open = seq_applstats_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -static void -create_seq_entry(char *name, mode_t mode, const struct file_operations *f) -{ - struct proc_dir_entry *entry; - entry = create_proc_entry(name, mode, NULL); - if (entry) - entry->proc_fops = f; -} - // --------------------------------------------------------------------------- static void *capi_driver_start(struct seq_file *seq, loff_t *pos) @@ -283,6 +278,7 @@ seq_capi_driver_open(struct inode *inode, struct file *file) } static const struct file_operations proc_driver_ops = { + .owner = THIS_MODULE, .open = seq_capi_driver_open, .read = seq_read, .llseek = seq_lseek, @@ -296,11 +292,11 @@ kcapi_proc_init(void) { proc_mkdir("capi", NULL); proc_mkdir("capi/controllers", NULL); - create_seq_entry("capi/controller", 0, &proc_controller_ops); - create_seq_entry("capi/contrstats", 0, &proc_contrstats_ops); - create_seq_entry("capi/applications", 0, &proc_applications_ops); - create_seq_entry("capi/applstats", 0, &proc_applstats_ops); - create_seq_entry("capi/driver", 0, &proc_driver_ops); + proc_create("capi/controller", 0, NULL, &proc_controller_ops); + proc_create("capi/contrstats", 0, NULL, &proc_contrstats_ops); + proc_create("capi/applications", 0, NULL, 
&proc_applications_ops); + proc_create("capi/applstats", 0, NULL, &proc_applstats_ops); + proc_create("capi/driver", 0, NULL, &proc_driver_ops); } void __exit diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c index 4fd4c46892e..8b256a617c8 100644 --- a/drivers/isdn/divert/divert_procfs.c +++ b/drivers/isdn/divert/divert_procfs.c @@ -288,13 +288,12 @@ divert_dev_init(void) isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net); if (!isdn_proc_entry) return (-1); - isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); + isdn_divert_entry = proc_create("divert", S_IFREG | S_IRUGO, + isdn_proc_entry, &isdn_fops); if (!isdn_divert_entry) { remove_proc_entry("isdn", init_net.proc_net); return (-1); } - isdn_divert_entry->proc_fops = &isdn_fops; - isdn_divert_entry->owner = THIS_MODULE; #endif /* CONFIG_PROC_FS */ return (0); diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index 0632a260699..fae895828a1 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -125,15 +125,11 @@ static const struct file_operations divas_fops = { int create_divas_proc(void) { - divas_proc_entry = create_proc_entry(divas_proc_name, - S_IFREG | S_IRUGO, - proc_net_eicon); + proc_create(divas_proc_name, S_IFREG | S_IRUGO, proc_net_eicon, + &divas_fops); if (!divas_proc_entry) return (0); - divas_proc_entry->proc_fops = &divas_fops; - divas_proc_entry->owner = THIS_MODULE; - return (1); } diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 27d890b48f8..877be9922c3 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -370,6 +370,7 @@ hysdn_conf_close(struct inode *ino, struct file *filep) /******************************************************/ static const struct file_operations conf_fops = { + .owner = THIS_MODULE, .llseek = no_llseek, .read = 
hysdn_conf_read, .write = hysdn_conf_write, @@ -402,11 +403,9 @@ hysdn_procconf_init(void) while (card) { sprintf(conf_name, "%s%d", PROC_CONF_BASENAME, card->myid); - if ((card->procconf = (void *) create_proc_entry(conf_name, - S_IFREG | S_IRUGO | S_IWUSR, - hysdn_proc_entry)) != NULL) { - ((struct proc_dir_entry *) card->procconf)->proc_fops = &conf_fops; - ((struct proc_dir_entry *) card->procconf)->owner = THIS_MODULE; + if ((card->procconf = (void *) proc_create(conf_name, + S_IFREG | S_IRUGO | S_IWUSR, + hysdn_proc_entry)) != NULL) { hysdn_proclog_init(card); /* init the log file entry */ } card = card->next; /* next entry */ diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index 27b3991fb0e..8991d2c8ee4 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -380,6 +380,7 @@ hysdn_log_poll(struct file *file, poll_table * wait) /**************************************************/ static const struct file_operations log_fops = { + .owner = THIS_MODULE, .llseek = no_llseek, .read = hysdn_log_read, .write = hysdn_log_write, @@ -402,10 +403,9 @@ hysdn_proclog_init(hysdn_card * card) if ((pd = kzalloc(sizeof(struct procdata), GFP_KERNEL)) != NULL) { sprintf(pd->log_name, "%s%d", PROC_LOG_BASENAME, card->myid); - if ((pd->log = create_proc_entry(pd->log_name, S_IFREG | S_IRUGO | S_IWUSR, hysdn_proc_entry)) != NULL) { - pd->log->proc_fops = &log_fops; - pd->log->owner = THIS_MODULE; - } + pd->log = proc_create(pd->log_name, + S_IFREG | S_IRUGO | S_IWUSR, hysdn_proc_entry, + &log_fops); init_waitqueue_head(&(pd->rd_queue)); diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index ac05a928f76..b3c54be7455 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -105,7 +105,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev) led_cdev->dev = device_create(leds_class, parent, 0, "%s", led_cdev->name); - if 
(unlikely(IS_ERR(led_cdev->dev))) + if (IS_ERR(led_cdev->dev)) return PTR_ERR(led_cdev->dev); dev_set_drvdata(led_cdev->dev, led_cdev); diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig index 77f50b63a97..b52659620d5 100644 --- a/drivers/macintosh/Kconfig +++ b/drivers/macintosh/Kconfig @@ -234,6 +234,14 @@ config WINDFARM_PM112 which are the recent dual and quad G5 machines using the 970MP dual-core processor. +config WINDFARM_PM121 + tristate "Support for thermal management on PowerMac12,1" + depends on WINDFARM && I2C && PMAC_SMU + select I2C_POWERMAC + help + This driver provides thermal control for the PowerMac12,1 + which is the iMac G5 (iSight). + config ANSLCD tristate "Support for ANS LCD display" depends on ADB_CUDA && PPC_PMAC diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile index 2dfc3f4eaf4..e3132efa17c 100644 --- a/drivers/macintosh/Makefile +++ b/drivers/macintosh/Makefile @@ -42,4 +42,9 @@ obj-$(CONFIG_WINDFARM_PM112) += windfarm_pm112.o windfarm_smu_sat.o \ windfarm_smu_sensors.o \ windfarm_max6690_sensor.o \ windfarm_lm75_sensor.o windfarm_pid.o +obj-$(CONFIG_WINDFARM_PM121) += windfarm_pm121.o windfarm_smu_sat.o \ + windfarm_smu_controls.o \ + windfarm_smu_sensors.o \ + windfarm_max6690_sensor.o \ + windfarm_lm75_sensor.o windfarm_pid.o obj-$(CONFIG_PMAC_RACKMETER) += rack-meter.o diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 7e10c3ab4d5..b92b959fe16 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -127,6 +127,12 @@ static struct wf_lm75_sensor *wf_lm75_create(struct i2c_adapter *adapter, */ if (!strcmp(loc, "Hard drive") || !strcmp(loc, "DRIVE BAY")) lm->sens.name = "hd-temp"; + else if (!strcmp(loc, "Incoming Air Temp")) + lm->sens.name = "incoming-air-temp"; + else if (!strcmp(loc, "ODD Temp")) + lm->sens.name = "optical-drive-temp"; + else if (!strcmp(loc, "HD Temp")) + lm->sens.name = 
"hard-drive-temp"; else goto fail; diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c index 5f03aab9fb5..e207a90d6b2 100644 --- a/drivers/macintosh/windfarm_max6690_sensor.c +++ b/drivers/macintosh/windfarm_max6690_sensor.c @@ -77,18 +77,28 @@ static struct wf_sensor_ops wf_max6690_ops = { .owner = THIS_MODULE, }; -static void wf_max6690_create(struct i2c_adapter *adapter, u8 addr) +static void wf_max6690_create(struct i2c_adapter *adapter, u8 addr, + const char *loc) { struct wf_6690_sensor *max; - char *name = "backside-temp"; + char *name; max = kzalloc(sizeof(struct wf_6690_sensor), GFP_KERNEL); if (max == NULL) { printk(KERN_ERR "windfarm: Couldn't create MAX6690 sensor %s: " - "no memory\n", name); + "no memory\n", loc); return; } + if (!strcmp(loc, "BACKSIDE")) + name = "backside-temp"; + else if (!strcmp(loc, "NB Ambient")) + name = "north-bridge-temp"; + else if (!strcmp(loc, "GPU Ambient")) + name = "gpu-temp"; + else + goto fail; + max->sens.ops = &wf_max6690_ops; max->sens.name = name; max->i2c.addr = addr >> 1; @@ -138,9 +148,7 @@ static int wf_max6690_attach(struct i2c_adapter *adapter) if (loc == NULL || addr == 0) continue; printk("found max6690, loc=%s addr=0x%02x\n", loc, addr); - if (strcmp(loc, "BACKSIDE")) - continue; - wf_max6690_create(adapter, addr); + wf_max6690_create(adapter, addr, loc); } return 0; diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c new file mode 100644 index 00000000000..66ec4fb115b --- /dev/null +++ b/drivers/macintosh/windfarm_pm121.c @@ -0,0 +1,1040 @@ +/* + * Windfarm PowerMac thermal control. iMac G5 iSight + * + * (c) Copyright 2007 Étienne Bersac <bersace@gmail.com> + * + * Bits & pieces from windfarm_pm81.c by (c) Copyright 2005 Benjamin + * Herrenschmidt, IBM Corp. <benh@kernel.crashing.org> + * + * Released under the term of the GNU GPL v2. 
+ * + * + * + * PowerMac12,1 + * ============ + * + * + * The algorithm used is the PID control algorithm, used the same way + * the published Darwin code does, using the same values that are + * present in the Darwin 8.10 snapshot property lists (note however + * that none of the code has been re-used, it's a complete + * re-implementation + * + * There is two models using PowerMac12,1. Model 2 is iMac G5 iSight + * 17" while Model 3 is iMac G5 20". They do have both the same + * controls with a tiny difference. The control-ids of hard-drive-fan + * and cpu-fan is swapped. + * + * + * Target Correction : + * + * controls have a target correction calculated as : + * + * new_min = ((((average_power * slope) >> 16) + offset) >> 16) + min_value + * new_value = max(new_value, max(new_min, 0)) + * + * OD Fan control correction. + * + * # model_id: 2 + * offset : -19563152 + * slope : 1956315 + * + * # model_id: 3 + * offset : -15650652 + * slope : 1565065 + * + * HD Fan control correction. + * + * # model_id: 2 + * offset : -15650652 + * slope : 1565065 + * + * # model_id: 3 + * offset : -19563152 + * slope : 1956315 + * + * CPU Fan control correction. + * + * # model_id: 2 + * offset : -25431900 + * slope : 2543190 + * + * # model_id: 3 + * offset : -15650652 + * slope : 1565065 + * + * + * Target rubber-banding : + * + * Some controls have a target correction which depends on another + * control value. The correction is computed in the following way : + * + * new_min = ref_value * slope + offset + * + * ref_value is the value of the reference control. 
If new_min is + * greater than 0, then we correct the target value using : + * + * new_target = max (new_target, new_min >> 16) + * + * + * # model_id : 2 + * control : cpu-fan + * ref : optical-drive-fan + * offset : -15650652 + * slope : 1565065 + * + * # model_id : 3 + * control : optical-drive-fan + * ref : hard-drive-fan + * offset : -32768000 + * slope : 65536 + * + * + * In order to have the moste efficient correction with those + * dependencies, we must trigger HD loop before OD loop before CPU + * loop. + * + * + * The various control loops found in Darwin config file are: + * + * HD Fan control loop. + * + * # model_id: 2 + * control : hard-drive-fan + * sensor : hard-drive-temp + * PID params : G_d = 0x00000000 + * G_p = 0x002D70A3 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x370000 + * Interval = 5s + * + * # model_id: 3 + * control : hard-drive-fan + * sensor : hard-drive-temp + * PID params : G_d = 0x00000000 + * G_p = 0x002170A3 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x370000 + * Interval = 5s + * + * OD Fan control loop. + * + * # model_id: 2 + * control : optical-drive-fan + * sensor : optical-drive-temp + * PID params : G_d = 0x00000000 + * G_p = 0x001FAE14 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x320000 + * Interval = 5s + * + * # model_id: 3 + * control : optical-drive-fan + * sensor : optical-drive-temp + * PID params : G_d = 0x00000000 + * G_p = 0x001FAE14 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x320000 + * Interval = 5s + * + * GPU Fan control loop. 
+ * + * # model_id: 2 + * control : hard-drive-fan + * sensor : gpu-temp + * PID params : G_d = 0x00000000 + * G_p = 0x002A6666 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x5A0000 + * Interval = 5s + * + * # model_id: 3 + * control : cpu-fan + * sensor : gpu-temp + * PID params : G_d = 0x00000000 + * G_p = 0x0010CCCC + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x500000 + * Interval = 5s + * + * KODIAK (aka northbridge) Fan control loop. + * + * # model_id: 2 + * control : optical-drive-fan + * sensor : north-bridge-temp + * PID params : G_d = 0x00000000 + * G_p = 0x003BD70A + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x550000 + * Interval = 5s + * + * # model_id: 3 + * control : hard-drive-fan + * sensor : north-bridge-temp + * PID params : G_d = 0x00000000 + * G_p = 0x0030F5C2 + * G_r = 0x00019999 + * History = 2 entries + * Input target = 0x550000 + * Interval = 5s + * + * CPU Fan control loop. + * + * control : cpu-fan + * sensors : cpu-temp, cpu-power + * PID params : from SDB partition + * + * + * CPU Slew control loop. 
+ * + * control : cpufreq-clamp + * sensor : cpu-temp + * + */ + +#undef DEBUG + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/kmod.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/sections.h> +#include <asm/smu.h> + +#include "windfarm.h" +#include "windfarm_pid.h" + +#define VERSION "0.3" + +static int pm121_mach_model; /* machine model id */ + +/* Controls & sensors */ +static struct wf_sensor *sensor_cpu_power; +static struct wf_sensor *sensor_cpu_temp; +static struct wf_sensor *sensor_cpu_voltage; +static struct wf_sensor *sensor_cpu_current; +static struct wf_sensor *sensor_gpu_temp; +static struct wf_sensor *sensor_north_bridge_temp; +static struct wf_sensor *sensor_hard_drive_temp; +static struct wf_sensor *sensor_optical_drive_temp; +static struct wf_sensor *sensor_incoming_air_temp; /* unused ! */ + +enum { + FAN_CPU, + FAN_HD, + FAN_OD, + CPUFREQ, + N_CONTROLS +}; +static struct wf_control *controls[N_CONTROLS] = {}; + +/* Set to kick the control loop into life */ +static int pm121_all_controls_ok, pm121_all_sensors_ok, pm121_started; + +enum { + FAILURE_FAN = 1 << 0, + FAILURE_SENSOR = 1 << 1, + FAILURE_OVERTEMP = 1 << 2 +}; + +/* All sys loops. Note the HD before the OD loop in order to have it + run before. 
*/ +enum { + LOOP_GPU, /* control = hd or cpu, but luckily, + it doesn't matter */ + LOOP_HD, /* control = hd */ + LOOP_KODIAK, /* control = hd or od */ + LOOP_OD, /* control = od */ + N_LOOPS +}; + +static const char *loop_names[N_LOOPS] = { + "GPU", + "HD", + "KODIAK", + "OD", +}; + +#define PM121_NUM_CONFIGS 2 + +static unsigned int pm121_failure_state; +static int pm121_readjust, pm121_skipping; +static s32 average_power; + +struct pm121_correction { + int offset; + int slope; +}; + +static struct pm121_correction corrections[N_CONTROLS][PM121_NUM_CONFIGS] = { + /* FAN_OD */ + { + /* MODEL 2 */ + { .offset = -19563152, + .slope = 1956315 + }, + /* MODEL 3 */ + { .offset = -15650652, + .slope = 1565065 + }, + }, + /* FAN_HD */ + { + /* MODEL 2 */ + { .offset = -15650652, + .slope = 1565065 + }, + /* MODEL 3 */ + { .offset = -19563152, + .slope = 1956315 + }, + }, + /* FAN_CPU */ + { + /* MODEL 2 */ + { .offset = -25431900, + .slope = 2543190 + }, + /* MODEL 3 */ + { .offset = -15650652, + .slope = 1565065 + }, + }, + /* CPUFREQ has no correction (and is not implemented at all) */ +}; + +struct pm121_connection { + unsigned int control_id; + unsigned int ref_id; + struct pm121_correction correction; +}; + +static struct pm121_connection pm121_connections[] = { + /* MODEL 2 */ + { .control_id = FAN_CPU, + .ref_id = FAN_OD, + { .offset = -32768000, + .slope = 65536 + } + }, + /* MODEL 3 */ + { .control_id = FAN_OD, + .ref_id = FAN_HD, + { .offset = -32768000, + .slope = 65536 + } + }, +}; + +/* pointer to the current model connection */ +static struct pm121_connection *pm121_connection; + +/* + * ****** System Fans Control Loop ****** + * + */ + +/* Since each loop handles only one control and we want to avoid + * writing virtual control, we store the control correction with the + * loop params. Some data are not set, there are common to all loop + * and thus, hardcoded. 
+ */ +struct pm121_sys_param { + /* purely informative since we use mach_model-2 as index */ + int model_id; + struct wf_sensor **sensor; /* use sensor_id instead ? */ + s32 gp, itarget; + unsigned int control_id; +}; + +static struct pm121_sys_param +pm121_sys_all_params[N_LOOPS][PM121_NUM_CONFIGS] = { + /* GPU Fan control loop */ + { + { .model_id = 2, + .sensor = &sensor_gpu_temp, + .gp = 0x002A6666, + .itarget = 0x5A0000, + .control_id = FAN_HD, + }, + { .model_id = 3, + .sensor = &sensor_gpu_temp, + .gp = 0x0010CCCC, + .itarget = 0x500000, + .control_id = FAN_CPU, + }, + }, + /* HD Fan control loop */ + { + { .model_id = 2, + .sensor = &sensor_hard_drive_temp, + .gp = 0x002D70A3, + .itarget = 0x370000, + .control_id = FAN_HD, + }, + { .model_id = 3, + .sensor = &sensor_hard_drive_temp, + .gp = 0x002170A3, + .itarget = 0x370000, + .control_id = FAN_HD, + }, + }, + /* KODIAK Fan control loop */ + { + { .model_id = 2, + .sensor = &sensor_north_bridge_temp, + .gp = 0x003BD70A, + .itarget = 0x550000, + .control_id = FAN_OD, + }, + { .model_id = 3, + .sensor = &sensor_north_bridge_temp, + .gp = 0x0030F5C2, + .itarget = 0x550000, + .control_id = FAN_HD, + }, + }, + /* OD Fan control loop */ + { + { .model_id = 2, + .sensor = &sensor_optical_drive_temp, + .gp = 0x001FAE14, + .itarget = 0x320000, + .control_id = FAN_OD, + }, + { .model_id = 3, + .sensor = &sensor_optical_drive_temp, + .gp = 0x001FAE14, + .itarget = 0x320000, + .control_id = FAN_OD, + }, + }, +}; + +/* the hardcoded values */ +#define PM121_SYS_GD 0x00000000 +#define PM121_SYS_GR 0x00019999 +#define PM121_SYS_HISTORY_SIZE 2 +#define PM121_SYS_INTERVAL 5 + +/* State data used by the system fans control loop + */ +struct pm121_sys_state { + int ticks; + s32 setpoint; + struct wf_pid_state pid; +}; + +struct pm121_sys_state *pm121_sys_state[N_LOOPS] = {}; + +/* + * ****** CPU Fans Control Loop ****** + * + */ + +#define PM121_CPU_INTERVAL 1 + +/* State data used by the cpu fans control loop + */ +struct 
pm121_cpu_state { + int ticks; + s32 setpoint; + struct wf_cpu_pid_state pid; +}; + +static struct pm121_cpu_state *pm121_cpu_state; + + + +/* + * ***** Implementation ***** + * + */ + +/* correction the value using the output-low-bound correction algo */ +static s32 pm121_correct(s32 new_setpoint, + unsigned int control_id, + s32 min) +{ + s32 new_min; + struct pm121_correction *correction; + correction = &corrections[control_id][pm121_mach_model - 2]; + + new_min = (average_power * correction->slope) >> 16; + new_min += correction->offset; + new_min = (new_min >> 16) + min; + + return max(new_setpoint, max(new_min, 0)); +} + +static s32 pm121_connect(unsigned int control_id, s32 setpoint) +{ + s32 new_min, value, new_setpoint; + + if (pm121_connection->control_id == control_id) { + controls[control_id]->ops->get_value(controls[control_id], + &value); + new_min = value * pm121_connection->correction.slope; + new_min += pm121_connection->correction.offset; + if (new_min > 0) { + new_setpoint = max(setpoint, (new_min >> 16)); + if (new_setpoint != setpoint) { + pr_debug("pm121: %s depending on %s, " + "corrected from %d to %d RPM\n", + controls[control_id]->name, + controls[pm121_connection->ref_id]->name, + (int) setpoint, (int) new_setpoint); + } + } else + new_setpoint = setpoint; + } + /* no connection */ + else + new_setpoint = setpoint; + + return new_setpoint; +} + +/* FAN LOOPS */ +static void pm121_create_sys_fans(int loop_id) +{ + struct pm121_sys_param *param = NULL; + struct wf_pid_param pid_param; + struct wf_control *control = NULL; + int i; + + /* First, locate the params for this model */ + for (i = 0; i < PM121_NUM_CONFIGS; i++) { + if (pm121_sys_all_params[loop_id][i].model_id == pm121_mach_model) { + param = &(pm121_sys_all_params[loop_id][i]); + break; + } + } + + /* No params found, put fans to max */ + if (param == NULL) { + printk(KERN_WARNING "pm121: %s fan config not found " + " for this machine model\n", + loop_names[loop_id]); + goto fail; 
+ } + + control = controls[param->control_id]; + + /* Alloc & initialize state */ + pm121_sys_state[loop_id] = kmalloc(sizeof(struct pm121_sys_state), + GFP_KERNEL); + if (pm121_sys_state[loop_id] == NULL) { + printk(KERN_WARNING "pm121: Memory allocation error\n"); + goto fail; + } + pm121_sys_state[loop_id]->ticks = 1; + + /* Fill PID params */ + pid_param.gd = PM121_SYS_GD; + pid_param.gp = param->gp; + pid_param.gr = PM121_SYS_GR; + pid_param.interval = PM121_SYS_INTERVAL; + pid_param.history_len = PM121_SYS_HISTORY_SIZE; + pid_param.itarget = param->itarget; + pid_param.min = control->ops->get_min(control); + pid_param.max = control->ops->get_max(control); + + wf_pid_init(&pm121_sys_state[loop_id]->pid, &pid_param); + + pr_debug("pm121: %s Fan control loop initialized.\n" + " itarged=%d.%03d, min=%d RPM, max=%d RPM\n", + loop_names[loop_id], FIX32TOPRINT(pid_param.itarget), + pid_param.min, pid_param.max); + return; + + fail: + /* note that this is not optimal since another loop may still + control the same control */ + printk(KERN_WARNING "pm121: failed to set up %s loop " + "setting \"%s\" to max speed.\n", + loop_names[loop_id], control->name); + + if (control) + wf_control_set_max(control); +} + +static void pm121_sys_fans_tick(int loop_id) +{ + struct pm121_sys_param *param; + struct pm121_sys_state *st; + struct wf_sensor *sensor; + struct wf_control *control; + s32 temp, new_setpoint; + int rc; + + param = &(pm121_sys_all_params[loop_id][pm121_mach_model-2]); + st = pm121_sys_state[loop_id]; + sensor = *(param->sensor); + control = controls[param->control_id]; + + if (--st->ticks != 0) { + if (pm121_readjust) + goto readjust; + return; + } + st->ticks = PM121_SYS_INTERVAL; + + rc = sensor->ops->get_value(sensor, &temp); + if (rc) { + printk(KERN_WARNING "windfarm: %s sensor error %d\n", + sensor->name, rc); + pm121_failure_state |= FAILURE_SENSOR; + return; + } + + pr_debug("pm121: %s Fan tick ! 
%s: %d.%03d\n", + loop_names[loop_id], sensor->name, + FIX32TOPRINT(temp)); + + new_setpoint = wf_pid_run(&st->pid, temp); + + /* correction */ + new_setpoint = pm121_correct(new_setpoint, + param->control_id, + st->pid.param.min); + /* linked corretion */ + new_setpoint = pm121_connect(param->control_id, new_setpoint); + + if (new_setpoint == st->setpoint) + return; + st->setpoint = new_setpoint; + pr_debug("pm121: %s corrected setpoint: %d RPM\n", + control->name, (int)new_setpoint); + readjust: + if (control && pm121_failure_state == 0) { + rc = control->ops->set_value(control, st->setpoint); + if (rc) { + printk(KERN_WARNING "windfarm: %s fan error %d\n", + control->name, rc); + pm121_failure_state |= FAILURE_FAN; + } + } +} + + +/* CPU LOOP */ +static void pm121_create_cpu_fans(void) +{ + struct wf_cpu_pid_param pid_param; + const struct smu_sdbp_header *hdr; + struct smu_sdbp_cpupiddata *piddata; + struct smu_sdbp_fvt *fvt; + struct wf_control *fan_cpu; + s32 tmax, tdelta, maxpow, powadj; + + fan_cpu = controls[FAN_CPU]; + + /* First, locate the PID params in SMU SBD */ + hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL); + if (hdr == 0) { + printk(KERN_WARNING "pm121: CPU PID fan config not found.\n"); + goto fail; + } + piddata = (struct smu_sdbp_cpupiddata *)&hdr[1]; + + /* Get the FVT params for operating point 0 (the only supported one + * for now) in order to get tmax + */ + hdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); + if (hdr) { + fvt = (struct smu_sdbp_fvt *)&hdr[1]; + tmax = ((s32)fvt->maxtemp) << 16; + } else + tmax = 0x5e0000; /* 94 degree default */ + + /* Alloc & initialize state */ + pm121_cpu_state = kmalloc(sizeof(struct pm121_cpu_state), + GFP_KERNEL); + if (pm121_cpu_state == NULL) + goto fail; + pm121_cpu_state->ticks = 1; + + /* Fill PID params */ + pid_param.interval = PM121_CPU_INTERVAL; + pid_param.history_len = piddata->history_len; + if (pid_param.history_len > WF_CPU_PID_MAX_HISTORY) { + printk(KERN_WARNING "pm121: 
History size overflow on " + "CPU control loop (%d)\n", piddata->history_len); + pid_param.history_len = WF_CPU_PID_MAX_HISTORY; + } + pid_param.gd = piddata->gd; + pid_param.gp = piddata->gp; + pid_param.gr = piddata->gr / pid_param.history_len; + + tdelta = ((s32)piddata->target_temp_delta) << 16; + maxpow = ((s32)piddata->max_power) << 16; + powadj = ((s32)piddata->power_adj) << 16; + + pid_param.tmax = tmax; + pid_param.ttarget = tmax - tdelta; + pid_param.pmaxadj = maxpow - powadj; + + pid_param.min = fan_cpu->ops->get_min(fan_cpu); + pid_param.max = fan_cpu->ops->get_max(fan_cpu); + + wf_cpu_pid_init(&pm121_cpu_state->pid, &pid_param); + + pr_debug("pm121: CPU Fan control initialized.\n"); + pr_debug(" ttarged=%d.%03d, tmax=%d.%03d, min=%d RPM, max=%d RPM,\n", + FIX32TOPRINT(pid_param.ttarget), FIX32TOPRINT(pid_param.tmax), + pid_param.min, pid_param.max); + + return; + + fail: + printk(KERN_WARNING "pm121: CPU fan config not found, max fan speed\n"); + + if (controls[CPUFREQ]) + wf_control_set_max(controls[CPUFREQ]); + if (fan_cpu) + wf_control_set_max(fan_cpu); +} + + +static void pm121_cpu_fans_tick(struct pm121_cpu_state *st) +{ + s32 new_setpoint, temp, power; + struct wf_control *fan_cpu = NULL; + int rc; + + if (--st->ticks != 0) { + if (pm121_readjust) + goto readjust; + return; + } + st->ticks = PM121_CPU_INTERVAL; + + fan_cpu = controls[FAN_CPU]; + + rc = sensor_cpu_temp->ops->get_value(sensor_cpu_temp, &temp); + if (rc) { + printk(KERN_WARNING "pm121: CPU temp sensor error %d\n", + rc); + pm121_failure_state |= FAILURE_SENSOR; + return; + } + + rc = sensor_cpu_power->ops->get_value(sensor_cpu_power, &power); + if (rc) { + printk(KERN_WARNING "pm121: CPU power sensor error %d\n", + rc); + pm121_failure_state |= FAILURE_SENSOR; + return; + } + + pr_debug("pm121: CPU Fans tick ! 
CPU temp: %d.%03d°C, power: %d.%03d\n", + FIX32TOPRINT(temp), FIX32TOPRINT(power)); + + if (temp > st->pid.param.tmax) + pm121_failure_state |= FAILURE_OVERTEMP; + + new_setpoint = wf_cpu_pid_run(&st->pid, power, temp); + + /* correction */ + new_setpoint = pm121_correct(new_setpoint, + FAN_CPU, + st->pid.param.min); + + /* connected correction */ + new_setpoint = pm121_connect(FAN_CPU, new_setpoint); + + if (st->setpoint == new_setpoint) + return; + st->setpoint = new_setpoint; + pr_debug("pm121: CPU corrected setpoint: %d RPM\n", (int)new_setpoint); + + readjust: + if (fan_cpu && pm121_failure_state == 0) { + rc = fan_cpu->ops->set_value(fan_cpu, st->setpoint); + if (rc) { + printk(KERN_WARNING "pm121: %s fan error %d\n", + fan_cpu->name, rc); + pm121_failure_state |= FAILURE_FAN; + } + } +} + +/* + * ****** Common ****** + * + */ + +static void pm121_tick(void) +{ + unsigned int last_failure = pm121_failure_state; + unsigned int new_failure; + s32 total_power; + int i; + + if (!pm121_started) { + pr_debug("pm121: creating control loops !\n"); + for (i = 0; i < N_LOOPS; i++) + pm121_create_sys_fans(i); + + pm121_create_cpu_fans(); + pm121_started = 1; + } + + /* skipping ticks */ + if (pm121_skipping && --pm121_skipping) + return; + + /* compute average power */ + total_power = 0; + for (i = 0; i < pm121_cpu_state->pid.param.history_len; i++) + total_power += pm121_cpu_state->pid.powers[i]; + + average_power = total_power / pm121_cpu_state->pid.param.history_len; + + + pm121_failure_state = 0; + for (i = 0 ; i < N_LOOPS; i++) { + if (pm121_sys_state[i]) + pm121_sys_fans_tick(i); + } + + if (pm121_cpu_state) + pm121_cpu_fans_tick(pm121_cpu_state); + + pm121_readjust = 0; + new_failure = pm121_failure_state & ~last_failure; + + /* If entering failure mode, clamp cpufreq and ramp all + * fans to full speed. 
+ */ + if (pm121_failure_state && !last_failure) { + for (i = 0; i < N_CONTROLS; i++) { + if (controls[i]) + wf_control_set_max(controls[i]); + } + } + + /* If leaving failure mode, unclamp cpufreq and readjust + * all fans on next iteration + */ + if (!pm121_failure_state && last_failure) { + if (controls[CPUFREQ]) + wf_control_set_min(controls[CPUFREQ]); + pm121_readjust = 1; + } + + /* Overtemp condition detected, notify and start skipping a couple + * ticks to let the temperature go down + */ + if (new_failure & FAILURE_OVERTEMP) { + wf_set_overtemp(); + pm121_skipping = 2; + } + + /* We only clear the overtemp condition if overtemp is cleared + * _and_ no other failure is present. Since a sensor error will + * clear the overtemp condition (can't measure temperature) at + * the control loop levels, but we don't want to keep it clear + * here in this case + */ + if (new_failure == 0 && last_failure & FAILURE_OVERTEMP) + wf_clear_overtemp(); +} + + +static struct wf_control* pm121_register_control(struct wf_control *ct, + const char *match, + unsigned int id) +{ + if (controls[id] == NULL && !strcmp(ct->name, match)) { + if (wf_get_control(ct) == 0) + controls[id] = ct; + } + return controls[id]; +} + +static void pm121_new_control(struct wf_control *ct) +{ + int all = 1; + + if (pm121_all_controls_ok) + return; + + all = pm121_register_control(ct, "optical-drive-fan", FAN_OD) && all; + all = pm121_register_control(ct, "hard-drive-fan", FAN_HD) && all; + all = pm121_register_control(ct, "cpu-fan", FAN_CPU) && all; + all = pm121_register_control(ct, "cpufreq-clamp", CPUFREQ) && all; + + if (all) + pm121_all_controls_ok = 1; +} + + + + +static struct wf_sensor* pm121_register_sensor(struct wf_sensor *sensor, + const char *match, + struct wf_sensor **var) +{ + if (*var == NULL && !strcmp(sensor->name, match)) { + if (wf_get_sensor(sensor) == 0) + *var = sensor; + } + return *var; +} + +static void pm121_new_sensor(struct wf_sensor *sr) +{ + int all = 1; + + if 
(pm121_all_sensors_ok) + return; + + all = pm121_register_sensor(sr, "cpu-temp", + &sensor_cpu_temp) && all; + all = pm121_register_sensor(sr, "cpu-current", + &sensor_cpu_current) && all; + all = pm121_register_sensor(sr, "cpu-voltage", + &sensor_cpu_voltage) && all; + all = pm121_register_sensor(sr, "cpu-power", + &sensor_cpu_power) && all; + all = pm121_register_sensor(sr, "hard-drive-temp", + &sensor_hard_drive_temp) && all; + all = pm121_register_sensor(sr, "optical-drive-temp", + &sensor_optical_drive_temp) && all; + all = pm121_register_sensor(sr, "incoming-air-temp", + &sensor_incoming_air_temp) && all; + all = pm121_register_sensor(sr, "north-bridge-temp", + &sensor_north_bridge_temp) && all; + all = pm121_register_sensor(sr, "gpu-temp", + &sensor_gpu_temp) && all; + + if (all) + pm121_all_sensors_ok = 1; +} + + + +static int pm121_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + switch (event) { + case WF_EVENT_NEW_CONTROL: + pr_debug("pm121: new control %s detected\n", + ((struct wf_control *)data)->name); + pm121_new_control(data); + break; + case WF_EVENT_NEW_SENSOR: + pr_debug("pm121: new sensor %s detected\n", + ((struct wf_sensor *)data)->name); + pm121_new_sensor(data); + break; + case WF_EVENT_TICK: + if (pm121_all_controls_ok && pm121_all_sensors_ok) + pm121_tick(); + break; + } + + return 0; +} + +static struct notifier_block pm121_events = { + .notifier_call = pm121_notify, +}; + +static int pm121_init_pm(void) +{ + const struct smu_sdbp_header *hdr; + + hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL); + if (hdr != 0) { + struct smu_sdbp_sensortree *st = + (struct smu_sdbp_sensortree *)&hdr[1]; + pm121_mach_model = st->model_id; + } + + pm121_connection = &pm121_connections[pm121_mach_model - 2]; + + printk(KERN_INFO "pm121: Initializing for iMac G5 iSight model ID %d\n", + pm121_mach_model); + + return 0; +} + + +static int pm121_probe(struct platform_device *ddev) +{ + wf_register_client(&pm121_events); + + 
return 0; +} + +static int __devexit pm121_remove(struct platform_device *ddev) +{ + wf_unregister_client(&pm121_events); + return 0; +} + +static struct platform_driver pm121_driver = { + .probe = pm121_probe, + .remove = __devexit_p(pm121_remove), + .driver = { + .name = "windfarm", + .bus = &platform_bus_type, + }, +}; + + +static int __init pm121_init(void) +{ + int rc = -ENODEV; + + if (machine_is_compatible("PowerMac12,1")) + rc = pm121_init_pm(); + + if (rc == 0) { + request_module("windfarm_smu_controls"); + request_module("windfarm_smu_sensors"); + request_module("windfarm_smu_sat"); + request_module("windfarm_lm75_sensor"); + request_module("windfarm_max6690_sensor"); + request_module("windfarm_cpufreq_clamp"); + platform_driver_register(&pm121_driver); + } + + return rc; +} + +static void __exit pm121_exit(void) +{ + + platform_driver_unregister(&pm121_driver); +} + + +module_init(pm121_init); +module_exit(pm121_exit); + +MODULE_AUTHOR("Étienne Bersac <bersace@gmail.com>"); +MODULE_DESCRIPTION("Thermal control logic for iMac G5 (iSight)"); +MODULE_LICENSE("GPL"); + diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c index 58c2590f05e..961fa0e7c2c 100644 --- a/drivers/macintosh/windfarm_smu_controls.c +++ b/drivers/macintosh/windfarm_smu_controls.c @@ -218,6 +218,10 @@ static struct smu_fan_control *smu_fan_create(struct device_node *node, fct->ctrl.name = "cpu-fan"; else if (!strcmp(l, "Hard Drive") || !strcmp(l, "Hard drive")) fct->ctrl.name = "drive-bay-fan"; + else if (!strcmp(l, "HDD Fan")) /* seen on iMac G5 iSight */ + fct->ctrl.name = "hard-drive-fan"; + else if (!strcmp(l, "ODD Fan")) /* same */ + fct->ctrl.name = "optical-drive-fan"; /* Unrecognized fan, bail out */ if (fct->ctrl.name == NULL) diff --git a/drivers/mca/mca-legacy.c b/drivers/mca/mca-legacy.c index 0c7bfa74c8e..494f0c2001f 100644 --- a/drivers/mca/mca-legacy.c +++ b/drivers/mca/mca-legacy.c @@ -282,24 +282,6 @@ void 
mca_set_adapter_name(int slot, char* name) EXPORT_SYMBOL(mca_set_adapter_name); /** - * mca_is_adapter_used - check if claimed by driver - * @slot: slot to check - * - * Returns 1 if the slot has been claimed by a driver - */ - -int mca_is_adapter_used(int slot) -{ - struct mca_device *mca_dev = mca_find_device_by_slot(slot); - - if(!mca_dev) - return 0; - - return mca_device_claimed(mca_dev); -} -EXPORT_SYMBOL(mca_is_adapter_used); - -/** * mca_mark_as_used - claim an MCA device * @slot: slot to claim * FIXME: should we make this threadsafe diff --git a/drivers/mca/mca-proc.c b/drivers/mca/mca-proc.c index 33d5e0820cc..81ea0d377bf 100644 --- a/drivers/mca/mca-proc.c +++ b/drivers/mca/mca-proc.c @@ -183,7 +183,7 @@ void __init mca_do_proc_init(void) struct proc_dir_entry* node = NULL; struct mca_device *mca_dev; - proc_mca = proc_mkdir("mca", &proc_root); + proc_mca = proc_mkdir("mca", NULL); create_proc_read_entry("pos",0,proc_mca,get_mca_info,NULL); create_proc_read_entry("machine",0,proc_mca,get_mca_machine_info,NULL); diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c index 6b91b9ab1d4..3ea5ad4b780 100644 --- a/drivers/md/dm-emc.c +++ b/drivers/md/dm-emc.c @@ -110,8 +110,6 @@ static struct request *get_failover_req(struct emc_handler *h, memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); rq->sense_len = 0; - memset(&rq->cmd, 0, BLK_MAX_CDB); - rq->timeout = EMC_FAILOVER_TIMEOUT; rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE; diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c index 204bf42c944..b63a0ab37c5 100644 --- a/drivers/md/dm-mpath-hp-sw.c +++ b/drivers/md/dm-mpath-hp-sw.c @@ -137,7 +137,6 @@ static struct request *hp_sw_get_request(struct dm_path *path) req->sense = h->sense; memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE); - memset(&req->cmd, 0, BLK_MAX_CDB); req->cmd[0] = START_STOP; req->cmd[4] = 1; req->cmd_len = COMMAND_SIZE(req->cmd[0]); diff --git a/drivers/md/dm-mpath-rdac.c 
b/drivers/md/dm-mpath-rdac.c index e04eb5c697f..95e77734880 100644 --- a/drivers/md/dm-mpath-rdac.c +++ b/drivers/md/dm-mpath-rdac.c @@ -284,7 +284,6 @@ static struct request *get_rdac_req(struct rdac_handler *h, return NULL; } - memset(&rq->cmd, 0, BLK_MAX_CDB); rq->sense = h->sense; memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); rq->sense_len = 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 51be5334421..73326e7c54b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -873,10 +873,13 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; q->bounce_pfn = t->limits.bounce_pfn; + /* XXX: the below will probably go bug. must ensure there can be no + * concurrency on queue_flags, and use the unlocked versions... + */ if (t->limits.no_cluster) - q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER); + queue_flag_clear(QUEUE_FLAG_CLUSTER, q); else - q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER); + queue_flag_set(QUEUE_FLAG_CLUSTER, q); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 87620b705be..bb3e4b1cb77 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -282,7 +282,8 @@ static mddev_t * mddev_find(dev_t unit) kfree(new); return NULL; } - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); + /* Can be unlocked because the queue is new: no concurrency */ + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); blk_queue_make_request(new->queue, md_fail_request); @@ -5947,13 +5948,9 @@ static struct notifier_block md_notifier = { static void md_geninit(void) { - struct proc_dir_entry *p; - dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); - p = create_proc_entry("mdstat", S_IRUGO, NULL); - if (p) - p->proc_fops = &md_seq_fops; + proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops); } static int __init md_init(void) diff --git a/drivers/media/video/zoran_procfs.c b/drivers/media/video/zoran_procfs.c 
index 328ed6e7ac6..870bc5a70e3 100644 --- a/drivers/media/video/zoran_procfs.c +++ b/drivers/media/video/zoran_procfs.c @@ -180,6 +180,7 @@ static ssize_t zoran_write(struct file *file, const char __user *buffer, } static const struct file_operations zoran_operations = { + .owner = THIS_MODULE, .open = zoran_open, .read = seq_read, .write = zoran_write, @@ -195,10 +196,8 @@ zoran_proc_init (struct zoran *zr) char name[8]; snprintf(name, 7, "zoran%d", zr->id); - if ((zr->zoran_proc = create_proc_entry(name, 0, NULL))) { - zr->zoran_proc->data = zr; - zr->zoran_proc->owner = THIS_MODULE; - zr->zoran_proc->proc_fops = &zoran_operations; + zr->zoran_proc = proc_create_data(name, 0, NULL, &zoran_operations, zr); + if (zr->zoran_proc != NULL) { dprintk(2, KERN_INFO "%s: procfs entry /proc/%s allocated. data=%p\n", diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index a9531489740..81483de8c0f 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -371,7 +371,7 @@ static int i2o_block_prep_req_fn(struct request_queue *q, struct request *req) /* connect the i2o_block_request to the request */ if (!req->special) { ireq = i2o_block_request_alloc(); - if (unlikely(IS_ERR(ireq))) { + if (IS_ERR(ireq)) { osm_debug("unable to allocate i2o_block_request!\n"); return BLKPREP_DEFER; } diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c index 6fdd072201f..54a3016ff45 100644 --- a/drivers/message/i2o/i2o_proc.c +++ b/drivers/message/i2o/i2o_proc.c @@ -1893,13 +1893,11 @@ static int i2o_proc_create_entries(struct proc_dir_entry *dir, struct proc_dir_entry *tmp; while (i2o_pe->name) { - tmp = create_proc_entry(i2o_pe->name, i2o_pe->mode, dir); + tmp = proc_create_data(i2o_pe->name, i2o_pe->mode, dir, + i2o_pe->fops, data); if (!tmp) return -1; - tmp->data = data; - tmp->proc_fops = i2o_pe->fops; - i2o_pe++; } diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c b/drivers/misc/hdpuftrs/hdpu_cpustate.c 
index 302e92418bb..ff51ab67231 100644 --- a/drivers/misc/hdpuftrs/hdpu_cpustate.c +++ b/drivers/misc/hdpuftrs/hdpu_cpustate.c @@ -210,13 +210,10 @@ static int hdpu_cpustate_probe(struct platform_device *pdev) return ret; } - proc_de = create_proc_entry("sky_cpustate", 0666, &proc_root); + proc_de = proc_create("sky_cpustate", 0666, NULL, &proc_cpustate); if (!proc_de) { printk(KERN_WARNING "sky_cpustate: " "Unable to create proc entry\n"); - } else { - proc_de->proc_fops = &proc_cpustate; - proc_de->owner = THIS_MODULE; } printk(KERN_INFO "Sky CPU State Driver v" SKY_CPUSTATE_VERSION "\n"); diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c index 2fa36f7a6eb..08e26beefe6 100644 --- a/drivers/misc/hdpuftrs/hdpu_nexus.c +++ b/drivers/misc/hdpuftrs/hdpu_nexus.c @@ -102,22 +102,17 @@ static int hdpu_nexus_probe(struct platform_device *pdev) printk(KERN_ERR "sky_nexus: Could not map slot id\n"); } - hdpu_slot_id = create_proc_entry("sky_slot_id", 0666, &proc_root); + hdpu_slot_id = proc_create("sky_slot_id", 0666, NULL, &proc_slot_id); if (!hdpu_slot_id) { printk(KERN_WARNING "sky_nexus: " "Unable to create proc dir entry: sky_slot_id\n"); - } else { - hdpu_slot_id->proc_fops = &proc_slot_id; - hdpu_slot_id->owner = THIS_MODULE; } - hdpu_chassis_id = create_proc_entry("sky_chassis_id", 0666, &proc_root); - if (!hdpu_chassis_id) { + hdpu_chassis_id = proc_create("sky_chassis_id", 0666, NULL, + &proc_chassis_id); + if (!hdpu_chassis_id) printk(KERN_WARNING "sky_nexus: " "Unable to create proc dir entry: sky_chassis_id\n"); - } else { - hdpu_chassis_id->proc_fops = &proc_chassis_id; - hdpu_chassis_id->owner = THIS_MODULE; } return 0; @@ -128,8 +123,8 @@ static int hdpu_nexus_remove(struct platform_device *pdev) slot_id = -1; chassis_id = -1; - remove_proc_entry("sky_slot_id", &proc_root); - remove_proc_entry("sky_chassis_id", &proc_root); + remove_proc_entry("sky_slot_id", NULL); + remove_proc_entry("sky_chassis_id", NULL); hdpu_slot_id = 
0; hdpu_chassis_id = 0; diff --git a/drivers/misc/ibmasm/command.c b/drivers/misc/ibmasm/command.c index 1a0e7978226..276d3fb6809 100644 --- a/drivers/misc/ibmasm/command.c +++ b/drivers/misc/ibmasm/command.c @@ -96,7 +96,7 @@ static inline void do_exec_command(struct service_processor *sp) { char tsbuf[32]; - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); if (ibmasm_send_i2o_message(sp)) { sp->current_command->status = IBMASM_CMD_FAILED; @@ -119,7 +119,7 @@ void ibmasm_exec_command(struct service_processor *sp, struct command *cmd) unsigned long flags; char tsbuf[32]; - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); spin_lock_irqsave(&sp->lock, flags); @@ -139,7 +139,7 @@ static void exec_next_command(struct service_processor *sp) unsigned long flags; char tsbuf[32]; - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); spin_lock_irqsave(&sp->lock, flags); sp->current_command = dequeue_command(sp); diff --git a/drivers/misc/ibmasm/heartbeat.c b/drivers/misc/ibmasm/heartbeat.c index 3036e785b3e..1bc4306572a 100644 --- a/drivers/misc/ibmasm/heartbeat.c +++ b/drivers/misc/ibmasm/heartbeat.c @@ -75,9 +75,9 @@ void ibmasm_heartbeat_exit(struct service_processor *sp) { char tsbuf[32]; - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); ibmasm_wait_for_response(sp->heartbeat, IBMASM_CMD_TIMEOUT_NORMAL); - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); suspend_heartbeats = 1; command_put(sp->heartbeat); } @@ -88,7 +88,7 @@ void ibmasm_receive_heartbeat(struct service_processor *sp, void *message, size struct dot_command_header *header = (struct 
dot_command_header *)cmd->buffer; char tsbuf[32]; - dbg("%s:%d at %s\n", __FUNCTION__, __LINE__, get_timestamp(tsbuf)); + dbg("%s:%d at %s\n", __func__, __LINE__, get_timestamp(tsbuf)); if (suspend_heartbeats) return; diff --git a/drivers/misc/intel_menlow.c b/drivers/misc/intel_menlow.c index 0c0bb3093e0..80a13635240 100644 --- a/drivers/misc/intel_menlow.c +++ b/drivers/misc/intel_menlow.c @@ -175,19 +175,17 @@ static int intel_menlow_memory_add(struct acpi_device *device) goto end; } - if (cdev) { - acpi_driver_data(device) = cdev; - result = sysfs_create_link(&device->dev.kobj, - &cdev->device.kobj, "thermal_cooling"); - if (result) - goto unregister; - - result = sysfs_create_link(&cdev->device.kobj, - &device->dev.kobj, "device"); - if (result) { - sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); - goto unregister; - } + acpi_driver_data(device) = cdev; + result = sysfs_create_link(&device->dev.kobj, + &cdev->device.kobj, "thermal_cooling"); + if (result) + goto unregister; + + result = sysfs_create_link(&cdev->device.kobj, + &device->dev.kobj, "device"); + if (result) { + sysfs_remove_link(&device->dev.kobj, "thermal_cooling"); + goto unregister; } end: diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c index 05172d2613d..6f76573e7c8 100644 --- a/drivers/misc/ioc4.c +++ b/drivers/misc/ioc4.c @@ -75,7 +75,7 @@ ioc4_register_submodule(struct ioc4_submodule *is) printk(KERN_WARNING "%s: IOC4 submodule %s probe failed " "for pci_dev %s", - __FUNCTION__, module_name(is->is_owner), + __func__, module_name(is->is_owner), pci_name(idd->idd_pdev)); } } @@ -102,7 +102,7 @@ ioc4_unregister_submodule(struct ioc4_submodule *is) printk(KERN_WARNING "%s: IOC4 submodule %s remove failed " "for pci_dev %s.\n", - __FUNCTION__, module_name(is->is_owner), + __func__, module_name(is->is_owner), pci_name(idd->idd_pdev)); } } @@ -282,7 +282,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) if ((ret = pci_enable_device(pdev))) { 
printk(KERN_WARNING "%s: Failed to enable IOC4 device for pci_dev %s.\n", - __FUNCTION__, pci_name(pdev)); + __func__, pci_name(pdev)); goto out; } pci_set_master(pdev); @@ -292,7 +292,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) if (!idd) { printk(KERN_WARNING "%s: Failed to allocate IOC4 data for pci_dev %s.\n", - __FUNCTION__, pci_name(pdev)); + __func__, pci_name(pdev)); ret = -ENODEV; goto out_idd; } @@ -307,7 +307,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) printk(KERN_WARNING "%s: Unable to find IOC4 misc resource " "for pci_dev %s.\n", - __FUNCTION__, pci_name(idd->idd_pdev)); + __func__, pci_name(idd->idd_pdev)); ret = -ENODEV; goto out_pci; } @@ -316,7 +316,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) printk(KERN_WARNING "%s: Unable to request IOC4 misc region " "for pci_dev %s.\n", - __FUNCTION__, pci_name(idd->idd_pdev)); + __func__, pci_name(idd->idd_pdev)); ret = -ENODEV; goto out_pci; } @@ -326,7 +326,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) printk(KERN_WARNING "%s: Unable to remap IOC4 misc region " "for pci_dev %s.\n", - __FUNCTION__, pci_name(idd->idd_pdev)); + __func__, pci_name(idd->idd_pdev)); ret = -ENODEV; goto out_misc_region; } @@ -372,7 +372,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) printk(KERN_WARNING "%s: IOC4 submodule 0x%s probe failed " "for pci_dev %s.\n", - __FUNCTION__, module_name(is->is_owner), + __func__, module_name(is->is_owner), pci_name(idd->idd_pdev)); } } @@ -406,7 +406,7 @@ ioc4_remove(struct pci_dev *pdev) printk(KERN_WARNING "%s: IOC4 submodule 0x%s remove failed " "for pci_dev %s.\n", - __FUNCTION__, module_name(is->is_owner), + __func__, module_name(is->is_owner), pci_name(idd->idd_pdev)); } } @@ -418,7 +418,7 @@ ioc4_remove(struct pci_dev *pdev) printk(KERN_WARNING "%s: Unable to get IOC4 misc mapping for pci_dev %s. 
" "Device removal may be incomplete.\n", - __FUNCTION__, pci_name(idd->idd_pdev)); + __func__, pci_name(idd->idd_pdev)); } release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs)); diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index 7fa61e907e1..71d1c84e2fa 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -12,6 +12,7 @@ * or alternatively, you might use OpenHaptics provided by Sensable. */ +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/device.h> @@ -91,11 +92,8 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, unsigned long flags; unsigned int i; - if (_IOC_TYPE(cmd) != PH_IOC_MAGIC || - _IOC_NR(cmd) > PH_IOC_MAXNR) - return -ENOTTY; - switch (cmd) { + case PHN_SETREG: case PHN_SET_REG: if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; @@ -126,6 +124,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, phantom_status(dev, dev->status & ~PHB_RUNNING); spin_unlock_irqrestore(&dev->regs_lock, flags); break; + case PHN_SETREGS: case PHN_SET_REGS: if (copy_from_user(&rs, argp, sizeof(rs))) return -EFAULT; @@ -143,6 +142,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, } spin_unlock_irqrestore(&dev->regs_lock, flags); break; + case PHN_GETREG: case PHN_GET_REG: if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; @@ -155,6 +155,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, if (copy_to_user(argp, &r, sizeof(r))) return -EFAULT; break; + case PHN_GETREGS: case PHN_GET_REGS: { u32 m; @@ -168,6 +169,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, for (i = 0; i < m; i++) if (rs.mask & BIT(i)) rs.values[i] = ioread32(dev->iaddr + i); + atomic_set(&dev->counter, 0); spin_unlock_irqrestore(&dev->regs_lock, flags); if (copy_to_user(argp, &rs, sizeof(rs))) @@ -191,6 +193,20 @@ static long phantom_ioctl(struct file *file, unsigned int cmd, return 0; } +#ifdef CONFIG_COMPAT +static long 
phantom_compat_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + if (_IOC_NR(cmd) <= 3 && _IOC_SIZE(cmd) == sizeof(compat_uptr_t)) { + cmd &= ~(_IOC_SIZEMASK << _IOC_SIZESHIFT); + cmd |= sizeof(void *) << _IOC_SIZESHIFT; + } + return phantom_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#else +#define phantom_compat_ioctl NULL +#endif + static int phantom_open(struct inode *inode, struct file *file) { struct phantom_device *dev = container_of(inode->i_cdev, @@ -239,11 +255,12 @@ static unsigned int phantom_poll(struct file *file, poll_table *wait) pr_debug("phantom_poll: %d\n", atomic_read(&dev->counter)); poll_wait(file, &dev->wait, wait); - if (atomic_read(&dev->counter)) { + + if (!(dev->status & PHB_RUNNING)) + mask = POLLERR; + else if (atomic_read(&dev->counter)) mask = POLLIN | POLLRDNORM; - atomic_dec(&dev->counter); - } else if ((dev->status & PHB_RUNNING) == 0) - mask = POLLIN | POLLRDNORM | POLLERR; + pr_debug("phantom_poll end: %x/%d\n", mask, atomic_read(&dev->counter)); return mask; @@ -253,6 +270,7 @@ static struct file_operations phantom_file_ops = { .open = phantom_open, .release = phantom_release, .unlocked_ioctl = phantom_ioctl, + .compat_ioctl = phantom_compat_ioctl, .poll = phantom_poll, }; diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c index 02ff3d19b1c..00e48e2a9c1 100644 --- a/drivers/misc/sony-laptop.c +++ b/drivers/misc/sony-laptop.c @@ -961,7 +961,7 @@ static int sony_nc_resume(struct acpi_device *device) ret = acpi_callsetfunc(sony_nc_acpi_handle, *item->acpiset, item->value, NULL); if (ret < 0) { - printk("%s: %d\n", __FUNCTION__, ret); + printk("%s: %d\n", __func__, ret); break; } } @@ -1453,7 +1453,7 @@ static struct sonypi_eventtypes type4_events[] = { udelay(1); \ if (!n) \ dprintk("command failed at %s : %s (line %d)\n", \ - __FILE__, __FUNCTION__, __LINE__); \ + __FILE__, __func__, __LINE__); \ } static u8 sony_pic_call1(u8 dev) diff --git a/drivers/mmc/host/mmc_spi.c 
b/drivers/mmc/host/mmc_spi.c index 365024b83d3..35508584ac2 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -340,7 +340,7 @@ checkstatus: /* SPI R3, R4, or R7 == R1 + 4 bytes */ case MMC_RSP_SPI_R3: - cmd->resp[1] = be32_to_cpu(get_unaligned((u32 *)cp)); + cmd->resp[1] = get_unaligned_be32(cp); break; /* SPI R1 == just one status byte */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6e91b4b7aab..6425603bc37 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3282,17 +3282,14 @@ static int bond_create_proc_entry(struct bonding *bond) struct net_device *bond_dev = bond->dev; if (bond_proc_dir) { - bond->proc_entry = create_proc_entry(bond_dev->name, - S_IRUGO, - bond_proc_dir); + bond->proc_entry = proc_create_data(bond_dev->name, + S_IRUGO, bond_proc_dir, + &bond_info_fops, bond); if (bond->proc_entry == NULL) { printk(KERN_WARNING DRV_NAME ": Warning: Cannot create /proc/net/%s/%s\n", DRV_NAME, bond_dev->name); } else { - bond->proc_entry->data = bond; - bond->proc_entry->proc_fops = &bond_info_fops; - bond->proc_entry->owner = THIS_MODULE; memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } } diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 2d139ec7977..f3cba5e24ec 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1802,7 +1802,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) * it is protected by the before last buffer's el bit being set */ if (rx->prev->skb) { struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data; - put_unaligned(cpu_to_le32(rx->dma_addr), &prev_rfd->link); + put_unaligned_le32(rx->dma_addr, &prev_rfd->link); } return 0; diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index b53f6b6491b..e5c2380f50c 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1508,7 +1508,7 @@ static int hamachi_rx(struct net_device *dev) hmp->rx_buf_sz, PCI_DMA_FROMDEVICE); buf_addr = (u8 *) 
hmp->rx_skbuff[entry]->data; - frame_status = le32_to_cpu(get_unaligned((__le32*)&(buf_addr[data_size - 12]))); + frame_status = get_unaligned_le32(&(buf_addr[data_size - 12])); if (hamachi_debug > 4) printk(KERN_DEBUG " hamachi_rx() status was %8.8x.\n", frame_status); diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index ce4fc2ec2fe..00527805e4f 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1302,13 +1302,10 @@ static void ibmveth_proc_register_adapter(struct ibmveth_adapter *adapter) if (ibmveth_proc_dir) { char u_addr[10]; sprintf(u_addr, "%x", adapter->vdev->unit_address); - entry = create_proc_entry(u_addr, S_IFREG, ibmveth_proc_dir); - if (!entry) { + entry = proc_create_data(u_addr, S_IFREG, ibmveth_proc_dir, + &ibmveth_proc_fops, adapter); + if (!entry) ibmveth_error_printk("Cannot create adapter proc entry"); - } else { - entry->data = (void *) adapter; - entry->proc_fops = &ibmveth_proc_fops; - } } return; } diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index 93916cf33f2..ad92d3ff1c4 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -464,7 +464,7 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len) } fcs = ~(crc32_le(~0, buf, new_len)); - if(fcs != le32_to_cpu(get_unaligned((__le32 *)(buf+new_len)))) { + if(fcs != get_unaligned_le32(buf + new_len)) { IRDA_ERROR("crc error calc 0x%x len %d\n", fcs, new_len); mcs->stats.rx_errors++; mcs->stats.rx_crc_errors++; diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index e59c485bc49..05196378274 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -329,7 +329,7 @@ static void fir_eof(struct stir_cb *stir) } fcs = ~(crc32_le(~0, rx_buff->data, len)); - if (fcs != le32_to_cpu(get_unaligned((__le32 *)(rx_buff->data+len)))) { + if (fcs != get_unaligned_le32(rx_buff->data + len)) { pr_debug("crc error calc 0x%x len %d\n", fcs, len); stir->stats.rx_errors++; 
stir->stats.rx_crc_errors++; diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index acd082a96a4..d15e00b8591 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -1674,13 +1674,12 @@ vlsi_irda_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (vlsi_proc_root != NULL) { struct proc_dir_entry *ent; - ent = create_proc_entry(ndev->name, S_IFREG|S_IRUGO, vlsi_proc_root); + ent = proc_create_data(ndev->name, S_IFREG|S_IRUGO, + vlsi_proc_root, VLSI_PROC_FOPS, ndev); if (!ent) { IRDA_WARNING("%s: failed to create proc entry\n", __FUNCTION__); } else { - ent->data = ndev; - ent->proc_fops = VLSI_PROC_FOPS; ent->size = 0; } idev->proc_entry = ent; diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index cead81e80f0..ef63c8d2bd7 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -2437,7 +2437,7 @@ static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) int status; segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) goto drop; while (segs) { diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 4fad4ddb350..58a26a47af2 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -1052,11 +1052,9 @@ static int __init pppoe_proc_init(void) { struct proc_dir_entry *p; - p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net); + p = proc_net_fops_create(&init_net, "pppoe", S_IRUGO, &pppoe_seq_fops); if (!p) return -ENOMEM; - - p->proc_fops = &pppoe_seq_fops; return 0; } #else /* CONFIG_PROC_FS */ diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 3d10ca050b7..244d7830c92 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -2469,12 +2469,12 @@ static int __init pppol2tp_init(void) goto out_unregister_pppol2tp_proto; #ifdef CONFIG_PROC_FS - pppol2tp_proc = create_proc_entry("pppol2tp", 0, init_net.proc_net); + pppol2tp_proc = 
proc_net_fops_create(&init_net, "pppol2tp", 0, + &pppol2tp_proc_fops); if (!pppol2tp_proc) { err = -ENOMEM; goto out_unregister_pppox_proto; } - pppol2tp_proc->proc_fops = &pppol2tp_proc_fops; #endif /* CONFIG_PROC_FS */ printk(KERN_INFO "PPPoL2TP kernel driver, %s\n", PPPOL2TP_DRV_VERSION); diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index e7fd08adbba..2b8fd68bc51 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -77,7 +77,7 @@ static int rionet_capable = 1; * could be made into a hash table to save memory depending * on system trade-offs. */ -static struct rio_dev *rionet_active[RIO_MAX_ROUTE_ENTRIES]; +static struct rio_dev **rionet_active; #define is_rionet_capable(pef, src_ops, dst_ops) \ ((pef & RIO_PEF_INB_MBOX) && \ @@ -195,7 +195,8 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } if (eth->h_dest[0] & 0x01) { - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) + for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rnet->mport->sys_size); + i++) if (rionet_active[i]) rionet_queue_tx_msg(skb, ndev, rionet_active[i]); @@ -385,6 +386,8 @@ static void rionet_remove(struct rio_dev *rdev) struct net_device *ndev = NULL; struct rionet_peer *peer, *tmp; + free_pages((unsigned long)rionet_active, rdev->net->hport->sys_size ? + __ilog2(sizeof(void *)) + 4 : 0); unregister_netdev(ndev); kfree(ndev); @@ -443,6 +446,15 @@ static int rionet_setup_netdev(struct rio_mport *mport) goto out; } + rionet_active = (struct rio_dev **)__get_free_pages(GFP_KERNEL, + mport->sys_size ? 
__ilog2(sizeof(void *)) + 4 : 0); + if (!rionet_active) { + rc = -ENOMEM; + goto out; + } + memset((void *)rionet_active, 0, sizeof(void *) * + RIO_MAX_ROUTE_ENTRIES(mport->sys_size)); + /* Set up private area */ rnet = (struct rionet_private *)ndev->priv; rnet->mport = mport; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index e3f74c9f78b..b66c75e3b8a 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -4361,7 +4361,7 @@ static int tg3_tso_bug(struct tg3 *tp, struct sk_buff *skb) } segs = skb_gso_segment(skb, tp->dev->features & ~NETIF_F_TSO); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) goto tg3_tso_bug_end; do { diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index 6c6fc325c8f..bc30c6e8fea 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -482,7 +482,6 @@ static char version[] __devinitdata = "de4x5.c:V0.546 2001/02/22 davies@maniac.ultranet.com\n"; #define c_char const char -#define TWIDDLE(a) (u_short)le16_to_cpu(get_unaligned((__le16 *)(a))) /* ** MII Information @@ -4405,7 +4404,7 @@ srom_infoleaf_info(struct net_device *dev) } } - lp->infoleaf_offset = TWIDDLE(p+1); + lp->infoleaf_offset = get_unaligned_le16(p + 1); return 0; } @@ -4476,7 +4475,7 @@ srom_exec(struct net_device *dev, u_char *p) while (count--) { gep_wr(((lp->chipset==DC21140) && (lp->ibn!=5) ? - *p++ : TWIDDLE(w++)), dev); + *p++ : get_unaligned_le16(w++)), dev); mdelay(2); /* 2ms per action */ } @@ -4711,10 +4710,10 @@ type1_infoblock(struct net_device *dev, u_char count, u_char *p) lp->active = *p++; lp->phy[lp->active].gep = (*p ? p : NULL); p += (*p + 1); lp->phy[lp->active].rst = (*p ? 
p : NULL); p += (*p + 1); - lp->phy[lp->active].mc = TWIDDLE(p); p += 2; - lp->phy[lp->active].ana = TWIDDLE(p); p += 2; - lp->phy[lp->active].fdx = TWIDDLE(p); p += 2; - lp->phy[lp->active].ttm = TWIDDLE(p); + lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].ana = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].fdx = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].ttm = get_unaligned_le16(p); return 0; } else if ((lp->media == INIT) && (lp->timeout < 0)) { lp->ibn = 1; @@ -4751,16 +4750,16 @@ type2_infoblock(struct net_device *dev, u_char count, u_char *p) lp->infoblock_media = (*p) & MEDIA_CODE; if ((*p++) & EXT_FIELD) { - lp->cache.csr13 = TWIDDLE(p); p += 2; - lp->cache.csr14 = TWIDDLE(p); p += 2; - lp->cache.csr15 = TWIDDLE(p); p += 2; + lp->cache.csr13 = get_unaligned_le16(p); p += 2; + lp->cache.csr14 = get_unaligned_le16(p); p += 2; + lp->cache.csr15 = get_unaligned_le16(p); p += 2; } else { lp->cache.csr13 = CSR13; lp->cache.csr14 = CSR14; lp->cache.csr15 = CSR15; } - lp->cache.gepc = ((s32)(TWIDDLE(p)) << 16); p += 2; - lp->cache.gep = ((s32)(TWIDDLE(p)) << 16); + lp->cache.gepc = ((s32)(get_unaligned_le16(p)) << 16); p += 2; + lp->cache.gep = ((s32)(get_unaligned_le16(p)) << 16); lp->infoblock_csr6 = OMR_SIA; lp->useMII = false; @@ -4792,10 +4791,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p) if (MOTO_SROM_BUG) lp->active = 0; lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].rst = (*p ? 
p : NULL); p += (2 * (*p) + 1); - lp->phy[lp->active].mc = TWIDDLE(p); p += 2; - lp->phy[lp->active].ana = TWIDDLE(p); p += 2; - lp->phy[lp->active].fdx = TWIDDLE(p); p += 2; - lp->phy[lp->active].ttm = TWIDDLE(p); p += 2; + lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].ana = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].fdx = get_unaligned_le16(p); p += 2; + lp->phy[lp->active].ttm = get_unaligned_le16(p); p += 2; lp->phy[lp->active].mci = *p; return 0; } else if ((lp->media == INIT) && (lp->timeout < 0)) { @@ -4835,8 +4834,8 @@ type4_infoblock(struct net_device *dev, u_char count, u_char *p) lp->cache.csr13 = CSR13; /* Hard coded defaults */ lp->cache.csr14 = CSR14; lp->cache.csr15 = CSR15; - lp->cache.gepc = ((s32)(TWIDDLE(p)) << 16); p += 2; - lp->cache.gep = ((s32)(TWIDDLE(p)) << 16); p += 2; + lp->cache.gepc = ((s32)(get_unaligned_le16(p)) << 16); p += 2; + lp->cache.gep = ((s32)(get_unaligned_le16(p)) << 16); p += 2; csr6 = *p++; flags = *p++; diff --git a/drivers/net/tulip/de4x5.h b/drivers/net/tulip/de4x5.h index 9fb8d7f0799..f5f33b3eb06 100644 --- a/drivers/net/tulip/de4x5.h +++ b/drivers/net/tulip/de4x5.h @@ -1017,4 +1017,4 @@ struct de4x5_ioctl { #define DE4X5_SET_OMR 0x0d /* Set the OMR Register contents */ #define DE4X5_GET_REG 0x0e /* Get the DE4X5 Registers */ -#define MOTO_SROM_BUG ((lp->active == 8) && (((le32_to_cpu(get_unaligned(((__le32 *)dev->dev_addr))))&0x00ffffff)==0x3e0008)) +#define MOTO_SROM_BUG (lp->active == 8 && (get_unaligned_le32(dev->dev_addr) & 0x00ffffff) == 0x3e0008) diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h index 908422f2f32..92c68a22f16 100644 --- a/drivers/net/tulip/tulip.h +++ b/drivers/net/tulip/tulip.h @@ -25,6 +25,7 @@ #include <linux/pci.h> #include <asm/io.h> #include <asm/irq.h> +#include <asm/unaligned.h> @@ -304,11 +305,7 @@ enum t21143_csr6_bits { #define RUN_AT(x) (jiffies + (x)) -#if defined(__i386__) /* AKA get_unaligned() */ -#define get_u16(ptr) 
(*(u16 *)(ptr)) -#else -#define get_u16(ptr) (((u8*)(ptr))[0] + (((u8*)(ptr))[1]<<8)) -#endif +#define get_u16(ptr) get_unaligned_le16((ptr)) struct medialeaf { u8 type; diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index fa1c1c329a2..f9d13fa05d6 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -327,8 +327,8 @@ static void tulip_up(struct net_device *dev) tp->dirty_rx = tp->dirty_tx = 0; if (tp->flags & MC_HASH_ONLY) { - u32 addr_low = le32_to_cpu(get_unaligned((__le32 *)dev->dev_addr)); - u32 addr_high = le16_to_cpu(get_unaligned((__le16 *)(dev->dev_addr+4))); + u32 addr_low = get_unaligned_le32(dev->dev_addr); + u32 addr_high = get_unaligned_le16(dev->dev_addr + 4); if (tp->chip_id == AX88140) { iowrite32(0, ioaddr + CSR13); iowrite32(addr_low, ioaddr + CSR14); @@ -1437,13 +1437,13 @@ static int __devinit tulip_init_one (struct pci_dev *pdev, do value = ioread32(ioaddr + CSR9); while (value < 0 && --boguscnt > 0); - put_unaligned(cpu_to_le16(value), ((__le16*)dev->dev_addr) + i); + put_unaligned_le16(value, ((__le16 *)dev->dev_addr) + i); sum += value & 0xffff; } } else if (chip_idx == COMET) { /* No need to read the EEPROM. 
*/ - put_unaligned(cpu_to_le32(ioread32(ioaddr + 0xA4)), (__le32 *)dev->dev_addr); - put_unaligned(cpu_to_le16(ioread32(ioaddr + 0xA8)), (__le16 *)(dev->dev_addr + 4)); + put_unaligned_le32(ioread32(ioaddr + 0xA4), dev->dev_addr); + put_unaligned_le16(ioread32(ioaddr + 0xA8), dev->dev_addr + 4); for (i = 0; i < 6; i ++) sum += dev->dev_addr[i]; } else { diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 932d6b1c9d0..45f47c1c0a3 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -3657,7 +3657,7 @@ void mpi_receive_802_11 (struct airo_info *ai) ptr += hdrlen; if (hdrlen == 24) ptr += 6; - gap = le16_to_cpu(get_unaligned((__le16 *)ptr)); + gap = get_unaligned_le16(ptr); ptr += sizeof(__le16); if (gap) { if (gap <= 8) @@ -4347,24 +4347,28 @@ static int proc_config_open( struct inode *inode, struct file *file ); static int proc_wepkey_open( struct inode *inode, struct file *file ); static const struct file_operations proc_statsdelta_ops = { + .owner = THIS_MODULE, .read = proc_read, .open = proc_statsdelta_open, .release = proc_close }; static const struct file_operations proc_stats_ops = { + .owner = THIS_MODULE, .read = proc_read, .open = proc_stats_open, .release = proc_close }; static const struct file_operations proc_status_ops = { + .owner = THIS_MODULE, .read = proc_read, .open = proc_status_open, .release = proc_close }; static const struct file_operations proc_SSID_ops = { + .owner = THIS_MODULE, .read = proc_read, .write = proc_write, .open = proc_SSID_open, @@ -4372,6 +4376,7 @@ static const struct file_operations proc_SSID_ops = { }; static const struct file_operations proc_BSSList_ops = { + .owner = THIS_MODULE, .read = proc_read, .write = proc_write, .open = proc_BSSList_open, @@ -4379,6 +4384,7 @@ static const struct file_operations proc_BSSList_ops = { }; static const struct file_operations proc_APList_ops = { + .owner = THIS_MODULE, .read = proc_read, .write = proc_write, .open = proc_APList_open, @@ 
-4386,6 +4392,7 @@ static const struct file_operations proc_APList_ops = { }; static const struct file_operations proc_config_ops = { + .owner = THIS_MODULE, .read = proc_read, .write = proc_write, .open = proc_config_open, @@ -4393,6 +4400,7 @@ static const struct file_operations proc_config_ops = { }; static const struct file_operations proc_wepkey_ops = { + .owner = THIS_MODULE, .read = proc_read, .write = proc_write, .open = proc_wepkey_open, @@ -4411,10 +4419,6 @@ struct proc_data { void (*on_close) (struct inode *, struct file *); }; -#ifndef SETPROC_OPS -#define SETPROC_OPS(entry, ops) (entry)->proc_fops = &(ops) -#endif - static int setup_proc_entry( struct net_device *dev, struct airo_info *apriv ) { struct proc_dir_entry *entry; @@ -4430,100 +4434,76 @@ static int setup_proc_entry( struct net_device *dev, apriv->proc_entry->owner = THIS_MODULE; /* Setup the StatsDelta */ - entry = create_proc_entry("StatsDelta", - S_IFREG | (S_IRUGO&proc_perm), - apriv->proc_entry); + entry = proc_create_data("StatsDelta", + S_IFREG | (S_IRUGO&proc_perm), + apriv->proc_entry, &proc_statsdelta_ops, dev); if (!entry) goto fail_stats_delta; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_statsdelta_ops); /* Setup the Stats */ - entry = create_proc_entry("Stats", - S_IFREG | (S_IRUGO&proc_perm), - apriv->proc_entry); + entry = proc_create_data("Stats", + S_IFREG | (S_IRUGO&proc_perm), + apriv->proc_entry, &proc_stats_ops, dev); if (!entry) goto fail_stats; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_stats_ops); /* Setup the Status */ - entry = create_proc_entry("Status", - S_IFREG | (S_IRUGO&proc_perm), - apriv->proc_entry); + entry = proc_create_data("Status", + S_IFREG | (S_IRUGO&proc_perm), + apriv->proc_entry, &proc_status_ops, dev); if (!entry) goto fail_status; entry->uid = proc_uid; entry->gid = proc_gid; - 
entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_status_ops); /* Setup the Config */ - entry = create_proc_entry("Config", - S_IFREG | proc_perm, - apriv->proc_entry); + entry = proc_create_data("Config", + S_IFREG | proc_perm, + apriv->proc_entry, &proc_config_ops, dev); if (!entry) goto fail_config; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_config_ops); /* Setup the SSID */ - entry = create_proc_entry("SSID", - S_IFREG | proc_perm, - apriv->proc_entry); + entry = proc_create_data("SSID", + S_IFREG | proc_perm, + apriv->proc_entry, &proc_SSID_ops, dev); if (!entry) goto fail_ssid; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_SSID_ops); /* Setup the APList */ - entry = create_proc_entry("APList", - S_IFREG | proc_perm, - apriv->proc_entry); + entry = proc_create_data("APList", + S_IFREG | proc_perm, + apriv->proc_entry, &proc_APList_ops, dev); if (!entry) goto fail_aplist; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_APList_ops); /* Setup the BSSList */ - entry = create_proc_entry("BSSList", - S_IFREG | proc_perm, - apriv->proc_entry); + entry = proc_create_data("BSSList", + S_IFREG | proc_perm, + apriv->proc_entry, &proc_BSSList_ops, dev); if (!entry) goto fail_bsslist; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_BSSList_ops); /* Setup the WepKey */ - entry = create_proc_entry("WepKey", - S_IFREG | proc_perm, - apriv->proc_entry); + entry = proc_create_data("WepKey", + S_IFREG | proc_perm, + apriv->proc_entry, &proc_wepkey_ops, dev); if (!entry) goto fail_wepkey; entry->uid = proc_uid; entry->gid = proc_gid; - entry->data = dev; - entry->owner = THIS_MODULE; - SETPROC_OPS(entry, proc_wepkey_ops); return 0; @@ -5625,9 +5605,9 @@ 
static int __init airo_init_module( void ) int have_isa_dev = 0; #endif - airo_entry = create_proc_entry("aironet", + airo_entry = create_proc_entry("driver/aironet", S_IFDIR | airo_perm, - proc_root_driver); + NULL); if (airo_entry) { airo_entry->uid = proc_uid; @@ -5651,7 +5631,7 @@ static int __init airo_init_module( void ) airo_print_info("", "Finished probing for PCI adapters"); if (i) { - remove_proc_entry("aironet", proc_root_driver); + remove_proc_entry("driver/aironet", NULL); return i; } #endif @@ -5673,7 +5653,7 @@ static void __exit airo_cleanup_module( void ) #ifdef CONFIG_PCI pci_unregister_driver(&airo_driver); #endif - remove_proc_entry("aironet", proc_root_driver); + remove_proc_entry("driver/aironet", NULL); } /* diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index e18305b781c..4e5c8fc3520 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -58,10 +58,6 @@ #include "reg.h" #include "debug.h" -/* unaligned little endian access */ -#define LE_READ_2(_p) (le16_to_cpu(get_unaligned((__le16 *)(_p)))) -#define LE_READ_4(_p) (le32_to_cpu(get_unaligned((__le32 *)(_p)))) - enum { ATH_LED_TX, ATH_LED_RX, @@ -2909,9 +2905,9 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw, if (!mclist) break; /* calculate XOR of eight 6-bit values */ - val = LE_READ_4(mclist->dmi_addr + 0); + val = get_unaligned_le32(mclist->dmi_addr + 0); pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; - val = LE_READ_4(mclist->dmi_addr + 3); + val = get_unaligned_le32(mclist->dmi_addr + 3); pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; pos &= 0x3f; mfilt[pos / 32] |= (1 << (pos % 32)); diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 4bf8a99099f..8c24cd72aac 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -2171,7 +2171,7 @@ static int b43_write_initvals(struct b43_wldev *dev, goto err_format; array_size -= 
sizeof(iv->data.d32); - value = be32_to_cpu(get_unaligned(&iv->data.d32)); + value = get_unaligned_be32(&iv->data.d32); b43_write32(dev, offset, value); iv = (const struct b43_iv *)((const uint8_t *)iv + diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index ef829ee8ffd..14a5eea2573 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -1720,7 +1720,7 @@ static int b43legacy_write_initvals(struct b43legacy_wldev *dev, goto err_format; array_size -= sizeof(iv->data.d32); - value = be32_to_cpu(get_unaligned(&iv->data.d32)); + value = get_unaligned_be32(&iv->data.d32); b43legacy_write32(dev, offset, value); iv = (const struct b43legacy_iv *)((const uint8_t *)iv + diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 598e4eef4f4..d3406830c8e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -554,40 +554,36 @@ static void iwl3945_add_radiotap(struct iwl3945_priv *priv, iwl3945_rt->rt_hdr.it_pad = 0; /* total header + data */ - put_unaligned(cpu_to_le16(sizeof(*iwl3945_rt)), - &iwl3945_rt->rt_hdr.it_len); + put_unaligned_le16(sizeof(*iwl3945_rt), &iwl3945_rt->rt_hdr.it_len); /* Indicate all the fields we add to the radiotap header */ - put_unaligned(cpu_to_le32((1 << IEEE80211_RADIOTAP_TSFT) | - (1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | - (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | - (1 << IEEE80211_RADIOTAP_ANTENNA)), - &iwl3945_rt->rt_hdr.it_present); + put_unaligned_le32((1 << IEEE80211_RADIOTAP_TSFT) | + (1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL) | + (1 << IEEE80211_RADIOTAP_DBM_ANTNOISE) | + (1 << IEEE80211_RADIOTAP_ANTENNA), + &iwl3945_rt->rt_hdr.it_present); /* Zero the 
flags, we'll add to them as we go */ iwl3945_rt->rt_flags = 0; - put_unaligned(cpu_to_le64(tsf), &iwl3945_rt->rt_tsf); + put_unaligned_le64(tsf, &iwl3945_rt->rt_tsf); iwl3945_rt->rt_dbmsignal = signal; iwl3945_rt->rt_dbmnoise = noise; /* Convert the channel frequency and set the flags */ - put_unaligned(cpu_to_le16(stats->freq), &iwl3945_rt->rt_channelMHz); + put_unaligned_le16(stats->freq, &iwl3945_rt->rt_channelMHz); if (!(phy_flags_hw & RX_RES_PHY_FLAGS_BAND_24_MSK)) - put_unaligned(cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_5GHZ), + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ, &iwl3945_rt->rt_chbitmask); else if (phy_flags_hw & RX_RES_PHY_FLAGS_MOD_CCK_MSK) - put_unaligned(cpu_to_le16(IEEE80211_CHAN_CCK | - IEEE80211_CHAN_2GHZ), + put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ, &iwl3945_rt->rt_chbitmask); else /* 802.11g */ - put_unaligned(cpu_to_le16(IEEE80211_CHAN_OFDM | - IEEE80211_CHAN_2GHZ), + put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ, &iwl3945_rt->rt_chbitmask); if (rate == -1) diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index e72c97a0d6c..1a409fcc80d 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c @@ -522,7 +522,7 @@ static int lbs_process_bss(struct bss_descriptor *bss, if (*bytesleft >= sizeof(beaconsize)) { /* Extract & convert beacon size from the command buffer */ - beaconsize = le16_to_cpu(get_unaligned((__le16 *)*pbeaconinfo)); + beaconsize = get_unaligned_le16(*pbeaconinfo); *bytesleft -= sizeof(beaconsize); *pbeaconinfo += sizeof(beaconsize); } diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index e34675c2f8f..5316074f39f 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -545,11 +545,11 @@ static void handle_rx_packet(struct zd_usb *usb, const u8 *buffer, * be padded. 
Unaligned access might also happen if the length_info * structure is not present. */ - if (get_unaligned(&length_info->tag) == cpu_to_le16(RX_LENGTH_INFO_TAG)) + if (get_unaligned_le16(&length_info->tag) == RX_LENGTH_INFO_TAG) { unsigned int l, k, n; for (i = 0, l = 0;; i++) { - k = le16_to_cpu(get_unaligned(&length_info->length[i])); + k = get_unaligned_le16(&length_info->length[i]); if (k == 0) return; n = l+k; diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 24640726f8b..57e1f495b9f 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -1062,7 +1062,7 @@ static int yellowfin_rx(struct net_device *dev) buf_addr = rx_skb->data; data_size = (le32_to_cpu(desc->dbdma_cmd) - le32_to_cpu(desc->result_status)) & 0xffff; - frame_status = le16_to_cpu(get_unaligned((__le16*)&(buf_addr[data_size - 2]))); + frame_status = get_unaligned_le16(&(buf_addr[data_size - 2])); if (yellowfin_debug > 4) printk(KERN_DEBUG " yellowfin_rx() status was %4.4x.\n", frame_status); diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index e07492be1f4..208dd12825b 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/nubus.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/module.h> @@ -28,38 +29,36 @@ #include <asm/byteorder.h> static int -get_nubus_dev_info(char *buf, char **start, off_t pos, int count) +nubus_devices_proc_show(struct seq_file *m, void *v) { struct nubus_dev *dev = nubus_devices; - off_t at = 0; - int len, cnt; - cnt = 0; - while (dev && count > cnt) { - len = sprintf(buf, "%x\t%04x %04x %04x %04x", + while (dev) { + seq_printf(m, "%x\t%04x %04x %04x %04x", dev->board->slot, dev->category, dev->type, dev->dr_sw, dev->dr_hw); - len += sprintf(buf+len, - "\t%08lx", - dev->board->slot_addr); - buf[len++] = '\n'; - at += len; - if (at >= pos) { - if (!*start) { - *start = buf + (pos - (at - len)); - cnt = at - pos; - } else - 
cnt += len; - buf += len; - } + seq_printf(m, "\t%08lx\n", dev->board->slot_addr); dev = dev->next; } - return (count > cnt) ? cnt : count; + return 0; +} + +static int nubus_devices_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, nubus_devices_proc_show, NULL); } +static const struct file_operations nubus_devices_proc_fops = { + .owner = THIS_MODULE, + .open = nubus_devices_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct proc_dir_entry *proc_bus_nubus_dir; static void nubus_proc_subdir(struct nubus_dev* dev, @@ -171,8 +170,7 @@ void __init nubus_proc_init(void) { if (!MACH_IS_MAC) return; - proc_bus_nubus_dir = proc_mkdir("nubus", proc_bus); - create_proc_info_entry("devices", 0, proc_bus_nubus_dir, - get_nubus_dev_info); + proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL); + proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops); proc_bus_nubus_add_devices(); } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 62db3c3fe4d..07d2a8d4498 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1551,8 +1551,7 @@ static int __init ccio_probe(struct parisc_device *dev) { int i; struct ioc *ioc, **ioc_p = &ioc_list; - struct proc_dir_entry *info_entry, *bitmap_entry; - + ioc = kzalloc(sizeof(struct ioc), GFP_KERNEL); if (ioc == NULL) { printk(KERN_ERR MODULE_NAME ": memory allocation failure\n"); @@ -1580,13 +1579,10 @@ static int __init ccio_probe(struct parisc_device *dev) HBA_DATA(dev->dev.platform_data)->iommu = ioc; if (ioc_count == 0) { - info_entry = create_proc_entry(MODULE_NAME, 0, proc_runway_root); - if (info_entry) - info_entry->proc_fops = &ccio_proc_info_fops; - - bitmap_entry = create_proc_entry(MODULE_NAME"-bitmap", 0, proc_runway_root); - if (bitmap_entry) - bitmap_entry->proc_fops = &ccio_proc_bitmap_fops; + proc_create(MODULE_NAME, 0, proc_runway_root, + &ccio_proc_info_fops); + 
proc_create(MODULE_NAME"-bitmap", 0, proc_runway_root, + &ccio_proc_bitmap_fops); } ioc_count++; diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 8c4d2c13d5f..afc849bd3f5 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1895,7 +1895,9 @@ sba_driver_callback(struct parisc_device *dev) int i; char *version; void __iomem *sba_addr = ioremap_nocache(dev->hpa.start, SBA_FUNC_SIZE); - struct proc_dir_entry *info_entry, *bitmap_entry, *root; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *root; +#endif sba_dump_ranges(sba_addr); @@ -1973,14 +1975,8 @@ sba_driver_callback(struct parisc_device *dev) break; } - info_entry = create_proc_entry("sba_iommu", 0, root); - bitmap_entry = create_proc_entry("sba_iommu-bitmap", 0, root); - - if (info_entry) - info_entry->proc_fops = &sba_proc_fops; - - if (bitmap_entry) - bitmap_entry->proc_fops = &sba_proc_bitmap_fops; + proc_create("sba_iommu", 0, root, &sba_proc_fops); + proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops); #endif parisc_vmerge_boundary = IOVP_SIZE; diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index a8580893820..e71092e8028 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -3082,6 +3082,7 @@ static struct pci_driver parport_pc_pci_driver; static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;} #endif /* CONFIG_PCI */ +#ifdef CONFIG_PNP static const struct pnp_device_id parport_pc_pnp_tbl[] = { /* Standard LPT Printer Port */ @@ -3148,6 +3149,9 @@ static struct pnp_driver parport_pc_pnp_driver = { .remove = parport_pc_pnp_remove, }; +#else +static struct pnp_driver parport_pc_pnp_driver; +#endif /* CONFIG_PNP */ static int __devinit parport_pc_platform_probe(struct platform_device *pdev) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f991359f0c3..4a55bf38095 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,11 +842,14 @@ static void 
set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) { int pos; u32 status; + if (!check_exp_pcix) + goto skip; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); @@ -858,6 +861,7 @@ int pci_cfg_space_size(struct pci_dev *dev) goto fail; } + skip: if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) goto fail; if (status == 0xffffffff) @@ -869,6 +873,11 @@ int pci_cfg_space_size(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +int pci_cfg_space_size(struct pci_dev *dev) +{ + return pci_cfg_space_size_ext(dev, 1); +} + static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); @@ -964,7 +973,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.release = pci_release_dev; pci_dev_get(dev); - set_dev_node(&dev->dev, pcibus_to_node(bus)); dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; @@ -1080,6 +1088,10 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1119,6 +1131,9 @@ struct pci_bus * pci_create_bus(struct device *parent, goto dev_reg_err; b->bridge = get_device(dev); + if (!parent) + set_dev_node(b->bridge, pcibus_to_node(b)); + b->dev.class = &pcibus_class; b->dev.parent = b->bridge; sprintf(b->dev.bus_id, "%04x:%02x", pci_domain_nr(b), bus); @@ -1136,6 +1151,8 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; + set_pci_bus_resources_arch_default(b); + return b; dev_create_file_err: diff 
--git a/drivers/pci/proc.c b/drivers/pci/proc.c index ef18fcd641e..963a97642ae 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -293,6 +293,7 @@ static int proc_bus_pci_release(struct inode *inode, struct file *file) #endif /* HAVE_PCI_MMAP */ static const struct file_operations proc_bus_pci_operations = { + .owner = THIS_MODULE, .llseek = proc_bus_pci_lseek, .read = proc_bus_pci_read, .write = proc_bus_pci_write, @@ -406,11 +407,10 @@ int pci_proc_attach_device(struct pci_dev *dev) } sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); - e = create_proc_entry(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir); + e = proc_create_data(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir, + &proc_bus_pci_operations, dev); if (!e) return -ENOMEM; - e->proc_fops = &proc_bus_pci_operations; - e->data = dev; e->size = dev->cfg_size; dev->procent = e; @@ -462,6 +462,7 @@ static int proc_bus_pci_dev_open(struct inode *inode, struct file *file) return seq_open(file, &proc_bus_pci_devices_op); } static const struct file_operations proc_bus_pci_dev_operations = { + .owner = THIS_MODULE, .open = proc_bus_pci_dev_open, .read = seq_read, .llseek = seq_lseek, @@ -470,12 +471,10 @@ static const struct file_operations proc_bus_pci_dev_operations = { static int __init pci_proc_init(void) { - struct proc_dir_entry *entry; struct pci_dev *dev = NULL; - proc_bus_pci_dir = proc_mkdir("pci", proc_bus); - entry = create_proc_entry("devices", 0, proc_bus_pci_dir); - if (entry) - entry->proc_fops = &proc_bus_pci_dev_operations; + proc_bus_pci_dir = proc_mkdir("bus/pci", NULL); + proc_create("devices", 0, proc_bus_pci_dir, + &proc_bus_pci_dev_operations); proc_initialized = 1; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { pci_proc_attach_device(dev); diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 06a85d7d5aa..36379535f9d 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -402,15 +402,6 @@ 
EXPORT_SYMBOL(pcmcia_replace_cis); ======================================================================*/ -static inline u16 cis_get_u16(void *ptr) -{ - return le16_to_cpu(get_unaligned((__le16 *) ptr)); -} -static inline u32 cis_get_u32(void *ptr) -{ - return le32_to_cpu(get_unaligned((__le32 *) ptr)); -} - typedef struct tuple_flags { u_int link_space:4; u_int has_link:1; @@ -471,7 +462,7 @@ static int follow_link(struct pcmcia_socket *s, tuple_t *tuple) /* Get indirect link from the MFC tuple */ read_cis_cache(s, LINK_SPACE(tuple->Flags), tuple->LinkOffset, 5, link); - ofs = cis_get_u32(link + 1); + ofs = get_unaligned_le32(link + 1); SPACE(tuple->Flags) = (link[0] == CISTPL_MFC_ATTR); /* Move to the next indirect link */ tuple->LinkOffset += 5; @@ -679,8 +670,8 @@ static int parse_checksum(tuple_t *tuple, cistpl_checksum_t *csum) if (tuple->TupleDataLen < 5) return CS_BAD_TUPLE; p = (u_char *) tuple->TupleData; - csum->addr = tuple->CISOffset + cis_get_u16(p) - 2; - csum->len = cis_get_u16(p + 2); + csum->addr = tuple->CISOffset + get_unaligned_le16(p) - 2; + csum->len = get_unaligned_le16(p + 2); csum->sum = *(p + 4); return CS_SUCCESS; } @@ -691,7 +682,7 @@ static int parse_longlink(tuple_t *tuple, cistpl_longlink_t *link) { if (tuple->TupleDataLen < 4) return CS_BAD_TUPLE; - link->addr = cis_get_u32(tuple->TupleData); + link->addr = get_unaligned_le32(tuple->TupleData); return CS_SUCCESS; } @@ -710,7 +701,7 @@ static int parse_longlink_mfc(tuple_t *tuple, return CS_BAD_TUPLE; for (i = 0; i < link->nfn; i++) { link->fn[i].space = *p; p++; - link->fn[i].addr = cis_get_u32(p); + link->fn[i].addr = get_unaligned_le32(p); p += 4; } return CS_SUCCESS; @@ -800,8 +791,8 @@ static int parse_manfid(tuple_t *tuple, cistpl_manfid_t *m) { if (tuple->TupleDataLen < 4) return CS_BAD_TUPLE; - m->manf = cis_get_u16(tuple->TupleData); - m->card = cis_get_u16(tuple->TupleData + 2); + m->manf = get_unaligned_le16(tuple->TupleData); + m->card = 
get_unaligned_le16(tuple->TupleData + 2); return CS_SUCCESS; } @@ -1100,7 +1091,7 @@ static int parse_cftable_entry(tuple_t *tuple, break; case 0x20: entry->mem.nwin = 1; - entry->mem.win[0].len = cis_get_u16(p) << 8; + entry->mem.win[0].len = get_unaligned_le16(p) << 8; entry->mem.win[0].card_addr = 0; entry->mem.win[0].host_addr = 0; p += 2; @@ -1108,8 +1099,8 @@ static int parse_cftable_entry(tuple_t *tuple, break; case 0x40: entry->mem.nwin = 1; - entry->mem.win[0].len = cis_get_u16(p) << 8; - entry->mem.win[0].card_addr = cis_get_u16(p + 2) << 8; + entry->mem.win[0].len = get_unaligned_le16(p) << 8; + entry->mem.win[0].card_addr = get_unaligned_le16(p + 2) << 8; entry->mem.win[0].host_addr = 0; p += 4; if (p > q) return CS_BAD_TUPLE; @@ -1146,7 +1137,7 @@ static int parse_bar(tuple_t *tuple, cistpl_bar_t *bar) p = (u_char *)tuple->TupleData; bar->attr = *p; p += 2; - bar->size = cis_get_u32(p); + bar->size = get_unaligned_le32(p); return CS_SUCCESS; } @@ -1159,7 +1150,7 @@ static int parse_config_cb(tuple_t *tuple, cistpl_config_t *config) return CS_BAD_TUPLE; config->last_idx = *(++p); p++; - config->base = cis_get_u32(p); + config->base = get_unaligned_le32(p); config->subtuples = tuple->TupleDataLen - 6; return CS_SUCCESS; } @@ -1275,7 +1266,7 @@ static int parse_vers_2(tuple_t *tuple, cistpl_vers_2_t *v2) v2->vers = p[0]; v2->comply = p[1]; - v2->dindex = cis_get_u16(p +2 ); + v2->dindex = get_unaligned_le16(p +2 ); v2->vspec8 = p[6]; v2->vspec9 = p[7]; v2->nhdr = p[8]; @@ -1316,8 +1307,8 @@ static int parse_format(tuple_t *tuple, cistpl_format_t *fmt) fmt->type = p[0]; fmt->edc = p[1]; - fmt->offset = cis_get_u32(p + 2); - fmt->length = cis_get_u32(p + 6); + fmt->offset = get_unaligned_le32(p + 2); + fmt->length = get_unaligned_le32(p + 6); return CS_SUCCESS; } diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 2b8266c3d40..3f94edab25f 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -85,6 +85,7 @@ static 
ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf, } static const struct file_operations isapnp_proc_bus_file_operations = { + .owner = THIS_MODULE, .llseek = isapnp_proc_bus_lseek, .read = isapnp_proc_bus_read, }; @@ -102,12 +103,10 @@ static int isapnp_proc_attach_device(struct pnp_dev *dev) return -ENOMEM; } sprintf(name, "%02x", dev->number); - e = dev->procent = create_proc_entry(name, S_IFREG | S_IRUGO, de); + e = dev->procent = proc_create_data(name, S_IFREG | S_IRUGO, de, + &isapnp_proc_bus_file_operations, dev); if (!e) return -ENOMEM; - e->proc_fops = &isapnp_proc_bus_file_operations; - e->owner = THIS_MODULE; - e->data = dev; e->size = 256; return 0; } @@ -116,7 +115,7 @@ int __init isapnp_proc_init(void) { struct pnp_dev *dev; - isapnp_proc_bus_dir = proc_mkdir("isapnp", proc_bus); + isapnp_proc_bus_dir = proc_mkdir("bus/isapnp", NULL); protocol_for_each_dev(&isapnp_protocol, dev) { isapnp_proc_attach_device(dev); } diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index bb19bc957ba..46d506f6625 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -256,7 +256,7 @@ int pnpbios_interface_attach_device(struct pnp_bios_node *node) */ int __init pnpbios_proc_init(void) { - proc_pnp = proc_mkdir("pnp", proc_bus); + proc_pnp = proc_mkdir("bus/pnp", NULL); if (!proc_pnp) return -EIO; proc_pnp_boot = proc_mkdir("boot", proc_pnp); @@ -294,5 +294,5 @@ void __exit pnpbios_proc_exit(void) remove_proc_entry("configuration_info", proc_pnp); remove_proc_entry("devices", proc_pnp); remove_proc_entry("boot", proc_pnp); - remove_proc_entry("pnp", proc_bus); + remove_proc_entry("bus/pnp", NULL); } diff --git a/drivers/power/ds2760_battery.c b/drivers/power/ds2760_battery.c index bdb9b7285b3..71be36f1870 100644 --- a/drivers/power/ds2760_battery.c +++ b/drivers/power/ds2760_battery.c @@ -262,7 +262,7 @@ static void ds2760_battery_work(struct work_struct *work) struct ds2760_device_info, monitor_work.work); const int 
interval = HZ * 60; - dev_dbg(di->dev, "%s\n", __FUNCTION__); + dev_dbg(di->dev, "%s\n", __func__); ds2760_battery_update_status(di); queue_delayed_work(di->monitor_wqueue, &di->monitor_work, interval); @@ -275,7 +275,7 @@ static void ds2760_battery_external_power_changed(struct power_supply *psy) { struct ds2760_device_info *di = to_ds2760_device_info(psy); - dev_dbg(di->dev, "%s\n", __FUNCTION__); + dev_dbg(di->dev, "%s\n", __func__); cancel_delayed_work(&di->monitor_work); queue_delayed_work(di->monitor_wqueue, &di->monitor_work, HZ/10); diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index af7a231092a..ab1e8289f07 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -315,7 +315,6 @@ static int __init olpc_bat_init(void) if (ret) goto battery_failed; - olpc_register_battery_callback(&olpc_battery_trigger_uevent); goto success; battery_failed: @@ -328,7 +327,6 @@ success: static void __exit olpc_bat_exit(void) { - olpc_deregister_battery_callback(); power_supply_unregister(&olpc_bat); power_supply_unregister(&olpc_ac); platform_device_unregister(bat_pdev); diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 03d6a38464e..138dd76ee34 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -39,7 +39,7 @@ static void power_supply_changed_work(struct work_struct *work) struct power_supply *psy = container_of(work, struct power_supply, changed_work); - dev_dbg(psy->dev, "%s\n", __FUNCTION__); + dev_dbg(psy->dev, "%s\n", __func__); class_for_each_device(power_supply_class, psy, __power_supply_changed_work); @@ -51,7 +51,7 @@ static void power_supply_changed_work(struct work_struct *work) void power_supply_changed(struct power_supply *psy) { - dev_dbg(psy->dev, "%s\n", __FUNCTION__); + dev_dbg(psy->dev, "%s\n", __func__); schedule_work(&psy->changed_work); } @@ -82,7 +82,7 @@ int power_supply_am_i_supplied(struct power_supply *psy) error = 
class_for_each_device(power_supply_class, psy, __power_supply_am_i_supplied); - dev_dbg(psy->dev, "%s %d\n", __FUNCTION__, error); + dev_dbg(psy->dev, "%s %d\n", __func__, error); return error; } diff --git a/drivers/power/power_supply_leds.c b/drivers/power/power_supply_leds.c index fa3034f85c3..2dece40c544 100644 --- a/drivers/power/power_supply_leds.c +++ b/drivers/power/power_supply_leds.c @@ -24,7 +24,7 @@ static void power_supply_update_bat_leds(struct power_supply *psy) if (psy->get_property(psy, POWER_SUPPLY_PROP_STATUS, &status)) return; - dev_dbg(psy->dev, "%s %d\n", __FUNCTION__, status.intval); + dev_dbg(psy->dev, "%s %d\n", __func__, status.intval); switch (status.intval) { case POWER_SUPPLY_STATUS_FULL: @@ -101,7 +101,7 @@ static void power_supply_update_gen_leds(struct power_supply *psy) if (psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &online)) return; - dev_dbg(psy->dev, "%s %d\n", __FUNCTION__, online.intval); + dev_dbg(psy->dev, "%s %d\n", __func__, online.intval); if (online.intval) led_trigger_event(psy->online_trig, LED_FULL); diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig index 4142115d298..c32822ad84a 100644 --- a/drivers/rapidio/Kconfig +++ b/drivers/rapidio/Kconfig @@ -1,14 +1,6 @@ # # RapidIO configuration # -config RAPIDIO_8_BIT_TRANSPORT - bool "8-bit transport addressing" - depends on RAPIDIO - ---help--- - By default, the kernel assumes a 16-bit addressed RapidIO - network. By selecting this option, the kernel will support - an 8-bit addressed network. 
- config RAPIDIO_DISC_TIMEOUT int "Discovery timeout duration (seconds)" depends on RAPIDIO diff --git a/drivers/rapidio/rio-access.c b/drivers/rapidio/rio-access.c index 8b56bbdd011..a3824baca2e 100644 --- a/drivers/rapidio/rio-access.c +++ b/drivers/rapidio/rio-access.c @@ -48,7 +48,7 @@ int __rio_local_read_config_##size \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcread(mport->id, offset, len, &data); \ + res = mport->ops->lcread(mport, mport->id, offset, len, &data); \ *value = (type)data; \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ @@ -71,7 +71,7 @@ int __rio_local_write_config_##size \ unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->lcwrite(mport->id, offset, len, value); \ + res = mport->ops->lcwrite(mport, mport->id, offset, len, value);\ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -108,7 +108,7 @@ int rio_mport_read_config_##size \ u32 data = 0; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cread(mport->id, destid, hopcount, offset, len, &data); \ + res = mport->ops->cread(mport, mport->id, destid, hopcount, offset, len, &data); \ *value = (type)data; \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ @@ -131,7 +131,7 @@ int rio_mport_write_config_##size \ unsigned long flags; \ if (RIO_##size##_BAD) return RIO_BAD_SIZE; \ spin_lock_irqsave(&rio_config_lock, flags); \ - res = mport->ops->cwrite(mport->id, destid, hopcount, offset, len, value); \ + res = mport->ops->cwrite(mport, mport->id, destid, hopcount, offset, len, value); \ spin_unlock_irqrestore(&rio_config_lock, flags); \ return res; \ } @@ -166,7 +166,7 @@ int rio_mport_send_doorbell(struct rio_mport *mport, u16 destid, u16 data) unsigned long flags; spin_lock_irqsave(&rio_doorbell_lock, flags); - 
res = mport->ops->dsend(mport->id, destid, data); + res = mport->ops->dsend(mport, mport->id, destid, data); spin_unlock_irqrestore(&rio_doorbell_lock, flags); return res; diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 44420723a35..a926c896475 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -73,7 +73,7 @@ static u16 rio_get_device_id(struct rio_mport *port, u16 destid, u8 hopcount) rio_mport_read_config_32(port, destid, hopcount, RIO_DID_CSR, &result); - return RIO_GET_DID(result); + return RIO_GET_DID(port->sys_size, result); } /** @@ -88,7 +88,7 @@ static u16 rio_get_device_id(struct rio_mport *port, u16 destid, u8 hopcount) static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u16 did) { rio_mport_write_config_32(port, destid, hopcount, RIO_DID_CSR, - RIO_SET_DID(did)); + RIO_SET_DID(port->sys_size, did)); } /** @@ -100,7 +100,8 @@ static void rio_set_device_id(struct rio_mport *port, u16 destid, u8 hopcount, u */ static void rio_local_set_device_id(struct rio_mport *port, u16 did) { - rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(did)); + rio_local_write_config_32(port, RIO_DID_CSR, RIO_SET_DID(port->sys_size, + did)); } /** @@ -350,8 +351,18 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rswitch->switchid = next_switchid; rswitch->hopcount = hopcount; rswitch->destid = destid; + rswitch->route_table = kzalloc(sizeof(u8)* + RIO_MAX_ROUTE_ENTRIES(port->sys_size), + GFP_KERNEL); + if (!rswitch->route_table) { + kfree(rdev); + rdev = NULL; + kfree(rswitch); + goto out; + } /* Initialize switch route table */ - for (rdid = 0; rdid < RIO_MAX_ROUTE_ENTRIES; rdid++) + for (rdid = 0; rdid < RIO_MAX_ROUTE_ENTRIES(port->sys_size); + rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; sprintf(rio_name(rdev), "%02x:s:%04x", rdev->net->id, @@ -480,7 +491,7 @@ static u16 rio_get_host_deviceid_lock(struct rio_mport *port, u8 hopcount) { u32 
result; - rio_mport_read_config_32(port, RIO_ANY_DESTID, hopcount, + rio_mport_read_config_32(port, RIO_ANY_DESTID(port->sys_size), hopcount, RIO_HOST_DID_LOCK_CSR, &result); return (u16) (result & 0xffff); @@ -571,14 +582,16 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } /* Attempt to acquire device lock */ - rio_mport_write_config_32(port, RIO_ANY_DESTID, hopcount, + rio_mport_write_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); while ((tmp = rio_get_host_deviceid_lock(port, hopcount)) < port->host_deviceid) { /* Delay a bit */ mdelay(1); /* Attempt to acquire device lock again */ - rio_mport_write_config_32(port, RIO_ANY_DESTID, hopcount, + rio_mport_write_config_32(port, RIO_ANY_DESTID(port->sys_size), + hopcount, RIO_HOST_DID_LOCK_CSR, port->host_deviceid); } @@ -590,7 +603,9 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } /* Setup new RIO device */ - if ((rdev = rio_setup_device(net, port, RIO_ANY_DESTID, hopcount, 1))) { + rdev = rio_setup_device(net, port, RIO_ANY_DESTID(port->sys_size), + hopcount, 1); + if (rdev) { /* Add device to the global and bus/net specific list. 
*/ list_add_tail(&rdev->net_list, &net->devices); } else @@ -598,7 +613,8 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, if (rio_is_switch(rdev)) { next_switchid++; - sw_inport = rio_get_swpinfo_inport(port, RIO_ANY_DESTID, hopcount); + sw_inport = rio_get_swpinfo_inport(port, + RIO_ANY_DESTID(port->sys_size), hopcount); rio_route_add_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, port->host_deviceid, sw_inport); rdev->rswitch->route_table[port->host_deviceid] = sw_inport; @@ -612,7 +628,8 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, } num_ports = - rio_get_swpinfo_tports(port, RIO_ANY_DESTID, hopcount); + rio_get_swpinfo_tports(port, RIO_ANY_DESTID(port->sys_size), + hopcount); pr_debug( "RIO: found %s (vid %4.4x did %4.4x) with %d ports\n", rio_name(rdev), rdev->vid, rdev->did, num_ports); @@ -624,13 +641,15 @@ static int rio_enum_peer(struct rio_net *net, struct rio_mport *port, cur_destid = next_destid; if (rio_sport_is_active - (port, RIO_ANY_DESTID, hopcount, port_num)) { + (port, RIO_ANY_DESTID(port->sys_size), hopcount, + port_num)) { pr_debug( "RIO: scanning device on port %d\n", port_num); rio_route_add_entry(port, rdev->rswitch, - RIO_GLOBAL_TABLE, - RIO_ANY_DESTID, port_num); + RIO_GLOBAL_TABLE, + RIO_ANY_DESTID(port->sys_size), + port_num); if (rio_enum_peer(net, port, hopcount + 1) < 0) return -1; @@ -735,7 +754,8 @@ rio_disc_peer(struct rio_net *net, struct rio_mport *port, u16 destid, pr_debug( "RIO: scanning device on port %d\n", port_num); - for (ndestid = 0; ndestid < RIO_ANY_DESTID; + for (ndestid = 0; + ndestid < RIO_ANY_DESTID(port->sys_size); ndestid++) { rio_route_get_entry(port, rdev->rswitch, RIO_GLOBAL_TABLE, @@ -917,7 +937,9 @@ static void rio_build_route_tables(void) list_for_each_entry(rdev, &rio_devices, global_list) if (rio_is_switch(rdev)) - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) { + for (i = 0; + i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); + i++) { if 
(rio_route_get_entry (rdev->net->hport, rdev->rswitch, RIO_GLOBAL_TABLE, i, &sport) < 0) @@ -981,7 +1003,8 @@ int rio_disc_mport(struct rio_mport *mport) del_timer_sync(&rio_enum_timer); pr_debug("done\n"); - if (rio_disc_peer(net, mport, RIO_ANY_DESTID, 0) < 0) { + if (rio_disc_peer(net, mport, RIO_ANY_DESTID(mport->sys_size), + 0) < 0) { printk(KERN_INFO "RIO: master port %d device has failed discovery\n", mport->id); diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c index 659e31164cf..97a147f050d 100644 --- a/drivers/rapidio/rio-sysfs.c +++ b/drivers/rapidio/rio-sysfs.c @@ -43,7 +43,8 @@ static ssize_t routes_show(struct device *dev, struct device_attribute *attr, ch if (!rdev->rswitch) goto out; - for (i = 0; i < RIO_MAX_ROUTE_ENTRIES; i++) { + for (i = 0; i < RIO_MAX_ROUTE_ENTRIES(rdev->net->hport->sys_size); + i++) { if (rdev->rswitch->route_table[i] == RIO_INVALID_ROUTE) continue; str += diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index 80c5f1ba2e4..680661abbc4 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -43,7 +43,7 @@ u16 rio_local_get_device_id(struct rio_mport *port) rio_local_read_config_32(port, RIO_DID_CSR, &result); - return (RIO_GET_DID(result)); + return (RIO_GET_DID(port->sys_size, result)); } /** diff --git a/drivers/rapidio/rio.h b/drivers/rapidio/rio.h index 80e3f03b504..7786d02581f 100644 --- a/drivers/rapidio/rio.h +++ b/drivers/rapidio/rio.h @@ -51,10 +51,5 @@ extern struct rio_route_ops __end_rio_route_ops[]; DECLARE_RIO_ROUTE_SECTION(.rio_route_ops, \ vid, did, add_hook, get_hook) -#ifdef CONFIG_RAPIDIO_8_BIT_TRANSPORT -#define RIO_GET_DID(x) ((x & 0x00ff0000) >> 16) -#define RIO_SET_DID(x) ((x & 0x000000ff) << 16) -#else -#define RIO_GET_DID(x) (x & 0xffff) -#define RIO_SET_DID(x) (x & 0xffff) -#endif +#define RIO_GET_DID(size, x) (size ? (x & 0xffff) : ((x & 0x00ff0000) >> 16)) +#define RIO_SET_DID(size, x) (size ? 
(x & 0xffff) : ((x & 0x000000ff) << 16)) diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index 4f28045d9ef..8624f55d056 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -419,7 +419,7 @@ static int __devinit bfin_rtc_probe(struct platform_device *pdev) return -ENOMEM; rtc->rtc_dev = rtc_device_register(pdev->name, &pdev->dev, &bfin_rtc_ops, THIS_MODULE); - if (unlikely(IS_ERR(rtc))) { + if (IS_ERR(rtc)) { ret = PTR_ERR(rtc->rtc_dev); goto err; } diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c index 8d300e6d0d9..0c6257a034f 100644 --- a/drivers/rtc/rtc-proc.c +++ b/drivers/rtc/rtc-proc.c @@ -108,12 +108,10 @@ void rtc_proc_add_device(struct rtc_device *rtc) if (rtc->id == 0) { struct proc_dir_entry *ent; - ent = create_proc_entry("driver/rtc", 0, NULL); - if (ent) { - ent->proc_fops = &rtc_proc_fops; + ent = proc_create_data("driver/rtc", 0, NULL, + &rtc_proc_fops, rtc); + if (ent) ent->owner = rtc->owner; - ent->data = rtc; - } } } diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 556063e8f7a..03c0e40a92f 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -157,6 +157,7 @@ static int dasd_devices_open(struct inode *inode, struct file *file) } static const struct file_operations dasd_devices_file_ops = { + .owner = THIS_MODULE, .open = dasd_devices_open, .read = seq_read, .llseek = seq_lseek, @@ -311,17 +312,16 @@ out_error: int dasd_proc_init(void) { - dasd_proc_root_entry = proc_mkdir("dasd", &proc_root); + dasd_proc_root_entry = proc_mkdir("dasd", NULL); if (!dasd_proc_root_entry) goto out_nodasd; dasd_proc_root_entry->owner = THIS_MODULE; - dasd_devices_entry = create_proc_entry("devices", - S_IFREG | S_IRUGO | S_IWUSR, - dasd_proc_root_entry); + dasd_devices_entry = proc_create("devices", + S_IFREG | S_IRUGO | S_IWUSR, + dasd_proc_root_entry, + &dasd_devices_file_ops); if (!dasd_devices_entry) goto out_nodevices; - dasd_devices_entry->proc_fops = 
&dasd_devices_file_ops; - dasd_devices_entry->owner = THIS_MODULE; dasd_statistics_entry = create_proc_entry("statistics", S_IFREG | S_IRUGO | S_IWUSR, dasd_proc_root_entry); @@ -335,7 +335,7 @@ dasd_proc_init(void) out_nostatistics: remove_proc_entry("devices", dasd_proc_root_entry); out_nodevices: - remove_proc_entry("dasd", &proc_root); + remove_proc_entry("dasd", NULL); out_nodasd: return -ENOENT; } @@ -345,5 +345,5 @@ dasd_proc_exit(void) { remove_proc_entry("devices", dasd_proc_root_entry); remove_proc_entry("statistics", dasd_proc_root_entry); - remove_proc_entry("dasd", &proc_root); + remove_proc_entry("dasd", NULL); } diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c index c9b96d51b28..e7c888c14e7 100644 --- a/drivers/s390/char/tape_proc.c +++ b/drivers/s390/char/tape_proc.c @@ -111,6 +111,7 @@ static int tape_proc_open(struct inode *inode, struct file *file) static const struct file_operations tape_proc_ops = { + .owner = THIS_MODULE, .open = tape_proc_open, .read = seq_read, .llseek = seq_lseek, @@ -124,14 +125,12 @@ void tape_proc_init(void) { tape_proc_devices = - create_proc_entry ("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, - &proc_root); + proc_create("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, NULL, + &tape_proc_ops); if (tape_proc_devices == NULL) { PRINT_WARN("tape: Cannot register procfs entry tapedevices\n"); return; } - tape_proc_devices->proc_fops = &tape_proc_ops; - tape_proc_devices->owner = THIS_MODULE; } /* @@ -141,5 +140,5 @@ void tape_proc_cleanup(void) { if (tape_proc_devices != NULL) - remove_proc_entry ("tapedevices", &proc_root); + remove_proc_entry ("tapedevices", NULL); } diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index e8597ec9224..40ef948fcb3 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -374,13 +374,10 @@ cio_ignore_proc_init (void) { struct proc_dir_entry *entry; - entry = create_proc_entry ("cio_ignore", S_IFREG | S_IRUGO | S_IWUSR, - 
&proc_root); + entry = proc_create("cio_ignore", S_IFREG | S_IRUGO | S_IWUSR, NULL, + &cio_ignore_proc_fops); if (!entry) return -ENOENT; - - entry->proc_fops = &cio_ignore_proc_fops; - return 0; } diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index 10aa1e78080..43876e28737 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -3632,7 +3632,7 @@ qdio_add_procfs_entry(void) { proc_perf_file_registration=0; qdio_perf_proc_file=create_proc_entry(QDIO_PERF, - S_IFREG|0444,&proc_root); + S_IFREG|0444,NULL); if (qdio_perf_proc_file) { qdio_perf_proc_file->read_proc=&qdio_perf_procfile_read; } else proc_perf_file_registration=-1; @@ -3647,7 +3647,7 @@ static void qdio_remove_procfs_entry(void) { if (!proc_perf_file_registration) /* means if it went ok earlier */ - remove_proc_entry(QDIO_PERF,&proc_root); + remove_proc_entry(QDIO_PERF,NULL); } /** diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index b135a1ed4b2..18551aaf5e0 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4996,7 +4996,7 @@ static int __init megaraid_init(void) max_mbox_busy_wait = MBOX_BUSY_WAIT; #ifdef CONFIG_PROC_FS - mega_proc_dir_entry = proc_mkdir("megaraid", &proc_root); + mega_proc_dir_entry = proc_mkdir("megaraid", NULL); if (!mega_proc_dir_entry) { printk(KERN_WARNING "megaraid: failed to create megaraid root\n"); @@ -5005,7 +5005,7 @@ static int __init megaraid_init(void) error = pci_register_driver(&megaraid_pci_driver); if (error) { #ifdef CONFIG_PROC_FS - remove_proc_entry("megaraid", &proc_root); + remove_proc_entry("megaraid", NULL); #endif return error; } @@ -5035,7 +5035,7 @@ static void __exit megaraid_exit(void) pci_unregister_driver(&megaraid_pci_driver); #ifdef CONFIG_PROC_FS - remove_proc_entry("megaraid", &proc_root); + remove_proc_entry("megaraid", NULL); #endif } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 07103c399fe..f6600bfb5bd 100644 --- a/drivers/scsi/scsi_debug.c +++ 
b/drivers/scsi/scsi_debug.c @@ -1773,7 +1773,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp) if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts) printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); - set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags); + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; } diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index b8de041bc0a..a235802f298 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -449,37 +449,40 @@ int scsi_get_device_flags(struct scsi_device *sdev, } #ifdef CONFIG_SCSI_PROC_FS -/* - * proc_scsi_dev_info_read: dump the scsi_dev_info_list via - * /proc/scsi/device_info - */ -static int proc_scsi_devinfo_read(char *buffer, char **start, - off_t offset, int length) +static int devinfo_seq_show(struct seq_file *m, void *v) { - struct scsi_dev_info_list *devinfo; - int size, len = 0; - off_t begin = 0; - off_t pos = 0; + struct scsi_dev_info_list *devinfo = + list_entry(v, struct scsi_dev_info_list, dev_info_list); - list_for_each_entry(devinfo, &scsi_dev_info_list, dev_info_list) { - size = sprintf(buffer + len, "'%.8s' '%.16s' 0x%x\n", + seq_printf(m, "'%.8s' '%.16s' 0x%x\n", devinfo->vendor, devinfo->model, devinfo->flags); - len += size; - pos = begin + len; - if (pos < offset) { - len = 0; - begin = pos; - } - if (pos > offset + length) - goto stop_output; - } + return 0; +} + +static void * devinfo_seq_start(struct seq_file *m, loff_t *pos) +{ + return seq_list_start(&scsi_dev_info_list, *pos); +} -stop_output: - *start = buffer + (offset - begin); /* Start of wanted data */ - len -= (offset - begin); /* Start slop */ - if (len > length) - len = length; /* Ending slop */ - return (len); +static void * devinfo_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &scsi_dev_info_list, pos); +} + +static void devinfo_seq_stop(struct seq_file 
*m, void *v) +{ +} + +static const struct seq_operations scsi_devinfo_seq_ops = { + .start = devinfo_seq_start, + .next = devinfo_seq_next, + .stop = devinfo_seq_stop, + .show = devinfo_seq_show, +}; + +static int proc_scsi_devinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &scsi_devinfo_seq_ops); } /* @@ -489,11 +492,12 @@ stop_output: * integer value of flag to the scsi device info list. * To use, echo "vendor:model:flag" > /proc/scsi/device_info */ -static int proc_scsi_devinfo_write(struct file *file, const char __user *buf, - unsigned long length, void *data) +static ssize_t proc_scsi_devinfo_write(struct file *file, + const char __user *buf, + size_t length, loff_t *ppos) { char *buffer; - int err = length; + ssize_t err = length; if (!buf || length>PAGE_SIZE) return -EINVAL; @@ -517,6 +521,15 @@ out: free_page((unsigned long)buffer); return err; } + +static const struct file_operations scsi_devinfo_proc_fops = { + .owner = THIS_MODULE, + .open = proc_scsi_devinfo_open, + .read = seq_read, + .write = proc_scsi_devinfo_write, + .llseek = seq_lseek, + .release = seq_release, +}; #endif /* CONFIG_SCSI_PROC_FS */ module_param_string(dev_flags, scsi_dev_flags, sizeof(scsi_dev_flags), 0); @@ -577,15 +590,13 @@ int __init scsi_init_devinfo(void) } #ifdef CONFIG_SCSI_PROC_FS - p = create_proc_entry("scsi/device_info", 0, NULL); + p = proc_create("scsi/device_info", 0, NULL, &scsi_devinfo_proc_fops); if (!p) { error = -ENOMEM; goto out; } p->owner = THIS_MODULE; - p->get_info = proc_scsi_devinfo_read; - p->write_proc = proc_scsi_devinfo_write; #endif /* CONFIG_SCSI_PROC_FS */ out: diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 221f31e36d2..1eaba6cd80f 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1771,6 +1771,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag) unsigned long flags; int rtn; + blk_rq_init(NULL, &req); scmd->request = &req; memset(&scmd->eh_timeout, 0, 
sizeof(scmd->eh_timeout)); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 67f412bb497..d545ad1cf47 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -536,6 +536,9 @@ static void scsi_run_queue(struct request_queue *q) !shost->host_blocked && !shost->host_self_blocked && !((shost->can_queue > 0) && (shost->host_busy >= shost->can_queue))) { + + int flagset; + /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn @@ -549,19 +552,20 @@ static void scsi_run_queue(struct request_queue *q) sdev = list_entry(shost->starved_list.next, struct scsi_device, starved_entry); list_del_init(&sdev->starved_entry); - spin_unlock_irqrestore(shost->host_lock, flags); - + spin_unlock(shost->host_lock); + + spin_lock(sdev->request_queue->queue_lock); + flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && + !test_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags); + if (flagset) + queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); + __blk_run_queue(sdev->request_queue); + if (flagset) + queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); + spin_unlock(sdev->request_queue->queue_lock); - if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_and_set_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags)) { - blk_run_queue(sdev->request_queue); - clear_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - } else - blk_run_queue(sdev->request_queue); - - spin_lock_irqsave(shost->host_lock, flags); + spin_lock(shost->host_lock); if (unlikely(!list_empty(&sdev->starved_entry))) /* * sdev lost a race, and was put back on the @@ -1585,8 +1589,9 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segment_size(q, dma_get_max_seg_size(dev)); + /* New queue, no concurrency on queue_flags */ if (!shost->use_clustering) - clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); /* 
* set a reasonable default alignment on word boundaries: the diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index 3a1c99d5c77..e4a0d2f9b35 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -413,6 +413,7 @@ static int proc_scsi_open(struct inode *inode, struct file *file) } static const struct file_operations proc_scsi_operations = { + .owner = THIS_MODULE, .open = proc_scsi_open, .read = seq_read, .write = proc_scsi_write, @@ -431,10 +432,9 @@ int __init scsi_init_procfs(void) if (!proc_scsi) goto err1; - pde = create_proc_entry("scsi/scsi", 0, NULL); + pde = proc_create("scsi/scsi", 0, NULL, &proc_scsi_operations); if (!pde) goto err2; - pde->proc_fops = &proc_scsi_operations; return 0; diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index fcd7455ffc3..a00eee6f7be 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1828,7 +1828,7 @@ void scsi_scan_host(struct Scsi_Host *shost) } p = kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no); - if (unlikely(IS_ERR(p))) + if (IS_ERR(p)) do_scan_async(data); } EXPORT_SYMBOL(scsi_scan_host); diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 7899e3dda9b..f4461d35ffb 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -248,8 +248,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) else q->queuedata = shost; - set_bit(QUEUE_FLAG_BIDI, &q->queue_flags); - + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3cea17dd5db..01cefbb2d53 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -860,7 +860,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp) static void sd_prepare_flush(struct request_queue *q, struct request *rq) { - memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->timeout = SD_TIMEOUT; rq->cmd[0] = SYNCHRONIZE_CACHE; diff --git 
a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 2029422bc04..c9d7f721b9e 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -2667,7 +2667,6 @@ sg_proc_init(void) { int k, mask; int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr); - struct proc_dir_entry *pdep; struct sg_proc_leaf * leaf; sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL); @@ -2676,13 +2675,10 @@ sg_proc_init(void) for (k = 0; k < num_leaves; ++k) { leaf = &sg_proc_leaf_arr[k]; mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO; - pdep = create_proc_entry(leaf->name, mask, sg_proc_sgp); - if (pdep) { - leaf->fops->owner = THIS_MODULE, - leaf->fops->read = seq_read, - leaf->fops->llseek = seq_lseek, - pdep->proc_fops = leaf->fops; - } + leaf->fops->owner = THIS_MODULE; + leaf->fops->read = seq_read; + leaf->fops->llseek = seq_lseek; + proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops); } return 0; } diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index 4220f22b666..5f71ff3aee3 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -305,8 +305,6 @@ enum { */ #define FW_GET_BYTE(p) *((__u8 *) (p)) -#define FW_GET_WORD(p) le16_to_cpu(get_unaligned((__le16 *) (p))) -#define FW_GET_LONG(p) le32_to_cpu(get_unaligned((__le32 *) (p))) #define FW_DIR "ueagle-atm/" #define NB_MODEM 4 @@ -621,7 +619,7 @@ static void uea_upload_pre_firmware(const struct firmware *fw_entry, void *conte if (size < 4) goto err_fw_corrupted; - crc = FW_GET_LONG(pfw); + crc = get_unaligned_le32(pfw); pfw += 4; size -= 4; if (crc32_be(0, pfw, size) != crc) @@ -640,7 +638,7 @@ static void uea_upload_pre_firmware(const struct firmware *fw_entry, void *conte while (size > 3) { u8 len = FW_GET_BYTE(pfw); - u16 add = FW_GET_WORD(pfw + 1); + u16 add = get_unaligned_le16(pfw + 1); size -= len + 3; if (size < 0) @@ -738,7 +736,7 @@ static int check_dsp_e1(u8 *dsp, unsigned int len) for (i = 0; i < pagecount; i++) { - pageoffset = FW_GET_LONG(dsp + p); + pageoffset = 
get_unaligned_le32(dsp + p); p += 4; if (pageoffset == 0) @@ -759,7 +757,7 @@ static int check_dsp_e1(u8 *dsp, unsigned int len) return 1; pp += 2; /* skip blockaddr */ - blocksize = FW_GET_WORD(dsp + pp); + blocksize = get_unaligned_le16(dsp + pp); pp += 2; /* enough space for block data? */ @@ -928,7 +926,7 @@ static void uea_load_page_e1(struct work_struct *work) goto bad1; p += 4 * pageno; - pageoffset = FW_GET_LONG(p); + pageoffset = get_unaligned_le32(p); if (pageoffset == 0) goto bad1; @@ -945,10 +943,10 @@ static void uea_load_page_e1(struct work_struct *work) bi.wOvlOffset = cpu_to_le16(ovl | 0x8000); for (i = 0; i < blockcount; i++) { - blockaddr = FW_GET_WORD(p); + blockaddr = get_unaligned_le16(p); p += 2; - blocksize = FW_GET_WORD(p); + blocksize = get_unaligned_le16(p); p += 2; bi.wSize = cpu_to_le16(blocksize); @@ -1152,9 +1150,9 @@ static int uea_cmv_e1(struct uea_softc *sc, cmv.bDirection = E1_HOSTTOMODEM; cmv.bFunction = function; cmv.wIndex = cpu_to_le16(sc->cmv_dsc.e1.idx); - put_unaligned(cpu_to_le32(address), &cmv.dwSymbolicAddress); + put_unaligned_le32(address, &cmv.dwSymbolicAddress); cmv.wOffsetAddress = cpu_to_le16(offset); - put_unaligned(cpu_to_le32(data >> 16 | data << 16), &cmv.dwData); + put_unaligned_le32(data >> 16 | data << 16, &cmv.dwData); ret = uea_request(sc, UEA_E1_SET_BLOCK, UEA_MPTX_START, sizeof(cmv), &cmv); if (ret < 0) @@ -1646,7 +1644,7 @@ static int request_cmvs(struct uea_softc *sc, if (size < 5) goto err_fw_corrupted; - crc = FW_GET_LONG(data); + crc = get_unaligned_le32(data); data += 4; size -= 4; if (crc32_be(0, data, size) != crc) @@ -1696,9 +1694,9 @@ static int uea_send_cmvs_e1(struct uea_softc *sc) "please update your firmware\n"); for (i = 0; i < len; i++) { - ret = uea_write_cmv_e1(sc, FW_GET_LONG(&cmvs_v1[i].address), - FW_GET_WORD(&cmvs_v1[i].offset), - FW_GET_LONG(&cmvs_v1[i].data)); + ret = uea_write_cmv_e1(sc, get_unaligned_le32(&cmvs_v1[i].address), + get_unaligned_le16(&cmvs_v1[i].offset), + 
get_unaligned_le32(&cmvs_v1[i].data)); if (ret < 0) goto out; } @@ -1706,9 +1704,9 @@ static int uea_send_cmvs_e1(struct uea_softc *sc) struct uea_cmvs_v2 *cmvs_v2 = cmvs_ptr; for (i = 0; i < len; i++) { - ret = uea_write_cmv_e1(sc, FW_GET_LONG(&cmvs_v2[i].address), - (u16) FW_GET_LONG(&cmvs_v2[i].offset), - FW_GET_LONG(&cmvs_v2[i].data)); + ret = uea_write_cmv_e1(sc, get_unaligned_le32(&cmvs_v2[i].address), + (u16) get_unaligned_le32(&cmvs_v2[i].offset), + get_unaligned_le32(&cmvs_v2[i].data)); if (ret < 0) goto out; } @@ -1759,10 +1757,10 @@ static int uea_send_cmvs_e4(struct uea_softc *sc) for (i = 0; i < len; i++) { ret = uea_write_cmv_e4(sc, 1, - FW_GET_LONG(&cmvs_v2[i].group), - FW_GET_LONG(&cmvs_v2[i].address), - FW_GET_LONG(&cmvs_v2[i].offset), - FW_GET_LONG(&cmvs_v2[i].data)); + get_unaligned_le32(&cmvs_v2[i].group), + get_unaligned_le32(&cmvs_v2[i].address), + get_unaligned_le32(&cmvs_v2[i].offset), + get_unaligned_le32(&cmvs_v2[i].data)); if (ret < 0) goto out; } @@ -1964,7 +1962,7 @@ static void uea_dispatch_cmv_e1(struct uea_softc *sc, struct intr_pkt *intr) if (UEA_CHIP_VERSION(sc) == ADI930 && cmv->bFunction == E1_MAKEFUNCTION(2, 2)) { cmv->wIndex = cpu_to_le16(dsc->idx); - put_unaligned(cpu_to_le32(dsc->address), &cmv->dwSymbolicAddress); + put_unaligned_le32(dsc->address, &cmv->dwSymbolicAddress); cmv->wOffsetAddress = cpu_to_le16(dsc->offset); } else goto bad2; @@ -1978,11 +1976,11 @@ static void uea_dispatch_cmv_e1(struct uea_softc *sc, struct intr_pkt *intr) /* in case of MEMACCESS */ if (le16_to_cpu(cmv->wIndex) != dsc->idx || - le32_to_cpu(get_unaligned(&cmv->dwSymbolicAddress)) != dsc->address || + get_unaligned_le32(&cmv->dwSymbolicAddress) != dsc->address || le16_to_cpu(cmv->wOffsetAddress) != dsc->offset) goto bad2; - sc->data = le32_to_cpu(get_unaligned(&cmv->dwData)); + sc->data = get_unaligned_le32(&cmv->dwData); sc->data = sc->data << 16 | sc->data >> 16; wake_up_cmv_ack(sc); diff --git a/drivers/usb/class/cdc-acm.c 
b/drivers/usb/class/cdc-acm.c index 7b572e75e73..cefe7f2c6f7 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -280,7 +280,7 @@ static void acm_ctrl_irq(struct urb *urb) case USB_CDC_NOTIFY_SERIAL_STATE: - newctrl = le16_to_cpu(get_unaligned((__le16 *) data)); + newctrl = get_unaligned_le16(data); if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { dbg("calling hangup"); diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 8607846e3c3..1d253dd4ea8 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -773,7 +773,7 @@ int __init usbfs_init(void) usb_register_notify(&usbfs_nb); /* create mount point for usbfs */ - usbdir = proc_mkdir("usb", proc_bus); + usbdir = proc_mkdir("bus/usb", NULL); return 0; } @@ -783,6 +783,6 @@ void usbfs_cleanup(void) usb_unregister_notify(&usbfs_nb); unregister_filesystem(&usb_fs_type); if (usbdir) - remove_proc_entry("usb", proc_bus); + remove_proc_entry("bus/usb", NULL); } diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c index 9b913afb2e6..274c60a970c 100644 --- a/drivers/usb/gadget/at91_udc.c +++ b/drivers/usb/gadget/at91_udc.c @@ -231,6 +231,7 @@ static int proc_udc_open(struct inode *inode, struct file *file) } static const struct file_operations proc_ops = { + .owner = THIS_MODULE, .open = proc_udc_open, .read = seq_read, .llseek = seq_lseek, @@ -239,15 +240,7 @@ static const struct file_operations proc_ops = { static void create_debug_file(struct at91_udc *udc) { - struct proc_dir_entry *pde; - - pde = create_proc_entry (debug_filename, 0, NULL); - udc->pde = pde; - if (pde == NULL) - return; - - pde->proc_fops = &proc_ops; - pde->data = udc; + udc->pde = proc_create_data(debug_filename, 0, NULL, &proc_ops, udc); } static void remove_debug_file(struct at91_udc *udc) diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index 64a592cbbe7..be6613afedb 100644 --- a/drivers/usb/gadget/goku_udc.c +++ 
b/drivers/usb/gadget/goku_udc.c @@ -127,7 +127,7 @@ goku_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) /* enabling the no-toggle interrupt mode would need an api hook */ mode = 0; - max = le16_to_cpu(get_unaligned(&desc->wMaxPacketSize)); + max = get_unaligned_le16(&desc->wMaxPacketSize); switch (max) { case 64: mode++; case 32: mode++; diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 95f7662376f..881d74c3d96 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -2504,6 +2504,7 @@ static int proc_udc_open(struct inode *inode, struct file *file) } static const struct file_operations proc_ops = { + .owner = THIS_MODULE, .open = proc_udc_open, .read = seq_read, .llseek = seq_lseek, @@ -2512,11 +2513,7 @@ static const struct file_operations proc_ops = { static void create_proc_file(void) { - struct proc_dir_entry *pde; - - pde = create_proc_entry (proc_filename, 0, NULL); - if (pde) - pde->proc_fops = &proc_ops; + proc_create(proc_filename, 0, NULL, &proc_ops); } static void remove_proc_file(void) diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index bd58dd504f6..d0677f5d3cd 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -183,14 +183,10 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len, DBG("query OID %08x value, len %d:\n", OID, buf_len); for (i = 0; i < buf_len; i += 16) { DBG("%03d: %08x %08x %08x %08x\n", i, - le32_to_cpu(get_unaligned((__le32 *) - &buf[i])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 4])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 8])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 12]))); + get_unaligned_le32(&buf[i]), + get_unaligned_le32(&buf[i + 4]), + get_unaligned_le32(&buf[i + 8]), + get_unaligned_le32(&buf[i + 12])); } } @@ -666,7 +662,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len, break; case OID_PNP_QUERY_POWER: DBG("%s: 
OID_PNP_QUERY_POWER D%d\n", __func__, - le32_to_cpu(get_unaligned((__le32 *)buf)) - 1); + get_unaligned_le32(buf) - 1); /* only suspend is a real power state, and * it can't be entered by OID_PNP_SET_POWER... */ @@ -705,14 +701,10 @@ static int gen_ndis_set_resp (u8 configNr, u32 OID, u8 *buf, u32 buf_len, DBG("set OID %08x value, len %d:\n", OID, buf_len); for (i = 0; i < buf_len; i += 16) { DBG("%03d: %08x %08x %08x %08x\n", i, - le32_to_cpu(get_unaligned((__le32 *) - &buf[i])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 4])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 8])), - le32_to_cpu(get_unaligned((__le32 *) - &buf[i + 12]))); + get_unaligned_le32(&buf[i]), + get_unaligned_le32(&buf[i + 4]), + get_unaligned_le32(&buf[i + 8]), + get_unaligned_le32(&buf[i + 12])); } } @@ -726,8 +718,7 @@ static int gen_ndis_set_resp (u8 configNr, u32 OID, u8 *buf, u32 buf_len, * PROMISCUOUS, DIRECTED, * MULTICAST, ALL_MULTICAST, BROADCAST */ - *params->filter = (u16) le32_to_cpu(get_unaligned( - (__le32 *)buf)); + *params->filter = (u16)get_unaligned_le32(buf); DBG("%s: OID_GEN_CURRENT_PACKET_FILTER %08x\n", __func__, *params->filter); @@ -777,7 +768,7 @@ update_linkstate: * resuming, Windows forces a reset, and then SET_POWER D0. * FIXME ... then things go batty; Windows wedges itself. 
*/ - i = le32_to_cpu(get_unaligned((__le32 *)buf)); + i = get_unaligned_le32(buf); DBG("%s: OID_PNP_SET_POWER D%d\n", __func__, i - 1); switch (i) { case NdisDeviceStateD0: @@ -1064,8 +1055,8 @@ int rndis_msg_parser (u8 configNr, u8 *buf) return -ENOMEM; tmp = (__le32 *) buf; - MsgType = le32_to_cpu(get_unaligned(tmp++)); - MsgLength = le32_to_cpu(get_unaligned(tmp++)); + MsgType = get_unaligned_le32(tmp++); + MsgLength = get_unaligned_le32(tmp++); if (configNr >= RNDIS_MAX_CONFIGS) return -ENOTSUPP; @@ -1296,10 +1287,9 @@ int rndis_rm_hdr(struct sk_buff *skb) tmp++; /* DataOffset, DataLength */ - if (!skb_pull(skb, le32_to_cpu(get_unaligned(tmp++)) - + 8 /* offset of DataOffset */)) + if (!skb_pull(skb, get_unaligned_le32(tmp++) + 8)) return -EOVERFLOW; - skb_trim(skb, le32_to_cpu(get_unaligned(tmp++))); + skb_trim(skb, get_unaligned_le32(tmp++)); return 0; } diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c index 878e428a0ec..4154be375c7 100644 --- a/drivers/usb/gadget/usbstring.c +++ b/drivers/usb/gadget/usbstring.c @@ -74,7 +74,7 @@ static int utf8_to_utf16le(const char *s, __le16 *cp, unsigned len) goto fail; } else uchar = c; - put_unaligned (cpu_to_le16 (uchar), cp++); + put_unaligned_le16(uchar, cp++); count++; len--; } diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index f13d1029aeb..382587c4457 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -770,7 +770,7 @@ static int ehci_hub_control ( if (status & ~0xffff) /* only if wPortChange is interesting */ #endif dbg_port (ehci, "GetStatus", wIndex + 1, temp); - put_unaligned(cpu_to_le32 (status), (__le32 *) buf); + put_unaligned_le32(status, buf); break; case SetHubFeature: switch (wValue) { diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c index 5be3bb3e6a9..17dc2eccda8 100644 --- a/drivers/usb/host/ohci-hub.c +++ b/drivers/usb/host/ohci-hub.c @@ -736,14 +736,14 @@ static int ohci_hub_control ( break; case 
GetHubStatus: temp = roothub_status (ohci) & ~(RH_HS_CRWE | RH_HS_DRWE); - put_unaligned(cpu_to_le32 (temp), (__le32 *) buf); + put_unaligned_le32(temp, buf); break; case GetPortStatus: if (!wIndex || wIndex > ports) goto error; wIndex--; temp = roothub_portstatus (ohci, wIndex); - put_unaligned(cpu_to_le32 (temp), (__le32 *) buf); + put_unaligned_le32(temp, buf); #ifndef OHCI_VERBOSE_DEBUG if (*(u16*)(buf+2)) /* only if wPortChange is interesting */ diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 3fd7a0c1207..426575247b2 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1506,15 +1506,7 @@ static const char proc_filename[] = "driver/sl811h"; static void create_debug_file(struct sl811 *sl811) { - struct proc_dir_entry *pde; - - pde = create_proc_entry(proc_filename, 0, NULL); - if (pde == NULL) - return; - - pde->proc_fops = &proc_ops; - pde->data = sl811; - sl811->pde = pde; + sl811->pde = proc_create_data(proc_filename, 0, NULL, &proc_ops, sl811); } static void remove_debug_file(struct sl811 *sl811) diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c index 17b5267f44d..9f8a389dc7a 100644 --- a/drivers/video/clps711xfb.c +++ b/drivers/video/clps711xfb.c @@ -381,7 +381,7 @@ int __init clps711xfb_init(void) /* Register the /proc entries. 
*/ clps7111fb_backlight_proc_entry = create_proc_entry("backlight", 0444, - &proc_root); + NULL); if (clps7111fb_backlight_proc_entry == NULL) { printk("Couldn't create the /proc entry for the backlight.\n"); return -EINVAL; diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 8eda7b60df8..ad31983b43e 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -1881,7 +1881,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * (b - count)), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; break; @@ -1953,7 +1953,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * (b - count)), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; } @@ -1972,7 +1972,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * t), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; break; @@ -2042,7 +2042,7 @@ static int fbcon_scroll(struct vc_data *vc, int t, int b, int dir, scr_memsetw((unsigned short *) (vc->vc_origin + vc->vc_size_row * t), - vc->vc_video_erase_char, + vc->vc_scrl_erase_char, vc->vc_size_row * count); return 1; } diff --git a/drivers/video/console/mdacon.c b/drivers/video/console/mdacon.c index bd8d995fe25..38a296bbdfc 100644 --- a/drivers/video/console/mdacon.c +++ b/drivers/video/console/mdacon.c @@ -531,7 +531,7 @@ static void mdacon_cursor(struct vc_data *c, int mode) static int mdacon_scroll(struct vc_data *c, int t, int b, int dir, int lines) { - u16 eattr = mda_convert_attr(c->vc_video_erase_char); + u16 eattr = mda_convert_attr(c->vc_scrl_erase_char); if (!lines) return 0; diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 
67a682d6cc7..a11cc2fdd4c 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -170,12 +170,12 @@ static int sticon_scroll(struct vc_data *conp, int t, int b, int dir, int count) switch (dir) { case SM_UP: sti_bmove(sti, t + count, 0, t, 0, b - t - count, conp->vc_cols); - sti_clear(sti, b - count, 0, count, conp->vc_cols, conp->vc_video_erase_char); + sti_clear(sti, b - count, 0, count, conp->vc_cols, conp->vc_scrl_erase_char); break; case SM_DOWN: sti_bmove(sti, t, 0, t + count, 0, b - t - count, conp->vc_cols); - sti_clear(sti, t, 0, count, conp->vc_cols, conp->vc_video_erase_char); + sti_clear(sti, t, 0, count, conp->vc_cols, conp->vc_scrl_erase_char); break; } diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 6df29a62d72..bd1f57b259d 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1350,7 +1350,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, } else c->vc_origin += delta; scr_memsetw((u16 *) (c->vc_origin + c->vc_screenbuf_size - - delta), c->vc_video_erase_char, + delta), c->vc_scrl_erase_char, delta); } else { if (oldo - delta < vga_vram_base) { @@ -1363,7 +1363,7 @@ static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, } else c->vc_origin -= delta; c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; - scr_memsetw((u16 *) (c->vc_origin), c->vc_video_erase_char, + scr_memsetw((u16 *) (c->vc_origin), c->vc_scrl_erase_char, delta); } c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c index aaa3e538e5d..5b5f072fc1a 100644 --- a/drivers/video/matrox/matroxfb_misc.c +++ b/drivers/video/matrox/matroxfb_misc.c @@ -522,8 +522,6 @@ static void parse_bios(unsigned char __iomem* vbios, struct matrox_bios* bd) { #endif } -#define get_u16(x) (le16_to_cpu(get_unaligned((__u16*)(x)))) -#define get_u32(x) (le32_to_cpu(get_unaligned((__u32*)(x)))) 
static int parse_pins1(WPMINFO const struct matrox_bios* bd) { unsigned int maxdac; @@ -532,11 +530,12 @@ static int parse_pins1(WPMINFO const struct matrox_bios* bd) { case 1: maxdac = 220000; break; default: maxdac = 240000; break; } - if (get_u16(bd->pins + 24)) { - maxdac = get_u16(bd->pins + 24) * 10; + if (get_unaligned_le16(bd->pins + 24)) { + maxdac = get_unaligned_le16(bd->pins + 24) * 10; } MINFO->limits.pixel.vcomax = maxdac; - MINFO->values.pll.system = get_u16(bd->pins + 28) ? get_u16(bd->pins + 28) * 10 : 50000; + MINFO->values.pll.system = get_unaligned_le16(bd->pins + 28) ? + get_unaligned_le16(bd->pins + 28) * 10 : 50000; /* ignore 4MB, 8MB, module clocks */ MINFO->features.pll.ref_freq = 14318; MINFO->values.reg.mctlwtst = 0x00030101; @@ -575,7 +574,8 @@ static void default_pins2(WPMINFO2) { static int parse_pins3(WPMINFO const struct matrox_bios* bd) { MINFO->limits.pixel.vcomax = MINFO->limits.system.vcomax = (bd->pins[36] == 0xFF) ? 230000 : ((bd->pins[36] + 100) * 1000); - MINFO->values.reg.mctlwtst = get_u32(bd->pins + 48) == 0xFFFFFFFF ? 0x01250A21 : get_u32(bd->pins + 48); + MINFO->values.reg.mctlwtst = get_unaligned_le32(bd->pins + 48) == 0xFFFFFFFF ? + 0x01250A21 : get_unaligned_le32(bd->pins + 48); /* memory config */ MINFO->values.reg.memrdbk = ((bd->pins[57] << 21) & 0x1E000000) | ((bd->pins[57] << 22) & 0x00C00000) | @@ -601,7 +601,7 @@ static void default_pins3(WPMINFO2) { static int parse_pins4(WPMINFO const struct matrox_bios* bd) { MINFO->limits.pixel.vcomax = (bd->pins[ 39] == 0xFF) ? 230000 : bd->pins[ 39] * 4000; MINFO->limits.system.vcomax = (bd->pins[ 38] == 0xFF) ? 
MINFO->limits.pixel.vcomax : bd->pins[ 38] * 4000; - MINFO->values.reg.mctlwtst = get_u32(bd->pins + 71); + MINFO->values.reg.mctlwtst = get_unaligned_le32(bd->pins + 71); MINFO->values.reg.memrdbk = ((bd->pins[87] << 21) & 0x1E000000) | ((bd->pins[87] << 22) & 0x00C00000) | ((bd->pins[86] << 1) & 0x000001E0) | @@ -609,7 +609,7 @@ static int parse_pins4(WPMINFO const struct matrox_bios* bd) { MINFO->values.reg.opt = ((bd->pins[53] << 15) & 0x00400000) | ((bd->pins[53] << 22) & 0x10000000) | ((bd->pins[53] << 7) & 0x00001C00); - MINFO->values.reg.opt3 = get_u32(bd->pins + 67); + MINFO->values.reg.opt3 = get_unaligned_le32(bd->pins + 67); MINFO->values.pll.system = (bd->pins[ 65] == 0xFF) ? 200000 : bd->pins[ 65] * 4000; MINFO->features.pll.ref_freq = (bd->pins[ 92] & 0x01) ? 14318 : 27000; return 0; @@ -640,12 +640,12 @@ static int parse_pins5(WPMINFO const struct matrox_bios* bd) { MINFO->limits.video.vcomin = (bd->pins[122] == 0xFF) ? MINFO->limits.system.vcomin : bd->pins[122] * mult; MINFO->values.pll.system = MINFO->values.pll.video = (bd->pins[ 92] == 0xFF) ? 284000 : bd->pins[ 92] * 4000; - MINFO->values.reg.opt = get_u32(bd->pins+ 48); - MINFO->values.reg.opt2 = get_u32(bd->pins+ 52); - MINFO->values.reg.opt3 = get_u32(bd->pins+ 94); - MINFO->values.reg.mctlwtst = get_u32(bd->pins+ 98); - MINFO->values.reg.memmisc = get_u32(bd->pins+102); - MINFO->values.reg.memrdbk = get_u32(bd->pins+106); + MINFO->values.reg.opt = get_unaligned_le32(bd->pins + 48); + MINFO->values.reg.opt2 = get_unaligned_le32(bd->pins + 52); + MINFO->values.reg.opt3 = get_unaligned_le32(bd->pins + 94); + MINFO->values.reg.mctlwtst = get_unaligned_le32(bd->pins + 98); + MINFO->values.reg.memmisc = get_unaligned_le32(bd->pins + 102); + MINFO->values.reg.memrdbk = get_unaligned_le32(bd->pins + 106); MINFO->features.pll.ref_freq = (bd->pins[110] & 0x01) ? 
14318 : 27000; MINFO->values.memory.ddr = (bd->pins[114] & 0x60) == 0x20; MINFO->values.memory.dll = (bd->pins[115] & 0x02) != 0; diff --git a/drivers/video/metronomefb.c b/drivers/video/metronomefb.c index 24979128636..cc4c038a1b3 100644 --- a/drivers/video/metronomefb.c +++ b/drivers/video/metronomefb.c @@ -206,8 +206,7 @@ static int load_waveform(u8 *mem, size_t size, u8 *metromem, int m, int t, } /* check waveform mode table address checksum */ - wmta = le32_to_cpu(get_unaligned((__le32 *) wfm_hdr->wmta)); - wmta &= 0x00FFFFFF; + wmta = get_unaligned_le32(wfm_hdr->wmta) & 0x00FFFFFF; cksum_idx = wmta + m*4 + 3; if (cksum_idx > size) return -EINVAL; @@ -219,8 +218,7 @@ static int load_waveform(u8 *mem, size_t size, u8 *metromem, int m, int t, } /* check waveform temperature table address checksum */ - tta = le32_to_cpu(get_unaligned((int *) (mem + wmta + m*4))); - tta &= 0x00FFFFFF; + tta = get_unaligned_le32(mem + wmta + m * 4) & 0x00FFFFFF; cksum_idx = tta + trn*4 + 3; if (cksum_idx > size) return -EINVAL; @@ -233,8 +231,7 @@ static int load_waveform(u8 *mem, size_t size, u8 *metromem, int m, int t, /* here we do the real work of putting the waveform into the metromem buffer. 
this does runlength decoding of the waveform */ - wfm_idx = le32_to_cpu(get_unaligned((__le32 *) (mem + tta + trn*4))); - wfm_idx &= 0x00FFFFFF; + wfm_idx = get_unaligned_le32(mem + tta + trn * 4) & 0x00FFFFFF; owfm_idx = wfm_idx; if (wfm_idx > size) return -EINVAL; diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index 2ce4cebc31d..099b6fb5b5c 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/zorro.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/init.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> @@ -76,36 +77,58 @@ proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t * } static const struct file_operations proc_bus_zorro_operations = { + .owner = THIS_MODULE, .llseek = proc_bus_zorro_lseek, .read = proc_bus_zorro_read, }; -static int -get_zorro_dev_info(char *buf, char **start, off_t pos, int count) +static void * zorro_seq_start(struct seq_file *m, loff_t *pos) { - u_int slot; - off_t at = 0; - int len, cnt; - - for (slot = cnt = 0; slot < zorro_num_autocon && count > cnt; slot++) { - struct zorro_dev *z = &zorro_autocon[slot]; - len = sprintf(buf, "%02x\t%08x\t%08lx\t%08lx\t%02x\n", slot, - z->id, (unsigned long)zorro_resource_start(z), - (unsigned long)zorro_resource_len(z), - z->rom.er_Type); - at += len; - if (at >= pos) { - if (!*start) { - *start = buf + (pos - (at - len)); - cnt = at - pos; - } else - cnt += len; - buf += len; - } - } - return (count > cnt) ? cnt : count; + return (*pos < zorro_num_autocon) ? pos : NULL; +} + +static void * zorro_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (*pos < zorro_num_autocon) ? 
pos : NULL; +} + +static void zorro_seq_stop(struct seq_file *m, void *v) +{ +} + +static int zorro_seq_show(struct seq_file *m, void *v) +{ + u_int slot = *(loff_t *)v; + struct zorro_dev *z = &zorro_autocon[slot]; + + seq_printf(m, "%02x\t%08x\t%08lx\t%08lx\t%02x\n", slot, z->id, + (unsigned long)zorro_resource_start(z), + (unsigned long)zorro_resource_len(z), + z->rom.er_Type); + return 0; +} + +static const struct seq_operations zorro_devices_seq_ops = { + .start = zorro_seq_start, + .next = zorro_seq_next, + .stop = zorro_seq_stop, + .show = zorro_seq_show, +}; + +static int zorro_devices_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &zorro_devices_seq_ops); } +static const struct file_operations zorro_devices_proc_fops = { + .owner = THIS_MODULE, + .open = zorro_devices_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static struct proc_dir_entry *proc_bus_zorro_dir; static int __init zorro_proc_attach_device(u_int slot) @@ -114,11 +137,11 @@ static int __init zorro_proc_attach_device(u_int slot) char name[4]; sprintf(name, "%02x", slot); - entry = create_proc_entry(name, 0, proc_bus_zorro_dir); + entry = proc_create_data(name, 0, proc_bus_zorro_dir, + &proc_bus_zorro_operations, + &zorro_autocon[slot]); if (!entry) return -ENOMEM; - entry->proc_fops = &proc_bus_zorro_operations; - entry->data = &zorro_autocon[slot]; entry->size = sizeof(struct zorro_dev); return 0; } @@ -128,9 +151,9 @@ static int __init zorro_proc_init(void) u_int slot; if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) { - proc_bus_zorro_dir = proc_mkdir("zorro", proc_bus); - create_proc_info_entry("devices", 0, proc_bus_zorro_dir, - get_zorro_dev_info); + proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL); + proc_create("devices", 0, proc_bus_zorro_dir, + &zorro_devices_proc_fops); for (slot = 0; slot < zorro_num_autocon; slot++) zorro_proc_attach_device(slot); } diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 
853845abcca..55e8ee1900a 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -41,7 +41,7 @@ config BINFMT_ELF_FDPIC It is also possible to run FDPIC ELF binaries on MMU linux also. config BINFMT_FLAT - tristate "Kernel support for flat binaries" + bool "Kernel support for flat binaries" depends on !MMU help Support uClinux FLAT format binaries. diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index b9b2b27b68c..ea7df214692 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -122,9 +122,9 @@ adfs_dir_checkbyte(const struct adfs_dir *dir) ptr.ptr8 = bufoff(bh, i); end.ptr8 = ptr.ptr8 + last - i; - do + do { dircheck = *ptr.ptr8++ ^ ror13(dircheck); - while (ptr.ptr8 < end.ptr8); + } while (ptr.ptr8 < end.ptr8); } /* diff --git a/fs/affs/file.c b/fs/affs/file.c index 6e0c9399200..e87ede608f7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -325,8 +325,7 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block); - if (block > (sector_t)0x7fffffffUL) - BUG(); + BUG_ON(block > (sector_t)0x7fffffffUL); if (block >= AFFS_I(inode)->i_blkcnt) { if (block > AFFS_I(inode)->i_blkcnt || !create) @@ -493,8 +492,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign u32 tmp; pr_debug("AFFS: read_page(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - if (from > to || to > PAGE_CACHE_SIZE) - BUG(); + BUG_ON(from > to || to > PAGE_CACHE_SIZE); kmap(page); data = page_address(page); bsize = AFFS_SB(sb)->s_data_blksize; @@ -507,8 +505,7 @@ affs_do_readpage_ofs(struct file *file, struct page *page, unsigned from, unsign if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (from + tmp > to || tmp > bsize) - BUG(); + BUG_ON(from + tmp > to || tmp > bsize); memcpy(data + from, AFFS_DATA(bh) + boff, tmp); affs_brelse(bh); bidx++; @@ -540,8 +537,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if 
(IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, newsize - size); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memset(AFFS_DATA(bh) + boff, 0, tmp); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); affs_fix_checksum(sb, bh); @@ -560,8 +556,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) if (IS_ERR(bh)) goto out; tmp = min(bsize, newsize - size); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); AFFS_DATA_HEAD(bh)->key = cpu_to_be32(inode->i_ino); AFFS_DATA_HEAD(bh)->sequence = cpu_to_be32(bidx); @@ -683,8 +678,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) return PTR_ERR(bh); tmp = min(bsize - boff, to - from); - if (boff + tmp > bsize || tmp > bsize) - BUG(); + BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp); affs_fix_checksum(sb, bh); @@ -732,8 +726,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, if (IS_ERR(bh)) goto out; tmp = min(bsize, to - from); - if (tmp > bsize) - BUG(); + BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); diff --git a/fs/affs/super.c b/fs/affs/super.c index d2dc047cb47..01d25d53254 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -199,7 +199,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s case Opt_prefix: /* Free any previous prefix */ kfree(*prefix); - *prefix = NULL; *prefix = match_strdup(&args[0]); if (!*prefix) return 0; @@ -233,6 +232,8 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s break; case Opt_volume: { char *vol = match_strdup(&args[0]); + if (!vol) + return 0; strlcpy(volume, vol, 32); kfree(vol); 
break; diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h index 7b4d4fab4c8..255f5dd6040 100644 --- a/fs/afs/afs_cm.h +++ b/fs/afs/afs_cm.h @@ -24,7 +24,8 @@ enum AFS_CM_Operations { CBGetXStatsVersion = 209, /* get version of extended statistics */ CBGetXStats = 210, /* get contents of extended statistics data */ CBInitCallBackState3 = 213, /* initialise callback state, version 3 */ - CBGetCapabilities = 65538, /* get client capabilities */ + CBProbeUuid = 214, /* check the client hasn't rebooted */ + CBTellMeAboutYourself = 65538, /* get client capabilities */ }; #define AFS_CAP_ERROR_TRANSLATION 0x1 diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 584bb0f9c36..5e1df14e16b 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -20,7 +20,7 @@ DECLARE_RWSEM(afs_proc_cells_sem); LIST_HEAD(afs_proc_cells); -static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells); +static LIST_HEAD(afs_cells); static DEFINE_RWLOCK(afs_cells_lock); static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */ static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq); diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 47b71c8947f..eb765489164 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -26,8 +26,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *, - bool); +static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, + struct sk_buff *, bool); static void afs_cm_destructor(struct afs_call *); /* @@ -71,11 +72,21 @@ static const struct afs_call_type afs_SRXCBProbe = { }; /* - * CB.GetCapabilities operation type + * CB.ProbeUuid operation type */ -static const struct afs_call_type afs_SRXCBGetCapabilites 
= { - .name = "CB.GetCapabilities", - .deliver = afs_deliver_cb_get_capabilities, +static const struct afs_call_type afs_SRXCBProbeUuid = { + .name = "CB.ProbeUuid", + .deliver = afs_deliver_cb_probe_uuid, + .abort_to_error = afs_abort_to_error, + .destructor = afs_cm_destructor, +}; + +/* + * CB.TellMeAboutYourself operation type + */ +static const struct afs_call_type afs_SRXCBTellMeAboutYourself = { + .name = "CB.TellMeAboutYourself", + .deliver = afs_deliver_cb_tell_me_about_yourself, .abort_to_error = afs_abort_to_error, .destructor = afs_cm_destructor, }; @@ -103,8 +114,8 @@ bool afs_cm_incoming_call(struct afs_call *call) case CBProbe: call->type = &afs_SRXCBProbe; return true; - case CBGetCapabilities: - call->type = &afs_SRXCBGetCapabilites; + case CBTellMeAboutYourself: + call->type = &afs_SRXCBTellMeAboutYourself; return true; default: return false; @@ -393,9 +404,105 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, } /* + * allow the fileserver to quickly find out if the fileserver has been rebooted + */ +static void SRXAFSCB_ProbeUuid(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + struct afs_uuid *r = call->request; + + struct { + __be32 match; + } reply; + + _enter(""); + + + if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + reply.match = htonl(0); + else + reply.match = htonl(1); + + afs_send_simple_reply(call, &reply, sizeof(reply)); + _leave(""); +} + +/* + * deliver request data to a CB.ProbeUuid call + */ +static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, + bool last) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + + if (skb->len > 0) + return -EBADMSG; + if (!last) + return 0; + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + 
call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, skb, last, call->buffer, + 11 * sizeof(__be32)); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + _debug("trailer"); + if (skb->len != 0) + return -EBADMSG; + break; + } + + if (!last) + return 0; + + call->state = AFS_CALL_REPLYING; + + INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); + schedule_work(&call->work); + return 0; +} + +/* * allow the fileserver to ask about the cache manager's capabilities */ -static void SRXAFSCB_GetCapabilities(struct work_struct *work) +static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) { struct afs_interface *ifs; struct afs_call *call = container_of(work, struct afs_call, work); @@ -456,10 +563,10 @@ static void SRXAFSCB_GetCapabilities(struct work_struct *work) } /* - * deliver request data to a CB.GetCapabilities call + * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_get_capabilities(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, + struct sk_buff *skb, bool last) { _enter(",{%u},%d", skb->len, last); @@ -471,7 +578,7 @@ static int afs_deliver_cb_get_capabilities(struct afs_call *call, /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; - INIT_WORK(&call->work, SRXAFSCB_GetCapabilities); + INIT_WORK(&call->work, SRXAFSCB_TellMeAboutYourself); schedule_work(&call->work); return 0; } 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 846c7615ac9..9f7d1ae7026 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -41,6 +41,7 @@ static const struct file_operations afs_proc_cells_fops = { .write = afs_proc_cells_write, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int afs_proc_rootcell_open(struct inode *inode, struct file *file); @@ -56,7 +57,8 @@ static const struct file_operations afs_proc_rootcell_fops = { .read = afs_proc_rootcell_read, .write = afs_proc_rootcell_write, .llseek = no_llseek, - .release = afs_proc_rootcell_release + .release = afs_proc_rootcell_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file); @@ -80,6 +82,7 @@ static const struct file_operations afs_proc_cell_volumes_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_volumes_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_vlservers_open(struct inode *inode, @@ -104,6 +107,7 @@ static const struct file_operations afs_proc_cell_vlservers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_vlservers_release, + .owner = THIS_MODULE, }; static int afs_proc_cell_servers_open(struct inode *inode, struct file *file); @@ -127,6 +131,7 @@ static const struct file_operations afs_proc_cell_servers_fops = { .read = seq_read, .llseek = seq_lseek, .release = afs_proc_cell_servers_release, + .owner = THIS_MODULE, }; /* @@ -143,17 +148,13 @@ int afs_proc_init(void) goto error_dir; proc_afs->owner = THIS_MODULE; - p = create_proc_entry("cells", 0, proc_afs); + p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops); if (!p) goto error_cells; - p->proc_fops = &afs_proc_cells_fops; - p->owner = THIS_MODULE; - p = create_proc_entry("rootcell", 0, proc_afs); + p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops); if (!p) goto error_rootcell; - p->proc_fops = &afs_proc_rootcell_fops; - p->owner = THIS_MODULE; _leave(" = 0"); return 0; @@ 
-395,26 +396,20 @@ int afs_proc_cell_setup(struct afs_cell *cell) if (!cell->proc_dir) goto error_dir; - p = create_proc_entry("servers", 0, cell->proc_dir); + p = proc_create_data("servers", 0, cell->proc_dir, + &afs_proc_cell_servers_fops, cell); if (!p) goto error_servers; - p->proc_fops = &afs_proc_cell_servers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("vlservers", 0, cell->proc_dir); + p = proc_create_data("vlservers", 0, cell->proc_dir, + &afs_proc_cell_vlservers_fops, cell); if (!p) goto error_vlservers; - p->proc_fops = &afs_proc_cell_vlservers_fops; - p->owner = THIS_MODULE; - p->data = cell; - p = create_proc_entry("volumes", 0, cell->proc_dir); + p = proc_create_data("volumes", 0, cell->proc_dir, + &afs_proc_cell_volumes_fops, cell); if (!p) goto error_volumes; - p->proc_fops = &afs_proc_cell_volumes_fops; - p->owner = THIS_MODULE; - p->data = cell; _leave(" = 0"); return 0; @@ -191,6 +191,43 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ } while(0) + +/* __put_ioctx + * Called when the last user of an aio context has gone away, + * and the struct needs to be freed. 
+ */ +static void __put_ioctx(struct kioctx *ctx) +{ + unsigned nr_events = ctx->max_reqs; + + BUG_ON(ctx->reqs_active); + + cancel_delayed_work(&ctx->wq); + cancel_work_sync(&ctx->wq.work); + aio_free_ring(ctx); + mmdrop(ctx->mm); + ctx->mm = NULL; + pr_debug("__put_ioctx: freeing %p\n", ctx); + kmem_cache_free(kioctx_cachep, ctx); + + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } +} + +#define get_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + atomic_inc(&(kioctx)->users); \ +} while (0) +#define put_ioctx(kioctx) do { \ + BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ + if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ + __put_ioctx(kioctx); \ +} while (0) + /* ioctx_alloc * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. */ @@ -240,7 +277,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) if (ctx->max_reqs == 0) goto out_cleanup; - /* now link into global list. kludge. FIXME */ + /* now link into global list. */ write_lock(&mm->ioctx_list_lock); ctx->next = mm->ioctx_list; mm->ioctx_list = ctx; @@ -361,32 +398,6 @@ void exit_aio(struct mm_struct *mm) } } -/* __put_ioctx - * Called when the last user of an aio context has gone away, - * and the struct needs to be freed. - */ -void __put_ioctx(struct kioctx *ctx) -{ - unsigned nr_events = ctx->max_reqs; - - BUG_ON(ctx->reqs_active); - - cancel_delayed_work(&ctx->wq); - cancel_work_sync(&ctx->wq.work); - aio_free_ring(ctx); - mmdrop(ctx->mm); - ctx->mm = NULL; - pr_debug("__put_ioctx: freeing %p\n", ctx); - kmem_cache_free(kioctx_cachep, ctx); - - if (nr_events) { - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - nr_events > aio_nr); - aio_nr -= nr_events; - spin_unlock(&aio_nr_lock); - } -} - /* aio_get_req * Allocate a slot for an aio request. 
Increments the users count * of the kioctx so that the kioctx stays around until all requests are @@ -542,10 +553,7 @@ int aio_put_req(struct kiocb *req) return ret; } -/* Lookup an ioctx id. ioctx_list is lockless for reads. - * FIXME: this is O(n) and is only suitable for development. - */ -struct kioctx *lookup_ioctx(unsigned long ctx_id) +static struct kioctx *lookup_ioctx(unsigned long ctx_id) { struct kioctx *ioctx; struct mm_struct *mm; @@ -1552,7 +1560,7 @@ static int aio_wake_function(wait_queue_t *wait, unsigned mode, return 1; } -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, +static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb) { struct kiocb *req; @@ -1593,7 +1601,7 @@ int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * event using the eventfd_signal() function. */ req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); - if (unlikely(IS_ERR(req->ki_eventfd))) { + if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); goto out_put_req; } diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a54a946a50a..aa4c5ff8a40 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -533,9 +533,9 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct goto next; if (d_unhashed(dentry)) { - struct autofs_info *ino = autofs4_dentry_ino(dentry); struct inode *inode = dentry->d_inode; + ino = autofs4_dentry_ino(dentry); list_del_init(&ino->rehash); dget(dentry); /* diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 82123ff3e1d..e8717de3bab 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -489,9 +489,9 @@ static void befs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { befs_inode_info *befs_ino = BEFS_I(dentry->d_inode); if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { - char *p = nd_get_link(nd); - if (!IS_ERR(p)) - kfree(p); + char *link = nd_get_link(nd); + if (!IS_ERR(link)) + kfree(link); } } diff --git 
a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a1bb2244cac..ba4cddb92f1 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -372,21 +372,17 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); } else { - static unsigned long error_time, error_time2; if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && - (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ) + (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) { printk(KERN_NOTICE "executable not page aligned\n"); - error_time2 = jiffies; } - if ((fd_offset & ~PAGE_MASK) != 0 && - (jiffies-error_time) > 5*HZ) + if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit()) { printk(KERN_WARNING "fd_offset is not page aligned. Please convert program: %s\n", bprm->file->f_path.dentry->d_name.name); - error_time = jiffies; } if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { @@ -495,15 +491,13 @@ static int load_aout_library(struct file *file) start_addr = ex.a_entry & 0xfffff000; if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { - static unsigned long error_time; loff_t pos = N_TXTOFF(ex); - if ((jiffies-error_time) > 5*HZ) + if (printk_ratelimit()) { printk(KERN_WARNING "N_TXTOFF is not page aligned. 
Please convert library: %s\n", file->f_path.dentry->d_name.name); - error_time = jiffies; } down_write(&current->mm->mmap_sem); do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9924581df6f..b25707fee2c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1255,26 +1255,23 @@ static int writenote(struct memelfnote *men, struct file *file, static void fill_elf_header(struct elfhdr *elf, int segs, u16 machine, u32 flags, u8 osabi) { + memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; elf->e_ident[EI_OSABI] = ELF_OSABI; - memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); elf->e_type = ET_CORE; elf->e_machine = machine; elf->e_version = EV_CURRENT; - elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); - elf->e_shoff = 0; elf->e_flags = flags; elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = segs; - elf->e_shentsize = 0; - elf->e_shnum = 0; - elf->e_shstrndx = 0; + return; } @@ -1725,26 +1722,25 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread_status_size = 0; if (signr) { - struct elf_thread_status *tmp; + struct elf_thread_status *ets; rcu_read_lock(); do_each_thread(g, p) if (current->mm == p->mm && current != p) { - tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); - if (!tmp) { + ets = kzalloc(sizeof(*ets), GFP_ATOMIC); + if (!ets) { rcu_read_unlock(); return 0; } - tmp->thread = p; - list_add(&tmp->list, &info->thread_list); + ets->thread = p; + list_add(&ets->list, &info->thread_list); } while_each_thread(g, p); rcu_read_unlock(); list_for_each(t, &info->thread_list) { - struct elf_thread_status *tmp; int sz; - tmp = list_entry(t, struct elf_thread_status, list); - sz = elf_dump_thread_status(signr, tmp); + ets = list_entry(t, struct elf_thread_status, list); + sz = elf_dump_thread_status(signr, ets); 
info->thread_status_size += sz; } } @@ -2000,10 +1996,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) { struct page *page; - struct vm_area_struct *vma; + struct vm_area_struct *tmp_vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { + &page, &tmp_vma) <= 0) { DUMP_SEEK(PAGE_SIZE); } else { if (page == ZERO_PAGE(0)) { @@ -2013,7 +2009,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un } } else { void *kaddr; - flush_cache_page(vma, addr, + flush_cache_page(tmp_vma, addr, page_to_pfn(page)); kaddr = kmap(page); if ((size += PAGE_SIZE) > limit || diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 32649f2a165..ddd35d87339 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -136,8 +136,8 @@ static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *params, retval = kernel_read(file, params->hdr.e_phoff, (char *) params->phdrs, size); - if (retval < 0) - return retval; + if (unlikely(retval != size)) + return retval < 0 ? 
retval : -ENOEXEC; /* determine stack size for this binary */ phdr = params->phdrs; @@ -218,8 +218,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, phdr->p_offset, interpreter_name, phdr->p_filesz); - if (retval < 0) + if (unlikely(retval != phdr->p_filesz)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } retval = -ENOENT; if (interpreter_name[phdr->p_filesz - 1] != '\0') @@ -245,8 +248,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); - if (retval < 0) + if (unlikely(retval != BINPRM_BUF_SIZE)) { + if (retval >= 0) + retval = -ENOEXEC; goto error; + } interp_params.hdr = *((struct elfhdr *) bprm->buf); break; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f95ae9789c9..f9c88d0c8ce 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->sh_bang++; /* Well, the bang-shell is implicit... */ + bprm->sh_bang = 1; /* Well, the bang-shell is implicit... 
*/ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 0498b181dd5..3b40d45a3a1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -531,7 +531,8 @@ static int load_flat_file(struct linux_binprm * bprm, DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); down_write(&current->mm->mmap_sem); - textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE, 0); + textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, + MAP_PRIVATE|MAP_EXECUTABLE, 0); up_write(&current->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { if (!textpos) @@ -932,14 +933,8 @@ static int __init init_flat_binfmt(void) return register_binfmt(&flat_format); } -static void __exit exit_flat_binfmt(void) -{ - unregister_binfmt(&flat_format); -} - /****************************************************************************/ core_initcall(init_flat_binfmt); -module_exit(exit_flat_binfmt); /****************************************************************************/ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index dbf0ac0523d..7191306367c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -115,6 +115,12 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; + retval = -ENOEXEC; + if (bprm->misc_bang) + goto _ret; + + bprm->misc_bang = 1; + /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index ab33939b12a..9e3963f7ebf 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -29,7 +29,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) * Sorta complicated, but hopefully it will work. 
-TYT */ - bprm->sh_bang++; + bprm->sh_bang = 1; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; @@ -937,6 +937,95 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, return ERR_PTR(-EINVAL); } +static void bio_copy_kern_endio(struct bio *bio, int err) +{ + struct bio_vec *bvec; + const int read = bio_data_dir(bio) == READ; + char *p = bio->bi_private; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + char *addr = page_address(bvec->bv_page); + + if (read && !err) + memcpy(p, addr, bvec->bv_len); + + __free_page(bvec->bv_page); + p += bvec->bv_len; + } + + bio_put(bio); +} + +/** + * bio_copy_kern - copy kernel address into bio + * @q: the struct request_queue for the bio + * @data: pointer to buffer to copy + * @len: length in bytes + * @gfp_mask: allocation flags for bio and page allocation + * + * copy the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, + gfp_t gfp_mask, int reading) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + struct bio *bio; + struct bio_vec *bvec; + int i, ret; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + while (len) { + struct page *page; + unsigned int bytes = PAGE_SIZE; + + if (bytes > len) + bytes = len; + + page = alloc_page(q->bounce_gfp | gfp_mask); + if (!page) { + ret = -ENOMEM; + goto cleanup; + } + + if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { + ret = -EINVAL; + goto cleanup; + } + + len -= bytes; + } + + if (!reading) { + void *p = data; + + bio_for_each_segment(bvec, bio, i) { + char *addr = page_address(bvec->bv_page); + + memcpy(addr, p, bvec->bv_len); + p += bvec->bv_len; + } + } + + bio->bi_private = data; + bio->bi_end_io = 
bio_copy_kern_endio; + return bio; +cleanup: + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); + + bio_put(bio); + + return ERR_PTR(ret); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. @@ -1273,6 +1362,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); EXPORT_SYMBOL(bio_map_kern); +EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); diff --git a/fs/buffer.c b/fs/buffer.c index 3db4a26adc4..189efa4efc6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2211,8 +2211,8 @@ out: return err; } -int cont_expand_zero(struct file *file, struct address_space *mapping, - loff_t pos, loff_t *bytes) +static int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; unsigned blocksize = 1 << inode->i_blkbits; @@ -2328,23 +2328,6 @@ int block_commit_write(struct page *page, unsigned from, unsigned to) return 0; } -int generic_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - __block_commit_write(inode,page,from,to); - /* - * No need to use i_size_read() here, the i_size - * cannot change under us because we hold i_mutex. - */ - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. 
Hence we must @@ -3315,7 +3298,6 @@ EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); -EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); diff --git a/fs/char_dev.c b/fs/char_dev.c index 038674aa88a..68e510b8845 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -55,7 +55,6 @@ static struct char_device_struct { unsigned int baseminor; int minorct; char name[64]; - struct file_operations *fops; struct cdev *cdev; /* will die */ } *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 0228ed06069..cc950f69e51 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -468,7 +468,7 @@ cifs_proc_init(void) { struct proc_dir_entry *pde; - proc_fs_cifs = proc_mkdir("cifs", proc_root_fs); + proc_fs_cifs = proc_mkdir("fs/cifs", NULL); if (proc_fs_cifs == NULL) return; @@ -559,7 +559,7 @@ cifs_proc_clean(void) remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("Experimental", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); - remove_proc_entry("cifs", proc_root_fs); + remove_proc_entry("fs/cifs", NULL); } static int diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c index 95a54253c04..e1c854890f9 100644 --- a/fs/coda/coda_linux.c +++ b/fs/coda/coda_linux.c @@ -134,7 +134,7 @@ void coda_iattr_to_vattr(struct iattr *iattr, struct coda_vattr *vattr) unsigned int valid; /* clean out */ - vattr->va_mode = (umode_t) -1; + vattr->va_mode = -1; vattr->va_uid = (vuid_t) -1; vattr->va_gid = (vgid_t) -1; vattr->va_size = (off_t) -1; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index f89ff083079..3d2580e00a3 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -345,7 +345,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, } /* destruction routines: unlink, rmdir */ -int coda_unlink(struct inode *dir, struct 
dentry *de) +static int coda_unlink(struct inode *dir, struct dentry *de) { int error; const char *name = de->d_name.name; @@ -365,7 +365,7 @@ int coda_unlink(struct inode *dir, struct dentry *de) return 0; } -int coda_rmdir(struct inode *dir, struct dentry *de) +static int coda_rmdir(struct inode *dir, struct dentry *de) { const char *name = de->d_name.name; int len = de->d_name.len; @@ -424,7 +424,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, /* file operations for directories */ -int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) +static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) { struct coda_file_info *cfi; struct file *host_file; diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 59375efcf39..3e5637fc377 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -14,18 +14,26 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - struct inode *inode; + struct inode *inode, *toput_inode = NULL; spin_lock(&inode_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { if (inode->i_state & (I_FREEING|I_WILL_FREE)) continue; + if (inode->i_mapping->nrpages == 0) + continue; + __iget(inode); + spin_unlock(&inode_lock); __invalidate_mapping_pages(inode->i_mapping, 0, -1, true); + iput(toput_inode); + toput_inode = inode; + spin_lock(&inode_lock); } spin_unlock(&inode_lock); + iput(toput_inode); } -void drop_pagecache(void) +static void drop_pagecache(void) { struct super_block *sb; @@ -45,7 +53,7 @@ restart: spin_unlock(&sb_lock); } -void drop_slab(void) +static void drop_slab(void) { int nr_objects; diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 76885701551..1e34a7fd488 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o +ecryptfs-objs := dentry.o 
file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index a066e109ad9..cd62d75b2cc 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -119,21 +119,21 @@ static int ecryptfs_calculate_md5(char *dst, if (rc) { printk(KERN_ERR "%s: Error initializing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_update(&desc, &sg, len); if (rc) { printk(KERN_ERR "%s: Error updating crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } rc = crypto_hash_final(&desc, dst); if (rc) { printk(KERN_ERR "%s: Error finalizing crypto hash; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } out: @@ -437,7 +437,7 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to encrypt page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -487,7 +487,7 @@ int ecryptfs_encrypt_page(struct page *page) 0, PAGE_CACHE_SIZE); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -508,7 +508,7 @@ int ecryptfs_encrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } ecryptfs_lower_offset_for_extent( @@ -569,7 +569,7 @@ static int ecryptfs_decrypt_extent(struct page *page, if (rc < 0) { printk(KERN_ERR "%s: Error attempting to decrypt to page with " "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __FUNCTION__, page->index, extent_offset, + "rc = [%d]\n", __func__, page->index, extent_offset, rc); goto out; } @@ -622,7 +622,7 @@ int 
ecryptfs_decrypt_page(struct page *page) ecryptfs_inode); if (rc) printk(KERN_ERR "%s: Error attempting to copy " - "page at index [%ld]\n", __FUNCTION__, + "page at index [%ld]\n", __func__, page->index); goto out; } @@ -656,7 +656,7 @@ int ecryptfs_decrypt_page(struct page *page) extent_offset); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out; } } @@ -1215,7 +1215,7 @@ int ecryptfs_read_and_validate_header_region(char *data, ecryptfs_inode); if (rc) { printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out; } if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { @@ -1246,7 +1246,6 @@ ecryptfs_write_header_metadata(char *virt, (*written) = 6; } -struct kmem_cache *ecryptfs_header_cache_0; struct kmem_cache *ecryptfs_header_cache_1; struct kmem_cache *ecryptfs_header_cache_2; @@ -1320,7 +1319,7 @@ ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, 0, crypt_stat->num_header_bytes_at_front); if (rc) printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", __FUNCTION__, + "information to lower file; rc = [%d]\n", __func__, rc); return rc; } @@ -1365,14 +1364,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) } } else { printk(KERN_WARNING "%s: Encrypted flag not set\n", - __FUNCTION__); + __func__); rc = -EINVAL; goto out; } /* Released in this function */ virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); if (!virt) { - printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); + printk(KERN_ERR "%s: Out of memory\n", __func__); rc = -ENOMEM; goto out; } @@ -1380,7 +1379,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry); if (unlikely(rc)) { printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); goto out_free; } if (crypt_stat->flags & 
ECRYPTFS_METADATA_IN_XATTR) @@ -1391,7 +1390,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, virt); if (rc) { printk(KERN_ERR "%s: Error writing metadata out to lower file; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_free; } out_free: @@ -1585,7 +1584,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) if (!page_virt) { rc = -ENOMEM; printk(KERN_ERR "%s: Unable to allocate page_virt\n", - __FUNCTION__); + __func__); goto out; } rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 5007f788da0..951ee33a022 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -4,7 +4,7 @@ * * Copyright (C) 1997-2003 Erez Zadok * Copyright (C) 2001-2003 Stony Brook University - * Copyright (C) 2004-2007 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com> * Trevor S. 
Highland <trevor.highland@gmail.com> * Tyler Hicks <tyhicks@ou.edu> @@ -34,6 +34,7 @@ #include <linux/namei.h> #include <linux/scatterlist.h> #include <linux/hash.h> +#include <linux/nsproxy.h> /* Version verification for shared data structures w/ userspace */ #define ECRYPTFS_VERSION_MAJOR 0x00 @@ -49,11 +50,13 @@ #define ECRYPTFS_VERSIONING_POLICY 0x00000008 #define ECRYPTFS_VERSIONING_XATTR 0x00000010 #define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 +#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ | ECRYPTFS_VERSIONING_PUBKEY \ | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY) + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_DEVMISC) #define ECRYPTFS_MAX_PASSWORD_LENGTH 64 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH #define ECRYPTFS_SALT_SIZE 8 @@ -73,17 +76,14 @@ #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) -#define ECRYPTFS_NLMSG_HELO 100 -#define ECRYPTFS_NLMSG_QUIT 101 -#define ECRYPTFS_NLMSG_REQUEST 102 -#define ECRYPTFS_NLMSG_RESPONSE 103 #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 #define ECRYPTFS_DEFAULT_NUM_USERS 4 #define ECRYPTFS_MAX_NUM_USERS 32768 #define ECRYPTFS_TRANSPORT_NETLINK 0 #define ECRYPTFS_TRANSPORT_CONNECTOR 1 #define ECRYPTFS_TRANSPORT_RELAYFS 2 -#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK +#define ECRYPTFS_TRANSPORT_MISCDEV 3 +#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV #define ECRYPTFS_XATTR_NAME "user.ecryptfs" #define RFC2440_CIPHER_DES3_EDE 0x02 @@ -366,32 +366,63 @@ struct ecryptfs_auth_tok_list_item { }; struct ecryptfs_message { + /* Can never be greater than ecryptfs_message_buf_len */ + /* Used to find the parent msg_ctx */ + /* Inherits from msg_ctx->index */ u32 index; u32 data_len; u8 data[]; }; struct ecryptfs_msg_ctx { -#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001 
-#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002 -#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003 - u32 state; - unsigned int index; - unsigned int counter; +#define ECRYPTFS_MSG_CTX_STATE_FREE 0x01 +#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x02 +#define ECRYPTFS_MSG_CTX_STATE_DONE 0x03 +#define ECRYPTFS_MSG_CTX_STATE_NO_REPLY 0x04 + u8 state; +#define ECRYPTFS_MSG_HELO 100 +#define ECRYPTFS_MSG_QUIT 101 +#define ECRYPTFS_MSG_REQUEST 102 +#define ECRYPTFS_MSG_RESPONSE 103 + u8 type; + u32 index; + /* Counter converts to a sequence number. Each message sent + * out for which we expect a response has an associated + * sequence number. The response must have the same sequence + * number as the counter for the msg_stc for the message to be + * valid. */ + u32 counter; + size_t msg_size; struct ecryptfs_message *msg; struct task_struct *task; struct list_head node; + struct list_head daemon_out_list; struct mutex mux; }; extern unsigned int ecryptfs_transport; -struct ecryptfs_daemon_id { - pid_t pid; - uid_t uid; - struct hlist_node id_chain; +struct ecryptfs_daemon; + +struct ecryptfs_daemon { +#define ECRYPTFS_DAEMON_IN_READ 0x00000001 +#define ECRYPTFS_DAEMON_IN_POLL 0x00000002 +#define ECRYPTFS_DAEMON_ZOMBIE 0x00000004 +#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008 + u32 flags; + u32 num_queued_msg_ctx; + struct pid *pid; + uid_t euid; + struct user_namespace *user_ns; + struct task_struct *task; + struct mutex mux; + struct list_head msg_ctx_out_queue; + wait_queue_head_t wait; + struct hlist_node euid_chain; }; +extern struct mutex ecryptfs_daemon_hash_mux; + static inline struct ecryptfs_file_info * ecryptfs_file_to_private(struct file *file) { @@ -500,7 +531,7 @@ ecryptfs_set_dentry_lower_mnt(struct dentry *dentry, struct vfsmount *lower_mnt) } #define ecryptfs_printk(type, fmt, arg...) 
\ - __ecryptfs_printk(type "%s: " fmt, __FUNCTION__, ## arg); + __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); void __ecryptfs_printk(const char *fmt, ...); extern const struct file_operations ecryptfs_main_fops; @@ -581,10 +612,13 @@ int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid); -int ecryptfs_process_quit(uid_t uid, pid_t pid); -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, - pid_t pid, u32 seq); +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid); +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, + struct user_namespace *user_ns, struct pid *pid, + u32 seq); int ecryptfs_send_message(unsigned int transport, char *data, int data_len, struct ecryptfs_msg_ctx **msg_ctx); int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, @@ -593,14 +627,14 @@ int ecryptfs_init_messaging(unsigned int transport); void ecryptfs_release_messaging(unsigned int transport); int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid); + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_netlink(void); void ecryptfs_release_netlink(void); int ecryptfs_send_connector(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid); + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid); int ecryptfs_init_connector(void); void ecryptfs_release_connector(void); void @@ -642,5 +676,21 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, 
size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon); +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns); +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size); +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length); +int ecryptfs_init_ecryptfs_miscdev(void); +void ecryptfs_destroy_ecryptfs_miscdev(void); +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon); +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx); +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b8f5ed4ade..2258b8f654a 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -195,7 +195,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) file, ecryptfs_inode_to_private(inode)->lower_file); if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { ecryptfs_printk(KERN_DEBUG, "This is a directory\n"); + mutex_lock(&crypt_stat->cs_mutex); crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); + mutex_unlock(&crypt_stat->cs_mutex); rc = 0; goto out; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2386115210..0a1397335a8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -111,7 +111,7 @@ ecryptfs_do_create(struct inode *directory_inode, lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); lower_dir_dentry = lock_parent(lower_dentry); - if (unlikely(IS_ERR(lower_dir_dentry))) { + if (IS_ERR(lower_dir_dentry)) { ecryptfs_printk(KERN_ERR, "Error locking directory of " "dentry\n"); rc = 
PTR_ERR(lower_dir_dentry); @@ -121,7 +121,7 @@ ecryptfs_do_create(struct inode *directory_inode, ecryptfs_dentry, mode, nd); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); goto out_lock; } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, @@ -908,7 +908,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) ia->ia_valid &= ~ATTR_MODE; + mutex_lock(&lower_dentry->d_inode->i_mutex); rc = notify_change(lower_dentry, ia); + mutex_unlock(&lower_dentry->d_inode->i_mutex); out: fsstack_copy_attr_all(inode, lower_inode, NULL); return rc; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 682b1b2482c..e82b457180b 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -65,7 +65,7 @@ static int process_request_key_err(long err_code) } /** - * parse_packet_length + * ecryptfs_parse_packet_length * @data: Pointer to memory containing length at offset * @size: This function writes the decoded size to this memory * address; zero on error @@ -73,8 +73,8 @@ static int process_request_key_err(long err_code) * * Returns zero on success; non-zero on error */ -static int parse_packet_length(unsigned char *data, size_t *size, - size_t *length_size) +int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, + size_t *length_size) { int rc = 0; @@ -105,7 +105,7 @@ out: } /** - * write_packet_length + * ecryptfs_write_packet_length * @dest: The byte array target into which to write the length. Must * have at least 5 bytes allocated. * @size: The length to write. @@ -114,8 +114,8 @@ out: * * Returns zero on success; non-zero on error. 
*/ -static int write_packet_length(char *dest, size_t size, - size_t *packet_size_length) +int ecryptfs_write_packet_length(char *dest, size_t size, + size_t *packet_size_length) { int rc = 0; @@ -162,8 +162,8 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, goto out; } message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -172,8 +172,9 @@ write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key, i += packet_size_len; memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; - rc = write_packet_length(&message[i], session_key->encrypted_key_size, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + session_key->encrypted_key_size, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet " "header; cannot generate packet length\n"); @@ -225,7 +226,7 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, rc = -EIO; goto out; } - rc = parse_packet_length(&data[i], &m_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &m_size, &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -304,8 +305,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, goto out; } message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE; - rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -315,8 +316,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, 
memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = write_packet_length(&message[i], crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], crypt_stat->key_size + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -357,20 +358,25 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, /* verify that everything through the encrypted FEK size is present */ if (message_len < 4) { rc = -EIO; + printk(KERN_ERR "%s: message_len is [%Zd]; minimum acceptable " + "message length is [%d]\n", __func__, message_len, 4); goto out; } if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) { - ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n"); rc = -EIO; + printk(KERN_ERR "%s: Type should be ECRYPTFS_TAG_67\n", + __func__); goto out; } if (data[i++]) { - ecryptfs_printk(KERN_ERR, "Status indicator has non zero value" - " [%d]\n", data[i-1]); rc = -EIO; + printk(KERN_ERR "%s: Status indicator has non zero " + "value [%d]\n", __func__, data[i-1]); + goto out; } - rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len); + rc = ecryptfs_parse_packet_length(&data[i], &key_rec->enc_key_size, + &data_len); if (rc) { ecryptfs_printk(KERN_WARNING, "Error parsing packet length; " "rc = [%d]\n", rc); @@ -378,17 +384,17 @@ parse_tag_67_packet(struct ecryptfs_key_record *key_rec, } i += data_len; if (message_len < (i + key_rec->enc_key_size)) { - ecryptfs_printk(KERN_ERR, "message_len [%d]; max len is [%d]\n", - message_len, (i + key_rec->enc_key_size)); rc = -EIO; + printk(KERN_ERR "%s: message_len [%Zd]; max len is [%Zd]\n", + __func__, message_len, (i + key_rec->enc_key_size)); goto out; } if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) { - ecryptfs_printk(KERN_ERR, "Encrypted key_size [%d] larger than " - "the maximum 
key size [%d]\n", - key_rec->enc_key_size, - ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); rc = -EIO; + printk(KERN_ERR "%s: Encrypted key_size [%Zd] larger than " + "the maximum key size [%d]\n", __func__, + key_rec->enc_key_size, + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES); goto out; } memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size); @@ -445,7 +451,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), &netlink_message, &netlink_message_length); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet"); + ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); goto out; } rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, @@ -570,8 +576,8 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; " "rc = [%d]\n", rc); @@ -704,8 +710,8 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, goto out; } (*new_auth_tok) = &auth_tok_list_item->auth_tok; - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n", rc); @@ -852,8 +858,8 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents, rc = -EINVAL; goto out; } - rc = parse_packet_length(&data[(*packet_size)], &body_size, - &length_size); + rc = ecryptfs_parse_packet_length(&data[(*packet_size)], &body_size, + &length_size); if (rc) { printk(KERN_WARNING "Invalid tag 11 packet format\n"); goto out; @@ -1405,8 +1411,8 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, auth_tok->token.private_key.key_size; rc = 
pki_encrypt_session_key(auth_tok, crypt_stat, key_rec); if (rc) { - ecryptfs_printk(KERN_ERR, "Failed to encrypt session key " - "via a pki"); + printk(KERN_ERR "Failed to encrypt session key via a key " + "module; rc = [%d]\n", rc); goto out; } if (ecryptfs_verbosity > 0) { @@ -1430,8 +1436,9 @@ encrypted_session_key_set: goto out; } dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet " "header; cannot generate packet length\n"); @@ -1489,8 +1496,9 @@ write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents, goto out; } dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE; - rc = write_packet_length(&dest[(*packet_length)], - (max_packet_size - 4), &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_length)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 11 packet header; cannot " "generate packet length. rc = [%d]\n", rc); @@ -1682,8 +1690,9 @@ encrypted_session_key_set: dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE; /* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3) * to get the number of octets in the actual Tag 3 packet */ - rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4), - &packet_size_length); + rc = ecryptfs_write_packet_length(&dest[(*packet_size)], + (max_packet_size - 4), + &packet_size_length); if (rc) { printk(KERN_ERR "Error generating tag 3 packet header; cannot " "generate packet length. 
rc = [%d]\n", rc); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index d25ac9500a9..d603631601e 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -219,7 +219,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, if (rc) { printk(KERN_ERR "%s: Error attempting to initialize the " "persistent file for the dentry with name [%s]; " - "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc); + "rc = [%d]\n", __func__, dentry->d_name.name, rc); goto out; } out: diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 9cc2aec27b0..1b5c20058ac 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -1,7 +1,7 @@ /** * eCryptfs: Linux filesystem encryption layer * - * Copyright (C) 2004-2006 International Business Machines Corp. + * Copyright (C) 2004-2008 International Business Machines Corp. * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> * Tyler Hicks <tyhicks@ou.edu> * @@ -20,19 +20,21 @@ * 02111-1307, USA. */ #include <linux/sched.h> +#include <linux/user_namespace.h> +#include <linux/nsproxy.h> #include "ecryptfs_kernel.h" static LIST_HEAD(ecryptfs_msg_ctx_free_list); static LIST_HEAD(ecryptfs_msg_ctx_alloc_list); static struct mutex ecryptfs_msg_ctx_lists_mux; -static struct hlist_head *ecryptfs_daemon_id_hash; -static struct mutex ecryptfs_daemon_id_hash_mux; +static struct hlist_head *ecryptfs_daemon_hash; +struct mutex ecryptfs_daemon_hash_mux; static int ecryptfs_hash_buckets; #define ecryptfs_uid_hash(uid) \ hash_long((unsigned long)uid, ecryptfs_hash_buckets) -static unsigned int ecryptfs_msg_counter; +static u32 ecryptfs_msg_counter; static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; /** @@ -40,9 +42,10 @@ static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr; * @msg_ctx: The context that was acquired from the free list * * Acquires a context element from the free list and locks the mutex - * on the context. 
Returns zero on success; non-zero on error or upon - * failure to acquire a free context element. Be sure to lock the - * list mutex before calling. + * on the context. Sets the msg_ctx task to current. Returns zero on + * success; non-zero on error or upon failure to acquire a free + * context element. Must be called with ecryptfs_msg_ctx_lists_mux + * held. */ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) { @@ -50,11 +53,11 @@ static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx) int rc; if (list_empty(&ecryptfs_msg_ctx_free_list)) { - ecryptfs_printk(KERN_WARNING, "The eCryptfs free " - "context list is empty. It may be helpful to " - "specify the ecryptfs_message_buf_len " - "parameter to be greater than the current " - "value of [%d]\n", ecryptfs_message_buf_len); + printk(KERN_WARNING "%s: The eCryptfs free " + "context list is empty. It may be helpful to " + "specify the ecryptfs_message_buf_len " + "parameter to be greater than the current " + "value of [%d]\n", __func__, ecryptfs_message_buf_len); rc = -ENOMEM; goto out; } @@ -75,8 +78,7 @@ out: * ecryptfs_msg_ctx_free_to_alloc * @msg_ctx: The context to move from the free list to the alloc list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. */ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) { @@ -89,36 +91,39 @@ static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx) * ecryptfs_msg_ctx_alloc_to_free * @msg_ctx: The context to move from the alloc list to the free list * - * Be sure to lock the list mutex and the context mutex before - * calling. + * Must be called with ecryptfs_msg_ctx_lists_mux held. 
*/ -static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) +void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx) { list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list); if (msg_ctx->msg) kfree(msg_ctx->msg); + msg_ctx->msg = NULL; msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE; } /** - * ecryptfs_find_daemon_id - * @uid: The user id which maps to the desired daemon id - * @id: If return value is zero, points to the desired daemon id - * pointer + * ecryptfs_find_daemon_by_euid + * @euid: The effective user id which maps to the desired daemon id + * @user_ns: The namespace in which @euid applies + * @daemon: If return value is zero, points to the desired daemon pointer * - * Search the hash list for the given user id. Returns zero if the - * user id exists in the list; non-zero otherwise. The daemon id hash - * mutex should be held before calling this function. + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Search the hash list for the given user id. + * + * Returns zero if the user id exists in the list; non-zero otherwise. 
*/ -static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id) +int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns) { struct hlist_node *elem; int rc; - hlist_for_each_entry(*id, elem, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)], - id_chain) { - if ((*id)->uid == uid) { + hlist_for_each_entry(*daemon, elem, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)], + euid_chain) { + if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) { rc = 0; goto out; } @@ -128,181 +133,325 @@ out: return rc; } -static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type, - pid_t pid) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); + +/** + * ecryptfs_send_raw_message + * @transport: Transport type + * @msg_type: Message type + * @daemon: Daemon struct for recipient of message + * + * A raw message is one that does not include an ecryptfs_message + * struct. It simply has a type. + * + * Must be called with ecryptfs_daemon_hash_mux held. 
+ * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, + struct ecryptfs_daemon *daemon) { + struct ecryptfs_msg_ctx *msg_ctx; int rc; switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid); + rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, + daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, + &msg_ctx); + if (rc) { + printk(KERN_ERR "%s: Error whilst attempting to send " + "message via procfs; rc = [%d]\n", __func__, rc); + goto out; + } + /* Raw messages are logically context-free (e.g., no + * reply is expected), so we set the state of the + * ecryptfs_msg_ctx object to indicate that it should + * be freed as soon as the transport sends out the message. */ + mutex_lock(&msg_ctx->mux); + msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; + mutex_unlock(&msg_ctx->mux); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } +out: + return rc; +} + +/** + * ecryptfs_spawn_daemon - Create and initialize a new daemon struct + * @daemon: Pointer to set to newly allocated daemon struct + * @euid: Effective user id for the daemon + * @user_ns: The namespace in which @euid applies + * @pid: Process id for the daemon + * + * Must be called ceremoniously while in possession of + * ecryptfs_sacred_daemon_hash_mux + * + * Returns zero on success; non-zero otherwise + */ +int +ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) +{ + int rc = 0; + + (*daemon) = kzalloc(sizeof(**daemon), GFP_KERNEL); + if (!(*daemon)) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, sizeof(**daemon)); + goto out; + } + (*daemon)->euid = euid; + (*daemon)->user_ns = get_user_ns(user_ns); + (*daemon)->pid = 
get_pid(pid); + (*daemon)->task = current; + mutex_init(&(*daemon)->mux); + INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue); + init_waitqueue_head(&(*daemon)->wait); + (*daemon)->num_queued_msg_ctx = 0; + hlist_add_head(&(*daemon)->euid_chain, + &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]); +out: return rc; } /** * ecryptfs_process_helo * @transport: The underlying transport (netlink, etc.) - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * - * Adds the uid and pid values to the daemon id hash. If a uid + * Adds the euid and pid values to the daemon euid hash. If an euid * already has a daemon pid registered, the daemon will be - * unregistered before the new daemon id is put into the hash list. - * Returns zero after adding a new daemon id to the hash list; + * unregistered before the new daemon is put into the hash list. + * Returns zero after adding a new daemon to the hash list; * non-zero otherwise. 
*/ -int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid) +int ecryptfs_process_helo(unsigned int transport, uid_t euid, + struct user_namespace *user_ns, struct pid *pid) { - struct ecryptfs_daemon_id *new_id; - struct ecryptfs_daemon_id *old_id; + struct ecryptfs_daemon *new_daemon; + struct ecryptfs_daemon *old_daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - new_id = kmalloc(sizeof(*new_id), GFP_KERNEL); - if (!new_id) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable " - "to register daemon [%d] for user [%d]\n", - pid, uid); - goto unlock; - } - if (!ecryptfs_find_daemon_id(uid, &old_id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns); + if (rc != 0) { printk(KERN_WARNING "Received request from user [%d] " - "to register daemon [%d]; unregistering daemon " - "[%d]\n", uid, pid, old_id->pid); - hlist_del(&old_id->id_chain); - rc = ecryptfs_send_raw_message(transport, ECRYPTFS_NLMSG_QUIT, - old_id->pid); + "to register daemon [0x%p]; unregistering daemon " + "[0x%p]\n", euid, pid, old_daemon->pid); + rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, + old_daemon); if (rc) printk(KERN_WARNING "Failed to send QUIT " - "message to daemon [%d]; rc = [%d]\n", - old_id->pid, rc); - kfree(old_id); + "message to daemon [0x%p]; rc = [%d]\n", + old_daemon->pid, rc); + hlist_del(&old_daemon->euid_chain); + kfree(old_daemon); } - new_id->uid = uid; - new_id->pid = pid; - hlist_add_head(&new_id->id_chain, - &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid); + if (rc) + printk(KERN_ERR "%s: The gods are displeased with this attempt " + "to create a new daemon object for euid [%d]; pid " + "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * 
ecryptfs_exorcise_daemon - Destroy the daemon struct + * + * Must be called ceremoniously while in possession of + * ecryptfs_daemon_hash_mux and the daemon's own mux. + */ +int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon) +{ + struct ecryptfs_msg_ctx *msg_ctx, *msg_ctx_tmp; + int rc = 0; + + mutex_lock(&daemon->mux); + if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ) + || (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) { + rc = -EBUSY; + printk(KERN_WARNING "%s: Attempt to destroy daemon with pid " + "[0x%p], but it is in the midst of a read or a poll\n", + __func__, daemon->pid); + mutex_unlock(&daemon->mux); + goto out; + } + list_for_each_entry_safe(msg_ctx, msg_ctx_tmp, + &daemon->msg_ctx_out_queue, daemon_out_list) { + list_del(&msg_ctx->daemon_out_list); + daemon->num_queued_msg_ctx--; + printk(KERN_WARNING "%s: Warning: dropping message that is in " + "the out queue of a dying daemon\n", __func__); + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); + } + hlist_del(&daemon->euid_chain); + if (daemon->task) + wake_up_process(daemon->task); + if (daemon->pid) + put_pid(daemon->pid); + if (daemon->user_ns) + put_user_ns(daemon->user_ns); + mutex_unlock(&daemon->mux); + memset(daemon, 0, sizeof(*daemon)); + kfree(daemon); +out: return rc; } /** * ecryptfs_process_quit - * @uid: The user ID owner of the message + * @euid: The user ID owner of the message + * @user_ns: The namespace in which @euid applies * @pid: The process ID for the userspace program that sent the * message * - * Deletes the corresponding daemon id for the given uid and pid, if + * Deletes the corresponding daemon for the given euid and pid, if * it is the registered that is requesting the deletion. Returns zero - * after deleting the desired daemon id; non-zero otherwise. + * after deleting the desired daemon; non-zero otherwise. 
*/ -int ecryptfs_process_quit(uid_t uid, pid_t pid) +int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(uid, &id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns); + if (rc || !daemon) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Received request from user [%d] to " - "unregister unrecognized daemon [%d]\n", uid, - pid); - goto unlock; + printk(KERN_ERR "Received request from user [%d] to " + "unregister unrecognized daemon [0x%p]\n", euid, pid); + goto out_unlock; } - if (id->pid != pid) { - rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Received request from user [%d] " - "with pid [%d] to unregister daemon [%d]\n", - uid, pid, id->pid); - goto unlock; - } - hlist_del(&id->id_chain); - kfree(id); - rc = 0; -unlock: - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_exorcise_daemon(daemon); +out_unlock: + mutex_unlock(&ecryptfs_daemon_hash_mux); return rc; } /** * ecryptfs_process_reponse * @msg: The ecryptfs message received; the caller should sanity check - * msg->data_len + * msg->data_len and free the memory * @pid: The process ID of the userspace application that sent the * message - * @seq: The sequence number of the message + * @seq: The sequence number of the message; must match the sequence + * number for the existing message context waiting for this + * response + * + * Processes a response message after sending an operation request to + * userspace. Some other process is awaiting this response. Before + * sending out its first communications, the other process allocated a + * msg_ctx from the ecryptfs_msg_ctx_arr at a particular index. The + * response message contains this index so that we can copy over the + * response message into the msg_ctx that the process holds a + * reference to. 
The other process is going to wake up, check to see + * that msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE, and then + * proceed to read off and process the response message. Returns zero + * upon delivery to desired context element; non-zero upon delivery + * failure or error. * - * Processes a response message after sending a operation request to - * userspace. Returns zero upon delivery to desired context element; - * non-zero upon delivery failure or error. + * Returns zero on success; non-zero otherwise */ -int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid, - pid_t pid, u32 seq) +int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, + struct user_namespace *user_ns, struct pid *pid, + u32 seq) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; - int msg_size; + size_t msg_size; + struct nsproxy *nsproxy; + struct user_namespace *current_user_ns; int rc; if (msg->index >= ecryptfs_message_buf_len) { rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Attempt to reference " - "context buffer at index [%d]; maximum " - "allowable is [%d]\n", msg->index, - (ecryptfs_message_buf_len - 1)); + printk(KERN_ERR "%s: Attempt to reference " + "context buffer at index [%d]; maximum " + "allowable is [%d]\n", __func__, msg->index, + (ecryptfs_message_buf_len - 1)); goto out; } msg_ctx = &ecryptfs_msg_ctx_arr[msg->index]; mutex_lock(&msg_ctx->mux); - if (ecryptfs_find_daemon_id(msg_ctx->task->euid, &id)) { + mutex_lock(&ecryptfs_daemon_hash_mux); + rcu_read_lock(); + nsproxy = task_nsproxy(msg_ctx->task); + if (nsproxy == NULL) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "User [%d] received a " - "message response from process [%d] but does " - "not have a registered daemon\n", - msg_ctx->task->euid, pid); + printk(KERN_ERR "%s: Receiving process is a zombie. 
Dropping " + "message.\n", __func__); + rcu_read_unlock(); + mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - if (msg_ctx->task->euid != uid) { + current_user_ns = nsproxy->user_ns; + rc = ecryptfs_find_daemon_by_euid(&daemon, msg_ctx->task->euid, + current_user_ns); + rcu_read_unlock(); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (rc) { + rc = -EBADMSG; + printk(KERN_WARNING "%s: User [%d] received a " + "message response from process [0x%p] but does " + "not have a registered daemon\n", __func__, + msg_ctx->task->euid, pid); + goto wake_up; + } + if (msg_ctx->task->euid != euid) { rc = -EBADMSG; - ecryptfs_printk(KERN_WARNING, "Received message from user " - "[%d]; expected message from user [%d]\n", - uid, msg_ctx->task->euid); + printk(KERN_WARNING "%s: Received message from user " + "[%d]; expected message from user [%d]\n", __func__, + euid, msg_ctx->task->euid); goto unlock; } - if (id->pid != pid) { + if (current_user_ns != user_ns) { rc = -EBADMSG; - ecryptfs_printk(KERN_ERR, "User [%d] received a " - "message response from an unrecognized " - "process [%d]\n", msg_ctx->task->euid, pid); + printk(KERN_WARNING "%s: Received message from user_ns " + "[0x%p]; expected message from user_ns [0x%p]\n", + __func__, user_ns, nsproxy->user_ns); + goto unlock; + } + if (daemon->pid != pid) { + rc = -EBADMSG; + printk(KERN_ERR "%s: User [%d] sent a message response " + "from an unrecognized process [0x%p]\n", + __func__, msg_ctx->task->euid, pid); goto unlock; } if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Desired context element is not " - "pending a response\n"); + printk(KERN_WARNING "%s: Desired context element is not " + "pending a response\n", __func__); goto unlock; } else if (msg_ctx->counter != seq) { rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, "Invalid message sequence; " - "expected [%d]; received [%d]\n", - msg_ctx->counter, seq); + printk(KERN_WARNING "%s: Invalid message sequence; 
" + "expected [%d]; received [%d]\n", __func__, + msg_ctx->counter, seq); goto unlock; } - msg_size = sizeof(*msg) + msg->data_len; + msg_size = (sizeof(*msg) + msg->data_len); msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); if (!msg_ctx->msg) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate [%Zd] bytes of " + "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } memcpy(msg_ctx->msg, msg, msg_size); @@ -317,34 +466,38 @@ out: } /** - * ecryptfs_send_message + * ecryptfs_send_message_locked * @transport: The transport over which to send the message (i.e., * netlink) * @data: The data to send * @data_len: The length of data * @msg_ctx: The message context allocated for the send + * + * Must be called with ecryptfs_daemon_hash_mux held. + * + * Returns zero on success; non-zero otherwise */ -int ecryptfs_send_message(unsigned int transport, char *data, int data_len, - struct ecryptfs_msg_ctx **msg_ctx) +static int +ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, + u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) { - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int rc; - mutex_lock(&ecryptfs_daemon_id_hash_mux); - if (ecryptfs_find_daemon_id(current->euid, &id)) { - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + if (rc || !daemon) { rc = -ENOTCONN; - ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon " - "registered\n", current->euid); + printk(KERN_ERR "%s: User [%d] does not have a daemon " + "registered\n", __func__, current->euid); goto out; } - mutex_unlock(&ecryptfs_daemon_id_hash_mux); mutex_lock(&ecryptfs_msg_ctx_lists_mux); rc = ecryptfs_acquire_free_msg_ctx(msg_ctx); if (rc) { mutex_unlock(&ecryptfs_msg_ctx_lists_mux); - ecryptfs_printk(KERN_WARNING, "Could not claim a free " - "context element\n"); + printk(KERN_WARNING "%s: Could not claim a 
free " + "context element\n", __func__); goto out; } ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); @@ -352,23 +505,50 @@ int ecryptfs_send_message(unsigned int transport, char *data, int data_len, mutex_unlock(&ecryptfs_msg_ctx_lists_mux); switch (transport) { case ECRYPTFS_TRANSPORT_NETLINK: - rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, - ECRYPTFS_NLMSG_REQUEST, 0, id->pid); + rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, + 0, daemon->pid); + break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, + 0, daemon); break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: rc = -ENOSYS; } - if (rc) { - printk(KERN_ERR "Error attempting to send message to userspace " - "daemon; rc = [%d]\n", rc); - } + if (rc) + printk(KERN_ERR "%s: Error attempting to send message to " + "userspace daemon; rc = [%d]\n", __func__, rc); out: return rc; } /** + * ecryptfs_send_message + * @transport: The transport over which to send the message (i.e., + * netlink) + * @data: The data to send + * @data_len: The length of data + * @msg_ctx: The message context allocated for the send + * + * Grabs ecryptfs_daemon_hash_mux. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_message(unsigned int transport, char *data, int data_len, + struct ecryptfs_msg_ctx **msg_ctx) +{ + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_send_message_locked(transport, data, data_len, + ECRYPTFS_MSG_REQUEST, msg_ctx); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** * ecryptfs_wait_for_response * @msg_ctx: The context that was assigned when sending a message * @msg: The incoming message from userspace; not set if rc != 0 @@ -377,7 +557,7 @@ out: * of time exceeds ecryptfs_message_wait_timeout. If zero is * returned, msg will point to a valid message from userspace; a * non-zero value is returned upon failure to receive a message or an - * error occurs. 
+ * error occurs. Callee must free @msg on success. */ int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, struct ecryptfs_message **msg) @@ -413,32 +593,32 @@ int ecryptfs_init_messaging(unsigned int transport) if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) { ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS; - ecryptfs_printk(KERN_WARNING, "Specified number of users is " - "too large, defaulting to [%d] users\n", - ecryptfs_number_of_users); + printk(KERN_WARNING "%s: Specified number of users is " + "too large, defaulting to [%d] users\n", __func__, + ecryptfs_number_of_users); } - mutex_init(&ecryptfs_daemon_id_hash_mux); - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_init(&ecryptfs_daemon_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); ecryptfs_hash_buckets = 1; while (ecryptfs_number_of_users >> ecryptfs_hash_buckets) ecryptfs_hash_buckets++; - ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head) - * ecryptfs_hash_buckets, GFP_KERNEL); - if (!ecryptfs_daemon_id_hash) { + ecryptfs_daemon_hash = kmalloc((sizeof(struct hlist_head) + * ecryptfs_hash_buckets), GFP_KERNEL); + if (!ecryptfs_daemon_hash) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); + mutex_unlock(&ecryptfs_daemon_hash_mux); goto out; } for (i = 0; i < ecryptfs_hash_buckets; i++) - INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); - + INIT_HLIST_HEAD(&ecryptfs_daemon_hash[i]); + mutex_unlock(&ecryptfs_daemon_hash_mux); ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx) - * ecryptfs_message_buf_len), GFP_KERNEL); + * ecryptfs_message_buf_len), + GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n"); + printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; } mutex_init(&ecryptfs_msg_ctx_lists_mux); 
@@ -446,6 +626,7 @@ int ecryptfs_init_messaging(unsigned int transport) ecryptfs_msg_counter = 0; for (i = 0; i < ecryptfs_message_buf_len; i++) { INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node); + INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].daemon_out_list); mutex_init(&ecryptfs_msg_ctx_arr[i].mux); mutex_lock(&ecryptfs_msg_ctx_arr[i].mux); ecryptfs_msg_ctx_arr[i].index = i; @@ -464,6 +645,11 @@ int ecryptfs_init_messaging(unsigned int transport) if (rc) ecryptfs_release_messaging(transport); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + rc = ecryptfs_init_ecryptfs_miscdev(); + if (rc) + ecryptfs_release_messaging(transport); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: @@ -488,27 +674,37 @@ void ecryptfs_release_messaging(unsigned int transport) kfree(ecryptfs_msg_ctx_arr); mutex_unlock(&ecryptfs_msg_ctx_lists_mux); } - if (ecryptfs_daemon_id_hash) { + if (ecryptfs_daemon_hash) { struct hlist_node *elem; - struct ecryptfs_daemon_id *id; + struct ecryptfs_daemon *daemon; int i; - mutex_lock(&ecryptfs_daemon_id_hash_mux); + mutex_lock(&ecryptfs_daemon_hash_mux); for (i = 0; i < ecryptfs_hash_buckets; i++) { - hlist_for_each_entry(id, elem, - &ecryptfs_daemon_id_hash[i], - id_chain) { - hlist_del(elem); - kfree(id); + int rc; + + hlist_for_each_entry(daemon, elem, + &ecryptfs_daemon_hash[i], + euid_chain) { + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) + printk(KERN_ERR "%s: Error whilst " + "attempting to destroy daemon; " + "rc = [%d]. 
Dazed and confused, " + "but trying to continue.\n", + __func__, rc); } } - kfree(ecryptfs_daemon_id_hash); - mutex_unlock(&ecryptfs_daemon_id_hash_mux); + kfree(ecryptfs_daemon_hash); + mutex_unlock(&ecryptfs_daemon_hash_mux); } switch(transport) { case ECRYPTFS_TRANSPORT_NETLINK: ecryptfs_release_netlink(); break; + case ECRYPTFS_TRANSPORT_MISCDEV: + ecryptfs_destroy_ecryptfs_miscdev(); + break; case ECRYPTFS_TRANSPORT_CONNECTOR: case ECRYPTFS_TRANSPORT_RELAYFS: default: diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c new file mode 100644 index 00000000000..788995efd1d --- /dev/null +++ b/fs/ecryptfs/miscdev.c @@ -0,0 +1,598 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * + * Copyright (C) 2008 International Business Machines Corp. + * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <linux/fs.h> +#include <linux/hash.h> +#include <linux/random.h> +#include <linux/miscdevice.h> +#include <linux/poll.h> +#include <linux/wait.h> +#include <linux/module.h> +#include "ecryptfs_kernel.h" + +static atomic_t ecryptfs_num_miscdev_opens; + +/** + * ecryptfs_miscdev_poll + * @file: dev file (ignored) + * @pt: dev poll table (ignored) + * + * Returns the poll mask + */ +static unsigned int +ecryptfs_miscdev_poll(struct file *file, poll_table *pt) +{ + struct ecryptfs_daemon *daemon; + unsigned int mask = 0; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? */ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + printk(KERN_WARNING "%s: Attempt to poll on zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) + goto out_unlock_daemon; + if (daemon->flags & ECRYPTFS_DAEMON_IN_POLL) + goto out_unlock_daemon; + daemon->flags |= ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + poll_wait(file, &daemon->wait, pt); + mutex_lock(&daemon->mux); + if (!list_empty(&daemon->msg_ctx_out_queue)) + mask |= POLLIN | POLLRDNORM; +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_POLL; + mutex_unlock(&daemon->mux); + return mask; +} + +/** + * ecryptfs_miscdev_open + * @inode: inode of miscdev handle (ignored) + * @file: file for miscdev handle (ignored) + * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_open(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = try_module_get(THIS_MODULE); + if (rc == 0) { + rc = -EIO; + printk(KERN_ERR "%s: Error attempting to increment module use " + "count; rc = [%d]\n", __func__, rc); + goto 
out_unlock_daemon_list; + } + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + if (rc || !daemon) { + rc = ecryptfs_spawn_daemon(&daemon, current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to spawn daemon; " + "rc = [%d]\n", __func__, rc); + goto out_module_put_unlock_daemon_list; + } + } + mutex_lock(&daemon->mux); + if (daemon->pid != task_pid(current)) { + rc = -EINVAL; + printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], " + "but pid [0x%p] has attempted to open the handle " + "instead\n", __func__, daemon->pid, daemon->euid, + task_pid(current)); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) { + rc = -EBUSY; + printk(KERN_ERR "%s: Miscellaneous device handle may only be " + "opened once per daemon; pid [0x%p] already has this " + "handle open\n", __func__, daemon->pid); + goto out_unlock_daemon; + } + daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_inc(&ecryptfs_num_miscdev_opens); +out_unlock_daemon: + mutex_unlock(&daemon->mux); +out_module_put_unlock_daemon_list: + if (rc) + module_put(THIS_MODULE); +out_unlock_daemon_list: + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_miscdev_release + * @inode: inode of fs/ecryptfs/euid handle (ignored) + * @file: file for fs/ecryptfs/euid handle (ignored) + * + * This keeps the daemon registered until the daemon sends another + * ioctl to fs/ecryptfs/ctl or until the kernel module unregisters. 
+ * + * Returns zero on success; non-zero otherwise + */ +static int +ecryptfs_miscdev_release(struct inode *inode, struct file *file) +{ + struct ecryptfs_daemon *daemon = NULL; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + BUG_ON(daemon->pid != task_pid(current)); + BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); + daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; + atomic_dec(&ecryptfs_num_miscdev_opens); + mutex_unlock(&daemon->mux); + rc = ecryptfs_exorcise_daemon(daemon); + if (rc) { + printk(KERN_CRIT "%s: Fatal error whilst attempting to " + "shut down daemon; rc = [%d]. Please report this " + "bug.\n", __func__, rc); + BUG(); + } + module_put(THIS_MODULE); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_send_miscdev + * @data: Data to send to daemon; may be NULL + * @data_size: Amount of data to send to daemon + * @msg_ctx: Message context, which is used to handle the reply. If + * this is NULL, then we do not expect a reply. + * @msg_type: Type of message + * @msg_flags: Flags for message + * @daemon: eCryptfs daemon object + * + * Add msg_ctx to queue and then, if it exists, notify the blocked + * miscdevess about the data being available. Must be called with + * ecryptfs_daemon_hash_mux held. 
+ * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_send_miscdev(char *data, size_t data_size, + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct ecryptfs_daemon *daemon) +{ + int rc = 0; + + mutex_lock(&msg_ctx->mux); + if (data) { + msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), + GFP_KERNEL); + if (!msg_ctx->msg) { + rc = -ENOMEM; + printk(KERN_ERR "%s: Out of memory whilst attempting " + "to kmalloc(%Zd, GFP_KERNEL)\n", __func__, + (sizeof(*msg_ctx->msg) + data_size)); + goto out_unlock; + } + } else + msg_ctx->msg = NULL; + msg_ctx->msg->index = msg_ctx->index; + msg_ctx->msg->data_len = data_size; + msg_ctx->type = msg_type; + if (data) { + memcpy(msg_ctx->msg->data, data, data_size); + msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); + } else + msg_ctx->msg_size = 0; + mutex_lock(&daemon->mux); + list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + daemon->num_queued_msg_ctx++; + wake_up_interruptible(&daemon->wait); + mutex_unlock(&daemon->mux); +out_unlock: + mutex_unlock(&msg_ctx->mux); + return rc; +} + +/** + * ecryptfs_miscdev_read - format and send message from queue + * @file: fs/ecryptfs/euid miscdevfs handle (ignored) + * @buf: User buffer into which to copy the next message on the daemon queue + * @count: Amount of space available in @buf + * @ppos: Offset in file (ignored) + * + * Pulls the most recent message from the daemon queue, formats it for + * being sent via a miscdevfs handle, and copies it into @buf + * + * Returns the number of bytes copied into the user buffer + */ +static ssize_t +ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct ecryptfs_daemon *daemon; + struct ecryptfs_msg_ctx *msg_ctx; + size_t packet_length_size; + u32 counter_nbo; + char packet_length[3]; + size_t i; + size_t total_length; + int rc; + + mutex_lock(&ecryptfs_daemon_hash_mux); + /* TODO: Just use file->private_data? 
*/ + rc = ecryptfs_find_daemon_by_euid(&daemon, current->euid, + current->nsproxy->user_ns); + BUG_ON(rc || !daemon); + mutex_lock(&daemon->mux); + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + printk(KERN_WARNING "%s: Attempt to read from zombified " + "daemon\n", __func__); + goto out_unlock_daemon; + } + if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { + rc = 0; + goto out_unlock_daemon; + } + /* This daemon will not go away so long as this flag is set */ + daemon->flags |= ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&ecryptfs_daemon_hash_mux); +check_list: + if (list_empty(&daemon->msg_ctx_out_queue)) { + mutex_unlock(&daemon->mux); + rc = wait_event_interruptible( + daemon->wait, !list_empty(&daemon->msg_ctx_out_queue)); + mutex_lock(&daemon->mux); + if (rc < 0) { + rc = 0; + goto out_unlock_daemon; + } + } + if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { + rc = 0; + goto out_unlock_daemon; + } + if (list_empty(&daemon->msg_ctx_out_queue)) { + /* Something else jumped in since the + * wait_event_interruptable() and removed the + * message from the queue; try again */ + goto check_list; + } + BUG_ON(current->euid != daemon->euid); + BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(task_pid(current) != daemon->pid); + msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, + struct ecryptfs_msg_ctx, daemon_out_list); + BUG_ON(!msg_ctx); + mutex_lock(&msg_ctx->mux); + if (msg_ctx->msg) { + rc = ecryptfs_write_packet_length(packet_length, + msg_ctx->msg_size, + &packet_length_size); + if (rc) { + rc = 0; + printk(KERN_WARNING "%s: Error writing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + } else { + packet_length_size = 0; + msg_ctx->msg_size = 0; + } + /* miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Octets 5-N1 not written if the packet 
type does not + * include a message */ + total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size); + if (count < total_length) { + rc = 0; + printk(KERN_WARNING "%s: Only given user buffer of " + "size [%Zd], but we need [%Zd] to read the " + "pending message\n", __func__, count, total_length); + goto out_unlock_msg_ctx; + } + i = 0; + buf[i++] = msg_ctx->type; + counter_nbo = cpu_to_be32(msg_ctx->counter); + memcpy(&buf[i], (char *)&counter_nbo, 4); + i += 4; + if (msg_ctx->msg) { + memcpy(&buf[i], packet_length, packet_length_size); + i += packet_length_size; + rc = copy_to_user(&buf[i], msg_ctx->msg, msg_ctx->msg_size); + if (rc) { + printk(KERN_ERR "%s: copy_to_user returned error " + "[%d]\n", __func__, rc); + goto out_unlock_msg_ctx; + } + i += msg_ctx->msg_size; + } + rc = i; + list_del(&msg_ctx->daemon_out_list); + kfree(msg_ctx->msg); + msg_ctx->msg = NULL; + /* We do not expect a reply from the userspace daemon for any + * message type other than ECRYPTFS_MSG_REQUEST */ + if (msg_ctx->type != ECRYPTFS_MSG_REQUEST) + ecryptfs_msg_ctx_alloc_to_free(msg_ctx); +out_unlock_msg_ctx: + mutex_unlock(&msg_ctx->mux); +out_unlock_daemon: + daemon->flags &= ~ECRYPTFS_DAEMON_IN_READ; + mutex_unlock(&daemon->mux); + return rc; +} + +/** + * ecryptfs_miscdev_helo + * @euid: effective user id of miscdevess sending helo packet + * @user_ns: The namespace in which @euid applies + * @pid: miscdevess id of miscdevess sending helo packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) +{ + int rc; + + rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns, + pid); + if (rc) + printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_quit + * @euid: effective user id of miscdevess sending quit packet + * @user_ns: The namespace in which @euid applies + * @pid: miscdevess id of miscdevess sending quit 
packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns, + struct pid *pid) +{ + int rc; + + rc = ecryptfs_process_quit(euid, user_ns, pid); + if (rc) + printk(KERN_WARNING + "Error processing QUIT message; rc = [%d]\n", rc); + return rc; +} + +/** + * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon + * @data: Bytes comprising struct ecryptfs_message + * @data_size: sizeof(struct ecryptfs_message) + data len + * @euid: Effective user id of miscdevess sending the miscdev response + * @user_ns: The namespace in which @euid applies + * @pid: Miscdevess id of miscdevess sending the miscdev response + * @seq: Sequence number for miscdev response packet + * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_miscdev_response(char *data, size_t data_size, + uid_t euid, struct user_namespace *user_ns, + struct pid *pid, u32 seq) +{ + struct ecryptfs_message *msg = (struct ecryptfs_message *)data; + int rc; + + if ((sizeof(*msg) + msg->data_len) != data_size) { + printk(KERN_WARNING "%s: (sizeof(*msg) + msg->data_len) = " + "[%Zd]; data_size = [%Zd]. 
Invalid packet.\n", __func__, + (sizeof(*msg) + msg->data_len), data_size); + rc = -EINVAL; + goto out; + } + rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq); + if (rc) + printk(KERN_ERR + "Error processing response message; rc = [%d]\n", rc); +out: + return rc; +} + +/** + * ecryptfs_miscdev_write - handle write to daemon miscdev handle + * @file: File for misc dev handle (ignored) + * @buf: Buffer containing user data + * @count: Amount of data in @buf + * @ppos: Pointer to offset in file (ignored) + * + * miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter (0's for non-response) + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Returns the number of bytes read from @buf + */ +static ssize_t +ecryptfs_miscdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + u32 counter_nbo, seq; + size_t packet_size, packet_size_length, i; + ssize_t sz = 0; + char *data; + int rc; + + if (count == 0) + goto out; + data = kmalloc(count, GFP_KERNEL); + if (!data) { + printk(KERN_ERR "%s: Out of memory whilst attempting to " + "kmalloc([%Zd], GFP_KERNEL)\n", __func__, count); + goto out; + } + rc = copy_from_user(data, buf, count); + if (rc) { + printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", + __func__, rc); + goto out_free; + } + sz = count; + i = 0; + switch (data[i++]) { + case ECRYPTFS_MSG_RESPONSE: + if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { + printk(KERN_WARNING "%s: Minimum acceptable packet " + "size is [%Zd], but amount of data written is " + "only [%Zd]. 
Discarding response packet.\n", + __func__, + (1 + 4 + 1 + sizeof(struct ecryptfs_message)), + count); + goto out_free; + } + memcpy((char *)&counter_nbo, &data[i], 4); + seq = be32_to_cpu(counter_nbo); + i += 4; + rc = ecryptfs_parse_packet_length(&data[i], &packet_size, + &packet_size_length); + if (rc) { + printk(KERN_WARNING "%s: Error parsing packet length; " + "rc = [%d]\n", __func__, rc); + goto out_free; + } + i += packet_size_length; + if ((1 + 4 + packet_size_length + packet_size) != count) { + printk(KERN_WARNING "%s: (1 + packet_size_length([%Zd])" + " + packet_size([%Zd]))([%Zd]) != " + "count([%Zd]). Invalid packet format.\n", + __func__, packet_size_length, packet_size, + (1 + packet_size_length + packet_size), count); + goto out_free; + } + rc = ecryptfs_miscdev_response(&data[i], packet_size, + current->euid, + current->nsproxy->user_ns, + task_pid(current), seq); + if (rc) + printk(KERN_WARNING "%s: Failed to deliver miscdev " + "response to requesting operation; rc = [%d]\n", + __func__, rc); + break; + case ECRYPTFS_MSG_HELO: + rc = ecryptfs_miscdev_helo(current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "helo from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); + goto out_free; + } + break; + case ECRYPTFS_MSG_QUIT: + rc = ecryptfs_miscdev_quit(current->euid, + current->nsproxy->user_ns, + task_pid(current)); + if (rc) { + printk(KERN_ERR "%s: Error attempting to process " + "quit from pid [0x%p]; rc = [%d]\n", __func__, + task_pid(current), rc); + goto out_free; + } + break; + default: + ecryptfs_printk(KERN_WARNING, "Dropping miscdev " + "message of unrecognized type [%d]\n", + data[0]); + break; + } +out_free: + kfree(data); +out: + return sz; +} + + +static const struct file_operations ecryptfs_miscdev_fops = { + .open = ecryptfs_miscdev_open, + .poll = ecryptfs_miscdev_poll, + .read = ecryptfs_miscdev_read, + .write = ecryptfs_miscdev_write, + 
.release = ecryptfs_miscdev_release, +}; + +static struct miscdevice ecryptfs_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ecryptfs", + .fops = &ecryptfs_miscdev_fops +}; + +/** + * ecryptfs_init_ecryptfs_miscdev + * + * Messages sent to the userspace daemon from the kernel are placed on + * a queue associated with the daemon. The next read against the + * miscdev handle by that daemon will return the oldest message placed + * on the message queue for the daemon. + * + * Returns zero on success; non-zero otherwise + */ +int ecryptfs_init_ecryptfs_miscdev(void) +{ + int rc; + + atomic_set(&ecryptfs_num_miscdev_opens, 0); + mutex_lock(&ecryptfs_daemon_hash_mux); + rc = misc_register(&ecryptfs_miscdev); + if (rc) + printk(KERN_ERR "%s: Failed to register miscellaneous device " + "for communications with userspace daemons; rc = [%d]\n", + __func__, rc); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return rc; +} + +/** + * ecryptfs_destroy_ecryptfs_miscdev + * + * All of the daemons must be exorcised prior to calling this + * function. 
+ */ +void ecryptfs_destroy_ecryptfs_miscdev(void) +{ + BUG_ON(atomic_read(&ecryptfs_num_miscdev_opens) != 0); + misc_deregister(&ecryptfs_miscdev); +} diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 6df1debdccc..2b6fe1e6e8b 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -153,7 +153,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " - "region; rc = [%d]\n", __FUNCTION__, rc); + "region; rc = [%d]\n", __func__, rc); goto out; } } else { @@ -169,7 +169,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, if (rc) { printk(KERN_ERR "%s: Error attempting to read " "extent at offset [%lld] in the lower " - "file; rc = [%d]\n", __FUNCTION__, + "file; rc = [%d]\n", __func__, lower_offset, rc); goto out; } @@ -212,7 +212,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page) "the encrypted content from the lower " "file whilst inserting the metadata " "from the xattr into the header; rc = " - "[%d]\n", __FUNCTION__, rc); + "[%d]\n", __func__, rc); goto out; } @@ -293,7 +293,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error attemping to read " "lower page segment; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } else @@ -308,7 +308,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, "from the lower file whilst " "inserting the metadata from " "the xattr into the header; rc " - "= [%d]\n", __FUNCTION__, rc); + "= [%d]\n", __func__, rc); ClearPageUptodate(page); goto out; } @@ -320,7 +320,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error reading " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(page); goto out; } @@ -331,7 +331,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { 
printk(KERN_ERR "%s: Error decrypting page " "at index [%ld]; rc = [%d]\n", - __FUNCTION__, page->index, rc); + __func__, page->index, rc); ClearPageUptodate(page); goto out; } @@ -348,7 +348,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, if (rc) { printk(KERN_ERR "%s: Error on attempt to " "truncate to (higher) offset [%lld];" - " rc = [%d]\n", __FUNCTION__, + " rc = [%d]\n", __func__, prev_page_end_size, rc); goto out; } @@ -389,7 +389,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) kfree(file_size_virt); if (rc) printk(KERN_ERR "%s: Error writing file size to header; " - "rc = [%d]\n", __FUNCTION__, rc); + "rc = [%d]\n", __func__, rc); out: return rc; } diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c index f638a698dc5..e0abad62b39 100644 --- a/fs/ecryptfs/netlink.c +++ b/fs/ecryptfs/netlink.c @@ -44,8 +44,8 @@ static struct sock *ecryptfs_nl_sock; * upon sending the message; non-zero upon error. */ int ecryptfs_send_netlink(char *data, int data_len, - struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type, - u16 msg_flags, pid_t daemon_pid) + struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, + u16 msg_flags, struct pid *daemon_pid) { struct sk_buff *skb; struct nlmsghdr *nlh; @@ -60,7 +60,7 @@ int ecryptfs_send_netlink(char *data, int data_len, ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); goto out; } - nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0, + nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? 
msg_ctx->counter : 0, msg_type, payload_len); nlh->nlmsg_flags = msg_flags; if (msg_ctx && payload_len) { @@ -69,7 +69,7 @@ int ecryptfs_send_netlink(char *data, int data_len, msg->data_len = data_len; memcpy(msg->data, data, data_len); } - rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0); + rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); if (rc < 0) { ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " "message; rc = [%d]\n", rc); @@ -99,6 +99,7 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); struct ecryptfs_message *msg = NLMSG_DATA(nlh); + struct pid *pid; int rc; if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { @@ -107,8 +108,10 @@ static int ecryptfs_process_nl_response(struct sk_buff *skb) "incorrectly specified data length\n"); goto out; } - rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, + pid, nlh->nlmsg_seq); + put_pid(pid); if (rc) printk(KERN_ERR "Error processing response message; rc = [%d]\n", rc); @@ -126,11 +129,13 @@ out: */ static int ecryptfs_process_nl_helo(struct sk_buff *skb) { + struct pid *pid; int rc; + pid = find_get_pid(NETLINK_CREDS(skb)->pid); rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, - NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); if (rc) printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); return rc; @@ -147,10 +152,12 @@ static int ecryptfs_process_nl_helo(struct sk_buff *skb) */ static int ecryptfs_process_nl_quit(struct sk_buff *skb) { + struct pid *pid; int rc; - rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, - NETLINK_CREDS(skb)->pid); + pid = find_get_pid(NETLINK_CREDS(skb)->pid); + rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); + put_pid(pid); 
if (rc) printk(KERN_WARNING "Error processing QUIT message; rc = [%d]\n", rc); @@ -176,20 +183,20 @@ static void ecryptfs_receive_nl_message(struct sk_buff *skb) goto free; } switch (nlh->nlmsg_type) { - case ECRYPTFS_NLMSG_RESPONSE: + case ECRYPTFS_MSG_RESPONSE: if (ecryptfs_process_nl_response(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "deliver netlink response to " "requesting operation\n"); } break; - case ECRYPTFS_NLMSG_HELO: + case ECRYPTFS_MSG_HELO: if (ecryptfs_process_nl_helo(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill HELO request\n"); } break; - case ECRYPTFS_NLMSG_QUIT: + case ECRYPTFS_MSG_QUIT: if (ecryptfs_process_nl_quit(skb)) { ecryptfs_printk(KERN_WARNING, "Failed to " "fulfill QUIT request\n"); diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 0c4928623bb..ebf55150be5 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -55,7 +55,7 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, set_fs(fs_save); if (octets_written < 0) { printk(KERN_ERR "%s: octets_written = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_written, size); + "expected [%td]\n", __func__, octets_written, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -153,7 +153,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } @@ -165,7 +165,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, if (rc) { printk(KERN_ERR "%s: Error decrypting " "page; rc = [%d]\n", - __FUNCTION__, rc); + __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; @@ -202,7 +202,7 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, page_cache_release(ecryptfs_page); if (rc) { 
printk(KERN_ERR "%s: Error encrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); goto out; } pos += num_bytes; @@ -254,7 +254,7 @@ int ecryptfs_read_lower(char *data, loff_t offset, size_t size, set_fs(fs_save); if (octets_read < 0) { printk(KERN_ERR "%s: octets_read = [%td]; " - "expected [%td]\n", __FUNCTION__, octets_read, size); + "expected [%td]\n", __func__, octets_read, size); rc = -EINVAL; } mutex_unlock(&inode_info->lower_file_mutex); @@ -327,7 +327,7 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, printk(KERN_ERR "%s: Attempt to read data past the end of the " "file; offset = [%lld]; size = [%td]; " "ecryptfs_file_size = [%lld]\n", - __FUNCTION__, offset, size, ecryptfs_file_size); + __func__, offset, size, ecryptfs_file_size); goto out; } pos = offset; @@ -345,14 +345,14 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, rc = PTR_ERR(ecryptfs_page); printk(KERN_ERR "%s: Error getting page at " "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __FUNCTION__, + "mapping; rc = [%d]\n", __func__, ecryptfs_page_idx, rc); goto out; } rc = ecryptfs_decrypt_page(ecryptfs_page); if (rc) { printk(KERN_ERR "%s: Error decrypting " - "page; rc = [%d]\n", __FUNCTION__, rc); + "page; rc = [%d]\n", __func__, rc); ClearPageUptodate(ecryptfs_page); page_cache_release(ecryptfs_page); goto out; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a415f42d32c..0d237182d72 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -257,25 +257,6 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1, (p1->file < p2->file ? 
-1 : p1->fd - p2->fd)); } -/* Special initialization for the RB tree node to detect linkage */ -static inline void ep_rb_initnode(struct rb_node *n) -{ - rb_set_parent(n, n); -} - -/* Removes a node from the RB tree and marks it for a fast is-linked check */ -static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) -{ - rb_erase(n, r); - rb_set_parent(n, n); -} - -/* Fast check to verify that the item is linked to the main RB tree */ -static inline int ep_rb_linked(struct rb_node *n) -{ - return rb_parent(n) != n; -} - /* Tells us if the item is currently linked */ static inline int ep_is_linked(struct list_head *p) { @@ -283,13 +264,13 @@ static inline int ep_is_linked(struct list_head *p) } /* Get the "struct epitem" from a wait queue pointer */ -static inline struct epitem * ep_item_from_wait(wait_queue_t *p) +static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { return container_of(p, struct eppoll_entry, wait)->base; } /* Get the "struct epitem" from an epoll queue wrapper */ -static inline struct epitem * ep_item_from_epqueue(poll_table *p) +static inline struct epitem *ep_item_from_epqueue(poll_table *p) { return container_of(p, struct ep_pqueue, pt)->epi; } @@ -411,8 +392,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_init(&epi->fllink); spin_unlock(&file->f_ep_lock); - if (ep_rb_linked(&epi->rbn)) - ep_rb_erase(&epi->rbn, &ep->rbr); + rb_erase(&epi->rbn, &ep->rbr); spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) @@ -728,7 +708,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, goto error_return; /* Item initialization follow here ... 
*/ - ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->pwqlist); diff --git a/fs/exec.c b/fs/exec.c index b152029f18f..a13883903ee 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); + mm_update_next_owner(mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); @@ -963,6 +964,8 @@ int flush_old_exec(struct linux_binprm * bprm) if (retval) goto out; + set_mm_exe_file(bprm->mm, bprm->file); + /* * Release all of the old mmap stuff */ @@ -1268,7 +1271,6 @@ int do_execve(char * filename, { struct linux_binprm *bprm; struct file *file; - unsigned long env_p; struct files_struct *displaced; int retval; @@ -1321,11 +1323,9 @@ int do_execve(char * filename, if (retval < 0) goto out; - env_p = bprm->p; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out; - bprm->argv_len = env_p - bprm->p; retval = search_binary_handler(bprm,regs); if (retval >= 0) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ef97f19c2f9..9d57695de74 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2449,17 +2449,10 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_mb_proc != NULL) { - struct proc_dir_entry *p; - p = create_proc_entry("mb_history", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_history_fops; - p->data = sb; - } - p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc); - if (p) { - p->proc_fops = &ext4_mb_seq_groups_fops; - p->data = sb; - } + proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_history_fops, sb); + proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, + &ext4_mb_seq_groups_fops, sb); } sbi->s_mb_history_max = 1000; @@ -2867,7 +2860,6 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb) mb_debug("freed %u blocks in %u structures\n", count, count2); } 
-#define EXT4_ROOT "ext4" #define EXT4_MB_STATS_NAME "stats" #define EXT4_MB_MAX_TO_SCAN_NAME "max_to_scan" #define EXT4_MB_MIN_TO_SCAN_NAME "min_to_scan" @@ -3007,9 +2999,9 @@ int __init init_ext4_mballoc(void) return -ENOMEM; } #ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir(EXT4_ROOT, proc_root_fs); + proc_root_ext4 = proc_mkdir("fs/ext4", NULL); if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", EXT4_ROOT); + printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); #endif return 0; } @@ -3020,7 +3012,7 @@ void exit_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); #ifdef CONFIG_PROC_FS - remove_proc_entry(EXT4_ROOT, proc_root_fs); + remove_proc_entry("fs/ext4", NULL); #endif } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5f522a55b59..4e0a3dd9d67 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1222,8 +1222,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, brelse(bh); goto out_invalid; } - logical_sector_size = - le16_to_cpu(get_unaligned((__le16 *)&b->sector_size)); + logical_sector_size = get_unaligned_le16(&b->sector_size); if (!is_power_of_2(logical_sector_size) || (logical_sector_size < 512) || (logical_sector_size > 4096)) { @@ -1322,8 +1321,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; - sbi->dir_entries = - le16_to_cpu(get_unaligned((__le16 *)&b->dir_entries)); + sbi->dir_entries = get_unaligned_le16(&b->dir_entries); if (sbi->dir_entries & (sbi->dir_per_block - 1)) { if (!silent) printk(KERN_ERR "FAT: bogus directroy-entries per block" @@ -1335,7 +1333,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, rootdir_sectors = sbi->dir_entries * sizeof(struct msdos_dir_entry) / sb->s_blocksize; sbi->data_start = sbi->dir_start + rootdir_sectors; - total_sectors = 
le16_to_cpu(get_unaligned((__le16 *)&b->sectors)); + total_sectors = get_unaligned_le16(&b->sectors); if (total_sectors == 0) total_sectors = le32_to_cpu(b->total_sect); diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 2b46064f66b..50ab5eecb99 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -50,7 +50,11 @@ extern daddr_t vxfs_bmap1(struct inode *, long); /* vxfs_fshead.c */ extern int vxfs_read_fshead(struct super_block *); +/* vxfs_immed.c */ +extern const struct inode_operations vxfs_immed_symlink_iops; + /* vxfs_inode.c */ +extern const struct address_space_operations vxfs_immed_aops; extern struct kmem_cache *vxfs_inode_cachep; extern void vxfs_dumpi(struct vxfs_inode_info *, ino_t); extern struct inode * vxfs_get_fake_inode(struct super_block *, @@ -69,6 +73,7 @@ extern const struct file_operations vxfs_dir_operations; extern int vxfs_read_olt(struct super_block *, u_long); /* vxfs_subr.c */ +extern const struct address_space_operations vxfs_aops; extern struct page * vxfs_get_page(struct address_space *, u_long); extern void vxfs_put_page(struct page *); extern struct buffer_head * vxfs_bread(struct inode *, int); diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 8a5959a61ba..c36aeaf92e4 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -35,6 +35,7 @@ #include <linux/namei.h> #include "vxfs.h" +#include "vxfs_extern.h" #include "vxfs_inode.h" diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index ad88d2364bc..9f3f2ceb73f 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -41,11 +41,6 @@ #include "vxfs_extern.h" -extern const struct address_space_operations vxfs_aops; -extern const struct address_space_operations vxfs_immed_aops; - -extern const struct inode_operations vxfs_immed_symlink_iops; - struct kmem_cache *vxfs_inode_cachep; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06557679ca4..ae45f77765c 100644 --- 
a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -25,6 +25,45 @@ #include <linux/buffer_head.h> #include "internal.h" + +/** + * writeback_acquire - attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. + */ +static int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress - determine whether there is writeback in progress + * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release - relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +static void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} + /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -747,43 +786,4 @@ int generic_osync_inode(struct inode *inode, struct address_space *mapping, int return err; } - EXPORT_SYMBOL(generic_osync_inode); - -/** - * writeback_acquire - attempt to get exclusive writeback access to a device - * @bdi: the device's backing_dev_info structure - * - * It is a waste of resources to have more than one pdflush thread blocked on - * a single request queue. 
Exclusion at the request_queue level is obtained - * via a flag in the request_queue's backing_dev_info.state. - * - * Non-request_queue-backed address_spaces will share default_backing_dev_info, - * unless they implement their own. Which is somewhat inefficient, as this - * may prevent concurrent writeback against multiple devices. - */ -int writeback_acquire(struct backing_dev_info *bdi) -{ - return !test_and_set_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_in_progress - determine whether there is writeback in progress - * @bdi: the device's backing_dev_info structure. - * - * Determine whether there is writeback in progress against a backing device. - */ -int writeback_in_progress(struct backing_dev_info *bdi) -{ - return test_bit(BDI_pdflush, &bdi->state); -} - -/** - * writeback_release - relinquish exclusive writeback access against a device. - * @bdi: the device's backing_dev_info structure - */ -void writeback_release(struct backing_dev_info *bdi) -{ - BUG_ON(!writeback_in_progress(bdi)); - clear_bit(BDI_pdflush, &bdi->state); -} diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 32de44ed002..8cf67974adf 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -297,7 +297,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_disk = load_nls(p); + if (p) + hsb->nls_disk = load_nls(p); if (!hsb->nls_disk) { printk(KERN_ERR "hfs: unable to load codepage \"%s\"\n", p); kfree(p); @@ -311,7 +312,8 @@ static int parse_options(char *options, struct hfs_sb_info *hsb) return 0; } p = match_strdup(&args[0]); - hsb->nls_io = load_nls(p); + if (p) + hsb->nls_io = load_nls(p); if (!hsb->nls_io) { printk(KERN_ERR "hfs: unable to load iocharset \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index d72d0a8b25a..9e59537b43d 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -311,6 +311,10 @@ int hfsplus_delete_cat(u32, struct inode *, struct qstr 
*); int hfsplus_rename_cat(u32, struct inode *, struct qstr *, struct inode *, struct qstr *); +/* dir.c */ +extern const struct inode_operations hfsplus_dir_inode_operations; +extern const struct file_operations hfsplus_dir_operations; + /* extents.c */ int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); void hfsplus_ext_write_extent(struct inode *); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 37744cf3706..d53b2af91c2 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -278,9 +278,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) return 0; } -extern const struct inode_operations hfsplus_dir_inode_operations; -extern struct file_operations hfsplus_dir_operations; - static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index dc64fac0083..9997cbf8beb 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -132,7 +132,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) return 0; } p = match_strdup(&args[0]); - sbi->nls = load_nls(p); + if (p) + sbi->nls = load_nls(p); if (!sbi->nls) { printk(KERN_ERR "hfs: unable to load nls mapping \"%s\"\n", p); kfree(p); diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 72cab78f050..175d08eacc8 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -47,7 +47,7 @@ static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) return 0; wd->ablk_start = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART)); - extent = be32_to_cpu(get_unaligned((__be32 *)(bufptr + HFSP_WRAPOFF_EMBEDEXT))); + extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT); wd->embed_start = (extent >> 16) & 0xFFFF; wd->embed_count = extent & 0xFFFF; diff --git a/fs/inode.c b/fs/inode.c index 27ee1af50d0..bf647813042 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -495,8 +495,7 @@ static 
struct inode * find_inode(struct super_block * sb, struct hlist_head *hea struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) @@ -520,8 +519,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head struct inode * inode = NULL; repeat: - hlist_for_each (node, head) { - inode = hlist_entry(node, struct inode, i_hash); + hlist_for_each_entry(inode, node, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 7b94a1e3c01..6676c06bb7c 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -598,7 +598,7 @@ asmlinkage long sys_inotify_init(void) } ih = inotify_init(&inotify_user_ops); - if (unlikely(IS_ERR(ih))) { + if (IS_ERR(ih)) { ret = PTR_ERR(ih); goto out_free_dev; } diff --git a/fs/ioctl.c b/fs/ioctl.c index f32fbde2175..7db32b3382d 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -28,8 +28,8 @@ * * Returns 0 on success, -errno on error. 
*/ -long vfs_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +static long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index d1bdf8adb35..ccbf72faf27 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -78,29 +78,29 @@ static inline int isonum_712(char *p) } static inline unsigned int isonum_721(char *p) { - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_722(char *p) { - return be16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_be16(p); } static inline unsigned int isonum_723(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le16_to_cpu(get_unaligned((__le16 *)p)); + return get_unaligned_le16(p); } static inline unsigned int isonum_731(char *p) { - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } static inline unsigned int isonum_732(char *p) { - return be32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_be32(p); } static inline unsigned int isonum_733(char *p) { /* Ignore bigendian datum due to broken mastering programs */ - return le32_to_cpu(get_unaligned((__le32 *)p)); + return get_unaligned_le32(p); } extern int iso_date(char *, int); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 954cff001df..eb7eb6c27bc 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -904,19 +904,10 @@ static void jbd2_stats_proc_init(journal_t *journal) snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { - struct proc_dir_entry *p; - p = create_proc_entry("history", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_history_fops; - p->data = journal; - p = create_proc_entry("info", S_IRUGO, - journal->j_proc_entry); - if (p) { - p->proc_fops = &jbd2_seq_info_fops; - p->data 
= journal; - } - } + proc_create_data("history", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_history_fops, journal); + proc_create_data("info", S_IRUGO, journal->j_proc_entry, + &jbd2_seq_info_fops, journal); } } diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 887f5759e53..bf6ab19b86e 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -89,7 +89,7 @@ void jfs_proc_init(void) { int i; - if (!(base = proc_mkdir("jfs", proc_root_fs))) + if (!(base = proc_mkdir("fs/jfs", NULL))) return; base->owner = THIS_MODULE; @@ -109,7 +109,7 @@ void jfs_proc_clean(void) if (base) { for (i = 0; i < NPROCENT; i++) remove_proc_entry(Entries[i].name, base); - remove_proc_entry("jfs", proc_root_fs); + remove_proc_entry("fs/jfs", NULL); } } diff --git a/fs/namei.c b/fs/namei.c index e179f71bfcb..32fd9655485 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -30,6 +30,7 @@ #include <linux/capability.h> #include <linux/file.h> #include <linux/fcntl.h> +#include <linux/device_cgroup.h> #include <asm/namei.h> #include <asm/uaccess.h> @@ -281,6 +282,10 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; + retval = devcgroup_inode_permission(inode, mask); + if (retval) + return retval; + return security_inode_permission(inode, mask, nd); } @@ -2028,6 +2033,10 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (!dir->i_op || !dir->i_op->mknod) return -EPERM; + error = devcgroup_inode_mknod(mode, dev); + if (error) + return error; + error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; diff --git a/fs/namespace.c b/fs/namespace.c index fe376805cf5..061e5edb4d2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1176,17 +1176,6 @@ static int mount_is_safe(struct nameidata *nd) #endif } -static int lives_below_in_same_fs(struct dentry *d, struct dentry *dentry) -{ - while (1) { - if (d == dentry) - return 1; - if (d == NULL || d == d->d_parent) - return 0; - d = d->d_parent; - } -} 
- struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, int flag) { @@ -1203,7 +1192,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, p = mnt; list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { - if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry)) + if (!is_subdir(r->mnt_mountpoint, dentry)) continue; for (s = r; s; s = next_mnt(s, r)) { diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index df6d60bdfcd..97645f11211 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -102,48 +102,47 @@ static inline void ncp_init_request_s(struct ncp_server *server, int subfunction } static inline char * - ncp_reply_data(struct ncp_server *server, int offset) +ncp_reply_data(struct ncp_server *server, int offset) { return &(server->packet[sizeof(struct ncp_reply_header) + offset]); } -static inline __u8 BVAL(void* data) +static inline u8 BVAL(void *data) { - return get_unaligned((__u8*)data); + return *(u8 *)data; } -static __u8 - ncp_reply_byte(struct ncp_server *server, int offset) +static u8 ncp_reply_byte(struct ncp_server *server, int offset) { - return get_unaligned((__u8 *) ncp_reply_data(server, offset)); + return *(u8 *)ncp_reply_data(server, offset); } -static inline __u16 WVAL_LH(void* data) +static inline u16 WVAL_LH(void *data) { - return le16_to_cpu(get_unaligned((__le16*)data)); + return get_unaligned_le16(data); } -static __u16 - ncp_reply_le16(struct ncp_server *server, int offset) +static u16 +ncp_reply_le16(struct ncp_server *server, int offset) { - return le16_to_cpu(get_unaligned((__le16 *) ncp_reply_data(server, offset))); + return get_unaligned_le16(ncp_reply_data(server, offset)); } -static __u16 - ncp_reply_be16(struct ncp_server *server, int offset) +static u16 +ncp_reply_be16(struct ncp_server *server, int offset) { - return be16_to_cpu(get_unaligned((__be16 *) ncp_reply_data(server, offset))); + return get_unaligned_be16(ncp_reply_data(server, offset)); } -static 
inline __u32 DVAL_LH(void* data) +static inline u32 DVAL_LH(void *data) { - return le32_to_cpu(get_unaligned((__le32*)data)); + return get_unaligned_le32(data); } static __le32 - ncp_reply_dword(struct ncp_server *server, int offset) +ncp_reply_dword(struct ncp_server *server, int offset) { - return get_unaligned((__le32 *) ncp_reply_data(server, offset)); + return get_unaligned((__le32 *)ncp_reply_data(server, offset)); } static inline __u32 ncp_reply_dword_lh(struct ncp_server* server, int offset) { @@ -1006,8 +1005,8 @@ ncp_read_bounce(struct ncp_server *server, const char *file_id, result = ncp_request2(server, 72, bounce, bufsize); ncp_unlock_server(server); if (!result) { - int len = be16_to_cpu(get_unaligned((__be16*)((char*)bounce + - sizeof(struct ncp_reply_header)))); + int len = get_unaligned_be16((char *)bounce + + sizeof(struct ncp_reply_header)); result = -EIO; if (len <= to_read) { char* source; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f2f3b284e6d..89ac5bb0401 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1321,6 +1321,7 @@ static const struct file_operations nfs_server_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static int nfs_volume_list_open(struct inode *inode, struct file *file); @@ -1341,6 +1342,7 @@ static const struct file_operations nfs_volume_list_fops = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /* @@ -1500,33 +1502,29 @@ int __init nfs_fs_proc_init(void) { struct proc_dir_entry *p; - proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs); + proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL); if (!proc_fs_nfs) goto error_0; proc_fs_nfs->owner = THIS_MODULE; /* a file of servers with which we're dealing */ - p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("servers", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_server_list_fops); if (!p) goto error_1; - p->proc_fops = &nfs_server_list_fops; - p->owner = 
THIS_MODULE; - /* a file of volumes that we have mounted */ - p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs); + p = proc_create("volumes", S_IFREG|S_IRUGO, + proc_fs_nfs, &nfs_volume_list_fops); if (!p) goto error_2; - - p->proc_fops = &nfs_volume_list_fops; - p->owner = THIS_MODULE; return 0; error_2: remove_proc_entry("servers", proc_fs_nfs); error_1: - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); error_0: return -ENOMEM; } @@ -1538,7 +1536,7 @@ void nfs_fs_proc_exit(void) { remove_proc_entry("volumes", proc_fs_nfs); remove_proc_entry("servers", proc_fs_nfs); - remove_proc_entry("nfsfs", proc_root_fs); + remove_proc_entry("fs/nfsfs", NULL); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 42f3820ee8f..5ac00c4fee9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -169,6 +169,7 @@ static const struct file_operations exports_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; /*----------------------------------------------------------------------------*/ @@ -801,10 +802,9 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = create_proc_entry("fs/nfs/exports", 0, NULL); + entry = proc_create("exports", 0, entry, &exports_operations); if (!entry) return -ENOMEM; - entry->proc_fops = &exports_operations; return 0; } #else /* CONFIG_PROC_FS */ diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 2ad5c8b104b..790defb847e 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -1191,7 +1191,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol, if (size) { page = ntfs_map_page(mftbmp_mapping, ofs >> PAGE_CACHE_SHIFT); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to read mft " "bitmap, aborting."); return PTR_ERR(page); @@ -2118,7 +2118,7 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 
mft_no) } /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(mft_vi->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing mft record " "to format 0x%llx.", (long long)mft_no); return PTR_ERR(page); @@ -2519,7 +2519,7 @@ mft_rec_already_initialized: ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* Read, map, and pin the page containing the mft record. */ page = ntfs_map_page(vol->mft_ino->i_mapping, index); - if (unlikely(IS_ERR(page))) { + if (IS_ERR(page)) { ntfs_error(vol->sb, "Failed to map page containing allocated " "mft record 0x%llx.", (long long)bit); err = PTR_ERR(page); diff --git a/fs/proc/base.c b/fs/proc/base.c index c5e412a00b1..fcf02f2deeb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -195,12 +195,32 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task_is_stopped_or_traced(task)) && \ - security_ptrace(current,task) == 0)) +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + /* + * A task can always look at itself, in case it chooses + * to use system calls instead of load instructions. + */ + if (task == current) + return 0; + + /* + * If current is actively ptrace'ing, and would also be + * permitted to freshly attach with ptrace now, permit it. + */ + if (task->parent == current && (task->ptrace & PT_PTRACED) && + task_is_stopped_or_traced(task) && + ptrace_may_attach(task)) + return 0; + + /* + * Noone else is allowed. 
+ */ + return -EPERM; +} struct mm_struct *mm_for_maps(struct task_struct *task) { @@ -722,7 +742,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; ret = -ENOMEM; @@ -748,7 +768,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { + if (!retval || check_mem_permission(task)) { if (!ret) ret = -EIO; break; @@ -792,7 +812,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (!task) goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) + if (check_mem_permission(task)) goto out; copied = -ENOMEM; @@ -1181,6 +1201,81 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * We added or removed a vma mapping the executable. The vmas are only mapped + * during exec and are not mapped with the mmap system call. 
+ * Callers must hold down_write() on the mm's mmap_sem for these + */ +void added_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas++; +} + +void removed_exe_file_vma(struct mm_struct *mm) +{ + mm->num_exe_file_vmas--; + if ((mm->num_exe_file_vmas == 0) && mm->exe_file){ + fput(mm->exe_file); + mm->exe_file = NULL; + } + +} + +void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) +{ + if (new_exe_file) + get_file(new_exe_file); + if (mm->exe_file) + fput(mm->exe_file); + mm->exe_file = new_exe_file; + mm->num_exe_file_vmas = 0; +} + +struct file *get_mm_exe_file(struct mm_struct *mm) +{ + struct file *exe_file; + + /* We need mmap_sem to protect against races with removal of + * VM_EXECUTABLE vmas */ + down_read(&mm->mmap_sem); + exe_file = mm->exe_file; + if (exe_file) + get_file(exe_file); + up_read(&mm->mmap_sem); + return exe_file; +} + +void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm) +{ + /* It's safe to write the exe_file pointer without exe_file_lock because + * this is called during fork when the task is not yet in /proc */ + newmm->exe_file = get_mm_exe_file(oldmm); +} + +static int proc_exe_link(struct inode *inode, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a36ad3c75cf..9d53b39a9cf 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -69,12 +69,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes, count = min_t(size_t, 
PROC_BLOCK_SIZE, nbytes); start = NULL; - if (dp->get_info) { - /* Handle old net routines */ - n = dp->get_info(page, &start, *ppos, count); - if (n < count) - eof = 1; - } else if (dp->read_proc) { + if (dp->read_proc) { /* * How to be a proc read function * ------------------------------ @@ -277,8 +272,11 @@ static int xlate_proc_name(const char *name, int len; int rtn = 0; + de = *ret; + if (!de) + de = &proc_root; + spin_lock(&proc_subdir_lock); - de = &proc_root; while (1) { next = strchr(cp, '/'); if (!next) @@ -385,20 +383,18 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, lock_kernel(); spin_lock(&proc_subdir_lock); - if (de) { - for (de = de->subdir; de ; de = de->next) { - if (de->namelen != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - unsigned int ino; + for (de = de->subdir; de ; de = de->next) { + if (de->namelen != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { + unsigned int ino; - ino = de->low_ino; - de_get(de); - spin_unlock(&proc_subdir_lock); - error = -EINVAL; - inode = proc_get_inode(dir->i_sb, ino, de); - goto out_unlock; - } + ino = de->low_ino; + de_get(de); + spin_unlock(&proc_subdir_lock); + error = -EINVAL; + inode = proc_get_inode(dir->i_sb, ino, de); + goto out_unlock; } } spin_unlock(&proc_subdir_lock); @@ -410,7 +406,8 @@ out_unlock: d_add(dentry, inode); return NULL; } - de_put(de); + if (de) + de_put(de); return ERR_PTR(error); } @@ -440,10 +437,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, lock_kernel(); ino = inode->i_ino; - if (!de) { - ret = -EINVAL; - goto out; - } i = filp->f_pos; switch (i) { case 0: @@ -582,7 +575,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, /* make sure name is valid */ if (!name || !strlen(name)) goto out; - if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) + if (xlate_proc_name(name, parent, &fn) 
!= 0) goto out; /* At this point there must not be any '/' characters beyond *fn */ @@ -682,9 +675,10 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_create(const char *name, mode_t mode, - struct proc_dir_entry *parent, - const struct file_operations *proc_fops) +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops, + void *data) { struct proc_dir_entry *pde; nlink_t nlink; @@ -705,6 +699,7 @@ struct proc_dir_entry *proc_create(const char *name, mode_t mode, if (!pde) goto out; pde->proc_fops = proc_fops; + pde->data = data; if (proc_register(parent, pde) < 0) goto out_free; return pde; @@ -734,55 +729,58 @@ void free_proc_entry(struct proc_dir_entry *de) void remove_proc_entry(const char *name, struct proc_dir_entry *parent) { struct proc_dir_entry **p; - struct proc_dir_entry *de; + struct proc_dir_entry *de = NULL; const char *fn = name; int len; - if (!parent && xlate_proc_name(name, &parent, &fn) != 0) - goto out; + if (xlate_proc_name(name, &parent, &fn) != 0) + return; len = strlen(fn); spin_lock(&proc_subdir_lock); for (p = &parent->subdir; *p; p=&(*p)->next ) { - if (!proc_match(len, fn, *p)) - continue; - de = *p; - *p = de->next; - de->next = NULL; - - spin_lock(&de->pde_unload_lock); - /* - * Stop accepting new callers into module. If you're - * dynamically allocating ->proc_fops, save a pointer somewhere. - */ - de->proc_fops = NULL; - /* Wait until all existing callers into module are done. 
*/ - if (de->pde_users > 0) { - DECLARE_COMPLETION_ONSTACK(c); - - if (!de->pde_unload_completion) - de->pde_unload_completion = &c; - - spin_unlock(&de->pde_unload_lock); - spin_unlock(&proc_subdir_lock); + if (proc_match(len, fn, *p)) { + de = *p; + *p = de->next; + de->next = NULL; + break; + } + } + spin_unlock(&proc_subdir_lock); + if (!de) + return; - wait_for_completion(de->pde_unload_completion); + spin_lock(&de->pde_unload_lock); + /* + * Stop accepting new callers into module. If you're + * dynamically allocating ->proc_fops, save a pointer somewhere. + */ + de->proc_fops = NULL; + /* Wait until all existing callers into module are done. */ + if (de->pde_users > 0) { + DECLARE_COMPLETION_ONSTACK(c); + + if (!de->pde_unload_completion) + de->pde_unload_completion = &c; - spin_lock(&proc_subdir_lock); - goto continue_removing; - } spin_unlock(&de->pde_unload_lock); + wait_for_completion(de->pde_unload_completion); + + goto continue_removing; + } + spin_unlock(&de->pde_unload_lock); + continue_removing: - if (S_ISDIR(de->mode)) - parent->nlink--; - de->nlink = 0; - WARN_ON(de->subdir); - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); - break; + if (S_ISDIR(de->mode)) + parent->nlink--; + de->nlink = 0; + if (de->subdir) { + printk(KERN_WARNING "%s: removing non-empty directory " + "'%s/%s', leaking at least '%s'\n", __func__, + de->parent->name, de->name, de->subdir->name); + WARN_ON(1); } - spin_unlock(&proc_subdir_lock); -out: - return; + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 82b3a1b5a70..6f4e8dc97da 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -25,8 +25,7 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) { - if (de) - atomic_inc(&de->count); + atomic_inc(&de->count); return de; } @@ -35,18 +34,16 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) */ void de_put(struct proc_dir_entry *de) { - if (de) { - lock_kernel(); - if 
(!atomic_read(&de->count)) { - printk("de_put: entry %s already free!\n", de->name); - unlock_kernel(); - return; - } - - if (atomic_dec_and_test(&de->count)) - free_proc_entry(de); + lock_kernel(); + if (!atomic_read(&de->count)) { + printk("de_put: entry %s already free!\n", de->name); unlock_kernel(); + return; } + + if (atomic_dec_and_test(&de->count)) + free_proc_entry(de); + unlock_kernel(); } /* @@ -392,7 +389,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, { struct inode * inode; - if (de != NULL && !try_module_get(de->owner)) + if (!try_module_get(de->owner)) goto out_mod; inode = iget_locked(sb, ino); @@ -402,30 +399,29 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; PROC_I(inode)->fd = 0; PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; - } else { - inode->i_fop = de->proc_fops; - } + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; } } unlock_new_inode(inode); @@ -433,8 +429,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, return inode; out_ino: - if (de != 
NULL) - module_put(de->owner); + module_put(de->owner); out_mod: return NULL; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index bc72f5c8c47..28cbca80590 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -11,6 +11,7 @@ #include <linux/proc_fs.h> +extern struct proc_dir_entry proc_root; #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); #else @@ -46,9 +47,6 @@ extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); extern int maps_protect; -extern void create_seq_entry(char *name, mode_t mode, - const struct file_operations *f); -extern int proc_exe_link(struct inode *, struct path *); extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 941e95114b5..79ecd281d2c 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -137,7 +137,7 @@ static const struct file_operations proc_nommu_vma_list_operations = { static int __init proc_nommu_init(void) { - create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); + proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 441a32f0e5f..48bcf20cec2 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -826,14 +826,6 @@ static struct file_operations proc_kpageflags_operations = { struct proc_dir_entry *proc_root_kcore; -void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) -{ - struct proc_dir_entry *entry; - entry = create_proc_entry(name, mode, NULL); - if (entry) - entry->proc_fops = f; -} - void __init proc_misc_init(void) { static struct { @@ -862,66 +854,52 @@ void __init proc_misc_init(void) /* And now for trickier ones */ #ifdef CONFIG_PRINTK - { - struct proc_dir_entry *entry; - entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); - if (entry) - entry->proc_fops = 
&proc_kmsg_operations; - } + proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif - create_seq_entry("locks", 0, &proc_locks_operations); - create_seq_entry("devices", 0, &proc_devinfo_operations); - create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); + proc_create("locks", 0, NULL, &proc_locks_operations); + proc_create("devices", 0, NULL, &proc_devinfo_operations); + proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK - create_seq_entry("partitions", 0, &proc_partitions_operations); + proc_create("partitions", 0, NULL, &proc_partitions_operations); #endif - create_seq_entry("stat", 0, &proc_stat_operations); - create_seq_entry("interrupts", 0, &proc_interrupts_operations); + proc_create("stat", 0, NULL, &proc_stat_operations); + proc_create("interrupts", 0, NULL, &proc_interrupts_operations); #ifdef CONFIG_SLABINFO - create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); + proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); #ifdef CONFIG_DEBUG_SLAB_LEAK - create_seq_entry("slab_allocators", 0 ,&proc_slabstats_operations); + proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); #endif #endif #ifdef CONFIG_MMU proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); #endif - create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); - create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops); - create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); - create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations); + proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); + proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); + proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); + proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); #ifdef CONFIG_BLOCK - create_seq_entry("diskstats", 0, &proc_diskstats_operations); + proc_create("diskstats", 0, NULL, 
&proc_diskstats_operations); #endif #ifdef CONFIG_MODULES - create_seq_entry("modules", 0, &proc_modules_operations); + proc_create("modules", 0, NULL, &proc_modules_operations); #endif #ifdef CONFIG_SCHEDSTATS - create_seq_entry("schedstat", 0, &proc_schedstat_operations); + proc_create("schedstat", 0, NULL, &proc_schedstat_operations); #endif #ifdef CONFIG_PROC_KCORE - proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); - if (proc_root_kcore) { - proc_root_kcore->proc_fops = &proc_kcore_operations; + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) proc_root_kcore->size = (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; - } #endif #ifdef CONFIG_PROC_PAGE_MONITOR - create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); - create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); + proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); + proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); #endif #ifdef CONFIG_PROC_VMCORE - proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); - if (proc_vmcore) - proc_vmcore->proc_fops = &proc_vmcore_operations; + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); #endif #ifdef CONFIG_MAGIC_SYSRQ - { - struct proc_dir_entry *entry; - entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); - if (entry) - entry->proc_fops = &proc_sysrq_trigger_operations; - } + proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); #endif } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 614c34b6d1c..5acc001d49f 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -165,8 +165,8 @@ out: return err; } -static ssize_t proc_sys_read(struct file *filp, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, + size_t count, loff_t *ppos, int write) { struct dentry *dentry = 
filp->f_dentry; struct ctl_table_header *head; @@ -190,12 +190,12 @@ static ssize_t proc_sys_read(struct file *filp, char __user *buf, * and won't be until we finish. */ error = -EPERM; - if (sysctl_perm(table, MAY_READ)) + if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; /* careful: calling conventions are nasty here */ res = count; - error = table->proc_handler(table, 0, filp, buf, &res, ppos); + error = table->proc_handler(table, write, filp, buf, &res, ppos); if (!error) error = res; out: @@ -204,44 +204,16 @@ out: return error; } -static ssize_t proc_sys_write(struct file *filp, const char __user *buf, +static ssize_t proc_sys_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; - ssize_t error; - size_t res; - - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; - - /* - * At this point we know that the sysctl was not unregistered - * and won't be until we finish. 
- */ - error = -EPERM; - if (sysctl_perm(table, MAY_WRITE)) - goto out; - - /* careful: calling conventions are nasty here */ - res = count; - error = table->proc_handler(table, 1, filp, (char __user *)buf, - &res, ppos); - if (!error) - error = res; -out: - sysctl_head_finish(head); + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); +} - return error; +static ssize_t proc_sys_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); } @@ -416,7 +388,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata * goto out; /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(table, mask); + error = sysctl_perm(head->root, table, mask); out: sysctl_head_finish(head); return error; diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index 49816e00b51..ac26ccc25f4 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -5,7 +5,7 @@ */ #include <asm/uaccess.h> - +#include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/time.h> @@ -136,39 +136,54 @@ static const struct file_operations proc_tty_drivers_operations = { .release = seq_release, }; -/* - * This is the handler for /proc/tty/ldiscs - */ -static int tty_ldiscs_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { - int i; - int len = 0; - off_t begin = 0; + return (*pos < NR_LDISCS) ? pos : NULL; +} + +static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return (*pos < NR_LDISCS) ? 
pos : NULL; +} + +static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) +{ +} + +static int tty_ldiscs_seq_show(struct seq_file *m, void *v) +{ + int i = *(loff_t *)v; struct tty_ldisc *ld; - for (i=0; i < NR_LDISCS; i++) { - ld = tty_ldisc_get(i); - if (ld == NULL) - continue; - len += sprintf(page+len, "%-10s %2d\n", - ld->name ? ld->name : "???", i); - tty_ldisc_put(i); - if (len+begin > off+count) - break; - if (len+begin < off) { - begin += len; - len = 0; - } - } - if (i >= NR_LDISCS) - *eof = 1; - if (off >= len+begin) + ld = tty_ldisc_get(i); + if (ld == NULL) return 0; - *start = page + (off-begin); - return ((count < begin+len-off) ? count : begin+len-off); + seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i); + tty_ldisc_put(i); + return 0; +} + +static const struct seq_operations tty_ldiscs_seq_ops = { + .start = tty_ldiscs_seq_start, + .next = tty_ldiscs_seq_next, + .stop = tty_ldiscs_seq_stop, + .show = tty_ldiscs_seq_show, +}; + +static int proc_tty_ldiscs_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &tty_ldiscs_seq_ops); } +static const struct file_operations tty_ldiscs_proc_fops = { + .owner = THIS_MODULE, + .open = proc_tty_ldiscs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + /* * This function is called by tty_register_driver() to handle * registering the driver's /proc handler into /proc/tty/driver/<foo> @@ -214,7 +229,6 @@ void proc_tty_unregister_driver(struct tty_driver *driver) */ void __init proc_tty_init(void) { - struct proc_dir_entry *entry; if (!proc_mkdir("tty", NULL)) return; proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); @@ -224,10 +238,7 @@ void __init proc_tty_init(void) * password lengths and inter-keystroke timings during password * entry. 
*/ - proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); - - create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); - entry = create_proc_entry("tty/drivers", 0, NULL); - if (entry) - entry->proc_fops = &proc_tty_drivers_operations; + proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL); + proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops); + proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations); } diff --git a/fs/proc/root.c b/fs/proc/root.c index ef0fb57fc9e..95117538a4f 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -22,8 +22,6 @@ #include "internal.h" -struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; - static int proc_test_super(struct super_block *sb, void *data) { return sb->s_fs_info == data; @@ -126,8 +124,8 @@ void __init proc_root_init(void) #ifdef CONFIG_SYSVIPC proc_mkdir("sysvipc", NULL); #endif - proc_root_fs = proc_mkdir("fs", NULL); - proc_root_driver = proc_mkdir("driver", NULL); + proc_mkdir("fs", NULL); + proc_mkdir("driver", NULL); proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) /* just give it a mountpoint */ @@ -137,7 +135,7 @@ void __init proc_root_init(void) #ifdef CONFIG_PROC_DEVICETREE proc_device_tree_init(); #endif - proc_bus = proc_mkdir("bus", NULL); + proc_mkdir("bus", NULL); proc_sys_init(); } @@ -232,9 +230,5 @@ void pid_ns_release_proc(struct pid_namespace *ns) EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); -EXPORT_SYMBOL(proc_create); +EXPORT_SYMBOL(proc_create_data); EXPORT_SYMBOL(remove_proc_entry); -EXPORT_SYMBOL(proc_root); -EXPORT_SYMBOL(proc_root_fs); -EXPORT_SYMBOL(proc_bus); -EXPORT_SYMBOL(proc_root_driver); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7415eeb7cc3..e2b8e769f51 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -75,40 +75,6 @@ int 
task_statm(struct mm_struct *mm, int *shared, int *text, return mm->total_vm; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_area_struct * vma; - int result = -ENOENT; - struct task_struct *task = get_proc_task(inode); - struct mm_struct * mm = NULL; - - if (task) { - mm = get_task_mm(task); - put_task_struct(task); - } - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vma = mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) - break; - vma = vma->vm_next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - static void pad_len_spaces(struct seq_file *m, int len) { len = 25 + sizeof(void*) * 6 - len; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 8011528518b..4b733f10845 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -103,40 +103,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } -int proc_exe_link(struct inode *inode, struct path *path) -{ - struct vm_list_struct *vml; - struct vm_area_struct *vma; - struct task_struct *task = get_proc_task(inode); - struct mm_struct *mm = get_task_mm(task); - int result = -ENOENT; - - if (!mm) - goto out; - down_read(&mm->mmap_sem); - - vml = mm->context.vmlist; - vma = NULL; - while (vml) { - if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { - vma = vml->vma; - break; - } - vml = vml->next; - } - - if (vma) { - *path = vma->vm_file->f_path; - path_get(&vma->vm_file->f_path); - result = 0; - } - - up_read(&mm->mmap_sem); - mmput(mm); -out: - return result; -} - /* * display mapping lines for a particular process's /proc/pid/maps */ diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index b41a514b097..9590b902430 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -26,6 +26,9 @@ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/ramfs.h> + +#include 
"internal.h" const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h index af7cc074a47..6b330639b51 100644 --- a/fs/ramfs/internal.h +++ b/fs/ramfs/internal.h @@ -11,5 +11,4 @@ extern const struct address_space_operations ramfs_aops; -extern const struct file_operations ramfs_file_operations; extern const struct inode_operations ramfs_file_inode_operations; diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 8f86c52b30d..b9dbeeca704 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -467,6 +467,7 @@ static const struct file_operations r_file_operations = { .read = seq_read, .llseek = seq_lseek, .release = seq_release, + .owner = THIS_MODULE, }; static struct proc_dir_entry *proc_info_root = NULL; @@ -475,12 +476,8 @@ static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, int (*func) (struct seq_file *, struct super_block *)) { - struct proc_dir_entry *de; - de = create_proc_entry(name, 0, REISERFS_SB(sb)->procdir); - if (de) { - de->data = func; - de->proc_fops = &r_file_operations; - } + proc_create_data(name, 0, REISERFS_SB(sb)->procdir, + &r_file_operations, func); } int reiserfs_proc_info_init(struct super_block *sb) diff --git a/fs/splice.c b/fs/splice.c index eeb1a86a701..633f58ebfb7 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1075,7 +1075,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, ret = splice_direct_to_actor(in, &sd, direct_splice_actor); if (ret > 0) - *ppos += ret; + *ppos = sd.pos; return ret; } diff --git a/fs/super.c b/fs/super.c index a5a4aca7e22..453877c5697 100644 --- a/fs/super.c +++ b/fs/super.c @@ -117,7 +117,7 @@ static inline void destroy_super(struct super_block *s) * Drop a superblock's refcount. Returns non-zero if the superblock was * destroyed. The caller must hold sb_lock. 
*/ -int __put_super(struct super_block *sb) +static int __put_super(struct super_block *sb) { int ret = 0; diff --git a/fs/sync.c b/fs/sync.c index 7cd005ea763..228e17b5e9e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -64,7 +64,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) /* sync the superblock to buffers */ sb = inode->i_sb; lock_super(sb); - if (sb->s_op->write_super) + if (sb->s_dirt && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); diff --git a/fs/timerfd.c b/fs/timerfd.c index 10c80b59ec4..5400524e9cb 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -20,6 +20,7 @@ #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> +#include <linux/syscalls.h> struct timerfd_ctx { struct hrtimer tmr; diff --git a/fs/xattr.c b/fs/xattr.c index 89a942f07e1..4706a8b1f49 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -67,7 +67,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) } int -vfs_setxattr(struct dentry *dentry, char *name, void *value, +vfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; @@ -131,7 +131,7 @@ out_noalloc: EXPORT_SYMBOL_GPL(xattr_getsecurity); ssize_t -vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) +vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; @@ -187,7 +187,7 @@ vfs_listxattr(struct dentry *d, char *list, size_t size) EXPORT_SYMBOL_GPL(vfs_listxattr); int -vfs_removexattr(struct dentry *dentry, char *name) +vfs_removexattr(struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; int error; @@ -218,7 +218,7 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); * Extended attribute SET operations */ static long -setxattr(struct dentry *d, char __user *name, void __user *value, +setxattr(struct dentry *d, const char __user *name, const void __user *value, 
size_t size, int flags) { int error; @@ -252,8 +252,8 @@ setxattr(struct dentry *d, char __user *name, void __user *value, } asmlinkage long -sys_setxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -271,8 +271,8 @@ sys_setxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_lsetxattr(char __user *path, char __user *name, void __user *value, - size_t size, int flags) +sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags) { struct nameidata nd; int error; @@ -290,7 +290,7 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage long -sys_fsetxattr(int fd, char __user *name, void __user *value, +sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags) { struct file *f; @@ -315,7 +315,8 @@ sys_fsetxattr(int fd, char __user *name, void __user *value, * Extended attribute GET operations */ static ssize_t -getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) +getxattr(struct dentry *d, const char __user *name, void __user *value, + size_t size) { ssize_t error; void *kvalue = NULL; @@ -349,8 +350,8 @@ getxattr(struct dentry *d, char __user *name, void __user *value, size_t size) } asmlinkage ssize_t -sys_getxattr(char __user *path, char __user *name, void __user *value, - size_t size) +sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size) { struct nameidata nd; ssize_t error; @@ -364,7 +365,7 @@ sys_getxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_lgetxattr(char __user *path, char __user *name, void __user *value, +sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t 
size) { struct nameidata nd; @@ -379,7 +380,7 @@ sys_lgetxattr(char __user *path, char __user *name, void __user *value, } asmlinkage ssize_t -sys_fgetxattr(int fd, char __user *name, void __user *value, size_t size) +sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; ssize_t error = -EBADF; @@ -424,7 +425,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -438,7 +439,7 @@ sys_listxattr(char __user *path, char __user *list, size_t size) } asmlinkage ssize_t -sys_llistxattr(char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *path, char __user *list, size_t size) { struct nameidata nd; ssize_t error; @@ -470,7 +471,7 @@ sys_flistxattr(int fd, char __user *list, size_t size) * Extended attribute REMOVE operations */ static long -removexattr(struct dentry *d, char __user *name) +removexattr(struct dentry *d, const char __user *name) { int error; char kname[XATTR_NAME_MAX + 1]; @@ -485,7 +486,7 @@ removexattr(struct dentry *d, char __user *name) } asmlinkage long -sys_removexattr(char __user *path, char __user *name) +sys_removexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -503,7 +504,7 @@ sys_removexattr(char __user *path, char __user *name) } asmlinkage long -sys_lremovexattr(char __user *path, char __user *name) +sys_lremovexattr(const char __user *path, const char __user *name) { struct nameidata nd; int error; @@ -521,7 +522,7 @@ sys_lremovexattr(char __user *path, char __user *name) } asmlinkage long -sys_fremovexattr(int fd, char __user *name) +sys_fremovexattr(int fd, const char __user *name) { struct file *f; struct dentry *dentry; diff --git a/include/asm-alpha/unaligned.h b/include/asm-alpha/unaligned.h index 
a1d72846f61..3787c60aed3 100644 --- a/include/asm-alpha/unaligned.h +++ b/include/asm-alpha/unaligned.h @@ -1,6 +1,11 @@ -#ifndef __ALPHA_UNALIGNED_H -#define __ALPHA_UNALIGNED_H +#ifndef _ASM_ALPHA_UNALIGNED_H +#define _ASM_ALPHA_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> -#endif +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_ALPHA_UNALIGNED_H */ diff --git a/include/asm-arm/unaligned.h b/include/asm-arm/unaligned.h index 5db03cf3b90..44593a89490 100644 --- a/include/asm-arm/unaligned.h +++ b/include/asm-arm/unaligned.h @@ -1,171 +1,9 @@ -#ifndef __ASM_ARM_UNALIGNED_H -#define __ASM_ARM_UNALIGNED_H +#ifndef _ASM_ARM_UNALIGNED_H +#define _ASM_ARM_UNALIGNED_H -#include <asm/types.h> - -extern int __bug_unaligned_x(const void *ptr); - -/* - * What is the most efficient way of loading/storing an unaligned value? - * - * That is the subject of this file. Efficiency here is defined as - * minimum code size with minimum register usage for the common cases. - * It is currently not believed that long longs are common, so we - * trade efficiency for the chars, shorts and longs against the long - * longs. - * - * Current stats with gcc 2.7.2.2 for these functions: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 3 7 3 - * 8 20 6 16 6 - * - * gcc 2.95.1 seems to code differently: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 4 7 4 - * 8 19 8 15 6 - * - * which may or may not be more efficient (depending upon whether - * you can afford the extra registers). Hopefully the gcc 2.95 - * is inteligent enough to decide if it is better to use the - * extra register, but evidence so far seems to suggest otherwise. 
- * - * Unfortunately, gcc is not able to optimise the high word - * out of long long >> 32, or the low word from long long << 32 - */ - -#define __get_unaligned_2_le(__p) \ - (unsigned int)(__p[0] | __p[1] << 8) - -#define __get_unaligned_2_be(__p) \ - (unsigned int)(__p[0] << 8 | __p[1]) - -#define __get_unaligned_4_le(__p) \ - (unsigned int)(__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24) - -#define __get_unaligned_4_be(__p) \ - (unsigned int)(__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3]) - -#define __get_unaligned_8_le(__p) \ - ((unsigned long long)__get_unaligned_4_le((__p+4)) << 32 | \ - __get_unaligned_4_le(__p)) - -#define __get_unaligned_8_be(__p) \ - ((unsigned long long)__get_unaligned_4_be(__p) << 32 | \ - __get_unaligned_4_be((__p+4))) - -#define __get_unaligned_le(ptr) \ - ((__force typeof(*(ptr)))({ \ - const __u8 *__p = (const __u8 *)(ptr); \ - __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \ - __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_le(__p), \ - __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_le(__p), \ - __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_le(__p), \ - (void)__bug_unaligned_x(__p))))); \ - })) - -#define __get_unaligned_be(ptr) \ - ((__force typeof(*(ptr)))({ \ - const __u8 *__p = (const __u8 *)(ptr); \ - __builtin_choose_expr(sizeof(*(ptr)) == 1, *__p, \ - __builtin_choose_expr(sizeof(*(ptr)) == 2, __get_unaligned_2_be(__p), \ - __builtin_choose_expr(sizeof(*(ptr)) == 4, __get_unaligned_4_be(__p), \ - __builtin_choose_expr(sizeof(*(ptr)) == 8, __get_unaligned_8_be(__p), \ - (void)__bug_unaligned_x(__p))))); \ - })) - - -static inline void __put_unaligned_2_le(__u32 __v, register __u8 *__p) -{ - *__p++ = __v; - *__p++ = __v >> 8; -} - -static inline void __put_unaligned_2_be(__u32 __v, register __u8 *__p) -{ - *__p++ = __v >> 8; - *__p++ = __v; -} - -static inline void __put_unaligned_4_le(__u32 __v, register __u8 *__p) -{ - __put_unaligned_2_le(__v >> 16, __p + 2); - 
__put_unaligned_2_le(__v, __p); -} - -static inline void __put_unaligned_4_be(__u32 __v, register __u8 *__p) -{ - __put_unaligned_2_be(__v >> 16, __p); - __put_unaligned_2_be(__v, __p + 2); -} - -static inline void __put_unaligned_8_le(const unsigned long long __v, register __u8 *__p) -{ - /* - * tradeoff: 8 bytes of stack for all unaligned puts (2 - * instructions), or an extra register in the long long - * case - go for the extra register. - */ - __put_unaligned_4_le(__v >> 32, __p+4); - __put_unaligned_4_le(__v, __p); -} - -static inline void __put_unaligned_8_be(const unsigned long long __v, register __u8 *__p) -{ - /* - * tradeoff: 8 bytes of stack for all unaligned puts (2 - * instructions), or an extra register in the long long - * case - go for the extra register. - */ - __put_unaligned_4_be(__v >> 32, __p); - __put_unaligned_4_be(__v, __p+4); -} - -/* - * Try to store an unaligned value as efficiently as possible. - */ -#define __put_unaligned_le(val,ptr) \ - ({ \ - (void)sizeof(*(ptr) = (val)); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(ptr) = (val); \ - break; \ - case 2: __put_unaligned_2_le((__force u16)(val),(__u8 *)(ptr)); \ - break; \ - case 4: __put_unaligned_4_le((__force u32)(val),(__u8 *)(ptr)); \ - break; \ - case 8: __put_unaligned_8_le((__force u64)(val),(__u8 *)(ptr)); \ - break; \ - default: __bug_unaligned_x(ptr); \ - break; \ - } \ - (void) 0; \ - }) - -#define __put_unaligned_be(val,ptr) \ - ({ \ - (void)sizeof(*(ptr) = (val)); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(ptr) = (val); \ - break; \ - case 2: __put_unaligned_2_be((__force u16)(val),(__u8 *)(ptr)); \ - break; \ - case 4: __put_unaligned_4_be((__force u32)(val),(__u8 *)(ptr)); \ - break; \ - case 8: __put_unaligned_8_be((__force u64)(val),(__u8 *)(ptr)); \ - break; \ - default: __bug_unaligned_x(ptr); \ - break; \ - } \ - (void) 0; \ - }) +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> /* * 
Select endianness @@ -178,4 +16,4 @@ static inline void __put_unaligned_8_be(const unsigned long long __v, register _ #define put_unaligned __put_unaligned_be #endif -#endif +#endif /* _ASM_ARM_UNALIGNED_H */ diff --git a/include/asm-avr32/unaligned.h b/include/asm-avr32/unaligned.h index 36f5fd43054..04187729047 100644 --- a/include/asm-avr32/unaligned.h +++ b/include/asm-avr32/unaligned.h @@ -1,5 +1,5 @@ -#ifndef __ASM_AVR32_UNALIGNED_H -#define __ASM_AVR32_UNALIGNED_H +#ifndef _ASM_AVR32_UNALIGNED_H +#define _ASM_AVR32_UNALIGNED_H /* * AVR32 can handle some unaligned accesses, depending on the @@ -11,6 +11,11 @@ * optimize word loads in general. */ -#include <asm-generic/unaligned.h> +#include <linux/unaligned/be_struct.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> -#endif /* __ASM_AVR32_UNALIGNED_H */ +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be + +#endif /* _ASM_AVR32_UNALIGNED_H */ diff --git a/include/asm-blackfin/unaligned.h b/include/asm-blackfin/unaligned.h index 10081dc241e..fd8a1d63494 100644 --- a/include/asm-blackfin/unaligned.h +++ b/include/asm-blackfin/unaligned.h @@ -1,6 +1,11 @@ -#ifndef __BFIN_UNALIGNED_H -#define __BFIN_UNALIGNED_H +#ifndef _ASM_BLACKFIN_UNALIGNED_H +#define _ASM_BLACKFIN_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> -#endif /* __BFIN_UNALIGNED_H */ +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le + +#endif /* _ASM_BLACKFIN_UNALIGNED_H */ diff --git a/include/asm-cris/unaligned.h b/include/asm-cris/unaligned.h index 7fbbb399f6f..7b3f3fec567 100644 --- a/include/asm-cris/unaligned.h +++ b/include/asm-cris/unaligned.h @@ -1,16 +1,13 @@ -#ifndef __CRIS_UNALIGNED_H -#define __CRIS_UNALIGNED_H +#ifndef _ASM_CRIS_UNALIGNED_H +#define _ASM_CRIS_UNALIGNED_H /* * CRIS can do unaligned accesses itself. 
- * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. */ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#define get_unaligned(ptr) (*(ptr)) +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif +#endif /* _ASM_CRIS_UNALIGNED_H */ diff --git a/include/asm-frv/unaligned.h b/include/asm-frv/unaligned.h index dc8e9c9bf6b..64ccc736f2d 100644 --- a/include/asm-frv/unaligned.h +++ b/include/asm-frv/unaligned.h @@ -9,194 +9,14 @@ * 2 of the License, or (at your option) any later version. */ -#ifndef _ASM_UNALIGNED_H -#define _ASM_UNALIGNED_H +#ifndef _ASM_FRV_UNALIGNED_H +#define _ASM_FRV_UNALIGNED_H +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> -/* - * Unaligned accesses on uClinux can't be performed in a fault handler - the - * CPU detects them as imprecise exceptions making this impossible. - * - * With the FR451, however, they are precise, and so we used to fix them up in - * the memory access fault handler. However, instruction bundling make this - * impractical. So, now we fall back to using memcpy. - */ -#ifdef CONFIG_MMU - -/* - * The asm statement in the macros below is a way to get GCC to copy a - * value from one variable to another without having any clue it's - * actually doing so, so that it won't have any idea that the values - * in the two variables are related. 
- */ - -#define get_unaligned(ptr) ({ \ - typeof((*(ptr))) __x; \ - void *__ptrcopy; \ - asm("" : "=r" (__ptrcopy) : "0" (ptr)); \ - memcpy(&__x, __ptrcopy, sizeof(*(ptr))); \ - __x; \ -}) - -#define put_unaligned(val, ptr) ({ \ - typeof((*(ptr))) __x = (val); \ - void *__ptrcopy; \ - asm("" : "=r" (__ptrcopy) : "0" (ptr)); \ - memcpy(__ptrcopy, &__x, sizeof(*(ptr))); \ -}) - -extern int handle_misalignment(unsigned long esr0, unsigned long ear0, unsigned long epcr0); - -#else - -#define get_unaligned(ptr) \ -({ \ - typeof(*(ptr)) x; \ - const char *__p = (const char *) (ptr); \ - \ - switch (sizeof(x)) { \ - case 1: \ - x = *(ptr); \ - break; \ - case 2: \ - { \ - uint8_t a; \ - asm(" ldub%I2 %M2,%0 \n" \ - " ldub%I3.p %M3,%1 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%1,%0 \n" \ - : "=&r"(x), "=&r"(a) \ - : "m"(__p[0]), "m"(__p[1]) \ - ); \ - break; \ - } \ - \ - case 4: \ - { \ - uint8_t a; \ - asm(" ldub%I2 %M2,%0 \n" \ - " ldub%I3.p %M3,%1 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%1,%0 \n" \ - " ldub%I4.p %M4,%1 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%1,%0 \n" \ - " ldub%I5.p %M5,%1 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%1,%0 \n" \ - : "=&r"(x), "=&r"(a) \ - : "m"(__p[0]), "m"(__p[1]), "m"(__p[2]), "m"(__p[3]) \ - ); \ - break; \ - } \ - \ - case 8: \ - { \ - union { uint64_t x; u32 y[2]; } z; \ - uint8_t a; \ - asm(" ldub%I3 %M3,%0 \n" \ - " ldub%I4.p %M4,%2 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%2,%0 \n" \ - " ldub%I5.p %M5,%2 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%2,%0 \n" \ - " ldub%I6.p %M6,%2 \n" \ - " slli %0,#8,%0 \n" \ - " or %0,%2,%0 \n" \ - " ldub%I7 %M7,%1 \n" \ - " ldub%I8.p %M8,%2 \n" \ - " slli %1,#8,%1 \n" \ - " or %1,%2,%1 \n" \ - " ldub%I9.p %M9,%2 \n" \ - " slli %1,#8,%1 \n" \ - " or %1,%2,%1 \n" \ - " ldub%I10.p %M10,%2 \n" \ - " slli %1,#8,%1 \n" \ - " or %1,%2,%1 \n" \ - : "=&r"(z.y[0]), "=&r"(z.y[1]), "=&r"(a) \ - : "m"(__p[0]), "m"(__p[1]), "m"(__p[2]), "m"(__p[3]), \ - "m"(__p[4]), "m"(__p[5]), "m"(__p[6]), "m"(__p[7]) \ - ); \ - x 
= z.x; \ - break; \ - } \ - \ - default: \ - x = 0; \ - BUG(); \ - break; \ - } \ - \ - x; \ -}) - -#define put_unaligned(val, ptr) \ -do { \ - char *__p = (char *) (ptr); \ - int x; \ - \ - switch (sizeof(*ptr)) { \ - case 2: \ - { \ - asm(" stb%I1.p %0,%M1 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I2 %0,%M2 \n" \ - : "=r"(x), "=m"(__p[1]), "=m"(__p[0]) \ - : "0"(val) \ - ); \ - break; \ - } \ - \ - case 4: \ - { \ - asm(" stb%I1.p %0,%M1 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I2.p %0,%M2 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I3.p %0,%M3 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I4 %0,%M4 \n" \ - : "=r"(x), "=m"(__p[3]), "=m"(__p[2]), "=m"(__p[1]), "=m"(__p[0]) \ - : "0"(val) \ - ); \ - break; \ - } \ - \ - case 8: \ - { \ - uint32_t __high, __low; \ - __high = (uint64_t)val >> 32; \ - __low = val & 0xffffffff; \ - asm(" stb%I2.p %0,%M2 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I3.p %0,%M3 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I4.p %0,%M4 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I5.p %0,%M5 \n" \ - " srli %0,#8,%0 \n" \ - " stb%I6.p %1,%M6 \n" \ - " srli %1,#8,%1 \n" \ - " stb%I7.p %1,%M7 \n" \ - " srli %1,#8,%1 \n" \ - " stb%I8.p %1,%M8 \n" \ - " srli %1,#8,%1 \n" \ - " stb%I9 %1,%M9 \n" \ - : "=&r"(__low), "=&r"(__high), "=m"(__p[7]), "=m"(__p[6]), \ - "=m"(__p[5]), "=m"(__p[4]), "=m"(__p[3]), "=m"(__p[2]), \ - "=m"(__p[1]), "=m"(__p[0]) \ - : "0"(__low), "1"(__high) \ - ); \ - break; \ - } \ - \ - default: \ - *(ptr) = (val); \ - break; \ - } \ -} while(0) - -#endif +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be -#endif +#endif /* _ASM_FRV_UNALIGNED_H */ diff --git a/include/asm-generic/ioctl.h b/include/asm-generic/ioctl.h index cd027298beb..86418138557 100644 --- a/include/asm-generic/ioctl.h +++ b/include/asm-generic/ioctl.h @@ -21,8 +21,19 @@ */ #define _IOC_NRBITS 8 #define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 + +/* + * Let any architecture override either of the following before + * including this file. 
+ */ + +#ifndef _IOC_SIZEBITS +# define _IOC_SIZEBITS 14 +#endif + +#ifndef _IOC_DIRBITS +# define _IOC_DIRBITS 2 +#endif #define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) #define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) @@ -35,11 +46,21 @@ #define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) /* - * Direction bits. + * Direction bits, which any architecture can choose to override + * before including this file. */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U + +#ifndef _IOC_NONE +# define _IOC_NONE 0U +#endif + +#ifndef _IOC_WRITE +# define _IOC_WRITE 1U +#endif + +#ifndef _IOC_READ +# define _IOC_READ 2U +#endif #define _IOC(dir,type,nr,size) \ (((dir) << _IOC_DIRSHIFT) | \ diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h deleted file mode 100644 index 2fe1b2e67f0..00000000000 --- a/include/asm-generic/unaligned.h +++ /dev/null @@ -1,124 +0,0 @@ -#ifndef _ASM_GENERIC_UNALIGNED_H_ -#define _ASM_GENERIC_UNALIGNED_H_ - -/* - * For the benefit of those who are trying to port Linux to another - * architecture, here are some C-language equivalents. - * - * This is based almost entirely upon Richard Henderson's - * asm-alpha/unaligned.h implementation. Some comments were - * taken from David Mosberger's asm-ia64/unaligned.h header. - */ - -#include <linux/types.h> - -/* - * The main single-value unaligned transfer routines. - */ -#define get_unaligned(ptr) \ - __get_unaligned((ptr), sizeof(*(ptr))) -#define put_unaligned(x,ptr) \ - ((void)sizeof(*(ptr)=(x)),\ - __put_unaligned((__force __u64)(x), (ptr), sizeof(*(ptr)))) - -/* - * This function doesn't actually exist. The idea is that when - * someone uses the macros below with an unsupported size (datatype), - * the linker will alert us to the problem via an unresolved reference - * error. 
- */ -extern void bad_unaligned_access_length(void) __attribute__((noreturn)); - -struct __una_u64 { __u64 x __attribute__((packed)); }; -struct __una_u32 { __u32 x __attribute__((packed)); }; -struct __una_u16 { __u16 x __attribute__((packed)); }; - -/* - * Elemental unaligned loads - */ - -static inline __u64 __uldq(const __u64 *addr) -{ - const struct __una_u64 *ptr = (const struct __una_u64 *) addr; - return ptr->x; -} - -static inline __u32 __uldl(const __u32 *addr) -{ - const struct __una_u32 *ptr = (const struct __una_u32 *) addr; - return ptr->x; -} - -static inline __u16 __uldw(const __u16 *addr) -{ - const struct __una_u16 *ptr = (const struct __una_u16 *) addr; - return ptr->x; -} - -/* - * Elemental unaligned stores - */ - -static inline void __ustq(__u64 val, __u64 *addr) -{ - struct __una_u64 *ptr = (struct __una_u64 *) addr; - ptr->x = val; -} - -static inline void __ustl(__u32 val, __u32 *addr) -{ - struct __una_u32 *ptr = (struct __una_u32 *) addr; - ptr->x = val; -} - -static inline void __ustw(__u16 val, __u16 *addr) -{ - struct __una_u16 *ptr = (struct __una_u16 *) addr; - ptr->x = val; -} - -#define __get_unaligned(ptr, size) ({ \ - const void *__gu_p = ptr; \ - __u64 __val; \ - switch (size) { \ - case 1: \ - __val = *(const __u8 *)__gu_p; \ - break; \ - case 2: \ - __val = __uldw(__gu_p); \ - break; \ - case 4: \ - __val = __uldl(__gu_p); \ - break; \ - case 8: \ - __val = __uldq(__gu_p); \ - break; \ - default: \ - bad_unaligned_access_length(); \ - }; \ - (__force __typeof__(*(ptr)))__val; \ -}) - -#define __put_unaligned(val, ptr, size) \ -({ \ - void *__gu_p = ptr; \ - switch (size) { \ - case 1: \ - *(__u8 *)__gu_p = (__force __u8)val; \ - break; \ - case 2: \ - __ustw((__force __u16)val, __gu_p); \ - break; \ - case 4: \ - __ustl((__force __u32)val, __gu_p); \ - break; \ - case 8: \ - __ustq(val, __gu_p); \ - break; \ - default: \ - bad_unaligned_access_length(); \ - }; \ - (void)0; \ -}) - -#endif /* _ASM_GENERIC_UNALIGNED_H */ diff 
--git a/include/asm-h8300/unaligned.h b/include/asm-h8300/unaligned.h index ffb67f47207..b8d06c70c2d 100644 --- a/include/asm-h8300/unaligned.h +++ b/include/asm-h8300/unaligned.h @@ -1,15 +1,11 @@ -#ifndef __H8300_UNALIGNED_H -#define __H8300_UNALIGNED_H +#ifndef _ASM_H8300_UNALIGNED_H +#define _ASM_H8300_UNALIGNED_H +#include <linux/unaligned/be_memmove.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> -/* Use memmove here, so gcc does not insert a __builtin_memcpy. */ +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be -#define get_unaligned(ptr) \ - ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) - -#define put_unaligned(val, ptr) \ - ({ __typeof__(*(ptr)) __tmp = (val); \ - memmove((ptr), &__tmp, sizeof(*(ptr))); \ - (void)0; }) - -#endif +#endif /* _ASM_H8300_UNALIGNED_H */ diff --git a/include/asm-ia64/dma-mapping.h b/include/asm-ia64/dma-mapping.h index f1735a22d0e..9f0df9bd46b 100644 --- a/include/asm-ia64/dma-mapping.h +++ b/include/asm-ia64/dma-mapping.h @@ -23,10 +23,30 @@ dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, { dma_free_coherent(dev, size, cpu_addr, dma_handle); } -#define dma_map_single platform_dma_map_single -#define dma_map_sg platform_dma_map_sg -#define dma_unmap_single platform_dma_unmap_single -#define dma_unmap_sg platform_dma_unmap_sg +#define dma_map_single_attrs platform_dma_map_single_attrs +static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, + size_t size, int dir) +{ + return dma_map_single_attrs(dev, cpu_addr, size, dir, NULL); +} +#define dma_map_sg_attrs platform_dma_map_sg_attrs +static inline int dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, int dir) +{ + return dma_map_sg_attrs(dev, sgl, nents, dir, NULL); +} +#define dma_unmap_single_attrs platform_dma_unmap_single_attrs +static inline void dma_unmap_single(struct device *dev, dma_addr_t cpu_addr, + size_t 
size, int dir) +{ + return dma_unmap_single_attrs(dev, cpu_addr, size, dir, NULL); +} +#define dma_unmap_sg_attrs platform_dma_unmap_sg_attrs +static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, int dir) +{ + return dma_unmap_sg_attrs(dev, sgl, nents, dir, NULL); +} #define dma_sync_single_for_cpu platform_dma_sync_single_for_cpu #define dma_sync_sg_for_cpu platform_dma_sync_sg_for_cpu #define dma_sync_single_for_device platform_dma_sync_single_for_device diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h index c201a2020aa..9f020eb825c 100644 --- a/include/asm-ia64/machvec.h +++ b/include/asm-ia64/machvec.h @@ -22,6 +22,7 @@ struct pci_bus; struct task_struct; struct pci_dev; struct msi_desc; +struct dma_attrs; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_cpu_init_t (void); @@ -56,6 +57,11 @@ typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist typedef int ia64_mv_dma_mapping_error (dma_addr_t dma_addr); typedef int ia64_mv_dma_supported (struct device *, u64); +typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *); +typedef void ia64_mv_dma_unmap_single_attrs (struct device *, dma_addr_t, size_t, int, struct dma_attrs *); +typedef int ia64_mv_dma_map_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); +typedef void ia64_mv_dma_unmap_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); + /* * WARNING: The legacy I/O space is _architected_. 
Platforms are * expected to follow this architected model (see Section 10.7 in the @@ -136,10 +142,10 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_dma_init ia64_mv.dma_init # define platform_dma_alloc_coherent ia64_mv.dma_alloc_coherent # define platform_dma_free_coherent ia64_mv.dma_free_coherent -# define platform_dma_map_single ia64_mv.dma_map_single -# define platform_dma_unmap_single ia64_mv.dma_unmap_single -# define platform_dma_map_sg ia64_mv.dma_map_sg -# define platform_dma_unmap_sg ia64_mv.dma_unmap_sg +# define platform_dma_map_single_attrs ia64_mv.dma_map_single_attrs +# define platform_dma_unmap_single_attrs ia64_mv.dma_unmap_single_attrs +# define platform_dma_map_sg_attrs ia64_mv.dma_map_sg_attrs +# define platform_dma_unmap_sg_attrs ia64_mv.dma_unmap_sg_attrs # define platform_dma_sync_single_for_cpu ia64_mv.dma_sync_single_for_cpu # define platform_dma_sync_sg_for_cpu ia64_mv.dma_sync_sg_for_cpu # define platform_dma_sync_single_for_device ia64_mv.dma_sync_single_for_device @@ -190,10 +196,10 @@ struct ia64_machine_vector { ia64_mv_dma_init *dma_init; ia64_mv_dma_alloc_coherent *dma_alloc_coherent; ia64_mv_dma_free_coherent *dma_free_coherent; - ia64_mv_dma_map_single *dma_map_single; - ia64_mv_dma_unmap_single *dma_unmap_single; - ia64_mv_dma_map_sg *dma_map_sg; - ia64_mv_dma_unmap_sg *dma_unmap_sg; + ia64_mv_dma_map_single_attrs *dma_map_single_attrs; + ia64_mv_dma_unmap_single_attrs *dma_unmap_single_attrs; + ia64_mv_dma_map_sg_attrs *dma_map_sg_attrs; + ia64_mv_dma_unmap_sg_attrs *dma_unmap_sg_attrs; ia64_mv_dma_sync_single_for_cpu *dma_sync_single_for_cpu; ia64_mv_dma_sync_sg_for_cpu *dma_sync_sg_for_cpu; ia64_mv_dma_sync_single_for_device *dma_sync_single_for_device; @@ -240,10 +246,10 @@ struct ia64_machine_vector { platform_dma_init, \ platform_dma_alloc_coherent, \ platform_dma_free_coherent, \ - platform_dma_map_single, \ - platform_dma_unmap_single, \ - platform_dma_map_sg, \ - platform_dma_unmap_sg, 
\ + platform_dma_map_single_attrs, \ + platform_dma_unmap_single_attrs, \ + platform_dma_map_sg_attrs, \ + platform_dma_unmap_sg_attrs, \ platform_dma_sync_single_for_cpu, \ platform_dma_sync_sg_for_cpu, \ platform_dma_sync_single_for_device, \ @@ -292,9 +298,13 @@ extern ia64_mv_dma_init swiotlb_init; extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent; extern ia64_mv_dma_free_coherent swiotlb_free_coherent; extern ia64_mv_dma_map_single swiotlb_map_single; +extern ia64_mv_dma_map_single_attrs swiotlb_map_single_attrs; extern ia64_mv_dma_unmap_single swiotlb_unmap_single; +extern ia64_mv_dma_unmap_single_attrs swiotlb_unmap_single_attrs; extern ia64_mv_dma_map_sg swiotlb_map_sg; +extern ia64_mv_dma_map_sg_attrs swiotlb_map_sg_attrs; extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg; +extern ia64_mv_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs; extern ia64_mv_dma_sync_single_for_cpu swiotlb_sync_single_for_cpu; extern ia64_mv_dma_sync_sg_for_cpu swiotlb_sync_sg_for_cpu; extern ia64_mv_dma_sync_single_for_device swiotlb_sync_single_for_device; @@ -340,17 +350,17 @@ extern ia64_mv_dma_supported swiotlb_dma_supported; #ifndef platform_dma_free_coherent # define platform_dma_free_coherent swiotlb_free_coherent #endif -#ifndef platform_dma_map_single -# define platform_dma_map_single swiotlb_map_single +#ifndef platform_dma_map_single_attrs +# define platform_dma_map_single_attrs swiotlb_map_single_attrs #endif -#ifndef platform_dma_unmap_single -# define platform_dma_unmap_single swiotlb_unmap_single +#ifndef platform_dma_unmap_single_attrs +# define platform_dma_unmap_single_attrs swiotlb_unmap_single_attrs #endif -#ifndef platform_dma_map_sg -# define platform_dma_map_sg swiotlb_map_sg +#ifndef platform_dma_map_sg_attrs +# define platform_dma_map_sg_attrs swiotlb_map_sg_attrs #endif -#ifndef platform_dma_unmap_sg -# define platform_dma_unmap_sg swiotlb_unmap_sg +#ifndef platform_dma_unmap_sg_attrs +# define platform_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs #endif 
#ifndef platform_dma_sync_single_for_cpu # define platform_dma_sync_single_for_cpu swiotlb_sync_single_for_cpu diff --git a/include/asm-ia64/machvec_hpzx1.h b/include/asm-ia64/machvec_hpzx1.h index e90daf9ce34..2f57f5144b9 100644 --- a/include/asm-ia64/machvec_hpzx1.h +++ b/include/asm-ia64/machvec_hpzx1.h @@ -4,10 +4,10 @@ extern ia64_mv_setup_t dig_setup; extern ia64_mv_dma_alloc_coherent sba_alloc_coherent; extern ia64_mv_dma_free_coherent sba_free_coherent; -extern ia64_mv_dma_map_single sba_map_single; -extern ia64_mv_dma_unmap_single sba_unmap_single; -extern ia64_mv_dma_map_sg sba_map_sg; -extern ia64_mv_dma_unmap_sg sba_unmap_sg; +extern ia64_mv_dma_map_single_attrs sba_map_single_attrs; +extern ia64_mv_dma_unmap_single_attrs sba_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs sba_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs sba_unmap_sg_attrs; extern ia64_mv_dma_supported sba_dma_supported; extern ia64_mv_dma_mapping_error sba_dma_mapping_error; @@ -23,10 +23,10 @@ extern ia64_mv_dma_mapping_error sba_dma_mapping_error; #define platform_dma_init machvec_noop #define platform_dma_alloc_coherent sba_alloc_coherent #define platform_dma_free_coherent sba_free_coherent -#define platform_dma_map_single sba_map_single -#define platform_dma_unmap_single sba_unmap_single -#define platform_dma_map_sg sba_map_sg -#define platform_dma_unmap_sg sba_unmap_sg +#define platform_dma_map_single_attrs sba_map_single_attrs +#define platform_dma_unmap_single_attrs sba_unmap_single_attrs +#define platform_dma_map_sg_attrs sba_map_sg_attrs +#define platform_dma_unmap_sg_attrs sba_unmap_sg_attrs #define platform_dma_sync_single_for_cpu machvec_dma_sync_single #define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg #define platform_dma_sync_single_for_device machvec_dma_sync_single diff --git a/include/asm-ia64/machvec_hpzx1_swiotlb.h b/include/asm-ia64/machvec_hpzx1_swiotlb.h index f00a34a148f..a842cdda827 100644 --- a/include/asm-ia64/machvec_hpzx1_swiotlb.h +++ 
b/include/asm-ia64/machvec_hpzx1_swiotlb.h @@ -4,10 +4,10 @@ extern ia64_mv_setup_t dig_setup; extern ia64_mv_dma_alloc_coherent hwsw_alloc_coherent; extern ia64_mv_dma_free_coherent hwsw_free_coherent; -extern ia64_mv_dma_map_single hwsw_map_single; -extern ia64_mv_dma_unmap_single hwsw_unmap_single; -extern ia64_mv_dma_map_sg hwsw_map_sg; -extern ia64_mv_dma_unmap_sg hwsw_unmap_sg; +extern ia64_mv_dma_map_single_attrs hwsw_map_single_attrs; +extern ia64_mv_dma_unmap_single_attrs hwsw_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs hwsw_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs hwsw_unmap_sg_attrs; extern ia64_mv_dma_supported hwsw_dma_supported; extern ia64_mv_dma_mapping_error hwsw_dma_mapping_error; extern ia64_mv_dma_sync_single_for_cpu hwsw_sync_single_for_cpu; @@ -28,10 +28,10 @@ extern ia64_mv_dma_sync_sg_for_device hwsw_sync_sg_for_device; #define platform_dma_init machvec_noop #define platform_dma_alloc_coherent hwsw_alloc_coherent #define platform_dma_free_coherent hwsw_free_coherent -#define platform_dma_map_single hwsw_map_single -#define platform_dma_unmap_single hwsw_unmap_single -#define platform_dma_map_sg hwsw_map_sg -#define platform_dma_unmap_sg hwsw_unmap_sg +#define platform_dma_map_single_attrs hwsw_map_single_attrs +#define platform_dma_unmap_single_attrs hwsw_unmap_single_attrs +#define platform_dma_map_sg_attrs hwsw_map_sg_attrs +#define platform_dma_unmap_sg_attrs hwsw_unmap_sg_attrs #define platform_dma_supported hwsw_dma_supported #define platform_dma_mapping_error hwsw_dma_mapping_error #define platform_dma_sync_single_for_cpu hwsw_sync_single_for_cpu diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index 61439a7f5b0..781308ea7b8 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -57,10 +57,10 @@ extern ia64_mv_readl_t __sn_readl_relaxed; extern ia64_mv_readq_t __sn_readq_relaxed; extern ia64_mv_dma_alloc_coherent sn_dma_alloc_coherent; extern 
ia64_mv_dma_free_coherent sn_dma_free_coherent; -extern ia64_mv_dma_map_single sn_dma_map_single; -extern ia64_mv_dma_unmap_single sn_dma_unmap_single; -extern ia64_mv_dma_map_sg sn_dma_map_sg; -extern ia64_mv_dma_unmap_sg sn_dma_unmap_sg; +extern ia64_mv_dma_map_single_attrs sn_dma_map_single_attrs; +extern ia64_mv_dma_unmap_single_attrs sn_dma_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs sn_dma_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs sn_dma_unmap_sg_attrs; extern ia64_mv_dma_sync_single_for_cpu sn_dma_sync_single_for_cpu; extern ia64_mv_dma_sync_sg_for_cpu sn_dma_sync_sg_for_cpu; extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device; @@ -113,10 +113,10 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_dma_init machvec_noop #define platform_dma_alloc_coherent sn_dma_alloc_coherent #define platform_dma_free_coherent sn_dma_free_coherent -#define platform_dma_map_single sn_dma_map_single -#define platform_dma_unmap_single sn_dma_unmap_single -#define platform_dma_map_sg sn_dma_map_sg -#define platform_dma_unmap_sg sn_dma_unmap_sg +#define platform_dma_map_single_attrs sn_dma_map_single_attrs +#define platform_dma_unmap_single_attrs sn_dma_unmap_single_attrs +#define platform_dma_map_sg_attrs sn_dma_map_sg_attrs +#define platform_dma_unmap_sg_attrs sn_dma_unmap_sg_attrs #define platform_dma_sync_single_for_cpu sn_dma_sync_single_for_cpu #define platform_dma_sync_sg_for_cpu sn_dma_sync_sg_for_cpu #define platform_dma_sync_single_for_device sn_dma_sync_single_for_device diff --git a/include/asm-ia64/unaligned.h b/include/asm-ia64/unaligned.h index bb855988810..7bddc7f5858 100644 --- a/include/asm-ia64/unaligned.h +++ b/include/asm-ia64/unaligned.h @@ -1,6 +1,11 @@ #ifndef _ASM_IA64_UNALIGNED_H #define _ASM_IA64_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/le_struct.h> +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/generic.h> + +#define get_unaligned 
__get_unaligned_le +#define put_unaligned __put_unaligned_le #endif /* _ASM_IA64_UNALIGNED_H */ diff --git a/include/asm-m32r/unaligned.h b/include/asm-m32r/unaligned.h index fccc180c391..377eb20d1ec 100644 --- a/include/asm-m32r/unaligned.h +++ b/include/asm-m32r/unaligned.h @@ -1,19 +1,18 @@ #ifndef _ASM_M32R_UNALIGNED_H #define _ASM_M32R_UNALIGNED_H -/* - * For the benefit of those who are trying to port Linux to another - * architecture, here are some C-language equivalents. - */ - -#include <asm/string.h> - -#define get_unaligned(ptr) \ - ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) - -#define put_unaligned(val, ptr) \ - ({ __typeof__(*(ptr)) __tmp = (val); \ - memmove((ptr), &__tmp, sizeof(*(ptr))); \ - (void)0; }) +#if defined(__LITTLE_ENDIAN__) +# include <linux/unaligned/le_memmove.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#else +# include <linux/unaligned/be_memmove.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#endif #endif /* _ASM_M32R_UNALIGNED_H */ diff --git a/include/asm-m68k/unaligned.h b/include/asm-m68k/unaligned.h index 804cb3f888f..77698f2dc33 100644 --- a/include/asm-m68k/unaligned.h +++ b/include/asm-m68k/unaligned.h @@ -1,16 +1,13 @@ -#ifndef __M68K_UNALIGNED_H -#define __M68K_UNALIGNED_H +#ifndef _ASM_M68K_UNALIGNED_H +#define _ASM_M68K_UNALIGNED_H /* * The m68k can do unaligned accesses itself. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. 
*/ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#define get_unaligned(ptr) (*(ptr)) +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif +#endif /* _ASM_M68K_UNALIGNED_H */ diff --git a/include/asm-m68knommu/unaligned.h b/include/asm-m68knommu/unaligned.h index 869e9dd24f5..eb1ea4cb9a5 100644 --- a/include/asm-m68knommu/unaligned.h +++ b/include/asm-m68knommu/unaligned.h @@ -1,23 +1,25 @@ -#ifndef __M68K_UNALIGNED_H -#define __M68K_UNALIGNED_H +#ifndef _ASM_M68KNOMMU_UNALIGNED_H +#define _ASM_M68KNOMMU_UNALIGNED_H #ifdef CONFIG_COLDFIRE +#include <linux/unaligned/be_struct.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> -#include <asm-generic/unaligned.h> +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #else /* * The m68k can do unaligned accesses itself. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. 
*/ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#define get_unaligned(ptr) (*(ptr)) -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #endif -#endif +#endif /* _ASM_M68KNOMMU_UNALIGNED_H */ diff --git a/include/asm-mips/unaligned.h b/include/asm-mips/unaligned.h index 3249049e93a..79240494857 100644 --- a/include/asm-mips/unaligned.h +++ b/include/asm-mips/unaligned.h @@ -5,25 +5,24 @@ * * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) */ -#ifndef __ASM_GENERIC_UNALIGNED_H -#define __ASM_GENERIC_UNALIGNED_H +#ifndef _ASM_MIPS_UNALIGNED_H +#define _ASM_MIPS_UNALIGNED_H #include <linux/compiler.h> +#if defined(__MIPSEB__) +# include <linux/unaligned/be_struct.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#elif defined(__MIPSEL__) +# include <linux/unaligned/le_struct.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#else +# error "MIPS, but neither __MIPSEB__, nor __MIPSEL__???" +#endif -#define get_unaligned(ptr) \ -({ \ - struct __packed { \ - typeof(*(ptr)) __v; \ - } *__p = (void *) (ptr); \ - __p->__v; \ -}) - -#define put_unaligned(val, ptr) \ -do { \ - struct __packed { \ - typeof(*(ptr)) __v; \ - } *__p = (void *) (ptr); \ - __p->__v = (val); \ -} while(0) - -#endif /* __ASM_GENERIC_UNALIGNED_H */ +#endif /* _ASM_MIPS_UNALIGNED_H */ diff --git a/include/asm-mn10300/unaligned.h b/include/asm-mn10300/unaligned.h index cad3afbd035..0df671318ae 100644 --- a/include/asm-mn10300/unaligned.h +++ b/include/asm-mn10300/unaligned.h @@ -8,129 +8,13 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. 
*/ -#ifndef _ASM_UNALIGNED_H -#define _ASM_UNALIGNED_H +#ifndef _ASM_MN10300_UNALIGNED_H +#define _ASM_MN10300_UNALIGNED_H -#include <asm/types.h> +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#if 0 -extern int __bug_unaligned_x(void *ptr); +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le -/* - * What is the most efficient way of loading/storing an unaligned value? - * - * That is the subject of this file. Efficiency here is defined as - * minimum code size with minimum register usage for the common cases. - * It is currently not believed that long longs are common, so we - * trade efficiency for the chars, shorts and longs against the long - * longs. - * - * Current stats with gcc 2.7.2.2 for these functions: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 3 7 3 - * 8 20 6 16 6 - * - * gcc 2.95.1 seems to code differently: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 4 7 4 - * 8 19 8 15 6 - * - * which may or may not be more efficient (depending upon whether - * you can afford the extra registers). Hopefully the gcc 2.95 - * is inteligent enough to decide if it is better to use the - * extra register, but evidence so far seems to suggest otherwise. 
- * - * Unfortunately, gcc is not able to optimise the high word - * out of long long >> 32, or the low word from long long << 32 - */ - -#define __get_unaligned_2(__p) \ - (__p[0] | __p[1] << 8) - -#define __get_unaligned_4(__p) \ - (__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24) - -#define get_unaligned(ptr) \ -({ \ - unsigned int __v1, __v2; \ - __typeof__(*(ptr)) __v; \ - __u8 *__p = (__u8 *)(ptr); \ - \ - switch (sizeof(*(ptr))) { \ - case 1: __v = *(ptr); break; \ - case 2: __v = __get_unaligned_2(__p); break; \ - case 4: __v = __get_unaligned_4(__p); break; \ - case 8: \ - __v2 = __get_unaligned_4((__p+4)); \ - __v1 = __get_unaligned_4(__p); \ - __v = ((unsigned long long)__v2 << 32 | __v1); \ - break; \ - default: __v = __bug_unaligned_x(__p); break; \ - } \ - __v; \ -}) - - -static inline void __put_unaligned_2(__u32 __v, register __u8 *__p) -{ - *__p++ = __v; - *__p++ = __v >> 8; -} - -static inline void __put_unaligned_4(__u32 __v, register __u8 *__p) -{ - __put_unaligned_2(__v >> 16, __p + 2); - __put_unaligned_2(__v, __p); -} - -static inline void __put_unaligned_8(const unsigned long long __v, __u8 *__p) -{ - /* - * tradeoff: 8 bytes of stack for all unaligned puts (2 - * instructions), or an extra register in the long long - * case - go for the extra register. - */ - __put_unaligned_4(__v >> 32, __p + 4); - __put_unaligned_4(__v, __p); -} - -/* - * Try to store an unaligned value as efficiently as possible. 
- */ -#define put_unaligned(val, ptr) \ - ({ \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(ptr) = (val); \ - break; \ - case 2: \ - __put_unaligned_2((val), (__u8 *)(ptr)); \ - break; \ - case 4: \ - __put_unaligned_4((val), (__u8 *)(ptr)); \ - break; \ - case 8: \ - __put_unaligned_8((val), (__u8 *)(ptr)); \ - break; \ - default: \ - __bug_unaligned_x(ptr); \ - break; \ - } \ - (void) 0; \ - }) - - -#else - -#define get_unaligned(ptr) (*(ptr)) -#define put_unaligned(val, ptr) ({ *(ptr) = (val); (void) 0; }) - -#endif - -#endif +#endif /* _ASM_MN10300_UNALIGNED_H */ diff --git a/include/asm-parisc/unaligned.h b/include/asm-parisc/unaligned.h index 53c905838d9..dfc5d3321a5 100644 --- a/include/asm-parisc/unaligned.h +++ b/include/asm-parisc/unaligned.h @@ -1,7 +1,11 @@ -#ifndef _ASM_PARISC_UNALIGNED_H_ -#define _ASM_PARISC_UNALIGNED_H_ +#ifndef _ASM_PARISC_UNALIGNED_H +#define _ASM_PARISC_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/be_struct.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #ifdef __KERNEL__ struct pt_regs; @@ -9,4 +13,4 @@ void handle_unaligned(struct pt_regs *regs); int check_unaligned(struct pt_regs *regs); #endif -#endif /* _ASM_PARISC_UNALIGNED_H_ */ +#endif /* _ASM_PARISC_UNALIGNED_H */ diff --git a/include/asm-powerpc/irq.h b/include/asm-powerpc/irq.h index b5c03127a9b..5089deb8fec 100644 --- a/include/asm-powerpc/irq.h +++ b/include/asm-powerpc/irq.h @@ -619,8 +619,6 @@ struct pt_regs; #define __ARCH_HAS_DO_SOFTIRQ -extern void __do_softirq(void); - #ifdef CONFIG_IRQSTACKS /* * Per-cpu stacks for handling hard and soft interrupts. 
diff --git a/include/asm-powerpc/processor.h b/include/asm-powerpc/processor.h index fd98ca998b4..cf83f2d7e2a 100644 --- a/include/asm-powerpc/processor.h +++ b/include/asm-powerpc/processor.h @@ -138,6 +138,8 @@ typedef struct { struct thread_struct { unsigned long ksp; /* Kernel stack pointer */ + unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */ + #ifdef CONFIG_PPC64 unsigned long ksp_vsid; #endif @@ -182,11 +184,14 @@ struct thread_struct { #define ARCH_MIN_TASKALIGN 16 #define INIT_SP (sizeof(init_stack) + (unsigned long) &init_stack) +#define INIT_SP_LIMIT \ + (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack) #ifdef CONFIG_PPC32 #define INIT_THREAD { \ .ksp = INIT_SP, \ + .ksp_limit = INIT_SP_LIMIT, \ .fs = KERNEL_DS, \ .pgdir = swapper_pg_dir, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \ @@ -194,6 +199,7 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ + .ksp_limit = INIT_SP_LIMIT, \ .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .fs = KERNEL_DS, \ .fpr = {0}, \ diff --git a/include/asm-ppc/rio.h b/include/asm-powerpc/rio.h index 0018bf80cb2..0018bf80cb2 100644 --- a/include/asm-ppc/rio.h +++ b/include/asm-powerpc/rio.h diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index fab1674b31b..2b6559a6d11 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -204,7 +204,7 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ * Changes the memory location '*ptr' to be val and returns * the previous value stored there. */ -static __inline__ unsigned long +static __always_inline unsigned long __xchg_u32(volatile void *p, unsigned long val) { unsigned long prev; @@ -229,7 +229,7 @@ __xchg_u32(volatile void *p, unsigned long val) * Changes the memory location '*ptr' to be val and returns * the previous value stored there. 
*/ -static __inline__ unsigned long +static __always_inline unsigned long __xchg_u32_local(volatile void *p, unsigned long val) { unsigned long prev; @@ -247,7 +247,7 @@ __xchg_u32_local(volatile void *p, unsigned long val) } #ifdef CONFIG_PPC64 -static __inline__ unsigned long +static __always_inline unsigned long __xchg_u64(volatile void *p, unsigned long val) { unsigned long prev; @@ -266,7 +266,7 @@ __xchg_u64(volatile void *p, unsigned long val) return prev; } -static __inline__ unsigned long +static __always_inline unsigned long __xchg_u64_local(volatile void *p, unsigned long val) { unsigned long prev; @@ -290,7 +290,7 @@ __xchg_u64_local(volatile void *p, unsigned long val) */ extern void __xchg_called_with_bad_pointer(void); -static __inline__ unsigned long +static __always_inline unsigned long __xchg(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { @@ -305,7 +305,7 @@ __xchg(volatile void *ptr, unsigned long x, unsigned int size) return x; } -static __inline__ unsigned long +static __always_inline unsigned long __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { @@ -338,7 +338,7 @@ __xchg_local(volatile void *ptr, unsigned long x, unsigned int size) */ #define __HAVE_ARCH_CMPXCHG 1 -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { unsigned int prev; @@ -361,7 +361,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) return prev; } -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, unsigned long new) { @@ -384,7 +384,7 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old, } #ifdef CONFIG_PPC64 -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) { unsigned long prev; @@ -406,7 
+406,7 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) return prev; } -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, unsigned long new) { @@ -432,7 +432,7 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old, if something tries to do an invalid cmpxchg(). */ extern void __cmpxchg_called_with_bad_pointer(void); -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { @@ -448,7 +448,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, return old; } -static __inline__ unsigned long +static __always_inline unsigned long __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size) { diff --git a/include/asm-powerpc/unaligned.h b/include/asm-powerpc/unaligned.h index 6c95dfa2652..5f1b1e3c213 100644 --- a/include/asm-powerpc/unaligned.h +++ b/include/asm-powerpc/unaligned.h @@ -5,15 +5,12 @@ /* * The PowerPC can do unaligned accesses itself in big endian mode. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. 
*/ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#define get_unaligned(ptr) (*(ptr)) - -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UNALIGNED_H */ diff --git a/include/asm-s390/unaligned.h b/include/asm-s390/unaligned.h index 8ee86dbedd1..da9627afe5d 100644 --- a/include/asm-s390/unaligned.h +++ b/include/asm-s390/unaligned.h @@ -1,24 +1,13 @@ -/* - * include/asm-s390/unaligned.h - * - * S390 version - * - * Derived from "include/asm-i386/unaligned.h" - */ - -#ifndef __S390_UNALIGNED_H -#define __S390_UNALIGNED_H +#ifndef _ASM_S390_UNALIGNED_H +#define _ASM_S390_UNALIGNED_H /* * The S390 can do unaligned accesses itself. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. */ +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -#define get_unaligned(ptr) (*(ptr)) - -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be -#endif +#endif /* _ASM_S390_UNALIGNED_H */ diff --git a/include/asm-sh/unaligned.h b/include/asm-sh/unaligned.h index 5250e3063b4..c1641a01d50 100644 --- a/include/asm-sh/unaligned.h +++ b/include/asm-sh/unaligned.h @@ -1,7 +1,19 @@ -#ifndef __ASM_SH_UNALIGNED_H -#define __ASM_SH_UNALIGNED_H +#ifndef _ASM_SH_UNALIGNED_H +#define _ASM_SH_UNALIGNED_H /* SH can't handle unaligned accesses. 
*/ -#include <asm-generic/unaligned.h> +#ifdef __LITTLE_ENDIAN__ +# include <linux/unaligned/le_struct.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#else +# include <linux/unaligned/be_struct.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#endif -#endif /* __ASM_SH_UNALIGNED_H */ +#endif /* _ASM_SH_UNALIGNED_H */ diff --git a/include/asm-sparc/unaligned.h b/include/asm-sparc/unaligned.h index b6f8eddd30a..11d2d5fb590 100644 --- a/include/asm-sparc/unaligned.h +++ b/include/asm-sparc/unaligned.h @@ -1,6 +1,10 @@ -#ifndef _ASM_SPARC_UNALIGNED_H_ -#define _ASM_SPARC_UNALIGNED_H_ +#ifndef _ASM_SPARC_UNALIGNED_H +#define _ASM_SPARC_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/be_struct.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #endif /* _ASM_SPARC_UNALIGNED_H */ diff --git a/include/asm-sparc64/unaligned.h b/include/asm-sparc64/unaligned.h index 1ed3ba53777..edcebb09441 100644 --- a/include/asm-sparc64/unaligned.h +++ b/include/asm-sparc64/unaligned.h @@ -1,6 +1,10 @@ -#ifndef _ASM_SPARC64_UNALIGNED_H_ -#define _ASM_SPARC64_UNALIGNED_H_ +#ifndef _ASM_SPARC64_UNALIGNED_H +#define _ASM_SPARC64_UNALIGNED_H -#include <asm-generic/unaligned.h> +#include <linux/unaligned/be_struct.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be #endif /* _ASM_SPARC64_UNALIGNED_H */ diff --git a/include/asm-um/unaligned.h b/include/asm-um/unaligned.h index 1d2497c5727..a47196974e3 100644 --- a/include/asm-um/unaligned.h +++ b/include/asm-um/unaligned.h @@ -1,6 +1,6 @@ 
-#ifndef __UM_UNALIGNED_H -#define __UM_UNALIGNED_H +#ifndef _ASM_UM_UNALIGNED_H +#define _ASM_UM_UNALIGNED_H #include "asm/arch/unaligned.h" -#endif +#endif /* _ASM_UM_UNALIGNED_H */ diff --git a/include/asm-v850/unaligned.h b/include/asm-v850/unaligned.h index e30b18653a9..53122b28491 100644 --- a/include/asm-v850/unaligned.h +++ b/include/asm-v850/unaligned.h @@ -1,6 +1,4 @@ /* - * include/asm-v850/unaligned.h -- Unaligned memory access - * * Copyright (C) 2001 NEC Corporation * Copyright (C) 2001 Miles Bader <miles@gnu.org> * @@ -8,123 +6,17 @@ * Public License. See the file COPYING in the main directory of this * archive for more details. * - * This file is a copy of the arm version, include/asm-arm/unaligned.h - * * Note that some v850 chips support unaligned access, but it seems too * annoying to use. */ +#ifndef _ASM_V850_UNALIGNED_H +#define _ASM_V850_UNALIGNED_H -#ifndef __V850_UNALIGNED_H__ -#define __V850_UNALIGNED_H__ - -#include <asm/types.h> - -extern int __bug_unaligned_x(void *ptr); - -/* - * What is the most efficient way of loading/storing an unaligned value? - * - * That is the subject of this file. Efficiency here is defined as - * minimum code size with minimum register usage for the common cases. - * It is currently not believed that long longs are common, so we - * trade efficiency for the chars, shorts and longs against the long - * longs. - * - * Current stats with gcc 2.7.2.2 for these functions: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 3 7 3 - * 8 20 6 16 6 - * - * gcc 2.95.1 seems to code differently: - * - * ptrsize get: code regs put: code regs - * 1 1 1 1 2 - * 2 3 2 3 2 - * 4 7 4 7 4 - * 8 19 8 15 6 - * - * which may or may not be more efficient (depending upon whether - * you can afford the extra registers). Hopefully the gcc 2.95 - * is inteligent enough to decide if it is better to use the - * extra register, but evidence so far seems to suggest otherwise. 
- * - * Unfortunately, gcc is not able to optimise the high word - * out of long long >> 32, or the low word from long long << 32 - */ - -#define __get_unaligned_2(__p) \ - (__p[0] | __p[1] << 8) - -#define __get_unaligned_4(__p) \ - (__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24) - -#define get_unaligned(ptr) \ - ({ \ - __typeof__(*(ptr)) __v; \ - __u8 *__p = (__u8 *)(ptr); \ - switch (sizeof(*(ptr))) { \ - case 1: __v = *(ptr); break; \ - case 2: __v = __get_unaligned_2(__p); break; \ - case 4: __v = __get_unaligned_4(__p); break; \ - case 8: { \ - unsigned int __v1, __v2; \ - __v2 = __get_unaligned_4((__p+4)); \ - __v1 = __get_unaligned_4(__p); \ - __v = ((unsigned long long)__v2 << 32 | __v1); \ - } \ - break; \ - default: __v = __bug_unaligned_x(__p); break; \ - } \ - __v; \ - }) - - -static inline void __put_unaligned_2(__u32 __v, register __u8 *__p) -{ - *__p++ = __v; - *__p++ = __v >> 8; -} - -static inline void __put_unaligned_4(__u32 __v, register __u8 *__p) -{ - __put_unaligned_2(__v >> 16, __p + 2); - __put_unaligned_2(__v, __p); -} - -static inline void __put_unaligned_8(const unsigned long long __v, register __u8 *__p) -{ - /* - * tradeoff: 8 bytes of stack for all unaligned puts (2 - * instructions), or an extra register in the long long - * case - go for the extra register. - */ - __put_unaligned_4(__v >> 32, __p+4); - __put_unaligned_4(__v, __p); -} - -/* - * Try to store an unaligned value as efficiently as possible. 
- */ -#define put_unaligned(val,ptr) \ - ({ \ - switch (sizeof(*(ptr))) { \ - case 1: \ - *(ptr) = (val); \ - break; \ - case 2: __put_unaligned_2((val),(__u8 *)(ptr)); \ - break; \ - case 4: __put_unaligned_4((val),(__u8 *)(ptr)); \ - break; \ - case 8: __put_unaligned_8((val),(__u8 *)(ptr)); \ - break; \ - default: __bug_unaligned_x(ptr); \ - break; \ - } \ - (void) 0; \ - }) +#include <linux/unaligned/be_byteshift.h> +#include <linux/unaligned/le_byteshift.h> +#include <linux/unaligned/generic.h> +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le -#endif /* __V850_UNALIGNED_H__ */ +#endif /* _ASM_V850_UNALIGNED_H */ diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h new file mode 100644 index 00000000000..97d47133486 --- /dev/null +++ b/include/asm-x86/olpc.h @@ -0,0 +1,132 @@ +/* OLPC machine specific definitions */ + +#ifndef ASM_OLPC_H_ +#define ASM_OLPC_H_ + +#include <asm/geode.h> + +struct olpc_platform_t { + int flags; + uint32_t boardrev; + int ecver; +}; + +#define OLPC_F_PRESENT 0x01 +#define OLPC_F_DCON 0x02 +#define OLPC_F_VSA 0x04 + +#ifdef CONFIG_OLPC + +extern struct olpc_platform_t olpc_platform_info; + +/* + * OLPC board IDs contain the major build number within the mask 0x0ff0, + * and the minor build number withing 0x000f. Pre-builds have a minor + * number less than 8, and normal builds start at 8. For example, 0x0B10 + * is a PreB1, and 0x0C18 is a C1. + */ + +static inline uint32_t olpc_board(uint8_t id) +{ + return (id << 4) | 0x8; +} + +static inline uint32_t olpc_board_pre(uint8_t id) +{ + return id << 4; +} + +static inline int machine_is_olpc(void) +{ + return (olpc_platform_info.flags & OLPC_F_PRESENT) ? 1 : 0; +} + +/* + * The DCON is OLPC's Display Controller. It has a number of unique + * features that we might want to take advantage of.. + */ +static inline int olpc_has_dcon(void) +{ + return (olpc_platform_info.flags & OLPC_F_DCON) ? 
1 : 0; +} + +/* + * The VSA is software from AMD that typical Geode bioses will include. + * It is used to emulate the PCI bus, VGA, etc. OLPC's Open Firmware does + * not include the VSA; instead, PCI is emulated by the kernel. + * + * The VSA is described further in arch/x86/pci/olpc.c. + */ +static inline int olpc_has_vsa(void) +{ + return (olpc_platform_info.flags & OLPC_F_VSA) ? 1 : 0; +} + +/* + * The "Mass Production" version of OLPC's XO is identified as being model + * C2. During the prototype phase, the following models (in chronological + * order) were created: A1, B1, B2, B3, B4, C1. The A1 through B2 models + * were based on Geode GX CPUs, and models after that were based upon + * Geode LX CPUs. There were also some hand-assembled models floating + * around, referred to as PreB1, PreB2, etc. + */ +static inline int olpc_board_at_least(uint32_t rev) +{ + return olpc_platform_info.boardrev >= rev; +} + +#else + +static inline int machine_is_olpc(void) +{ + return 0; +} + +static inline int olpc_has_dcon(void) +{ + return 0; +} + +static inline int olpc_has_vsa(void) +{ + return 0; +} + +#endif + +/* EC related functions */ + +extern int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen, + unsigned char *outbuf, size_t outlen); + +extern int olpc_ec_mask_set(uint8_t bits); +extern int olpc_ec_mask_unset(uint8_t bits); + +/* EC commands */ + +#define EC_FIRMWARE_REV 0x08 + +/* SCI source values */ + +#define EC_SCI_SRC_EMPTY 0x00 +#define EC_SCI_SRC_GAME 0x01 +#define EC_SCI_SRC_BATTERY 0x02 +#define EC_SCI_SRC_BATSOC 0x04 +#define EC_SCI_SRC_BATERR 0x08 +#define EC_SCI_SRC_EBOOK 0x10 +#define EC_SCI_SRC_WLAN 0x20 +#define EC_SCI_SRC_ACPWR 0x40 +#define EC_SCI_SRC_ALL 0x7F + +/* GPIO assignments */ + +#define OLPC_GPIO_MIC_AC geode_gpio(1) +#define OLPC_GPIO_DCON_IRQ geode_gpio(7) +#define OLPC_GPIO_THRM_ALRM geode_gpio(10) +#define OLPC_GPIO_SMB_CLK geode_gpio(14) +#define OLPC_GPIO_SMB_DATA geode_gpio(15) +#define OLPC_GPIO_WORKAUX 
geode_gpio(24) +#define OLPC_GPIO_LID geode_gpio(26) +#define OLPC_GPIO_ECSCI geode_gpio(27) + +#endif diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index ddd8e248fc0..30bbde0cb34 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -19,6 +19,8 @@ struct pci_sysdata { }; /* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, + int node); extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); static inline int pci_domain_nr(struct pci_bus *bus) diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index 68779b048a3..bce72d7a958 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -1,7 +1,6 @@ #ifndef _ASMX86_TIME_H #define _ASMX86_TIME_H -extern void (*late_time_init)(void); extern void hpet_time_init(void); #include <asm/mc146818rtc.h> diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 22073268b48..0e6d6b03aff 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -193,9 +193,25 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + #ifdef CONFIG_SMP #define mc_capable() (boot_cpu_data.x86_max_cores > 1) #define smt_capable() (smp_num_siblings > 1) #endif +#ifdef CONFIG_NUMA +extern int get_mp_bus_to_node(int busnum); +extern void set_mp_bus_to_node(int busnum, int node); +#else +static inline int get_mp_bus_to_node(int busnum) +{ + return 0; +} +static inline void set_mp_bus_to_node(int busnum, int node) +{ +} +#endif + #endif diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h index d270ffe7275..a7bd416b476 100644 --- a/include/asm-x86/unaligned.h +++ b/include/asm-x86/unaligned.h @@ -3,35 +3,12 @@ /* * The x86 can do unaligned accesses itself. 
- * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. */ -/** - * get_unaligned - get value from possibly mis-aligned location - * @ptr: pointer to value - * - * This macro should be used for accessing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. retrieving a u16 value from a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. - */ -#define get_unaligned(ptr) (*(ptr)) +#include <linux/unaligned/access_ok.h> +#include <linux/unaligned/generic.h> -/** - * put_unaligned - put value to a possibly mis-aligned location - * @val: value to place - * @ptr: pointer to location - * - * This macro should be used for placing values larger in size than - * single bytes at locations that are expected to be improperly aligned, - * e.g. writing a u16 value to a location not u16-aligned. - * - * Note that unaligned accesses can be very expensive on some architectures. - */ -#define put_unaligned(val, ptr) ((void)(*(ptr) = (val))) +#define get_unaligned __get_unaligned_le +#define put_unaligned __put_unaligned_le #endif /* _ASM_X86_UNALIGNED_H */ diff --git a/include/asm-xtensa/unaligned.h b/include/asm-xtensa/unaligned.h index 28220890d0a..8f3424fc5d1 100644 --- a/include/asm-xtensa/unaligned.h +++ b/include/asm-xtensa/unaligned.h @@ -1,6 +1,4 @@ /* - * include/asm-xtensa/unaligned.h - * * Xtensa doesn't handle unaligned accesses efficiently. * * This file is subject to the terms and conditions of the GNU General Public @@ -9,20 +7,23 @@ * * Copyright (C) 2001 - 2005 Tensilica Inc. */ +#ifndef _ASM_XTENSA_UNALIGNED_H +#define _ASM_XTENSA_UNALIGNED_H -#ifndef _XTENSA_UNALIGNED_H -#define _XTENSA_UNALIGNED_H - -#include <linux/string.h> - -/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ - -#define get_unaligned(ptr) \ - ({ __typeof__(*(ptr)) __tmp; memmove(&__tmp, (ptr), sizeof(*(ptr))); __tmp; }) - -#define put_unaligned(val, ptr) \ - ({ __typeof__(*(ptr)) __tmp = (val); \ - memmove((ptr), &__tmp, sizeof(*(ptr))); \ - (void)0; }) +#ifdef __XTENSA_EL__ +# include <linux/unaligned/le_memmove.h> +# include <linux/unaligned/be_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_le +# define put_unaligned __put_unaligned_le +#elif defined(__XTENSA_EB__) +# include <linux/unaligned/be_memmove.h> +# include <linux/unaligned/le_byteshift.h> +# include <linux/unaligned/generic.h> +# define get_unaligned __get_unaligned_be +# define put_unaligned __put_unaligned_be +#else +# error processor byte order undefined! +#endif -#endif /* _XTENSA_UNALIGNED_H */ +#endif /* _ASM_XTENSA_UNALIGNED_H */ diff --git a/include/linux/Kbuild b/include/linux/Kbuild index bda6f04791d..78fade0a1e3 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -20,6 +20,7 @@ header-y += affs_hardblocks.h header-y += aio_abi.h header-y += arcfb.h header-y += atmapi.h +header-y += atmarp.h header-y += atmbr2684.h header-y += atmclip.h header-y += atm_eni.h @@ -48,6 +49,7 @@ header-y += coff.h header-y += comstats.h header-y += const.h header-y += cgroupstats.h +header-y += cramfs_fs.h header-y += cycx_cfm.h header-y += dlmconstants.h header-y += dlm_device.h @@ -70,10 +72,12 @@ header-y += firewire-constants.h header-y += fuse.h header-y += genetlink.h header-y += gen_stats.h +header-y += gfs2_ondisk.h header-y += gigaset_dev.h header-y += hysdn_if.h header-y += i2o-dev.h header-y += i8k.h +header-y += if_addrlabel.h header-y += if_arcnet.h header-y += if_bonding.h header-y += if_cablemodem.h @@ -91,6 +95,7 @@ header-y += if_tunnel.h header-y += in6.h header-y += in_route.h header-y += ioctl.h +header-y += ip6_tunnel.h header-y += ipmi_msgdefs.h header-y += ipsec.h header-y += ipx.h @@ -117,7 +122,6 @@ header-y += nfs2.h header-y += 
nfs4_mount.h header-y += nfs_mount.h header-y += nl80211.h -header-y += oom.h header-y += param.h header-y += pci_regs.h header-y += pfkeyv2.h @@ -166,7 +170,6 @@ unifdef-y += adfs_fs.h unifdef-y += agpgart.h unifdef-y += apm_bios.h unifdef-y += atalk.h -unifdef-y += atmarp.h unifdef-y += atmdev.h unifdef-y += atm.h unifdef-y += atm_tcp.h @@ -182,7 +185,6 @@ unifdef-y += cm4000_cs.h unifdef-y += cn_proc.h unifdef-y += coda.h unifdef-y += connector.h -unifdef-y += cramfs_fs.h unifdef-y += cuda.h unifdef-y += cyclades.h unifdef-y += dccp.h @@ -205,7 +207,6 @@ unifdef-y += futex.h unifdef-y += fs.h unifdef-y += gameport.h unifdef-y += generic_serial.h -unifdef-y += gfs2_ondisk.h unifdef-y += hayesesp.h unifdef-y += hdlcdrv.h unifdef-y += hdlc.h @@ -219,7 +220,6 @@ unifdef-y += i2c-dev.h unifdef-y += icmp.h unifdef-y += icmpv6.h unifdef-y += if_addr.h -unifdef-y += if_addrlabel.h unifdef-y += if_arp.h unifdef-y += if_bridge.h unifdef-y += if_ec.h @@ -243,7 +243,6 @@ unifdef-y += ipc.h unifdef-y += ipmi.h unifdef-y += ipv6.h unifdef-y += ipv6_route.h -unifdef-y += ip6_tunnel.h unifdef-y += isdn.h unifdef-y += isdnif.h unifdef-y += isdn_divertif.h diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2c7e003356a..41f7ce7edd7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); int acpi_numa_init (void); @@ -235,6 +236,10 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, #else /* CONFIG_ACPI */ +static inline int early_acpi_boot_init(void) +{ + return 0; +} static inline int acpi_boot_init(void) { return 0; diff --git a/include/linux/aio.h b/include/linux/aio.h index 
0d0b7f629bd..b51ddd28444 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -209,27 +209,8 @@ extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); extern int aio_put_req(struct kiocb *iocb); extern void kick_iocb(struct kiocb *iocb); extern int aio_complete(struct kiocb *iocb, long res, long res2); -extern void __put_ioctx(struct kioctx *ctx); struct mm_struct; extern void exit_aio(struct mm_struct *mm); -extern struct kioctx *lookup_ioctx(unsigned long ctx_id); -extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb); - -/* semi private, but used by the 32bit emulations: */ -struct kioctx *lookup_ioctx(unsigned long ctx_id); -int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb); - -#define get_ioctx(kioctx) do { \ - BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ - atomic_inc(&(kioctx)->users); \ -} while (0) -#define put_ioctx(kioctx) do { \ - BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ - if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \ - __put_ioctx(kioctx); \ -} while (0) #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 48a62baace5..b66fa2bdfd9 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -156,9 +156,7 @@ static inline unsigned long bdi_stat_error(struct backing_dev_info *bdi) extern struct backing_dev_info default_backing_dev_info; void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page); -int writeback_acquire(struct backing_dev_info *bdi); int writeback_in_progress(struct backing_dev_info *bdi); -void writeback_release(struct backing_dev_info *bdi); static inline int bdi_congested(struct backing_dev_info *bdi, int bdi_bits) { diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b7fc55ec8d4..b512e48f6d8 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -34,7 +34,8 @@ struct 
linux_binprm{ #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ - int sh_bang; + unsigned int sh_bang:1, + misc_bang:1; struct file * file; int e_uid, e_gid; kernel_cap_t cap_inheritable, cap_permitted; @@ -48,7 +49,6 @@ struct linux_binprm{ unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; - unsigned long argv_len; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 diff --git a/include/linux/bio.h b/include/linux/bio.h index d259690863f..61c15eaf3fb 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -324,6 +324,8 @@ extern struct bio *bio_map_user_iov(struct request_queue *, extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); +extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, + gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 48bde600a2d..024f2b02724 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -6,8 +6,8 @@ #define BIT(nr) (1UL << (nr)) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) #define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #endif /* @@ -114,8 +114,6 @@ static inline unsigned fls_long(unsigned long l) #ifdef __KERNEL__ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT -extern unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size); /** * find_first_bit - find the first set bit in a memory region @@ -124,28 +122,8 @@ extern unsigned long __find_first_bit(const unsigned long *addr, * * Returns the bit number of the first set bit. 
*/ -static __always_inline unsigned long -find_first_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) - return __ffs((*addr) | (1ul << size)); - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return ((*addr) == 0) ? BITS_PER_LONG : __ffs(*addr); - - /* size is not constant or too big */ - return __find_first_bit(addr, size); -} - -extern unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size); +extern unsigned long find_first_bit(const unsigned long *addr, + unsigned long size); /** * find_first_zero_bit - find the first cleared bit in a memory region @@ -154,31 +132,12 @@ extern unsigned long __find_first_zero_bit(const unsigned long *addr, * * Returns the bit number of the first cleared bit. */ -static __always_inline unsigned long -find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - return __ffs(~(*addr) | (1ul << size)); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) - return (~(*addr) == 0) ? 
BITS_PER_LONG : __ffs(~(*addr)); - - /* size is not constant or too big */ - return __find_first_zero_bit(addr, size); -} +extern unsigned long find_first_zero_bit(const unsigned long *addr, + unsigned long size); + #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef CONFIG_GENERIC_FIND_NEXT_BIT -extern unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); /** * find_next_bit - find the next set bit in a memory region @@ -186,36 +145,8 @@ extern unsigned long __find_next_bit(const unsigned long *addr, * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (*addr) & ((~0ul) << offset); - return (value == 0) ? 
BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_bit(addr, size, offset); -} - -extern unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); +extern unsigned long find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -223,33 +154,11 @@ extern unsigned long __find_next_zero_bit(const unsigned long *addr, * @offset: The bitnumber to start searching at * @size: The bitmap size in bits */ -static __always_inline unsigned long -find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - unsigned long value; - - /* Avoid a function call if the bitmap size is a constant */ - /* and not bigger than BITS_PER_LONG. */ - - /* insert a sentinel so that __ffs returns size if there */ - /* are no set bits in the bitmap */ - if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - value |= (1ul << size); - return __ffs(value); - } - - /* the result of __ffs(0) is undefined, so it needs to be */ - /* handled separately */ - if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { - value = (~(*addr)) & ((~0ul) << offset); - return (value == 0) ? 
BITS_PER_LONG : __ffs(value); - } - - /* size is not constant or too big */ - return __find_next_zero_bit(addr, size, offset); -} + +extern unsigned long find_next_zero_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); + #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #endif /* __KERNEL__ */ #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c5065e3d2ca..c09696a90d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -215,8 +215,9 @@ struct request { /* * when request is used as a packet command carrier */ - unsigned int cmd_len; - unsigned char cmd[BLK_MAX_CDB]; + unsigned short cmd_len; + unsigned char __cmd[BLK_MAX_CDB]; + unsigned char *cmd; unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ @@ -407,6 +408,31 @@ struct request_queue #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ + +static inline void queue_flag_set_unlocked(unsigned int flag, + struct request_queue *q) +{ + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_set(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __set_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear_unlocked(unsigned int flag, + struct request_queue *q) +{ + __clear_bit(flag, &q->queue_flags); +} + +static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) +{ + WARN_ON_ONCE(!spin_is_locked(q->queue_lock)); + __clear_bit(flag, &q->queue_flags); +} enum { /* @@ -451,6 +477,7 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) +#define 
blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) @@ -496,17 +523,17 @@ static inline int blk_queue_full(struct request_queue *q, int rw) static inline void blk_set_queue_full(struct request_queue *q, int rw) { if (rw == READ) - set_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_READFULL, q); else - set_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_set(QUEUE_FLAG_WRITEFULL, q); } static inline void blk_clear_queue_full(struct request_queue *q, int rw) { if (rw == READ) - clear_bit(QUEUE_FLAG_READFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_READFULL, q); else - clear_bit(QUEUE_FLAG_WRITEFULL, &q->queue_flags); + queue_flag_clear(QUEUE_FLAG_WRITEFULL, q); } @@ -583,6 +610,7 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); +extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern void blk_end_sync_rq(struct request *rq, int error); @@ -626,6 +654,7 @@ extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); +extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 932eb02a275..82aa36c53ea 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,7 
+225,6 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct page *page, get_block_t get_block); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); -int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int file_fsync(struct file *, struct dentry *, int); int nobh_write_begin(struct file *, struct address_space *, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a6a6035a4e1..e155aa78d85 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -88,6 +88,17 @@ static inline void css_put(struct cgroup_subsys_state *css) __css_put(css); } +/* bits in struct cgroup flags field */ +enum { + /* Control Group is dead */ + CGRP_REMOVED, + /* Control Group has previously had a child cgroup or a task, + * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ + CGRP_RELEASABLE, + /* Control Group requires release notifications to userspace */ + CGRP_NOTIFY_ON_RELEASE, +}; + struct cgroup { unsigned long flags; /* "unsigned long" so bitops work */ @@ -139,10 +150,10 @@ struct css_set { struct kref ref; /* - * List running through all cgroup groups. Protected by - * css_set_lock + * List running through all cgroup groups in the same hash + * slot. Protected by css_set_lock */ - struct list_head list; + struct hlist_node hlist; /* * List running through all tasks using this cgroup @@ -163,7 +174,16 @@ struct css_set { * during subsystem registration (at boot time). 
*/ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; +}; + +/* + * cgroup_map_cb is an abstract callback API for reporting map-valued + * control files + */ +struct cgroup_map_cb { + int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value); + void *state; }; /* struct cftype: @@ -190,20 +210,51 @@ struct cftype { struct file *file, char __user *buf, size_t nbytes, loff_t *ppos); /* - * read_uint() is a shortcut for the common case of returning a + * read_u64() is a shortcut for the common case of returning a * single integer. Use it in place of read() */ - u64 (*read_uint) (struct cgroup *cgrp, struct cftype *cft); + u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft); + /* + * read_s64() is a signed version of read_u64() + */ + s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft); + /* + * read_map() is used for defining a map of key/value + * pairs. It should call cb->fill(cb, key, value) for each + * entry. The key/value pairs (and their ordering) should not + * change between reboots. + */ + int (*read_map) (struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb); + /* + * read_seq_string() is used for outputting a simple sequence + * using seqfile. + */ + int (*read_seq_string) (struct cgroup *cont, struct cftype *cft, + struct seq_file *m); + ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft, struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos); /* - * write_uint() is a shortcut for the common case of accepting + * write_u64() is a shortcut for the common case of accepting * a single integer (as parsed by simple_strtoull) from * userspace. Use in place of write(); return 0 or error. 
*/ - int (*write_uint) (struct cgroup *cgrp, struct cftype *cft, u64 val); + int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val); + /* + * write_s64() is a signed version of write_u64() + */ + int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val); + + /* + * trigger() callback can be used to get some kick from the + * userspace, when the actual string written is not important + * at all. The private field can be used to determine the + * kick type for multiplexing. + */ + int (*trigger)(struct cgroup *cgrp, unsigned int event); int (*release) (struct inode *inode, struct file *file); }; @@ -254,6 +305,12 @@ struct cgroup_subsys { struct cgroup *cgrp); void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp); void (*bind)(struct cgroup_subsys *ss, struct cgroup *root); + /* + * This routine is called with the task_lock of mm->owner held + */ + void (*mm_owner_changed)(struct cgroup_subsys *ss, + struct cgroup *old, + struct cgroup *new); int subsys_id; int active; int disabled; @@ -339,4 +396,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, #endif /* !CONFIG_CGROUPS */ +#ifdef CONFIG_MM_OWNER +extern void +cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new); +#else /* !CONFIG_MM_OWNER */ +static inline void +cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) +{ +} +#endif /* CONFIG_MM_OWNER */ #endif /* _LINUX_CGROUP_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 1ddebfc5256..e2877454ec8 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -42,3 +42,9 @@ SUBSYS(mem_cgroup) #endif /* */ + +#ifdef CONFIG_CGROUP_DEVICE +SUBSYS(devices) +#endif + +/* */ diff --git a/include/linux/coda_linux.h b/include/linux/coda_linux.h index 1c47a34aa79..31b75311e2c 100644 --- a/include/linux/coda_linux.h +++ b/include/linux/coda_linux.h @@ -43,9 +43,6 @@ int coda_getattr(struct vfsmount *, struct dentry *, 
struct kstat *); int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ -static __inline__ struct CodaFid *coda_i2f(struct inode *); -static __inline__ char *coda_i2s(struct inode *); -static __inline__ void coda_flag_inode(struct inode *, int flag); char *coda_f2s(struct CodaFid *f); int coda_isroot(struct inode *i); int coda_iscontrol(const char *name, size_t length); diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index d71f7c0f931..b03f80a078b 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -53,6 +53,7 @@ struct vc_data { unsigned short vc_hi_font_mask; /* [#] Attribute set for upper 256 chars of font or 0 if not supported */ struct console_font vc_font; /* Current VC font set */ unsigned short vc_video_erase_char; /* Background erase character */ + unsigned short vc_scrl_erase_char; /* Erase character for scroll */ /* VT terminal data */ unsigned int vc_state; /* Escape sequence parser state */ unsigned int vc_npar,vc_par[NPAR]; /* Parameters of current escape sequence */ diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f212fa98283..7464ba3b433 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,7 +108,7 @@ static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) extern void get_online_cpus(void); extern void put_online_cpus(void); #define hotcpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb = \ + static struct notifier_block fn##_nb __cpuinitdata = \ { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ddd8652fc3f..e7e91dbfde0 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -83,7 +83,8 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_t cpus; /* affected CPUs */ + cpumask_t cpus; /* CPUs requiring sw coordination */ + cpumask_t related_cpus; /* CPUs with any coordination */ unsigned 
int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of registered CPU */ @@ -307,6 +308,9 @@ extern struct cpufreq_governor cpufreq_gov_performance; #endif #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_performance) +#elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE) +extern struct cpufreq_governor cpufreq_gov_powersave; +#define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_powersave) #elif defined(CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE) extern struct cpufreq_governor cpufreq_gov_userspace; #define CPUFREQ_DEFAULT_GOVERNOR (&cpufreq_gov_userspace) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h new file mode 100644 index 00000000000..0b0d9c39ed6 --- /dev/null +++ b/include/linux/device_cgroup.h @@ -0,0 +1,12 @@ +#include <linux/module.h> +#include <linux/fs.h> + +#ifdef CONFIG_CGROUP_DEVICE +extern int devcgroup_inode_permission(struct inode *inode, int mask); +extern int devcgroup_inode_mknod(int mode, dev_t dev); +#else +static inline int devcgroup_inode_permission(struct inode *inode, int mask) +{ return 0; } +static inline int devcgroup_inode_mknod(int mode, dev_t dev) +{ return 0; } +#endif diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h new file mode 100644 index 00000000000..1677e2bfa00 --- /dev/null +++ b/include/linux/dma-attrs.h @@ -0,0 +1,74 @@ +#ifndef _DMA_ATTR_H +#define _DMA_ATTR_H + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/bug.h> + +/** + * an enum dma_attr represents an attribute associated with a DMA + * mapping. The semantics of each attribute should be defined in + * Documentation/DMA-attributes.txt. 
+ */ +enum dma_attr { + DMA_ATTR_WRITE_BARRIER, + DMA_ATTR_MAX, +}; + +#define __DMA_ATTRS_LONGS BITS_TO_LONGS(DMA_ATTR_MAX) + +/** + * struct dma_attrs - an opaque container for DMA attributes + * @flags - bitmask representing a collection of enum dma_attr + */ +struct dma_attrs { + unsigned long flags[__DMA_ATTRS_LONGS]; +}; + +#define DEFINE_DMA_ATTRS(x) \ + struct dma_attrs x = { \ + .flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \ + } + +static inline void init_dma_attrs(struct dma_attrs *attrs) +{ + bitmap_zero(attrs->flags, __DMA_ATTRS_LONGS); +} + +#ifdef CONFIG_HAVE_DMA_ATTRS +/** + * dma_set_attr - set a specific attribute + * @attr: attribute to set + * @attrs: struct dma_attrs (may be NULL) + */ +static inline void dma_set_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + if (attrs == NULL) + return; + BUG_ON(attr >= DMA_ATTR_MAX); + __set_bit(attr, attrs->flags); +} + +/** + * dma_get_attr - check for a specific attribute + * @attr: attribute to set + * @attrs: struct dma_attrs (may be NULL) + */ +static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + if (attrs == NULL) + return 0; + BUG_ON(attr >= DMA_ATTR_MAX); + return test_bit(attr, attrs->flags); +} +#else /* !CONFIG_HAVE_DMA_ATTRS */ +static inline void dma_set_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ +} + +static inline int dma_get_attr(enum dma_attr attr, struct dma_attrs *attrs) +{ + return 0; +} +#endif /* CONFIG_HAVE_DMA_ATTRS */ +#endif /* _DMA_ATTR_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 33203070962..952e0f857ac 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -146,4 +146,21 @@ static inline void dmam_release_declared_memory(struct device *dev) } #endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */ +#ifndef CONFIG_HAVE_DMA_ATTRS +struct dma_attrs; + +#define dma_map_single_attrs(dev, cpu_addr, size, dir, attrs) \ + dma_map_single(dev, cpu_addr, size, dir) + +#define 
dma_unmap_single_attrs(dev, dma_addr, size, dir, attrs) \ + dma_unmap_single(dev, dma_addr, size, dir) + +#define dma_map_sg_attrs(dev, sgl, nents, dir, attrs) \ + dma_map_sg(dev, sgl, nents, dir) + +#define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ + dma_unmap_sg(dev, sgl, nents, dir) + +#endif /* CONFIG_HAVE_DMA_ATTRS */ + #endif diff --git a/include/linux/edac.h b/include/linux/edac.h index eab451e69a9..7cf92e8a419 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -3,7 +3,7 @@ * * Author: Dave Jiang <djiang@mvista.com> * - * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under * the terms of the GNU General Public License version 2. This program * is licensed "as is" without any warranty of any kind, whether express * or implied. @@ -26,4 +26,16 @@ extern atomic_t edac_handlers; extern int edac_handler_set(void); extern void edac_atomic_assert_error(void); +static inline void opstate_init(void) +{ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_NMI: + break; + default: + edac_op_state = EDAC_OPSTATE_POLL; + } + return; +} + #endif diff --git a/include/linux/elf.h b/include/linux/elf.h index bad1b16ec49..ff9fbed9012 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -208,7 +208,7 @@ typedef struct elf32_hdr{ } Elf32_Ehdr; typedef struct elf64_hdr { - unsigned char e_ident[16]; /* ELF "magic number" */ + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ Elf64_Half e_type; Elf64_Half e_machine; Elf64_Word e_version; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2c925747bc4..a1ba005d08e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1521,7 +1521,6 @@ extern int get_sb_pseudo(struct file_system_type *, char *, const struct super_operations *ops, unsigned long, struct vfsmount *mnt); extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); -int __put_super(struct super_block 
*sb); int __put_super_and_need_restart(struct super_block *sb); void unnamed_dev_init(void); @@ -1965,7 +1964,6 @@ extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); -extern long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c37653b6843..b414be38718 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,9 +40,9 @@ struct vm_area_struct; #define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */ #define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */ #define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */ -#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */ -#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. 
Might fail */ +#define __GFP_REPEAT ((__force gfp_t)0x400u) /* See above */ +#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* See above */ +#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* See above */ #define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */ #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ diff --git a/include/linux/idr.h b/include/linux/idr.h index 0edda411959..9a2d762124d 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/init.h> #if BITS_PER_LONG == 32 # define IDR_BITS 5 @@ -115,4 +116,6 @@ void ida_remove(struct ida *ida, int id); void ida_destroy(struct ida *ida); void ida_init(struct ida *ida); +void __init idr_init_cache(void); + #endif /* __IDR_H__ */ diff --git a/include/linux/init.h b/include/linux/init.h index fb58c0493cf..21d658cdfa2 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -147,6 +147,8 @@ extern unsigned int reset_devices; void setup_arch(char **); void prepare_namespace(void); +extern void (*late_time_init)(void); + #endif #ifndef MODULE diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b5fef13148b..f1fc7470d26 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -289,6 +289,7 @@ struct softirq_action }; asmlinkage void do_softirq(void); +asmlinkage void __do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e4451d1da75..ea6c18a8b0d 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -4,6 +4,17 @@ #include <linux/err.h> #include <linux/idr.h> #include <linux/rwsem.h> 
+#include <linux/notifier.h> + +/* + * ipc namespace events + */ +#define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */ +#define IPCNS_CREATED 0x00000002 /* Notify new ipc namespace created */ +#define IPCNS_REMOVED 0x00000003 /* Notify ipc namespace removed */ + +#define IPCNS_CALLBACK_PRI 0 + struct ipc_ids { int in_use; @@ -30,15 +41,24 @@ struct ipc_namespace { size_t shm_ctlall; int shm_ctlmni; int shm_tot; + + struct notifier_block ipcns_nb; }; extern struct ipc_namespace init_ipc_ns; +extern atomic_t nr_ipc_ns; #ifdef CONFIG_SYSVIPC #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, -#else + +extern int register_ipcns_notifier(struct ipc_namespace *); +extern int cond_register_ipcns_notifier(struct ipc_namespace *); +extern int unregister_ipcns_notifier(struct ipc_namespace *); +extern int ipcns_notify(unsigned long); + +#else /* CONFIG_SYSVIPC */ #define INIT_IPC_NS(ns) -#endif +#endif /* CONFIG_SYSVIPC */ #if defined(CONFIG_SYSVIPC) && defined(CONFIG_IPC_NS) extern void free_ipc_ns(struct kref *kref); diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index c5bd28b69ae..7ebdb4fb4e5 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -64,7 +64,7 @@ * applications and another for userland applications. The * capabilities are basically the same for both interface, although * the interfaces are somewhat different. The stuff in the - * #ifdef KERNEL below is the in-kernel interface. The userland + * #ifdef __KERNEL__ below is the in-kernel interface. The userland * interface is defined later in the file. */ @@ -75,8 +75,7 @@ * work for sockets. */ #define IPMI_MAX_ADDR_SIZE 32 -struct ipmi_addr -{ +struct ipmi_addr { /* Try to take these from the "Channel Medium Type" table in section 6.5 of the IPMI 1.5 manual. */ int addr_type; @@ -90,8 +89,7 @@ struct ipmi_addr * 0), or IPMC_BMC_CHANNEL if communicating directly with the BMC. 
*/ #define IPMI_SYSTEM_INTERFACE_ADDR_TYPE 0x0c -struct ipmi_system_interface_addr -{ +struct ipmi_system_interface_addr { int addr_type; short channel; unsigned char lun; @@ -100,10 +98,9 @@ struct ipmi_system_interface_addr /* An IPMB Address. */ #define IPMI_IPMB_ADDR_TYPE 0x01 /* Used for broadcast get device id as described in section 17.9 of the - IPMI 1.5 manual. */ + IPMI 1.5 manual. */ #define IPMI_IPMB_BROADCAST_ADDR_TYPE 0x41 -struct ipmi_ipmb_addr -{ +struct ipmi_ipmb_addr { int addr_type; short channel; unsigned char slave_addr; @@ -128,8 +125,7 @@ struct ipmi_ipmb_addr * message is a little weird, but this is required. */ #define IPMI_LAN_ADDR_TYPE 0x04 -struct ipmi_lan_addr -{ +struct ipmi_lan_addr { int addr_type; short channel; unsigned char privilege; @@ -162,16 +158,14 @@ struct ipmi_lan_addr * byte of data in the response (as the spec shows the messages laid * out). */ -struct ipmi_msg -{ +struct ipmi_msg { unsigned char netfn; unsigned char cmd; unsigned short data_len; unsigned char __user *data; }; -struct kernel_ipmi_msg -{ +struct kernel_ipmi_msg { unsigned char netfn; unsigned char cmd; unsigned short data_len; @@ -239,12 +233,11 @@ typedef struct ipmi_user *ipmi_user_t; * used after the message is delivered, so the upper layer may use the * link to build a linked list, if it likes. */ -struct ipmi_recv_msg -{ +struct ipmi_recv_msg { struct list_head link; /* The type of message as defined in the "Receive Types" - defines above. */ + defines above. */ int recv_type; ipmi_user_t user; @@ -271,9 +264,8 @@ struct ipmi_recv_msg /* Allocate and free the receive message. */ void ipmi_free_recv_msg(struct ipmi_recv_msg *msg); -struct ipmi_user_hndl -{ - /* Routine type to call when a message needs to be routed to +struct ipmi_user_hndl { + /* Routine type to call when a message needs to be routed to the upper layer. This will be called with some locks held, the only IPMI routines that can be called are ipmi_request and the alloc/free operations. 
The handler_data is the @@ -368,9 +360,8 @@ int ipmi_request_supply_msgs(ipmi_user_t user, * Poll the IPMI interface for the user. This causes the IPMI code to * do an immediate check for information from the driver and handle * anything that is immediately pending. This will not block in any - * way. This is useful if you need to implement polling from the user - * for things like modifying the watchdog timeout when a panic occurs - * or disabling the watchdog timer on a reboot. + * way. This is useful if you need to spin waiting for something to + * happen in the IPMI driver. */ void ipmi_poll_interface(ipmi_user_t user); @@ -422,12 +413,6 @@ int ipmi_get_maintenance_mode(ipmi_user_t user); int ipmi_set_maintenance_mode(ipmi_user_t user, int mode); /* - * Allow run-to-completion mode to be set for the interface of - * a specific user. - */ -void ipmi_user_set_run_to_completion(ipmi_user_t user, int val); - -/* * When the user is created, it will not receive IPMI events by * default. The user must set this to TRUE to get incoming events. * The first user that sets this to TRUE will receive all events that @@ -440,8 +425,7 @@ int ipmi_set_gets_events(ipmi_user_t user, int val); * every existing interface when a new watcher is registered with * ipmi_smi_watcher_register(). */ -struct ipmi_smi_watcher -{ +struct ipmi_smi_watcher { struct list_head link; /* You must set the owner to the current module, if you are in @@ -512,8 +496,7 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len); /* Messages sent to the interface are this format. */ -struct ipmi_req -{ +struct ipmi_req { unsigned char __user *addr; /* Address to send the message to. */ unsigned int addr_len; @@ -538,12 +521,11 @@ struct ipmi_req /* Messages sent to the interface with timing parameters are this format. */ -struct ipmi_req_settime -{ +struct ipmi_req_settime { struct ipmi_req req; /* See ipmi_request_settime() above for details on these - values. */ + values. 
*/ int retries; unsigned int retry_time_ms; }; @@ -560,8 +542,7 @@ struct ipmi_req_settime struct ipmi_req_settime) /* Messages received from the interface are this format. */ -struct ipmi_recv -{ +struct ipmi_recv { int recv_type; /* Is this a command, response or an asyncronous event. */ @@ -607,13 +588,12 @@ struct ipmi_recv struct ipmi_recv) /* Register to get commands from other entities on this interface. */ -struct ipmi_cmdspec -{ +struct ipmi_cmdspec { unsigned char netfn; unsigned char cmd; }; -/* +/* * Register to receive a specific command. error values: * - EFAULT - an address supplied was invalid. * - EBUSY - The netfn/cmd supplied was already in use. @@ -636,8 +616,7 @@ struct ipmi_cmdspec * else. The chans field is a bitmask, (1 << channel) for each channel. * It may be IPMI_CHAN_ALL for all channels. */ -struct ipmi_cmdspec_chans -{ +struct ipmi_cmdspec_chans { unsigned int netfn; unsigned int cmd; unsigned int chans; @@ -659,7 +638,7 @@ struct ipmi_cmdspec_chans #define IPMICTL_UNREGISTER_FOR_CMD_CHANS _IOR(IPMI_IOC_MAGIC, 29, \ struct ipmi_cmdspec_chans) -/* +/* * Set whether this interface receives events. Note that the first * user registered for events will get all pending events for the * interface. error values: @@ -675,15 +654,18 @@ struct ipmi_cmdspec_chans * things it takes to determine your address (if not the BMC) and set * it for everyone else. You should probably leave the LUN alone. 
*/ -struct ipmi_channel_lun_address_set -{ +struct ipmi_channel_lun_address_set { unsigned short channel; unsigned char value; }; -#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) -#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) -#define IPMICTL_SET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) -#define IPMICTL_GET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD \ + _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD \ + _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_LUN_CMD \ + _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_LUN_CMD \ + _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) /* Legacy interfaces, these only set IPMB 0. */ #define IPMICTL_SET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 17, unsigned int) #define IPMICTL_GET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 18, unsigned int) @@ -694,8 +676,7 @@ struct ipmi_channel_lun_address_set * Get/set the default timing values for an interface. You shouldn't * generally mess with these. */ -struct ipmi_timing_parms -{ +struct ipmi_timing_parms { int retries; unsigned int retry_time_ms; }; diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 6e8cec50338..62b73668b60 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -60,8 +60,7 @@ typedef struct ipmi_smi *ipmi_smi_t; * asynchronous data and messages and request them from the * interface. */ -struct ipmi_smi_msg -{ +struct ipmi_smi_msg { struct list_head link; long msgid; @@ -74,12 +73,11 @@ struct ipmi_smi_msg unsigned char rsp[IPMI_MAX_MSG_LENGTH]; /* Will be called when the system is done with the message - (presumably to free it). 
*/ + (presumably to free it). */ void (*done)(struct ipmi_smi_msg *msg); }; -struct ipmi_smi_handlers -{ +struct ipmi_smi_handlers { struct module *owner; /* The low-level interface cannot start sending messages to @@ -231,7 +229,7 @@ static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg) directory for this interface. Note that the entry will automatically be dstroyed when the interface is destroyed. */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, - read_proc_t *read_proc, write_proc_t *write_proc, + read_proc_t *read_proc, void *data, struct module *owner); #endif /* __LINUX_IPMI_SMI_H */ diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h new file mode 100644 index 00000000000..22a72198c14 --- /dev/null +++ b/include/linux/kbuild.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_KBUILD_H +#define __LINUX_KBUILD_H + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +#define OFFSET(sym, str, mem) \ + DEFINE(sym, offsetof(struct str, mem)) + +#define COMMENT(x) \ + asm volatile("\n->#" x) + +#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cd6d02cf854..53839ba265e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -20,6 +20,9 @@ extern const char linux_banner[]; extern const char linux_proc_banner[]; +#define USHORT_MAX ((u16)(~0U)) +#define SHORT_MAX ((s16)(USHORT_MAX>>1)) +#define SHORT_MIN (-SHORT_MAX - 1) #define INT_MAX ((int)(~0U>>1)) #define INT_MIN (-INT_MAX - 1) #define UINT_MAX (~0U) @@ -188,6 +191,7 @@ extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; extern int printk_ratelimit(void); +extern int __ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); @@ -255,6 +259,7 @@ extern 
enum system_states { #define TAINT_USER (1<<6) #define TAINT_DIE (1<<7) #define TAINT_OVERRIDDEN_ACPI_TABLE (1<<8) +#define TAINT_WARN (1<<9) extern void dump_stack(void) __cold; diff --git a/include/linux/key.h b/include/linux/key.h index a70b8a8f200..c45c962d1cc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -19,6 +19,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <linux/rcupdate.h> +#include <linux/sysctl.h> #include <asm/atomic.h> #ifdef __KERNEL__ @@ -67,6 +68,8 @@ struct key; #define KEY_OTH_SETATTR 0x00000020 #define KEY_OTH_ALL 0x0000003f +#define KEY_PERM_UNDEF 0xffffffff + struct seq_file; struct user_struct; struct signal_struct; @@ -208,16 +211,19 @@ extern struct key *request_key(struct key_type *type, extern struct key *request_key_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux); extern struct key *request_key_async(struct key_type *type, const char *description, - const char *callout_info); + const void *callout_info, + size_t callout_len); extern struct key *request_key_async_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux); extern int wait_for_key_construction(struct key *key, bool intr); @@ -229,6 +235,7 @@ extern key_ref_t key_create_or_update(key_ref_t keyring, const char *description, const void *payload, size_t plen, + key_perm_t perm, unsigned long flags); extern int key_update(key_ref_t key, @@ -257,14 +264,18 @@ extern int keyring_add_key(struct key *keyring, extern struct key *key_lookup(key_serial_t id); -#define key_serial(key) ((key) ? (key)->serial : 0) +static inline key_serial_t key_serial(struct key *key) +{ + return key ? 
key->serial : 0; +} + +#ifdef CONFIG_SYSCTL +extern ctl_table key_sysctls[]; +#endif /* * the userspace interface */ -extern struct key root_user_keyring, root_session_keyring; -extern int alloc_uid_keyring(struct user_struct *user, - struct task_struct *ctx); extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); extern int copy_thread_group_keys(struct task_struct *tsk); @@ -293,7 +304,6 @@ extern void key_init(void); #define make_key_ref(k, p) ({ NULL; }) #define key_ref_to_ptr(k) ({ NULL; }) #define is_key_possessed(k) 0 -#define alloc_uid_keyring(u,c) 0 #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(t, k) ({ NULL; }) #define copy_keys(f,t) 0 @@ -306,10 +316,6 @@ extern void key_init(void); #define key_fsgid_changed(t) do { } while(0) #define key_init() do { } while(0) -/* Initial keyrings */ -extern struct key root_user_keyring; -extern struct key root_session_keyring; - #endif /* CONFIG_KEYS */ #endif /* __KERNEL__ */ #endif /* _LINUX_KEY_H */ diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 3365945640c..656ee6b77a4 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -49,5 +49,6 @@ #define KEYCTL_SET_REQKEY_KEYRING 14 /* set default request-key keyring */ #define KEYCTL_SET_TIMEOUT 15 /* set key timeout */ #define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */ +#define KEYCTL_GET_SECURITY 17 /* get key security label */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/linux/list.h b/include/linux/list.h index b4a939b6b62..7627508f1b7 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -328,7 +328,7 @@ static inline int list_is_singular(const struct list_head *head) return !list_empty(head) && (head->next == head->prev); } -static inline void __list_splice(struct list_head *list, +static inline void __list_splice(const struct list_head *list, struct list_head *head) { struct 
list_head *first = list->next; @@ -347,7 +347,8 @@ static inline void __list_splice(struct list_head *list, * @list: the new list to add. * @head: the place to add it in the first list. */ -static inline void list_splice(struct list_head *list, struct list_head *head) +static inline void list_splice(const struct list_head *list, + struct list_head *head) { if (!list_empty(list)) __list_splice(list, head); diff --git a/include/linux/lmb.h b/include/linux/lmb.h index 271153d27fb..c46c89505da 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -40,7 +40,8 @@ extern struct lmb lmb; extern void __init lmb_init(void); extern void __init lmb_analyze(void); -extern long __init lmb_add(u64 base, u64 size); +extern long lmb_add(u64 base, u64 size); +extern long lmb_remove(u64 base, u64 size); extern long __init lmb_reserve(u64 base, u64 size); extern u64 __init lmb_alloc_nid(u64 size, u64 align, int nid, u64 (*nid_range)(u64, u64, int *)); @@ -53,6 +54,7 @@ extern u64 __init lmb_phys_mem_size(void); extern u64 __init lmb_end_of_DRAM(void); extern void __init lmb_enforce_memory_limit(u64 memory_limit); extern int __init lmb_is_reserved(u64 addr); +extern int lmb_find(struct lmb_property *res); extern void lmb_dump_all(void); diff --git a/include/linux/mca-legacy.h b/include/linux/mca-legacy.h index f2bb770e530..7a3aea84590 100644 --- a/include/linux/mca-legacy.h +++ b/include/linux/mca-legacy.h @@ -34,7 +34,6 @@ extern int mca_find_adapter(int id, int start); extern int mca_find_unused_adapter(int id, int start); -extern int mca_is_adapter_used(int slot); extern int mca_mark_as_used(int slot); extern void mca_mark_as_unused(int slot); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b1c4295848..e6608776bc9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -27,9 +27,6 @@ struct mm_struct; #ifdef CONFIG_CGROUP_MEM_RES_CTLR -extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); -extern void 
mm_free_cgroup(struct mm_struct *mm); - #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) extern struct page_cgroup *page_get_page_cgroup(struct page *page); @@ -48,8 +45,10 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); +extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); + #define mm_match_cgroup(mm, cgroup) \ - ((cgroup) == rcu_dereference((mm)->mem_cgroup)) + ((cgroup) == mem_cgroup_from_task((mm)->owner)) extern int mem_cgroup_prepare_migration(struct page *page); extern void mem_cgroup_end_migration(struct page *page); @@ -73,15 +72,6 @@ extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); #else /* CONFIG_CGROUP_MEM_RES_CTLR */ -static inline void mm_init_cgroup(struct mm_struct *mm, - struct task_struct *p) -{ -} - -static inline void mm_free_cgroup(struct mm_struct *mm) -{ -} - static inline void page_reset_bad_cgroup(struct page *page) { } diff --git a/include/linux/memory.h b/include/linux/memory.h index f80e0e331cb..2f5f8a5ef2a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -53,6 +53,13 @@ struct memory_notify { struct notifier_block; struct mem_section; +/* + * Priorities for the hotplug memory callback routines (stored in decreasing + * order in the callback chain) + */ +#define SLAB_CALLBACK_PRI 1 +#define IPC_CALLBACK_PRI 10 + #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE static inline int memory_dev_init(void) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 8b7f4a5d4f6..c31a9cd2a30 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1066,6 +1066,19 @@ extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct 
mm_struct *); + +#ifdef CONFIG_PROC_FS +/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ +extern void added_exe_file_vma(struct mm_struct *mm); +extern void removed_exe_file_vma(struct mm_struct *mm); +#else +static inline void added_exe_file_vma(struct mm_struct *mm) +{} + +static inline void removed_exe_file_vma(struct mm_struct *mm) +{} +#endif /* CONFIG_PROC_FS */ + extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); extern int install_special_mapping(struct mm_struct *mm, unsigned long addr, unsigned long len, @@ -1230,8 +1243,6 @@ int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); -void drop_pagecache(void); -void drop_slab(void); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e2bae8dde35..eb7c16cc955 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -225,8 +225,15 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; /* aio lock */ struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR - struct mem_cgroup *mem_cgroup; +#ifdef CONFIG_MM_OWNER + struct task_struct *owner; /* The thread group leader that */ + /* owns the mm_struct. 
*/ +#endif + +#ifdef CONFIG_PROC_FS + /* store ref to file /proc/<pid>/exe symlink points to */ + struct file *exe_file; + unsigned long num_exe_file_vmas; #endif }; diff --git a/include/linux/msg.h b/include/linux/msg.h index 10a3d5a1abf..6f3b8e79a99 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -49,16 +49,26 @@ struct msginfo { unsigned short msgseg; }; +/* + * Scaling factor to compute msgmni: + * the memory dedicated to msg queues (msgmni * msgmnb) should occupy + * at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c): + * up to 8MB : msgmni = 16 (MSGMNI) + * 4 GB : msgmni = 8K + * more than 16 GB : msgmni = 32K (IPCMNI) + */ +#define MSG_MEM_SCALE 32 + #define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */ #define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */ #define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */ /* unused */ -#define MSGPOOL (MSGMNI*MSGMNB/1024) /* size in kilobytes of message pool */ +#define MSGPOOL (MSGMNI * MSGMNB) /* size in bytes of message pool */ #define MSGTQL MSGMNB /* number of system message headers */ #define MSGMAP MSGMNB /* number of entries in message map */ #define MSGSSZ 16 /* message segment size */ -#define __MSGSEG ((MSGPOOL*1024)/ MSGSSZ) /* max no. of segments */ +#define __MSGSEG (MSGPOOL / MSGSSZ) /* max no. of segments */ #define MSGSEG (__MSGSEG <= 0xffff ? __MSGSEG : 0xffff) #ifdef __KERNEL__ diff --git a/include/linux/nbd.h b/include/linux/nbd.h index 986572081e1..155719dab81 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -56,9 +56,11 @@ struct nbd_device { int magic; spinlock_t queue_lock; - struct list_head queue_head;/* Requests are added here... 
*/ + struct list_head queue_head; /* Requests waiting result */ struct request *active_req; wait_queue_head_t active_wq; + struct list_head waiting_queue; /* Requests to be sent */ + wait_queue_head_t waiting_wq; struct mutex tx_lock; struct gendisk *disk; @@ -86,11 +88,7 @@ struct nbd_request { char handle[8]; __be64 from; __be32 len; -} -#ifdef __GNUC__ - __attribute__ ((packed)) -#endif -; +} __attribute__ ((packed)); /* * This is the reply packet that nbd-server sends back to the client after diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 20dfed59018..0ff6224d172 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -121,6 +121,10 @@ extern int raw_notifier_chain_register(struct raw_notifier_head *nh, extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh, struct notifier_block *nb); +extern int blocking_notifier_chain_cond_register( + struct blocking_notifier_head *nh, + struct notifier_block *nb); + extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, struct notifier_block *nb); extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, diff --git a/include/linux/pci.h b/include/linux/pci.h index 292491324b0..abc998ffb66 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -254,7 +254,7 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 16 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ @@ -666,6 +666,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), void *userdata); +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix); int pci_cfg_space_size(struct pci_dev *dev); unsigned char pci_bus_max_busnr(struct pci_bus *bus); @@ -1053,5 +1054,13 
@@ extern unsigned long pci_cardbus_mem_size; extern int pcibios_add_platform_entries(struct pci_dev *dev); +#ifdef CONFIG_PCI_MMCONFIG +extern void __init pci_mmcfg_early_init(void); +extern void __init pci_mmcfg_late_init(void); +#else +static inline void pci_mmcfg_early_init(void) { } +static inline void pci_mmcfg_late_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* LINUX_PCI_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1ac969724bb..d746a2abb32 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -4,7 +4,6 @@ #include <linux/preempt.h> #include <linux/slab.h> /* For kmalloc() */ #include <linux/smp.h> -#include <linux/string.h> /* For memset() */ #include <linux/cpumask.h> #include <asm/percpu.h> diff --git a/include/linux/personality.h b/include/linux/personality.h index 012cd558189..a84e9ff9b27 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -105,10 +105,6 @@ struct exec_domain { */ #define personality(pers) (pers & PER_MASK) -/* - * Personality of the currently running process. - */ -#define get_personality (current->personality) /* * Change personality of the currently running process. 
diff --git a/include/linux/phantom.h b/include/linux/phantom.h index 96f4048a6cc..02268c54c25 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -27,14 +27,17 @@ struct phm_regs { #define PH_IOC_MAGIC 'p' #define PHN_GET_REG _IOWR(PH_IOC_MAGIC, 0, struct phm_reg *) -#define PHN_SET_REG _IOW (PH_IOC_MAGIC, 1, struct phm_reg *) +#define PHN_SET_REG _IOW(PH_IOC_MAGIC, 1, struct phm_reg *) #define PHN_GET_REGS _IOWR(PH_IOC_MAGIC, 2, struct phm_regs *) -#define PHN_SET_REGS _IOW (PH_IOC_MAGIC, 3, struct phm_regs *) +#define PHN_SET_REGS _IOW(PH_IOC_MAGIC, 3, struct phm_regs *) /* this ioctl tells the driver, that the caller is not OpenHaptics and might * use improved registers update (no more phantom switchoffs when using * libphantom) */ -#define PHN_NOT_OH _IO (PH_IOC_MAGIC, 4) -#define PH_IOC_MAXNR 4 +#define PHN_NOT_OH _IO(PH_IOC_MAGIC, 4) +#define PHN_GETREG _IOWR(PH_IOC_MAGIC, 5, struct phm_reg) +#define PHN_SETREG _IOW(PH_IOC_MAGIC, 6, struct phm_reg) +#define PHN_GETREGS _IOWR(PH_IOC_MAGIC, 7, struct phm_regs) +#define PHN_SETREGS _IOW(PH_IOC_MAGIC, 8, struct phm_regs) #define PHN_CONTROL 0x6 /* control byte in iaddr space */ #define PHN_CTL_AMP 0x1 /* switch after torques change */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 9b6c935f69c..9883bc94226 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -9,7 +9,6 @@ struct net; struct completion; - /* * The proc filesystem constants/structures */ @@ -41,7 +40,7 @@ enum { * /proc file has a parent, but "subdir" is NULL for all * non-directory entries). 
* - * "get_info" is called at "read", while "owner" is used to protect module + * "owner" is used to protect module * from unloading while proc_dir_entry is in use */ @@ -49,7 +48,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off, int count, int *eof, void *data); typedef int (write_proc_t)(struct file *file, const char __user *buffer, unsigned long count, void *data); -typedef int (get_info_t)(char *, char **, off_t, int); struct proc_dir_entry { unsigned int low_ino; @@ -70,7 +68,6 @@ struct proc_dir_entry { * somewhere. */ const struct file_operations *proc_fops; - get_info_t *get_info; struct module *owner; struct proc_dir_entry *next, *parent, *subdir; void *data; @@ -97,10 +94,6 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern struct proc_dir_entry proc_root; -extern struct proc_dir_entry *proc_root_fs; -extern struct proc_dir_entry *proc_bus; -extern struct proc_dir_entry *proc_root_driver; extern struct proc_dir_entry *proc_root_kcore; extern spinlock_t proc_subdir_lock; @@ -123,9 +116,10 @@ void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); -struct proc_dir_entry *proc_create(const char *name, mode_t mode, +struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops); + const struct file_operations *proc_fops, + void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); extern struct vfsmount *proc_mnt; @@ -180,6 +174,12 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *); extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, struct proc_dir_entry *parent); +static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode, + struct proc_dir_entry *parent, const struct file_operations *proc_fops) +{ + return proc_create_data(name, mode, parent, proc_fops, 
NULL); +} + static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) @@ -192,24 +192,19 @@ static inline struct proc_dir_entry *create_proc_read_entry(const char *name, return res; } -static inline struct proc_dir_entry *create_proc_info_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, get_info_t *get_info) -{ - struct proc_dir_entry *res=create_proc_entry(name,mode,base); - if (res) res->get_info=get_info; - return res; -} - extern struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent); -#else +/* While the {get|set|dup}_mm_exe_file functions are for mm_structs, they are + * only needed to implement /proc/<pid>|self/exe so we define them here. 
*/ +extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern struct file *get_mm_exe_file(struct mm_struct *mm); +extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm); -#define proc_root_driver NULL -#define proc_bus NULL +#else #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} @@ -226,6 +221,12 @@ static inline struct proc_dir_entry *proc_create(const char *name, { return NULL; } +static inline struct proc_dir_entry *proc_create_data(const char *name, + mode_t mode, struct proc_dir_entry *parent, + const struct file_operations *proc_fops, void *data) +{ + return NULL; +} #define remove_proc_entry(name, parent) do {} while (0) static inline struct proc_dir_entry *proc_symlink(const char *name, @@ -236,16 +237,11 @@ static inline struct proc_dir_entry *proc_mkdir(const char *name, static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, read_proc_t *read_proc, void * data) { return NULL; } -static inline struct proc_dir_entry *create_proc_info_entry(const char *name, - mode_t mode, struct proc_dir_entry *base, get_info_t *get_info) - { return NULL; } struct tty_driver; static inline void proc_tty_register_driver(struct tty_driver *driver) {}; static inline void proc_tty_unregister_driver(struct tty_driver *driver) {}; -extern struct proc_dir_entry proc_root; - static inline int pid_ns_prepare_proc(struct pid_namespace *ns) { return 0; @@ -255,6 +251,19 @@ static inline void pid_ns_release_proc(struct pid_namespace *ns) { } +static inline void set_mm_exe_file(struct mm_struct *mm, + struct file *new_exe_file) +{} + +static inline struct file *get_mm_exe_file(struct mm_struct *mm) +{ + return NULL; +} + +static inline void dup_mm_exe_file(struct mm_struct *oldmm, + struct mm_struct *newmm) +{} + #endif /* CONFIG_PROC_FS */ #if !defined(CONFIG_PROC_KCORE) 
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 61363ce896d..6d9e1fca098 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -9,6 +9,8 @@ * * Author: Pavel Emelianov <xemul@openvz.org> * + * See Documentation/controllers/resource_counter.txt for more + * info about what this counter is. */ #include <linux/cgroup.h> @@ -25,6 +27,10 @@ struct res_counter { */ unsigned long long usage; /* + * the maximal value of the usage from the counter creation + */ + unsigned long long max_usage; + /* * the limit that usage cannot exceed */ unsigned long long limit; @@ -39,8 +45,9 @@ struct res_counter { spinlock_t lock; }; -/* +/** * Helpers to interact with userspace + * res_counter_read_u64() - returns the value of the specified member. * res_counter_read/_write - put/get the specified fields from the * res_counter struct to/from the user * @@ -51,6 +58,8 @@ struct res_counter { * @pos: and the offset. */ +u64 res_counter_read_u64(struct res_counter *counter, int member); + ssize_t res_counter_read(struct res_counter *counter, int member, const char __user *buf, size_t nbytes, loff_t *pos, int (*read_strategy)(unsigned long long val, char *s)); @@ -64,6 +73,7 @@ ssize_t res_counter_write(struct res_counter *counter, int member, enum { RES_USAGE, + RES_MAX_USAGE, RES_LIMIT, RES_FAILCNT, }; @@ -124,4 +134,21 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt) return ret; } +static inline void res_counter_reset_max(struct res_counter *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->max_usage = cnt->usage; + spin_unlock_irqrestore(&cnt->lock, flags); +} + +static inline void res_counter_reset_failcnt(struct res_counter *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&cnt->lock, flags); + cnt->failcnt = 0; + spin_unlock_irqrestore(&cnt->lock, flags); +} #endif diff --git a/include/linux/resource.h b/include/linux/resource.h index ae13db71474..aaa423a6f3d 100644 --- 
a/include/linux/resource.h +++ b/include/linux/resource.h @@ -19,6 +19,7 @@ struct task_struct; #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN (-1) #define RUSAGE_BOTH (-2) /* sys_wait4() uses this */ +#define RUSAGE_THREAD 1 /* only the calling thread */ struct rusage { struct timeval ru_utime; /* user time used */ diff --git a/include/linux/rio.h b/include/linux/rio.h index 68e3f6853fa..cfb66bbc0f2 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -23,7 +23,6 @@ #include <linux/device.h> #include <linux/rio_regs.h> -#define RIO_ANY_DESTID 0xff #define RIO_NO_HOPCOUNT -1 #define RIO_INVALID_DESTID 0xffff @@ -39,11 +38,8 @@ entry is invalid (no route exists for the device ID) */ -#ifdef CONFIG_RAPIDIO_8_BIT_TRANSPORT -#define RIO_MAX_ROUTE_ENTRIES (1 << 8) -#else -#define RIO_MAX_ROUTE_ENTRIES (1 << 16) -#endif +#define RIO_MAX_ROUTE_ENTRIES(size) (size ? (1 << 16) : (1 << 8)) +#define RIO_ANY_DESTID(size) (size ? 0xffff : 0xff) #define RIO_MAX_MBOX 4 #define RIO_MAX_MSG_SIZE 0x1000 @@ -149,6 +145,11 @@ struct rio_dbell { void *dev_id; }; +enum rio_phy_type { + RIO_PHY_PARALLEL, + RIO_PHY_SERIAL, +}; + /** * struct rio_mport - RIO master port info * @dbells: List of doorbell events @@ -163,6 +164,7 @@ struct rio_dbell { * @id: Port ID, unique among all ports * @index: Port index, unique among all port interfaces of the same type * @name: Port name string + * @priv: Master port private data */ struct rio_mport { struct list_head dbells; /* list of doorbell events */ @@ -177,7 +179,13 @@ struct rio_mport { unsigned char id; /* port ID, unique among all ports */ unsigned char index; /* port index, unique among all port interfaces of the same type */ + unsigned int sys_size; /* RapidIO common transport system size. + * 0 - Small size. 256 devices. + * 1 - Large size, 65536 devices. 
+ */ + enum rio_phy_type phy_type; /* RapidIO phy type */ unsigned char name[40]; + void *priv; /* Master port private data */ }; /** @@ -211,7 +219,7 @@ struct rio_switch { u16 switchid; u16 hopcount; u16 destid; - u8 route_table[RIO_MAX_ROUTE_ENTRIES]; + u8 *route_table; int (*add_entry) (struct rio_mport * mport, u16 destid, u8 hopcount, u16 table, u16 route_destid, u8 route_port); int (*get_entry) (struct rio_mport * mport, u16 destid, u8 hopcount, @@ -229,13 +237,15 @@ struct rio_switch { * @dsend: Callback to send a doorbell message. */ struct rio_ops { - int (*lcread) (int index, u32 offset, int len, u32 * data); - int (*lcwrite) (int index, u32 offset, int len, u32 data); - int (*cread) (int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 * data); - int (*cwrite) (int index, u16 destid, u8 hopcount, u32 offset, int len, - u32 data); - int (*dsend) (int index, u16 destid, u16 data); + int (*lcread) (struct rio_mport *mport, int index, u32 offset, int len, + u32 *data); + int (*lcwrite) (struct rio_mport *mport, int index, u32 offset, int len, + u32 data); + int (*cread) (struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 *data); + int (*cwrite) (struct rio_mport *mport, int index, u16 destid, + u8 hopcount, u32 offset, int len, u32 data); + int (*dsend) (struct rio_mport *mport, int index, u16 destid, u16 data); }; #define RIO_RESOURCE_MEM 0x00000100 diff --git a/include/linux/sched.h b/include/linux/sched.h index 024d72b47a0..1d02babdb2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2148,6 +2148,19 @@ static inline void migration_init(void) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +#ifdef CONFIG_MM_OWNER +extern void mm_update_next_owner(struct mm_struct *mm); +extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); +#else +static inline void mm_update_next_owner(struct mm_struct *mm) +{ +} + +static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) 
+{ +} +#endif /* CONFIG_MM_OWNER */ + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/security.h b/include/linux/security.h index d0a28fd1747..adb09d893ae 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,9 @@ extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); -extern int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags); -extern int cap_inode_removexattr(struct dentry *dentry, char *name); +extern int cap_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -1008,6 +1009,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the * normal permissions model should be effected. + * @key_getsecurity: + * Get a textual representation of the security context attached to a key + * for the purposes of honouring KEYCTL_GETSECURITY. This function + * allocates the storage for the NUL-terminated string and the caller + * should free it. + * @key points to the key to be queried. + * @_buffer points to a pointer that should be set to point to the + * resulting string (or to NULL if there is no label or an error occurs). + * Return the length of the string (including terminating NUL) or -ve if + * an error. + * May also return 0 (and a NULL buffer pointer) if there is no label.
* * Security hooks affecting all System V IPC operations. * @@ -1362,13 +1374,13 @@ struct security_operations { int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); void (*inode_delete) (struct inode *inode); - int (*inode_setxattr) (struct dentry *dentry, char *name, void *value, - size_t size, int flags); - void (*inode_post_setxattr) (struct dentry *dentry, char *name, void *value, - size_t size, int flags); - int (*inode_getxattr) (struct dentry *dentry, char *name); + int (*inode_setxattr) (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + void (*inode_post_setxattr) (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); + int (*inode_getxattr) (struct dentry *dentry, const char *name); int (*inode_listxattr) (struct dentry *dentry); - int (*inode_removexattr) (struct dentry *dentry, char *name); + int (*inode_removexattr) (struct dentry *dentry, const char *name); int (*inode_need_killpriv) (struct dentry *dentry); int (*inode_killpriv) (struct dentry *dentry); int (*inode_getsecurity) (const struct inode *inode, const char *name, void **buffer, bool alloc); @@ -1537,7 +1549,7 @@ struct security_operations { int (*key_permission) (key_ref_t key_ref, struct task_struct *context, key_perm_t perm); - + int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -1633,13 +1645,13 @@ int security_inode_permission(struct inode *inode, int mask, struct nameidata *n int security_inode_setattr(struct dentry *dentry, struct iattr *attr); int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); void security_inode_delete(struct inode *inode); -int security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags); -void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags); -int 
security_inode_getxattr(struct dentry *dentry, char *name); +int security_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +void security_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +int security_inode_getxattr(struct dentry *dentry, const char *name); int security_inode_listxattr(struct dentry *dentry); -int security_inode_removexattr(struct dentry *dentry, char *name); +int security_inode_removexattr(struct dentry *dentry, const char *name); int security_inode_need_killpriv(struct dentry *dentry); int security_inode_killpriv(struct dentry *dentry); int security_inode_getsecurity(const struct inode *inode, const char *name, void **buffer, bool alloc); @@ -2041,17 +2053,18 @@ static inline int security_inode_getattr(struct vfsmount *mnt, static inline void security_inode_delete(struct inode *inode) { } -static inline int security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static inline int security_inode_setxattr(struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) { return cap_inode_setxattr(dentry, name, value, size, flags); } -static inline void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static inline void security_inode_post_setxattr(struct dentry *dentry, + const char *name, const void *value, size_t size, int flags) { } -static inline int security_inode_getxattr(struct dentry *dentry, char *name) +static inline int security_inode_getxattr(struct dentry *dentry, + const char *name) { return 0; } @@ -2061,7 +2074,8 @@ static inline int security_inode_listxattr(struct dentry *dentry) return 0; } -static inline int security_inode_removexattr(struct dentry *dentry, char *name) +static inline int security_inode_removexattr(struct dentry *dentry, + const char *name) { return cap_inode_removexattr(dentry, name); } 
@@ -2729,6 +2743,7 @@ int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long f void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, struct task_struct *context, key_perm_t perm); +int security_key_getsecurity(struct key *key, char **_buffer); #else @@ -2750,6 +2765,12 @@ static inline int security_key_permission(key_ref_t key_ref, return 0; } +static inline int security_key_getsecurity(struct key *key, char **_buffer) +{ + *_buffer = NULL; + return 0; +} + #endif #endif /* CONFIG_KEYS */ diff --git a/include/linux/smb.h b/include/linux/smb.h index f098dff93f6..caa43b2370c 100644 --- a/include/linux/smb.h +++ b/include/linux/smb.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <linux/magic.h> +#include <linux/time.h> enum smb_protocol { SMB_PROTOCOL_NONE, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8df6d1382ac..0522f368f9d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -240,26 +240,28 @@ asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif -asmlinkage long sys_setxattr(char __user *path, char __user *name, - void __user *value, size_t size, int flags); -asmlinkage long sys_lsetxattr(char __user *path, char __user *name, - void __user *value, size_t size, int flags); -asmlinkage long sys_fsetxattr(int fd, char __user *name, void __user *value, - size_t size, int flags); -asmlinkage ssize_t sys_getxattr(char __user *path, char __user *name, +asmlinkage long sys_setxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage long sys_fsetxattr(int fd, const char __user *name, + const void __user *value, size_t size, int flags); +asmlinkage ssize_t sys_getxattr(const char 
__user *path, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t sys_lgetxattr(char __user *path, char __user *name, +asmlinkage ssize_t sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t sys_fgetxattr(int fd, char __user *name, +asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size); -asmlinkage ssize_t sys_listxattr(char __user *path, char __user *list, +asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list, size_t size); -asmlinkage ssize_t sys_llistxattr(char __user *path, char __user *list, +asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list, size_t size); asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size); -asmlinkage long sys_removexattr(char __user *path, char __user *name); -asmlinkage long sys_lremovexattr(char __user *path, char __user *name); -asmlinkage long sys_fremovexattr(int fd, char __user *name); +asmlinkage long sys_removexattr(const char __user *path, + const char __user *name); +asmlinkage long sys_lremovexattr(const char __user *path, + const char __user *name); +asmlinkage long sys_fremovexattr(int fd, const char __user *name); asmlinkage unsigned long sys_brk(unsigned long brk); asmlinkage long sys_mprotect(unsigned long start, size_t len, diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 571f01d20a8..24141b4d1a1 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -945,11 +945,14 @@ enum /* For the /proc/sys support */ struct ctl_table; struct nsproxy; +struct ctl_table_root; + extern struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev); extern struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces, struct ctl_table_header *prev); extern void sysctl_head_finish(struct ctl_table_header *prev); -extern int sysctl_perm(struct ctl_table *table, int op); +extern int 
sysctl_perm(struct ctl_table_root *root, + struct ctl_table *table, int op); typedef struct ctl_table ctl_table; @@ -981,11 +984,6 @@ extern int do_sysctl (int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); -extern int do_sysctl_strategy (struct ctl_table *table, - int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); - extern ctl_handler sysctl_data; extern ctl_handler sysctl_string; extern ctl_handler sysctl_intvec; @@ -1054,6 +1052,8 @@ struct ctl_table_root { struct list_head header_list; struct list_head *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); + int (*permissions)(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table); }; /* struct ctl_table_header is used to maintain dynamic lists of @@ -1085,8 +1085,6 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table); -#else /* __KERNEL__ */ - #endif /* __KERNEL__ */ #endif /* _LINUX_SYSCTL_H */ diff --git a/include/linux/sysv_fs.h b/include/linux/sysv_fs.h index e0248631e46..96411306eec 100644 --- a/include/linux/sysv_fs.h +++ b/include/linux/sysv_fs.h @@ -1,11 +1,7 @@ #ifndef _LINUX_SYSV_FS_H #define _LINUX_SYSV_FS_H -#if defined(__GNUC__) -# define __packed2__ __attribute__((packed, aligned(2))) -#else ->> I want to scream! 
<< -#endif +#define __packed2__ __attribute__((packed, aligned(2))) #ifndef __KERNEL__ diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h new file mode 100644 index 00000000000..99c1b4d20b0 --- /dev/null +++ b/include/linux/unaligned/access_ok.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_UNALIGNED_ACCESS_OK_H +#define _LINUX_UNALIGNED_ACCESS_OK_H + +#include <linux/kernel.h> +#include <asm/byteorder.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpup((__le16 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpup((__le32 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpup((__le64 *)p); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpup((__be16 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpup((__be32 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpup((__be64 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + *((__le16 *)p) = cpu_to_le16(val); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + *((__le32 *)p) = cpu_to_le32(val); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + *((__le64 *)p) = cpu_to_le64(val); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + *((__be16 *)p) = cpu_to_be16(val); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + *((__be32 *)p) = cpu_to_be32(val); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + *((__be64 *)p) = cpu_to_be64(val); +} + +#endif /* _LINUX_UNALIGNED_ACCESS_OK_H */ diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h new file mode 100644 index 00000000000..46dd12c5709 --- /dev/null +++ b/include/linux/unaligned/be_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H +#define _LINUX_UNALIGNED_BE_BYTESHIFT_H + +#include 
<linux/kernel.h> + +static inline u16 __get_unaligned_be16(const u8 *p) +{ + return p[0] << 8 | p[1]; +} + +static inline u32 __get_unaligned_be32(const u8 *p) +{ + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline u64 __get_unaligned_be64(const u8 *p) +{ + return (u64)__get_unaligned_be32(p) << 32 | + __get_unaligned_be32(p + 4); +} + +static inline void __put_unaligned_be16(u16 val, u8 *p) +{ + *p++ = val >> 8; + *p++ = val; +} + +static inline void __put_unaligned_be32(u32 val, u8 *p) +{ + __put_unaligned_be16(val >> 16, p); + __put_unaligned_be16(val, p + 2); +} + +static inline void __put_unaligned_be64(u64 val, u8 *p) +{ + __put_unaligned_be32(val >> 32, p); + __put_unaligned_be32(val, p + 4); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_be16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_be32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_be64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_be16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_be32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_be64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/be_memmove.h b/include/linux/unaligned/be_memmove.h new file mode 100644 index 00000000000..c2a76c5c9ed --- /dev/null +++ b/include/linux/unaligned/be_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_MEMMOVE_H +#define _LINUX_UNALIGNED_BE_MEMMOVE_H + +#include <linux/unaligned/memmove.h> + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_memmove32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const 
void *p) +{ + return __get_unaligned_memmove64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h new file mode 100644 index 00000000000..132415836c5 --- /dev/null +++ b/include/linux/unaligned/be_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_STRUCT_H +#define _LINUX_UNALIGNED_BE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h new file mode 100644 index 00000000000..02d97ff3df7 --- /dev/null +++ b/include/linux/unaligned/generic.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_UNALIGNED_GENERIC_H +#define _LINUX_UNALIGNED_GENERIC_H + +/* + * Cause a link-time error if we try an unaligned access other than + * 1,2,4 or 8 bytes long + */ +extern void __bad_unaligned_access_size(void); + +#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + 
__builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __put_unaligned_le(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_le16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_le32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_le64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#define __put_unaligned_be(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_be16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_be32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_be64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#endif /* _LINUX_UNALIGNED_GENERIC_H */ diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h new file mode 100644 index 00000000000..59777e951ba --- /dev/null +++ b/include/linux/unaligned/le_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H +#define _LINUX_UNALIGNED_LE_BYTESHIFT_H + +#include <linux/kernel.h> + +static inline u16 
__get_unaligned_le16(const u8 *p) +{ + return p[0] | p[1] << 8; +} + +static inline u32 __get_unaligned_le32(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +} + +static inline u64 __get_unaligned_le64(const u8 *p) +{ + return (u64)__get_unaligned_le32(p + 4) << 32 | + __get_unaligned_le32(p); +} + +static inline void __put_unaligned_le16(u16 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; +} + +static inline void __put_unaligned_le32(u32 val, u8 *p) +{ + __put_unaligned_le16(val >> 16, p + 2); + __put_unaligned_le16(val, p); +} + +static inline void __put_unaligned_le64(u64 val, u8 *p) +{ + __put_unaligned_le32(val >> 32, p + 4); + __put_unaligned_le32(val, p); +} + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_le16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_le32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_le64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_le16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_le32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_le64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/le_memmove.h b/include/linux/unaligned/le_memmove.h new file mode 100644 index 00000000000..269849bee4e --- /dev/null +++ b/include/linux/unaligned/le_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_MEMMOVE_H +#define _LINUX_UNALIGNED_LE_MEMMOVE_H + +#include <linux/unaligned/memmove.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_memmove32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return 
__get_unaligned_memmove64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h new file mode 100644 index 00000000000..088c4572faa --- /dev/null +++ b/include/linux/unaligned/le_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_STRUCT_H +#define _LINUX_UNALIGNED_LE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */ diff --git a/include/linux/unaligned/memmove.h b/include/linux/unaligned/memmove.h new file mode 100644 index 00000000000..eeb5a779a4f --- /dev/null +++ b/include/linux/unaligned/memmove.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_UNALIGNED_MEMMOVE_H +#define _LINUX_UNALIGNED_MEMMOVE_H + +#include <linux/kernel.h> +#include <linux/string.h> + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ + +static inline u16 __get_unaligned_memmove16(const void *p) +{ + u16 tmp; + memmove(&tmp, p, 2); + return tmp; +} + +static inline u32 __get_unaligned_memmove32(const void *p) +{ + u32 tmp; + memmove(&tmp, p, 4); + return tmp; +} + +static inline u64 __get_unaligned_memmove64(const void *p) +{ + u64 tmp; + memmove(&tmp, p, 8); + return tmp; +} + +static inline void __put_unaligned_memmove16(u16 val, void *p) +{ + memmove(p, &val, 2); +} + +static inline void __put_unaligned_memmove32(u32 val, void *p) +{ + memmove(p, &val, 4); +} + +static inline void __put_unaligned_memmove64(u64 val, void *p) +{ + memmove(p, &val, 8); +} + +#endif /* _LINUX_UNALIGNED_MEMMOVE_H */ diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h new file mode 100644 index 00000000000..2498bb9fe00 --- /dev/null +++ b/include/linux/unaligned/packed_struct.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H +#define _LINUX_UNALIGNED_PACKED_STRUCT_H + +#include <linux/kernel.h> + +struct __una_u16 { u16 x __attribute__((packed)); }; +struct __una_u32 { u32 x __attribute__((packed)); }; +struct __una_u64 { u64 x __attribute__((packed)); }; + +static inline u16 __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline u32 __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline u64 __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(u16 val, void *p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(u32 val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(u64 val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = 
val; +} + +#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index df6b95d2218..d131e352cfe 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -47,10 +47,10 @@ struct xattr_handler { }; ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); -ssize_t vfs_getxattr(struct dentry *, char *, void *, size_t); +ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); -int vfs_setxattr(struct dentry *, char *, void *, size_t, int); -int vfs_removexattr(struct dentry *, char *); +int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +int vfs_removexattr(struct dentry *, const char *); ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 22298423cf0..9ee0d2e51b1 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -62,7 +62,7 @@ struct ib_umem_chunk { #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, - size_t size, int access); + size_t size, int access, int dmasync); void ib_umem_release(struct ib_umem *umem); int ib_umem_page_count(struct ib_umem *umem); @@ -72,7 +72,7 @@ int ib_umem_page_count(struct ib_umem *umem); static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, size_t size, - int access) { + int access, int dmasync) { return ERR_PTR(-EINVAL); } static inline void ib_umem_release(struct ib_umem *umem) { } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2dcbecce3f6..911a661b727 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1542,6 +1542,24 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, dma_unmap_single(dev->dma_device, 
addr, size, direction); } +static inline u64 ib_dma_map_single_attrs(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_map_single_attrs(dev->dma_device, cpu_addr, size, + direction, attrs); +} + +static inline void ib_dma_unmap_single_attrs(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_unmap_single_attrs(dev->dma_device, addr, size, + direction, attrs); +} + /** * ib_dma_map_page - Map a physical page to DMA address * @dev: The device for which the dma_addr is to be created @@ -1611,6 +1629,21 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, dma_unmap_sg(dev->dma_device, sg, nents, direction); } +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs); +} /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created diff --git a/init/Kconfig b/init/Kconfig index da071c4bbfb..3e7b257fc05 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -259,17 +259,14 @@ config IKCONFIG_PROC config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" range 12 21 - default 17 if S390 || LOCKDEP - default 16 if X86_NUMAQ || IA64 - default 15 if SMP - default 14 + default 17 help Select kernel log buffer size as a power of 2. 
- Defaults and Examples: - 17 => 128 KB for S/390 - 16 => 64 KB for x86 NUMAQ or IA-64 - 15 => 32 KB for SMP - 14 => 16 KB for uniprocessor + Examples: + 17 => 128 KB + 16 => 64 KB + 15 => 32 KB + 14 => 16 KB 13 => 8 KB 12 => 4 KB @@ -284,6 +281,7 @@ config CGROUPS config CGROUP_DEBUG bool "Example debug cgroup subsystem" depends on CGROUPS + default n help This option enables a simple cgroup subsystem that exports useful debugging information about the cgroups @@ -300,6 +298,13 @@ config CGROUP_NS for instance virtual servers and checkpoint/restart jobs. +config CGROUP_DEVICE + bool "Device controller for cgroups" + depends on CGROUPS && EXPERIMENTAL + help + Provides a cgroup implementing whitelists for devices which + a process in the cgroup can mknod or open. + config CPUSETS bool "Cpuset support" depends on SMP && CGROUPS @@ -373,9 +378,13 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config MM_OWNER + bool + config CGROUP_MEM_RES_CTLR bool "Memory Resource Controller for Control Groups" depends on CGROUPS && RESOURCE_COUNTERS + select MM_OWNER help Provides a memory resource controller that manages both page cache and RSS memory. @@ -388,6 +397,9 @@ config CGROUP_MEM_RES_CTLR Only enable when you're ok with these trade offs and really sure you need the memory resource controller. + This config option also selects MM_OWNER config option, which + could in turn add some fork/exit overhead. + config SYSFS_DEPRECATED bool @@ -538,6 +550,17 @@ config SYSCTL_SYSCALL If unsure say Y here. +config SYSCTL_SYSCALL_CHECK + bool "Sysctl checks" if EMBEDDED + depends on SYSCTL_SYSCALL + default y + ---help--- + sys_sysctl uses binary paths that have been found challenging + to properly maintain and use. This enables checks that help + you to keep things correct. + + If unsure say Y here. 
+ config KALLSYMS bool "Load all symbols for debugging/ksymoops" if EMBEDDED default y diff --git a/init/initramfs.c b/init/initramfs.c index d53fee8d860..8eeeccb328c 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -57,7 +57,7 @@ static char __init *find_link(int major, int minor, int ino, continue; return (*p)->name; } - q = (struct hash *)malloc(sizeof(struct hash)); + q = kmalloc(sizeof(struct hash), GFP_KERNEL); if (!q) panic("can't allocate link hash entry"); q->major = major; @@ -77,7 +77,7 @@ static void __init free_hash(void) while (*p) { q = *p; *p = q->next; - free(q); + kfree(q); } } } @@ -445,10 +445,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) { int written; dry_run = check_only; - header_buf = malloc(110); - symlink_buf = malloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1); - name_buf = malloc(N_ALIGN(PATH_MAX)); - window = malloc(WSIZE); + header_buf = kmalloc(110, GFP_KERNEL); + symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); + name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); + window = kmalloc(WSIZE, GFP_KERNEL); if (!window || !header_buf || !symlink_buf || !name_buf) panic("can't allocate buffers"); state = Start; @@ -484,10 +484,10 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) buf += inptr; len -= inptr; } - free(window); - free(name_buf); - free(symlink_buf); - free(header_buf); + kfree(window); + kfree(name_buf); + kfree(symlink_buf); + kfree(header_buf); return message; } diff --git a/init/main.c b/init/main.c index 1687b0167c4..624266b524d 100644 --- a/init/main.c +++ b/init/main.c @@ -58,6 +58,7 @@ #include <linux/kthread.h> #include <linux/sched.h> #include <linux/signal.h> +#include <linux/idr.h> #include <asm/io.h> #include <asm/bugs.h> @@ -559,6 +560,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); + mm_init_owner(&init_mm, &init_task); setup_command_line(command_line); 
unwind_setup(); setup_per_cpu_areas(); @@ -636,6 +638,7 @@ asmlinkage void __init start_kernel(void) enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); + idr_init_cache(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) @@ -700,10 +703,8 @@ static void __init do_initcalls(void) int result; if (initcall_debug) { - printk("Calling initcall 0x%p", *call); - print_fn_descriptor_symbol(": %s()", + print_fn_descriptor_symbol("calling %s()\n", (unsigned long) *call); - printk("\n"); t0 = ktime_get(); } @@ -713,15 +714,10 @@ static void __init do_initcalls(void) t1 = ktime_get(); delta = ktime_sub(t1, t0); - printk("initcall 0x%p", *call); - print_fn_descriptor_symbol(": %s()", + print_fn_descriptor_symbol("initcall %s()", (unsigned long) *call); - printk(" returned %d.\n", result); - - printk("initcall 0x%p ran for %Ld msecs: ", - *call, (unsigned long long)delta.tv64 >> 20); - print_fn_descriptor_symbol("%s()\n", - (unsigned long) *call); + printk(" returned %d after %Ld msecs\n", result, + (unsigned long long) delta.tv64 >> 20); } if (result && result != -ENODEV && initcall_debug) { @@ -737,10 +733,9 @@ static void __init do_initcalls(void) local_irq_enable(); } if (msg) { - printk(KERN_WARNING "initcall at 0x%p", *call); - print_fn_descriptor_symbol(": %s()", + print_fn_descriptor_symbol(KERN_WARNING "initcall %s()", (unsigned long) *call); - printk(": returned with %s\n", msg); + printk(" returned with %s\n", msg); } } diff --git a/ipc/Makefile b/ipc/Makefile index 5fc5e33ea04..65c38439580 100644 --- a/ipc/Makefile +++ b/ipc/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o -obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o +obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o obj_mq-$(CONFIG_COMPAT) += compat_mq.o obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 
7f4235bed51..d3497465cc0 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -15,6 +15,8 @@ #include <linux/sysctl.h> #include <linux/uaccess.h> #include <linux/ipc_namespace.h> +#include <linux/msg.h> +#include "util.h" static void *get_ipc(ctl_table *table) { @@ -24,6 +26,27 @@ static void *get_ipc(ctl_table *table) return which; } +/* + * Routine that is called when a tunable has successfully been changed by + * hand and it has a callback routine registered on the ipc namespace notifier + * chain: we don't want such tunables to be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * They can come back to a recomputable state by being set to a <0 value. + */ +static void tunable_set_callback(int val) +{ + if (val >= 0) + unregister_ipcns_notifier(current->nsproxy->ipc_ns); + else { + /* + * Re-enable automatic recomputing only if not already + * enabled. + */ + recompute_msgmni(current->nsproxy->ipc_ns); + cond_register_ipcns_notifier(current->nsproxy->ipc_ns); + } +} + #ifdef CONFIG_PROC_FS static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -35,6 +58,24 @@ static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp, return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); } +static int proc_ipc_callback_dointvec(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + size_t lenp_bef = *lenp; + int rc; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); + + if (write && !rc && lenp_bef == *lenp) + tunable_set_callback(*((int *)(ipc_table.data))); + + return rc; +} + static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -49,6 +90,7 @@ static int 
proc_ipc_doulongvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_callback_dointvec NULL #endif #ifdef CONFIG_SYSCTL_SYSCALL @@ -90,8 +132,30 @@ static int sysctl_ipc_data(ctl_table *table, int __user *name, int nlen, } return 1; } + +static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, + int nlen, void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int rc; + + rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, + newlen); + + if (newval && newlen && rc > 0) { + /* + * Tunable has successfully been changed from userland + */ + int *data = get_ipc(table); + + tunable_set_callback(*data); + } + + return rc; +} #else #define sysctl_ipc_data NULL +#define sysctl_ipc_registered_data NULL #endif static struct ctl_table ipc_kern_table[] = { @@ -137,8 +201,8 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_dointvec, - .strategy = sysctl_ipc_data, + .proc_handler = proc_ipc_callback_dointvec, + .strategy = sysctl_ipc_registered_data, }, { .ctl_name = KERN_MSGMNB, diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c new file mode 100644 index 00000000000..70ff09183f7 --- /dev/null +++ b/ipc/ipcns_notifier.c @@ -0,0 +1,82 @@ +/* + * linux/ipc/ipcns_notifier.c + * Copyright (C) 2007 BULL SA. Nadia Derbey + * + * Notification mechanism for ipc namespaces: + * The callback routine registered in the memory chain invokes the ipcns + * notifier chain with the IPCNS_MEMCHANGED event. + * Each callback routine registered in the ipcns namespace recomputes msgmni + * for the owning namespace. 
+ */ + +#include <linux/msg.h> +#include <linux/rcupdate.h> +#include <linux/notifier.h> +#include <linux/nsproxy.h> +#include <linux/ipc_namespace.h> + +#include "util.h" + + + +static BLOCKING_NOTIFIER_HEAD(ipcns_chain); + + +static int ipcns_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + struct ipc_namespace *ns; + + switch (action) { + case IPCNS_MEMCHANGED: /* amount of lowmem has changed */ + case IPCNS_CREATED: + case IPCNS_REMOVED: + /* + * It's time to recompute msgmni + */ + ns = container_of(self, struct ipc_namespace, ipcns_nb); + /* + * No need to get a reference on the ns: the 1st job of + * free_ipc_ns() is to unregister the callback routine. + * blocking_notifier_chain_unregister takes the wr lock to do + * it. + * When this callback routine is called the rd lock is held by + * blocking_notifier_call_chain. + * So the ipc ns cannot be freed while we are here. + */ + recompute_msgmni(ns); + break; + default: + break; + } + + return NOTIFY_OK; +} + +int register_ipcns_notifier(struct ipc_namespace *ns) +{ + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); + ns->ipcns_nb.notifier_call = ipcns_callback; + ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; + return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); +} + +int cond_register_ipcns_notifier(struct ipc_namespace *ns) +{ + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); + ns->ipcns_nb.notifier_call = ipcns_callback; + ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; + return blocking_notifier_chain_cond_register(&ipcns_chain, + &ns->ipcns_nb); +} + +int unregister_ipcns_notifier(struct ipc_namespace *ns) +{ + return blocking_notifier_chain_unregister(&ipcns_chain, + &ns->ipcns_nb); +} + +int ipcns_notify(unsigned long val) +{ + return blocking_notifier_call_chain(&ipcns_chain, val, NULL); +} diff --git a/ipc/msg.c b/ipc/msg.c index 46585a05473..32494e8cc7a 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -27,6 +27,7 @@ #include <linux/msg.h> #include <linux/spinlock.h> 
#include <linux/init.h> +#include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/list.h> #include <linux/security.h> @@ -70,7 +71,6 @@ struct msg_sender { #define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS]) #define msg_unlock(msq) ipc_unlock(&(msq)->q_perm) -#define msg_buildid(id, seq) ipc_buildid(id, seq) static void freeque(struct ipc_namespace *, struct kern_ipc_perm *); static int newque(struct ipc_namespace *, struct ipc_params *); @@ -78,11 +78,49 @@ static int newque(struct ipc_namespace *, struct ipc_params *); static int sysvipc_msg_proc_show(struct seq_file *s, void *it); #endif +/* + * Scale msgmni with the available lowmem size: the memory dedicated to msg + * queues should occupy at most 1/MSG_MEM_SCALE of lowmem. + * Also take into account the number of nsproxies created so far. + * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range. + */ +void recompute_msgmni(struct ipc_namespace *ns) +{ + struct sysinfo i; + unsigned long allowed; + int nb_ns; + + si_meminfo(&i); + allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit) + / MSGMNB; + nb_ns = atomic_read(&nr_ipc_ns); + allowed /= nb_ns; + + if (allowed < MSGMNI) { + ns->msg_ctlmni = MSGMNI; + goto out_callback; + } + + if (allowed > IPCMNI / nb_ns) { + ns->msg_ctlmni = IPCMNI / nb_ns; + goto out_callback; + } + + ns->msg_ctlmni = allowed; + +out_callback: + + printk(KERN_INFO "msgmni has been set to %d for ipc namespace %p\n", + ns->msg_ctlmni, ns); +} + void msg_init_ns(struct ipc_namespace *ns) { ns->msg_ctlmax = MSGMAX; ns->msg_ctlmnb = MSGMNB; - ns->msg_ctlmni = MSGMNI; + + recompute_msgmni(ns); + atomic_set(&ns->msg_bytes, 0); atomic_set(&ns->msg_hdrs, 0); ipc_init_ids(&ns->ids[IPC_MSG_IDS]); @@ -104,21 +142,6 @@ void __init msg_init(void) } /* - * This routine is called in the paths where the rw_mutex is held to protect - * access to the idr tree. 
- */ -static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct msg_queue *)ipcp; - - return container_of(ipcp, struct msg_queue, q_perm); -} - -/* * msg_lock_(check_) routines are called in the paths where the rw_mutex * is not held. */ @@ -186,7 +209,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return id; } - msq->q_perm.id = msg_buildid(id, msq->q_perm.seq); msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -324,19 +346,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) out.msg_rtime = in->msg_rtime; out.msg_ctime = in->msg_ctime; - if (in->msg_cbytes > USHRT_MAX) - out.msg_cbytes = USHRT_MAX; + if (in->msg_cbytes > USHORT_MAX) + out.msg_cbytes = USHORT_MAX; else out.msg_cbytes = in->msg_cbytes; out.msg_lcbytes = in->msg_cbytes; - if (in->msg_qnum > USHRT_MAX) - out.msg_qnum = USHRT_MAX; + if (in->msg_qnum > USHORT_MAX) + out.msg_qnum = USHORT_MAX; else out.msg_qnum = in->msg_qnum; - if (in->msg_qbytes > USHRT_MAX) - out.msg_qbytes = USHRT_MAX; + if (in->msg_qbytes > USHORT_MAX) + out.msg_qbytes = USHORT_MAX; else out.msg_qbytes = in->msg_qbytes; out.msg_lqbytes = in->msg_qbytes; @@ -351,31 +373,14 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version) } } -struct msq_setbuf { - unsigned long qbytes; - uid_t uid; - gid_t gid; - mode_t mode; -}; - static inline unsigned long -copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version) +copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) { switch(version) { case IPC_64: - { - struct msqid64_ds tbuf; - - if (copy_from_user(&tbuf, buf, sizeof(tbuf))) + if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; - - out->qbytes = tbuf.msg_qbytes; - out->uid = tbuf.msg_perm.uid; - out->gid = tbuf.msg_perm.gid; - 
out->mode = tbuf.msg_perm.mode; - return 0; - } case IPC_OLD: { struct msqid_ds tbuf_old; @@ -383,14 +388,14 @@ copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version) if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->uid = tbuf_old.msg_perm.uid; - out->gid = tbuf_old.msg_perm.gid; - out->mode = tbuf_old.msg_perm.mode; + out->msg_perm.uid = tbuf_old.msg_perm.uid; + out->msg_perm.gid = tbuf_old.msg_perm.gid; + out->msg_perm.mode = tbuf_old.msg_perm.mode; if (tbuf_old.msg_qbytes == 0) - out->qbytes = tbuf_old.msg_lqbytes; + out->msg_qbytes = tbuf_old.msg_lqbytes; else - out->qbytes = tbuf_old.msg_qbytes; + out->msg_qbytes = tbuf_old.msg_qbytes; return 0; } @@ -399,10 +404,71 @@ copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version) } } -asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +/* + * This function handles some msgctl commands which require the rw_mutex + * to be held in write mode. + * NOTE: no locks must be held, the rw_mutex is taken inside this function. 
+ */ +static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, + struct msqid_ds __user *buf, int version) { struct kern_ipc_perm *ipcp; - struct msq_setbuf uninitialized_var(setbuf); + struct msqid64_ds msqid64; + struct msg_queue *msq; + int err; + + if (cmd == IPC_SET) { + if (copy_msqid_from_user(&msqid64, buf, version)) + return -EFAULT; + } + + ipcp = ipcctl_pre_down(&msg_ids(ns), msqid, cmd, + &msqid64.msg_perm, msqid64.msg_qbytes); + if (IS_ERR(ipcp)) + return PTR_ERR(ipcp); + + msq = container_of(ipcp, struct msg_queue, q_perm); + + err = security_msg_queue_msgctl(msq, cmd); + if (err) + goto out_unlock; + + switch (cmd) { + case IPC_RMID: + freeque(ns, ipcp); + goto out_up; + case IPC_SET: + if (msqid64.msg_qbytes > ns->msg_ctlmnb && + !capable(CAP_SYS_RESOURCE)) { + err = -EPERM; + goto out_unlock; + } + + msq->q_qbytes = msqid64.msg_qbytes; + + ipc_update_perm(&msqid64.msg_perm, ipcp); + msq->q_ctime = get_seconds(); + /* sleeping receivers might be excluded by + * stricter permissions. + */ + expunge_all(msq, -EAGAIN); + /* sleeping senders might be able to send + * due to a larger queue size. 
+ */ + ss_wakeup(&msq->q_senders, 0); + break; + default: + err = -EINVAL; + } +out_unlock: + msg_unlock(msq); +out_up: + up_write(&msg_ids(ns).rw_mutex); + return err; +} + +asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +{ struct msg_queue *msq; int err, version; struct ipc_namespace *ns; @@ -498,82 +564,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) return success_return; } case IPC_SET: - if (!buf) - return -EFAULT; - if (copy_msqid_from_user(&setbuf, buf, version)) - return -EFAULT; - break; case IPC_RMID: - break; + err = msgctl_down(ns, msqid, cmd, buf, version); + return err; default: return -EINVAL; } - down_write(&msg_ids(ns).rw_mutex); - msq = msg_lock_check_down(ns, msqid); - if (IS_ERR(msq)) { - err = PTR_ERR(msq); - goto out_up; - } - - ipcp = &msq->q_perm; - - err = audit_ipc_obj(ipcp); - if (err) - goto out_unlock_up; - if (cmd == IPC_SET) { - err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, - setbuf.mode); - if (err) - goto out_unlock_up; - } - - err = -EPERM; - if (current->euid != ipcp->cuid && - current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) - /* We _could_ check for CAP_CHOWN above, but we don't */ - goto out_unlock_up; - - err = security_msg_queue_msgctl(msq, cmd); - if (err) - goto out_unlock_up; - - switch (cmd) { - case IPC_SET: - { - err = -EPERM; - if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) - goto out_unlock_up; - - msq->q_qbytes = setbuf.qbytes; - - ipcp->uid = setbuf.uid; - ipcp->gid = setbuf.gid; - ipcp->mode = (ipcp->mode & ~S_IRWXUGO) | - (S_IRWXUGO & setbuf.mode); - msq->q_ctime = get_seconds(); - /* sleeping receivers might be excluded by - * stricter permissions. - */ - expunge_all(msq, -EAGAIN); - /* sleeping senders might be able to send - * due to a larger queue size. 
- */ - ss_wakeup(&msq->q_senders, 0); - msg_unlock(msq); - break; - } - case IPC_RMID: - freeque(ns, &msq->q_perm); - break; - } - err = 0; -out_up: - up_write(&msg_ids(ns).rw_mutex); - return err; -out_unlock_up: - msg_unlock(msq); - goto out_up; out_unlock: msg_unlock(msq); return err; diff --git a/ipc/namespace.c b/ipc/namespace.c index 1b967655eb3..9171d948751 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -20,10 +20,20 @@ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) if (ns == NULL) return ERR_PTR(-ENOMEM); + atomic_inc(&nr_ipc_ns); + sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); + /* + * msgmni has already been computed for the new ipc ns. + * Thus, do the ipcns creation notification before registering that + * new ipcns in the chain. + */ + ipcns_notify(IPCNS_CREATED); + register_ipcns_notifier(ns); + kref_init(&ns->kref); return ns; } @@ -79,8 +89,24 @@ void free_ipc_ns(struct kref *kref) struct ipc_namespace *ns; ns = container_of(kref, struct ipc_namespace, kref); + /* + * Unregistering the hotplug notifier at the beginning guarantees + * that the ipc namespace won't be freed while we are inside the + * callback routine. Since the blocking_notifier_chain_XXX routines + * hold a rw lock on the notifier list, unregister_ipcns_notifier() + * won't take the rw lock before blocking_notifier_call_chain() has + * released the rd lock. + */ + unregister_ipcns_notifier(ns); sem_exit_ns(ns); msg_exit_ns(ns); shm_exit_ns(ns); kfree(ns); + atomic_dec(&nr_ipc_ns); + + /* + * Do the ipcns removal notification after decrementing nr_ipc_ns in + * order to have a correct value when recomputing msgmni. 
+ */ + ipcns_notify(IPCNS_REMOVED); } diff --git a/ipc/sem.c b/ipc/sem.c index 0b45a4d383c..e9418df5ff3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -91,7 +91,6 @@ #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) -#define sem_buildid(id, seq) ipc_buildid(id, seq) static int newary(struct ipc_namespace *, struct ipc_params *); static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); @@ -142,21 +141,6 @@ void __init sem_init (void) } /* - * This routine is called in the paths where the rw_mutex is held to protect - * access to the idr tree. - */ -static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct sem_array *)ipcp; - - return container_of(ipcp, struct sem_array, sem_perm); -} - -/* * sem_lock_(check_) routines are called in the paths where the rw_mutex * is not held. 
*/ @@ -181,6 +165,25 @@ static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, return container_of(ipcp, struct sem_array, sem_perm); } +static inline void sem_lock_and_putref(struct sem_array *sma) +{ + ipc_lock_by_ptr(&sma->sem_perm); + ipc_rcu_putref(sma); +} + +static inline void sem_getref_and_unlock(struct sem_array *sma) +{ + ipc_rcu_getref(sma); + ipc_unlock(&(sma)->sem_perm); +} + +static inline void sem_putref(struct sem_array *sma) +{ + ipc_lock_by_ptr(&sma->sem_perm); + ipc_rcu_putref(sma); + ipc_unlock(&(sma)->sem_perm); +} + static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) { ipc_rmid(&sem_ids(ns), &s->sem_perm); @@ -268,7 +271,6 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } ns->used_sems += nsems; - sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq); sma->sem_base = (struct sem *) &sma[1]; /* sma->sem_pending = NULL; */ sma->sem_pending_last = &sma->sem_pending; @@ -700,19 +702,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; if(nsems > SEMMSL_FAST) { - ipc_rcu_getref(sma); - sem_unlock(sma); + sem_getref_and_unlock(sma); sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); return -ENOMEM; } - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); + sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); err = -EIDRM; @@ -733,38 +731,30 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, int i; struct sem_undo *un; - ipc_rcu_getref(sma); - sem_unlock(sma); + sem_getref_and_unlock(sma); if(nsems > SEMMSL_FAST) { sem_io = ipc_alloc(sizeof(ushort)*nsems); if(sem_io == NULL) { - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); return -ENOMEM; } } if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) { - ipc_lock_by_ptr(&sma->sem_perm); - 
ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); err = -EFAULT; goto out_free; } for (i = 0; i < nsems; i++) { if (sem_io[i] > SEMVMX) { - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); err = -ERANGE; goto out_free; } } - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); + sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); err = -EIDRM; @@ -830,28 +820,14 @@ out_free: return err; } -struct sem_setbuf { - uid_t uid; - gid_t gid; - mode_t mode; -}; - -static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __user *buf, int version) +static inline unsigned long +copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) { switch(version) { case IPC_64: - { - struct semid64_ds tbuf; - - if(copy_from_user(&tbuf, buf, sizeof(tbuf))) + if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; - - out->uid = tbuf.sem_perm.uid; - out->gid = tbuf.sem_perm.gid; - out->mode = tbuf.sem_perm.mode; - return 0; - } case IPC_OLD: { struct semid_ds tbuf_old; @@ -859,9 +835,9 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __ if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->uid = tbuf_old.sem_perm.uid; - out->gid = tbuf_old.sem_perm.gid; - out->mode = tbuf_old.sem_perm.mode; + out->sem_perm.uid = tbuf_old.sem_perm.uid; + out->sem_perm.gid = tbuf_old.sem_perm.gid; + out->sem_perm.mode = tbuf_old.sem_perm.mode; return 0; } @@ -870,38 +846,29 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __ } } -static int semctl_down(struct ipc_namespace *ns, int semid, int semnum, - int cmd, int version, union semun arg) +/* + * This function handles some semctl commands which require the rw_mutex + * to be held in write mode. + * NOTE: no locks must be held, the rw_mutex is taken inside this function. 
+ */ +static int semctl_down(struct ipc_namespace *ns, int semid, + int cmd, int version, union semun arg) { struct sem_array *sma; int err; - struct sem_setbuf uninitialized_var(setbuf); + struct semid64_ds semid64; struct kern_ipc_perm *ipcp; if(cmd == IPC_SET) { - if(copy_semid_from_user (&setbuf, arg.buf, version)) + if (copy_semid_from_user(&semid64, arg.buf, version)) return -EFAULT; } - sma = sem_lock_check_down(ns, semid); - if (IS_ERR(sma)) - return PTR_ERR(sma); - ipcp = &sma->sem_perm; - - err = audit_ipc_obj(ipcp); - if (err) - goto out_unlock; + ipcp = ipcctl_pre_down(&sem_ids(ns), semid, cmd, &semid64.sem_perm, 0); + if (IS_ERR(ipcp)) + return PTR_ERR(ipcp); - if (cmd == IPC_SET) { - err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); - if (err) - goto out_unlock; - } - if (current->euid != ipcp->cuid && - current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) { - err=-EPERM; - goto out_unlock; - } + sma = container_of(ipcp, struct sem_array, sem_perm); err = security_sem_semctl(sma, cmd); if (err) @@ -910,26 +877,19 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum, switch(cmd){ case IPC_RMID: freeary(ns, ipcp); - err = 0; - break; + goto out_up; case IPC_SET: - ipcp->uid = setbuf.uid; - ipcp->gid = setbuf.gid; - ipcp->mode = (ipcp->mode & ~S_IRWXUGO) - | (setbuf.mode & S_IRWXUGO); + ipc_update_perm(&semid64.sem_perm, ipcp); sma->sem_ctime = get_seconds(); - sem_unlock(sma); - err = 0; break; default: - sem_unlock(sma); err = -EINVAL; - break; } - return err; out_unlock: sem_unlock(sma); +out_up: + up_write(&sem_ids(ns).rw_mutex); return err; } @@ -963,9 +923,7 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) return err; case IPC_RMID: case IPC_SET: - down_write(&sem_ids(ns).rw_mutex); - err = semctl_down(ns,semid,semnum,cmd,version,arg); - up_write(&sem_ids(ns).rw_mutex); + err = semctl_down(ns, semid, cmd, version, arg); return err; default: return -EINVAL; @@ -1044,14 +1002,11 @@ 
static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) return ERR_PTR(PTR_ERR(sma)); nsems = sma->sem_nsems; - ipc_rcu_getref(sma); - sem_unlock(sma); + sem_getref_and_unlock(sma); new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); return ERR_PTR(-ENOMEM); } new->semadj = (short *) &new[1]; @@ -1062,13 +1017,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) if (un) { spin_unlock(&ulp->lock); kfree(new); - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); - sem_unlock(sma); + sem_putref(sma); goto out; } - ipc_lock_by_ptr(&sma->sem_perm); - ipc_rcu_putref(sma); + sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); spin_unlock(&ulp->lock); @@ -1298,6 +1250,7 @@ void exit_sem(struct task_struct *tsk) undo_list = tsk->sysvsem.undo_list; if (!undo_list) return; + tsk->sysvsem.undo_list = NULL; if (!atomic_dec_and_test(&undo_list->refcnt)) return; diff --git a/ipc/shm.c b/ipc/shm.c index e636910454a..554429ade07 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -60,7 +60,6 @@ static struct vm_operations_struct shm_vm_ops; #define shm_unlock(shp) \ ipc_unlock(&(shp)->shm_perm) -#define shm_buildid(id, seq) ipc_buildid(id, seq) static int newseg(struct ipc_namespace *, struct ipc_params *); static void shm_open(struct vm_area_struct *vma); @@ -127,18 +126,6 @@ static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns, return container_of(ipcp, struct shmid_kernel, shm_perm); } -static inline struct shmid_kernel *shm_lock_check_down( - struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; - - return container_of(ipcp, struct shmid_kernel, shm_perm); -} - /* * shm_lock_(check_) routines are called in the paths where the rw_mutex * is not held. 
@@ -169,12 +156,6 @@ static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) ipc_rmid(&shm_ids(ns), &s->shm_perm); } -static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp) -{ - return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); -} - - /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) @@ -416,7 +397,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = shm_addid(ns, shp); + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (id < 0) { error = id; goto no_id; @@ -428,7 +409,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_ctim = get_seconds(); shp->shm_segsz = size; shp->shm_nattch = 0; - shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq); shp->shm_file = file; /* * shmid gets reported as "inode#" in /proc/pid/maps. @@ -519,28 +499,14 @@ static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ } } -struct shm_setbuf { - uid_t uid; - gid_t gid; - mode_t mode; -}; - -static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __user *buf, int version) +static inline unsigned long +copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version) { switch(version) { case IPC_64: - { - struct shmid64_ds tbuf; - - if (copy_from_user(&tbuf, buf, sizeof(tbuf))) + if (copy_from_user(out, buf, sizeof(*out))) return -EFAULT; - - out->uid = tbuf.shm_perm.uid; - out->gid = tbuf.shm_perm.gid; - out->mode = tbuf.shm_perm.mode; - return 0; - } case IPC_OLD: { struct shmid_ds tbuf_old; @@ -548,9 +514,9 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __ if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) return -EFAULT; - out->uid = tbuf_old.shm_perm.uid; - out->gid = tbuf_old.shm_perm.gid; - out->mode = tbuf_old.shm_perm.mode; + out->shm_perm.uid = tbuf_old.shm_perm.uid; 
+ out->shm_perm.gid = tbuf_old.shm_perm.gid; + out->shm_perm.mode = tbuf_old.shm_perm.mode; return 0; } @@ -624,9 +590,53 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, } } -asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) +/* + * This function handles some shmctl commands which require the rw_mutex + * to be held in write mode. + * NOTE: no locks must be held, the rw_mutex is taken inside this function. + */ +static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, + struct shmid_ds __user *buf, int version) +{ + struct kern_ipc_perm *ipcp; + struct shmid64_ds shmid64; + struct shmid_kernel *shp; + int err; + + if (cmd == IPC_SET) { + if (copy_shmid_from_user(&shmid64, buf, version)) + return -EFAULT; + } + + ipcp = ipcctl_pre_down(&shm_ids(ns), shmid, cmd, &shmid64.shm_perm, 0); + if (IS_ERR(ipcp)) + return PTR_ERR(ipcp); + + shp = container_of(ipcp, struct shmid_kernel, shm_perm); + + err = security_shm_shmctl(shp, cmd); + if (err) + goto out_unlock; + switch (cmd) { + case IPC_RMID: + do_shm_rmid(ns, ipcp); + goto out_up; + case IPC_SET: + ipc_update_perm(&shmid64.shm_perm, ipcp); + shp->shm_ctim = get_seconds(); + break; + default: + err = -EINVAL; + } +out_unlock: + shm_unlock(shp); +out_up: + up_write(&shm_ids(ns).rw_mutex); + return err; +} + +asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) { - struct shm_setbuf setbuf; struct shmid_kernel *shp; int err, version; struct ipc_namespace *ns; @@ -783,97 +793,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf) goto out; } case IPC_RMID: - { - /* - * We cannot simply remove the file. The SVID states - * that the block remains until the last person - * detaches from it, then is deleted. A shmat() on - * an RMID segment is legal in older Linux and if - * we change it apps break... - * - * Instead we set a destroyed flag, and then blow - * the name away when the usage hits zero. 
- */ - down_write(&shm_ids(ns).rw_mutex); - shp = shm_lock_check_down(ns, shmid); - if (IS_ERR(shp)) { - err = PTR_ERR(shp); - goto out_up; - } - - err = audit_ipc_obj(&(shp->shm_perm)); - if (err) - goto out_unlock_up; - - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - err=-EPERM; - goto out_unlock_up; - } - - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - do_shm_rmid(ns, &shp->shm_perm); - up_write(&shm_ids(ns).rw_mutex); - goto out; - } - case IPC_SET: - { - if (!buf) { - err = -EFAULT; - goto out; - } - - if (copy_shmid_from_user (&setbuf, buf, version)) { - err = -EFAULT; - goto out; - } - down_write(&shm_ids(ns).rw_mutex); - shp = shm_lock_check_down(ns, shmid); - if (IS_ERR(shp)) { - err = PTR_ERR(shp); - goto out_up; - } - err = audit_ipc_obj(&(shp->shm_perm)); - if (err) - goto out_unlock_up; - err = audit_ipc_set_perm(0, setbuf.uid, setbuf.gid, setbuf.mode); - if (err) - goto out_unlock_up; - err=-EPERM; - if (current->euid != shp->shm_perm.uid && - current->euid != shp->shm_perm.cuid && - !capable(CAP_SYS_ADMIN)) { - goto out_unlock_up; - } - - err = security_shm_shmctl(shp, cmd); - if (err) - goto out_unlock_up; - - shp->shm_perm.uid = setbuf.uid; - shp->shm_perm.gid = setbuf.gid; - shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO) - | (setbuf.mode & S_IRWXUGO); - shp->shm_ctim = get_seconds(); - break; - } - + err = shmctl_down(ns, shmid, cmd, buf, version); + return err; default: - err = -EINVAL; - goto out; + return -EINVAL; } - err = 0; -out_unlock_up: - shm_unlock(shp); -out_up: - up_write(&shm_ids(ns).rw_mutex); - goto out; out_unlock: shm_unlock(shp); out: diff --git a/ipc/util.c b/ipc/util.c index fd1b50da9db..3339177b336 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -33,6 +33,7 @@ #include <linux/audit.h> #include <linux/nsproxy.h> #include <linux/rwsem.h> +#include <linux/memory.h> #include <linux/ipc_namespace.h> #include <asm/unistd.h> @@ -52,11 
+53,57 @@ struct ipc_namespace init_ipc_ns = { }, }; +atomic_t nr_ipc_ns = ATOMIC_INIT(1); + + +#ifdef CONFIG_MEMORY_HOTPLUG + +static void ipc_memory_notifier(struct work_struct *work) +{ + ipcns_notify(IPCNS_MEMCHANGED); +} + +static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier); + + +static int ipc_memory_callback(struct notifier_block *self, + unsigned long action, void *arg) +{ + switch (action) { + case MEM_ONLINE: /* memory successfully brought online */ + case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */ + /* + * This is done by invoking the ipcns notifier chain with the + * IPCNS_MEMCHANGED event. + * In order not to keep the lock on the hotplug memory chain + * for too long, queue a work item that will, when woken up, + * activate the ipcns notification chain. + * No need to keep several ipc work items on the queue. + */ + if (!work_pending(&ipc_memory_wq)) + schedule_work(&ipc_memory_wq); + break; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_CANCEL_OFFLINE: + default: + break; + } + + return NOTIFY_OK; +} + +#endif /* CONFIG_MEMORY_HOTPLUG */ + /** * ipc_init - initialise IPC subsystem * * The various system5 IPC resources (semaphores, messages and shared * memory) are initialised + * A callback routine is registered into the memory hotplug notifier + * chain: since msgmni scales to lowmem this callback routine will be + * called upon successful memory add / remove to recompute msgmni. 
*/ static int __init ipc_init(void) @@ -64,6 +111,8 @@ static int __init ipc_init(void) sem_init(); msg_init(); shm_init(); + hotplug_memory_notifier(ipc_memory_callback, IPC_CALLBACK_PRI); + register_ipcns_notifier(&init_ipc_ns); return 0; } __initcall(ipc_init); @@ -84,8 +133,8 @@ void ipc_init_ids(struct ipc_ids *ids) ids->seq = 0; { int seq_limit = INT_MAX/SEQ_MULTIPLIER; - if(seq_limit > USHRT_MAX) - ids->seq_max = USHRT_MAX; + if (seq_limit > USHORT_MAX) + ids->seq_max = USHORT_MAX; else ids->seq_max = seq_limit; } @@ -116,13 +165,12 @@ void __init ipc_init_proc_interface(const char *path, const char *header, iface->ids = ids; iface->show = show; - pde = create_proc_entry(path, - S_IRUGO, /* world readable */ - NULL /* parent dir */); - if (pde) { - pde->data = iface; - pde->proc_fops = &sysvipc_proc_fops; - } else { + pde = proc_create_data(path, + S_IRUGO, /* world readable */ + NULL, /* parent dir */ + &sysvipc_proc_fops, + iface); + if (!pde) { kfree(iface); } } @@ -231,6 +279,7 @@ int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size) if(ids->seq > ids->seq_max) ids->seq = 0; + new->id = ipc_buildid(id, new->seq); spin_lock_init(&new->lock); new->deleted = 0; rcu_read_lock(); @@ -761,6 +810,70 @@ int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, return ipcget_public(ns, ids, ops, params); } +/** + * ipc_update_perm - update the permissions of an IPC. + * @in: the permission given as input. + * @out: the permission of the ipc to set. 
+ */ +void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out) +{ + out->uid = in->uid; + out->gid = in->gid; + out->mode = (out->mode & ~S_IRWXUGO) + | (in->mode & S_IRWXUGO); +} + +/** + * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd + * @ids: the table of ids where to look for the ipc + * @id: the id of the ipc to retrieve + * @cmd: the cmd to check + * @perm: the permission to set + * @extra_perm: one extra permission parameter used by msq + * + * This function does some common audit and permissions check for some IPC_XXX + * cmd and is called from semctl_down, shmctl_down and msgctl_down. + * It must be called without any lock held and + * - retrieves the ipc with the given id in the given table. + * - performs some audit and permission check, depending on the given cmd + * - returns the ipc with both ipc and rw_mutex locks held in case of success + * or an err-code without any lock held otherwise. + */ +struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm) +{ + struct kern_ipc_perm *ipcp; + int err; + + down_write(&ids->rw_mutex); + ipcp = ipc_lock_check_down(ids, id); + if (IS_ERR(ipcp)) { + err = PTR_ERR(ipcp); + goto out_up; + } + + err = audit_ipc_obj(ipcp); + if (err) + goto out_unlock; + + if (cmd == IPC_SET) { + err = audit_ipc_set_perm(extra_perm, perm->uid, + perm->gid, perm->mode); + if (err) + goto out_unlock; + } + if (current->euid == ipcp->cuid || + current->euid == ipcp->uid || capable(CAP_SYS_ADMIN)) + return ipcp; + + err = -EPERM; +out_unlock: + ipc_unlock(ipcp); +out_up: + up_write(&ids->rw_mutex); + return ERR_PTR(err); +} + #ifdef __ARCH_WANT_IPC_PARSE_VERSION diff --git a/ipc/util.h b/ipc/util.h index f37d160c98f..cdb966aebe0 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -12,7 +12,6 @@ #include <linux/err.h> -#define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) void sem_init (void); @@ -112,6 +111,9 @@ struct kern_ipc_perm 
*ipc_lock(struct ipc_ids *, int); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out); +void ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out); +struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, + struct ipc64_perm *perm, int extra_perm); #if defined(__ia64__) || defined(__x86_64__) || defined(__hppa__) || defined(__XTENSA__) /* On IA-64, we always use the "64-bit version" of the IPC structures. */ @@ -124,6 +126,8 @@ extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, int len); extern int store_msg(void __user *dest, struct msg_msg *msg, int len); +extern void recompute_msgmni(struct ipc_namespace *); + static inline int ipc_buildid(int id, int seq) { return SEQ_MULTIPLIER * seq + id; diff --git a/kernel/Makefile b/kernel/Makefile index 6c5f081132a..188c43223f5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -11,7 +11,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o -obj-$(CONFIG_SYSCTL) += sysctl_check.o +obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 28fef6bf853..13430176b3c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -272,7 +272,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len, return -EINVAL; watch = audit_init_watch(path); - if (unlikely(IS_ERR(watch))) + if (IS_ERR(watch)) return PTR_ERR(watch); audit_get_watch(watch); @@ -848,7 +848,7 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old) return ERR_PTR(-ENOMEM); new = audit_init_watch(path); - if (unlikely(IS_ERR(new))) { + if (IS_ERR(new)) { kfree(path); goto out; } @@ -989,7 
+989,7 @@ static void audit_update_watch(struct audit_parent *parent, audit_set_auditable(current->audit_context); nwatch = audit_dupe_watch(owatch); - if (unlikely(IS_ERR(nwatch))) { + if (IS_ERR(nwatch)) { mutex_unlock(&audit_filter_mutex); audit_panic("error updating watch, skipping"); return; @@ -1004,7 +1004,7 @@ static void audit_update_watch(struct audit_parent *parent, list_del_rcu(&oentry->list); nentry = audit_dupe_rule(&oentry->rule, nwatch); - if (unlikely(IS_ERR(nentry))) + if (IS_ERR(nentry)) audit_panic("error updating watch, removing"); else { int h = audit_hash_ino((u32)ino); @@ -1785,7 +1785,7 @@ int audit_update_lsm_rules(void) watch = entry->rule.watch; tree = entry->rule.tree; nentry = audit_dupe_rule(&entry->rule, watch); - if (unlikely(IS_ERR(nentry))) { + if (IS_ERR(nentry)) { /* save the first error encountered for the * return value */ if (!err) diff --git a/kernel/bounds.c b/kernel/bounds.c index c3c55544db2..3c530138183 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -8,11 +8,7 @@ /* Include headers that define the enum constants of interest */ #include <linux/page-flags.h> #include <linux/mmzone.h> - -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define BLANK() asm volatile("\n->" : : ) +#include <linux/kbuild.h> void foo(void) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6d8de051382..b9d467d83fc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -44,6 +44,7 @@ #include <linux/kmod.h> #include <linux/delayacct.h> #include <linux/cgroupstats.h> +#include <linux/hash.h> #include <asm/atomic.h> @@ -118,17 +119,7 @@ static int root_count; * be called. 
*/ static int need_forkexit_callback; - -/* bits in struct cgroup flags field */ -enum { - /* Control Group is dead */ - CGRP_REMOVED, - /* Control Group has previously had a child cgroup or a task, - * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ - CGRP_RELEASABLE, - /* Control Group requires release notifications to userspace */ - CGRP_NOTIFY_ON_RELEASE, -}; +static int need_mm_owner_callback __read_mostly; /* convenient tests for these bits */ inline int cgroup_is_removed(const struct cgroup *cgrp) @@ -204,6 +195,27 @@ static struct cg_cgroup_link init_css_set_link; static DEFINE_RWLOCK(css_set_lock); static int css_set_count; +/* hash table for cgroup groups. This improves the performance to + * find an existing css_set */ +#define CSS_SET_HASH_BITS 7 +#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) +static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; + +static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) +{ + int i; + int index; + unsigned long tmp = 0UL; + + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) + tmp += (unsigned long)css[i]; + tmp = (tmp >> 16) ^ tmp; + + index = hash_long(tmp, CSS_SET_HASH_BITS); + + return &css_set_table[index]; +} + /* We don't maintain the lists running through each css_set to its * task until after the first call to cgroup_iter_start(). This * reduces the fork()/exit() overhead for people who have cgroups @@ -230,7 +242,7 @@ static int use_task_css_set_links; static void unlink_css_set(struct css_set *cg) { write_lock(&css_set_lock); - list_del(&cg->list); + hlist_del(&cg->hlist); css_set_count--; while (!list_empty(&cg->cg_links)) { struct cg_cgroup_link *link; @@ -295,9 +307,7 @@ static inline void put_css_set_taskexit(struct css_set *cg) /* * find_existing_css_set() is a helper for * find_css_set(), and checks to see whether an existing - * css_set is suitable. 
This currently walks a linked-list for - * simplicity; a later patch will use a hash table for better - * performance + * css_set is suitable. * * oldcg: the cgroup group that we're using before the cgroup * transition @@ -314,7 +324,9 @@ static struct css_set *find_existing_css_set( { int i; struct cgroupfs_root *root = cgrp->root; - struct list_head *l = &init_css_set.list; + struct hlist_head *hhead; + struct hlist_node *node; + struct css_set *cg; /* Built the set of subsystem state objects that we want to * see in the new css_set */ @@ -331,18 +343,13 @@ static struct css_set *find_existing_css_set( } } - /* Look through existing cgroup groups to find one to reuse */ - do { - struct css_set *cg = - list_entry(l, struct css_set, list); - + hhead = css_set_hash(template); + hlist_for_each_entry(cg, node, hhead, hlist) { if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { /* All subsystems matched */ return cg; } - /* Try the next cgroup group */ - l = l->next; - } while (l != &init_css_set.list); + } /* No existing cgroup group matched */ return NULL; @@ -404,6 +411,8 @@ static struct css_set *find_css_set( struct list_head tmp_cg_links; struct cg_cgroup_link *link; + struct hlist_head *hhead; + /* First see if we already have a cgroup group that matches * the desired set */ write_lock(&css_set_lock); @@ -428,6 +437,7 @@ static struct css_set *find_css_set( kref_init(&res->ref); INIT_LIST_HEAD(&res->cg_links); INIT_LIST_HEAD(&res->tasks); + INIT_HLIST_NODE(&res->hlist); /* Copy the set of subsystem state objects generated in * find_existing_css_set() */ @@ -467,9 +477,12 @@ static struct css_set *find_css_set( BUG_ON(!list_empty(&tmp_cg_links)); - /* Link this cgroup group into the list */ - list_add(&res->list, &init_css_set.list); css_set_count++; + + /* Add this cgroup group to the hash table */ + hhead = css_set_hash(res->subsys); + hlist_add_head(&res->hlist, hhead); + write_unlock(&css_set_lock); return res; @@ -948,7 +961,7 @@ static int 
cgroup_get_sb(struct file_system_type *fs_type, int ret = 0; struct super_block *sb; struct cgroupfs_root *root; - struct list_head tmp_cg_links, *l; + struct list_head tmp_cg_links; INIT_LIST_HEAD(&tmp_cg_links); /* First find the desired set of subsystems */ @@ -990,6 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, /* New superblock */ struct cgroup *cgrp = &root->top_cgroup; struct inode *inode; + int i; BUG_ON(sb->s_root != NULL); @@ -1034,22 +1048,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type, /* Link the top cgroup in this hierarchy into all * the css_set objects */ write_lock(&css_set_lock); - l = &init_css_set.list; - do { + for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { + struct hlist_head *hhead = &css_set_table[i]; + struct hlist_node *node; struct css_set *cg; - struct cg_cgroup_link *link; - cg = list_entry(l, struct css_set, list); - BUG_ON(list_empty(&tmp_cg_links)); - link = list_entry(tmp_cg_links.next, - struct cg_cgroup_link, - cgrp_link_list); - list_del(&link->cgrp_link_list); - link->cg = cg; - list_add(&link->cgrp_link_list, - &root->top_cgroup.css_sets); - list_add(&link->cg_link_list, &cg->cg_links); - l = l->next; - } while (l != &init_css_set.list); + + hlist_for_each_entry(cg, node, hhead, hlist) { + struct cg_cgroup_link *link; + + BUG_ON(list_empty(&tmp_cg_links)); + link = list_entry(tmp_cg_links.next, + struct cg_cgroup_link, + cgrp_link_list); + list_del(&link->cgrp_link_list); + link->cg = cg; + list_add(&link->cgrp_link_list, + &root->top_cgroup.css_sets); + list_add(&link->cg_link_list, &cg->cg_links); + } + } write_unlock(&css_set_lock); free_cg_links(&tmp_cg_links); @@ -1307,18 +1324,16 @@ enum cgroup_filetype { FILE_DIR, FILE_TASKLIST, FILE_NOTIFY_ON_RELEASE, - FILE_RELEASABLE, FILE_RELEASE_AGENT, }; -static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *unused_ppos) +static ssize_t 
cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) { char buffer[64]; int retval = 0; - u64 val; char *end; if (!nbytes) @@ -1329,16 +1344,18 @@ static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, return -EFAULT; buffer[nbytes] = 0; /* nul-terminate */ - - /* strip newline if necessary */ - if (nbytes && (buffer[nbytes-1] == '\n')) - buffer[nbytes-1] = 0; - val = simple_strtoull(buffer, &end, 0); - if (*end) - return -EINVAL; - - /* Pass to subsystem */ - retval = cft->write_uint(cgrp, cft, val); + strstrip(buffer); + if (cft->write_u64) { + u64 val = simple_strtoull(buffer, &end, 0); + if (*end) + return -EINVAL; + retval = cft->write_u64(cgrp, cft, val); + } else { + s64 val = simple_strtoll(buffer, &end, 0); + if (*end) + return -EINVAL; + retval = cft->write_s64(cgrp, cft, val); + } if (!retval) retval = nbytes; return retval; @@ -1419,23 +1436,39 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, return -ENODEV; if (cft->write) return cft->write(cgrp, cft, file, buf, nbytes, ppos); - if (cft->write_uint) - return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos); + if (cft->write_u64 || cft->write_s64) + return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->trigger) { + int ret = cft->trigger(cgrp, (unsigned int)cft->private); + return ret ? 
ret : nbytes; + } return -EINVAL; } -static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos) { char tmp[64]; - u64 val = cft->read_uint(cgrp, cft); + u64 val = cft->read_u64(cgrp, cft); int len = sprintf(tmp, "%llu\n", (unsigned long long) val); return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } +static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos) +{ + char tmp[64]; + s64 val = cft->read_s64(cgrp, cft); + int len = sprintf(tmp, "%lld\n", (long long) val); + + return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); +} + static ssize_t cgroup_common_file_read(struct cgroup *cgrp, struct cftype *cft, struct file *file, @@ -1490,11 +1523,56 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, if (cft->read) return cft->read(cgrp, cft, file, buf, nbytes, ppos); - if (cft->read_uint) - return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos); + if (cft->read_u64) + return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); + if (cft->read_s64) + return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); return -EINVAL; } +/* + * seqfile ops/methods for returning structured data. Currently just + * supports string->u64 maps, but can be extended in future. 
+ */ + +struct cgroup_seqfile_state { + struct cftype *cft; + struct cgroup *cgroup; +}; + +static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) +{ + struct seq_file *sf = cb->state; + return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); +} + +static int cgroup_seqfile_show(struct seq_file *m, void *arg) +{ + struct cgroup_seqfile_state *state = m->private; + struct cftype *cft = state->cft; + if (cft->read_map) { + struct cgroup_map_cb cb = { + .fill = cgroup_map_add, + .state = m, + }; + return cft->read_map(state->cgroup, cft, &cb); + } + return cft->read_seq_string(state->cgroup, cft, m); +} + +int cgroup_seqfile_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + kfree(seq->private); + return single_release(inode, file); +} + +static struct file_operations cgroup_seqfile_operations = { + .read = seq_read, + .llseek = seq_lseek, + .release = cgroup_seqfile_release, +}; + static int cgroup_file_open(struct inode *inode, struct file *file) { int err; @@ -1507,7 +1585,18 @@ static int cgroup_file_open(struct inode *inode, struct file *file) cft = __d_cft(file->f_dentry); if (!cft) return -ENODEV; - if (cft->open) + if (cft->read_map || cft->read_seq_string) { + struct cgroup_seqfile_state *state = + kzalloc(sizeof(*state), GFP_USER); + if (!state) + return -ENOMEM; + state->cft = cft; + state->cgroup = __d_cgrp(file->f_dentry->d_parent); + file->f_op = &cgroup_seqfile_operations; + err = single_open(file, cgroup_seqfile_show, state); + if (err < 0) + kfree(state); + } else if (cft->open) err = cft->open(inode, file); else err = 0; @@ -1715,7 +1804,7 @@ static void cgroup_advance_iter(struct cgroup *cgrp, * The tasklist_lock is not held here, as do_each_thread() and * while_each_thread() are protected by RCU. 
*/ -void cgroup_enable_task_cg_lists(void) +static void cgroup_enable_task_cg_lists(void) { struct task_struct *p, *g; write_lock(&css_set_lock); @@ -1913,14 +2002,14 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) if (heap->size) { for (i = 0; i < heap->size; i++) { - struct task_struct *p = heap->ptrs[i]; + struct task_struct *q = heap->ptrs[i]; if (i == 0) { - latest_time = p->start_time; - latest_task = p; + latest_time = q->start_time; + latest_task = q; } /* Process the task per the caller's callback */ - scan->process_task(p, scan); - put_task_struct(p); + scan->process_task(q, scan); + put_task_struct(q); } /* * If we had to process any tasks at all, scan again @@ -2138,11 +2227,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, return notify_on_release(cgrp); } -static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft) -{ - return test_bit(CGRP_RELEASABLE, &cgrp->flags); -} - /* * for the common functions, 'private' gives the type of file */ @@ -2158,16 +2242,10 @@ static struct cftype files[] = { { .name = "notify_on_release", - .read_uint = cgroup_read_notify_on_release, + .read_u64 = cgroup_read_notify_on_release, .write = cgroup_common_file_write, .private = FILE_NOTIFY_ON_RELEASE, }, - - { - .name = "releasable", - .read_uint = cgroup_read_releasable, - .private = FILE_RELEASABLE, - } }; static struct cftype cft_release_agent = { @@ -2401,10 +2479,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) return 0; } -static void cgroup_init_subsys(struct cgroup_subsys *ss) +static void __init cgroup_init_subsys(struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; - struct list_head *l; printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); @@ -2415,34 +2492,19 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) BUG_ON(IS_ERR(css)); init_cgroup_css(css, ss, dummytop); - /* Update all cgroup groups to contain a subsys + /* Update the init_css_set to contain a subsys * 
pointer to this state - since the subsystem is - * newly registered, all tasks and hence all cgroup - * groups are in the subsystem's top cgroup. */ - write_lock(&css_set_lock); - l = &init_css_set.list; - do { - struct css_set *cg = - list_entry(l, struct css_set, list); - cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; - l = l->next; - } while (l != &init_css_set.list); - write_unlock(&css_set_lock); - - /* If this subsystem requested that it be notified with fork - * events, we should send it one now for every process in the - * system */ - if (ss->fork) { - struct task_struct *g, *p; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - ss->fork(ss, p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - } + * newly registered, all tasks and hence the + * init_css_set is in the subsystem's top cgroup. */ + init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; need_forkexit_callback |= ss->fork || ss->exit; + need_mm_owner_callback |= !!ss->mm_owner_changed; + + /* At system boot, before all subsystems have been + * registered, no tasks have been forked, so we don't + * need to invoke fork callbacks here. 
*/ + BUG_ON(!list_empty(&init_task.tasks)); ss->active = 1; } @@ -2458,9 +2520,9 @@ int __init cgroup_init_early(void) int i; kref_init(&init_css_set.ref); kref_get(&init_css_set.ref); - INIT_LIST_HEAD(&init_css_set.list); INIT_LIST_HEAD(&init_css_set.cg_links); INIT_LIST_HEAD(&init_css_set.tasks); + INIT_HLIST_NODE(&init_css_set.hlist); css_set_count = 1; init_cgroup_root(&rootnode); list_add(&rootnode.root_list, &roots); @@ -2473,6 +2535,9 @@ int __init cgroup_init_early(void) list_add(&init_css_set_link.cg_link_list, &init_css_set.cg_links); + for (i = 0; i < CSS_SET_TABLE_SIZE; i++) + INIT_HLIST_HEAD(&css_set_table[i]); + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { struct cgroup_subsys *ss = subsys[i]; @@ -2502,7 +2567,7 @@ int __init cgroup_init(void) { int err; int i; - struct proc_dir_entry *entry; + struct hlist_head *hhead; err = bdi_init(&cgroup_backing_dev_info); if (err) @@ -2514,13 +2579,15 @@ int __init cgroup_init(void) cgroup_init_subsys(ss); } + /* Add init_css_set to the hash table */ + hhead = css_set_hash(init_css_set.subsys); + hlist_add_head(&init_css_set.hlist, hhead); + err = register_filesystem(&cgroup_fs_type); if (err < 0) goto out; - entry = create_proc_entry("cgroups", 0, NULL); - if (entry) - entry->proc_fops = &proc_cgroupstats_operations; + proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations); out: if (err) @@ -2683,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child) } } +#ifdef CONFIG_MM_OWNER +/** + * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes + * @p: the new owner + * + * Called on every change to mm->owner. mm_init_owner() does not + * invoke this routine, since it assigns the mm->owner the first time + * and does not change it. 
+ */ +void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) +{ + struct cgroup *oldcgrp, *newcgrp; + + if (need_mm_owner_callback) { + int i; + for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys *ss = subsys[i]; + oldcgrp = task_cgroup(old, ss->subsys_id); + newcgrp = task_cgroup(new, ss->subsys_id); + if (oldcgrp == newcgrp) + continue; + if (ss->mm_owner_changed) + ss->mm_owner_changed(ss, oldcgrp, newcgrp); + } + } +} +#endif /* CONFIG_MM_OWNER */ + /** * cgroup_post_fork - called on a new task after adding it to the task list * @child: the task in question diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c index 37301e877cb..c3dc3aba4c0 100644 --- a/kernel/cgroup_debug.c +++ b/kernel/cgroup_debug.c @@ -1,5 +1,5 @@ /* - * kernel/ccontainer_debug.c - Example cgroup subsystem that + * kernel/cgroup_debug.c - Example cgroup subsystem that * exposes debug info * * Copyright (C) Google Inc, 2007 @@ -62,25 +62,35 @@ static u64 current_css_set_refcount_read(struct cgroup *cont, return count; } +static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) +{ + return test_bit(CGRP_RELEASABLE, &cgrp->flags); +} + static struct cftype files[] = { { .name = "cgroup_refcount", - .read_uint = cgroup_refcount_read, + .read_u64 = cgroup_refcount_read, }, { .name = "taskcount", - .read_uint = taskcount_read, + .read_u64 = taskcount_read, }, { .name = "current_css_set", - .read_uint = current_css_set_read, + .read_u64 = current_css_set_read, }, { .name = "current_css_set_refcount", - .read_uint = current_css_set_refcount_read, + .read_u64 = current_css_set_refcount_read, }, + + { + .name = "releasable", + .read_u64 = releasable_read, + } }; static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) diff --git a/kernel/configs.c b/kernel/configs.c index e84d3f9c6c7..4c345210ed8 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -79,12 +79,11 @@ static int __init ikconfig_init(void) struct proc_dir_entry 
*entry; /* create the current config file */ - entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, - &proc_root); + entry = proc_create("config.gz", S_IFREG | S_IRUGO, NULL, + &ikconfig_file_ops); if (!entry) return -ENOMEM; - entry->proc_fops = &ikconfig_file_ops; entry->size = kernel_config_data_size; return 0; @@ -95,7 +94,7 @@ static int __init ikconfig_init(void) static void __exit ikconfig_cleanup(void) { - remove_proc_entry("config.gz", &proc_root); + remove_proc_entry("config.gz", NULL); } module_init(ikconfig_init); diff --git a/kernel/cpu.c b/kernel/cpu.c index 2011ad8d269..a98f6ab16ec 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -33,17 +33,13 @@ static struct { * an ongoing cpu hotplug operation. */ int refcount; - wait_queue_head_t writer_queue; } cpu_hotplug; -#define writer_exists() (cpu_hotplug.active_writer != NULL) - void __init cpu_hotplug_init(void) { cpu_hotplug.active_writer = NULL; mutex_init(&cpu_hotplug.lock); cpu_hotplug.refcount = 0; - init_waitqueue_head(&cpu_hotplug.writer_queue); } #ifdef CONFIG_HOTPLUG_CPU @@ -65,11 +61,8 @@ void put_online_cpus(void) if (cpu_hotplug.active_writer == current) return; mutex_lock(&cpu_hotplug.lock); - cpu_hotplug.refcount--; - - if (unlikely(writer_exists()) && !cpu_hotplug.refcount) - wake_up(&cpu_hotplug.writer_queue); - + if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) + wake_up_process(cpu_hotplug.active_writer); mutex_unlock(&cpu_hotplug.lock); } @@ -98,8 +91,8 @@ void cpu_maps_update_done(void) * Note that during a cpu-hotplug operation, the new readers, if any, * will be blocked by the cpu_hotplug.lock * - * Since cpu_maps_update_begin is always called after invoking - * cpu_maps_update_begin, we can be sure that only one writer is active. + * Since cpu_hotplug_begin() is always called after invoking + * cpu_maps_update_begin(), we can be sure that only one writer is active. 
* * Note that theoretically, there is a possibility of a livelock: * - Refcount goes to zero, last reader wakes up the sleeping @@ -115,19 +108,16 @@ void cpu_maps_update_done(void) */ static void cpu_hotplug_begin(void) { - DECLARE_WAITQUEUE(wait, current); - - mutex_lock(&cpu_hotplug.lock); - cpu_hotplug.active_writer = current; - add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait); - while (cpu_hotplug.refcount) { - set_current_state(TASK_UNINTERRUPTIBLE); + + for (;;) { + mutex_lock(&cpu_hotplug.lock); + if (likely(!cpu_hotplug.refcount)) + break; + __set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&cpu_hotplug.lock); schedule(); - mutex_lock(&cpu_hotplug.lock); } - remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait); } static void cpu_hotplug_done(void) @@ -136,7 +126,7 @@ static void cpu_hotplug_done(void) mutex_unlock(&cpu_hotplug.lock); } /* Need to know about CPUs going up/down? */ -int __cpuinit register_cpu_notifier(struct notifier_block *nb) +int __ref register_cpu_notifier(struct notifier_block *nb) { int ret; cpu_maps_update_begin(); @@ -149,7 +139,7 @@ int __cpuinit register_cpu_notifier(struct notifier_block *nb) EXPORT_SYMBOL(register_cpu_notifier); -void unregister_cpu_notifier(struct notifier_block *nb) +void __ref unregister_cpu_notifier(struct notifier_block *nb) { cpu_maps_update_begin(); raw_notifier_chain_unregister(&cpu_chain, nb); @@ -180,7 +170,7 @@ struct take_cpu_down_param { }; /* Take this CPU down. 
*/ -static int take_cpu_down(void *_param) +static int __ref take_cpu_down(void *_param) { struct take_cpu_down_param *param = _param; int err; @@ -199,7 +189,7 @@ static int take_cpu_down(void *_param) } /* Requires cpu_add_remove_lock to be held */ -static int _cpu_down(unsigned int cpu, int tasks_frozen) +static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; struct task_struct *p; @@ -274,7 +264,7 @@ out_release: return err; } -int cpu_down(unsigned int cpu) +int __ref cpu_down(unsigned int cpu) { int err = 0; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 48a976c52cf..8da627d3380 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner { typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEM_HARDWALL, CS_MEMORY_MIGRATE, CS_SCHED_LOAD_BALANCE, CS_SPREAD_PAGE, @@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs) return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); } +static inline int is_mem_hardwall(const struct cpuset *cs) +{ + return test_bit(CS_MEM_HARDWALL, &cs->flags); +} + static inline int is_sched_load_balance(const struct cpuset *cs) { return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); @@ -735,7 +741,8 @@ static inline int started_after(void *p1, void *p2) * Return nonzero if this tasks's cpus_allowed mask should be changed (in other * words, if its mask is not equal to its cpuset's mask). */ -int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) +static int cpuset_test_cpumask(struct task_struct *tsk, + struct cgroup_scanner *scan) { return !cpus_equal(tsk->cpus_allowed, (cgroup_cs(scan->cg))->cpus_allowed); @@ -752,7 +759,8 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) * We don't need to re-check for the cgroup/cpuset membership, since we're * holding cgroup_lock() at this point. 
*/ -void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) +static void cpuset_change_cpumask(struct task_struct *tsk, + struct cgroup_scanner *scan) { set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); } @@ -1023,19 +1031,6 @@ int current_cpuset_is_being_rebound(void) return task_cs(current) == cpuset_being_rebound; } -/* - * Call with cgroup_mutex held. - */ - -static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) -{ - if (simple_strtoul(buf, NULL, 10) != 0) - cpuset_memory_pressure_enabled = 1; - else - cpuset_memory_pressure_enabled = 0; - return 0; -} - static int update_relax_domain_level(struct cpuset *cs, char *buf) { int val = simple_strtol(buf, NULL, 10); @@ -1053,25 +1048,20 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag - * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_SCHED_LOAD_BALANCE, - * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, - * CS_SPREAD_PAGE, CS_SPREAD_SLAB) - * cs: the cpuset to update - * buf: the buffer where we read the 0 or 1 + * bit: the bit to update (see cpuset_flagbits_t) + * cs: the cpuset to update + * turning_on: whether the flag is being set or cleared * * Call with cgroup_mutex held. 
*/ -static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf) +static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, + int turning_on) { - int turning_on; struct cpuset trialcs; int err; int cpus_nonempty, balance_flag_changed; - turning_on = (simple_strtoul(buf, NULL, 10) != 0); - trialcs = *cs; if (turning_on) set_bit(bit, &trialcs.flags); @@ -1241,6 +1231,7 @@ typedef enum { FILE_MEMLIST, FILE_CPU_EXCLUSIVE, FILE_MEM_EXCLUSIVE, + FILE_MEM_HARDWALL, FILE_SCHED_LOAD_BALANCE, FILE_SCHED_RELAX_DOMAIN_LEVEL, FILE_MEMORY_PRESSURE_ENABLED, @@ -1289,46 +1280,71 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, case FILE_MEMLIST: retval = update_nodemask(cs, buffer); break; + case FILE_SCHED_RELAX_DOMAIN_LEVEL: + retval = update_relax_domain_level(cs, buffer); + break; + default: + retval = -EINVAL; + goto out2; + } + + if (retval == 0) + retval = nbytes; +out2: + cgroup_unlock(); +out1: + kfree(buffer); + return retval; +} + +static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + int retval = 0; + struct cpuset *cs = cgroup_cs(cgrp); + cpuset_filetype_t type = cft->private; + + cgroup_lock(); + + if (cgroup_is_removed(cgrp)) { + cgroup_unlock(); + return -ENODEV; + } + + switch (type) { case FILE_CPU_EXCLUSIVE: - retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer); + retval = update_flag(CS_CPU_EXCLUSIVE, cs, val); break; case FILE_MEM_EXCLUSIVE: - retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); + retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); break; - case FILE_SCHED_LOAD_BALANCE: - retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); + case FILE_MEM_HARDWALL: + retval = update_flag(CS_MEM_HARDWALL, cs, val); break; - case FILE_SCHED_RELAX_DOMAIN_LEVEL: - retval = update_relax_domain_level(cs, buffer); + case FILE_SCHED_LOAD_BALANCE: + retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); break; case FILE_MEMORY_MIGRATE: - retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + retval = 
update_flag(CS_MEMORY_MIGRATE, cs, val); break; case FILE_MEMORY_PRESSURE_ENABLED: - retval = update_memory_pressure_enabled(cs, buffer); + cpuset_memory_pressure_enabled = !!val; break; case FILE_MEMORY_PRESSURE: retval = -EACCES; break; case FILE_SPREAD_PAGE: - retval = update_flag(CS_SPREAD_PAGE, cs, buffer); + retval = update_flag(CS_SPREAD_PAGE, cs, val); cs->mems_generation = cpuset_mems_generation++; break; case FILE_SPREAD_SLAB: - retval = update_flag(CS_SPREAD_SLAB, cs, buffer); + retval = update_flag(CS_SPREAD_SLAB, cs, val); cs->mems_generation = cpuset_mems_generation++; break; default: retval = -EINVAL; - goto out2; + break; } - - if (retval == 0) - retval = nbytes; -out2: cgroup_unlock(); -out1: - kfree(buffer); return retval; } @@ -1390,33 +1406,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, case FILE_MEMLIST: s += cpuset_sprintf_memlist(s, cs); break; - case FILE_CPU_EXCLUSIVE: - *s++ = is_cpu_exclusive(cs) ? '1' : '0'; - break; - case FILE_MEM_EXCLUSIVE: - *s++ = is_mem_exclusive(cs) ? '1' : '0'; - break; - case FILE_SCHED_LOAD_BALANCE: - *s++ = is_sched_load_balance(cs) ? '1' : '0'; - break; case FILE_SCHED_RELAX_DOMAIN_LEVEL: s += sprintf(s, "%d", cs->relax_domain_level); break; - case FILE_MEMORY_MIGRATE: - *s++ = is_memory_migrate(cs) ? '1' : '0'; - break; - case FILE_MEMORY_PRESSURE_ENABLED: - *s++ = cpuset_memory_pressure_enabled ? '1' : '0'; - break; - case FILE_MEMORY_PRESSURE: - s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); - break; - case FILE_SPREAD_PAGE: - *s++ = is_spread_page(cs) ? '1' : '0'; - break; - case FILE_SPREAD_SLAB: - *s++ = is_spread_slab(cs) ? 
'1' : '0'; - break; default: retval = -EINVAL; goto out; @@ -1429,121 +1421,137 @@ out: return retval; } - - +static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) +{ + struct cpuset *cs = cgroup_cs(cont); + cpuset_filetype_t type = cft->private; + switch (type) { + case FILE_CPU_EXCLUSIVE: + return is_cpu_exclusive(cs); + case FILE_MEM_EXCLUSIVE: + return is_mem_exclusive(cs); + case FILE_MEM_HARDWALL: + return is_mem_hardwall(cs); + case FILE_SCHED_LOAD_BALANCE: + return is_sched_load_balance(cs); + case FILE_MEMORY_MIGRATE: + return is_memory_migrate(cs); + case FILE_MEMORY_PRESSURE_ENABLED: + return cpuset_memory_pressure_enabled; + case FILE_MEMORY_PRESSURE: + return fmeter_getrate(&cs->fmeter); + case FILE_SPREAD_PAGE: + return is_spread_page(cs); + case FILE_SPREAD_SLAB: + return is_spread_slab(cs); + default: + BUG(); + } +} /* * for the common functions, 'private' gives the type of file */ -static struct cftype cft_cpus = { - .name = "cpus", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_CPULIST, -}; - -static struct cftype cft_mems = { - .name = "mems", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_MEMLIST, -}; - -static struct cftype cft_cpu_exclusive = { - .name = "cpu_exclusive", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_CPU_EXCLUSIVE, -}; - -static struct cftype cft_mem_exclusive = { - .name = "mem_exclusive", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_MEM_EXCLUSIVE, -}; - -static struct cftype cft_sched_load_balance = { - .name = "sched_load_balance", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_SCHED_LOAD_BALANCE, -}; - -static struct cftype cft_sched_relax_domain_level = { - .name = "sched_relax_domain_level", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = 
FILE_SCHED_RELAX_DOMAIN_LEVEL, -}; - -static struct cftype cft_memory_migrate = { - .name = "memory_migrate", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_MEMORY_MIGRATE, +static struct cftype files[] = { + { + .name = "cpus", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, + .private = FILE_CPULIST, + }, + + { + .name = "mems", + .read = cpuset_common_file_read, + .write = cpuset_common_file_write, + .private = FILE_MEMLIST, + }, + + { + .name = "cpu_exclusive", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_CPU_EXCLUSIVE, + }, + + { + .name = "mem_exclusive", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_MEM_EXCLUSIVE, + }, + + { + .name = "mem_hardwall", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_MEM_HARDWALL, + }, + + { + .name = "sched_load_balance", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_LOAD_BALANCE, + }, + + { + .name = "sched_relax_domain_level", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, + }, + + { + .name = "memory_migrate", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_MEMORY_MIGRATE, + }, + + { + .name = "memory_pressure", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_MEMORY_PRESSURE, + }, + + { + .name = "memory_spread_page", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SPREAD_PAGE, + }, + + { + .name = "memory_spread_slab", + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, + .private = FILE_SPREAD_SLAB, + }, }; static struct cftype cft_memory_pressure_enabled = { .name = "memory_pressure_enabled", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, + .read_u64 = cpuset_read_u64, + .write_u64 = cpuset_write_u64, .private = 
FILE_MEMORY_PRESSURE_ENABLED, }; -static struct cftype cft_memory_pressure = { - .name = "memory_pressure", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_MEMORY_PRESSURE, -}; - -static struct cftype cft_spread_page = { - .name = "memory_spread_page", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_SPREAD_PAGE, -}; - -static struct cftype cft_spread_slab = { - .name = "memory_spread_slab", - .read = cpuset_common_file_read, - .write = cpuset_common_file_write, - .private = FILE_SPREAD_SLAB, -}; - static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) { int err; - if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, - &cft_sched_relax_domain_level)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) - return err; - if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0) + err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); + if (err) return err; /* memory_pressure_enabled is in root cpuset only */ - if (err == 0 && !cont->parent) + if (!cont->parent) err = cgroup_add_file(cont, ss, - &cft_memory_pressure_enabled); - return 0; + &cft_memory_pressure_enabled); + return err; } /* @@ -1643,7 +1651,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) cpuset_update_task_memory_state(); if (is_sched_load_balance(cs)) - update_flag(CS_SCHED_LOAD_BALANCE, cs, "0"); + 
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); number_of_cpusets--; kfree(cs); @@ -1708,7 +1716,8 @@ int __init cpuset_init(void) * Called by cgroup_scan_tasks() for each task in a cgroup. * Return nonzero to stop the walk through the tasks. */ -void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan) +static void cpuset_do_move_task(struct task_struct *tsk, + struct cgroup_scanner *scan) { struct cpuset_hotplug_scanner *chsp; @@ -1970,14 +1979,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask) } /* - * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive - * ancestor to the specified cpuset. Call holding callback_mutex. - * If no ancestor is mem_exclusive (an unusual configuration), then - * returns the root cpuset. + * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or + * mem_hardwall ancestor to the specified cpuset. Call holding + * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall + * (an unusual configuration), then returns the root cpuset. */ -static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) +static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs) { - while (!is_mem_exclusive(cs) && cs->parent) + while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent) cs = cs->parent; return cs; } @@ -1991,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * __GFP_THISNODE is set, yes, we can always allocate. If zone * z's node is in our tasks mems_allowed, yes. If it's not a * __GFP_HARDWALL request and this zone's nodes is in the nearest - * mem_exclusive cpuset ancestor to this tasks cpuset, yes. + * hardwalled cpuset ancestor to this tasks cpuset, yes. * If the task has been OOM killed and has access to memory reserves * as specified by the TIF_MEMDIE flag, yes. * Otherwise, no. 
@@ -2014,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * and do not allow allocations outside the current tasks cpuset * unless the task has been OOM killed as is marked TIF_MEMDIE. * GFP_KERNEL allocations are not so marked, so can escape to the - * nearest enclosing mem_exclusive ancestor cpuset. + * nearest enclosing hardwalled ancestor cpuset. * * Scanning up parent cpusets requires callback_mutex. The * __alloc_pages() routine only calls here with __GFP_HARDWALL bit @@ -2037,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * in_interrupt - any node ok (current task context irrelevant) * GFP_ATOMIC - any node ok * TIF_MEMDIE - any node ok - * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok + * GFP_KERNEL - any node in enclosing hardwalled cpuset ok * GFP_USER - only nodes in current tasks mems allowed ok. * * Rule: @@ -2074,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) mutex_lock(&callback_mutex); task_lock(current); - cs = nearest_exclusive_ancestor(task_cs(current)); + cs = nearest_hardwall_ancestor(task_cs(current)); task_unlock(current); allowed = node_isset(node, cs->mems_allowed); diff --git a/kernel/dma.c b/kernel/dma.c index 6a82bb716da..d2c60a82279 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -149,12 +149,7 @@ static const struct file_operations proc_dma_operations = { static int __init proc_dma_init(void) { - struct proc_dir_entry *e; - - e = create_proc_entry("dma", 0, NULL); - if (e) - e->proc_fops = &proc_dma_operations; - + proc_create("dma", 0, NULL, &proc_dma_operations); return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index 2a9d98c641a..ae0f2c4e452 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk) EXPORT_SYMBOL_GPL(exit_fs); +#ifdef CONFIG_MM_OWNER +/* + * Task p is exiting and it owned mm, lets find a new owner for it + */ +static inline int 
+mm_need_new_owner(struct mm_struct *mm, struct task_struct *p) +{ + /* + * If there are other users of the mm and the owner (us) is exiting + * we need to find a new owner to take on the responsibility. + */ + if (!mm) + return 0; + if (atomic_read(&mm->mm_users) <= 1) + return 0; + if (mm->owner != p) + return 0; + return 1; +} + +void mm_update_next_owner(struct mm_struct *mm) +{ + struct task_struct *c, *g, *p = current; + +retry: + if (!mm_need_new_owner(mm, p)) + return; + + read_lock(&tasklist_lock); + /* + * Search in the children + */ + list_for_each_entry(c, &p->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search in the siblings + */ + list_for_each_entry(c, &p->parent->children, sibling) { + if (c->mm == mm) + goto assign_new_owner; + } + + /* + * Search through everything else. We should not get + * here often + */ + do_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + } while_each_thread(g, c); + + read_unlock(&tasklist_lock); + return; + +assign_new_owner: + BUG_ON(c == p); + get_task_struct(c); + /* + * The task_lock protects c->mm from changing. + * We always want mm->owner->mm == mm + */ + task_lock(c); + /* + * Delay read_unlock() till we have the task_lock() + * to ensure that c does not slip away underneath us + */ + read_unlock(&tasklist_lock); + if (c->mm != mm) { + task_unlock(c); + put_task_struct(c); + goto retry; + } + cgroup_mm_owner_callbacks(mm->owner, c); + mm->owner = c; + task_unlock(c); + put_task_struct(c); +} +#endif /* CONFIG_MM_OWNER */ + /* * Turn us into a lazy TLB process if we * aren't already.. 
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk) /* We don't want this task to be frozen prematurely */ clear_freeze_flag(tsk); task_unlock(tsk); + mm_update_next_owner(mm); mmput(mm); } diff --git a/kernel/fork.c b/kernel/fork.c index 6067e429f28..068ffe00752 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) mm->ioctx_list = NULL; mm->free_area_cache = TASK_UNMAPPED_BASE; mm->cached_hole_size = ~0UL; - mm_init_cgroup(mm, p); + mm_init_owner(mm, p); if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; return mm; } - mm_free_cgroup(mm); free_mm(mm); return NULL; } @@ -432,13 +431,13 @@ void mmput(struct mm_struct *mm) if (atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); exit_mmap(mm); + set_mm_exe_file(mm, NULL); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); list_del(&mm->mmlist); spin_unlock(&mmlist_lock); } put_swap_token(mm); - mm_free_cgroup(mm); mmdrop(mm); } } @@ -545,6 +544,8 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (init_new_context(tsk, mm)) goto fail_nocontext; + dup_mm_exe_file(oldmm, mm); + err = dup_mmap(mm, oldmm); if (err) goto free_pt; @@ -982,6 +983,13 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } +#ifdef CONFIG_MM_OWNER +void mm_init_owner(struct mm_struct *mm, struct task_struct *p) +{ + mm->owner = p; +} +#endif /* CONFIG_MM_OWNER */ + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. 
@@ -1664,18 +1672,6 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp } /* - * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not - * supported yet - */ -static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) -{ - if (unshare_flags & CLONE_SYSVSEM) - return -EINVAL; - - return 0; -} - -/* * unshare allows a process to 'unshare' part of the process * context which was originally shared using clone. copy_* * functions used by do_fork() cannot be used here directly @@ -1690,8 +1686,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) struct sighand_struct *new_sigh = NULL; struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; struct files_struct *fd, *new_fd = NULL; - struct sem_undo_list *new_ulist = NULL; struct nsproxy *new_nsproxy = NULL; + int do_sysvsem = 0; check_unshare_flags(&unshare_flags); @@ -1703,6 +1699,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) CLONE_NEWNET)) goto bad_unshare_out; + /* + * CLONE_NEWIPC must also detach from the undolist: after switching + * to a new ipc namespace, the semaphore arrays from the old + * namespace are unreachable. + */ + if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM)) + do_sysvsem = 1; if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) @@ -1713,13 +1716,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) goto bad_unshare_cleanup_sigh; if ((err = unshare_fd(unshare_flags, &new_fd))) goto bad_unshare_cleanup_vm; - if ((err = unshare_semundo(unshare_flags, &new_ulist))) - goto bad_unshare_cleanup_fd; if ((err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy, new_fs))) - goto bad_unshare_cleanup_semundo; + goto bad_unshare_cleanup_fd; - if (new_fs || new_mm || new_fd || new_ulist || new_nsproxy) { + if (new_fs || new_mm || new_fd || do_sysvsem || new_nsproxy) { + if (do_sysvsem) { + /* + * CLONE_SYSVSEM is equivalent to sys_exit(). 
+ */ + exit_sem(current); + } if (new_nsproxy) { switch_task_namespaces(current, new_nsproxy); @@ -1755,7 +1762,6 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) if (new_nsproxy) put_nsproxy(new_nsproxy); -bad_unshare_cleanup_semundo: bad_unshare_cleanup_fd: if (new_fd) put_files_struct(new_fd); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 6d9204f3a37..38a25b8d8bf 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -1,6 +1,7 @@ #include <linux/module.h> #include <linux/interrupt.h> #include <linux/device.h> +#include <linux/gfp.h> /* * Device resource management aware IRQ request/free implementation. diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 438a0146428..46e4ad1723f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include "internals.h" diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index f091d13def0..6fc0040f3e3 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -472,11 +472,7 @@ static const struct file_operations kallsyms_operations = { static int __init kallsyms_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("kallsyms", 0444, NULL); - if (entry) - entry->proc_fops = &kallsyms_operations; + proc_create("kallsyms", 0444, NULL, &kallsyms_operations); return 0; } __initcall(kallsyms_init); diff --git a/kernel/kthread.c b/kernel/kthread.c index 92cf6930ab5..ac72eea4833 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -144,9 +144,9 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), spin_lock(&kthread_create_lock); list_add_tail(&create.list, &kthread_create_list); - wake_up_process(kthreadd_task); spin_unlock(&kthread_create_lock); + wake_up_process(kthreadd_task); wait_for_completion(&create.done); if (!IS_ERR(create.result)) { diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 7c74dab0d21..5e7b45c5692 
100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -233,14 +233,7 @@ static struct file_operations lstats_fops = { static int __init init_lstats_procfs(void) { - struct proc_dir_entry *pe; - - pe = create_proc_entry("latency_stats", 0644, NULL); - if (!pe) - return -ENOMEM; - - pe->proc_fops = &lstats_fops; - + proc_create("latency_stats", 0644, NULL, &lstats_fops); return 0; } __initcall(init_lstats_procfs); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8a135bd163c..dc5d29648d8 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -660,20 +660,12 @@ static const struct file_operations proc_lock_stat_operations = { static int __init lockdep_proc_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("lockdep", S_IRUSR, NULL); - if (entry) - entry->proc_fops = &proc_lockdep_operations; - - entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL); - if (entry) - entry->proc_fops = &proc_lockdep_stats_operations; + proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations); + proc_create("lockdep_stats", S_IRUSR, NULL, + &proc_lockdep_stats_operations); #ifdef CONFIG_LOCK_STAT - entry = create_proc_entry("lock_stat", S_IRUSR, NULL); - if (entry) - entry->proc_fops = &proc_lock_stat_operations; + proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); #endif return 0; diff --git a/kernel/marker.c b/kernel/marker.c index 005b9595459..139260e5460 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -23,6 +23,7 @@ #include <linux/rcupdate.h> #include <linux/marker.h> #include <linux/err.h> +#include <linux/slab.h> extern struct marker __start___markers[]; extern struct marker __stop___markers[]; diff --git a/kernel/notifier.c b/kernel/notifier.c index 643360d1bb1..823be11584e 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -31,6 +31,21 @@ static int notifier_chain_register(struct notifier_block **nl, return 0; } +static int notifier_chain_cond_register(struct notifier_block **nl, + 
struct notifier_block *n) +{ + while ((*nl) != NULL) { + if ((*nl) == n) + return 0; + if (n->priority > (*nl)->priority) + break; + nl = &((*nl)->next); + } + n->next = *nl; + rcu_assign_pointer(*nl, n); + return 0; +} + static int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n) { @@ -205,6 +220,29 @@ int blocking_notifier_chain_register(struct blocking_notifier_head *nh, EXPORT_SYMBOL_GPL(blocking_notifier_chain_register); /** + * blocking_notifier_chain_cond_register - Cond add notifier to a blocking notifier chain + * @nh: Pointer to head of the blocking notifier chain + * @n: New entry in notifier chain + * + * Adds a notifier to a blocking notifier chain, only if not already + * present in the chain. + * Must be called in process context. + * + * Currently always returns zero. + */ +int blocking_notifier_chain_cond_register(struct blocking_notifier_head *nh, + struct notifier_block *n) +{ + int ret; + + down_write(&nh->rwsem); + ret = notifier_chain_cond_register(&nh->head, n); + up_write(&nh->rwsem); + return ret; +} +EXPORT_SYMBOL_GPL(blocking_notifier_chain_cond_register); + +/** * blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain * @nh: Pointer to head of the blocking notifier chain * @n: Entry to remove from notifier chain diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index aead4d69f62..48d7ed6fc3a 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c @@ -7,6 +7,8 @@ #include <linux/module.h> #include <linux/cgroup.h> #include <linux/fs.h> +#include <linux/slab.h> +#include <linux/nsproxy.h> struct ns_cgroup { struct cgroup_subsys_state css; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index f5d332cf8c6..adc785146a1 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -139,6 +139,18 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) goto out; } + /* + * CLONE_NEWIPC must detach from the undolist: after switching + * to a new ipc namespace, the semaphore 
arrays from the old + * namespace are unreachable. In clone parlance, CLONE_SYSVSEM + * means share undolist with parent, so we must forbid using + * it along with CLONE_NEWIPC. + */ + if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { + err = -EINVAL; + goto out; + } + new_ns = create_new_namespaces(flags, tsk, tsk->fs); if (IS_ERR(new_ns)) { err = PTR_ERR(new_ns); diff --git a/kernel/panic.c b/kernel/panic.c index 24af9f8bac9..425567f45b9 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -153,6 +153,8 @@ EXPORT_SYMBOL(panic); * 'M' - System experienced a machine check exception. * 'B' - System has hit bad_page. * 'U' - Userspace-defined naughtiness. + * 'A' - ACPI table overridden. + * 'W' - Taint on warning. * * The string is overwritten by the next call to print_taint(). */ @@ -161,7 +163,7 @@ const char *print_tainted(void) { static char buf[20]; if (tainted) { - snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c", + snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c", tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G', tainted & TAINT_FORCED_MODULE ? 'F' : ' ', tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', @@ -170,7 +172,8 @@ const char *print_tainted(void) tainted & TAINT_BAD_PAGE ? 'B' : ' ', tainted & TAINT_USER ? 'U' : ' ', tainted & TAINT_DIE ? 'D' : ' ', - tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' '); + tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ', + tainted & TAINT_WARN ? 
'W' : ' '); } else snprintf(buf, sizeof(buf), "Not tainted"); @@ -312,6 +315,7 @@ void warn_on_slowpath(const char *file, int line) print_modules(); dump_stack(); print_oops_end_marker(); + add_taint(TAINT_WARN); } EXPORT_SYMBOL(warn_on_slowpath); #endif diff --git a/kernel/printk.c b/kernel/printk.c index bdd4ea8c3f2..d3f9c0f788b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1287,31 +1287,7 @@ void tty_write_message(struct tty_struct *tty, char *msg) */ int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) { - static DEFINE_SPINLOCK(ratelimit_lock); - static unsigned toks = 10 * 5 * HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; - - spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > (ratelimit_burst * ratelimit_jiffies)) - toks = ratelimit_burst * ratelimit_jiffies; - if (toks >= ratelimit_jiffies) { - int lost = missed; - - missed = 0; - toks -= ratelimit_jiffies; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); - return 1; - } - missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + return __ratelimit(ratelimit_jiffies, ratelimit_burst); } EXPORT_SYMBOL(__printk_ratelimit); diff --git a/kernel/profile.c b/kernel/profile.c index 606d7387265..ae7ead82cbc 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -587,10 +587,10 @@ static int __init create_proc_profile(void) return 0; if (create_hash_tables()) return -1; - entry = create_proc_entry("profile", S_IWUSR | S_IRUGO, NULL); + entry = proc_create("profile", S_IWUSR | S_IRUGO, + NULL, &proc_profile_operations); if (!entry) return 0; - entry->proc_fops = &proc_profile_operations; entry->size = (1+prof_len) * sizeof(atomic_t); hotcpu_notifier(profile_cpu_callback, 0); return 0; diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 47894f919d4..33acc424667 100644 --- 
a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -45,6 +45,7 @@ #include <linux/byteorder/swabb.h> #include <linux/stat.h> #include <linux/srcu.h> +#include <linux/slab.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " diff --git a/kernel/relay.c b/kernel/relay.c index d6204a48581..7de644cdec4 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -65,6 +65,35 @@ static struct vm_operations_struct relay_file_mmap_ops = { .close = relay_file_mmap_close, }; +/* + * allocate an array of pointers of struct page + */ +static struct page **relay_alloc_page_array(unsigned int n_pages) +{ + struct page **array; + size_t pa_size = n_pages * sizeof(struct page *); + + if (pa_size > PAGE_SIZE) { + array = vmalloc(pa_size); + if (array) + memset(array, 0, pa_size); + } else { + array = kzalloc(pa_size, GFP_KERNEL); + } + return array; +} + +/* + * free an array of pointers of struct page + */ +static void relay_free_page_array(struct page **array) +{ + if (is_vmalloc_addr(array)) + vfree(array); + else + kfree(array); +} + /** * relay_mmap_buf: - mmap channel buffer to process address space * @buf: relay channel buffer @@ -109,7 +138,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) *size = PAGE_ALIGN(*size); n_pages = *size >> PAGE_SHIFT; - buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); + buf->page_array = relay_alloc_page_array(n_pages); if (!buf->page_array) return NULL; @@ -130,7 +159,7 @@ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) depopulate: for (j = 0; j < i; j++) __free_page(buf->page_array[j]); - kfree(buf->page_array); + relay_free_page_array(buf->page_array); return NULL; } @@ -189,7 +218,7 @@ static void relay_destroy_buf(struct rchan_buf *buf) vunmap(buf->start); for (i = 0; i < buf->page_count; i++) __free_page(buf->page_array[i]); - kfree(buf->page_array); + relay_free_page_array(buf->page_array); } chan->buf[buf->cpu] = NULL; kfree(buf->padding); @@ -1162,7 
+1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, ret = 0; spliced = 0; - while (len) { + while (len && !spliced) { ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); if (ret < 0) break; diff --git a/kernel/res_counter.c b/kernel/res_counter.c index efbfc0fc232..d3c61b4ebef 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/parser.h> #include <linux/fs.h> +#include <linux/slab.h> #include <linux/res_counter.h> #include <linux/uaccess.h> @@ -27,6 +28,8 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) } counter->usage += val; + if (counter->usage > counter->max_usage) + counter->max_usage = counter->usage; return 0; } @@ -65,6 +68,8 @@ res_counter_member(struct res_counter *counter, int member) switch (member) { case RES_USAGE: return &counter->usage; + case RES_MAX_USAGE: + return &counter->max_usage; case RES_LIMIT: return &counter->limit; case RES_FAILCNT: @@ -92,6 +97,11 @@ ssize_t res_counter_read(struct res_counter *counter, int member, pos, buf, s - buf); } +u64 res_counter_read_u64(struct res_counter *counter, int member) +{ + return *res_counter_member(counter, member); +} + ssize_t res_counter_write(struct res_counter *counter, int member, const char __user *userbuf, size_t nbytes, loff_t *pos, int (*write_strategy)(char *st_buf, unsigned long long *val)) diff --git a/kernel/resource.c b/kernel/resource.c index cee12cc47ca..74af2d7cb5a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -131,14 +131,8 @@ static const struct file_operations proc_iomem_operations = { static int __init ioresources_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("ioports", 0, NULL); - if (entry) - entry->proc_fops = &proc_ioports_operations; - entry = create_proc_entry("iomem", 0, NULL); - if (entry) - entry->proc_fops = &proc_iomem_operations; + proc_create("ioports", 0, NULL, &proc_ioports_operations); + 
proc_create("iomem", 0, NULL, &proc_iomem_operations); return 0; } __initcall(ioresources_init); diff --git a/kernel/sched.c b/kernel/sched.c index 740fb409e5b..e2f7f5acc80 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9057,13 +9057,13 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } #ifdef CONFIG_FAIR_GROUP_SCHED -static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, +static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { return sched_group_set_shares(cgroup_tg(cgrp), shareval); } -static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) +static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) { struct task_group *tg = cgroup_tg(cgrp); @@ -9073,48 +9073,14 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) #ifdef CONFIG_RT_GROUP_SCHED static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *unused_ppos) + s64 val) { - char buffer[64]; - int retval = 0; - s64 val; - char *end; - - if (!nbytes) - return -EINVAL; - if (nbytes >= sizeof(buffer)) - return -E2BIG; - if (copy_from_user(buffer, userbuf, nbytes)) - return -EFAULT; - - buffer[nbytes] = 0; /* nul-terminate */ - - /* strip newline if necessary */ - if (nbytes && (buffer[nbytes-1] == '\n')) - buffer[nbytes-1] = 0; - val = simple_strtoll(buffer, &end, 0); - if (*end) - return -EINVAL; - - /* Pass to subsystem */ - retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); - if (!retval) - retval = nbytes; - return retval; + return sched_group_set_rt_runtime(cgroup_tg(cgrp), val); } -static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, - struct file *file, - char __user *buf, size_t nbytes, - loff_t *ppos) +static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft) { - char tmp[64]; - long val = sched_group_rt_runtime(cgroup_tg(cgrp)); - 
int len = sprintf(tmp, "%ld\n", val); - - return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); + return sched_group_rt_runtime(cgroup_tg(cgrp)); } static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype, @@ -9133,20 +9099,20 @@ static struct cftype cpu_files[] = { #ifdef CONFIG_FAIR_GROUP_SCHED { .name = "shares", - .read_uint = cpu_shares_read_uint, - .write_uint = cpu_shares_write_uint, + .read_u64 = cpu_shares_read_u64, + .write_u64 = cpu_shares_write_u64, }, #endif #ifdef CONFIG_RT_GROUP_SCHED { .name = "rt_runtime_us", - .read = cpu_rt_runtime_read, - .write = cpu_rt_runtime_write, + .read_s64 = cpu_rt_runtime_read, + .write_s64 = cpu_rt_runtime_write, }, { .name = "rt_period_us", - .read_uint = cpu_rt_period_read_uint, - .write_uint = cpu_rt_period_write_uint, + .read_u64 = cpu_rt_period_read_uint, + .write_u64 = cpu_rt_period_write_uint, }, #endif }; @@ -9277,8 +9243,8 @@ out: static struct cftype files[] = { { .name = "usage", - .read_uint = cpuusage_read, - .write_uint = cpuusage_write, + .read_u64 = cpuusage_read, + .write_u64 = cpuusage_write, }, }; diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index f3f4af4b8b0..8a9498e7c83 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -277,12 +277,9 @@ static int __init init_sched_debug_procfs(void) { struct proc_dir_entry *pe; - pe = create_proc_entry("sched_debug", 0644, NULL); + pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops); if (!pe) return -ENOMEM; - - pe->proc_fops = &sched_debug_fops; - return 0; } diff --git a/kernel/sys.c b/kernel/sys.c index f2a45136695..e423d0d9e6f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1545,6 +1545,19 @@ out: * */ +static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, + cputime_t *utimep, cputime_t *stimep) +{ + *utimep = cputime_add(*utimep, t->utime); + *stimep = cputime_add(*stimep, t->stime); + r->ru_nvcsw += t->nvcsw; + r->ru_nivcsw += t->nivcsw; + r->ru_minflt += t->min_flt; + 
r->ru_majflt += t->maj_flt; + r->ru_inblock += task_io_get_inblock(t); + r->ru_oublock += task_io_get_oublock(t); +} + static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; @@ -1554,6 +1567,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; + if (who == RUSAGE_THREAD) { + accumulate_thread_rusage(p, r, &utime, &stime); + goto out; + } + rcu_read_lock(); if (!lock_task_sighand(p, &flags)) { rcu_read_unlock(); @@ -1586,14 +1604,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += p->signal->oublock; t = p; do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - r->ru_nvcsw += t->nvcsw; - r->ru_nivcsw += t->nivcsw; - r->ru_minflt += t->min_flt; - r->ru_majflt += t->maj_flt; - r->ru_inblock += task_io_get_inblock(t); - r->ru_oublock += task_io_get_oublock(t); + accumulate_thread_rusage(t, r, &utime, &stime); t = next_thread(t); } while (t != p); break; @@ -1605,6 +1616,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); rcu_read_unlock(); +out: cputime_to_timeval(utime, &r->ru_utime); cputime_to_timeval(stime, &r->ru_stime); } @@ -1618,7 +1630,8 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) asmlinkage long sys_getrusage(int who, struct rusage __user *ru) { - if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) + if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && + who != RUSAGE_THREAD) return -EINVAL; return getrusage(current, who, ru); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fd3364827cc..d7ffdc59816 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -38,6 +38,7 @@ #include <linux/writeback.h> #include <linux/hugetlb.h> #include <linux/initrd.h> +#include <linux/key.h> #include <linux/times.h> #include <linux/limits.h> #include <linux/dcache.h> @@ -144,12 
+145,6 @@ extern int no_unaligned_warning; extern int max_lock_depth; #endif -#ifdef CONFIG_SYSCTL_SYSCALL -static int parse_table(int __user *, int, void __user *, size_t __user *, - void __user *, size_t, struct ctl_table *); -#endif - - #ifdef CONFIG_PROC_SYSCTL static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -809,6 +804,14 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, +#ifdef CONFIG_KEYS + { + .ctl_name = CTL_UNNUMBERED, + .procname = "keys", + .mode = 0555, + .child = key_sysctls, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt @@ -1430,6 +1433,76 @@ void register_sysctl_root(struct ctl_table_root *root) } #ifdef CONFIG_SYSCTL_SYSCALL +/* Perform the actual read/write of a sysctl table entry. */ +static int do_sysctl_strategy(struct ctl_table_root *root, + struct ctl_table *table, + int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +{ + int op = 0, rc; + + if (oldval) + op |= 004; + if (newval) + op |= 002; + if (sysctl_perm(root, table, op)) + return -EPERM; + + if (table->strategy) { + rc = table->strategy(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + } + + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { + rc = sysctl_data(table, name, nlen, oldval, oldlenp, + newval, newlen); + if (rc < 0) + return rc; + } + return 0; +} + +static int parse_table(int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, + struct ctl_table_root *root, + struct ctl_table *table) +{ + int n; +repeat: + if (!nlen) + return -ENOTDIR; + if (get_user(n, name)) + return -EFAULT; + for ( ; 
table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; + if (n == table->ctl_name) { + int error; + if (table->child) { + if (sysctl_perm(root, table, 001)) + return -EPERM; + name++; + nlen--; + table = table->child; + goto repeat; + } + error = do_sysctl_strategy(root, table, name, nlen, + oldval, oldlenp, + newval, newlen); + return error; + } + } + return -ENOTDIR; +} + int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1447,7 +1520,8 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table); + newval, newlen, + head->root, head->ctl_table); if (error != -ENOTDIR) { sysctl_head_finish(head); break; @@ -1493,84 +1567,22 @@ static int test_perm(int mode, int op) return -EACCES; } -int sysctl_perm(struct ctl_table *table, int op) +int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op) { int error; + int mode; + error = security_sysctl(table, op); if (error) return error; - return test_perm(table->mode, op); -} - -#ifdef CONFIG_SYSCTL_SYSCALL -static int parse_table(int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - struct ctl_table *table) -{ - int n; -repeat: - if (!nlen) - return -ENOTDIR; - if (get_user(n, name)) - return -EFAULT; - for ( ; table->ctl_name || table->procname; table++) { - if (!table->ctl_name) - continue; - if (n == table->ctl_name) { - int error; - if (table->child) { - if (sysctl_perm(table, 001)) - return -EPERM; - name++; - nlen--; - table = table->child; - goto repeat; - } - error = do_sysctl_strategy(table, name, nlen, - oldval, oldlenp, - newval, newlen); - return error; - } - } - return -ENOTDIR; -} -/* Perform the actual read/write of a sysctl table entry. 
*/ -int do_sysctl_strategy (struct ctl_table *table, - int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - int op = 0, rc; - - if (oldval) - op |= 004; - if (newval) - op |= 002; - if (sysctl_perm(table, op)) - return -EPERM; + if (root->permissions) + mode = root->permissions(root, current->nsproxy, table); + else + mode = table->mode; - if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen); - if (rc < 0) - return rc; - if (rc > 0) - return 0; - } - - /* If there is no strategy routine, or if the strategy returns - * zero, proceed with automatic r/w */ - if (table->data && table->maxlen) { - rc = sysctl_data(table, name, nlen, oldval, oldlenp, - newval, newlen); - if (rc < 0) - return rc; - } - return 0; + return test_perm(mode, op); } -#endif /* CONFIG_SYSCTL_SYSCALL */ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) { @@ -1583,9 +1595,13 @@ static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table) static __init int sysctl_init(void) { - int err; sysctl_set_parent(NULL, root_table); - err = sysctl_check_table(current->nsproxy, root_table); +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK + { + int err; + err = sysctl_check_table(current->nsproxy, root_table); + } +#endif return 0; } @@ -1712,10 +1728,12 @@ struct ctl_table_header *__register_sysctl_paths( header->unregistering = NULL; header->root = root; sysctl_set_parent(NULL, header->ctl_table); +#ifdef CONFIG_SYSCTL_SYSCALL_CHECK if (sysctl_check_table(namespaces, header->ctl_table)) { kfree(header); return NULL; } +#endif spin_lock(&sysctl_lock); header_list = lookup_header_list(root, namespaces); list_add_tail(&header->ctl_entry, header_list); diff --git a/kernel/time.c b/kernel/time.c index 35d373a9878..86729042e4c 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -35,6 +35,7 @@ #include <linux/syscalls.h> #include <linux/security.h> #include 
<linux/fs.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/unistd.h> diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 67fe8fc21fb..a40e20fd000 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -278,12 +278,9 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = create_proc_entry("timer_list", 0644, NULL); + pe = proc_create("timer_list", 0644, NULL, &timer_list_fops); if (!pe) return -ENOMEM; - - pe->proc_fops = &timer_list_fops; - return 0; } __initcall(init_timer_list_procfs); diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 417da8c5bc7..c994530d166 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -415,12 +415,9 @@ static int __init init_tstats_procfs(void) { struct proc_dir_entry *pe; - pe = create_proc_entry("timer_stats", 0644, NULL); + pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); if (!pe) return -ENOMEM; - - pe->proc_fops = &tstats_fops; - return 0; } __initcall(init_tstats_procfs); diff --git a/kernel/user.c b/kernel/user.c index debce602bfd..aefbbfa3159 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -53,10 +53,6 @@ struct user_struct root_user = { .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, -#ifdef CONFIG_KEYS - .uid_keyring = &root_user_keyring, - .session_keyring = &root_session_keyring, -#endif #ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif @@ -420,12 +416,12 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) new->mq_bytes = 0; #endif new->locked_shm = 0; - - if (alloc_uid_keyring(new, current) < 0) - goto out_free_user; +#ifdef CONFIG_KEYS + new->uid_keyring = new->session_keyring = NULL; +#endif if (sched_create_user(new) < 0) - goto out_put_keys; + goto out_free_user; if (uids_user_create(new)) goto out_destoy_sched; @@ -459,9 +455,6 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) out_destoy_sched: 
sched_destroy_user(new); -out_put_keys: - key_put(new->uid_keyring); - key_put(new->session_keyring); out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 4c9006275df..a9ab0596de4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/version.h> #include <linux/nsproxy.h> +#include <linux/slab.h> #include <linux/user_namespace.h> /* @@ -73,3 +74,4 @@ void free_user_ns(struct kref *kref) release_uids(ns); kfree(ns); } +EXPORT_SYMBOL(free_user_ns); diff --git a/kernel/utsname.c b/kernel/utsname.c index 816d7b24fa0..64d398f1244 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -14,6 +14,7 @@ #include <linux/utsname.h> #include <linux/version.h> #include <linux/err.h> +#include <linux/slab.h> /* * Clone a new ns copying an original utsname, setting refcount to 1 diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 00ff4d08e37..7db251a959c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -158,8 +158,8 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, * * Returns 0 if @work was already on a queue, non-zero otherwise. * - * We queue the work to the CPU it was submitted, but there is no - * guarantee that it will be processed by that CPU. + * We queue the work to the CPU on which it was submitted, but if the CPU dies + * it can be processed by another CPU. 
*/ int queue_work(struct workqueue_struct *wq, struct work_struct *work) { @@ -772,7 +772,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } EXPORT_SYMBOL_GPL(__create_workqueue_key); -static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) +static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) { /* * Our caller is either destroy_workqueue() or CPU_DEAD, @@ -808,19 +808,16 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) void destroy_workqueue(struct workqueue_struct *wq) { const cpumask_t *cpu_map = wq_cpu_map(wq); - struct cpu_workqueue_struct *cwq; int cpu; get_online_cpus(); spin_lock(&workqueue_lock); list_del(&wq->list); spin_unlock(&workqueue_lock); - put_online_cpus(); - for_each_cpu_mask(cpu, *cpu_map) { - cwq = per_cpu_ptr(wq->cpu_wq, cpu); - cleanup_workqueue_thread(cwq, cpu); - } + for_each_cpu_mask(cpu, *cpu_map) + cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu)); + put_online_cpus(); free_percpu(wq->cpu_wq); kfree(wq); @@ -838,7 +835,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, action &= ~CPU_TASKS_FROZEN; switch (action) { - case CPU_UP_PREPARE: cpu_set(cpu, cpu_populated_map); } @@ -861,11 +857,17 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, case CPU_UP_CANCELED: start_workqueue_thread(cwq, -1); case CPU_DEAD: - cleanup_workqueue_thread(cwq, cpu); + cleanup_workqueue_thread(cwq); break; } } + switch (action) { + case CPU_UP_CANCELED: + case CPU_DEAD: + cpu_clear(cpu, cpu_populated_map); + } + return NOTIFY_OK; } diff --git a/lib/Makefile b/lib/Makefile index 2d7001b7f5a..0ae4eb047aa 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,7 +6,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o + proportions.o prio_heap.o ratelimit.o 
lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index d3f5784807b..24c59ded47a 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -20,8 +20,8 @@ /* * Find the next set bit in a memory region. */ -unsigned long __find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -58,14 +58,14 @@ found_first: found_middle: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_next_bit); +EXPORT_SYMBOL(find_next_bit); /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. */ -unsigned long __find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset) +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -102,15 +102,14 @@ found_first: found_middle: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_next_zero_bit); +EXPORT_SYMBOL(find_next_zero_bit); #endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ #ifdef CONFIG_GENERIC_FIND_FIRST_BIT /* * Find the first set bit in a memory region. */ -unsigned long __find_first_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -131,13 +130,12 @@ unsigned long __find_first_bit(const unsigned long *addr, found: return result + __ffs(tmp); } -EXPORT_SYMBOL(__find_first_bit); +EXPORT_SYMBOL(find_first_bit); /* * Find the first cleared bit in a memory region. 
*/ -unsigned long __find_first_zero_bit(const unsigned long *addr, - unsigned long size) +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) { const unsigned long *p = addr; unsigned long result = 0; @@ -158,7 +156,7 @@ unsigned long __find_first_zero_bit(const unsigned long *addr, found: return result + ffz(tmp); } -EXPORT_SYMBOL(__find_first_zero_bit); +EXPORT_SYMBOL(find_first_zero_bit); #endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifdef __BIG_ENDIAN diff --git a/lib/idr.c b/lib/idr.c index afbb0b1023d..8368c81fcb7 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -585,12 +585,11 @@ static void idr_cache_ctor(struct kmem_cache *idr_layer_cache, void *idr_layer) memset(idr_layer, 0, sizeof(struct idr_layer)); } -static int init_id_cache(void) +void __init idr_init_cache(void) { - if (!idr_layer_cache) - idr_layer_cache = kmem_cache_create("idr_layer_cache", - sizeof(struct idr_layer), 0, 0, idr_cache_ctor); - return 0; + idr_layer_cache = kmem_cache_create("idr_layer_cache", + sizeof(struct idr_layer), 0, SLAB_PANIC, + idr_cache_ctor); } /** @@ -602,7 +601,6 @@ static int init_id_cache(void) */ void idr_init(struct idr *idp) { - init_id_cache(); memset(idp, 0, sizeof(struct idr)); spin_lock_init(&idp->lock); } diff --git a/lib/inflate.c b/lib/inflate.c index 845f91d3ac1..9762294be06 100644 --- a/lib/inflate.c +++ b/lib/inflate.c @@ -811,6 +811,9 @@ DEBG("<dyn"); ll = malloc(sizeof(*ll) * (286+30)); /* literal/length and distance code lengths */ #endif + if (ll == NULL) + return 1; + /* make local bit buffer */ b = bb; k = bk; diff --git a/lib/iomap.c b/lib/iomap.c index dd6ca48fe6b..37a3ea4cac9 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -257,7 +257,7 @@ EXPORT_SYMBOL(ioport_unmap); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { resource_size_t start = pci_resource_start(dev, bar); - unsigned long len = pci_resource_len(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = 
pci_resource_flags(dev, bar); if (!len || !start) diff --git a/lib/lmb.c b/lib/lmb.c index 207147ab25e..83287d3869a 100644 --- a/lib/lmb.c +++ b/lib/lmb.c @@ -46,14 +46,13 @@ void lmb_dump_all(void) #endif /* DEBUG */ } -static unsigned long __init lmb_addrs_overlap(u64 base1, u64 size1, - u64 base2, u64 size2) +static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2, + u64 size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long __init lmb_addrs_adjacent(u64 base1, u64 size1, - u64 base2, u64 size2) +static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2) { if (base2 == base1 + size1) return 1; @@ -63,7 +62,7 @@ static long __init lmb_addrs_adjacent(u64 base1, u64 size1, return 0; } -static long __init lmb_regions_adjacent(struct lmb_region *rgn, +static long lmb_regions_adjacent(struct lmb_region *rgn, unsigned long r1, unsigned long r2) { u64 base1 = rgn->region[r1].base; @@ -74,7 +73,7 @@ static long __init lmb_regions_adjacent(struct lmb_region *rgn, return lmb_addrs_adjacent(base1, size1, base2, size2); } -static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) +static void lmb_remove_region(struct lmb_region *rgn, unsigned long r) { unsigned long i; @@ -86,7 +85,7 @@ static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r) } /* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init lmb_coalesce_regions(struct lmb_region *rgn, +static void lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) { rgn->region[r1].size += rgn->region[r2].size; @@ -118,7 +117,7 @@ void __init lmb_analyze(void) lmb.memory.size += lmb.memory.region[i].size; } -static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) +static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size) { unsigned long coalesced = 0; long adjacent, i; @@ -182,7 +181,7 @@ static long __init lmb_add_region(struct 
lmb_region *rgn, u64 base, u64 size) return 0; } -long __init lmb_add(u64 base, u64 size) +long lmb_add(u64 base, u64 size) { struct lmb_region *_rgn = &lmb.memory; @@ -194,6 +193,55 @@ long __init lmb_add(u64 base, u64 size) } +long lmb_remove(u64 base, u64 size) +{ + struct lmb_region *rgn = &(lmb.memory); + u64 rgnbegin, rgnend; + u64 end = base + size; + int i; + + rgnbegin = rgnend = 0; /* suppress gcc warnings */ + + /* Find the region where (base, size) belongs to */ + for (i=0; i < rgn->cnt; i++) { + rgnbegin = rgn->region[i].base; + rgnend = rgnbegin + rgn->region[i].size; + + if ((rgnbegin <= base) && (end <= rgnend)) + break; + } + + /* Didn't find the region */ + if (i == rgn->cnt) + return -1; + + /* Check to see if we are removing entire region */ + if ((rgnbegin == base) && (rgnend == end)) { + lmb_remove_region(rgn, i); + return 0; + } + + /* Check to see if region is matching at the front */ + if (rgnbegin == base) { + rgn->region[i].base = end; + rgn->region[i].size -= size; + return 0; + } + + /* Check to see if the region is matching at the end */ + if (rgnend == end) { + rgn->region[i].size -= size; + return 0; + } + + /* + * We need to split the entry - adjust the current one to the + * beginning of the hole and add the region after hole. + */ + rgn->region[i].size = base - rgn->region[i].base; + return lmb_add_region(rgn, end, rgnend - end); +} + long __init lmb_reserve(u64 base, u64 size) { struct lmb_region *_rgn = &lmb.reserved; @@ -426,3 +474,36 @@ int __init lmb_is_reserved(u64 addr) } return 0; } + +/* + * Given a <base, len>, find which memory regions belong to this range. + * Adjust the request and return a contiguous chunk. 
+ */ +int lmb_find(struct lmb_property *res) +{ + int i; + u64 rstart, rend; + + rstart = res->base; + rend = rstart + res->size - 1; + + for (i = 0; i < lmb.memory.cnt; i++) { + u64 start = lmb.memory.region[i].base; + u64 end = start + lmb.memory.region[i].size - 1; + + if (start > rend) + return -1; + + if ((end >= rstart) && (start < rend)) { + /* adjust the request */ + if (rstart < start) + rstart = start; + if (rend > end) + rend = end; + res->base = rstart; + res->size = rend - rstart + 1; + return 0; + } + } + return -1; +} diff --git a/lib/ratelimit.c b/lib/ratelimit.c new file mode 100644 index 00000000000..485e3040dcd --- /dev/null +++ b/lib/ratelimit.c @@ -0,0 +1,51 @@ +/* + * ratelimit.c - Do something with rate limit. + * + * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/kernel.h> +#include <linux/jiffies.h> +#include <linux/module.h> + +/* + * __ratelimit - rate limiting + * @ratelimit_jiffies: minimum time in jiffies between two callbacks + * @ratelimit_burst: number of callbacks we do before ratelimiting + * + * This enforces a rate limit: not more than @ratelimit_burst callbacks + * in every ratelimit_jiffies + */ +int __ratelimit(int ratelimit_jiffies, int ratelimit_burst) +{ + static DEFINE_SPINLOCK(ratelimit_lock); + static unsigned toks = 10 * 5 * HZ; + static unsigned long last_msg; + static int missed; + unsigned long flags; + unsigned long now = jiffies; + + spin_lock_irqsave(&ratelimit_lock, flags); + toks += now - last_msg; + last_msg = now; + if (toks > (ratelimit_burst * ratelimit_jiffies)) + toks = ratelimit_burst * ratelimit_jiffies; + if (toks >= ratelimit_jiffies) { + int lost = missed; + + missed = 0; + toks -= ratelimit_jiffies; + spin_unlock_irqrestore(&ratelimit_lock, flags); + if (lost) + printk(KERN_WARNING "%s: %d messages suppressed\n", + __func__, lost); + return 1; + } + missed++; + spin_unlock_irqrestore(&ratelimit_lock, 
flags); + return 0; +} +EXPORT_SYMBOL(__ratelimit); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 025922807e6..d568894df8c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -31,6 +31,7 @@ #include <linux/init.h> #include <linux/bootmem.h> +#include <linux/iommu-helper.h> #define OFFSET(val,align) ((unsigned long) \ ( (val) & ( (align) - 1))) @@ -282,15 +283,6 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr) return (addr & ~mask) != 0; } -static inline unsigned int is_span_boundary(unsigned int index, - unsigned int nslots, - unsigned long offset_slots, - unsigned long max_slots) -{ - unsigned long offset = (offset_slots + index) & (max_slots - 1); - return offset + nslots > max_slots; -} - /* * Allocates bounce buffer and returns its kernel virtual address. */ @@ -331,56 +323,53 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) * request and allocate a buffer from that IO TLB pool. */ spin_lock_irqsave(&io_tlb_lock, flags); - { - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - - do { - while (is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - - /* - * If we find a slot that indicates we have 'nslots' - * number of contiguous buffers, we allocate the - * buffers from that slot and mark the entries as '0' - * indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in - * the next round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? 
(index + nslots) : 0); - - goto found; - } + index = ALIGN(io_tlb_index, stride); + if (index >= io_tlb_nslabs) + index = 0; + wrap = index; + + do { + while (iommu_is_span_boundary(index, nslots, offset_slots, + max_slots)) { index += stride; if (index >= io_tlb_nslabs) index = 0; - } while (index != wrap); + if (index == wrap) + goto not_found; + } - not_found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - return NULL; - } - found: + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot + * and mark the entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next + * round. + */ + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + +not_found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; +found: spin_unlock_irqrestore(&io_tlb_lock, flags); /* @@ -566,7 +555,8 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. 
*/ dma_addr_t -swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, + int dir, struct dma_attrs *attrs) { dma_addr_t dev_addr = virt_to_bus(ptr); void *map; @@ -599,6 +589,13 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) return dev_addr; } +EXPORT_SYMBOL(swiotlb_map_single_attrs); + +dma_addr_t +swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +{ + return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL); +} /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -609,8 +606,8 @@ swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) * whatever the device wrote there. */ void -swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, - int dir) +swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir, struct dma_attrs *attrs) { char *dma_addr = bus_to_virt(dev_addr); @@ -620,7 +617,14 @@ swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, else if (dir == DMA_FROM_DEVICE) dma_mark_clean(dma_addr, size); } +EXPORT_SYMBOL(swiotlb_unmap_single_attrs); +void +swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, + int dir) +{ + return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); +} /* * Make physical memory consistent for a single streaming mode DMA translation * after a transfer. @@ -691,6 +695,8 @@ swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, SYNC_FOR_DEVICE); } +void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int, + struct dma_attrs *); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. * This is the scatter-gather version of the above swiotlb_map_single @@ -708,8 +714,8 @@ swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, * same here. 
*/ int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, + int dir, struct dma_attrs *attrs) { struct scatterlist *sg; void *addr; @@ -727,7 +733,8 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, /* Don't panic here, we expect map_sg users to do proper error handling. */ swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sgl, i, dir); + swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, + attrs); sgl[0].dma_length = 0; return 0; } @@ -738,14 +745,22 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, } return nelems; } +EXPORT_SYMBOL(swiotlb_map_sg_attrs); + +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + int dir) +{ + return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules * concerning calls here are the same as for swiotlb_unmap_single() above. 
*/ void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, int dir, struct dma_attrs *attrs) { struct scatterlist *sg; int i; @@ -760,6 +775,14 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); } } +EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); + +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + int dir) +{ + return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} /* * Make physical memory consistent for a set of streaming mode DMA translations diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2c37c67ed8c..bbf953eeb58 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -199,7 +199,8 @@ static struct page *alloc_fresh_huge_page_node(int nid) struct page *page; page = alloc_pages_node(nid, - htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN, + htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| + __GFP_REPEAT|__GFP_NOWARN, HUGETLB_PAGE_ORDER); if (page) { if (arch_prepare_hugepage(page)) { @@ -294,7 +295,8 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma, } spin_unlock(&hugetlb_lock); - page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN, + page = alloc_pages(htlb_alloc_mask|__GFP_COMP| + __GFP_REPEAT|__GFP_NOWARN, HUGETLB_PAGE_ORDER); spin_lock(&hugetlb_lock); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2e0bfc93484..33add96cd5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -26,15 +26,18 @@ #include <linux/backing-dev.h> #include <linux/bit_spinlock.h> #include <linux/rcupdate.h> +#include <linux/slab.h> #include <linux/swap.h> #include <linux/spinlock.h> #include <linux/fs.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> #include <asm/uaccess.h> struct cgroup_subsys mem_cgroup_subsys; static const int MEM_CGROUP_RECLAIM_RETRIES = 5; +static struct kmem_cache *page_cgroup_cache; /* 
* Statistics for memory cgroup. @@ -236,26 +239,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) css); } -static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) +struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) { return container_of(task_subsys_state(p, mem_cgroup_subsys_id), struct mem_cgroup, css); } -void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) -{ - struct mem_cgroup *mem; - - mem = mem_cgroup_from_task(p); - css_get(&mem->css); - mm->mem_cgroup = mem; -} - -void mm_free_cgroup(struct mm_struct *mm) -{ - css_put(&mm->mem_cgroup->css); -} - static inline int page_cgroup_locked(struct page *page) { return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); @@ -287,10 +276,10 @@ static void unlock_page_cgroup(struct page *page) bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } -static void __mem_cgroup_remove_list(struct page_cgroup *pc) +static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz, + struct page_cgroup *pc) { int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; - struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); if (from) MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; @@ -301,10 +290,10 @@ static void __mem_cgroup_remove_list(struct page_cgroup *pc) list_del_init(&pc->lru); } -static void __mem_cgroup_add_list(struct page_cgroup *pc) +static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz, + struct page_cgroup *pc) { int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; - struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc); if (!to) { MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; @@ -476,6 +465,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, int zid = zone_idx(z); struct mem_cgroup_per_zone *mz; + BUG_ON(!mem_cont); mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); if (active) src = &mz->active_list; @@ -560,7 +550,7 @@ retry: } unlock_page_cgroup(page); - pc = kzalloc(sizeof(struct page_cgroup), 
gfp_mask); + pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask); if (pc == NULL) goto err; @@ -574,7 +564,7 @@ retry: mm = &init_mm; rcu_read_lock(); - mem = rcu_dereference(mm->mem_cgroup); + mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); /* * For every charge from the cgroup, increment reference count */ @@ -602,7 +592,6 @@ retry: mem_cgroup_out_of_memory(mem, gfp_mask); goto out; } - congestion_wait(WRITE, HZ/10); } pc->ref_cnt = 1; @@ -610,7 +599,7 @@ retry: pc->page = page; pc->flags = PAGE_CGROUP_FLAG_ACTIVE; if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) - pc->flags |= PAGE_CGROUP_FLAG_CACHE; + pc->flags = PAGE_CGROUP_FLAG_CACHE; lock_page_cgroup(page); if (page_get_page_cgroup(page)) { @@ -622,14 +611,14 @@ retry: */ res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); goto retry; } page_assign_page_cgroup(page, pc); mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(pc); + __mem_cgroup_add_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(page); @@ -637,7 +626,7 @@ done: return 0; out: css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); err: return -ENOMEM; } @@ -685,7 +674,7 @@ void mem_cgroup_uncharge_page(struct page *page) if (--(pc->ref_cnt) == 0) { mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); + __mem_cgroup_remove_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); page_assign_page_cgroup(page, NULL); @@ -695,7 +684,7 @@ void mem_cgroup_uncharge_page(struct page *page) res_counter_uncharge(&mem->res, PAGE_SIZE); css_put(&mem->css); - kfree(pc); + kmem_cache_free(page_cgroup_cache, pc); return; } @@ -747,7 +736,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(pc); + __mem_cgroup_remove_list(mz, 
pc); spin_unlock_irqrestore(&mz->lru_lock, flags); page_assign_page_cgroup(page, NULL); @@ -759,7 +748,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage) mz = page_cgroup_zoneinfo(pc); spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_add_list(pc); + __mem_cgroup_add_list(mz, pc); spin_unlock_irqrestore(&mz->lru_lock, flags); unlock_page_cgroup(newpage); @@ -853,13 +842,10 @@ static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) return 0; } -static ssize_t mem_cgroup_read(struct cgroup *cont, - struct cftype *cft, struct file *file, - char __user *userbuf, size_t nbytes, loff_t *ppos) +static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) { - return res_counter_read(&mem_cgroup_from_cont(cont)->res, - cft->private, userbuf, nbytes, ppos, - NULL); + return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, + cft->private); } static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, @@ -871,27 +857,25 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, mem_cgroup_write_strategy); } -static ssize_t mem_force_empty_write(struct cgroup *cont, - struct cftype *cft, struct file *file, - const char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) { - struct mem_cgroup *mem = mem_cgroup_from_cont(cont); - int ret = mem_cgroup_force_empty(mem); - if (!ret) - ret = nbytes; - return ret; + struct mem_cgroup *mem; + + mem = mem_cgroup_from_cont(cont); + switch (event) { + case RES_MAX_USAGE: + res_counter_reset_max(&mem->res); + break; + case RES_FAILCNT: + res_counter_reset_failcnt(&mem->res); + break; + } + return 0; } -/* - * Note: This should be removed if cgroup supports write-only file. 
- */ -static ssize_t mem_force_empty_read(struct cgroup *cont, - struct cftype *cft, - struct file *file, char __user *userbuf, - size_t nbytes, loff_t *ppos) +static int mem_force_empty_write(struct cgroup *cont, unsigned int event) { - return -EINVAL; + return mem_cgroup_force_empty(mem_cgroup_from_cont(cont)); } static const struct mem_cgroup_stat_desc { @@ -902,9 +886,9 @@ static const struct mem_cgroup_stat_desc { [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, }; -static int mem_control_stat_show(struct seq_file *m, void *arg) +static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, + struct cgroup_map_cb *cb) { - struct cgroup *cont = m->private; struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); struct mem_cgroup_stat *stat = &mem_cont->stat; int i; @@ -914,8 +898,7 @@ static int mem_control_stat_show(struct seq_file *m, void *arg) val = mem_cgroup_read_stat(stat, i); val *= mem_cgroup_stat_desc[i].unit; - seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg, - (long long)val); + cb->fill(cb, mem_cgroup_stat_desc[i].msg, val); } /* showing # of active pages */ { @@ -925,52 +908,43 @@ static int mem_control_stat_show(struct seq_file *m, void *arg) MEM_CGROUP_ZSTAT_INACTIVE); active = mem_cgroup_get_all_zonestat(mem_cont, MEM_CGROUP_ZSTAT_ACTIVE); - seq_printf(m, "active %ld\n", (active) * PAGE_SIZE); - seq_printf(m, "inactive %ld\n", (inactive) * PAGE_SIZE); + cb->fill(cb, "active", (active) * PAGE_SIZE); + cb->fill(cb, "inactive", (inactive) * PAGE_SIZE); } return 0; } -static const struct file_operations mem_control_stat_file_operations = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mem_control_stat_open(struct inode *unused, struct file *file) -{ - /* XXX __d_cont */ - struct cgroup *cont = file->f_dentry->d_parent->d_fsdata; - - file->f_op = &mem_control_stat_file_operations; - return single_open(file, mem_control_stat_show, cont); -} - static struct cftype mem_cgroup_files[] = { { 
.name = "usage_in_bytes", .private = RES_USAGE, - .read = mem_cgroup_read, + .read_u64 = mem_cgroup_read, + }, + { + .name = "max_usage_in_bytes", + .private = RES_MAX_USAGE, + .trigger = mem_cgroup_reset, + .read_u64 = mem_cgroup_read, }, { .name = "limit_in_bytes", .private = RES_LIMIT, .write = mem_cgroup_write, - .read = mem_cgroup_read, + .read_u64 = mem_cgroup_read, }, { .name = "failcnt", .private = RES_FAILCNT, - .read = mem_cgroup_read, + .trigger = mem_cgroup_reset, + .read_u64 = mem_cgroup_read, }, { .name = "force_empty", - .write = mem_force_empty_write, - .read = mem_force_empty_read, + .trigger = mem_force_empty_write, }, { .name = "stat", - .open = mem_control_stat_open, + .read_map = mem_control_stat_show, }, }; @@ -1010,6 +984,29 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) kfree(mem->info.nodeinfo[node]); } +static struct mem_cgroup *mem_cgroup_alloc(void) +{ + struct mem_cgroup *mem; + + if (sizeof(*mem) < PAGE_SIZE) + mem = kmalloc(sizeof(*mem), GFP_KERNEL); + else + mem = vmalloc(sizeof(*mem)); + + if (mem) + memset(mem, 0, sizeof(*mem)); + return mem; +} + +static void mem_cgroup_free(struct mem_cgroup *mem) +{ + if (sizeof(*mem) < PAGE_SIZE) + kfree(mem); + else + vfree(mem); +} + + static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { @@ -1018,17 +1015,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) if (unlikely((cont->parent) == NULL)) { mem = &init_mem_cgroup; - init_mm.mem_cgroup = mem; - } else - mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); - - if (mem == NULL) - return ERR_PTR(-ENOMEM); + page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC); + } else { + mem = mem_cgroup_alloc(); + if (!mem) + return ERR_PTR(-ENOMEM); + } res_counter_init(&mem->res); - memset(&mem->info, 0, sizeof(mem->info)); - for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; @@ -1038,7 +1033,7 @@ 
free_out: for_each_node_state(node, N_POSSIBLE) free_mem_cgroup_per_zone_info(mem, node); if (cont->parent != NULL) - kfree(mem); + mem_cgroup_free(mem); return ERR_PTR(-ENOMEM); } @@ -1058,7 +1053,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss, for_each_node_state(node, N_POSSIBLE) free_mem_cgroup_per_zone_info(mem, node); - kfree(mem_cgroup_from_cont(cont)); + mem_cgroup_free(mem_cgroup_from_cont(cont)); } static int mem_cgroup_populate(struct cgroup_subsys *ss, @@ -1098,10 +1093,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, if (!thread_group_leader(p)) goto out; - css_get(&mem->css); - rcu_assign_pointer(mm->mem_cgroup, mem); - css_put(&old_mem->css); - out: mmput(mm); } diff --git a/mm/mmap.c b/mm/mmap.c index 677d184b0d4..fac66337da2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -230,8 +230,11 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); return next; @@ -623,8 +626,11 @@ again: remove_next = 1 + (end > next->vm_end); spin_unlock(&mapping->i_mmap_lock); if (remove_next) { - if (file) + if (file) { fput(file); + if (next->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); + } mm->map_count--; mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); @@ -1154,6 +1160,8 @@ munmap_back: error = file->f_op->mmap(file, vma); if (error) goto unmap_and_free_vma; + if (vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); } else if (vm_flags & VM_SHARED) { error = shmem_zero_setup(vma); if (error) @@ -1185,6 +1193,8 @@ munmap_back: mpol_put(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); fput(file); + if (vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(mm); } else { vma_link(mm, vma, prev, rb_link, rb_parent); file = 
vma->vm_file; @@ -1817,8 +1827,11 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, } vma_set_policy(new, pol); - if (new->vm_file) + if (new->vm_file) { get_file(new->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } if (new->vm_ops && new->vm_ops->open) new->vm_ops->open(new); @@ -2135,8 +2148,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma->vm_start = addr; new_vma->vm_end = addr + len; new_vma->vm_pgoff = pgoff; - if (new_vma->vm_file) + if (new_vma->vm_file) { get_file(new_vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + added_exe_file_vma(mm); + } if (new_vma->vm_ops && new_vma->vm_ops->open) new_vma->vm_ops->open(new_vma); vma_link(mm, new_vma, prev, rb_link, rb_parent); diff --git a/mm/nommu.c b/mm/nommu.c index 1d32fe89d57..ef8c62cec69 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -966,8 +966,13 @@ unsigned long do_mmap_pgoff(struct file *file, INIT_LIST_HEAD(&vma->anon_vma_node); atomic_set(&vma->vm_usage, 1); - if (file) + if (file) { get_file(file); + if (vm_flags & VM_EXECUTABLE) { + added_exe_file_vma(current->mm); + vma->vm_mm = current->mm; + } + } vma->vm_file = file; vma->vm_flags = vm_flags; vma->vm_start = addr; @@ -1022,8 +1027,11 @@ unsigned long do_mmap_pgoff(struct file *file, up_write(&nommu_vma_sem); kfree(vml); if (vma) { - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + removed_exe_file_vma(vma->vm_mm); + } kfree(vma); } return ret; @@ -1053,7 +1061,7 @@ EXPORT_SYMBOL(do_mmap_pgoff); /* * handle mapping disposal for uClinux */ -static void put_vma(struct vm_area_struct *vma) +static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma) { if (vma) { down_write(&nommu_vma_sem); @@ -1075,8 +1083,11 @@ static void put_vma(struct vm_area_struct *vma) realalloc -= kobjsize(vma); askedalloc -= sizeof(*vma); - if (vma->vm_file) + if (vma->vm_file) { fput(vma->vm_file); + if (vma->vm_flags & VM_EXECUTABLE) + 
removed_exe_file_vma(mm); + } kfree(vma); } @@ -1113,7 +1124,7 @@ int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len) found: vml = *parent; - put_vma(vml->vma); + put_vma(mm, vml->vma); *parent = vml->next; realalloc -= kobjsize(vml); @@ -1158,7 +1169,7 @@ void exit_mmap(struct mm_struct * mm) while ((tmp = mm->context.vmlist)) { mm->context.vmlist = tmp->next; - put_vma(tmp->vma); + put_vma(mm, tmp->vma); realalloc -= kobjsize(tmp); askedalloc -= sizeof(*tmp); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d1cf4f05dcd..0a502e99ee2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1461,7 +1461,8 @@ __alloc_pages_internal(gfp_t gfp_mask, unsigned int order, struct task_struct *p = current; int do_retry; int alloc_flags; - int did_some_progress; + unsigned long did_some_progress; + unsigned long pages_reclaimed = 0; might_sleep_if(wait); @@ -1611,14 +1612,26 @@ nofail_alloc: * Don't let big-order allocations loop unless the caller explicitly * requests that. Wait for some write requests to complete then retry. * - * In this implementation, __GFP_REPEAT means __GFP_NOFAIL for order - * <= 3, but that may not be true in other implementations. + * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER + * means __GFP_NOFAIL, but that may not be true in other + * implementations. + * + * For order > PAGE_ALLOC_COSTLY_ORDER, if __GFP_REPEAT is + * specified, then we retry until we no longer reclaim any pages + * (above), or we've reclaimed an order of pages at least as + * large as the allocation's order. In both cases, if the + * allocation still fails, we stop retrying. 
*/ + pages_reclaimed += did_some_progress; do_retry = 0; if (!(gfp_mask & __GFP_NORETRY)) { - if ((order <= PAGE_ALLOC_COSTLY_ORDER) || - (gfp_mask & __GFP_REPEAT)) + if (order <= PAGE_ALLOC_COSTLY_ORDER) { do_retry = 1; + } else { + if (gfp_mask & __GFP_REPEAT && + pages_reclaimed < (1 << order)) + do_retry = 1; + } if (gfp_mask & __GFP_NOFAIL) do_retry = 1; } @@ -2524,7 +2537,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, struct page *page; unsigned long end_pfn = start_pfn + size; unsigned long pfn; + struct zone *z; + z = &NODE_DATA(nid)->node_zones[zone]; for (pfn = start_pfn; pfn < end_pfn; pfn++) { /* * There can be holes in boot-time mem_map[]s @@ -2542,7 +2557,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, init_page_count(page); reset_page_mapcount(page); SetPageReserved(page); - /* * Mark the block movable so that blocks are reserved for * movable at startup. This will force kernel allocations @@ -2551,8 +2565,15 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * kernel allocations are made. Later some blocks near * the start are marked MIGRATE_RESERVE by * setup_zone_migrate_reserve() + * + * bitmap is created for zone's valid pfn range. but memmap + * can be created for invalid pages (for alignment) + * check here not to call set_pageblock_migratetype() against + * pfn out of zone. 
*/ - if ((pfn & (pageblock_nr_pages-1))) + if ((z->zone_start_pfn <= pfn) + && (pfn < z->zone_start_pfn + z->spanned_pages) + && !(pfn & (pageblock_nr_pages - 1))) set_pageblock_migratetype(page, MIGRATE_MOVABLE); INIT_LIST_HEAD(&page->lru); @@ -4464,6 +4485,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); + VM_BUG_ON(pfn < zone->zone_start_pfn); + VM_BUG_ON(pfn >= zone->zone_start_pfn + zone->spanned_pages); for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) if (flags & value) diff --git a/mm/slub.c b/mm/slub.c index 992ecd4f0d3..b145e798bf3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2978,7 +2978,7 @@ void __init kmem_cache_init(void) kmalloc_caches[0].refcount = -1; caches++; - hotplug_memory_notifier(slab_memory_callback, 1); + hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); #endif /* Able to allocate the per node structures */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 67051be7083..bd1bb592030 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1426,11 +1426,7 @@ static const struct file_operations proc_swaps_operations = { static int __init procswaps_init(void) { - struct proc_dir_entry *entry; - - entry = create_proc_entry("swaps", 0, NULL); - if (entry) - entry->proc_fops = &proc_swaps_operations; + proc_create("swaps", 0, NULL, &proc_swaps_operations); return 0; } __initcall(procswaps_init); diff --git a/mm/vmscan.c b/mm/vmscan.c index eceac9f9032..12e8627c974 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1299,6 +1299,9 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist, * hope that some of these pages can be written. But if the allocating task * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. 
+ * + * returns: 0, if no pages reclaimed + * else, the number of pages reclaimed */ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) @@ -1347,7 +1350,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, } total_scanned += sc->nr_scanned; if (nr_reclaimed >= sc->swap_cluster_max) { - ret = 1; + ret = nr_reclaimed; goto out; } @@ -1370,7 +1373,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, } /* top priority shrink_caches still had more to do? don't OOM, then */ if (!sc->all_unreclaimable && scan_global_lru(sc)) - ret = 1; + ret = nr_reclaimed; out: /* * Now that we've scanned all the zones at this priority level, note diff --git a/net/core/dev.c b/net/core/dev.c index e1df1ab3e04..ed49da59205 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1524,7 +1524,7 @@ static int dev_gso_segment(struct sk_buff *skb) if (!segs) return 0; - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) return PTR_ERR(segs); skb->next = segs; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b5270efda..24eca23c2db 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1234,7 +1234,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) segs = ops->gso_segment(skb, features); rcu_read_unlock(); - if (!segs || unlikely(IS_ERR(segs))) + if (!segs || IS_ERR(segs)) goto out; skb = segs; diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index a4f1439ffdd..75497e55927 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -9,6 +9,7 @@ */ #include "irnet_irda.h" /* Private header */ +#include <linux/seq_file.h> /* * PPP disconnect work: we need to make sure we're in @@ -1717,34 +1718,23 @@ irnet_expiry_indication(discinfo_t * expiry, */ #ifdef CONFIG_PROC_FS -/*------------------------------------------------------------------*/ -/* - * Function irnet_proc_read (buf, start, offset, len, unused) - * - * Give some info to the 
/proc file system - */ static int -irnet_proc_read(char * buf, - char ** start, - off_t offset, - int len) +irnet_proc_show(struct seq_file *m, void *v) { irnet_socket * self; char * state; int i = 0; - len = 0; - /* Get the IrNET server information... */ - len += sprintf(buf+len, "IrNET server - "); - len += sprintf(buf+len, "IrDA state: %s, ", + seq_printf(m, "IrNET server - "); + seq_printf(m, "IrDA state: %s, ", (irnet_server.running ? "running" : "dead")); - len += sprintf(buf+len, "stsap_sel: %02x, ", irnet_server.s.stsap_sel); - len += sprintf(buf+len, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel); + seq_printf(m, "stsap_sel: %02x, ", irnet_server.s.stsap_sel); + seq_printf(m, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel); /* Do we need to continue ? */ if(!irnet_server.running) - return len; + return 0; /* Protect access to the instance list */ spin_lock_bh(&irnet_server.spinlock); @@ -1754,23 +1744,23 @@ irnet_proc_read(char * buf, while(self != NULL) { /* Start printing info about the socket. */ - len += sprintf(buf+len, "\nIrNET socket %d - ", i++); + seq_printf(m, "\nIrNET socket %d - ", i++); /* First, get the requested configuration */ - len += sprintf(buf+len, "Requested IrDA name: \"%s\", ", self->rname); - len += sprintf(buf+len, "daddr: %08x, ", self->rdaddr); - len += sprintf(buf+len, "saddr: %08x\n", self->rsaddr); + seq_printf(m, "Requested IrDA name: \"%s\", ", self->rname); + seq_printf(m, "daddr: %08x, ", self->rdaddr); + seq_printf(m, "saddr: %08x\n", self->rsaddr); /* Second, get all the PPP info */ - len += sprintf(buf+len, " PPP state: %s", + seq_printf(m, " PPP state: %s", (self->ppp_open ? 
"registered" : "unregistered")); if(self->ppp_open) { - len += sprintf(buf+len, ", unit: ppp%d", + seq_printf(m, ", unit: ppp%d", ppp_unit_number(&self->chan)); - len += sprintf(buf+len, ", channel: %d", + seq_printf(m, ", channel: %d", ppp_channel_index(&self->chan)); - len += sprintf(buf+len, ", mru: %d", + seq_printf(m, ", mru: %d", self->mru); /* Maybe add self->flags ? Later... */ } @@ -1789,10 +1779,10 @@ irnet_proc_read(char * buf, state = "weird"; else state = "idle"; - len += sprintf(buf+len, "\n IrDA state: %s, ", state); - len += sprintf(buf+len, "daddr: %08x, ", self->daddr); - len += sprintf(buf+len, "stsap_sel: %02x, ", self->stsap_sel); - len += sprintf(buf+len, "dtsap_sel: %02x\n", self->dtsap_sel); + seq_printf(m, "\n IrDA state: %s, ", state); + seq_printf(m, "daddr: %08x, ", self->daddr); + seq_printf(m, "stsap_sel: %02x, ", self->stsap_sel); + seq_printf(m, "dtsap_sel: %02x\n", self->dtsap_sel); /* Next socket, please... */ self = (irnet_socket *) hashbin_get_next(irnet_server.list); @@ -1801,8 +1791,21 @@ irnet_proc_read(char * buf, /* Spin lock end */ spin_unlock_bh(&irnet_server.spinlock); - return len; + return 0; } + +static int irnet_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irnet_proc_show, NULL); +} + +static const struct file_operations irnet_proc_fops = { + .owner = THIS_MODULE, + .open = irnet_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* PROC_FS */ @@ -1841,7 +1844,7 @@ irda_irnet_init(void) #ifdef CONFIG_PROC_FS /* Add a /proc file for irnet infos */ - create_proc_info_entry("irnet", 0, proc_irda, irnet_proc_read); + proc_create("irnet", 0, proc_irda, &irnet_proc_fops); #endif /* CONFIG_PROC_FS */ /* Setup the IrNET server */ diff --git a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h index 0ba92d0d520..3e408952a3f 100644 --- a/net/irda/irnet/irnet_irda.h +++ b/net/irda/irnet/irnet_irda.h @@ -159,14 +159,6 @@ static void 
DISCOVERY_MODE, void *); #endif -/* -------------------------- PROC ENTRY -------------------------- */ -#ifdef CONFIG_PROC_FS -static int - irnet_proc_read(char *, - char **, - off_t, - int); -#endif /* CONFIG_PROC_FS */ /**************************** VARIABLES ****************************/ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bbd26893c0c..582ec3efc8a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -214,7 +214,7 @@ int nf_queue(struct sk_buff *skb, segs = skb_gso_segment(skb, 0); kfree_skb(skb); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) return 1; do { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 2519129c6d2..09cd9c0c2d8 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -150,7 +150,7 @@ static int xfrm_output_gso(struct sk_buff *skb) segs = skb_gso_segment(skb, 0); kfree_skb(skb); - if (unlikely(IS_ERR(segs))) + if (IS_ERR(segs)) return PTR_ERR(segs); do { diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c index 05e438f8b4e..e90dc5d0439 100644 --- a/samples/markers/marker-example.c +++ b/samples/markers/marker-example.c @@ -33,10 +33,8 @@ static struct file_operations mark_ops = { static int example_init(void) { printk(KERN_ALERT "example init\n"); - pentry_example = create_proc_entry("marker-example", 0444, NULL); - if (pentry_example) - pentry_example->proc_fops = &mark_ops; - else + pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); + if (!pentry_example) return -EPERM; return 0; } diff --git a/scripts/Lindent b/scripts/Lindent index 9468ec7971d..9c4b3e2b709 100755 --- a/scripts/Lindent +++ b/scripts/Lindent @@ -1,2 +1,18 @@ #!/bin/sh -indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs -cp1 "$@" +PARAM="-npro -kr -i8 -ts8 -sob -l80 -ss -ncs -cp1" +RES=`indent --version` +V1=`echo $RES | cut -d' ' -f3 | cut -d'.' -f1` +V2=`echo $RES | cut -d' ' -f3 | cut -d'.' -f2` +V3=`echo $RES | cut -d' ' -f3 | cut -d'.' 
-f3` +if [ $V1 -gt 2 ]; then + PARAM="$PARAM -il0" +elif [ $V1 -eq 2 ]; then + if [ $V2 -gt 2 ]; then + PARAM="$PARAM -il0"; + elif [ $V2 -eq 2 ]; then + if [ $V3 -ge 10 ]; then + PARAM="$PARAM -il0" + fi + fi +fi +indent $PARAM "$@" diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 64ec4b8a51b..b6bbbcdc557 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -9,7 +9,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.16'; +my $V = '0.18'; use Getopt::Long qw(:config no_auto_abbrev); @@ -131,6 +131,17 @@ our $NonptrType; our $Type; our $Declare; +our $UTF8 = qr { + [\x09\x0A\x0D\x20-\x7E] # ASCII + | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte + | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs + | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte + | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates + | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 + | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 + | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 +}x; + our @typeList = ( qr{void}, qr{char}, @@ -692,7 +703,7 @@ sub annotate_values { while (length($cur)) { @av_paren_type = ('E') if ($#av_paren_type < 0); print " <" . join('', @av_paren_type) . 
- "> <$type> " if ($dbg_values > 1); + "> <$type> <$av_pending>" if ($dbg_values > 1); if ($cur =~ /^(\s+)/o) { print "WS($1)\n" if ($dbg_values > 1); if ($1 =~ /\n/ && $av_preprocessor) { @@ -705,9 +716,18 @@ sub annotate_values { $type = 'T'; } elsif ($cur =~ /^(#\s*define\s*$Ident)(\(?)/o) { - print "DEFINE($1)\n" if ($dbg_values > 1); + print "DEFINE($1,$2)\n" if ($dbg_values > 1); $av_preprocessor = 1; - $av_pending = 'N'; + push(@av_paren_type, $type); + if ($2 ne '') { + $av_pending = 'N'; + } + $type = 'E'; + + } elsif ($cur =~ /^(#\s*undef\s*$Ident)/o) { + print "UNDEF($1)\n" if ($dbg_values > 1); + $av_preprocessor = 1; + push(@av_paren_type, $type); } elsif ($cur =~ /^(#\s*(?:ifdef|ifndef|if))/o) { print "PRE_START($1)\n" if ($dbg_values > 1); @@ -715,7 +735,7 @@ sub annotate_values { push(@av_paren_type, $type); push(@av_paren_type, $type); - $type = 'N'; + $type = 'E'; } elsif ($cur =~ /^(#\s*(?:else|elif))/o) { print "PRE_RESTART($1)\n" if ($dbg_values > 1); @@ -723,7 +743,7 @@ sub annotate_values { push(@av_paren_type, $av_paren_type[$#av_paren_type]); - $type = 'N'; + $type = 'E'; } elsif ($cur =~ /^(#\s*(?:endif))/o) { print "PRE_END($1)\n" if ($dbg_values > 1); @@ -734,11 +754,16 @@ sub annotate_values { # one does, and continue as if the #endif was not here. pop(@av_paren_type); push(@av_paren_type, $type); - $type = 'N'; + $type = 'E'; } elsif ($cur =~ /^(\\\n)/o) { print "PRECONT($1)\n" if ($dbg_values > 1); + } elsif ($cur =~ /^(__attribute__)\s*\(?/o) { + print "ATTR($1)\n" if ($dbg_values > 1); + $av_pending = $type; + $type = 'N'; + } elsif ($cur =~ /^(sizeof)\s*(\()?/o) { print "SIZEOF($1)\n" if ($dbg_values > 1); if (defined $2) { @@ -930,7 +955,7 @@ sub process { # edge is a close comment then we must be in a comment # at context start. 
my $edge; - for (my $ln = $linenr; $ln < ($linenr + $realcnt); $ln++) { + for (my $ln = $linenr + 1; $ln < ($linenr + $realcnt); $ln++) { next if ($line =~ /^-/); ($edge) = ($rawlines[$ln - 1] =~ m@(/\*|\*/)@); last if (defined $edge); @@ -951,9 +976,9 @@ sub process { ##print "COMMENT:$in_comment edge<$edge> $rawline\n"; sanitise_line_reset($in_comment); - } elsif ($realcnt) { + } elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) { # Standardise the strings and chars within the input to - # simplify matching. + # simplify matching -- only bother with positive lines. $line = sanitise_line($rawline); } push(@lines, $line); @@ -1066,17 +1091,14 @@ sub process { # UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php if (($realfile =~ /^$/ || $line =~ /^\+/) && - !($rawline =~ m/^( - [\x09\x0A\x0D\x20-\x7E] # ASCII - | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte - | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs - | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte - | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates - | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 - | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 - | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 - )*$/x )) { - ERROR("Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $herecurr); + $rawline !~ m/^$UTF8*$/) { + my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/); + + my $blank = copy_spacing($rawline); + my $ptr = substr($blank, 0, length($utf8_prefix)) . "^"; + my $hereptr = "$hereline$ptr\n"; + + ERROR("Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr); } #ignore lines being removed @@ -1112,7 +1134,7 @@ sub process { if ($rawline =~ /^\+\s* \t\s*\S/ || $rawline =~ /^\+\s* \s*/) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - ERROR("use tabs not spaces\n" . $herevet); + ERROR("code indent should use tabs where possible\n" . 
$herevet); } # check for RCS/CVS revision markers @@ -1121,35 +1143,40 @@ sub process { } # Check for potential 'bare' types - if ($realcnt) { - my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); - $s =~ s/\n./ /g; - $s =~ s/{.*$//; + my ($stat, $cond); + if ($realcnt && $line =~ /.\s*\S/) { + ($stat, $cond) = ctx_statement_block($linenr, + $realcnt, 0); + $stat =~ s/\n./\n /g; + $cond =~ s/\n./\n /g; + + my $s = $stat; + $s =~ s/{.*$//s; # Ignore goto labels. - if ($s =~ /$Ident:\*$/) { + if ($s =~ /$Ident:\*$/s) { # Ignore functions being called - } elsif ($s =~ /^.\s*$Ident\s*\(/) { + } elsif ($s =~ /^.\s*$Ident\s*\(/s) { # definitions in global scope can only start with types - } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b/) { + } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b/s) { possible($1, $s); # declarations always start with types - } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:const\s+)?($Ident)\b(:?\s+$Sparse)?\s*\**\s*$Ident\s*(?:;|=|,)/) { + } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:const\s+)?($Ident)\b(:?\s+$Sparse)?\s*\**\s*$Ident\s*(?:;|=|,)/s) { possible($1, $s); } # any (foo ... *) is a pointer cast, and foo is a type - while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/g) { + while ($s =~ /\(($Ident)(?:\s+$Sparse)*\s*\*+\s*\)/sg) { possible($1, $s); } # Check for any sort of function declaration. 
# int foo(something bar, other baz); # void (*store_gdt)(x86_descr_ptr *); - if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/) { + if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) { my ($name_len) = length($1); my $ctx = $s; @@ -1282,18 +1309,19 @@ sub process { ($prevline !~ /^ }/) && ($prevline !~ /^.DECLARE_$Ident\(\Q$name\E\)/) && ($prevline !~ /^.LIST_HEAD\(\Q$name\E\)/) && + ($prevline !~ /^.$Type\s*\(\s*\*\s*\Q$name\E\s*\)\s*\(/) && ($prevline !~ /\b\Q$name\E(?:\s+$Attribute)?\s*(?:;|=|\[)/)) { WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr); } } # check for external initialisers. - if ($line =~ /^.$Type\s*$Ident\s*=\s*(0|NULL);/) { + if ($line =~ /^.$Type\s*$Ident\s*=\s*(0|NULL|false)\s*;/) { ERROR("do not initialise externals to 0 or NULL\n" . $herecurr); } # check for static initialisers. - if ($line =~ /\s*static\s.*=\s*(0|NULL);/) { + if ($line =~ /\s*static\s.*=\s*(0|NULL|false)\s*;/) { ERROR("do not initialise statics to 0 or NULL\n" . $herecurr); } @@ -1512,7 +1540,10 @@ sub process { if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) { ERROR("space required before that '$op' $at\n" . $hereptr); } - if ($ctx =~ /.xW/) { + if ($op eq '*' && $cc =~/\s*const\b/) { + # A unary '*' may be const + + } elsif ($ctx =~ /.xW/) { ERROR("space prohibited after that '$op' $at\n" . $hereptr); } @@ -1617,7 +1648,7 @@ sub process { # Check for illegal assignment in if conditional. if ($line =~ /\bif\s*\(/) { - my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0); + my ($s, $c) = ($stat, $cond); if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/) { ERROR("do not use assignment in if condition\n" . 
$herecurr); @@ -1695,7 +1726,7 @@ sub process { #warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line) if ($tree && $rawline =~ m{^.\#\s*include\s*\<asm\/(.*)\.h\>}) { my $checkfile = "$root/include/linux/$1.h"; - if (-f $checkfile && $1 ne 'irq.h') { + if (-f $checkfile && $1 ne 'irq') { WARN("Use #include <linux/$1.h> instead of <asm/$1.h>\n" . $herecurr); } @@ -1910,7 +1941,8 @@ sub process { } # check for spinlock_t definitions without a comment. - if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/) { + if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/ || + $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) { my $which = $1; if (!ctx_has_comment($first_line, $linenr)) { CHK("$1 definition without comment\n" . $herecurr); @@ -1940,7 +1972,26 @@ sub process { } # check for new externs in .c files. - if ($line =~ /^.\s*extern\s/ && ($realfile =~ /\.c$/)) { + if ($realfile =~ /\.c$/ && defined $stat && + $stat =~ /^.\s*(?:extern\s+)?$Type\s+$Ident(\s*)\(/s) + { + my $paren_space = $1; + + my $s = $stat; + if (defined $cond) { + substr($s, 0, length($cond), ''); + } + if ($s =~ /^\s*;/) { + WARN("externs should be avoided in .c files\n" . $herecurr); + } + + if ($paren_space =~ /\n/) { + WARN("arguments for function declarations should follow identifier\n" . $herecurr); + } + + } elsif ($realfile =~ /\.c$/ && defined $stat && + $stat =~ /^.\s*extern\s+/) + { WARN("externs should be avoided in .c files\n" . $herecurr); } @@ -1964,11 +2015,11 @@ sub process { } # check for semaphores used as mutexes - if ($line =~ /\b(DECLARE_MUTEX|init_MUTEX)\s*\(/) { + if ($line =~ /^.\s*(DECLARE_MUTEX|init_MUTEX)\s*\(/) { WARN("mutexes are preferred for single holder semaphores\n" . $herecurr); } # check for semaphores used as mutexes - if ($line =~ /\binit_MUTEX_LOCKED\s*\(/) { + if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) { WARN("consider using a completion\n" . 
$herecurr); } # recommend strict_strto* over simple_strto* @@ -1979,11 +2030,24 @@ sub process { # use of NR_CPUS is usually wrong # ignore definitions of NR_CPUS and usage to define arrays as likely right if ($line =~ /\bNR_CPUS\b/ && - $line !~ /^.#\s*define\s+NR_CPUS\s+/ && - $line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/) + $line !~ /^.#\s*if\b.*\bNR_CPUS\b/ && + $line !~ /^.#\s*define\b.*\bNR_CPUS\b/ && + $line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/ && + $line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ && + $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/) { WARN("usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr); } + +# check for %L{u,d,i} in strings + my $string; + while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) { + $string = substr($rawline, $-[1], $+[1] - $-[1]); + if ($string =~ /(?<!%)%L[udi]/) { + WARN("\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr); + last; + } + } } # If we have no input at all, then there is nothing to report on diff --git a/security/Makefile b/security/Makefile index 9e8b0252501..7ef1107a728 100644 --- a/security/Makefile +++ b/security/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += commoncap.o smack/built-in.o obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o +obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/commoncap.c b/security/commoncap.c index e8c3f5e4670..5edabc7542a 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -383,8 +383,8 @@ int cap_bprm_secureexec (struct linux_binprm *bprm) current->egid != current->gid); } -int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, - size_t size, int flags) +int cap_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (!strcmp(name, XATTR_NAME_CAPS)) { if 
(!capable(CAP_SETFCAP)) @@ -397,7 +397,7 @@ int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, return 0; } -int cap_inode_removexattr(struct dentry *dentry, char *name) +int cap_inode_removexattr(struct dentry *dentry, const char *name) { if (!strcmp(name, XATTR_NAME_CAPS)) { if (!capable(CAP_SETFCAP)) diff --git a/security/device_cgroup.c b/security/device_cgroup.c new file mode 100644 index 00000000000..4ea583689ee --- /dev/null +++ b/security/device_cgroup.c @@ -0,0 +1,575 @@ +/* + * dev_cgroup.c - device cgroup subsystem + * + * Copyright 2007 IBM Corp + */ + +#include <linux/device_cgroup.h> +#include <linux/cgroup.h> +#include <linux/ctype.h> +#include <linux/list.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +#define ACC_MKNOD 1 +#define ACC_READ 2 +#define ACC_WRITE 4 +#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) + +#define DEV_BLOCK 1 +#define DEV_CHAR 2 +#define DEV_ALL 4 /* this represents all devices */ + +/* + * whitelist locking rules: + * cgroup_lock() cannot be taken under dev_cgroup->lock. + * dev_cgroup->lock can be taken with or without cgroup_lock(). + * + * modifications always require cgroup_lock + * modifications to a list which is visible require the + * dev_cgroup->lock *and* cgroup_lock() + * walking the list requires dev_cgroup->lock or cgroup_lock(). + * + * reasoning: dev_whitelist_copy() needs to kmalloc, so needs + * a mutex, which the cgroup_lock() is. Since modifying + * a visible list requires both locks, either lock can be + * taken for walking the list. 
+ */ + +struct dev_whitelist_item { + u32 major, minor; + short type; + short access; + struct list_head list; +}; + +struct dev_cgroup { + struct cgroup_subsys_state css; + struct list_head whitelist; + spinlock_t lock; +}; + +static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) +{ + return container_of(cgroup_subsys_state(cgroup, devices_subsys_id), + struct dev_cgroup, css); +} + +struct cgroup_subsys devices_subsys; + +static int devcgroup_can_attach(struct cgroup_subsys *ss, + struct cgroup *new_cgroup, struct task_struct *task) +{ + if (current != task && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return 0; +} + +/* + * called under cgroup_lock() + */ +static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) +{ + struct dev_whitelist_item *wh, *tmp, *new; + + list_for_each_entry(wh, orig, list) { + new = kmalloc(sizeof(*wh), GFP_KERNEL); + if (!new) + goto free_and_exit; + new->major = wh->major; + new->minor = wh->minor; + new->type = wh->type; + new->access = wh->access; + list_add_tail(&new->list, dest); + } + + return 0; + +free_and_exit: + list_for_each_entry_safe(wh, tmp, dest, list) { + list_del(&wh->list); + kfree(wh); + } + return -ENOMEM; +} + +/* Stupid prototype - don't bother combining existing entries */ +/* + * called under cgroup_lock() + * since the list is visible to other tasks, we need the spinlock also + */ +static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, + struct dev_whitelist_item *wh) +{ + struct dev_whitelist_item *whcopy; + + whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); + if (!whcopy) + return -ENOMEM; + + memcpy(whcopy, wh, sizeof(*whcopy)); + spin_lock(&dev_cgroup->lock); + list_add_tail(&whcopy->list, &dev_cgroup->whitelist); + spin_unlock(&dev_cgroup->lock); + return 0; +} + +/* + * called under cgroup_lock() + * since the list is visible to other tasks, we need the spinlock also + */ +static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, + struct 
dev_whitelist_item *wh) +{ + struct dev_whitelist_item *walk, *tmp; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { + if (walk->type == DEV_ALL) + goto remove; + if (walk->type != wh->type) + continue; + if (walk->major != ~0 && walk->major != wh->major) + continue; + if (walk->minor != ~0 && walk->minor != wh->minor) + continue; + +remove: + walk->access &= ~wh->access; + if (!walk->access) { + list_del(&walk->list); + kfree(walk); + } + } + spin_unlock(&dev_cgroup->lock); +} + +/* + * called from kernel/cgroup.c with cgroup_lock() held. + */ +static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; + struct cgroup *parent_cgroup; + int ret; + + dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); + if (!dev_cgroup) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&dev_cgroup->whitelist); + parent_cgroup = cgroup->parent; + + if (parent_cgroup == NULL) { + struct dev_whitelist_item *wh; + wh = kmalloc(sizeof(*wh), GFP_KERNEL); + if (!wh) { + kfree(dev_cgroup); + return ERR_PTR(-ENOMEM); + } + wh->minor = wh->major = ~0; + wh->type = DEV_ALL; + wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; + list_add(&wh->list, &dev_cgroup->whitelist); + } else { + parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); + ret = dev_whitelist_copy(&dev_cgroup->whitelist, + &parent_dev_cgroup->whitelist); + if (ret) { + kfree(dev_cgroup); + return ERR_PTR(ret); + } + } + + spin_lock_init(&dev_cgroup->lock); + return &dev_cgroup->css; +} + +static void devcgroup_destroy(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh, *tmp; + + dev_cgroup = cgroup_to_devcgroup(cgroup); + list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { + list_del(&wh->list); + kfree(wh); + } + kfree(dev_cgroup); +} + +#define DEVCG_ALLOW 1 +#define DEVCG_DENY 2 +#define 
DEVCG_LIST 3 + +#define MAJMINLEN 10 +#define ACCLEN 4 + +static void set_access(char *acc, short access) +{ + int idx = 0; + memset(acc, 0, ACCLEN); + if (access & ACC_READ) + acc[idx++] = 'r'; + if (access & ACC_WRITE) + acc[idx++] = 'w'; + if (access & ACC_MKNOD) + acc[idx++] = 'm'; +} + +static char type_to_char(short type) +{ + if (type == DEV_ALL) + return 'a'; + if (type == DEV_CHAR) + return 'c'; + if (type == DEV_BLOCK) + return 'b'; + return 'X'; +} + +static void set_majmin(char *str, unsigned m) +{ + memset(str, 0, MAJMINLEN); + if (m == ~0) + sprintf(str, "*"); + else + snprintf(str, MAJMINLEN, "%d", m); +} + +static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, + struct seq_file *m) +{ + struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); + struct dev_whitelist_item *wh; + char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; + + spin_lock(&devcgroup->lock); + list_for_each_entry(wh, &devcgroup->whitelist, list) { + set_access(acc, wh->access); + set_majmin(maj, wh->major); + set_majmin(min, wh->minor); + seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), + maj, min, acc); + } + spin_unlock(&devcgroup->lock); + + return 0; +} + +/* + * may_access_whitelist: + * does the access granted to dev_cgroup c contain the access + * requested in whitelist item refwh. + * return 1 if yes, 0 if no. 
+ * call with c->lock held + */ +static int may_access_whitelist(struct dev_cgroup *c, + struct dev_whitelist_item *refwh) +{ + struct dev_whitelist_item *whitem; + + list_for_each_entry(whitem, &c->whitelist, list) { + if (whitem->type & DEV_ALL) + return 1; + if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) + continue; + if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) + continue; + if (whitem->major != ~0 && whitem->major != refwh->major) + continue; + if (whitem->minor != ~0 && whitem->minor != refwh->minor) + continue; + if (refwh->access & (~(whitem->access | ACC_MASK))) + continue; + return 1; + } + return 0; +} + +/* + * parent_has_perm: + * when adding a new allow rule to a device whitelist, the rule + * must be allowed in the parent device + */ +static int parent_has_perm(struct cgroup *childcg, + struct dev_whitelist_item *wh) +{ + struct cgroup *pcg = childcg->parent; + struct dev_cgroup *parent; + int ret; + + if (!pcg) + return 1; + parent = cgroup_to_devcgroup(pcg); + spin_lock(&parent->lock); + ret = may_access_whitelist(parent, wh); + spin_unlock(&parent->lock); + return ret; +} + +/* + * Modify the whitelist using allow/deny rules. + * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD + * so we can give a container CAP_MKNOD to let it create devices but not + * modify the whitelist. + * It seems likely we'll want to add a CAP_CONTAINER capability to allow + * us to also grant CAP_SYS_ADMIN to containers without giving away the + * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN + * + * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting + * new access is only allowed if you're in the top-level cgroup, or your + * parent cgroup has the access you're asking for. 
+ */ +static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, + struct file *file, const char __user *userbuf, + size_t nbytes, loff_t *ppos) +{ + struct cgroup *cur_cgroup; + struct dev_cgroup *devcgroup, *cur_devcgroup; + int filetype = cft->private; + char *buffer, *b; + int retval = 0, count; + struct dev_whitelist_item wh; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + devcgroup = cgroup_to_devcgroup(cgroup); + cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); + cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); + + buffer = kmalloc(nbytes+1, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + if (copy_from_user(buffer, userbuf, nbytes)) { + retval = -EFAULT; + goto out1; + } + buffer[nbytes] = 0; /* nul-terminate */ + + cgroup_lock(); + if (cgroup_is_removed(cgroup)) { + retval = -ENODEV; + goto out2; + } + + memset(&wh, 0, sizeof(wh)); + b = buffer; + + switch (*b) { + case 'a': + wh.type = DEV_ALL; + wh.access = ACC_MASK; + goto handle; + case 'b': + wh.type = DEV_BLOCK; + break; + case 'c': + wh.type = DEV_CHAR; + break; + default: + retval = -EINVAL; + goto out2; + } + b++; + if (!isspace(*b)) { + retval = -EINVAL; + goto out2; + } + b++; + if (*b == '*') { + wh.major = ~0; + b++; + } else if (isdigit(*b)) { + wh.major = 0; + while (isdigit(*b)) { + wh.major = wh.major*10+(*b-'0'); + b++; + } + } else { + retval = -EINVAL; + goto out2; + } + if (*b != ':') { + retval = -EINVAL; + goto out2; + } + b++; + + /* read minor */ + if (*b == '*') { + wh.minor = ~0; + b++; + } else if (isdigit(*b)) { + wh.minor = 0; + while (isdigit(*b)) { + wh.minor = wh.minor*10+(*b-'0'); + b++; + } + } else { + retval = -EINVAL; + goto out2; + } + if (!isspace(*b)) { + retval = -EINVAL; + goto out2; + } + for (b++, count = 0; count < 3; count++, b++) { + switch (*b) { + case 'r': + wh.access |= ACC_READ; + break; + case 'w': + wh.access |= ACC_WRITE; + break; + case 'm': + wh.access |= ACC_MKNOD; + break; + case '\n': + case '\0': + count 
= 3; + break; + default: + retval = -EINVAL; + goto out2; + } + } + +handle: + retval = 0; + switch (filetype) { + case DEVCG_ALLOW: + if (!parent_has_perm(cgroup, &wh)) + retval = -EPERM; + else + retval = dev_whitelist_add(devcgroup, &wh); + break; + case DEVCG_DENY: + dev_whitelist_rm(devcgroup, &wh); + break; + default: + retval = -EINVAL; + goto out2; + } + + if (retval == 0) + retval = nbytes; + +out2: + cgroup_unlock(); +out1: + kfree(buffer); + return retval; +} + +static struct cftype dev_cgroup_files[] = { + { + .name = "allow", + .write = devcgroup_access_write, + .private = DEVCG_ALLOW, + }, + { + .name = "deny", + .write = devcgroup_access_write, + .private = DEVCG_DENY, + }, + { + .name = "list", + .read_seq_string = devcgroup_seq_read, + .private = DEVCG_LIST, + }, +}; + +static int devcgroup_populate(struct cgroup_subsys *ss, + struct cgroup *cgroup) +{ + return cgroup_add_files(cgroup, ss, dev_cgroup_files, + ARRAY_SIZE(dev_cgroup_files)); +} + +struct cgroup_subsys devices_subsys = { + .name = "devices", + .can_attach = devcgroup_can_attach, + .create = devcgroup_create, + .destroy = devcgroup_destroy, + .populate = devcgroup_populate, + .subsys_id = devices_subsys_id, +}; + +int devcgroup_inode_permission(struct inode *inode, int mask) +{ + struct cgroup *cgroup; + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh; + + dev_t device = inode->i_rdev; + if (!device) + return 0; + if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + return 0; + cgroup = task_cgroup(current, devices_subsys.subsys_id); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (!dev_cgroup) + return 0; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry(wh, &dev_cgroup->whitelist, list) { + if (wh->type & DEV_ALL) + goto acc_check; + if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) + continue; + if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) + continue; + if (wh->major != ~0 && wh->major != imajor(inode)) + continue; + if (wh->minor != ~0 && 
wh->minor != iminor(inode)) + continue; +acc_check: + if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) + continue; + if ((mask & MAY_READ) && !(wh->access & ACC_READ)) + continue; + spin_unlock(&dev_cgroup->lock); + return 0; + } + spin_unlock(&dev_cgroup->lock); + + return -EPERM; +} + +int devcgroup_inode_mknod(int mode, dev_t dev) +{ + struct cgroup *cgroup; + struct dev_cgroup *dev_cgroup; + struct dev_whitelist_item *wh; + + cgroup = task_cgroup(current, devices_subsys.subsys_id); + dev_cgroup = cgroup_to_devcgroup(cgroup); + if (!dev_cgroup) + return 0; + + spin_lock(&dev_cgroup->lock); + list_for_each_entry(wh, &dev_cgroup->whitelist, list) { + if (wh->type & DEV_ALL) + goto acc_check; + if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) + continue; + if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) + continue; + if (wh->major != ~0 && wh->major != MAJOR(dev)) + continue; + if (wh->minor != ~0 && wh->minor != MINOR(dev)) + continue; +acc_check: + if (!(wh->access & ACC_MKNOD)) + continue; + spin_unlock(&dev_cgroup->lock); + return 0; + } + spin_unlock(&dev_cgroup->lock); + return -EPERM; +} diff --git a/security/dummy.c b/security/dummy.c index 58d4dd1af5c..48cf30226e1 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -365,8 +365,8 @@ static void dummy_inode_delete (struct inode *ino) return; } -static int dummy_inode_setxattr (struct dentry *dentry, char *name, void *value, - size_t size, int flags) +static int dummy_inode_setxattr (struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (!strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) && @@ -375,12 +375,13 @@ static int dummy_inode_setxattr (struct dentry *dentry, char *name, void *value, return 0; } -static void dummy_inode_post_setxattr (struct dentry *dentry, char *name, void *value, - size_t size, int flags) +static void dummy_inode_post_setxattr (struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags) { } 
-static int dummy_inode_getxattr (struct dentry *dentry, char *name) +static int dummy_inode_getxattr (struct dentry *dentry, const char *name) { return 0; } @@ -390,7 +391,7 @@ static int dummy_inode_listxattr (struct dentry *dentry) return 0; } -static int dummy_inode_removexattr (struct dentry *dentry, char *name) +static int dummy_inode_removexattr (struct dentry *dentry, const char *name) { if (!strncmp(name, XATTR_SECURITY_PREFIX, sizeof(XATTR_SECURITY_PREFIX) - 1) && @@ -993,6 +994,13 @@ static inline int dummy_key_permission(key_ref_t key_ref, { return 0; } + +static int dummy_key_getsecurity(struct key *key, char **_buffer) +{ + *_buffer = NULL; + return 0; +} + #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT @@ -1209,6 +1217,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, key_alloc); set_to_dummy_if_null(ops, key_free); set_to_dummy_if_null(ops, key_permission); + set_to_dummy_if_null(ops, key_getsecurity); #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT set_to_dummy_if_null(ops, audit_rule_init); diff --git a/security/keys/Makefile b/security/keys/Makefile index 5145adfb6a0..747a464943a 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -14,3 +14,4 @@ obj-y := \ obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o +obj-$(CONFIG_SYSCTL) += sysctl.o diff --git a/security/keys/compat.c b/security/keys/compat.c index e10ec995f27..c766c68a63b 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -79,6 +79,9 @@ asmlinkage long compat_sys_keyctl(u32 option, case KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority(arg2); + case KEYCTL_GET_SECURITY: + return keyctl_get_security(arg2, compat_ptr(arg3), arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 7d894ef7037..8c05587f501 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -57,10 +57,6 @@ struct key_user { int qnbytes; /* number of bytes 
allocated to this user */ }; -#define KEYQUOTA_MAX_KEYS 100 -#define KEYQUOTA_MAX_BYTES 10000 -#define KEYQUOTA_LINK_BYTES 4 /* a link in a keyring is worth 4 bytes */ - extern struct rb_root key_user_tree; extern spinlock_t key_user_lock; extern struct key_user root_key_user; @@ -68,6 +64,16 @@ extern struct key_user root_key_user; extern struct key_user *key_user_lookup(uid_t uid); extern void key_user_put(struct key_user *user); +/* + * key quota limits + * - root has its own separate limits to everyone else + */ +extern unsigned key_quota_root_maxkeys; +extern unsigned key_quota_root_maxbytes; +extern unsigned key_quota_maxkeys; +extern unsigned key_quota_maxbytes; + +#define KEYQUOTA_LINK_BYTES 4 /* a link in a keyring is worth 4 bytes */ extern struct rb_root key_serial_tree; @@ -77,8 +83,6 @@ extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; -extern void keyring_publish_name(struct key *keyring); - extern int __key_link(struct key *keyring, struct key *key); extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, @@ -102,14 +106,15 @@ extern key_ref_t search_process_keyrings(struct key_type *type, key_match_func_t match, struct task_struct *tsk); -extern struct key *find_keyring_by_name(const char *name, key_serial_t bound); +extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); extern int install_thread_keyring(struct task_struct *tsk); extern int install_process_keyring(struct task_struct *tsk); extern struct key *request_key_and_link(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags); @@ -120,13 +125,15 @@ extern struct key *request_key_and_link(struct key_type *type, struct request_key_auth { struct key *target_key; struct task_struct *context; - char *callout_info; + void *callout_info; + size_t callout_len; pid_t pid; }; extern struct key_type 
key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, - const char *callout_info); + const void *callout_info, + size_t callout_len); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); @@ -152,7 +159,8 @@ extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t); extern long keyctl_set_reqkey_keyring(int); extern long keyctl_set_timeout(key_serial_t, unsigned); extern long keyctl_assume_authority(key_serial_t); - +extern long keyctl_get_security(key_serial_t keyid, char __user *buffer, + size_t buflen); /* * debugging key validation diff --git a/security/keys/key.c b/security/keys/key.c index 654d23baf35..14948cf83ef 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -1,6 +1,6 @@ /* Basic authentication token and access key management * - * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,6 +27,11 @@ DEFINE_SPINLOCK(key_serial_lock); struct rb_root key_user_tree; /* tree of quota records indexed by UID */ DEFINE_SPINLOCK(key_user_lock); +unsigned int key_quota_root_maxkeys = 200; /* root's key count quota */ +unsigned int key_quota_root_maxbytes = 20000; /* root's key space quota */ +unsigned int key_quota_maxkeys = 200; /* general key count quota */ +unsigned int key_quota_maxbytes = 20000; /* general key space quota */ + static LIST_HEAD(key_types_list); static DECLARE_RWSEM(key_types_sem); @@ -139,36 +144,6 @@ void key_user_put(struct key_user *user) /*****************************************************************************/ /* - * insert a key with a fixed serial number - */ -static void __init __key_insert_serial(struct key *key) -{ - struct rb_node *parent, **p; - struct key *xkey; - - parent = NULL; - p = &key_serial_tree.rb_node; - - while (*p) { - parent = *p; - xkey = rb_entry(parent, 
struct key, serial_node); - - if (key->serial < xkey->serial) - p = &(*p)->rb_left; - else if (key->serial > xkey->serial) - p = &(*p)->rb_right; - else - BUG(); - } - - /* we've found a suitable hole - arrange for this key to occupy it */ - rb_link_node(&key->serial_node, parent, p); - rb_insert_color(&key->serial_node, &key_serial_tree); - -} /* end __key_insert_serial() */ - -/*****************************************************************************/ -/* * assign a key the next unique serial number * - these are assigned randomly to avoid security issues through covert * channel problems @@ -266,11 +241,16 @@ struct key *key_alloc(struct key_type *type, const char *desc, /* check that the user's quota permits allocation of another key and * its description */ if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) { + unsigned maxkeys = (uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (uid == 0) ? + key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&user->lock); if (!(flags & KEY_ALLOC_QUOTA_OVERRUN)) { - if (user->qnkeys + 1 >= KEYQUOTA_MAX_KEYS || - user->qnbytes + quotalen >= KEYQUOTA_MAX_BYTES - ) + if (user->qnkeys + 1 >= maxkeys || + user->qnbytes + quotalen >= maxbytes || + user->qnbytes + quotalen < user->qnbytes) goto no_quota; } @@ -375,11 +355,14 @@ int key_payload_reserve(struct key *key, size_t datalen) /* contemplate the quota adjustment */ if (delta != 0 && test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { + unsigned maxbytes = (key->user->uid == 0) ? 
+ key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&key->user->lock); if (delta > 0 && - key->user->qnbytes + delta > KEYQUOTA_MAX_BYTES - ) { + (key->user->qnbytes + delta >= maxbytes || + key->user->qnbytes + delta < key->user->qnbytes)) { ret = -EDQUOT; } else { @@ -757,11 +740,11 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, const char *description, const void *payload, size_t plen, + key_perm_t perm, unsigned long flags) { struct key_type *ktype; struct key *keyring, *key = NULL; - key_perm_t perm; key_ref_t key_ref; int ret; @@ -806,15 +789,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, goto found_matching_key; } - /* decide on the permissions we want */ - perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + /* if the client doesn't provide, decide on the permissions we want */ + if (perm == KEY_PERM_UNDEF) { + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; - if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + if (ktype->read) + perm |= KEY_POS_READ | KEY_USR_READ; - if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + if (ktype == &key_type_keyring || ktype->update) + perm |= KEY_USR_WRITE; + } /* allocate a new key */ key = key_alloc(ktype, description, current->fsuid, current->fsgid, @@ -1018,17 +1003,4 @@ void __init key_init(void) rb_insert_color(&root_key_user.node, &key_user_tree); - /* record root's user standard keyrings */ - key_check(&root_user_keyring); - key_check(&root_session_keyring); - - __key_insert_serial(&root_user_keyring); - __key_insert_serial(&root_session_keyring); - - keyring_publish_name(&root_user_keyring); - keyring_publish_name(&root_session_keyring); - - /* link the two root keyrings together */ - key_link(&root_session_keyring, &root_user_keyring); - } /* end key_init() */ diff 
--git a/security/keys/keyctl.c b/security/keys/keyctl.c index d9ca15c109c..acc9c89e40a 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -19,6 +19,8 @@ #include <linux/capability.h> #include <linux/string.h> #include <linux/err.h> +#include <linux/vmalloc.h> +#include <linux/security.h> #include <asm/uaccess.h> #include "internal.h" @@ -62,9 +64,10 @@ asmlinkage long sys_add_key(const char __user *_type, char type[32], *description; void *payload; long ret; + bool vm; ret = -EINVAL; - if (plen > 32767) + if (plen > 1024 * 1024 - 1) goto error; /* draw all the data into kernel space */ @@ -81,11 +84,18 @@ asmlinkage long sys_add_key(const char __user *_type, /* pull the payload in if one was supplied */ payload = NULL; + vm = false; if (_payload) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL); - if (!payload) - goto error2; + if (!payload) { + if (plen <= PAGE_SIZE) + goto error2; + vm = true; + payload = vmalloc(plen); + if (!payload) + goto error2; + } ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) @@ -102,7 +112,8 @@ asmlinkage long sys_add_key(const char __user *_type, /* create or update the requested key and add it to the target * keyring */ key_ref = key_create_or_update(keyring_ref, type, description, - payload, plen, KEY_ALLOC_IN_QUOTA); + payload, plen, KEY_PERM_UNDEF, + KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key_ref)) { ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); @@ -113,7 +124,10 @@ asmlinkage long sys_add_key(const char __user *_type, key_ref_put(keyring_ref); error3: - kfree(payload); + if (!vm) + kfree(payload); + else + vfree(payload); error2: kfree(description); error: @@ -140,6 +154,7 @@ asmlinkage long sys_request_key(const char __user *_type, struct key_type *ktype; struct key *key; key_ref_t dest_ref; + size_t callout_len; char type[32], *description, *callout_info; long ret; @@ -157,12 +172,14 @@ asmlinkage long sys_request_key(const char __user *_type, /* pull the callout info into kernel 
space */ callout_info = NULL; + callout_len = 0; if (_callout_info) { callout_info = strndup_user(_callout_info, PAGE_SIZE); if (IS_ERR(callout_info)) { ret = PTR_ERR(callout_info); goto error2; } + callout_len = strlen(callout_info); } /* get the destination keyring if specified */ @@ -183,8 +200,8 @@ asmlinkage long sys_request_key(const char __user *_type, } /* do the search */ - key = request_key_and_link(ktype, description, callout_info, NULL, - key_ref_to_ptr(dest_ref), + key = request_key_and_link(ktype, description, callout_info, + callout_len, NULL, key_ref_to_ptr(dest_ref), KEY_ALLOC_IN_QUOTA); if (IS_ERR(key)) { ret = PTR_ERR(key); @@ -714,10 +731,16 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) /* transfer the quota burden to the new user */ if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { + unsigned maxkeys = (uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (uid == 0) ? + key_quota_root_maxbytes : key_quota_maxbytes; + spin_lock(&newowner->lock); - if (newowner->qnkeys + 1 >= KEYQUOTA_MAX_KEYS || - newowner->qnbytes + key->quotalen >= - KEYQUOTA_MAX_BYTES) + if (newowner->qnkeys + 1 >= maxkeys || + newowner->qnbytes + key->quotalen >= maxbytes || + newowner->qnbytes + key->quotalen < + newowner->qnbytes) goto quota_overrun; newowner->qnkeys++; @@ -821,9 +844,10 @@ long keyctl_instantiate_key(key_serial_t id, key_ref_t keyring_ref; void *payload; long ret; + bool vm = false; ret = -EINVAL; - if (plen > 32767) + if (plen > 1024 * 1024 - 1) goto error; /* the appropriate instantiation authorisation key must have been @@ -843,8 +867,14 @@ long keyctl_instantiate_key(key_serial_t id, if (_payload) { ret = -ENOMEM; payload = kmalloc(plen, GFP_KERNEL); - if (!payload) - goto error; + if (!payload) { + if (plen <= PAGE_SIZE) + goto error; + vm = true; + payload = vmalloc(plen); + if (!payload) + goto error; + } ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) @@ -877,7 +907,10 @@ long 
keyctl_instantiate_key(key_serial_t id, } error2: - kfree(payload); + if (!vm) + kfree(payload); + else + vfree(payload); error: return ret; @@ -1055,6 +1088,66 @@ error: } /* end keyctl_assume_authority() */ +/* + * get the security label of a key + * - the key must grant us view permission + * - if there's a buffer, we place up to buflen bytes of data into it + * - unless there's an error, we return the amount of information available, + * irrespective of how much we may have copied (including the terminal NUL) + * - implements keyctl(KEYCTL_GET_SECURITY) + */ +long keyctl_get_security(key_serial_t keyid, + char __user *buffer, + size_t buflen) +{ + struct key *key, *instkey; + key_ref_t key_ref; + char *context; + long ret; + + key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + if (IS_ERR(key_ref)) { + if (PTR_ERR(key_ref) != -EACCES) + return PTR_ERR(key_ref); + + /* viewing a key under construction is also permitted if we + * have the authorisation token handy */ + instkey = key_get_instantiation_authkey(keyid); + if (IS_ERR(instkey)) + return PTR_ERR(key_ref); + key_put(instkey); + + key_ref = lookup_user_key(NULL, keyid, 0, 1, 0); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + } + + key = key_ref_to_ptr(key_ref); + ret = security_key_getsecurity(key, &context); + if (ret == 0) { + /* if no information was returned, give userspace an empty + * string */ + ret = 1; + if (buffer && buflen > 0 && + copy_to_user(buffer, "", 1) != 0) + ret = -EFAULT; + } else if (ret > 0) { + /* return as much data as there's room for */ + if (buffer && buflen > 0) { + if (buflen > ret) + buflen = ret; + + if (copy_to_user(buffer, context, buflen) != 0) + ret = -EFAULT; + } + + kfree(context); + } + + key_ref_put(key_ref); + return ret; +} + /*****************************************************************************/ /* * the key control system call @@ -1135,6 +1228,11 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, case 
KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority((key_serial_t) arg2); + case KEYCTL_GET_SECURITY: + return keyctl_get_security((key_serial_t) arg2, + (char *) arg3, + (size_t) arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 88292e3dee9..a9ab8affc09 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1,6 +1,6 @@ -/* keyring.c: keyring handling +/* Keyring handling * - * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -79,7 +79,7 @@ static DECLARE_RWSEM(keyring_serialise_link_sem); * publish the name of a keyring so that it can be found by name (if it has * one) */ -void keyring_publish_name(struct key *keyring) +static void keyring_publish_name(struct key *keyring) { int bucket; @@ -292,7 +292,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, struct keyring_list *keylist; struct timespec now; - unsigned long possessed; + unsigned long possessed, kflags; struct key *keyring, *key; key_ref_t key_ref; long err; @@ -319,6 +319,32 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, err = -EAGAIN; sp = 0; + /* firstly we should check to see if this top-level keyring is what we + * are looking for */ + key_ref = ERR_PTR(-EAGAIN); + kflags = keyring->flags; + if (keyring->type == type && match(keyring, description)) { + key = keyring; + + /* check it isn't negative and hasn't expired or been + * revoked */ + if (kflags & (1 << KEY_FLAG_REVOKED)) + goto error_2; + if (key->expiry && now.tv_sec >= key->expiry) + goto error_2; + key_ref = ERR_PTR(-ENOKEY); + if (kflags & (1 << KEY_FLAG_NEGATIVE)) + goto error_2; + goto found; + } + + /* otherwise, the top keyring must not be revoked, expired, or + * negatively instantiated if we are to search it */ + key_ref = ERR_PTR(-EAGAIN); + if 
(kflags & ((1 << KEY_FLAG_REVOKED) | (1 << KEY_FLAG_NEGATIVE)) || + (keyring->expiry && now.tv_sec >= keyring->expiry)) + goto error_2; + /* start processing a new keyring */ descend: if (test_bit(KEY_FLAG_REVOKED, &keyring->flags)) @@ -331,13 +357,14 @@ descend: /* iterate through the keys in this keyring first */ for (kix = 0; kix < keylist->nkeys; kix++) { key = keylist->keys[kix]; + kflags = key->flags; /* ignore keys not of this type */ if (key->type != type) continue; /* skip revoked keys and expired keys */ - if (test_bit(KEY_FLAG_REVOKED, &key->flags)) + if (kflags & (1 << KEY_FLAG_REVOKED)) continue; if (key->expiry && now.tv_sec >= key->expiry) @@ -352,8 +379,8 @@ descend: context, KEY_SEARCH) < 0) continue; - /* we set a different error code if we find a negative key */ - if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { + /* we set a different error code if we pass a negative key */ + if (kflags & (1 << KEY_FLAG_NEGATIVE)) { err = -ENOKEY; continue; } @@ -489,10 +516,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, /* * find a keyring with the specified name * - all named keyrings are searched - * - only find keyrings with search permission for the process - * - only find keyrings with a serial number greater than the one specified + * - normally only finds keyrings with search permission for the current process */ -struct key *find_keyring_by_name(const char *name, key_serial_t bound) +struct key *find_keyring_by_name(const char *name, bool skip_perm_check) { struct key *keyring; int bucket; @@ -518,15 +544,11 @@ struct key *find_keyring_by_name(const char *name, key_serial_t bound) if (strcmp(keyring->description, name) != 0) continue; - if (key_permission(make_key_ref(keyring, 0), + if (!skip_perm_check && + key_permission(make_key_ref(keyring, 0), KEY_SEARCH) < 0) continue; - /* found a potential candidate, but we still need to - * check the serial number */ - if (keyring->serial <= bound) - continue; - /* we've got a match */ 
atomic_inc(&keyring->usage); read_unlock(&keyring_name_lock); diff --git a/security/keys/proc.c b/security/keys/proc.c index 694126003ed..f619170da76 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -70,19 +70,15 @@ static int __init key_proc_init(void) struct proc_dir_entry *p; #ifdef CONFIG_KEYS_DEBUG_PROC_KEYS - p = create_proc_entry("keys", 0, NULL); + p = proc_create("keys", 0, NULL, &proc_keys_fops); if (!p) panic("Cannot create /proc/keys\n"); - - p->proc_fops = &proc_keys_fops; #endif - p = create_proc_entry("key-users", 0, NULL); + p = proc_create("key-users", 0, NULL, &proc_key_users_fops); if (!p) panic("Cannot create /proc/key-users\n"); - p->proc_fops = &proc_key_users_fops; - return 0; } /* end key_proc_init() */ @@ -246,6 +242,10 @@ static int proc_key_users_show(struct seq_file *m, void *v) { struct rb_node *_p = v; struct key_user *user = rb_entry(_p, struct key_user, node); + unsigned maxkeys = (user->uid == 0) ? + key_quota_root_maxkeys : key_quota_maxkeys; + unsigned maxbytes = (user->uid == 0) ? + key_quota_root_maxbytes : key_quota_maxbytes; seq_printf(m, "%5u: %5d %d/%d %d/%d %d/%d\n", user->uid, @@ -253,10 +253,9 @@ static int proc_key_users_show(struct seq_file *m, void *v) atomic_read(&user->nkeys), atomic_read(&user->nikeys), user->qnkeys, - KEYQUOTA_MAX_KEYS, + maxkeys, user->qnbytes, - KEYQUOTA_MAX_BYTES - ); + maxbytes); return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index c886a2bb792..5be6d018759 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -1,6 +1,6 @@ -/* process_keys.c: management of a process's keyrings +/* Management of a process's keyrings * - * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -23,6 +23,9 @@ /* session keyring create vs join semaphore */ static DEFINE_MUTEX(key_session_mutex); +/* user keyring creation semaphore */ +static DEFINE_MUTEX(key_user_keyring_mutex); + /* the root user's tracking struct */ struct key_user root_key_user = { .usage = ATOMIC_INIT(3), @@ -33,78 +36,84 @@ struct key_user root_key_user = { .uid = 0, }; -/* the root user's UID keyring */ -struct key root_user_keyring = { - .usage = ATOMIC_INIT(1), - .serial = 2, - .type = &key_type_keyring, - .user = &root_key_user, - .sem = __RWSEM_INITIALIZER(root_user_keyring.sem), - .perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - .flags = 1 << KEY_FLAG_INSTANTIATED, - .description = "_uid.0", -#ifdef KEY_DEBUGGING - .magic = KEY_DEBUG_MAGIC, -#endif -}; - -/* the root user's default session keyring */ -struct key root_session_keyring = { - .usage = ATOMIC_INIT(1), - .serial = 1, - .type = &key_type_keyring, - .user = &root_key_user, - .sem = __RWSEM_INITIALIZER(root_session_keyring.sem), - .perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - .flags = 1 << KEY_FLAG_INSTANTIATED, - .description = "_uid_ses.0", -#ifdef KEY_DEBUGGING - .magic = KEY_DEBUG_MAGIC, -#endif -}; - /*****************************************************************************/ /* - * allocate the keyrings to be associated with a UID + * install user and user session keyrings for a particular UID */ -int alloc_uid_keyring(struct user_struct *user, - struct task_struct *ctx) +static int install_user_keyrings(struct task_struct *tsk) { + struct user_struct *user = tsk->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; - /* concoct a default session keyring */ - sprintf(buf, "_uid_ses.%u", user->uid); + kenter("%p{%u}", user, user->uid); - session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, - KEY_ALLOC_IN_QUOTA, NULL); - if 
(IS_ERR(session_keyring)) { - ret = PTR_ERR(session_keyring); - goto error; + if (user->uid_keyring) { + kleave(" = 0 [exist]"); + return 0; } - /* and a UID specific keyring, pointed to by the default session - * keyring */ - sprintf(buf, "_uid.%u", user->uid); + mutex_lock(&key_user_keyring_mutex); + ret = 0; - uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, ctx, - KEY_ALLOC_IN_QUOTA, session_keyring); - if (IS_ERR(uid_keyring)) { - key_put(session_keyring); - ret = PTR_ERR(uid_keyring); - goto error; + if (!user->uid_keyring) { + /* get the UID-specific keyring + * - there may be one in existence already as it may have been + * pinned by a session, but the user_struct pointing to it + * may have been destroyed by setuid */ + sprintf(buf, "_uid.%u", user->uid); + + uid_keyring = find_keyring_by_name(buf, true); + if (IS_ERR(uid_keyring)) { + uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, + tsk, KEY_ALLOC_IN_QUOTA, + NULL); + if (IS_ERR(uid_keyring)) { + ret = PTR_ERR(uid_keyring); + goto error; + } + } + + /* get a default session keyring (which might also exist + * already) */ + sprintf(buf, "_uid_ses.%u", user->uid); + + session_keyring = find_keyring_by_name(buf, true); + if (IS_ERR(session_keyring)) { + session_keyring = + keyring_alloc(buf, user->uid, (gid_t) -1, + tsk, KEY_ALLOC_IN_QUOTA, NULL); + if (IS_ERR(session_keyring)) { + ret = PTR_ERR(session_keyring); + goto error_release; + } + + /* we install a link from the user session keyring to + * the user keyring */ + ret = key_link(session_keyring, uid_keyring); + if (ret < 0) + goto error_release_both; + } + + /* install the keyrings */ + user->uid_keyring = uid_keyring; + user->session_keyring = session_keyring; } - /* install the keyrings */ - user->uid_keyring = uid_keyring; - user->session_keyring = session_keyring; - ret = 0; + mutex_unlock(&key_user_keyring_mutex); + kleave(" = 0"); + return 0; +error_release_both: + key_put(session_keyring); +error_release: + key_put(uid_keyring); 
error: + mutex_unlock(&key_user_keyring_mutex); + kleave(" = %d", ret); return ret; - -} /* end alloc_uid_keyring() */ +} /*****************************************************************************/ /* @@ -481,7 +490,7 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else { + else if (context->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(context->user->session_keyring, 1), context, type, description, match); @@ -614,6 +623,9 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, if (!context->signal->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ + ret = install_user_keyrings(context); + if (ret < 0) + goto error; ret = install_session_keyring( context, context->user->session_keyring); if (ret < 0) @@ -628,12 +640,24 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, break; case KEY_SPEC_USER_KEYRING: + if (!context->user->uid_keyring) { + ret = install_user_keyrings(context); + if (ret < 0) + goto error; + } + key = context->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: + if (!context->user->session_keyring) { + ret = install_user_keyrings(context); + if (ret < 0) + goto error; + } + key = context->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); @@ -744,7 +768,7 @@ long join_session_keyring(const char *name) mutex_lock(&key_session_mutex); /* look for an existing keyring of this name */ - keyring = find_keyring_by_name(name, 0); + keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 5ecc5057fb5..ba32ca6469b 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ 
-16,6 +16,7 @@ #include <linux/kmod.h> #include <linux/err.h> #include <linux/keyctl.h> +#include <linux/slab.h> #include "internal.h" /* @@ -161,21 +162,22 @@ error_alloc: * call out to userspace for key construction * - we ignore program failure and go on key status instead */ -static int construct_key(struct key *key, const char *callout_info, void *aux) +static int construct_key(struct key *key, const void *callout_info, + size_t callout_len, void *aux) { struct key_construction *cons; request_key_actor_t actor; struct key *authkey; int ret; - kenter("%d,%s,%p", key->serial, callout_info, aux); + kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux); cons = kmalloc(sizeof(*cons), GFP_KERNEL); if (!cons) return -ENOMEM; /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info); + authkey = request_key_auth_new(key, callout_info, callout_len); if (IS_ERR(authkey)) { kfree(cons); ret = PTR_ERR(authkey); @@ -331,6 +333,7 @@ alloc_failed: static struct key *construct_key_and_link(struct key_type *type, const char *description, const char *callout_info, + size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags) @@ -348,7 +351,7 @@ static struct key *construct_key_and_link(struct key_type *type, key_user_put(user); if (ret == 0) { - ret = construct_key(key, callout_info, aux); + ret = construct_key(key, callout_info, callout_len, aux); if (ret < 0) goto construction_failed; } @@ -370,7 +373,8 @@ construction_failed: */ struct key *request_key_and_link(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux, struct key *dest_keyring, unsigned long flags) @@ -378,8 +382,8 @@ struct key *request_key_and_link(struct key_type *type, struct key *key; key_ref_t key_ref; - kenter("%s,%s,%s,%p,%p,%lx", - type->name, description, callout_info, aux, + kenter("%s,%s,%p,%zu,%p,%p,%lx", + type->name, description, callout_info, callout_len, 
aux, dest_keyring, flags); /* search all the process keyrings for a key */ @@ -398,7 +402,8 @@ struct key *request_key_and_link(struct key_type *type, goto error; key = construct_key_and_link(type, description, callout_info, - aux, dest_keyring, flags); + callout_len, aux, dest_keyring, + flags); } error: @@ -434,10 +439,13 @@ struct key *request_key(struct key_type *type, const char *callout_info) { struct key *key; + size_t callout_len = 0; int ret; - key = request_key_and_link(type, description, callout_info, NULL, - NULL, KEY_ALLOC_IN_QUOTA); + if (callout_info) + callout_len = strlen(callout_info); + key = request_key_and_link(type, description, callout_info, callout_len, + NULL, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { @@ -458,14 +466,15 @@ EXPORT_SYMBOL(request_key); */ struct key *request_key_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const void *callout_info, + size_t callout_len, void *aux) { struct key *key; int ret; - key = request_key_and_link(type, description, callout_info, aux, - NULL, KEY_ALLOC_IN_QUOTA); + key = request_key_and_link(type, description, callout_info, callout_len, + aux, NULL, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key)) { ret = wait_for_key_construction(key, false); if (ret < 0) { @@ -485,10 +494,12 @@ EXPORT_SYMBOL(request_key_with_auxdata); */ struct key *request_key_async(struct key_type *type, const char *description, - const char *callout_info) + const void *callout_info, + size_t callout_len) { - return request_key_and_link(type, description, callout_info, NULL, - NULL, KEY_ALLOC_IN_QUOTA); + return request_key_and_link(type, description, callout_info, + callout_len, NULL, NULL, + KEY_ALLOC_IN_QUOTA); } EXPORT_SYMBOL(request_key_async); @@ -500,10 +511,11 @@ EXPORT_SYMBOL(request_key_async); */ struct key *request_key_async_with_auxdata(struct key_type *type, const char *description, - const char *callout_info, + const 
void *callout_info, + size_t callout_len, void *aux) { - return request_key_and_link(type, description, callout_info, aux, - NULL, KEY_ALLOC_IN_QUOTA); + return request_key_and_link(type, description, callout_info, + callout_len, aux, NULL, KEY_ALLOC_IN_QUOTA); } EXPORT_SYMBOL(request_key_async_with_auxdata); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index e42b5252486..bd237b0a633 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -15,6 +15,7 @@ #include <linux/sched.h> #include <linux/err.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include "internal.h" @@ -61,7 +62,7 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info)); + seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* end request_key_auth_describe() */ @@ -77,7 +78,7 @@ static long request_key_auth_read(const struct key *key, size_t datalen; long ret; - datalen = strlen(rka->callout_info); + datalen = rka->callout_len; ret = datalen; /* we can return the data as is */ @@ -137,7 +138,8 @@ static void request_key_auth_destroy(struct key *key) * create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data */ -struct key *request_key_auth_new(struct key *target, const char *callout_info) +struct key *request_key_auth_new(struct key *target, const void *callout_info, + size_t callout_len) { struct request_key_auth *rka, *irka; struct key *authkey = NULL; @@ -152,7 +154,7 @@ struct key *request_key_auth_new(struct key *target, const char *callout_info) kleave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); } - rka->callout_info = kmalloc(strlen(callout_info) + 1, GFP_KERNEL); + rka->callout_info = kmalloc(callout_len, GFP_KERNEL); if (!rka->callout_info) { kleave(" = -ENOMEM"); kfree(rka); @@ -186,7 +188,8 @@ struct 
key *request_key_auth_new(struct key *target, const char *callout_info) } rka->target_key = key_get(target); - strcpy(rka->callout_info, callout_info); + memcpy(rka->callout_info, callout_info, callout_len); + rka->callout_len = callout_len; /* allocate the auth key */ sprintf(desc, "%x", target->serial); diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c new file mode 100644 index 00000000000..b611d493c2d --- /dev/null +++ b/security/keys/sysctl.c @@ -0,0 +1,50 @@ +/* Key management controls + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/key.h> +#include <linux/sysctl.h> +#include "internal.h" + +ctl_table key_sysctls[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "maxkeys", + .data = &key_quota_maxkeys, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "maxbytes", + .data = &key_quota_maxbytes, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "root_maxkeys", + .data = &key_quota_root_maxkeys, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "root_maxbytes", + .data = &key_quota_root_maxbytes, + .maxlen = sizeof(unsigned), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; diff --git a/security/security.c b/security/security.c index d5cb5898d96..8e64a29dc55 100644 --- a/security/security.c +++ b/security/security.c @@ -491,23 +491,23 @@ void security_inode_delete(struct inode *inode) security_ops->inode_delete(inode); } -int 
security_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +int security_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; return security_ops->inode_setxattr(dentry, name, value, size, flags); } -void security_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +void security_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return; security_ops->inode_post_setxattr(dentry, name, value, size, flags); } -int security_inode_getxattr(struct dentry *dentry, char *name) +int security_inode_getxattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; @@ -521,7 +521,7 @@ int security_inode_listxattr(struct dentry *dentry) return security_ops->inode_listxattr(dentry); } -int security_inode_removexattr(struct dentry *dentry, char *name) +int security_inode_removexattr(struct dentry *dentry, const char *name) { if (unlikely(IS_PRIVATE(dentry->d_inode))) return 0; @@ -1156,6 +1156,11 @@ int security_key_permission(key_ref_t key_ref, return security_ops->key_permission(key_ref, context, perm); } +int security_key_getsecurity(struct key *key, char **_buffer) +{ + return security_ops->key_getsecurity(key, _buffer); +} + #endif /* CONFIG_KEYS */ #ifdef CONFIG_AUDIT diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 04acb5af831..4e4de98941a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2619,7 +2619,7 @@ static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) return dentry_has_perm(current, mnt, dentry, FILE__GETATTR); } -static int selinux_inode_setotherxattr(struct dentry *dentry, char *name) +static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) { if 
(!strncmp(name, XATTR_SECURITY_PREFIX, sizeof XATTR_SECURITY_PREFIX - 1)) { @@ -2638,7 +2638,8 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, char *name) return dentry_has_perm(current, NULL, dentry, FILE__SETATTR); } -static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags) +static int selinux_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct task_security_struct *tsec = current->security; struct inode *inode = dentry->d_inode; @@ -2687,8 +2688,9 @@ static int selinux_inode_setxattr(struct dentry *dentry, char *name, void *value &ad); } -static void selinux_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, + int flags) { struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; @@ -2711,7 +2713,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, char *name, return; } -static int selinux_inode_getxattr(struct dentry *dentry, char *name) +static int selinux_inode_getxattr(struct dentry *dentry, const char *name) { return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); } @@ -2721,7 +2723,7 @@ static int selinux_inode_listxattr(struct dentry *dentry) return dentry_has_perm(current, NULL, dentry, FILE__GETATTR); } -static int selinux_inode_removexattr(struct dentry *dentry, char *name) +static int selinux_inode_removexattr(struct dentry *dentry, const char *name) { if (strcmp(name, XATTR_NAME_SELINUX)) return selinux_inode_setotherxattr(dentry, name); @@ -5298,6 +5300,20 @@ static int selinux_key_permission(key_ref_t key_ref, SECCLASS_KEY, perm, NULL); } +static int selinux_key_getsecurity(struct key *key, char **_buffer) +{ + struct key_security_struct *ksec = key->security; + char *context = NULL; + unsigned len; + int rc; + + 
rc = security_sid_to_context(ksec->sid, &context, &len); + if (!rc) + rc = len; + *_buffer = context; + return rc; +} + #endif static struct security_operations selinux_ops = { @@ -5486,6 +5502,7 @@ static struct security_operations selinux_ops = { .key_alloc = selinux_key_alloc, .key_free = selinux_key_free, .key_permission = selinux_key_permission, + .key_getsecurity = selinux_key_getsecurity, #endif #ifdef CONFIG_AUDIT @@ -5534,14 +5551,6 @@ static __init int selinux_init(void) else printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); -#ifdef CONFIG_KEYS - /* Add security information to initial keyrings */ - selinux_key_alloc(&root_user_keyring, current, - KEY_ALLOC_NOT_IN_QUOTA); - selinux_key_alloc(&root_session_keyring, current, - KEY_ALLOC_NOT_IN_QUOTA); -#endif - return 0; } diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 6445b644064..cdb14add27d 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -93,7 +93,7 @@ int security_change_sid(u32 ssid, u32 tsid, int security_sid_to_context(u32 sid, char **scontext, u32 *scontext_len); -int security_context_to_sid(char *scontext, u32 scontext_len, +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *out_sid); int security_context_to_sid_default(char *scontext, u32 scontext_len, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 2daaddbb301..25cac5a2aa8 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -708,7 +708,7 @@ out: } -static int security_context_to_sid_core(char *scontext, u32 scontext_len, +static int security_context_to_sid_core(const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags) { char *scontext2; @@ -835,7 +835,7 @@ out: * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. 
*/ -int security_context_to_sid(char *scontext, u32 scontext_len, u32 *sid) +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid) { return security_context_to_sid_core(scontext, scontext_len, sid, SECSID_NULL, GFP_KERNEL); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 77ec16a3b68..5d2ec5650e6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -574,8 +574,8 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static int smack_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { int rc = 0; @@ -604,8 +604,8 @@ static int smack_inode_setxattr(struct dentry *dentry, char *name, * Set the pointer in the inode blob to the entry found * in the master label list. */ -static void smack_inode_post_setxattr(struct dentry *dentry, char *name, - void *value, size_t size, int flags) +static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) { struct inode_smack *isp; char *nsp; @@ -641,7 +641,7 @@ static void smack_inode_post_setxattr(struct dentry *dentry, char *name, * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_getxattr(struct dentry *dentry, char *name) +static int smack_inode_getxattr(struct dentry *dentry, const char *name) { return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); } @@ -655,7 +655,7 @@ static int smack_inode_getxattr(struct dentry *dentry, char *name) * * Returns 0 if access is permitted, an error code otherwise */ -static int smack_inode_removexattr(struct dentry *dentry, char *name) +static int smack_inode_removexattr(struct dentry *dentry, const char *name) { int rc = 0; diff --git a/sound/core/info.c b/sound/core/info.c 
index 9977ec2eace..cb5ead3e202 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -544,7 +544,7 @@ int __init snd_info_init(void) { struct proc_dir_entry *p; - p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, &proc_root); + p = snd_create_proc_entry("asound", S_IFDIR | S_IRUGO | S_IXUGO, NULL); if (p == NULL) return -ENOMEM; snd_proc_root = p; @@ -594,7 +594,7 @@ int __exit snd_info_done(void) #ifdef CONFIG_SND_OSSEMUL snd_info_free_entry(snd_oss_root); #endif - snd_remove_proc_entry(&proc_root, snd_proc_root); + snd_remove_proc_entry(NULL, snd_proc_root); } return 0; } diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 920e5780c22..23b7bc02728 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -629,9 +629,8 @@ static const struct file_operations snd_mem_proc_fops = { static int __init snd_mem_init(void) { #ifdef CONFIG_PROC_FS - snd_mem_proc = create_proc_entry(SND_MEM_PROC_FILE, 0644, NULL); - if (snd_mem_proc) - snd_mem_proc->proc_fops = &snd_mem_proc_fops; + snd_mem_proc = proc_create(SND_MEM_PROC_FILE, 0644, NULL, + &snd_mem_proc_fops); #endif return 0; } diff --git a/sound/sh/aica.c b/sound/sh/aica.c index d49417bf78c..9ca11332614 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -663,7 +663,7 @@ static int __init aica_init(void) return err; pd = platform_device_register_simple(SND_AICA_DRIVER, -1, aica_memory_space, 2); - if (unlikely(IS_ERR(pd))) { + if (IS_ERR(pd)) { platform_driver_unregister(&snd_aica_driver); return PTR_ERR(pd); } |