900 files changed, 20055 insertions, 10972 deletions
diff --git a/.gitignore b/.gitignore
index 22fb8fa9bc3..8d14531846b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 *.s
 *.ko
 *.so
+*.so.dbg
 *.mod.c
 *.i
 *.lst
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index d3290c46af5..aa38cc5692a 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -46,7 +46,7 @@
 
      <sect1><title>Atomic and pointer manipulation</title>
 !Iinclude/asm-x86/atomic_32.h
-!Iinclude/asm-x86/unaligned_32.h
+!Iinclude/asm-x86/unaligned.h
      </sect1>
 
      <sect1><title>Delaying, scheduling, and timer routines</title>
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
new file mode 100644
index 00000000000..eda40fd39ca
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.txt
@@ -0,0 +1,27 @@
+Control Groupstats is inspired by the discussion at
+http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
+suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
+
+Per cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations are registered with commands
+and attributes specific to cgroups. It should be very easy to
+extend per cgroup statistics, by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is a pull, a push model (to post
+statistics on interesting events), should be very easy to add. Currently
+user space requests for statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup is returned to
+user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics a utility very similar to getdelays.c
+has been developed, the sample output of the utility is shown below
+
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup/a"
+sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+~/balbir/cgroupstats # ./getdelays  -C "/cgroup"
+sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 552cabac060..da42ab414c4 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -87,30 +87,7 @@ changes occur:
 
 	This is used primarily during fault processing.
 
-5) void flush_tlb_pgtables(struct mm_struct *mm,
-			   unsigned long start, unsigned long end)
-
-   The software page tables for address space 'mm' for virtual
-   addresses in the range 'start' to 'end-1' are being torn down.
-
-   Some platforms cache the lowest level of the software page tables
-   in a linear virtually mapped array, to make TLB miss processing
-   more efficient.  On such platforms, since the TLB is caching the
-   software page table structure, it needs to be flushed when parts
-   of the software page table tree are unlinked/freed.
-
-   Sparc64 is one example of a platform which does this.
-
-   Usually, when munmap()'ing an area of user virtual address
-   space, the kernel leaves the page table parts around and just
-   marks the individual pte's as invalid.  However, if very large
-   portions of the address space are unmapped, the kernel frees up
-   those portions of the software page tables to prevent potential
-   excessive kernel memory usage caused by erratic mmap/mmunmap
-   sequences.  It is at these times that flush_tlb_pgtables will
-   be invoked.
-
-6) void update_mmu_cache(struct vm_area_struct *vma,
+5) void update_mmu_cache(struct vm_area_struct *vma,
 			 unsigned long address, pte_t pte)
 
 	At the end of every page fault, this routine is invoked to
@@ -123,7 +100,7 @@ changes occur:
 	translations for software managed TLB configurations.
 	The sparc64 port currently does this.
 
-7) void tlb_migrate_finish(struct mm_struct *mm)
+6) void tlb_migrate_finish(struct mm_struct *mm)
 
 	This interface is called at the end of an explicit
 	process migration. This interface provides a hook
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
new file mode 100644
index 00000000000..98a26f81fa7
--- /dev/null
+++ b/Documentation/cgroups.txt
@@ -0,0 +1,545 @@
+				CGROUPS
+				-------
+
+Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
+
+Original copyright statements from cpusets.txt:
+Portions Copyright (C) 2004 BULL SA.
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
+
+CONTENTS:
+=========
+
+1. Control Groups
+  1.1 What are cgroups ?
+  1.2 Why are cgroups needed ?
+  1.3 How are cgroups implemented ?
+  1.4 What does notify_on_release do ?
+  1.5 How do I use cgroups ?
+2. Usage Examples and Syntax
+  2.1 Basic Usage
+  2.2 Attaching processes
+3. Kernel API
+  3.1 Overview
+  3.2 Synchronization
+  3.3 Subsystem API
+4. Questions
+
+1. Control Groups
+==========
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy.  Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierachies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task pids assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/cpusets.txt) allows
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines:
+
+       CPU :           Top cpuset
+                       /       \
+               CPUSet1         CPUSet2
+                  |              |
+               (Profs)         (Students)
+
+               In addition (system tasks) are attached to topcpuset (so
+               that they can run anywhere) with a limit of 20%
+
+       Memory : Professors (50%), students (30%), system (20%)
+
+       Disk : Prof (50%), students (30%), system (20%)
+
+       Network : WWW browsing (20%), Network File System (60%), others (20%)
+                               / \
+                       Prof (15%) students (5%)
+
+Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
+into NFS network class.
+
+At the same time firefox/lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies) then
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can
+
+       # echo browser_pid > /mnt/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+approp network and other resource class.  This may lead to
+proliferation of such cgroups.
+
+Also lets say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :)  OR give one of the students simulation
+apps enhanced CPU power,
+
+With ability to write pids directly to resource classes, its just a
+matter of :
+
+       # echo pid > /mnt/network/<new_class>/tasks
+       (after some time)
+       # echo pid > /mnt/network/<orig_class>/tasks
+
+Without this ability, he would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+   css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+   cgroup_subsys_state objects, one for each cgroup subsystem
+   registered in the system. There is no direct link from a task to
+   the cgroup of which it's a member in each hierarchy, but this
+   can be determined by following pointers through the
+   cgroup_subsys_state objects. This is because accessing the
+   subsystem state is something that's expected to happen frequently
+   and in performance-critical code, whereas operations that require a
+   task's actual cgroup assignments (in particular, moving between
+   cgroups) are less common. A linked list runs through the cg_list
+   field of each task_struct using the css_set, anchored at
+   css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted  for browsing and
+   manipulation from user space.
+
+ - You can list all the tasks (by pid) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+   css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition a new file system, of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel.  When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options.  By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by pid) attached to that cgroup
+ - notify_on_release flag: run /sbin/cgroup_release_agent on exit?
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir
+
+New cgroups are created using the mkdir system call or shell
+command.  The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks.  A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, else a new
+css_set is allocated. Note that the current implementation uses a
+linear search to locate an appropriate existing css_set, so isn't
+very efficient. A future version will use a hash table for better
+performance.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cont_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+*** notify_on_release is disabled in the current patch set. It will be
+*** reactivated in a future patch in a less-intrusive manner
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup.  This enables automatic
+removal of abandoned cgroups.  The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0).  The default value of other cgroups at creation is the current
+value of their parents notify_on_release setting. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like:
+
+ 1) mkdir /dev/cgroup
+ 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
+ 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
+    the /dev/cgroup virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cgroup by writing its pid to the
+    /dev/cgroup tasks file for that cgroup.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup:
+
+  mount -t cgroup cpuset -ocpuset /dev/cgroup
+  cd /dev/cgroup
+  mkdir Charlie
+  cd Charlie
+  /bin/echo 2-3 > cpus
+  /bin/echo 1 > mems
+  /bin/echo $$ > tasks
+  sh
+  # The subshell 'sh' is now running in cgroup Charlie
+  # The next line should display '/Charlie'
+  cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy will all available subsystems, type:
+# mount -t cgroup xxx /dev/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+To mount a cgroup hierarchy with just the cpuset and numtasks
+subsystems, type:
+# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
+
+To change the set of subsystems bound to a mounted hierarchy, just
+remount with different options:
+
+# mount -o remount,cpuset,ns  /dev/cgroup
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /dev/cgroup you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /dev/cgroup
+is the cgroup that holds the whole system.
+
+If you want to create a new cgroup under /dev/cgroup:
+# cd /dev/cgroup
+# mkdir my_cgroup
+
+Now you want to do something with this cgroup.
+# cd my_cgroup
+
+In this directory you can find several files:
+# ls
+notify_on_release release_agent tasks
+(plus whatever files are added by the attached subsystems)
+
+Now attach your shell to this cgroup:
+# /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory.
+# mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir:
+# rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+# /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another:
+
+# /bin/echo PID1 > tasks
+# /bin/echo PID2 > tasks
+	...
+# /bin/echo PIDn > tasks
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem id which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+  entry in cgroup->subsys[] this subsystem should be
+  managing. Initialized by cgroup_register_subsys(); prior to this
+  it should be initialized to -1
+
+- hierarchy: an index indicating which hierarchy, if any, this
+  subsystem is currently attached to. If this is -1, then the
+  subsystem is not attached to any hierarchy, and all tasks should be
+  considered to be members of the subsystem's top_cgroup. It should
+  be initialized to -1.
+
+- name: should be initialized to a unique subsystem name prior to
+  calling cgroup_register_subsystem. Should be no longer than
+  MAX_CGROUP_TYPE_NAMELEN
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem id; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock(), and can
+take/release the callback_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+--------------------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_subsys
+
+Each subsystem may export the following methods. The only mandatory
+methods are create/destroy. Any others that are null are presumed to
+be successful no-ops.
+
+struct cgroup_subsys_state *create(struct cgroup *cont)
+LL=cgroup_mutex
+
+Called to create a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+negative error code. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+void destroy(struct cgroup *cont)
+LL=cgroup_mutex
+
+The cgroup system is about to destroy the passed cgroup; the
+subsystem should do any necessary cleanup
+
+int can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+	       struct task_struct *task)
+LL=cgroup_mutex
+
+Called prior to moving a task into a cgroup; if the subsystem
+returns an error, this will abort the attach operation.  If a NULL
+task is passed, then a successful result indicates that *any*
+unspecified task can be moved into the cgroup. Note that this isn't
+called on a fork. If this method returns 0 (success) then this should
+remain valid while the caller holds cgroup_mutex.
+
+void attach(struct cgroup_subsys *ss, struct cgroup *cont,
+	    struct cgroup *old_cont, struct task_struct *task)
+LL=cgroup_mutex
+
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+
+void fork(struct cgroup_subsy *ss, struct task_struct *task)
+LL=callback_mutex, maybe read_lock(tasklist_lock)
+
+Called when a task is forked into a cgroup. Also called during
+registration for all existing tasks.
+
+void exit(struct cgroup_subsys *ss, struct task_struct *task)
+LL=callback_mutex
+
+Called during task exit
+
+int populate(struct cgroup_subsys *ss, struct cgroup *cont)
+LL=none
+
+Called after creation of a cgroup to allow a subsystem to populate
+the cgroup directory with file entries.  The subsystem should make
+calls to cgroup_add_file() with objects of type cftype (see
+include/linux/cgroup.h for details).  Note that although this
+method can return an error code, the error code is currently not
+always handled well.
+
+void post_clone(struct cgroup_subsys *ss, struct cgroup *cont)
+
+Called at the end of cgroup_clone() to do any paramater
+initialization which might be required before a task could attach.  For
+example in cpusets, no task may attach before 'cpus' and 'mems' are set
+up.
+
+void bind(struct cgroup_subsys *ss, struct cgroup *root)
+LL=callback_mutex
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Questions
+============
+
+Q: what's up with this '/bin/echo' ?
+A: bash's builtin 'echo' command does not check calls to write() against
+   errors. If you use it in the cgroup file system, you won't be
+   able to tell whether a command succeeded or failed.
+
+Q: When I attach processes, only the first of the line gets really attached !
+A: We can only return one error code per call to write(). So you should also
+   put only ONE pid.
+
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index b6d24c22274..a741f658a3c 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -220,7 +220,9 @@ A: The following happen, listed in no particular order :-)
   CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
   CPU is being offlined while tasks are frozen due to a suspend operation in
   progress
-- All process is migrated away from this outgoing CPU to a new CPU
+- All processes are migrated away from this outgoing CPU to new CPUs.
+  The new CPU is chosen from each process' current cpuset, which may be
+  a subset of all online CPUs.
 - All interrupts targeted to this CPU is migrated to a new CPU
 - timers/bottom half/task lets are also migrated to a new CPU
 - Once all services are migrated, kernel calls an arch specific routine
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index ec9de6917f0..141bef1c859 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -7,6 +7,7 @@ Written by Simon.Derr@bull.net
 Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
 Modified by Paul Jackson <pj@sgi.com>
 Modified by Christoph Lameter <clameter@sgi.com>
+Modified by Paul Menage <menage@google.com>
 
 CONTENTS:
 =========
@@ -16,9 +17,9 @@ CONTENTS:
   1.2 Why are cpusets needed ?
   1.3 How are cpusets implemented ?
   1.4 What are exclusive cpusets ?
-  1.5 What does notify_on_release do ?
-  1.6 What is memory_pressure ?
-  1.7 What is memory spread ?
+  1.5 What is memory_pressure ?
+  1.6 What is memory spread ?
+  1.7 What is sched_load_balance ?
   1.8 How do I use cpusets ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
@@ -44,18 +45,19 @@ hierarchy visible in a virtual file system.  These are the essential
 hooks, beyond what is already present, required to manage dynamic
 job placement on large systems.
 
-Each task has a pointer to a cpuset.  Multiple tasks may reference
-the same cpuset.  Requests by a task, using the sched_setaffinity(2)
-system call to include CPUs in its CPU affinity mask, and using the
-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes
-in its memory policy, are both filtered through that tasks cpuset,
-filtering out any CPUs or Memory Nodes not in that cpuset.  The
-scheduler will not schedule a task on a CPU that is not allowed in
-its cpus_allowed vector, and the kernel page allocator will not
-allocate a page on a node that is not allowed in the requesting tasks
-mems_allowed vector.
-
-User level code may create and destroy cpusets by name in the cpuset
+Cpusets use the generic cgroup subsystem described in
+Documentation/cgroup.txt.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that tasks cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset.  The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting tasks mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
 virtual file system, manage the attributes and permissions of these
 cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
 specify and query to which cpuset a task is assigned, and list the
@@ -115,7 +117,7 @@ Cpusets extends these two mechanisms as follows:
  - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
    kernel.
  - Each task in the system is attached to a cpuset, via a pointer
-   in the task structure to a reference counted cpuset structure.
+   in the task structure to a reference counted cgroup structure.
  - Calls to sched_setaffinity are filtered to just those CPUs
    allowed in that tasks cpuset.
  - Calls to mbind and set_mempolicy are filtered to just
@@ -145,15 +147,10 @@ into the rest of the kernel, none in performance critical paths:
  - in page_alloc.c, to restrict memory to allowed nodes.
  - in vmscan.c, to restrict page recovery to the current cpuset.
 
-In addition a new file system, of type "cpuset" may be mounted,
-typically at /dev/cpuset, to enable browsing and modifying the cpusets
-presently known to the kernel.  No new system calls are added for
-cpusets - all support for querying and modifying cpusets is via
-this cpuset file system.
-
-Each task under /proc has an added file named 'cpuset', displaying
-the cpuset name, as the path relative to the root of the cpuset file
-system.
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel.  No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
 
 The /proc/<pid>/status file for each task has two added lines,
 displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
@@ -163,16 +160,15 @@ in the format seen in the following example:
   Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
   Mems_allowed:   ffffffff,ffffffff
 
-Each cpuset is represented by a directory in the cpuset file system
-containing the following files describing that cpuset:
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
 
  - cpus: list of CPUs in that cpuset
  - mems: list of Memory Nodes in that cpuset
  - memory_migrate flag: if set, move pages to cpusets nodes
  - cpu_exclusive flag: is cpu placement exclusive?
  - mem_exclusive flag: is memory placement exclusive?
- - tasks: list of tasks (by pid) attached to that cpuset
- - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
  - memory_pressure: measure of how much paging pressure in cpuset
 
 In addition, the root cpuset only has the following file:
@@ -237,21 +233,7 @@ such as requests from interrupt handlers, is allowed to be taken
 outside even a mem_exclusive cpuset.
 
 
-1.5 What does notify_on_release do ?
-------------------------------------
-
-If the notify_on_release flag is enabled (1) in a cpuset, then whenever
-the last task in the cpuset leaves (exits or attaches to some other
-cpuset) and the last child cpuset of that cpuset is removed, then
-the kernel runs the command /sbin/cpuset_release_agent, supplying the
-pathname (relative to the mount point of the cpuset file system) of the
-abandoned cpuset.  This enables automatic removal of abandoned cpusets.
-The default value of notify_on_release in the root cpuset at system
-boot is disabled (0).  The default value of other cpusets at creation
-is the current value of their parents notify_on_release setting.
-
-
-1.6 What is memory_pressure ?
+1.5 What is memory_pressure ?
 -----------------------------
 The memory_pressure of a cpuset provides a simple per-cpuset metric
 of the rate that the tasks in a cpuset are attempting to free up in
@@ -308,7 +290,7 @@ the tasks in the cpuset, in units of reclaims attempted per second,
 times 1000.
 
 
-1.7 What is memory spread ?
+1.6 What is memory spread ?
 ---------------------------
 There are two boolean flag files per cpuset that control where the
 kernel allocates pages for the file system buffers and related in
@@ -378,6 +360,142 @@ policy, especially for jobs that might have one thread reading in the
 data set, the memory allocation across the nodes in the jobs cpuset
 can become very uneven.
 
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched.c) automatically load balances
+tasks.  If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced.  So the scheduler
+has support to  partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, except those
+marked isolated using the kernel boot time "isolcpus=" argument.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+ 1) On large systems, load balancing across many CPUs is expensive.
+    If the system is managed using cpusets to place independent jobs
+    on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+    system overhead on those CPUs, including avoiding task load
+    balancing if that is not needed.
+
+When the per-cpuset flag "sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendent cpusets.  Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest.  Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding.  So if each of two partially
+overlapping cpusets enables the flag 'sched_load_balance', then we
+form a single sched domain that is a superset of both.  We won't move
+a task to a CPU outside it cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "sched_load_balance" enabled,
+and the sched domain configuration.  If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above.  In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.)  When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can.  It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all
+the CPUs that must be load balanced.
+
+Whenever the 'sched_load_balance' flag changes, or CPUs come or go
+from a cpuset with this flag enabled, or a cpuset with this flag
+enabled is removed, the cpuset code builds a new such partition and
+passes it to the scheduler sched domain setup code, to have the sched
+domains rebuilt as necessary.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (cpumask_t) in the partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
 
 1.8 How do I use cpusets ?
 --------------------------
@@ -469,7 +587,7 @@ than stress the kernel.
 To start a new job that is to be contained within a cpuset, the steps are:
 
  1) mkdir /dev/cpuset
- 2) mount -t cpuset none /dev/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /dev/cpuset
  3) Create the new cpuset by doing mkdir's and write's (or echo's) in
     the /dev/cpuset virtual file system.
  4) Start a task that will be the "founding father" of the new job.
@@ -481,7 +599,7 @@ For example, the following sequence of commands will setup a cpuset
 named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
 and then start a subshell 'sh' in that cpuset:
 
-  mount -t cpuset none /dev/cpuset
+  mount -t cgroup -ocpuset cpuset /dev/cpuset
   cd /dev/cpuset
   mkdir Charlie
   cd Charlie
@@ -513,7 +631,7 @@ Creating, modifying, using the cpusets can be done through the cpuset
 virtual filesystem.
 
 To mount it, type:
-# mount -t cpuset none /dev/cpuset
+# mount -t cgroup -o cpuset cpuset /dev/cpuset
 
 Then under /dev/cpuset you can find a tree that corresponds to the
 tree of the cpusets in the system. For instance, /dev/cpuset
@@ -556,6 +674,18 @@ To remove a cpuset, just use rmdir:
 This will fail if the cpuset is in use (has cpusets inside, or has
 processes attached).
 
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command
+
+mount -t cpuset X /dev/cpuset
+
+is equivalent to
+
+mount -t cgroup -ocpuset X /dev/cpuset
+echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
+
 2.2 Adding/removing cpus
 ------------------------
 
diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
new file mode 100644
index 00000000000..07edbd85c71
--- /dev/null
+++ b/Documentation/device-mapper/dm-uevent.txt
@@ -0,0 +1,97 @@
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents).  Previously device-mapper events were only
+available through the ioctl interface.  The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events.  The first function
+listed creates the event and the second function sends the event(s).
+
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+                    const char *path, unsigned nr_valid_paths)
+
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Device-mapper specific action that caused the uevent action.
+	PATH_FAILED - A path has failed.
+	PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description: A sequence number for this specific device-mapper device.
+Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Major and minor number of the path device pertaining to this
+event.
+Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description:
+Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Name of the device-mapper device.
+Value: Name
+
+Variable Name: DM_UUID
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: UUID of the device-mapper device.
+Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below.
+
+1.) Path failure.
+UEVENT[1192521009.711215] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_FAILED
+DM_SEQNUM=1
+DM_PATH=8:32
+DM_NR_VALID_PATHS=0
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1130
+
+2.) Path reinstate.
+UEVENT[1192521132.989927] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_REINSTATED
+DM_SEQNUM=2
+DM_PATH=8:32
+DM_NR_VALID_PATHS=1
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1131
diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
index d9d523099bb..4d932dc6609 100644
--- a/Documentation/input/input-programming.txt
+++ b/Documentation/input/input-programming.txt
@@ -42,8 +42,8 @@ static int __init button_init(void)
 		goto err_free_irq;
 	}
 
-	button_dev->evbit[0] = BIT(EV_KEY);
-	button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	button_dev->evbit[0] = BIT_MASK(EV_KEY);
+	button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	error = input_register_device(button_dev);
 	if (error) {
@@ -217,14 +217,15 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean
 that the thing is precise and always returns to exactly the center position
 (if it has any).
 
-1.4 NBITS(), LONG(), BIT()
+1.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-These three macros from input.h help some bitfield computations:
+These three macros from bitops.h help some bitfield computations:
 
-	NBITS(x) - returns the length of a bitfield array in longs for x bits
-	LONG(x)  - returns the index in the array in longs for bit x
-	BIT(x)   - returns the index in a long for bit x
+	BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for
+			   x bits
+	BIT_WORD(x)	 - returns the index in the array in longs for bit x
+	BIT_MASK(x)	 - returns the index in a long for bit x
 
 1.5 The id* and name fields
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index fe8b0c4892c..616043a6da9 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -77,7 +77,12 @@ applicable everywhere (see syntax).
   Optionally, dependencies only for this default value can be added with
   "if".
 
-- dependencies: "depends on"/"requires" <expr>
+- type definition + default value:
+	"def_bool"/"def_tristate" <expr> ["if" <expr>]
+  This is a shorthand notation for a type definition plus a value.
+  Optionally dependencies for this default value can be added with "if".
+
+- dependencies: "depends on" <expr>
   This defines a dependency for this menu entry. If multiple
   dependencies are defined, they are connected with '&&'. Dependencies
   are applied to all other options within this menu entry (which also
@@ -289,3 +294,10 @@ source:
 	"source" <prompt>
 
 This reads the specified configuration file. This file is always parsed.
+
+mainmenu:
+
+	"mainmenu" <prompt>
+
+This sets the config program's title bar if the config program chooses
+to use it.
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index f099b814d38..6166e2d7da7 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -518,6 +518,28 @@ more details, with real examples.
 	In this example for a specific GCC version the build will error out explaining
 	to the user why it stops.
 
+    cc-cross-prefix
+	cc-cross-prefix is used to check if there exist a $(CC) in path with
+	one of the listed prefixes. The first prefix where there exist a
+	prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found
+	then nothing is returned.
+	Additional prefixes are separated by a single space in the
+	call of cc-cross-prefix.
+	This functionality is usefull for architecture Makefile that try
+	to set CROSS_COMPILE to well know values but may have several
+	values to select between.
+	It is recommended only to try to set CROSS_COMPILE is it is a cross
+	build (host arch is different from target arch). And is CROSS_COMPILE
+	is already set then leave it with the old value.
+
+	Example:
+		#arch/m68k/Makefile
+		ifneq ($(SUBARCH),$(ARCH))
+		        ifeq ($(CROSS_COMPILE),)
+		               CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-)
+			endif
+		endif
+
 === 4 Host Program support
 
 Kbuild supports building executables on the host for use during the
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 1b37b28cc23..d0ac72cc19f 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -231,6 +231,32 @@ Dump-capture kernel config options (Arch Dependent, ia64)
   any space below the alignment point will be wasted.
 
 
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is:
+
+    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+    range=start-[end]
+
+For example:
+
+    crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+    1) if the RAM is smaller than 512M, then don't reserve anything
+       (this is the "rescue" case)
+    2) if the RAM size is between 512M and 2G, then reserve 64M
+    3) if the RAM size is larger than 2G, then reserve 128M
+
+
 Boot into System Kernel
 =======================
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 189df0bcab9..7bf6bd2f530 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -431,8 +431,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			over the 8254 in addition to over the IO-APIC. The
 			kernel tries to set a sensible default.
 
-	hpet=		[X86-32,HPET] option to disable HPET and use PIT.
-			Format: disable
+	hpet=		[X86-32,HPET] option to control HPET usage
+			Format: { enable (default) | disable | force }
+			disable: disable HPET and use PIT instead
+			force: allow force enabled of undocumented chips (ICH4, VIA)
 
 	com20020=	[HW,NET] ARCnet - COM20020 chipset
 			Format:
@@ -497,6 +499,13 @@ and is between 256 and 4096 characters. It is defined in the file
 			[KNL] Reserve a chunk of physical memory to
 			hold a kernel to switch to with kexec on panic.
 
+	crashkernel=range1:size1[,range2:size2,...][@offset]
+			[KNL] Same as above, but depends on the memory
+			in the running system. The syntax of range is
+			start-[end] where start and end are both
+			a memory unit (amount[KMG]). See also
+			Documentation/kdump/kdump.txt for a example.
+
 	cs4232=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma2>,<mpuio>,<mpuirq>
 
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
new file mode 100644
index 00000000000..295a71bc301
--- /dev/null
+++ b/Documentation/markers.txt
@@ -0,0 +1,81 @@
+ 	             Using the Linux Kernel Markers
+
+			    Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Markers and their use. It provides
+examples of how to insert markers in the kernel and connect probe functions to
+them and provides some examples of probe functions.
+
+
+* Purpose of markers
+
+A marker placed in code provides a hook to call a function (probe) that you can
+provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
+(no probe is attached). When a marker is "off" it has no effect, except for
+adding a tiny time penalty (checking a condition for a branch) and space
+penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+marker is "on", the function you provide is called each time the marker is
+executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the marker site).
+
+You can put markers at important locations in the code. Markers are
+lightweight hooks that can pass an arbitrary number of parameters,
+described in a printk-like format string, to the attached probe function.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+In order to use the macro trace_mark, you should include linux/marker.h.
+
+#include <linux/marker.h>
+
+And,
+
+trace_mark(subsystem_event, "%d %s", someint, somestring);
+Where :
+- subsystem_event is an identifier unique to your event
+    - subsystem is the name of your subsystem.
+    - event is the name of the event to mark.
+- "%d %s" is the formatted string for the serializer.
+- someint is an integer.
+- somestring is a char pointer.
+
+Connecting a function (probe) to a marker is done by providing a probe (function
+to call) for the specific marker through marker_probe_register() and can be
+activated by calling marker_arm(). Marker deactivation can be done by calling
+marker_disarm() as many times as marker_arm() has been called. Removing a probe
+is done through marker_probe_unregister(); it will disarm the probe and make
+sure there is no caller left using the probe when it returns. Probe removal is
+preempt-safe because preemption is disabled around the probe call. See the
+"Probe example" section below for a sample probe module.
+
+The marker mechanism supports inserting multiple instances of the same marker.
+Markers can be put in inline functions, inlined static functions, and
+unrolled loops as well as regular functions.
+
+The naming scheme "subsystem_event" is suggested here as a convention intended
+to limit collisions. Marker names are global to the kernel: they are considered
+as being the same whether they are in the core kernel image or in modules.
+Conflicting format strings for markers with the same name will cause the markers
+to be detected to have a different format string not to be armed and will output
+a printk warning which identifies the inconsistency:
+
+"Format mismatch for probe probe_name (format), marker (format)"
+
+
+* Probe / marker example
+
+See the example provided in samples/markers/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe marker-example (insmod order is not important)
+modprobe probe-example
+cat /proc/marker-example (returns an expected error)
+rmmod marker-example probe-example
+dmesg
diff --git a/Documentation/mips/00-INDEX b/Documentation/mips/00-INDEX
index 9df8a2eac7b..3f13bf8043d 100644
--- a/Documentation/mips/00-INDEX
+++ b/Documentation/mips/00-INDEX
@@ -4,5 +4,3 @@ AU1xxx_IDE.README
 	- README for MIPS AU1XXX IDE driver.
 GT64120.README
 	- README for dir with info on MIPS boards using GT-64120 or GT-64120A.
-time.README
-	- README for MIPS time services.
diff --git a/Documentation/mips/time.README b/Documentation/mips/time.README
deleted file mode 100644
index a4ce603ed3b..00000000000
--- a/Documentation/mips/time.README
+++ /dev/null
@@ -1,173 +0,0 @@
-README for MIPS time services
-
-Jun Sun
-jsun@mvista.com or jsun@junsun.net
-
-
-ABOUT
------
-This file describes the new arch/mips/kernel/time.c, related files and the 
-services they provide. 
-
-If you are short in patience and just want to know how to use time.c for a 
-new board or convert an existing board, go to the last section.
-
-
-FILES, COMPATABILITY AND CONFIGS
----------------------------------
-
-The old arch/mips/kernel/time.c is renamed to old-time.c.
-
-A new time.c is put there, together with include/asm-mips/time.h.
-
-Two configs variables are introduced, CONFIG_OLD_TIME_C and CONFIG_NEW_TIME_C.
-So we allow boards using 
-
-	1) old time.c (CONFIG_OLD_TIME_C)
-	2) new time.c (CONFIG_NEW_TIME_C)
-	3) neither (their own private time.c)
-
-However, it is expected every board will move to the new time.c in the near
-future.
-
-
-WHAT THE NEW CODE PROVIDES?
---------------------------- 
-
-The new time code provide the following services:
-
-  a) Implements functions required by Linux common code:
-	time_init
-
-  b) provides an abstraction of RTC and null RTC implementation as default.
-	extern unsigned long (*rtc_get_time)(void);
-	extern int (*rtc_set_time)(unsigned long);
-
-  c) high-level and low-level timer interrupt routines where the timer
-     interrupt source  may or may not be the CPU timer.  The high-level
-     routine is dispatched through do_IRQ() while the low-level is
-     dispatched in assemably code (usually int-handler.S)
-
-
-WHAT THE NEW CODE REQUIRES?
----------------------------
-
-For the new code to work properly, each board implementation needs to supply
-the following functions or values:
-
-  a) board_time_init - a function pointer.  Invoked at the beginnig of
-     time_init().  It is optional.
-	1. (optional) set up RTC routines
-	2. (optional) calibrate and set the mips_hpt_frequency
-
-  b) plat_timer_setup - a function pointer.  Invoked at the end of time_init()
-	1. (optional) over-ride any decisions made in time_init()
-	2. set up the irqaction for timer interrupt.
-	3. enable the timer interrupt
-
-  c) (optional) board-specific RTC routines.
-
-  d) (optional) mips_hpt_frequency - It must be definied if the board
-     is using CPU counter for timer interrupt.
-
-
-PORTING GUIDE
--------------
-
-Step 1: decide how you like to implement the time services.
-
-  a) does this board have a RTC?  If yes, implement the two RTC funcs.
-
-  b) does the CPU have counter/compare registers? 
-
-     If the answer is no, you need a timer to provide the timer interrupt
-     at 100 HZ speed.
-
-  c) The following sub steps assume your CPU has counter register.
-     Do you plan to use the CPU counter register as the timer interrupt
-     or use an exnternal timer?
-
-     In order to use CPU counter register as the timer interrupt source, you
-     must know the counter speed (mips_hpt_frequency).  It is usually the
-     same as the CPU speed or an integral divisor of it.
-
-  d) decide on whether you want to use high-level or low-level timer
-     interrupt routines.  The low-level one is presumably faster, but should
-     not make too mcuh difference.
-
-
-Step 2:  the machine setup() function
-
-  If you supply board_time_init(), set the function poointer.
-
-
-Step 3: implement rtc routines, board_time_init() and plat_timer_setup()
-  if needed.
-
-  board_time_init() -
-  	a) (optional) set up RTC routines,
-        b) (optional) calibrate and set the mips_hpt_frequency
- 	    (only needed if you intended to use cpu counter as timer interrupt
- 	     source)
-
-  plat_timer_setup() -
- 	a) (optional) over-write any choices made above by time_init().
- 	b) machine specific code should setup the timer irqaction.
- 	c) enable the timer interrupt
-
-
-  If the RTC chip is a common chip, I suggest the routines are put under
-  arch/mips/libs.  For example, for DS1386 chip, one would create
-  rtc-ds1386.c under arch/mips/lib directory.  Add the following line to
-  the arch/mips/lib/Makefile:
-
-	obj-$(CONFIG_DDB5476) += rtc-ds1386.o
-
-Step 4: if you are using low-level timer interrupt, change your interrupt
-  dispathcing code to check for timer interrupt and jump to 
-  ll_timer_interrupt() directly  if one is detected.
-
-Step 5: Modify arch/mips/config.in and add CONFIG_NEW_TIME_C to your machine.
-  Modify the appropriate defconfig if applicable.
-
-Final notes: 
-
-For some tricky cases, you may need to add your own wrapper functions 
-for some of the functions in time.c.  
-
-For example, you may define your own timer interrupt routine, which does
-some of its own processing and then calls timer_interrupt().
-
-You can also over-ride any of the built-in functions (RTC routines
-and/or timer interrupt routine).
-
-
-PORTING NOTES FOR SMP
-----------------------
-
-If you have a SMP box, things are slightly more complicated.
-
-The time service running every jiffy is logically divided into two parts:
-
-  1) the one for the whole system  (defined in timer_interrupt())
-  2) the one that should run for each CPU (defined in local_timer_interrupt())
-
-You need to decide on your timer interrupt sources.
-
-  case 1) - whole system has only one timer interrupt delivered to one CPU
-
-	In this case, you set up timer interrupt as in UP systems.  In addtion,
-	you need to set emulate_local_timer_interrupt to 1 so that other
-	CPUs get to call local_timer_interrupt().
-
-	THIS IS CURRENTLY NOT IMPLEMNETED.  However, it is rather easy to write
-	one should such a need arise.  You simply make a IPI call.
-
-  case 2) - each CPU has a separate timer interrupt
-
-	In this case, you need to set up IRQ such that each of them will
-	call local_timer_interrupt().  In addition, you need to arrange
-	one and only one of them to call timer_interrupt().
-
-	You can also do the low-level version of those interrupt routines,
-	following similar dispatching routes described above.
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt
index 60953d6c919..3b95bbacc77 100644
--- a/Documentation/thinkpad-acpi.txt
+++ b/Documentation/thinkpad-acpi.txt
@@ -105,10 +105,15 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver
 as a driver attribute (see below).
 
 Sysfs driver attributes are on the driver's sysfs attribute space,
-for 2.6.20 this is /sys/bus/platform/drivers/thinkpad_acpi/.
+for 2.6.23 this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
 
-Sysfs device attributes are on the driver's sysfs attribute space,
-for 2.6.20 this is /sys/devices/platform/thinkpad_acpi/.
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23 this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad".
 
 Driver version
 --------------
@@ -766,7 +771,7 @@ Temperature sensors
 -------------------
 
 procfs: /proc/acpi/ibm/thermal
-sysfs device attributes: (hwmon) temp*_input
+sysfs device attributes: (hwmon "thinkpad") temp*_input
 
 Most ThinkPads include six or more separate temperature sensors but only
 expose the CPU temperature through the standard ACPI methods.  This
@@ -989,7 +994,9 @@ Fan control and monitoring: fan speed, fan enable/disable
 ---------------------------------------------------------
 
 procfs: /proc/acpi/ibm/fan
-sysfs device attributes: (hwmon) fan_input, pwm1, pwm1_enable
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
+			  pwm1_enable
+sysfs hwmon driver attributes: fan_watchdog
 
 NOTE NOTE NOTE: fan control operations are disabled by default for
 safety reasons.  To enable them, the module parameter "fan_control=1"
@@ -1131,7 +1138,7 @@ hwmon device attribute fan1_input:
 	which can take up to two minutes.  May return rubbish on older
 	ThinkPads.
 
-driver attribute fan_watchdog:
+hwmon driver attribute fan_watchdog:
 	Fan safety watchdog timer interval, in seconds.  Minimum is
 	1 second, maximum is 120 seconds.  0 disables the watchdog.
 
@@ -1233,3 +1240,9 @@ Sysfs interface changelog:
 		layer, the radio switch generates input event EV_RADIO,
 		and the driver enables hot key handling by default in
 		the firmware.
+
+0x020000:	ABI fix: added a separate hwmon platform device and
+		driver, which must be located by name (thinkpad)
+		and the hwmon class for libsensors4 (lm-sensors 3)
+		compatibility.  Moved all hwmon attributes to this
+		new platform device.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2534dc4aa95..4ed41394e49 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2178,7 +2178,7 @@ S:	Maintained
 KCONFIG
 P:	Roman Zippel
 M:	zippel@linux-m68k.org
-L:	kbuild-devel@lists.sourceforge.net
+L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 
 KDUMP
@@ -2207,6 +2207,7 @@ KERNEL BUILD (kbuild: Makefile, scripts/Makefile.*)
 P:	Sam Ravnborg
 M:	sam@ravnborg.org
 T:	git kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
+L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 
 KERNEL JANITORS
diff --git a/Makefile b/Makefile
index 529b9048d97..f9c264e243a 100644
--- a/Makefile
+++ b/Makefile
@@ -774,6 +774,9 @@ vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) vmlinux.o
 ifdef CONFIG_HEADERS_CHECK
 	$(Q)$(MAKE) -f $(srctree)/Makefile headers_check
 endif
+ifdef CONFIG_SAMPLES
+	$(Q)$(MAKE) $(build)=samples
+endif
 	$(call vmlinux-modpost)
 	$(call if_changed_rule,vmlinux__)
 	$(Q)rm -f .old_version
@@ -884,10 +887,7 @@ prepare2: prepare3 outputmakefile
 
 prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
                    include/asm include/config/auto.conf
-ifneq ($(KBUILD_MODULES),)
-	$(Q)mkdir -p $(MODVERDIR)
-	$(Q)rm -f $(MODVERDIR)/*
-endif
+	$(cmd_crmodverdir)
 
 archprepare: prepare1 scripts_basic
 
@@ -903,14 +903,24 @@ prepare: prepare0
 
 export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
 
-# FIXME: The asm symlink changes when $(ARCH) changes. That's
-# hard to detect, but I suppose "make mrproper" is a good idea
-# before switching between archs anyway.
-
-include/asm:
-	@echo '  SYMLINK $@ -> include/asm-$(SRCARCH)'
-	$(Q)if [ ! -d include ]; then mkdir -p include; fi;
-	@ln -fsn asm-$(SRCARCH) $@
+# The asm symlink changes when $(ARCH) changes.
+# Detect this and ask user to run make mrproper
+
+include/asm: FORCE
+	$(Q)set -e; asmlink=`readlink include/asm | cut -d '-' -f 2`;   \
+	if [ -L include/asm ]; then                                     \
+		if [ "$$asmlink" != "$(SRCARCH)" ]; then                \
+			echo "ERROR: the symlink $@ points to asm-$$asmlink but asm-$(SRCARCH) was expected"; \
+			echo "       set ARCH or save .config and run 'make mrproper' to fix it";             \
+			exit 1;                                         \
+		fi;                                                     \
+	else                                                            \
+		echo '  SYMLINK $@ -> include/asm-$(SRCARCH)';          \
+		if [ ! -d include ]; then                               \
+			mkdir -p include;                               \
+		fi;                                                     \
+		ln -fsn asm-$(SRCARCH) $@;                              \
+	fi
 
 # Generate some files
 # ---------------------------------------------------------------------------
@@ -1020,19 +1030,12 @@ _modinst_:
 	fi
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
-# If System.map exists, run depmod.  This deliberately does not have a
-# dependency on System.map since that would run the dependency tree on
-# vmlinux.  This depmod is only for convenience to give the initial
+# This depmod is only for convenience to give the initial
 # boot a modules.dep even before / is mounted read-write.  However the
 # boot script depmod is the master version.
-ifeq "$(strip $(INSTALL_MOD_PATH))" ""
-depmod_opts	:=
-else
-depmod_opts	:= -b $(INSTALL_MOD_PATH) -r
-endif
 PHONY += _modinst_post
 _modinst_post: _modinst_
-	if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi
+	$(call cmd,depmod)
 
 else # CONFIG_MODULES
 
@@ -1220,8 +1223,7 @@ else # KBUILD_EXTMOD
 KBUILD_MODULES := 1
 PHONY += crmodverdir
 crmodverdir:
-	$(Q)mkdir -p $(MODVERDIR)
-	$(Q)rm -f $(MODVERDIR)/*
+	$(cmd_crmodverdir)
 
 PHONY += $(objtree)/Module.symvers
 $(objtree)/Module.symvers:
@@ -1249,15 +1251,6 @@ _emodinst_:
 	$(Q)mkdir -p $(MODLIB)/$(install-dir)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
 
-# Run depmod only is we have System.map and depmod is executable
-quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
-      cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \
-                      $(DEPMOD) -ae -F System.map             \
-                      $(if $(strip $(INSTALL_MOD_PATH)),      \
-		      -b $(INSTALL_MOD_PATH) -r)              \
-		      $(KERNELRELEASE);                       \
-                   fi
-
 PHONY += _emodinst_post
 _emodinst_post: _emodinst_
 	$(call cmd,depmod)
@@ -1341,7 +1334,7 @@ define find-sources
 	  find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \
 	       -name $1 -print; \
 	  find $(__srctree) $(RCS_FIND_IGNORE) \
-	       \( -name include -o -name arch \) -prune -o \
+	       \( -name include -o -name arch -o -name '.tmp_*' \) -prune -o \
 	       -name $1 -print; \
 	  )
 endef
@@ -1490,9 +1483,11 @@ endif
 
 # Modules
 / %/: prepare scripts FORCE
+	$(cmd_crmodverdir)
 	$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
 	$(build)=$(build-dir)
 %.ko: prepare scripts FORCE
+	$(cmd_crmodverdir)
 	$(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1)   \
 	$(build)=$(build-dir) $(@:.ko=.o)
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
@@ -1506,6 +1501,19 @@ quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN   $(wildcard $(rm-dirs)))
 quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN   $(wildcard $(rm-files)))
       cmd_rmfiles = rm -f $(rm-files)
 
+# Run depmod only is we have System.map and depmod is executable
+# and we build for the host arch
+quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
+      cmd_depmod = \
+	if [ -r System.map -a -x $(DEPMOD) -a "$(SUBARCH)" == "$(ARCH)" ]; then \
+		$(DEPMOD) -ae -F System.map                                     \
+		$(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) -r)   \
+		$(KERNELRELEASE);                                               \
+	fi
+
+# Create temporary dir for module support files
+cmd_crmodverdir = $(Q)mkdir -p $(MODVERDIR); rm -f $(MODVERDIR)/*
+
 
 a_flags = -Wp,-MD,$(depfile) $(KBUILD_AFLAGS) $(AFLAGS_KERNEL) \
 	  $(NOSTDINC_FLAGS) $(KBUILD_CPPFLAGS) \
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 2a85dc33907..4c002ba37e5 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -654,7 +654,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/alpha/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/alpha/Kconfig.debug"
 
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
index 8c8aaa205ea..8d2982aa1b8 100644
--- a/arch/alpha/kernel/semaphore.c
+++ b/arch/alpha/kernel/semaphore.c
@@ -69,7 +69,7 @@ __down_failed(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
 	tsk->state = TASK_UNINTERRUPTIBLE;
@@ -98,7 +98,7 @@ __down_failed(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down acquired(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 }
 
@@ -111,7 +111,7 @@ __down_failed_interruptible(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down failed(%p)\n",
-	       tsk->comm, tsk->pid, sem);
+	       tsk->comm, task_pid_nr(tsk), sem);
 #endif
 
 	tsk->state = TASK_INTERRUPTIBLE;
@@ -139,7 +139,7 @@ __down_failed_interruptible(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down %s(%p)\n",
-	       current->comm, current->pid,
+	       current->comm, task_pid_nr(current),
 	       (ret < 0 ? "interrupted" : "acquired"), sem);
 #endif
 	return ret;
@@ -168,7 +168,7 @@ down(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	__down(sem);
@@ -182,7 +182,7 @@ down_interruptible(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	return __down_interruptible(sem);
@@ -201,7 +201,7 @@ down_trylock(struct semaphore *sem)
 
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): down_trylock %s from %p\n",
-	       current->comm, current->pid,
+	       current->comm, task_pid_nr(current),
 	       ret ? "failed" : "acquired",
 	       __builtin_return_address(0));
 #endif
@@ -217,7 +217,7 @@ up(struct semaphore *sem)
 #endif
 #ifdef CONFIG_DEBUG_SEMAPHORE
 	printk("%s(%d): up(%p) <count=%d> from %p\n",
-	       current->comm, current->pid, sem,
+	       current->comm, task_pid_nr(current), sem,
 	       atomic_read(&sem->count), __builtin_return_address(0));
 #endif
 	__up(sem);
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index ec0f05e0d8f..2dc7f9fed21 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -182,7 +182,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15)
 #ifdef CONFIG_SMP
 	printk("CPU %d ", hard_smp_processor_id());
 #endif
-	printk("%s(%d): %s %ld\n", current->comm, current->pid, str, err);
+	printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err);
 	dik_show_regs(regs, r9_15);
 	add_taint(TAINT_DIE);
 	dik_show_trace((unsigned long *)(regs+1));
@@ -646,7 +646,7 @@ got_exception:
 	lock_kernel();
 
 	printk("%s(%d): unhandled unaligned exception\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx\n",
 	       pc, una_reg(26), regs->ps);
@@ -786,7 +786,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
 		}
 		if (++cnt < 5) {
 			printk("%s(%d): unaligned trap at %016lx: %p %lx %ld\n",
-			       current->comm, current->pid,
+			       current->comm, task_pid_nr(current),
 			       regs->pc - 4, va, opcode, reg);
 		}
 		last_time = jiffies;
diff --git a/arch/alpha/lib/fls.c b/arch/alpha/lib/fls.c
index 7ad84ea0acf..32afaa3fa68 100644
--- a/arch/alpha/lib/fls.c
+++ b/arch/alpha/lib/fls.c
@@ -3,7 +3,7 @@
  */
 
 #include <linux/module.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /* This is fls(x)-1, except zero is held to zero.  This allows most
    efficent input into extbl, plus it allows easy handling of fls(0)=0.  */
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 25154df3055..4829f96585b 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -188,13 +188,13 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	/* We ran out of memory, or some other thing happened to us that
 	   made us unable to handle the page fault gracefully.  */
  out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
 	}
 	printk(KERN_ALERT "VM: killing process %s(%d)\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 	if (!user_mode(regs))
 		goto no_context;
 	do_group_exit(SIGKILL);
diff --git a/arch/alpha/oprofile/Kconfig b/arch/alpha/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b9..00000000000
--- a/arch/alpha/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0a0c88d0039..4cee938df01 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1068,7 +1068,7 @@ endmenu
 
 source "fs/Kconfig"
 
-source "arch/arm/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/arm/Kconfig.debug"
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 93b7f8e22dc..4f1a03124a7 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -265,7 +265,7 @@ void __show_regs(struct pt_regs *regs)
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
+	printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm);
 	__show_regs(regs);
 	__backtrace();
 }
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 5feee722ea9..4b05dc5c102 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -382,16 +382,16 @@ static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
 
 		if (ret != 2 || old_insn.thumb != BREAKINST_THUMB)
 			printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at "
-				"0x%08lx (0x%04x)\n", task->comm, task->pid,
-				addr, old_insn.thumb);
+				"0x%08lx (0x%04x)\n", task->comm,
+				task_pid_nr(task), addr, old_insn.thumb);
 	} else {
 		ret = swap_insn(task, addr & ~3, &old_insn.arm,
 				&bp->insn.arm, 4);
 
 		if (ret != 4 || old_insn.arm != BREAKINST_ARM)
 			printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
-				"0x%08lx (0x%08x)\n", task->comm, task->pid,
-				addr, old_insn.arm);
+				"0x%08lx (0x%08x)\n", task->comm,
+				task_pid_nr(task), addr, old_insn.arm);
 	}
 }
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 8ad47619c07..4764bd9ccee 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -223,7 +223,7 @@ static void __die(const char *str, int err, struct thread_info *thread, struct p
 	print_modules();
 	__show_regs(regs);
 	printk("Process %s (pid: %d, stack limit = 0x%p)\n",
-		tsk->comm, tsk->pid, thread + 1);
+		tsk->comm, task_pid_nr(tsk), thread + 1);
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem("Stack: ", regs->ARM_sp,
@@ -337,7 +337,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_UNDEFINED) {
 		printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
-			current->comm, current->pid, pc);
+			current->comm, task_pid_nr(current), pc);
 		dump_instr(regs);
 	}
 #endif
@@ -388,7 +388,7 @@ static int bad_syscall(int n, struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_SYSCALL) {
 		printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
-			current->pid, current->comm, n);
+			task_pid_nr(current), current->comm, n);
 		dump_instr(regs);
 	}
 #endif
@@ -565,7 +565,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 	 */
 	if (user_debug & UDBG_SYSCALL) {
 		printk("[%d] %s: arm syscall %d\n",
-		       current->pid, current->comm, no);
+		       task_pid_nr(current), current->comm, no);
 		dump_instr(regs);
 		if (user_mode(regs)) {
 			__show_regs(regs);
@@ -642,7 +642,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_BADABORT) {
 		printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
-			current->pid, current->comm, code, instr);
+			task_pid_nr(current), current->comm, code, instr);
 		dump_instr(regs);
 		show_pte(current->mm, addr);
 	}
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 074b7cb0774..e162cca5917 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -757,7 +757,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (ai_usermode & 1)
 		printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
 		       "Address=0x%08lx FSR 0x%03x\n", current->comm,
-			current->pid, instrptr,
+			task_pid_nr(current), instrptr,
 		        thumb_mode(regs) ? 4 : 8,
 		        thumb_mode(regs) ? tinstr : instr,
 		        addr, fsr);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 59ed1d05b71..a8a7dab757e 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -197,7 +197,7 @@ survive:
 	return fault;
 
 out_of_memory:
-	if (!is_init(tsk))
+	if (!is_global_init(tsk))
 		goto out;
 
 	/*
diff --git a/arch/arm/oprofile/Kconfig b/arch/arm/oprofile/Kconfig
deleted file mode 100644
index afd93ad02fe..00000000000
--- a/arch/arm/oprofile/Kconfig
+++ /dev/null
@@ -1,42 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-if OPROFILE
-
-config OPROFILE_ARMV6
-	bool
-	depends on CPU_V6 && !SMP
-	default y
-	select OPROFILE_ARM11_CORE
-
-config OPROFILE_MPCORE
-	bool
-	depends on CPU_V6 && SMP
-	default y
-	select OPROFILE_ARM11_CORE
-
-config OPROFILE_ARM11_CORE
-	bool
-
-endif
-
-endmenu
-
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 9a73ce7eb50..8a7caf8e7b4 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -89,7 +89,7 @@ void _exception(long signr, struct pt_regs *regs, int code,
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index 11472f8701b..6560cb18b4e 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -160,7 +160,7 @@ bad_area:
 		if (exception_trace && printk_ratelimit())
 			printk("%s%s[%d]: segfault at %08lx pc %08lx "
 			       "sp %08lx ecr %lu\n",
-			       is_init(tsk) ? KERN_EMERG : KERN_INFO,
+			       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 			       tsk->comm, tsk->pid, address, regs->pc,
 			       regs->sp, ecr);
 		_exception(SIGSEGV, regs, code, address);
@@ -209,7 +209,7 @@ no_context:
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
@@ -231,7 +231,7 @@ do_sigbus:
 	if (exception_trace)
 		printk("%s%s[%d]: bus error at %08lx pc %08lx "
 		       "sp %08lx ecr %lu\n",
-		       is_init(tsk) ? KERN_EMERG : KERN_INFO,
+		       is_global_init(tsk) ? KERN_EMERG : KERN_INFO,
 		       tsk->comm, tsk->pid, address, regs->pc,
 		       regs->sp, ecr);
 
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index aa9db307331..4c5ca9d5e40 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -1012,7 +1012,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/blackfin/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 menu "Kernel hacking"
 
diff --git a/arch/blackfin/oprofile/Kconfig b/arch/blackfin/oprofile/Kconfig
deleted file mode 100644
index 0a2fd999c94..00000000000
--- a/arch/blackfin/oprofile/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-menu "Profiling support"
-depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-config HARDWARE_PM
-	tristate "Hardware Performance Monitor Profiling"
-	depends on PROFILING
-	help
-	  take use of hardware performance monitor to profiling the kernel
-	  and application.
-
-	  If unsure, say N.
-
-endmenu
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 6b4d026a00a..21900a9378b 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -196,6 +196,8 @@ source "sound/Kconfig"
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/cris/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 74eef7111f2..43153e767bb 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -375,6 +375,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/frv/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/frv/kernel/irq-mb93091.c b/arch/frv/kernel/irq-mb93091.c
index ad753c1e9b8..9e38f99bbab 100644
--- a/arch/frv/kernel/irq-mb93091.c
+++ b/arch/frv/kernel/irq-mb93091.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93093.c b/arch/frv/kernel/irq-mb93093.c
index e0983f6926e..3c2752ca977 100644
--- a/arch/frv/kernel/irq-mb93093.c
+++ b/arch/frv/kernel/irq-mb93093.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq-mb93493.c b/arch/frv/kernel/irq-mb93493.c
index c157eeff871..7754c7338e4 100644
--- a/arch/frv/kernel/irq-mb93493.c
+++ b/arch/frv/kernel/irq-mb93493.c
@@ -17,10 +17,10 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/delay.h>
 #include <asm/irq.h>
 #include <asm/irc-regs.h>
diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c
index c7e59dcadee..7ddb69089ed 100644
--- a/arch/frv/kernel/irq.c
+++ b/arch/frv/kernel/irq.c
@@ -24,12 +24,12 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/module.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/delay.h>
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index e35f74e6e50..e2e9f57abe2 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -223,6 +223,8 @@ endmenu
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/h8300/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b84d5050e92..d1bedbf9deb 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1080,7 +1080,9 @@ config APM_REAL_MODE_POWER_OFF
 
 endif # APM
 
-source "arch/x86/kernel/cpu/cpufreq/Kconfig"
+source "arch/x86/kernel/cpu/cpufreq/Kconfig_32"
+
+source "drivers/cpuidle/Kconfig"
 
 endmenu
 
@@ -1256,31 +1258,6 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-menuconfig INSTRUMENTATION
-	bool "Instrumentation Support"
-	default y
-	---help---
-	  Say Y here to get to see options related to performance measurement,
-	  debugging, and testing. This option alone does not add any kernel code.
-
-	  If you say N, all options in this submenu will be skipped and disabled.
-
-if INSTRUMENTATION
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
-endif # INSTRUMENTATION
-
 source "arch/i386/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/i386/Makefile b/arch/i386/Makefile
index f036d2dee3d..b88e47ca303 100644
--- a/arch/i386/Makefile
+++ b/arch/i386/Makefile
@@ -102,7 +102,7 @@ core-$(CONFIG_XEN)		+= arch/x86/xen/
 # default subarch .h files
 mflags-y += -Iinclude/asm-x86/mach-default
 
-head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task_32.o
+head-y := arch/x86/kernel/head_32.o arch/x86/kernel/init_task.o
 
 libs-y 					+= arch/x86/lib/
 core-y					+= arch/x86/kernel/ \
@@ -131,9 +131,9 @@ all: bzImage
 zImage zlilo zdisk: KBUILD_IMAGE := arch/x86/boot/zImage
 
 zImage bzImage: vmlinux
-	$(Q)mkdir -p $(objtree)/arch/i386/boot
-	$(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+	$(Q)mkdir -p $(objtree)/arch/i386/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/i386/boot/bzImage
 
 compressed: zImage
 
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c60532d93c5..c89108e9770 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -592,20 +592,7 @@ config IRQ_PER_CPU
 
 source "arch/ia64/hp/sim/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/ia64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ia64/Kconfig.debug"
 
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
index 449d3e75bfc..75fd90dc76a 100644
--- a/arch/ia64/configs/sn2_defconfig
+++ b/arch/ia64/configs/sn2_defconfig
@@ -26,6 +26,7 @@ CONFIG_TASK_IO_ACCOUNTING=y
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
 CONFIG_LOG_BUF_SHIFT=20
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index a3405b3c1ee..d025a22eb22 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -773,7 +773,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
 			if (flags & MAP_SHARED)
 				printk(KERN_INFO
 				       "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
-				       current->comm, current->pid, start);
+				       current->comm, task_pid_nr(current), start);
 			ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
 					   off);
 			if (IS_ERR((void *) ret))
@@ -786,7 +786,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
 			if (flags & MAP_SHARED)
 				printk(KERN_INFO
 				       "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
-				       current->comm, current->pid, end);
+				       current->comm, task_pid_nr(current), end);
 			ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
 					   (off + len) - offset_in_page(end));
 			if (IS_ERR((void *) ret))
@@ -816,7 +816,7 @@ emulate_mmap (struct file *file, unsigned long start, unsigned long len, int pro
 
 	if ((flags & MAP_SHARED) && !is_congruent)
 		printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
-		       "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
+		       "(addr=0x%lx,off=0x%llx)\n", current->comm, task_pid_nr(current), start, off);
 
 	DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
 	    is_congruent ? "congruent" : "not congruent", poff);
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 73ca86d0381..8e4894b205e 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -967,7 +967,7 @@ find_memmap_space (void)
  * to use.  We can allocate partial granules only if the unavailable
  * parts exist, and are WB.
  */
-void
+unsigned long
 efi_memmap_init(unsigned long *s, unsigned long *e)
 {
 	struct kern_memdesc *k, *prev = NULL;
@@ -1084,6 +1084,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
 	/* reserve the memory we are using for kern_memmap */
 	*s = (u64)kern_memmap;
 	*e = (u64)++k;
+
+	return total_mem;
 }
 
 void
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f55fa07849c..59169bf7145 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -158,14 +158,14 @@
  */
 #define PROTECT_CTX(c, f) \
 	do {  \
-		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_lock_irqsave(&(c)->ctx_lock, f); \
-		DPRINT(("spinlocked ctx %p  by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlocked ctx %p  by [%d]\n", c, task_pid_nr(current))); \
 	} while(0)
 
 #define UNPROTECT_CTX(c, f) \
 	do { \
-		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
+		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
 		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
 	} while(0)
 
@@ -227,12 +227,12 @@
 #ifdef PFM_DEBUGGING
 #define DPRINT(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 
 #define DPRINT_ovfl(a) \
 	do { \
-		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
+		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
 	} while (0)
 #endif
 
@@ -913,7 +913,7 @@ pfm_mask_monitoring(struct task_struct *task)
 	unsigned long mask, val, ovfl_mask;
 	int i;
 
-	DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
+	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));
 
 	ovfl_mask = pmu_conf->ovfl_val;
 	/*
@@ -992,12 +992,12 @@ pfm_restore_monitoring(struct task_struct *task)
 	ovfl_mask = pmu_conf->ovfl_val;
 
 	if (task != current) {
-		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
+		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
 		return;
 	}
 	if (ctx->ctx_state != PFM_CTX_MASKED) {
 		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
-			task->pid, current->pid, ctx->ctx_state);
+			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
 		return;
 	}
 	psr = pfm_get_psr();
@@ -1051,7 +1051,8 @@ pfm_restore_monitoring(struct task_struct *task)
 		if ((mask & 0x1) == 0UL) continue;
 		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
 		ia64_set_pmc(i, ctx->th_pmcs[i]);
-		DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, ctx->th_pmcs[i]));
+		DPRINT(("[%d] pmc[%d]=0x%lx\n",
+					task_pid_nr(task), i, ctx->th_pmcs[i]));
 	}
 	ia64_srlz_d();
 
@@ -1370,7 +1371,7 @@ pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
 
 error_conflict:
 	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
-  		pfm_sessions.pfs_sys_session[cpu]->pid,
+  		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
 		cpu));
 abort:
 	UNLOCK_PFS(flags);
@@ -1442,7 +1443,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
 
 	/* sanity checks */
 	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
-		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
+		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
 		return -EINVAL;
 	}
 
@@ -1459,7 +1460,7 @@ pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long siz
 
 	up_write(&task->mm->mmap_sem);
 	if (r !=0) {
-		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
+		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
 	}
 
 	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
@@ -1501,7 +1502,7 @@ pfm_free_smpl_buffer(pfm_context_t *ctx)
 	return 0;
 
 invalid_free:
-	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
+	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
 	return -EINVAL;
 }
 #endif
@@ -1547,13 +1548,13 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 	unsigned long flags;
   	DECLARE_WAITQUEUE(wait, current);
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -1607,7 +1608,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 
 		PROTECT_CTX(ctx, flags);
 	}
-	DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
+	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
   	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
 
@@ -1616,7 +1617,7 @@ pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
 	ret = -EINVAL;
 	msg = pfm_get_next_msg(ctx);
 	if (msg == NULL) {
-		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
+		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
 		goto abort_locked;
 	}
 
@@ -1647,13 +1648,13 @@ pfm_poll(struct file *filp, poll_table * wait)
 	unsigned int mask = 0;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
 		return 0;
 	}
 
@@ -1692,7 +1693,7 @@ pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
 	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
 
 	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
-		current->pid,
+		task_pid_nr(current),
 		fd,
 		on,
 		ctx->ctx_async_queue, ret));
@@ -1707,13 +1708,13 @@ pfm_fasync(int fd, struct file *filp, int on)
 	int ret;
 
 	if (PFM_IS_FILE(filp) == 0) {
-		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 	/*
@@ -1759,7 +1760,7 @@ pfm_syswide_force_stop(void *info)
 	if (owner != ctx->ctx_task) {
 		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
 			smp_processor_id(),
-			owner->pid, ctx->ctx_task->pid);
+			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
 		return;
 	}
 	if (GET_PMU_CTX() != ctx) {
@@ -1769,7 +1770,7 @@ pfm_syswide_force_stop(void *info)
 		return;
 	}
 
-	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));	
+	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
 	/*
 	 * the context is already protected in pfm_close(), we simply
 	 * need to mask interrupts to avoid a PMU interrupt race on
@@ -1821,7 +1822,7 @@ pfm_flush(struct file *filp, fl_owner_t id)
 
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
@@ -1969,7 +1970,7 @@ pfm_close(struct inode *inode, struct file *filp)
 	
 	ctx = (pfm_context_t *)filp->private_data;
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
+		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
 		return -EBADF;
 	}
 
@@ -2066,7 +2067,7 @@ pfm_close(struct inode *inode, struct file *filp)
 	 	 */
 		ctx->ctx_state = PFM_CTX_ZOMBIE;
 
-		DPRINT(("zombie ctx for [%d]\n", task->pid));
+		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
 		/*
 		 * cannot free the context on the spot. deferred until
 		 * the task notices the ZOMBIE state
@@ -2472,7 +2473,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
 	/* invoke and lock buffer format, if found */
 	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
 	if (fmt == NULL) {
-		DPRINT(("[%d] cannot find buffer format\n", task->pid));
+		DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
@@ -2483,7 +2484,7 @@ pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t
 
 	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
 
-	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
+	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));
 
 	if (ret) goto error;
 
@@ -2605,23 +2606,23 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
 	 * no kernel task or task not owner by caller
 	 */
 	if (task->mm == NULL) {
-		DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid));
+		DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	if (pfm_bad_permissions(task)) {
-		DPRINT(("no permission to attach to  [%d]\n", task->pid));
+		DPRINT(("no permission to attach to  [%d]\n", task_pid_nr(task)));
 		return -EPERM;
 	}
 	/*
 	 * cannot block in self-monitoring mode
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
-		DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
+		DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
 	if (task->exit_state == EXIT_ZOMBIE) {
-		DPRINT(("cannot attach to  zombie task [%d]\n", task->pid));
+		DPRINT(("cannot attach to  zombie task [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -2631,7 +2632,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
 	if (task == current) return 0;
 
 	if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
+		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
 		return -EBUSY;
 	}
 	/*
@@ -3512,7 +3513,7 @@ pfm_use_debug_registers(struct task_struct *task)
 
 	if (pmu_conf->use_rr_dbregs == 0) return 0;
 
-	DPRINT(("called for [%d]\n", task->pid));
+	DPRINT(("called for [%d]\n", task_pid_nr(task)));
 
 	/*
 	 * do it only once
@@ -3543,7 +3544,7 @@ pfm_use_debug_registers(struct task_struct *task)
 	DPRINT(("ptrace_use_dbregs=%u  sys_use_dbregs=%u by [%d] ret = %d\n",
 		  pfm_sessions.pfs_ptrace_use_dbregs,
 		  pfm_sessions.pfs_sys_use_dbregs,
-		  task->pid, ret));
+		  task_pid_nr(task), ret));
 
 	UNLOCK_PFS(flags);
 
@@ -3568,7 +3569,7 @@ pfm_release_debug_registers(struct task_struct *task)
 
 	LOCK_PFS(flags);
 	if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
-		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
+		printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task));
 		ret = -1;
 	}  else {
 		pfm_sessions.pfs_ptrace_use_dbregs--;
@@ -3620,7 +3621,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 	/* sanity check */
 	if (unlikely(task == NULL)) {
-		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current));
 		return -EINVAL;
 	}
 
@@ -3629,7 +3630,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		fmt = ctx->ctx_buf_fmt;
 
 		DPRINT(("restarting self %d ovfl=0x%lx\n",
-			task->pid,
+			task_pid_nr(task),
 			ctx->ctx_ovfl_regs[0]));
 
 		if (CTX_HAS_SMPL(ctx)) {
@@ -3653,11 +3654,11 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 				pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
 
 			if (rst_ctrl.bits.mask_monitoring == 0) {
-				DPRINT(("resuming monitoring for [%d]\n", task->pid));
+				DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task)));
 
 				if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
 			} else {
-				DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
+				DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task)));
 
 				// cannot use pfm_stop_monitoring(task, regs);
 			}
@@ -3714,10 +3715,10 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 	 * "self-monitoring".
 	 */
 	if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
-		DPRINT(("unblocking [%d] \n", task->pid));
+		DPRINT(("unblocking [%d] \n", task_pid_nr(task)));
 		complete(&ctx->ctx_restart_done);
 	} else {
-		DPRINT(("[%d] armed exit trap\n", task->pid));
+		DPRINT(("[%d] armed exit trap\n", task_pid_nr(task)));
 
 		ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 
@@ -3805,7 +3806,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
 	 * don't bother if we are loaded and task is being debugged
 	 */
 	if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
-		DPRINT(("debug registers already in use for [%d]\n", task->pid));
+		DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task)));
 		return -EBUSY;
 	}
 
@@ -3846,7 +3847,7 @@ pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_
 	 * is shared by all processes running on it
  	 */
 	if (first_time && can_access_pmu) {
-		DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
+		DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
 		for (i=0; i < pmu_conf->num_ibrs; i++) {
 			ia64_set_ibr(i, 0UL);
 			ia64_dv_serialize_instruction();
@@ -4035,7 +4036,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		return -EBUSY;
 	}
 	DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
-		PFM_CTX_TASK(ctx)->pid,
+		task_pid_nr(PFM_CTX_TASK(ctx)),
 		state,
 		is_system));
 	/*
@@ -4093,7 +4094,7 @@ pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 * monitoring disabled in kernel at next reschedule
 		 */
 		ctx->ctx_saved_psr_up = 0;
-		DPRINT(("task=[%d]\n", task->pid));
+		DPRINT(("task=[%d]\n", task_pid_nr(task)));
 	}
 	return 0;
 }
@@ -4298,11 +4299,12 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 		if (is_system) {
 			if (pfm_sessions.pfs_ptrace_use_dbregs) {
-				DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
+				DPRINT(("cannot load [%d] dbregs in use\n",
+							task_pid_nr(task)));
 				ret = -EBUSY;
 			} else {
 				pfm_sessions.pfs_sys_use_dbregs++;
-				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
+				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
 				set_dbregs = 1;
 			}
 		}
@@ -4394,7 +4396,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 
 			/* allow user level control */
 			ia64_psr(regs)->sp = 0;
-			DPRINT(("clearing psr.sp for [%d]\n", task->pid));
+			DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task)));
 
 			SET_LAST_CPU(ctx, smp_processor_id());
 			INC_ACTIVATION();
@@ -4429,7 +4431,7 @@ pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
 		 */
 		SET_PMU_OWNER(task, ctx);
 
-		DPRINT(("context loaded on PMU for [%d]\n", task->pid));
+		DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task)));
 	} else {
 		/*
 		 * when not current, task MUST be stopped, so this is safe
@@ -4493,7 +4495,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	int prev_state, is_system;
 	int ret;
 
-	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
+	DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1));
 
 	prev_state = ctx->ctx_state;
 	is_system  = ctx->ctx_fl_system;
@@ -4568,7 +4570,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 		 */
 		ia64_psr(regs)->sp = 1;
 
-		DPRINT(("setting psr.sp for [%d]\n", task->pid));
+		DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task)));
 	}
 	/*
 	 * save PMDs to context
@@ -4608,7 +4610,7 @@ pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 	ctx->ctx_fl_can_restart  = 0;
 	ctx->ctx_fl_going_zombie = 0;
 
-	DPRINT(("disconnected [%d] from context\n", task->pid));
+	DPRINT(("disconnected [%d] from context\n", task_pid_nr(task)));
 
 	return 0;
 }
@@ -4631,7 +4633,7 @@ pfm_exit_thread(struct task_struct *task)
 
 	PROTECT_CTX(ctx, flags);
 
-	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
+	DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task)));
 
 	state = ctx->ctx_state;
 	switch(state) {
@@ -4640,13 +4642,13 @@ pfm_exit_thread(struct task_struct *task)
 	 		 * only comes to this function if pfm_context is not NULL, i.e., cannot
 			 * be in unloaded state
 	 		 */
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task));
 			break;
 		case PFM_CTX_LOADED:
 		case PFM_CTX_MASKED:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			DPRINT(("ctx unloaded for current state was %d\n", state));
 
@@ -4655,12 +4657,12 @@ pfm_exit_thread(struct task_struct *task)
 		case PFM_CTX_ZOMBIE:
 			ret = pfm_context_unload(ctx, NULL, 0, regs);
 			if (ret) {
-				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
+				printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret);
 			}
 			free_ok = 1;
 			break;
 		default:
-			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
+			printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state);
 			break;
 	}
 	UNPROTECT_CTX(ctx, flags);
@@ -4744,7 +4746,7 @@ recheck:
 	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
 		ctx->ctx_fd,
 		state,
-		task->pid,
+		task_pid_nr(task),
 		task->state, PFM_CMD_STOPPED(cmd)));
 
 	/*
@@ -4791,7 +4793,7 @@ recheck:
 	 */
 	if (PFM_CMD_STOPPED(cmd)) {
 		if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
-			DPRINT(("[%d] task not in stopped state\n", task->pid));
+			DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task)));
 			return -EBUSY;
 		}
 		/*
@@ -4884,7 +4886,7 @@ restart_args:
 	 * limit abuse to min page size
 	 */
 	if (unlikely(sz > PFM_MAX_ARGSIZE)) {
-		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
+		printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz);
 		return -E2BIG;
 	}
 
@@ -5031,11 +5033,11 @@ pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
 {
 	int ret;
 
-	DPRINT(("entering for [%d]\n", current->pid));
+	DPRINT(("entering for [%d]\n", task_pid_nr(current)));
 
 	ret = pfm_context_unload(ctx, NULL, 0, regs);
 	if (ret) {
-		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret);
+		printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret);
 	}
 
 	/*
@@ -5072,7 +5074,7 @@ pfm_handle_work(void)
 
 	ctx = PFM_GET_CTX(current);
 	if (ctx == NULL) {
-		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
+		printk(KERN_ERR "perfmon: [%d] has no PFM context\n", task_pid_nr(current));
 		return;
 	}
 
@@ -5269,7 +5271,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 	DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
 		     "used_pmds=0x%lx\n",
 			pmc0,
-			task ? task->pid: -1,
+			task ? task_pid_nr(task): -1,
 			(regs ? regs->cr_iip : 0),
 			CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
 			ctx->ctx_used_pmds[0]));
@@ -5458,7 +5460,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 	}
 
 	DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
-			GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
+			GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1,
 			PFM_GET_WORK_PENDING(task),
 			ctx->ctx_fl_trap_reason,
 			ovfl_pmds,
@@ -5483,7 +5485,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str
 sanity_check:
 	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
 			smp_processor_id(),
-			task ? task->pid : -1,
+			task ? task_pid_nr(task) : -1,
 			pmc0);
 	return;
 
@@ -5516,7 +5518,7 @@ stop_monitoring:
 	 *
 	 * Overall pretty hairy stuff....
 	 */
-	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
+	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task_pid_nr(task): -1));
 	pfm_clear_psr_up();
 	ia64_psr(regs)->up = 0;
 	ia64_psr(regs)->sp = 1;
@@ -5577,13 +5579,13 @@ pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
 
 report_spurious1:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
-		this_cpu, task->pid);
+		this_cpu, task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 report_spurious2:
 	printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 
 		this_cpu, 
-		task->pid);
+		task_pid_nr(task));
 	pfm_unfreeze_pmu();
 	return -1;
 }
@@ -5870,7 +5872,8 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
 	ia64_psr(regs)->sp = 1;
 
 	if (GET_PMU_OWNER() == task) {
-		DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
+		DPRINT(("cleared ownership for [%d]\n",
+					task_pid_nr(ctx->ctx_task)));
 		SET_PMU_OWNER(NULL, NULL);
 	}
 
@@ -5882,7 +5885,7 @@ pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
 	task->thread.pfm_context  = NULL;
 	task->thread.flags       &= ~IA64_THREAD_PM_VALID;
 
-	DPRINT(("force cleanup for [%d]\n",  task->pid));
+	DPRINT(("force cleanup for [%d]\n",  task_pid_nr(task)));
 }
 
 
@@ -6426,7 +6429,7 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 
 		if (PMD_IS_COUNTING(i)) {
 			DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
-				task->pid,
+				task_pid_nr(task),
 				i,
 				ctx->ctx_pmds[i].val,
 				val & ovfl_val));
@@ -6448,11 +6451,11 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
 			 */
 			if (pmc0 & (1UL << i)) {
 				val += 1 + ovfl_val;
-				DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
+				DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i));
 			}
 		}
 
-		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
+		DPRINT(("[%d] ctx_pmd[%d]=0x%lx  pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val));
 
 		if (is_self) ctx->th_pmds[i] = pmd_val;
 
@@ -6793,14 +6796,14 @@ dump_pmu_state(const char *from)
 	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", 
 		this_cpu, 
 		from, 
-		current->pid, 
+		task_pid_nr(current),
 		regs->cr_iip,
 		current->comm);
 
 	task = GET_PMU_OWNER();
 	ctx  = GET_PMU_CTX();
 
-	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
+	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);
 
 	psr = pfm_get_psr();
 
@@ -6848,7 +6851,7 @@ pfm_inherit(struct task_struct *task, struct pt_regs *regs)
 {
 	struct thread_struct *thread;
 
-	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
+	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));
 
 	thread = &task->thread;
 
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
index ff80eab83b3..a7af1cb419f 100644
--- a/arch/ia64/kernel/perfmon_default_smpl.c
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -44,11 +44,11 @@ default_validate(struct task_struct *task, unsigned int flags, int cpu, void *da
 	int ret = 0;
 
 	if (data == NULL) {
-		DPRINT(("[%d] no argument passed\n", task->pid));
+		DPRINT(("[%d] no argument passed\n", task_pid_nr(task)));
 		return -EINVAL;
 	}
 
-	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
+	DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu));
 
 	/*
 	 * must hold at least the buffer header + one minimally sized entry
@@ -88,7 +88,7 @@ default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, v
 	hdr->hdr_count        = 0UL;
 
 	DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
-		task->pid,
+		task_pid_nr(task),
 		buf,
 		hdr->hdr_buf_size,
 		sizeof(*hdr),
@@ -245,7 +245,7 @@ default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, stru
 static int
 default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
 {
-	DPRINT(("[%d] exit(%p)\n", task->pid, buf));
+	DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf));
 	return 0;
 }
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index c613fc0e91c..2418289ee5c 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -105,7 +105,8 @@ show_regs (struct pt_regs *regs)
 	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
 
 	print_modules();
-	printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
+	printk("\nPid: %d, CPU %d, comm: %20s\n", task_pid_nr(current),
+			smp_processor_id(), current->comm);
 	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]    %s\n",
 	       regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
 	print_symbol("ip is at %s\n", ip);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c5cfcfa4c87..cbf67f1aa29 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -208,6 +208,48 @@ static int __init register_memory(void)
 
 __initcall(register_memory);
 
+
+#ifdef CONFIG_KEXEC
+static void __init setup_crashkernel(unsigned long total, int *n)
+{
+	unsigned long long base = 0, size = 0;
+	int ret;
+
+	ret = parse_crashkernel(boot_command_line, total,
+			&size, &base);
+	if (ret == 0 && size > 0) {
+		if (!base) {
+			sort_regions(rsvd_region, *n);
+			base = kdump_find_rsvd_region(size,
+					rsvd_region, *n);
+		}
+		if (base != ~0UL) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(size >> 20),
+					(unsigned long)(base >> 20),
+					(unsigned long)(total >> 20));
+			rsvd_region[*n].start =
+				(unsigned long)__va(base);
+			rsvd_region[*n].end =
+				(unsigned long)__va(base + size);
+			(*n)++;
+			crashk_res.start = base;
+			crashk_res.end = base + size - 1;
+		}
+	}
+	efi_memmap_res.start = ia64_boot_param->efi_memmap;
+	efi_memmap_res.end = efi_memmap_res.start +
+		ia64_boot_param->efi_memmap_size;
+	boot_param_res.start = __pa(ia64_boot_param);
+	boot_param_res.end = boot_param_res.start +
+		sizeof(*ia64_boot_param);
+}
+#else
+static inline void __init setup_crashkernel(unsigned long total, int *n)
+{}
+#endif
+
 /**
  * reserve_memory - setup reserved memory areas
  *
@@ -219,6 +261,7 @@ void __init
 reserve_memory (void)
 {
 	int n = 0;
+	unsigned long total_memory;
 
 	/*
 	 * none of the entries in this table overlap
@@ -254,50 +297,11 @@ reserve_memory (void)
 		n++;
 #endif
 
-	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
+	total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;
 
-#ifdef CONFIG_KEXEC
-	/* crashkernel=size@offset specifies the size to reserve for a crash
-	 * kernel. If offset is 0, then it is determined automatically.
-	 * By reserving this memory we guarantee that linux never set's it
-	 * up as a DMA target.Useful for holding code to do something
-	 * appropriate after a kernel panic.
-	 */
-	{
-		char *from = strstr(boot_command_line, "crashkernel=");
-		unsigned long base, size;
-		if (from) {
-			size = memparse(from + 12, &from);
-			if (*from == '@')
-				base = memparse(from+1, &from);
-			else
-				base = 0;
-			if (size) {
-				if (!base) {
-					sort_regions(rsvd_region, n);
-					base = kdump_find_rsvd_region(size,
-							      	rsvd_region, n);
-					}
-				if (base != ~0UL) {
-					rsvd_region[n].start =
-						(unsigned long)__va(base);
-					rsvd_region[n].end =
-						(unsigned long)__va(base + size);
-					n++;
-					crashk_res.start = base;
-					crashk_res.end = base + size - 1;
-				}
-			}
-		}
-		efi_memmap_res.start = ia64_boot_param->efi_memmap;
-                efi_memmap_res.end = efi_memmap_res.start +
-                        ia64_boot_param->efi_memmap_size;
-                boot_param_res.start = __pa(ia64_boot_param);
-                boot_param_res.end = boot_param_res.start +
-                        sizeof(*ia64_boot_param);
-	}
-#endif
+	setup_crashkernel(total_memory, &n);
+
 	/* end of memory marker */
 	rsvd_region[n].start = ~0UL;
 	rsvd_region[n].end   = ~0UL;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index aeec8184e86..cdb64cc4d9c 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -227,7 +227,7 @@ ia64_rt_sigreturn (struct sigscratch *scr)
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = sc;
 	force_sig_info(SIGSEGV, &si, current);
@@ -332,7 +332,7 @@ force_sigsegv_info (int sig, void __user *addr)
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = addr;
 	force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 3aeaf15e468..78d65cb947d 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -61,7 +61,7 @@ die (const char *str, struct pt_regs *regs, long err)
 
 	if (++die.lock_owner_depth < 3) {
 		printk("%s[%d]: %s %ld [%d]\n",
-			current->comm, current->pid, str, err, ++die_counter);
+		current->comm, task_pid_nr(current), str, err, ++die_counter);
 		(void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
 		show_regs(regs);
   	} else
@@ -315,7 +315,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
 				last.time = current_jiffies + 5 * HZ;
 				printk(KERN_WARNING
 		       			"%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
-		       			current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
+		       			current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr);
 			}
 		}
 	}
@@ -453,7 +453,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		if (code == 8) {
 # ifdef CONFIG_IA64_PRINT_HAZARDS
 			printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
-			       current->comm, current->pid,
+			       current->comm, task_pid_nr(current),
 			       regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
 # endif
 			return;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index fe6aa5a9f8f..2173de9fe91 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1340,7 +1340,8 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 			size_t len;
 
 			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
-				      "ip=0x%016lx\n\r", current->comm, current->pid,
+				      "ip=0x%016lx\n\r", current->comm,
+				      task_pid_nr(current),
 				      ifa, regs->cr_iip + ipsr->ri);
 			/*
 			 * Don't call tty_write_message() if we're in the kernel; we might
@@ -1363,7 +1364,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
 				       "administrator\n"
 				       "echo 0 > /proc/sys/kernel/ignore-"
 				       "unaligned-usertrap to re-enable\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			}
 		}
 	} else {
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 32f26253c4e..7571076a16a 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 
   out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 3e10152abbf..c6c19bf11be 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -127,8 +127,8 @@ ia64_init_addr_space (void)
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
-		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
 		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
deleted file mode 100644
index 97271ab484d..00000000000
--- a/arch/ia64/oprofile/Kconfig
+++ /dev/null
@@ -1,20 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  Due to firmware bugs, you may need to use the "nohalt" boot
-	  option if you're using OProfile with the hardware performance
-	  counters.
-
-	  If unsure, say N.
-
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index bd5fe76401f..ab9a264cb19 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -426,7 +426,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/m32r/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/m32r/Kconfig.debug"
 
diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c
index 97e0b1c0830..89ba4a0b5d5 100644
--- a/arch/m32r/kernel/traps.c
+++ b/arch/m32r/kernel/traps.c
@@ -196,7 +196,7 @@ static void show_registers(struct pt_regs *regs)
 		printk("SPI: %08lx\n", sp);
 	}
 	printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)",
-		current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
+		current->comm, task_pid_nr(current), 0xffff & i, 4096+(unsigned long)current);
 
 	/*
 	 * When in-kernel, we also print out the stack and code at the
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 70a766aad3e..4a71df4c1b3 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -271,7 +271,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m32r/oprofile/Kconfig b/arch/m32r/oprofile/Kconfig
deleted file mode 100644
index 19d37730b66..00000000000
--- a/arch/m32r/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 20a9c08e59c..01dee84f840 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -683,6 +683,8 @@ endmenu
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68k/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 4e2752a0e89..97f556fa493 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -900,7 +900,7 @@ void show_registers(struct pt_regs *regs)
 	       regs->d4, regs->d5, regs->a0, regs->a1);
 
 	printk("Process %s (pid: %d, task=%p)\n",
-		current->comm, current->pid, current);
+		current->comm, task_pid_nr(current), current);
 	addr = (unsigned long)&fp->un;
 	printk("Frame format=%X ", regs->format);
 	switch (regs->format) {
@@ -1038,7 +1038,7 @@ void bad_super_trap (struct frame *fp)
 				fp->un.fmtb.daddr, space_names[ssw & DFC],
 				fp->ptregs.pc);
 	}
-	printk ("Current process id is %d\n", current->pid);
+	printk ("Current process id is %d\n", task_pid_nr(current));
 	die_if_kernel("BAD KERNEL TRAP", &fp->ptregs, 0);
 }
 
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index eaa61868115..f493f03231d 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -180,7 +180,7 @@ good_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig
index 185906b54cb..f52c627bdad 100644
--- a/arch/m68knommu/Kconfig
+++ b/arch/m68knommu/Kconfig
@@ -696,6 +696,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/m68knommu/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index cb027580cd1..4dc142d394a 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2005,7 +2005,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/mips/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/mips/Kconfig.debug"
 
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 5f48b060379..bdf00e2a35e 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -36,8 +36,8 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/mipsregs.h>
diff --git a/arch/mips/basler/excite/excite_irq.c b/arch/mips/basler/excite/excite_irq.c
index 1ecab635042..4903e067916 100644
--- a/arch/mips/basler/excite/excite_irq.c
+++ b/arch/mips/basler/excite/excite_irq.c
@@ -29,7 +29,7 @@
 #include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/mips/bcm47xx/time.c b/arch/mips/bcm47xx/time.c
index 0ab4676c8bd..0c6f47b3fd9 100644
--- a/arch/mips/bcm47xx/time.c
+++ b/arch/mips/bcm47xx/time.c
@@ -46,10 +46,3 @@ void __init plat_time_init(void)
 	/* Set MIPS counter frequency for fixed_rate_gettimeoffset() */
 	mips_hpt_frequency = hz;
 }
-
-void __init
-plat_timer_setup(struct irqaction *irq)
-{
-	/* Enable the timer interrupt */
-	setup_irq(7, irq);
-}
diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig
index 49bcc58929b..892d4c38fd0 100644
--- a/arch/mips/configs/ip27_defconfig
+++ b/arch/mips/configs/ip27_defconfig
@@ -175,6 +175,7 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig
index 86dcb746435..61b72f5a953 100644
--- a/arch/mips/configs/mipssim_defconfig
+++ b/arch/mips/configs/mipssim_defconfig
@@ -1,71 +1,68 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.20
-# Tue Feb 20 21:47:35 2007
+# Linux kernel version: 2.6.23
+# Thu Oct 18 22:45:52 2007
 #
 CONFIG_MIPS=y
 
 #
 # Machine selection
 #
-CONFIG_ZONE_DMA=y
-# CONFIG_MIPS_MTX1 is not set
-# CONFIG_MIPS_BOSPORUS is not set
-# CONFIG_MIPS_PB1000 is not set
-# CONFIG_MIPS_PB1100 is not set
-# CONFIG_MIPS_PB1500 is not set
-# CONFIG_MIPS_PB1550 is not set
-# CONFIG_MIPS_PB1200 is not set
-# CONFIG_MIPS_DB1000 is not set
-# CONFIG_MIPS_DB1100 is not set
-# CONFIG_MIPS_DB1500 is not set
-# CONFIG_MIPS_DB1550 is not set
-# CONFIG_MIPS_DB1200 is not set
-# CONFIG_MIPS_MIRAGE is not set
+# CONFIG_MACH_ALCHEMY is not set
 # CONFIG_BASLER_EXCITE is not set
+# CONFIG_BCM47XX is not set
 # CONFIG_MIPS_COBALT is not set
 # CONFIG_MACH_DECSTATION is not set
 # CONFIG_MACH_JAZZ is not set
+# CONFIG_LASAT is not set
+# CONFIG_LEMOTE_FULONG is not set
 # CONFIG_MIPS_ATLAS is not set
 # CONFIG_MIPS_MALTA is not set
 # CONFIG_MIPS_SEAD is not set
-# CONFIG_WR_PPMC is not set
 CONFIG_MIPS_SIM=y
-# CONFIG_MOMENCO_JAGUAR_ATX is not set
-# CONFIG_MIPS_XXS1500 is not set
+# CONFIG_MARKEINS is not set
+# CONFIG_MACH_VR41XX is not set
 # CONFIG_PNX8550_JBS is not set
 # CONFIG_PNX8550_STB810 is not set
-# CONFIG_MACH_VR41XX is not set
+# CONFIG_PMC_MSP is not set
 # CONFIG_PMC_YOSEMITE is not set
 # CONFIG_QEMU is not set
-# CONFIG_MARKEINS is not set
 # CONFIG_SGI_IP22 is not set
 # CONFIG_SGI_IP27 is not set
 # CONFIG_SGI_IP32 is not set
-# CONFIG_SIBYTE_BIGSUR is not set
+# CONFIG_SIBYTE_CRHINE is not set
+# CONFIG_SIBYTE_CARMEL is not set
+# CONFIG_SIBYTE_CRHONE is not set
+# CONFIG_SIBYTE_RHONE is not set
 # CONFIG_SIBYTE_SWARM is not set
+# CONFIG_SIBYTE_LITTLESUR is not set
 # CONFIG_SIBYTE_SENTOSA is not set
-# CONFIG_SIBYTE_RHONE is not set
-# CONFIG_SIBYTE_CARMEL is not set
 # CONFIG_SIBYTE_PTSWARM is not set
-# CONFIG_SIBYTE_LITTLESUR is not set
-# CONFIG_SIBYTE_CRHINE is not set
-# CONFIG_SIBYTE_CRHONE is not set
+# CONFIG_SIBYTE_BIGSUR is not set
 # CONFIG_SNI_RM is not set
 # CONFIG_TOSHIBA_JMR3927 is not set
 # CONFIG_TOSHIBA_RBTX4927 is not set
 # CONFIG_TOSHIBA_RBTX4938 is not set
+# CONFIG_WR_PPMC is not set
 CONFIG_RWSEM_GENERIC_SPINLOCK=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_CLOCKEVENTS=y
 CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CMOS_UPDATE=y
 CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
 # CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ is not set
+CONFIG_BOOT_RAW=y
+CONFIG_CEVT_R4K=y
 CONFIG_DMA_NONCOHERENT=y
 CONFIG_DMA_NEED_PCI_MAP_STATE=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_SYS_HAS_EARLY_PRINTK=y
+# CONFIG_HOTPLUG_CPU is not set
+# CONFIG_NO_IOPORT is not set
 # CONFIG_CPU_BIG_ENDIAN is not set
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_SYS_SUPPORTS_BIG_ENDIAN=y
@@ -76,6 +73,11 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5
 #
 # CPU selection
 #
+# CONFIG_TICK_ONESHOT is not set
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+# CONFIG_CPU_LOONGSON2 is not set
 CONFIG_CPU_MIPS32_R1=y
 # CONFIG_CPU_MIPS32_R2 is not set
 # CONFIG_CPU_MIPS64_R1 is not set
@@ -115,8 +117,8 @@ CONFIG_CPU_HAS_PREFETCH=y
 CONFIG_MIPS_MT_DISABLED=y
 # CONFIG_MIPS_MT_SMP is not set
 # CONFIG_MIPS_MT_SMTC is not set
+CONFIG_SYS_SUPPORTS_MULTITHREADING=y
 # CONFIG_MIPS_VPE_LOADER is not set
-# CONFIG_64BIT_PHYS_ADDR is not set
 CONFIG_CPU_HAS_LLSC=y
 CONFIG_CPU_HAS_SYNC=y
 CONFIG_GENERIC_HARDIRQS=y
@@ -130,50 +132,52 @@ CONFIG_FLATMEM_MANUAL=y
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
+# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
 # CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
 # CONFIG_HZ_48 is not set
-# CONFIG_HZ_100 is not set
+CONFIG_HZ_100=y
 # CONFIG_HZ_128 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_256 is not set
-CONFIG_HZ_1000=y
+# CONFIG_HZ_1000 is not set
 # CONFIG_HZ_1024 is not set
 CONFIG_SYS_SUPPORTS_ARBIT_HZ=y
-CONFIG_HZ=1000
+CONFIG_HZ=100
 CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 # CONFIG_KEXEC is not set
+# CONFIG_SECCOMP is not set
 CONFIG_LOCKDEP_SUPPORT=y
 CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 #
-# Code maturity level options
+# General setup
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_BROKEN_ON_SMP=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
+# CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
 CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_POSIX_MQUEUE is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_FAIR_USER_SCHED=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
+# CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
 CONFIG_SYSCTL=y
 CONFIG_EMBEDDED=y
@@ -187,31 +191,29 @@ CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
+CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_EVENTFD=y
 CONFIG_SHMEM=y
-CONFIG_SLAB=y
 CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
 CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_KMOD=y
-
-#
-# Block layer
-#
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
+# CONFIG_BLK_DEV_IO_TRACE is not set
 # CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
 
 #
 # IO Schedulers
@@ -229,18 +231,11 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 #
 # Bus options (PCI, PCMCIA, EISA, ISA, TC)
 #
+# CONFIG_ARCH_SUPPORTS_MSI is not set
 CONFIG_MMU=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
 # CONFIG_PCCARD is not set
 
 #
-# PCI Hotplug Support
-#
-
-#
 # Executable file formats
 #
 CONFIG_BINFMT_ELF=y
@@ -250,9 +245,8 @@ CONFIG_TRAD_SIGNALS=y
 #
 # Power management options
 #
-CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
-# CONFIG_PM_DEBUG is not set
+# CONFIG_PM is not set
+CONFIG_SUSPEND_UP_POSSIBLE=y
 
 #
 # Networking
@@ -262,75 +256,50 @@ CONFIG_NET=y
 #
 # Networking options
 #
-# CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-CONFIG_XFRM_MIGRATE=y
-CONFIG_NET_KEY=y
-CONFIG_NET_KEY_MIGRATE=y
+# CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_ASK_IP_FIB_HASH=y
 # CONFIG_IP_FIB_TRIE is not set
 CONFIG_IP_FIB_HASH=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
-CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_MULTIPLE_TABLES is not set
+# CONFIG_IP_ROUTE_MULTIPATH is not set
+# CONFIG_IP_ROUTE_VERBOSE is not set
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 # CONFIG_IP_PNP_RARP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
+# CONFIG_IP_MROUTE is not set
 # CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
+# CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 CONFIG_TCP_CONG_CUBIC=y
 CONFIG_DEFAULT_TCP_CONG="cubic"
-CONFIG_TCP_MD5SIG=y
+# CONFIG_TCP_MD5SIG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
-CONFIG_NETWORK_SECMARK=y
+# CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
 # CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-CONFIG_IP_SCTP=m
-# CONFIG_SCTP_DBG_MSG is not set
-# CONFIG_SCTP_DBG_OBJCNT is not set
-# CONFIG_SCTP_HMAC_NONE is not set
-# CONFIG_SCTP_HMAC_SHA1 is not set
-CONFIG_SCTP_HMAC_MD5=y
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
+# CONFIG_IP_SCTP is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -347,44 +316,7 @@ CONFIG_SCTP_HMAC_MD5=y
 #
 # QoS and/or fair queueing
 #
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_FIFO=y
-CONFIG_NET_SCH_CLK_JIFFIES=y
-# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
-# CONFIG_NET_SCH_CLK_CPU is not set
-
-#
-# Queueing/Scheduling
-#
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_INGRESS=m
-
-#
-# Classification
-#
-CONFIG_NET_CLS=y
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_ROUTE=y
-# CONFIG_NET_CLS_FW is not set
-# CONFIG_NET_CLS_U32 is not set
-# CONFIG_NET_CLS_RSVP is not set
-# CONFIG_NET_CLS_RSVP6 is not set
-# CONFIG_NET_EMATCH is not set
-# CONFIG_NET_CLS_ACT is not set
-# CONFIG_NET_CLS_POLICE is not set
-CONFIG_NET_ESTIMATOR=y
+# CONFIG_NET_SCHED is not set
 
 #
 # Network testing
@@ -393,8 +325,17 @@ CONFIG_NET_ESTIMATOR=y
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+
+#
+# Wireless
+#
+# CONFIG_CFG80211 is not set
+# CONFIG_WIRELESS_EXT is not set
+# CONFIG_MAC80211 is not set
 # CONFIG_IEEE80211 is not set
-CONFIG_FIB_RULES=y
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
 
 #
 # Device Drivers
@@ -403,52 +344,25 @@ CONFIG_FIB_RULES=y
 #
 # Generic Driver Options
 #
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 # CONFIG_FW_LOADER is not set
 # CONFIG_DEBUG_DRIVER is not set
 # CONFIG_DEBUG_DEVRES is not set
 # CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
 # CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
 # CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
 # CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNPACPI is not set
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
 # CONFIG_BLK_DEV_COW_COMMON is not set
 CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_CRYPTOLOOP is not set
 CONFIG_BLK_DEV_NBD=y
 # CONFIG_BLK_DEV_RAM is not set
-# CONFIG_BLK_DEV_INITRD is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
+# CONFIG_MISC_DEVICES is not set
 # CONFIG_IDE is not set
 
 #
@@ -456,48 +370,29 @@ CONFIG_BLK_DEV_NBD=y
 #
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
 # CONFIG_SCSI_NETLINK is not set
-
-#
-# Serial ATA (prod) and Parallel ATA (experimental) drivers
-#
 # CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
 # CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-
-#
-# I2O device support
-#
-
-#
-# Network device support
-#
 CONFIG_NETDEVICES=y
+# CONFIG_NETDEVICES_MULTIQUEUE is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
 # CONFIG_EQUALIZER is not set
 # CONFIG_TUN is not set
+# CONFIG_VETH is not set
 # CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
 CONFIG_NET_ETHERNET=y
 # CONFIG_MII is not set
+# CONFIG_AX88796 is not set
 CONFIG_MIPS_SIM_NET=y
 # CONFIG_DM9000 is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_B44 is not set
 # CONFIG_NETDEV_1000 is not set
 # CONFIG_NETDEV_10000 is not set
 
@@ -513,49 +408,18 @@ CONFIG_MIPS_SIM_NET=y
 # CONFIG_NETCONSOLE is not set
 # CONFIG_NETPOLL is not set
 # CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
 # CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
 # CONFIG_PHONE is not set
 
 #
 # Input device support
 #
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
+# CONFIG_INPUT is not set
 
 #
 # Hardware I/O ports
 #
-CONFIG_SERIO=y
-# CONFIG_SERIO_I8042 is not set
-CONFIG_SERIO_SERPORT=y
-# CONFIG_SERIO_LIBPS2 is not set
-# CONFIG_SERIO_RAW is not set
+# CONFIG_SERIO is not set
 # CONFIG_GAMEPORT is not set
 
 #
@@ -581,31 +445,13 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
 # CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
 # CONFIG_WATCHDOG is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_RTC is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
 # CONFIG_TCG_TPM is not set
-
-#
-# I2C support
-#
 # CONFIG_I2C is not set
 
 #
@@ -613,118 +459,60 @@ CONFIG_LEGACY_PTY_COUNT=256
 #
 # CONFIG_SPI is not set
 # CONFIG_SPI_MASTER is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
 
 #
-# Dallas's 1-wire bus
+# Sonics Silicon Backplane
 #
-# CONFIG_W1 is not set
+CONFIG_SSB_POSSIBLE=y
+# CONFIG_SSB is not set
 
 #
-# Hardware Monitoring support
+# Multifunction device drivers
 #
-# CONFIG_HWMON is not set
-# CONFIG_HWMON_VID is not set
+# CONFIG_MFD_SM501 is not set
 
 #
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_DAB is not set
 
 #
 # Graphics support
 #
-# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
 # CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 #
-# Sound
+# Display device support
 #
-# CONFIG_SOUND is not set
+# CONFIG_DISPLAY_SUPPORT is not set
 
 #
-# HID Devices
-#
-# CONFIG_HID is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
+# Sound
 #
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
 # CONFIG_MMC is not set
-
-#
-# LED devices
-#
 # CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
+CONFIG_RTC_LIB=y
 # CONFIG_RTC_CLASS is not set
 
 #
-# DMA Engine support
-#
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# Auxiliary Display support
-#
-
-#
-# Virtualization
+# Userspace I/O
 #
+# CONFIG_UIO is not set
 
 #
 # File systems
 #
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT2_FS is not set
 # CONFIG_EXT3_FS is not set
 # CONFIG_EXT4DEV_FS is not set
 # CONFIG_REISERFS_FS is not set
@@ -732,6 +520,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_FS_POSIX_ACL is not set
 # CONFIG_XFS_FS is not set
 # CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 CONFIG_ROMFS_FS=y
 # CONFIG_INOTIFY is not set
@@ -760,10 +549,11 @@ CONFIG_ROMFS_FS=y
 CONFIG_PROC_FS=y
 # CONFIG_PROC_KCORE is not set
 CONFIG_PROC_SYSCTL=y
-# CONFIG_SYSFS is not set
-# CONFIG_TMPFS is not set
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
 # CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
+# CONFIG_CONFIGFS_FS is not set
 
 #
 # Miscellaneous filesystems
@@ -781,10 +571,7 @@ CONFIG_RAMFS=y
 # CONFIG_QNX4FS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
+CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
 # CONFIG_NFS_V3_ACL is not set
@@ -796,6 +583,7 @@ CONFIG_LOCKD=y
 CONFIG_LOCKD_V4=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
+# CONFIG_SUNRPC_BIND34 is not set
 # CONFIG_RPCSEC_GSS_KRB5 is not set
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -803,22 +591,14 @@ CONFIG_SUNRPC=y
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
 
 #
 # Partition Types
 #
 # CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
 # CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
+# CONFIG_DLM is not set
 
 #
 # Profiling support
@@ -833,20 +613,22 @@ CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 CONFIG_ENABLE_MUST_CHECK=y
 # CONFIG_MAGIC_SYSRQ is not set
 # CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
-CONFIG_LOG_BUF_SHIFT=14
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_SLAB is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
 # CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
-CONFIG_DEBUG_MUTEXES=y
+# CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_LOCK_ALLOC is not set
 # CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
 # CONFIG_DEBUG_KOBJECT is not set
@@ -854,7 +636,9 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 CONFIG_FORCED_INLINING=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
 CONFIG_CROSSCOMPILE=y
 CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp"
 # CONFIG_DEBUG_STACK_USAGE is not set
@@ -865,60 +649,20 @@ CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp"
 # Security options
 #
 # CONFIG_KEYS is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_BLKCIPHER=m
-CONFIG_CRYPTO_HASH=y
-CONFIG_CRYPTO_MANAGER=y
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_XCBC=m
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_GF128MUL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_CBC=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_LRW=m
-# CONFIG_CRYPTO_DES is not set
-CONFIG_CRYPTO_FCRYPT=m
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-CONFIG_CRYPTO_CAMELLIA=m
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+# CONFIG_CRYPTO is not set
 
 #
 # Library routines
 #
-CONFIG_BITREVERSE=y
 # CONFIG_CRC_CCITT is not set
-CONFIG_CRC16=y
-CONFIG_CRC32=y
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_ITU_T is not set
+# CONFIG_CRC32 is not set
+# CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_PLIST=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
diff --git a/arch/mips/configs/sb1250-swarm_defconfig b/arch/mips/configs/sb1250-swarm_defconfig
index 3ed991ae0eb..49dfcef2518 100644
--- a/arch/mips/configs/sb1250-swarm_defconfig
+++ b/arch/mips/configs/sb1250-swarm_defconfig
@@ -196,6 +196,7 @@ CONFIG_SYSVIPC_SYSCTL=y
 # CONFIG_UTS_NS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_IKCONFIG is not set
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/mips/emma2rh/markeins/setup.c b/arch/mips/emma2rh/markeins/setup.c
index 5e1da53b04a..82f9e9013e7 100644
--- a/arch/mips/emma2rh/markeins/setup.c
+++ b/arch/mips/emma2rh/markeins/setup.c
@@ -104,12 +104,6 @@ void __init plat_time_init(void)
 	mips_hpt_frequency = (bus_frequency * (4 + reg)) / 4 / 2;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	/* we are using the cpu counter for timer interrupts */
-	setup_irq(CPU_IRQ_BASE + 7, irq);
-}
-
 static void markeins_board_init(void);
 extern void markeins_irq_setup(void);
 
diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c
index 08b84d476c8..a915e569342 100644
--- a/arch/mips/kernel/cevt-r4k.c
+++ b/arch/mips/kernel/cevt-r4k.c
@@ -10,6 +10,7 @@
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 
+#include <asm/smtc_ipi.h>
 #include <asm/time.h>
 
 static int mips_next_event(unsigned long delta,
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index b997af713eb..7852c7cdf29 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -1172,8 +1172,8 @@ static int irix_core_dump(long signr, struct pt_regs *regs, struct file *file, u
 	prstatus.pr_sighold = current->blocked.sig[0];
 	psinfo.pr_pid = prstatus.pr_pid = current->pid;
 	psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid;
-	psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current);
-	psinfo.pr_sid = prstatus.pr_sid = process_session(current);
+	psinfo.pr_pgrp = prstatus.pr_pgrp = task_pgrp_nr(current);
+	psinfo.pr_sid = prstatus.pr_sid = task_session_nr(current);
 	if (current->pid == current->tgid) {
 		/*
 		 * This is the record for the group leader.  Add in the
diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c
index 85c2e389edd..a0a91056fda 100644
--- a/arch/mips/kernel/irixsig.c
+++ b/arch/mips/kernel/irixsig.c
@@ -609,7 +609,7 @@ repeat:
 		p = list_entry(_p, struct task_struct, sibling);
 		if ((type == IRIX_P_PID) && p->pid != pid)
 			continue;
-		if ((type == IRIX_P_PGID) && process_group(p) != pid)
+		if ((type == IRIX_P_PGID) && task_pgrp_nr(p) != pid)
 			continue;
 		if ((p->exit_signal != SIGCHLD))
 			continue;
diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c
index ee7790d9deb..4c477c7ff74 100644
--- a/arch/mips/kernel/sysirix.c
+++ b/arch/mips/kernel/sysirix.c
@@ -763,11 +763,11 @@ asmlinkage int irix_setpgrp(int flags)
 	printk("[%s:%d] setpgrp(%d) ", current->comm, current->pid, flags);
 #endif
 	if(!flags)
-		error = process_group(current);
+		error = task_pgrp_nr(current);
 	else
 		error = sys_setsid();
 #ifdef DEBUG_PROCGRPS
-	printk("returning %d\n", process_group(current));
+	printk("returning %d\n", task_pgrp_nr(current));
 #endif
 
 	return error;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index ea7cfe766a8..c4e6866d5cb 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -40,17 +40,6 @@
 #include <irq.h>
 
 /*
- * The integer part of the number of usecs per jiffy is taken from tick,
- * but the fractional part is not recorded, so we calculate it using the
- * initial value of HZ.  This aids systems where tick isn't really an
- * integer (e.g. for HZ = 128).
- */
-#define USECS_PER_JIFFY		TICK_SIZE
-#define USECS_PER_JIFFY_FRAC	((unsigned long)(u32)((1000000ULL << 32) / HZ))
-
-#define TICK_SIZE	(tick_nsec / 1000)
-
-/*
  * forward reference
  */
 DEFINE_SPINLOCK(rtc_lock);
@@ -182,84 +171,59 @@ struct clocksource clocksource_mips = {
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init init_mips_clocksource(void)
+void __init clocksource_set_clock(struct clocksource *cs, unsigned int clock)
 {
 	u64 temp;
 	u32 shift;
 
-	if (!mips_hpt_frequency || clocksource_mips.read == null_hpt_read)
-		return;
-
-	/* Calclate a somewhat reasonable rating value */
-	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 	/* Find a shift value */
 	for (shift = 32; shift > 0; shift--) {
 		temp = (u64) NSEC_PER_SEC << shift;
-		do_div(temp, mips_hpt_frequency);
+		do_div(temp, clock);
 		if ((temp >> 32) == 0)
 			break;
 	}
-	clocksource_mips.shift = shift;
-	clocksource_mips.mult = (u32)temp;
-
-	clocksource_register(&clocksource_mips);
+	cs->shift = shift;
+	cs->mult = (u32) temp;
 }
 
-void __init __weak plat_time_init(void)
+void __cpuinit clockevent_set_clock(struct clock_event_device *cd,
+	unsigned int clock)
 {
+	u64 temp;
+	u32 shift;
+
+	/* Find a shift value */
+	for (shift = 32; shift > 0; shift--) {
+		temp = (u64) NSEC_PER_SEC << shift;
+		do_div(temp, clock);
+		if ((temp >> 32) == 0)
+			break;
+	}
+	cd->shift = shift;
+	cd->mult = (u32) temp;
 }
 
-void __init __weak plat_timer_setup(struct irqaction *irq)
+static void __init init_mips_clocksource(void)
 {
-}
+	if (!mips_hpt_frequency || clocksource_mips.read == null_hpt_read)
+		return;
 
-#ifdef CONFIG_MIPS_MT_SMTC
-DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device);
+	/* Calclate a somewhat reasonable rating value */
+	clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
 
-static void smtc_set_mode(enum clock_event_mode mode,
-                          struct clock_event_device *evt)
-{
+	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
+
+	clocksource_register(&clocksource_mips);
 }
 
-static void mips_broadcast(cpumask_t mask)
+void __init __weak plat_time_init(void)
 {
-	unsigned int cpu;
-
-	for_each_cpu_mask(cpu, mask)
-		smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0);
 }
 
-static void setup_smtc_dummy_clockevent_device(void)
+void __init __weak plat_timer_setup(struct irqaction *irq)
 {
-	//uint64_t mips_freq = mips_hpt_^frequency;
-	unsigned int cpu = smp_processor_id();
-	struct clock_event_device *cd;
-
-	cd = &per_cpu(smtc_dummy_clockevent_device, cpu);
-
-	cd->name		= "SMTC";
-	cd->features		= CLOCK_EVT_FEAT_DUMMY;
-
-	/* Calculate the min / max delta */
-	cd->mult	= 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32);
-	cd->shift		= 0; //32;
-	cd->max_delta_ns	= 0; //clockevent_delta2ns(0x7fffffff, cd);
-	cd->min_delta_ns	= 0; //clockevent_delta2ns(0x30, cd);
-
-	cd->rating		= 200;
-	cd->irq			= 17; //-1;
-//	if (cpu)
-//		cd->cpumask	= CPU_MASK_ALL; // cpumask_of_cpu(cpu);
-//	else
-		cd->cpumask	= cpumask_of_cpu(cpu);
-
-	cd->set_mode		= smtc_set_mode;
-
-	cd->broadcast		= mips_broadcast;
-
-	clockevents_register_device(cd);
 }
-#endif
 
 void __init time_init(void)
 {
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 7b78d137259..fa500787152 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -314,7 +314,7 @@ void show_registers(const struct pt_regs *regs)
 	__show_regs(regs);
 	print_modules();
 	printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
-	        current->comm, current->pid, current_thread_info(), current);
+	        current->comm, task_pid_nr(current), current_thread_info(), current);
 	show_stacktrace(current, regs);
 	show_code((unsigned int __user *) regs->cp0_epc);
 	printk("\n");
diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c
index 09314a20f9f..2cc6745991a 100644
--- a/arch/mips/lemote/lm2e/setup.c
+++ b/arch/mips/lemote/lm2e/setup.c
@@ -53,11 +53,6 @@ unsigned long bus_clock;
 unsigned int memsize;
 unsigned int highmemsize = 0;
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(MIPS_CPU_IRQ_BASE + 7, irq);
-}
-
 void __init plat_time_init(void)
 {
 	/* setup mips r4k timer */
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 5699c7713e2..fa636fc6b7b 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -173,7 +173,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/mips/oprofile/Kconfig b/arch/mips/oprofile/Kconfig
deleted file mode 100644
index fb6f235348b..00000000000
--- a/arch/mips/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING && !MIPS_MT_SMTC && EXPERIMENTAL
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index f221d476362..7cfeda5a651 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -86,8 +86,5 @@ void __init plat_timer_setup(struct irqaction *irq)
 #ifdef CONFIG_IRQ_MSP_CIC
 	/* we are using the vpe0 counter for timer interrupts */
 	setup_irq(MSP_INT_VPE0_TIMER, irq);
-#else
-	/* we are using the mips counter for timer interrupts */
-	setup_irq(MSP_INT_TIMER, irq);
 #endif
 }
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 015fcc363dc..855977ca51c 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -137,11 +137,6 @@ int rtc_mips_set_time(unsigned long tim)
 	return 0;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(7, irq);
-}
-
 void __init plat_time_init(void)
 {
 	mips_hpt_frequency = cpu_clock_freq / 2;
diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c
index 6eac36d1b8c..02b266a31c4 100644
--- a/arch/mips/sibyte/bcm1480/smp.c
+++ b/arch/mips/sibyte/bcm1480/smp.c
@@ -69,8 +69,9 @@ void bcm1480_smp_init(void)
 
 void bcm1480_smp_finish(void)
 {
-	extern void bcm1480_time_init(void);
-	bcm1480_time_init();
+	extern void sb1480_clockevent_init(void);
+
+	sb1480_clockevent_init();
 	local_irq_enable();
 }
 
diff --git a/arch/mips/sibyte/bcm1480/time.c b/arch/mips/sibyte/bcm1480/time.c
index 5b4bfbbb5a2..c730744aa47 100644
--- a/arch/mips/sibyte/bcm1480/time.c
+++ b/arch/mips/sibyte/bcm1480/time.c
@@ -27,9 +27,8 @@
  */
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
-#include <linux/sched.h>
+#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
 
 #include <asm/irq.h>
 #include <asm/addrspace.h>
@@ -101,25 +100,36 @@ static void sibyte_set_mode(enum clock_event_mode mode,
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
+	case CLOCK_EVT_MODE_RESUME:
 		;
 	}
 }
 
-struct clock_event_device sibyte_hpt_clockevent = {
-	.name		= "bcm1480-counter",
-	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.set_mode	= sibyte_set_mode,
-	.shift		= 32,
-	.irq		= 0,
-};
+static int sibyte_next_event(unsigned long delta, struct clock_event_device *cd)
+{
+	unsigned int cpu = smp_processor_id();
+	void __iomem *timer_init;
+	unsigned int cnt;
+	int res;
+
+	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
+	cnt = __raw_readq(timer_init);
+	cnt += delta;
+	__raw_writeq(cnt, timer_init);
+	res = ((long)(__raw_readq(timer_init) - cnt ) > 0) ? -ETIME : 0;
+
+	return res;
+}
+
+static DEFINE_PER_CPU(struct clock_event_device, sibyte_hpt_clockevent);
 
 static irqreturn_t sibyte_counter_handler(int irq, void *dev_id)
 {
-	struct clock_event_device *cd = &sibyte_hpt_clockevent;
 	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *cd = &per_cpu(sibyte_hpt_clockevent, cpu);
 
 	/* Reset the timer */
-	__raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS,
+	__raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
 	             IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
 	cd->event_handler(cd);
 
@@ -140,24 +150,21 @@ static struct irqaction sibyte_counter_irqaction = {
  * called directly from irq_handler.S when IP[4] is set during an
  * interrupt
  */
-static void __init sb1480_clockevent_init(void)
+void __cpuinit sb1480_clockevent_init(void)
 {
 	unsigned int cpu = smp_processor_id();
 	unsigned int irq = K_BCM1480_INT_TIMER_0 + cpu;
+	struct clock_event_device *cd = &per_cpu(sibyte_hpt_clockevent, cpu);
 
-	setup_irq(irq, &sibyte_counter_irqaction);
-}
+	cd->name		= "bcm1480-counter";
+	cd->features		= CLOCK_EVT_FEAT_PERIODIC |
+				  CLOCK_EVT_MODE_ONESHOT;
+	cd->set_next_event	= sibyte_next_event;
+	cd->set_mode		= sibyte_set_mode;
+	cd->irq			= irq;
+	clockevent_set_clock(cd, BCM1480_HPT_VALUE);
 
-void bcm1480_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	int irq = K_BCM1480_INT_TIMER_0 + cpu;
-
-	/* Reset the timer */
-	__raw_writeq(M_SCD_TIMER_ENABLE|M_SCD_TIMER_MODE_CONTINUOUS,
-	      IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
-
-	ll_timer_interrupt(irq);
+	setup_irq(irq, &sibyte_counter_irqaction);
 }
 
 static cycle_t bcm1480_hpt_read(void)
@@ -168,9 +175,26 @@ static cycle_t bcm1480_hpt_read(void)
 	return (jiffies + 1) * (BCM1480_HPT_VALUE / HZ) - count;
 }
 
+struct clocksource bcm1480_clocksource = {
+	.name	= "MIPS",
+	.rating	= 200,
+	.read	= bcm1480_hpt_read,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.shift	= 32,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init sb1480_clocksource_init(void)
+{
+	struct clocksource *cs = &bcm1480_clocksource;
+
+	clocksource_set_clock(cs, BCM1480_HPT_VALUE);
+	clocksource_register(cs);
+}
+
 void __init bcm1480_hpt_setup(void)
 {
-	clocksource_mips.read = bcm1480_hpt_read;
 	mips_hpt_frequency = BCM1480_HPT_VALUE;
+	sb1480_clocksource_init();
 	sb1480_clockevent_init();
 }
diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c
index 7659174819c..500d17e84c0 100644
--- a/arch/mips/sibyte/sb1250/irq.c
+++ b/arch/mips/sibyte/sb1250/irq.c
@@ -400,43 +400,11 @@ static void sb1250_kgdb_interrupt(void)
 
 #endif 	/* CONFIG_KGDB */
 
-static inline void sb1250_timer_interrupt(void)
-{
-	int cpu = smp_processor_id();
-	int irq = K_INT_TIMER_0 + cpu;
-
-	irq_enter();
-	kstat_this_cpu.irqs[irq]++;
-
-	write_seqlock(&xtime_lock);
-
-	/* ACK interrupt */
-	____raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
-		       IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG)));
-
-	/*
-	 * call the generic timer interrupt handling
-	 */
-	do_timer(1);
-
-	write_sequnlock(&xtime_lock);
-
-	/*
-	 * In UP mode, we call local_timer_interrupt() to do profiling
-	 * and process accouting.
-	 *
-	 * In SMP mode, local_timer_interrupt() is invoked by appropriate
-	 * low-level local timer interrupt handler.
-	 */
-	local_timer_interrupt(irq);
-
-	irq_exit();
-}
-
 extern void sb1250_mailbox_interrupt(void);
 
 asmlinkage void plat_irq_dispatch(void)
 {
+	unsigned int cpu = smp_processor_id();
 	unsigned int pending;
 
 	/*
@@ -454,7 +422,7 @@ asmlinkage void plat_irq_dispatch(void)
 	if (pending & CAUSEF_IP7) /* CPU performance counter interrupt */
 		do_IRQ(MIPS_CPU_IRQ_BASE + 7);
 	else if (pending & CAUSEF_IP4)
-		sb1250_timer_interrupt();
+		do_IRQ(K_INT_TIMER_0 + cpu); 	/* sb1250_timer_interrupt() */
 
 #ifdef CONFIG_SMP
 	else if (pending & CAUSEF_IP3)
diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c
index c38e1f34460..aaa4f30dda7 100644
--- a/arch/mips/sibyte/sb1250/smp.c
+++ b/arch/mips/sibyte/sb1250/smp.c
@@ -57,8 +57,9 @@ void sb1250_smp_init(void)
 
 void sb1250_smp_finish(void)
 {
-	extern void sb1250_time_init(void);
-	sb1250_time_init();
+	extern void sb1250_clockevent_init(void);
+
+	sb1250_clockevent_init();
 	local_irq_enable();
 }
 
diff --git a/arch/mips/sibyte/sb1250/time.c b/arch/mips/sibyte/sb1250/time.c
index fe11fed8e0d..9ef54628bc9 100644
--- a/arch/mips/sibyte/sb1250/time.c
+++ b/arch/mips/sibyte/sb1250/time.c
@@ -100,6 +100,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
+	case CLOCK_EVT_MODE_RESUME:
 		;
 	}
 }
@@ -144,79 +145,7 @@ static struct irqaction sibyte_irqaction = {
 	.name		= "timer",
 };
 
-/*
- * The general purpose timer ticks at 1 Mhz independent if
- * the rest of the system
- */
-static void sibyte_set_mode(enum clock_event_mode mode,
-                           struct clock_event_device *evt)
-{
-	unsigned int cpu = smp_processor_id();
-	void __iomem *timer_cfg, *timer_init;
-
-	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
-
-	switch (mode) {
-	case CLOCK_EVT_MODE_PERIODIC:
-		__raw_writeq(0, timer_cfg);
-		__raw_writeq((V_SCD_TIMER_FREQ / HZ) - 1, timer_init);
-		__raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS,
-			     timer_cfg);
-		break;
-
-	case CLOCK_EVT_MODE_ONESHOT:
-		/* Stop the timer until we actually program a shot */
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		__raw_writeq(0, timer_cfg);
-		break;
-
-	case CLOCK_EVT_MODE_UNUSED:	/* shuddup gcc */
-		;
-	}
-}
-
-static int
-sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
-{
-	unsigned int cpu = smp_processor_id();
-	void __iomem *timer_cfg, *timer_init;
-
-	timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-	timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
-
-	__raw_writeq(0, timer_cfg);
-	__raw_writeq(delta, timer_init);
-	__raw_writeq(M_SCD_TIMER_ENABLE, timer_cfg);
-
-	return 0;
-}
-
-struct clock_event_device sibyte_hpt_clockevent = {
-	.name		= "sb1250-counter",
-	.features	= CLOCK_EVT_FEAT_PERIODIC,
-	.set_mode	= sibyte_set_mode,
-	.set_next_event	= sibyte_next_event,
-	.shift		= 32,
-	.irq		= 0,
-};
-
-static irqreturn_t sibyte_counter_handler(int irq, void *dev_id)
-{
-	struct clock_event_device *cd = &sibyte_hpt_clockevent;
-
-	cd->event_handler(cd);
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction sibyte_irqaction = {
-	.handler	= sibyte_counter_handler,
-	.flags		= IRQF_DISABLED | IRQF_PERCPU,
-	.name		= "timer",
-};
-
-static void __init sb1250_clockevent_init(void)
+void __cpuinit sb1250_clockevent_init(void)
 {
 	struct clock_event_device *cd = &sibyte_hpt_clockevent;
 	unsigned int cpu = smp_processor_id();
@@ -249,12 +178,6 @@ static void __init sb1250_clockevent_init(void)
 	clockevents_register_device(cd);
 }
 
-void __init plat_time_init(void)
-{
-	sb1250_clocksource_init();
-	sb1250_clockevent_init();
-}
-
 /*
  * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over
  * again.
@@ -267,3 +190,26 @@ static cycle_t sb1250_hpt_read(void)
 
 	return SB1250_HPT_VALUE - count;
 }
+
+struct clocksource bcm1250_clocksource = {
+	.name	= "MIPS",
+	.rating	= 200,
+	.read	= sb1250_hpt_read,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.shift	= 32,
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init sb1250_clocksource_init(void)
+{
+	struct clocksource *cs = &bcm1250_clocksource;
+
+	clocksource_set_clock(cs, V_SCD_TIMER_FREQ);
+	clocksource_register(cs);
+}
+
+void __init plat_time_init(void)
+{
+	sb1250_clocksource_init();
+	sb1250_clockevent_init();
+}
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 8b3ef0e4cd5..080c966263b 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -69,31 +69,6 @@ const char *get_system_type(void)
 	return "SiByte " SIBYTE_BOARD_NAME;
 }
 
-void __init plat_time_init(void)
-{
-#if defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
-	/* Setup HPT */
-	sb1250_hpt_setup();
-#endif
-}
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-        /*
-         * we don't set up irqaction, because we will deliver timer
-         * interrupts through low-level (direct) meachanism.
-         */
-
-        /* We only need to setup the generic timer */
-#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
-	bcm1480_time_init();
-#elif defined(CONFIG_SIBYTE_SB1250) || defined(CONFIG_SIBYTE_BCM112X)
-	sb1250_time_init();
-#else
-#error invalid SiByte board configuration
-#endif
-}
-
 int swarm_be_handler(struct pt_regs *regs, int is_fixup)
 {
 	if (!is_fixup && (regs->cp0_cause & 4)) {
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index b80877349d3..0910b35cb71 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -121,15 +121,6 @@ void __init plat_time_init(void)
 	setup_pit_timer();
 }
 
-/*
- * R4k counter based timer interrupt. Works on RM200-225 and possibly
- * others but not on RM400
- */
-static void __init sni_cpu_timer_setup(struct irqaction *irq)
-{
-        setup_irq(SNI_MIPS_IRQ_CPU_TIMER, irq);
-}
-
 void __init plat_timer_setup(struct irqaction *irq)
 {
 	switch (sni_brd_type) {
@@ -139,15 +130,6 @@ void __init plat_timer_setup(struct irqaction *irq)
 	case SNI_BRD_MINITOWER:
 	        sni_a20r_timer_setup(irq);
 	        break;
-
-	case SNI_BRD_PCI_TOWER:
-	case SNI_BRD_RM200:
-	case SNI_BRD_PCI_MTOWER:
-	case SNI_BRD_PCI_DESKTOP:
-	case SNI_BRD_PCI_TOWER_CPLUS:
-	case SNI_BRD_PCI_MTOWER_CPLUS:
-	        sni_cpu_timer_setup(irq);
-	        break;
 	}
 }
 
diff --git a/arch/mips/tx4927/common/tx4927_setup.c b/arch/mips/tx4927/common/tx4927_setup.c
index 8ce0989671d..36c5f200eb3 100644
--- a/arch/mips/tx4927/common/tx4927_setup.c
+++ b/arch/mips/tx4927/common/tx4927_setup.c
@@ -72,22 +72,6 @@ void __init plat_time_init(void)
 #endif
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TX4927_IRQ_CPU_TIMER, irq);
-
-#ifdef CONFIG_TOSHIBA_RBTX4927
-	{
-		extern void toshiba_rbtx4927_timer_setup(struct irqaction
-							 *irq);
-		toshiba_rbtx4927_timer_setup(irq);
-	}
-#endif
-
-	return;
-}
-
-
 #ifdef DEBUG
 void print_cp0(char *key, int num, char *name, u32 val)
 {
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index b97102a1c63..c7470fba618 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -94,7 +94,6 @@
 #define TOSHIBA_RBTX4927_SETUP_EFWFU       ( 1 <<  3 )
 #define TOSHIBA_RBTX4927_SETUP_SETUP       ( 1 <<  4 )
 #define TOSHIBA_RBTX4927_SETUP_TIME_INIT   ( 1 <<  5 )
-#define TOSHIBA_RBTX4927_SETUP_TIMER_SETUP ( 1 <<  6 )
 #define TOSHIBA_RBTX4927_SETUP_PCIBIOS     ( 1 <<  7 )
 #define TOSHIBA_RBTX4927_SETUP_PCI1        ( 1 <<  8 )
 #define TOSHIBA_RBTX4927_SETUP_PCI2        ( 1 <<  9 )
@@ -108,7 +107,6 @@ static const u32 toshiba_rbtx4927_setup_debug_flag =
     (TOSHIBA_RBTX4927_SETUP_NONE | TOSHIBA_RBTX4927_SETUP_INFO |
      TOSHIBA_RBTX4927_SETUP_WARN | TOSHIBA_RBTX4927_SETUP_EROR |
      TOSHIBA_RBTX4927_SETUP_EFWFU | TOSHIBA_RBTX4927_SETUP_SETUP |
-     TOSHIBA_RBTX4927_SETUP_TIME_INIT | TOSHIBA_RBTX4927_SETUP_TIMER_SETUP
      | TOSHIBA_RBTX4927_SETUP_PCIBIOS | TOSHIBA_RBTX4927_SETUP_PCI1 |
      TOSHIBA_RBTX4927_SETUP_PCI2 | TOSHIBA_RBTX4927_SETUP_PCI66);
 #endif
@@ -947,14 +945,6 @@ toshiba_rbtx4927_time_init(void)
 
 }
 
-void __init toshiba_rbtx4927_timer_setup(struct irqaction *irq)
-{
-	TOSHIBA_RBTX4927_SETUP_DPRINTK(TOSHIBA_RBTX4927_SETUP_TIMER_SETUP,
-				       "-\n");
-	TOSHIBA_RBTX4927_SETUP_DPRINTK(TOSHIBA_RBTX4927_SETUP_TIMER_SETUP,
-				       "+\n");
-}
-
 static int __init toshiba_rbtx4927_rtc_init(void)
 {
 	static struct resource __initdata res = {
diff --git a/arch/mips/tx4938/common/setup.c b/arch/mips/tx4938/common/setup.c
index ab408226755..3ba4101d141 100644
--- a/arch/mips/tx4938/common/setup.c
+++ b/arch/mips/tx4938/common/setup.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/irq.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/bootinfo.h>
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -43,8 +43,3 @@ plat_mem_setup(void)
 {
 	toshiba_rbtx4938_setup();
 }
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TX4938_IRQ_CPU_TIMER, irq);
-}
diff --git a/arch/mips/vr41xx/common/init.c b/arch/mips/vr41xx/common/init.c
index 407cec203b2..8d760df686c 100644
--- a/arch/mips/vr41xx/common/init.c
+++ b/arch/mips/vr41xx/common/init.c
@@ -48,11 +48,6 @@ void __init plat_time_init(void)
 		mips_hpt_frequency = tclock / 4;
 }
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(TIMER_IRQ, irq);
-}
-
 void __init plat_mem_setup(void)
 {
 	vr41xx_calculate_clock_frequency();
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3d73545e8c4..b8ef1787a19 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -267,7 +267,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/parisc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/parisc/Kconfig.debug"
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index fb35ebc0c4d..2ce3806f02e 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -181,7 +181,7 @@ give_sigsegv:
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
-	si.si_pid = current->pid;
+	si.si_pid = task_pid_vnr(current);
 	si.si_uid = current->uid;
 	si.si_addr = &frame->uc;
 	force_sig_info(SIGSEGV, &si, current);
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index bbf029a184a..99fd56939af 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -219,7 +219,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err)
 			return; /* STFU */
 
 		printk(KERN_CRIT "%s (pid %d): %s (code %ld) at " RFMT "\n",
-			current->comm, current->pid, str, err, regs->iaoq[0]);
+			current->comm, task_pid_nr(current), str, err, regs->iaoq[0]);
 #ifdef PRINT_USER_FAULTS
 		/* XXX for debugging only */
 		show_regs(regs);
@@ -252,7 +252,7 @@ KERN_CRIT "                     ||     ||\n");
 	
 	if (err)
 		printk(KERN_CRIT "%s (pid %d): %s (code %ld)\n",
-			current->comm, current->pid, str, err);
+			current->comm, task_pid_nr(current), str, err);
 
 	/* Wot's wrong wif bein' racy? */
 	if (current->thread.flags & PARISC_KERNEL_DEATH) {
@@ -317,7 +317,7 @@ static void handle_break(struct pt_regs *regs)
 	if (unlikely(iir != GDB_BREAK_INSN)) {
 		printk(KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
 			iir & 31, (iir>>13) & ((1<<13)-1),
-			current->pid, current->comm);
+			task_pid_nr(current), current->comm);
 		show_regs(regs);
 	}
 #endif
@@ -747,7 +747,7 @@ void handle_interruption(int code, struct pt_regs *regs)
 		if (user_mode(regs)) {
 #ifdef PRINT_USER_FAULTS
 			printk(KERN_DEBUG "\nhandle_interruption() pid=%d command='%s'\n",
-			    current->pid, current->comm);
+			    task_pid_nr(current), current->comm);
 			show_regs(regs);
 #endif
 			/* SIGBUS, for lack of a better one. */
@@ -772,7 +772,7 @@ void handle_interruption(int code, struct pt_regs *regs)
 		else
 			printk(KERN_DEBUG "User Fault (long pointer) (fault %d) ",
 			       code);
-		printk("pid=%d command='%s'\n", current->pid, current->comm);
+		printk("pid=%d command='%s'\n", task_pid_nr(current), current->comm);
 		show_regs(regs);
 #endif
 		si.si_signo = SIGSEGV;
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 347bb922e6d..aebf3c16887 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -469,7 +469,7 @@ void handle_unaligned(struct pt_regs *regs)
 		    && ++unaligned_count < 5) {
 			char buf[256];
 			sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n",
-				current->comm, current->pid, regs->ior, regs->iaoq[0]);
+				current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]);
 			printk(KERN_WARNING "%s", buf);
 #ifdef DEBUG_UNALIGNED
 			show_regs(regs);
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 1c091b415cd..b2e3e9a8cec 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -211,7 +211,7 @@ bad_area:
 #ifdef PRINT_USER_FAULTS
 		printk(KERN_DEBUG "\n");
 		printk(KERN_DEBUG "do_page_fault() pid=%d command='%s' type=%lu address=0x%08lx\n",
-		    tsk->pid, tsk->comm, code, address);
+		    task_pid_nr(tsk), tsk->comm, code, address);
 		if (vma) {
 			printk(KERN_DEBUG "vm_start = 0x%08lx, vm_end = 0x%08lx\n",
 					vma->vm_start, vma->vm_end);
diff --git a/arch/parisc/oprofile/Kconfig b/arch/parisc/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b9..00000000000
--- a/arch/parisc/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3763f681ce4..18f397ca05e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -669,20 +669,7 @@ source "arch/powerpc/sysdev/qe_lib/Kconfig"
 
 source "lib/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/powerpc/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on !BOOKE && !4xx && KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/powerpc/Kconfig.debug"
 
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 8b47c846421..dcd7c02727c 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -68,6 +68,7 @@ CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=15
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index bb8d4e46f0c..05582af50c5 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -71,6 +71,7 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index c09eb8cfbe7..62a38406b62 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -71,6 +71,7 @@ CONFIG_AUDITSYSCALL=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
+CONFIG_CGROUPS=y
 CONFIG_CPUSETS=y
 CONFIG_SYSFS_DEPRECATED=y
 # CONFIG_RELAY is not set
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index e60a0c544d6..c0c8e8c3ced 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -61,45 +61,39 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	for(;;);
 }
 
-static int __init early_parse_crashk(char *p)
-{
-	unsigned long size;
-
-	if (!p)
-		return 1;
-
-	size = memparse(p, &p);
-
-	if (*p == '@')
-		crashk_res.start = memparse(p + 1, &p);
-	else
-		crashk_res.start = KDUMP_KERNELBASE;
-
-	crashk_res.end = crashk_res.start + size - 1;
-
-	return 0;
-}
-early_param("crashkernel", early_parse_crashk);
-
 void __init reserve_crashkernel(void)
 {
-	unsigned long size;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	/* this is necessary because of lmb_phys_mem_size() */
+	lmb_analyze();
+
+	/* use common parsing */
+	ret = parse_crashkernel(boot_command_line, lmb_phys_mem_size(),
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		if (crash_base == 0)
+			crash_base = KDUMP_KERNELBASE;
+		crashk_res.start = crash_base;
+	} else {
+		/* handle the device tree */
+		crash_size = crashk_res.end - crashk_res.start + 1;
+	}
 
-	if (crashk_res.start == 0)
+	if (crash_size == 0)
 		return;
 
 	/* We might have got these values via the command line or the
 	 * device tree, either way sanitise them now. */
 
-	size = crashk_res.end - crashk_res.start + 1;
-
 	if (crashk_res.start != KDUMP_KERNELBASE)
 		printk("Crash kernel location must be 0x%x\n",
 				KDUMP_KERNELBASE);
 
 	crashk_res.start = KDUMP_KERNELBASE;
-	size = PAGE_ALIGN(size);
-	crashk_res.end = crashk_res.start + size - 1;
+	crash_size = PAGE_ALIGN(crash_size);
+	crashk_res.end = crashk_res.start + crash_size - 1;
 
 	/* Crash kernel trumps memory limit */
 	if (memory_limit && memory_limit <= crashk_res.end) {
@@ -108,7 +102,13 @@ void __init reserve_crashkernel(void)
 				memory_limit);
 	}
 
-	lmb_reserve(crashk_res.start, size);
+	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+			"for crashkernel (System RAM: %ldMB)\n",
+			(unsigned long)(crash_size >> 20),
+			(unsigned long)(crashk_res.start >> 20),
+			(unsigned long)(lmb_phys_mem_size() >> 20));
+
+	lmb_reserve(crashk_res.start, crash_size);
 }
 
 int overlaps_crashkernel(unsigned long start, unsigned long size)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ea6ad7a2a7e..b9d88374f14 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -459,7 +459,7 @@ void show_regs(struct pt_regs * regs)
 		printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
 #endif
 	printk("TASK = %p[%d] '%s' THREAD: %p",
-	       current, current->pid, current->comm, task_thread_info(current));
+	       current, task_pid_nr(current), current->comm, task_thread_info(current));
 
 #ifdef CONFIG_SMP
 	printk(" CPU: %d", smp_processor_id());
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bf9e39c6e29..59c464e26f3 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -201,7 +201,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
@@ -881,7 +881,7 @@ void nonrecoverable_exception(struct pt_regs *regs)
 void trace_syscall(struct pt_regs *regs)
 {
 	printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
-	       current, current->pid, regs->nip, regs->link, regs->gpr[0],
+	       current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
 	       regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
 }
 
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ab3546c5ac3..a18fda361cc 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -375,7 +375,7 @@ bad_area_nosemaphore:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig
deleted file mode 100644
index 7089e79689b..00000000000
--- a/arch/powerpc/oprofile/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-config OPROFILE_CELL
-	bool "OProfile for Cell Broadband Engine"
-	depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
-	default y
-	help
-	  Profiling of Cell BE SPUs requires special support enabled
-	  by this option.
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index 354c0586162..144177d77cf 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -41,13 +41,13 @@
 #include <linux/root_dev.h>
 #include <linux/serial.h>
 #include <linux/smp.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/kexec.h>
 #include <asm/pci-bridge.h>
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3a393c7f390..a1ab25c7082 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -332,7 +332,7 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
 		   err->disposition == RTAS_DISP_NOT_RECOVERED &&
 		   err->target == RTAS_TARGET_MEMORY &&
 		   err->type == RTAS_TYPE_ECC_UNCORR &&
-		   !(current->pid == 0 || is_init(current))) {
+		   !(current->pid == 0 || is_global_init(current))) {
 		/* Kill off a user process with an ECC error */
 		printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
 		       current->pid);
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index 607925c8a99..6473fa7cb4b 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -1317,7 +1317,7 @@ endmenu
 
 source "lib/Kconfig"
 
-source "arch/powerpc/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/ppc/Kconfig.debug"
 
diff --git a/arch/ppc/kernel/traps.c b/arch/ppc/kernel/traps.c
index 3f3b292eb77..c78568905c3 100644
--- a/arch/ppc/kernel/traps.c
+++ b/arch/ppc/kernel/traps.c
@@ -121,7 +121,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 	 * generate the same exception over and over again and we get
 	 * nowhere.  Better to kill it and let the kernel panic.
 	 */
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		__sighandler_t handler;
 
 		spin_lock_irq(&current->sighand->siglock);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index 94913ddcf76..254c23b755e 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -290,7 +290,7 @@ bad_area:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/ppc/platforms/chestnut.c b/arch/ppc/platforms/chestnut.c
index 248684f50dd..dcd6070b85e 100644
--- a/arch/ppc/platforms/chestnut.c
+++ b/arch/ppc/platforms/chestnut.c
@@ -49,7 +49,6 @@ extern void gen550_progress(char *, unsigned short);
 extern void gen550_init(int, struct uart_port *);
 extern void mv64360_pcibios_fixup(mv64x60_handle_t *bh);
 
-#define BIT(x) (1<<x)
 #define CHESTNUT_PRESERVE_MASK (BIT(MV64x60_CPU2DEV_0_WIN) | \
 				BIT(MV64x60_CPU2DEV_1_WIN) | \
 				BIT(MV64x60_CPU2DEV_2_WIN) | \
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b71132166f6..4ec716d8c1a 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -529,21 +529,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/s390/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.	register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/s390/Kconfig.debug"
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index abb447a3e47..70c57378f42 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -166,7 +166,7 @@ void show_regs(struct pt_regs *regs)
 
         printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
         printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-	       current->comm, current->pid, (void *) tsk,
+	       current->comm, task_pid_nr(current), (void *) tsk,
 	       (void *) tsk->thread.ksp);
 
 	show_registers(regs);
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 60604b2819b..b159a9d6568 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -64,7 +64,7 @@ out:
 
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 14c241ccdd4..2456b52ed06 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -211,7 +211,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
 	struct mm_struct *mm = tsk->mm;
 
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		return 1;
diff --git a/arch/s390/oprofile/Kconfig b/arch/s390/oprofile/Kconfig
deleted file mode 100644
index 208220a5f23..00000000000
--- a/arch/s390/oprofile/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
-
-menu "Profiling support"
-
-config PROFILING
-	bool "Profiling support"
-	help
-	  Say Y here to enable profiling support mechanisms used by
-	  profilers such as readprofile or OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 44982c1dfa2..247f8a65e73 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -758,7 +758,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/sh/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh/Kconfig.debug"
 
diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index 790ed69b866..5c17de51987 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -104,24 +104,3 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	(*rnk)(page_list, reboot_code_buffer, image->start, vbr_reg);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	size = memparse(arg, &arg);
-	if (*arg == '@') {
-		base = memparse(arg+1, &arg);
-		/* FIXME: Do I want a sanity check
-		 * to validate the memory range?
-		 */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", parse_crashkernel);
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index b4469992d6b..6d7f2b07e49 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -121,7 +121,7 @@ void machine_power_off(void)
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
-	printk("Pid : %d, Comm: %20s\n", current->pid, current->comm);
+	printk("Pid : %d, Comm: %20s\n", task_pid_nr(current), current->comm);
 	print_symbol("PC is at %s\n", instruction_pointer(regs));
 	printk("PC  : %08lx SP  : %08lx SR  : %08lx ",
 	       regs->pc, regs->regs[15], regs->sr);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b3027a6775b..b749403f6b3 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -128,6 +128,37 @@ static void __init register_bootmem_low_pages(void)
 	free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages));
 }
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long free_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+	ret = parse_crashkernel(boot_command_line, free_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(free_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(unsigned long free_pfn)
 {
 	unsigned long bootmap_size;
@@ -189,11 +220,8 @@ void __init setup_bootmem_allocator(unsigned long free_pfn)
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end)
-		reserve_bootmem(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-#endif
+
+	reserve_crashkernel();
 }
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c
index 2f42442cf16..ca754fd4243 100644
--- a/arch/sh/kernel/signal.c
+++ b/arch/sh/kernel/signal.c
@@ -382,7 +382,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	set_fs(USER_DS);
 
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		 current->comm, current->pid, frame, regs->pc, regs->pr);
+		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
 	flush_cache_sigtramp(regs->pr);
 
@@ -462,7 +462,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	set_fs(USER_DS);
 
 	pr_debug("SIG deliver (%s:%d): sp=%p pc=%08lx pr=%08lx\n",
-		 current->comm, current->pid, frame, regs->pc, regs->pr);
+		 current->comm, task_pid_nr(current), frame, regs->pc, regs->pr);
 
 	flush_cache_sigtramp(regs->pr);
 
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index dcb46e71da1..cf99111cb33 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -95,8 +95,8 @@ void die(const char * str, struct pt_regs * regs, long err)
 	print_modules();
 	show_regs(regs);
 
-	printk("Process: %s (pid: %d, stack limit = %p)\n",
-	       current->comm, current->pid, task_stack_page(current) + 1);
+	printk("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+			task_pid_nr(current), task_stack_page(current) + 1);
 
 	if (!user_mode(regs) || in_interrupt())
 		dump_mem("Stack: ", regs->regs[15], THREAD_SIZE +
@@ -386,7 +386,8 @@ static int handle_unaligned_access(u16 instruction, struct pt_regs *regs)
 
 		printk(KERN_NOTICE "Fixing up unaligned userspace access "
 		       "in \"%s\" pid=%d pc=0x%p ins=0x%04hx\n",
-		       current->comm,current->pid,(u16*)regs->pc,instruction);
+		       current->comm, task_pid_nr(current),
+		       (u16 *)regs->pc, instruction);
 	}
 
 	ret = -EFAULT;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 4729668ce5b..f33cedb353f 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -207,7 +207,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh/oprofile/Kconfig b/arch/sh/oprofile/Kconfig
deleted file mode 100644
index 5ade19801b9..00000000000
--- a/arch/sh/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig
index b3327ce8e82..ba204bac49d 100644
--- a/arch/sh64/Kconfig
+++ b/arch/sh64/Kconfig
@@ -284,7 +284,7 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/sh64/oprofile/Kconfig"
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sh64/Kconfig.debug"
 
diff --git a/arch/sh64/kernel/traps.c b/arch/sh64/kernel/traps.c
index 9d0d58fb29f..c03101fab46 100644
--- a/arch/sh64/kernel/traps.c
+++ b/arch/sh64/kernel/traps.c
@@ -764,7 +764,7 @@ static int misaligned_fixup(struct pt_regs *regs)
 		--user_mode_unaligned_fixup_count;
 		/* Only do 'count' worth of these reports, to remove a potential DoS against syslog */
 		printk("Fixing up unaligned userspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-		       current->comm, current->pid, (__u32)regs->pc, opcode);
+		       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
 	} else
 #endif
 	if (!user_mode(regs) && (kernel_mode_unaligned_fixup_count > 0)) {
@@ -774,7 +774,7 @@ static int misaligned_fixup(struct pt_regs *regs)
 			       (__u32)regs->pc, opcode);
 		} else {
 			printk("Fixing up unaligned kernelspace access in \"%s\" pid=%d pc=0x%08x ins=0x%08lx\n",
-			       current->comm, current->pid, (__u32)regs->pc, opcode);
+			       current->comm, task_pid_nr(current), (__u32)regs->pc, opcode);
 		}
 	}
 
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index dd81c669c79..7c79a1ba805 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -81,7 +81,7 @@ static inline void print_vma(struct vm_area_struct *vma)
 
 static inline void print_task(struct task_struct *tsk)
 {
-	printk("Task pid %d\n", tsk->pid);
+	printk("Task pid %d\n", task_pid_nr(tsk));
 }
 
 static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
@@ -272,13 +272,13 @@ bad_area:
 			 * usermode, so only need a few */
 			count++;
 			printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
-				address, current->pid, current->comm,
+				address, task_pid_nr(current), current->comm,
 				(unsigned long) regs->pc);
 #if 0
 			show_regs(regs);
 #endif
 		}
-		if (is_init(tsk)) {
+		if (is_global_init(tsk)) {
 			panic("INIT had user mode bad_area\n");
 		}
 		tsk->thread.address = address;
@@ -320,14 +320,14 @@ no_context:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		panic("INIT out of memory\n");
 		yield();
 		goto survive;
 	}
 	printk("fault:Out of memory\n");
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/sh64/oprofile/Kconfig b/arch/sh64/oprofile/Kconfig
deleted file mode 100644
index 19d37730b66..00000000000
--- a/arch/sh64/oprofile/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-
-menu "Profiling support"
-	depends on EXPERIMENTAL
-
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
-endmenu
-
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index c0f4ba109da..527adc808ad 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -320,11 +320,7 @@ endmenu
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc/oprofile/Kconfig"
-
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc/Kconfig.debug"
 
diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
index 003f8eed32f..fe562db475e 100644
--- a/arch/sparc/kernel/ptrace.c
+++ b/arch/sparc/kernel/ptrace.c
@@ -155,7 +155,7 @@ static inline void read_sunos_user(struct pt_regs *regs, unsigned long offset,
 		/* Rest of them are completely unsupported. */
 	default:
 		printk("%s [%d]: Wants to read user offset %ld\n",
-		       current->comm, current->pid, offset);
+		       current->comm, task_pid_nr(current), offset);
 		pt_error_return(regs, EIO);
 		return;
 	}
@@ -222,7 +222,7 @@ static inline void write_sunos_user(struct pt_regs *regs, unsigned long offset,
 		/* Rest of them are completely unsupported or "no-touch". */
 	default:
 		printk("%s [%d]: Wants to write user offset %ld\n",
-		       current->comm, current->pid, offset);
+		       current->comm, task_pid_nr(current), offset);
 		goto failure;
 	}
 success:
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 6c0221e9a9f..42bf09db9a8 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -357,7 +357,7 @@ c_sys_nis_syscall (struct pt_regs *regs)
 	if (count++ > 5)
 		return -ENOSYS;
 	printk ("%s[%d]: Unimplemented SPARC system call %d\n",
-		current->comm, current->pid, (int)regs->u_regs[1]);
+		current->comm, task_pid_nr(current), (int)regs->u_regs[1]);
 #ifdef DEBUG_UNIMP_SYSCALL	
 	show_regs (regs);
 #endif
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index f807172cab0..28c187c5d9f 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -866,7 +866,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
 	rcu_read_lock();
 	ret = -EINVAL;
 	if (pgrp > 0)
-		ret = kill_pgrp(find_pid(pgrp), sig, 0);
+		ret = kill_pgrp(find_vpid(pgrp), sig, 0);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/arch/sparc/kernel/traps.c b/arch/sparc/kernel/traps.c
index 3bc3bff51e0..d404e799452 100644
--- a/arch/sparc/kernel/traps.c
+++ b/arch/sparc/kernel/traps.c
@@ -38,7 +38,7 @@ struct trap_trace_entry trapbuf[1024];
 
 void syscall_trace_entry(struct pt_regs *regs)
 {
-	printk("%s[%d]: ", current->comm, current->pid);
+	printk("%s[%d]: ", current->comm, task_pid_nr(current));
 	printk("scall<%d> (could be %d)\n", (int) regs->u_regs[UREG_G1],
 	       (int) regs->u_regs[UREG_I0]);
 }
@@ -99,7 +99,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
 	show_regs(regs);
 	add_taint(TAINT_DIE);
 
diff --git a/arch/sparc/oprofile/Kconfig b/arch/sparc/oprofile/Kconfig
deleted file mode 100644
index d8a84088471..00000000000
--- a/arch/sparc/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 59c4d752d28..c7a74e37698 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -460,20 +460,7 @@ source "drivers/fc4/Kconfig"
 
 source "fs/Kconfig"
 
-menu "Instrumentation Support"
-
-source "arch/sparc64/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes (EXPERIMENTAL)"
-	depends on KALLSYMS && EXPERIMENTAL && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
+source "kernel/Kconfig.instrumentation"
 
 source "arch/sparc64/Kconfig.debug"
 
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index 8f7a06e2c7e..170d6ca8de6 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -831,7 +831,7 @@ asmlinkage int sunos_killpg(int pgrp, int sig)
 	rcu_read_lock();
 	ret = -EINVAL;
 	if (pgrp > 0)
-		ret = kill_pgrp(find_pid(pgrp), sig, 0);
+		ret = kill_pgrp(find_vpid(pgrp), sig, 0);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 34573a55b6e..e9c7e4f07ab 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2225,7 +2225,7 @@ void die_if_kernel(char *str, struct pt_regs *regs)
 "              /_| \\__/ |_\\\n"
 "                 \\__U_/\n");
 
-	printk("%s(%d): %s [#%d]\n", current->comm, current->pid, str, ++die_counter);
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
 	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
 	__asm__ __volatile__("flushw");
 	__show_regs(regs);
diff --git a/arch/sparc64/oprofile/Kconfig b/arch/sparc64/oprofile/Kconfig
deleted file mode 100644
index d8a84088471..00000000000
--- a/arch/sparc64/oprofile/Kconfig
+++ /dev/null
@@ -1,17 +0,0 @@
-config PROFILING
-	bool "Profiling support (EXPERIMENTAL)"
-	help
-	  Say Y here to enable the extended profiling support mechanisms used
-	  by profilers such as OProfile.
-	  
-
-config OPROFILE
-	tristate "OProfile system profiling (EXPERIMENTAL)"
-	depends on PROFILING
-	help
-	  OProfile is a profiling system capable of profiling the
-	  whole system, include the kernel, kernel modules, libraries,
-	  and applications.
-
-	  If unsure, say N.
-
diff --git a/arch/sparc64/solaris/misc.c b/arch/sparc64/solaris/misc.c
index 3b67de7455f..c86cb3091a8 100644
--- a/arch/sparc64/solaris/misc.c
+++ b/arch/sparc64/solaris/misc.c
@@ -415,7 +415,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
 	
 	switch (cmd) {
 	case 0: /* getpgrp */
-		return process_group(current);
+		return task_pgrp_nr(current);
 	case 1: /* setpgrp */
 		{
 			int (*sys_setpgid)(pid_t,pid_t) =
@@ -426,7 +426,7 @@ asmlinkage int solaris_procids(int cmd, s32 pid, s32 pgid)
 			ret = sys_setpgid(0, 0);
 			if (ret) return ret;
 			proc_clear_tty(current);
-			return process_group(current);
+			return task_pgrp_nr(current);
 		}
 	case 2: /* getsid */
 		{
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 740d8a922e4..d8925d28557 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -289,4 +289,6 @@ config INPUT
 	bool
 	default n
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/um/Kconfig.debug"
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index bd060551e61..cb3321f8e0a 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -108,7 +108,7 @@ out_nosemaphore:
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		up_read(&mm->mmap_sem);
 		yield();
 		down_read(&mm->mmap_sem);
diff --git a/arch/um/sys-x86_64/sysrq.c b/arch/um/sys-x86_64/sysrq.c
index ce3e07fcf28..76544403181 100644
--- a/arch/um/sys-x86_64/sysrq.c
+++ b/arch/um/sys-x86_64/sysrq.c
@@ -15,8 +15,8 @@ void __show_regs(struct pt_regs * regs)
 {
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s\n",
-	       current->pid, current->comm, print_tainted(), init_utsname()->release);
+	printk("Pid: %d, comm: %.20s %s %s\n", task_pid_nr(current),
+		current->comm, print_tainted(), init_utsname()->release);
 	printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
 	       PT_REGS_RIP(regs));
 	printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig
index ace479ab273..b6a50b8b38d 100644
--- a/arch/v850/Kconfig
+++ b/arch/v850/Kconfig
@@ -331,6 +331,8 @@ source "sound/Kconfig"
 
 source "drivers/usb/Kconfig"
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/v850/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/x86/ia32/ia32_binfmt.c b/arch/x86/ia32/ia32_binfmt.c
index 5027650eb27..55822d2cf05 100644
--- a/arch/x86/ia32/ia32_binfmt.c
+++ b/arch/x86/ia32/ia32_binfmt.c
@@ -5,10 +5,6 @@
  * This tricks binfmt_elf.c into loading 32bit binaries using lots 
  * of ugly preprocessor tricks. Talk about very very poor man's inheritance.
  */ 
-#define __ASM_X86_64_ELF_H 1
-
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
 
 #include <linux/types.h>
 #include <linux/stddef.h>
@@ -19,6 +15,7 @@
 #include <linux/binfmts.h>
 #include <linux/mm.h>
 #include <linux/security.h>
+#include <linux/elfcore-compat.h>
 
 #include <asm/segment.h> 
 #include <asm/ptrace.h>
@@ -31,6 +28,20 @@
 #include <asm/ia32.h>
 #include <asm/vsyscall32.h>
 
+#undef	ELF_ARCH
+#undef	ELF_CLASS
+#define ELF_CLASS	ELFCLASS32
+#define ELF_ARCH	EM_386
+
+#undef	elfhdr
+#undef	elf_phdr
+#undef	elf_note
+#undef	elf_addr_t
+#define elfhdr		elf32_hdr
+#define elf_phdr	elf32_phdr
+#define elf_note	elf32_note
+#define elf_addr_t	Elf32_Off
+
 #define ELF_NAME "elf/i386"
 
 #define AT_SYSINFO 32
@@ -48,74 +59,20 @@ int sysctl_vsyscall32 = 1;
 } while(0)
 
 struct file;
-struct elf_phdr; 
 
 #define IA32_EMULATOR 1
 
-#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
-
-#undef ELF_ARCH
-#define ELF_ARCH EM_386
-
-#define ELF_DATA	ELFDATA2LSB
+#undef ELF_ET_DYN_BASE
 
-#define USE_ELF_CORE_DUMP 1
-
-/* Override elfcore.h */ 
-#define _LINUX_ELFCORE_H 1
-typedef unsigned int elf_greg_t;
-
-#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-
-struct elf_siginfo
-{
-	int	si_signo;			/* signal number */
-	int	si_code;			/* extra code */
-	int	si_errno;			/* errno */
-};
+#define ELF_ET_DYN_BASE		(TASK_UNMAPPED_BASE + 0x1000000)
 
 #define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
 
-struct elf_prstatus
-{
-	struct elf_siginfo pr_info;	/* Info associated with signal */
-	short	pr_cursig;		/* Current signal */
-	unsigned int pr_sigpend;	/* Set of pending signals */
-	unsigned int pr_sighold;	/* Set of held signals */
-	pid_t	pr_pid;
-	pid_t	pr_ppid;
-	pid_t	pr_pgrp;
-	pid_t	pr_sid;
-	struct compat_timeval pr_utime;	/* User time */
-	struct compat_timeval pr_stime;	/* System time */
-	struct compat_timeval pr_cutime;	/* Cumulative user time */
-	struct compat_timeval pr_cstime;	/* Cumulative system time */
-	elf_gregset_t pr_reg;	/* GP registers */
-	int pr_fpvalid;		/* True if math co-processor being used.  */
-};
-
-#define ELF_PRARGSZ	(80)	/* Number of chars for args */
-
-struct elf_prpsinfo
-{
-	char	pr_state;	/* numeric process state */
-	char	pr_sname;	/* char for pr_state */
-	char	pr_zomb;	/* zombie */
-	char	pr_nice;	/* nice val */
-	unsigned int pr_flag;	/* flags */
-	__u16	pr_uid;
-	__u16	pr_gid;
-	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
-	/* Lots missing */
-	char	pr_fname[16];	/* filename of executable */
-	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
-};
-
 #define _GET_SEG(x) \
 	({ __u32 seg; asm("movl %%" __stringify(x) ",%0" : "=r"(seg)); seg; })
 
 /* Assumes current==process to be dumped */
+#undef	ELF_CORE_COPY_REGS
 #define ELF_CORE_COPY_REGS(pr_reg, regs)       		\
 	pr_reg[0] = regs->rbx;				\
 	pr_reg[1] = regs->rcx;				\
@@ -135,36 +92,41 @@ struct elf_prpsinfo
 	pr_reg[15] = regs->rsp;				\
 	pr_reg[16] = regs->ss;
 
-#define user user32
+
+#define elf_prstatus	compat_elf_prstatus
+#define elf_prpsinfo	compat_elf_prpsinfo
+#define elf_fpregset_t	struct user_i387_ia32_struct
+#define	elf_fpxregset_t	struct user32_fxsr_struct
+#define user		user32
 
 #undef elf_read_implies_exec
 #define elf_read_implies_exec(ex, executable_stack)     (executable_stack != EXSTACK_DISABLE_X)
-//#include <asm/ia32.h>
-#include <linux/elf.h>
-
-typedef struct user_i387_ia32_struct elf_fpregset_t;
-typedef struct user32_fxsr_struct elf_fpxregset_t;
-
 
-static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *regs)
+#define elf_core_copy_regs		elf32_core_copy_regs
+static inline void elf32_core_copy_regs(compat_elf_gregset_t *elfregs,
+					struct pt_regs *regs)
 {
-	ELF_CORE_COPY_REGS((*elfregs), regs)
+	ELF_CORE_COPY_REGS((&elfregs->ebx), regs)
 }
 
-static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs)
+#define elf_core_copy_task_regs		elf32_core_copy_task_regs
+static inline int elf32_core_copy_task_regs(struct task_struct *t,
+					    compat_elf_gregset_t* elfregs)
 {	
 	struct pt_regs *pp = task_pt_regs(t);
-	ELF_CORE_COPY_REGS((*elfregs), pp);
+	ELF_CORE_COPY_REGS((&elfregs->ebx), pp);
 	/* fix wrong segments */ 
-	(*elfregs)[7] = t->thread.ds; 
-	(*elfregs)[9] = t->thread.fsindex; 
-	(*elfregs)[10] = t->thread.gsindex; 
-	(*elfregs)[8] = t->thread.es; 	
+	elfregs->ds = t->thread.ds;
+	elfregs->fs = t->thread.fsindex;
+	elfregs->gs = t->thread.gsindex;
+	elfregs->es = t->thread.es;
 	return 1; 
 }
 
+#define elf_core_copy_task_fpregs	elf32_core_copy_task_fpregs
 static inline int 
-elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
+elf32_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs,
+			    elf_fpregset_t *fpu)
 {
 	struct _fpstate_ia32 *fpstate = (void*)fpu; 
 	mm_segment_t oldfs = get_fs();
@@ -186,8 +148,9 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr
 
 #define ELF_CORE_COPY_XFPREGS 1
 #define ELF_CORE_XFPREG_TYPE NT_PRXFPREG
+#define elf_core_copy_task_xfpregs	elf32_core_copy_task_xfpregs
 static inline int 
-elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
+elf32_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
 {
 	struct pt_regs *regs = task_pt_regs(t);
 	if (!tsk_used_math(t))
@@ -206,6 +169,10 @@ elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu)
 
 extern int force_personality32;
 
+#undef	ELF_EXEC_PAGESIZE
+#undef	ELF_HWCAP
+#undef	ELF_PLATFORM
+#undef	SET_PERSONALITY
 #define ELF_EXEC_PAGESIZE PAGE_SIZE
 #define ELF_HWCAP (boot_cpu_data.x86_capability[0])
 #define ELF_PLATFORM  ("i686")
@@ -231,6 +198,7 @@ do {							\
 
 #define load_elf_binary load_elf32_binary
 
+#undef	ELF_PLAT_INIT
 #define ELF_PLAT_INIT(r, load_addr)	elf32_init(r)
 
 #undef start_thread
diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32
index a3fa11f8f46..ccea590bbb9 100644
--- a/arch/x86/kernel/Makefile_32
+++ b/arch/x86/kernel/Makefile_32
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y := head_32.o init_task_32.o vmlinux.lds
+extra-y := head_32.o init_task.o vmlinux.lds
 
 obj-y	:= process_32.o signal_32.o entry_32.o traps_32.o irq_32.o \
 		ptrace_32.o time_32.o ioport_32.o ldt_32.o setup_32.o i8259_32.o sys_i386_32.o \
@@ -17,6 +17,7 @@ obj-$(CONFIG_MCA)		+= mca_32.o
 obj-$(CONFIG_X86_MSR)		+= msr.o
 obj-$(CONFIG_X86_CPUID)		+= cpuid.o
 obj-$(CONFIG_MICROCODE)		+= microcode.o
+obj-$(CONFIG_PCI)		+= early-quirks.o
 obj-$(CONFIG_APM)		+= apm_32.o
 obj-$(CONFIG_X86_SMP)		+= smp_32.o smpboot_32.o tsc_sync.o
 obj-$(CONFIG_SMP)		+= smpcommon_32.o
diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64
index 43da66213a4..dec06e76928 100644
--- a/arch/x86/kernel/Makefile_64
+++ b/arch/x86/kernel/Makefile_64
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y 	:= head_64.o head64.o init_task_64.o vmlinux.lds
+extra-y 	:= head_64.o head64.o init_task.o vmlinux.lds
 EXTRA_AFLAGS	:= -traditional
 obj-y	:= process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \
 		ptrace_64.o time_64.o ioport_64.o ldt_64.o setup_64.o i8259_64.o sys_x86_64.o \
@@ -39,7 +39,7 @@ obj-$(CONFIG_K8_NB)		+= k8.o
 obj-$(CONFIG_AUDIT)		+= audit_64.o
 
 obj-$(CONFIG_MODULES)		+= module_64.o
-obj-$(CONFIG_PCI)		+= early-quirks_64.o
+obj-$(CONFIG_PCI)		+= early-quirks.o
 
 obj-y				+= topology.o
 obj-y				+= intel_cacheinfo.o
diff --git a/arch/x86/kernel/acpi/Makefile_32 b/arch/x86/kernel/acpi/Makefile_32
index a4852a2e919..045dd54b33e 100644
--- a/arch/x86/kernel/acpi/Makefile_32
+++ b/arch/x86/kernel/acpi/Makefile_32
@@ -1,7 +1,4 @@
 obj-$(CONFIG_ACPI)		+= boot.o
-ifneq ($(CONFIG_PCI),)
-obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk_32.o
-endif
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep_32.o wakeup_32.o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index afd2afe9102..f28b2e251b1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -555,7 +555,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 
 int acpi_unmap_lsapic(int cpu)
 {
-	x86_cpu_to_apicid[cpu] = -1;
+	per_cpu(x86_cpu_to_apicid, cpu) = -1;
 	cpu_clear(cpu, cpu_present_map);
 	num_processors--;
 
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 2d39f55d29a..10b67170b13 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -29,7 +29,7 @@
 void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
 					unsigned int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	flags->bm_check = 0;
 	if (num_online_cpus() == 1)
@@ -72,7 +72,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 		struct acpi_processor_cx *cx, struct acpi_power_register *reg)
 {
 	struct cstate_entry *percpu_entry;
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpumask_t saved_mask;
 	int retval;
diff --git a/arch/x86/kernel/acpi/earlyquirk_32.c b/arch/x86/kernel/acpi/earlyquirk_32.c
deleted file mode 100644
index 23f78efc577..00000000000
--- a/arch/x86/kernel/acpi/earlyquirk_32.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* 
- * Do early PCI probing for bug detection when the main PCI subsystem is 
- * not up yet.
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/acpi.h>
-
-#include <asm/pci-direct.h>
-#include <asm/acpi.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_ACPI
-
-static int __init nvidia_hpet_check(struct acpi_table_header *header)
-{
-	return 0;
-}
-#endif
-
-static int __init check_bridge(int vendor, int device)
-{
-#ifdef CONFIG_ACPI
-	static int warned;
-	/* According to Nvidia all timer overrides are bogus unless HPET
-	   is enabled. */
-	if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) {
-		if (!warned && acpi_table_parse(ACPI_SIG_HPET,
-						nvidia_hpet_check)) {
-			warned = 1;
-			acpi_skip_timer_override = 1;
-			  printk(KERN_INFO "Nvidia board "
-                       "detected. Ignoring ACPI "
-                       "timer override.\n");
-                printk(KERN_INFO "If you got timer trouble "
-			 	 "try acpi_use_timer_override\n");
-
-		}
-	}
-#endif
-	if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) {
-		timer_over_8254 = 0;
-		printk(KERN_INFO "ATI board detected. Disabling timer routing "
-				"over 8254.\n");
-	}
-	return 0;
-}
-
-void __init check_acpi_pci(void)
-{
-	int num, slot, func;
-
-	/* Assume the machine supports type 1. If not it will 
-	   always read ffffffff and should not have any side effect.
-	   Actually a few buggy systems can machine check. Allow the user
-	   to disable it by command line option at least -AK */
-	if (!early_pci_allowed())
-		return;
-
-	/* Poor man's PCI discovery */
-	for (num = 0; num < 32; num++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				u32 class;
-				u32 vendor;
-				class = read_pci_config(num, slot, func,
-							PCI_CLASS_REVISION);
-				if (class == 0xffffffff)
-					break;
-
-				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
-					continue;
-
-				vendor = read_pci_config(num, slot, func,
-							 PCI_VENDOR_ID);
-
-				if (check_bridge(vendor & 0xffff, vendor >> 16))
-					return;
-			}
-
-		}
-	}
-}
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index b54fded4983..2ed0a4ce62f 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -63,7 +63,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
 void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
 {
 	unsigned int cpu = pr->id;
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	pr->pdc = NULL;
 	if (c->x86_vendor == X86_VENDOR_INTEL)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3bd2688bd44..d6405e0842b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -357,14 +357,14 @@ void alternatives_smp_switch(int smp)
 	if (smp) {
 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
 		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		clear_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
 	} else {
 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+		set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_unlock(mod->locks, mod->locks_end,
 						mod->text, mod->text_end);
@@ -432,7 +432,7 @@ void __init alternative_instructions(void)
 		if (1 == num_possible_cpus()) {
 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
 			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
-			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
+			set_bit(X86_FEATURE_UP, cpu_data(0).x86_capability);
 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
 						_text, _etext);
 		}
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_32
index d8c6f132dc7..d8c6f132dc7 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_32
diff --git a/arch/x86/kernel/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig_64
index a3fd51926cb..9c9699fdcf5 100644
--- a/arch/x86/kernel/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig_64
@@ -19,7 +19,7 @@ config X86_POWERNOW_K8
 	  To compile this driver as a module, choose M here: the
 	  module will be called powernow-k8.
 
-	  For details, take a look at <file:Documentation/cpu-freq/>. 
+	  For details, take a look at <file:Documentation/cpu-freq/>.
 
 	  If in doubt, say N.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 2ca43ba32bc..fea0af0476b 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -77,7 +77,7 @@ static unsigned int acpi_pstate_strict;
 
 static int check_est_cpu(unsigned int cpuid)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
+	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
 	if (cpu->x86_vendor != X86_VENDOR_INTEL ||
 	    !cpu_has(cpu, X86_FEATURE_EST))
@@ -560,7 +560,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	unsigned int cpu = policy->cpu;
 	struct acpi_cpufreq_data *data;
 	unsigned int result = 0;
-	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	struct acpi_processor_performance *perf;
 
 	dprintk("acpi_cpufreq_cpu_init\n");
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c11baaf9f2b..326a4c81f68 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -305,7 +305,7 @@ static struct cpufreq_driver eps_driver = {
 
 static int __init eps_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	/* This driver will work only on Centaur C7 processors with
 	 * Enhanced SpeedStep/PowerSaver registers */
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 1e7ae7dafcf..94619c22f56 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -199,7 +199,7 @@ static int elanfreq_target (struct cpufreq_policy *policy,
 
 static int elanfreq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	unsigned int i;
 	int result;
 
@@ -280,7 +280,7 @@ static struct cpufreq_driver elanfreq_driver = {
 
 static int __init elanfreq_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	/* Test if we have the right hardware */
 	if ((c->x86_vendor != X86_VENDOR_AMD) ||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 5045f5d583c..749d00cb2eb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -780,7 +780,7 @@ static int longhaul_setup_southbridge(void)
 
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	char *cpuname=NULL;
 	int ret;
 	u32 lo, hi;
@@ -959,7 +959,7 @@ static struct cpufreq_driver longhaul_driver = {
 
 static int __init longhaul_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6)
 		return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index b2689514295..af4a867a097 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -172,7 +172,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
 	u32 save_lo, save_hi;
 	u32 eax, ebx, ecx, edx;
 	u32 try_hi;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (!low_freq || !high_freq)
 		return -EINVAL;
@@ -298,7 +298,7 @@ static struct cpufreq_driver longrun_driver = {
  */
 static int __init longrun_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if (c->x86_vendor != X86_VENDOR_TRANSMETA ||
 	    !cpu_has(c, X86_FEATURE_LONGRUN))
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 793eae854f4..14791ec55cf 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -195,7 +195,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
 
 static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
 	int cpuid = 0;
 	unsigned int i;
 
@@ -279,7 +279,7 @@ static struct cpufreq_driver p4clockmod_driver = {
 
 static int __init cpufreq_p4_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int ret;
 
 	/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index 6d028533931..42405b4e34e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = {
  */
 static int __init powernow_k6_init(void)
 {
-	struct cpuinfo_x86      *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 
 	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) ||
 		((c->x86_model != 12) && (c->x86_model != 13)))
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index f3686a5f230..b5a9863d6cd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -114,7 +114,7 @@ static int check_fsb(unsigned int fsbspeed)
 
 static int check_powernow(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	unsigned int maxei, eax, ebx, ecx, edx;
 
 	if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 !=6)) {
diff --git a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
index d9f3e90a7ae..42da9bd677d 100644
--- a/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
+++ b/arch/x86/kernel/cpu/cpufreq/sc520_freq.c
@@ -102,7 +102,7 @@ static int sc520_freq_target (struct cpufreq_policy *policy,
 
 static int sc520_freq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int result;
 
 	/* capability check */
@@ -151,7 +151,7 @@ static struct cpufreq_driver sc520_freq_driver = {
 
 static int __init sc520_freq_init(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	int err;
 
 	/* Test if we have the right hardware */
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 811d4743854..3031f119619 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -230,7 +230,7 @@ static struct cpu_model models[] =
 
 static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
 	struct cpu_model *model;
 
 	for(model = models; model->cpu_id != NULL; model++)
@@ -340,7 +340,7 @@ static unsigned int get_cur_freq(unsigned int cpu)
 
 static int centrino_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu];
+	struct cpuinfo_x86 *cpu = &cpu_data(policy->cpu);
 	unsigned freq;
 	unsigned l, h;
 	int ret;
@@ -612,7 +612,7 @@ static struct cpufreq_driver centrino_driver = {
  */
 static int __init centrino_init(void)
 {
-	struct cpuinfo_x86 *cpu = cpu_data;
+	struct cpuinfo_x86 *cpu = &cpu_data(0);
 
 	if (!cpu_has(cpu, X86_FEATURE_EST))
 		return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index b1acc8ce316..76c3ab0da46 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -228,7 +228,7 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency);
 
 unsigned int speedstep_detect_processor (void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
 
 	dprintk("x86: %x, model: %x\n", c->x86, c->x86_model);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 297a2411694..9921b01fe19 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -295,7 +295,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
 #ifdef CONFIG_X86_HT
-	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
+	unsigned int cpu = c->cpu_index;
 #endif
 
 	if (c->cpuid_level > 3) {
@@ -417,14 +417,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	if (new_l2) {
 		l2 = new_l2;
 #ifdef CONFIG_X86_HT
-		cpu_llc_id[cpu] = l2_id;
+		per_cpu(cpu_llc_id, cpu) = l2_id;
 #endif
 	}
 
 	if (new_l3) {
 		l3 = new_l3;
 #ifdef CONFIG_X86_HT
-		cpu_llc_id[cpu] = l3_id;
+		per_cpu(cpu_llc_id, cpu) = l3_id;
 #endif
 	}
 
@@ -459,7 +459,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 	struct _cpuid4_info	*this_leaf, *sibling_leaf;
 	unsigned long num_threads_sharing;
 	int index_msb, i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
@@ -470,8 +470,8 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 		index_msb = get_count_order(num_threads_sharing);
 
 		for_each_online_cpu(i) {
-			if (c[i].apicid >> index_msb ==
-			    c[cpu].apicid >> index_msb) {
+			if (cpu_data(i).apicid >> index_msb ==
+			    c->apicid >> index_msb) {
 				cpu_set(i, this_leaf->shared_cpu_map);
 				if (i != cpu && cpuid4_info[i])  {
 					sibling_leaf = CPUID4_INFO_IDX(i, index);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 54cdbf1a40f..c02541e6e65 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -120,7 +120,9 @@ int reserve_perfctr_nmi(unsigned int msr)
 	unsigned int counter;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return 1;
 
 	if (!test_and_set_bit(counter, perfctr_nmi_owner))
 		return 1;
@@ -132,7 +134,9 @@ void release_perfctr_nmi(unsigned int msr)
 	unsigned int counter;
 
 	counter = nmi_perfctr_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return;
 
 	clear_bit(counter, perfctr_nmi_owner);
 }
@@ -142,7 +146,9 @@ int reserve_evntsel_nmi(unsigned int msr)
 	unsigned int counter;
 
 	counter = nmi_evntsel_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return 1;
 
 	if (!test_and_set_bit(counter, evntsel_nmi_owner))
 		return 1;
@@ -154,7 +160,9 @@ void release_evntsel_nmi(unsigned int msr)
 	unsigned int counter;
 
 	counter = nmi_evntsel_msr_to_bit(msr);
-	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+	/* register not managed by the allocator? */
+	if (counter > NMI_MAX_COUNTER_BITS)
+		return;
 
 	clear_bit(counter, evntsel_nmi_owner);
 }
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 879a0f789b1..2d42b414b77 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -85,12 +85,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		/* nothing */
 	};
 	struct cpuinfo_x86 *c = v;
-	int i, n = c - cpu_data;
+	int i, n = 0;
 	int fpu_exception;
 
 #ifdef CONFIG_SMP
 	if (!cpu_online(n))
 		return 0;
+	n = c->cpu_index;
 #endif
 	seq_printf(m, "processor\t: %d\n"
 		"vendor_id\t: %s\n"
@@ -175,11 +176,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+	if (*pos == 0)	/* just in case, cpu 0 is not the first */
+		*pos = first_cpu(cpu_possible_map);
+	if ((*pos) < NR_CPUS && cpu_possible(*pos))
+		return &cpu_data(*pos);
+	return NULL;
 }
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
+	*pos = next_cpu(*pos, cpu_possible_map);
 	return c_start(m, pos);
 }
 static void c_stop(struct seq_file *m, void *v)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 70dcf912d9f..05c9936a16c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -114,7 +114,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 static int cpuid_open(struct inode *inode, struct file *file)
 {
 	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
-	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (cpu >= NR_CPUS || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
@@ -134,15 +134,18 @@ static const struct file_operations cpuid_fops = {
 	.open = cpuid_open,
 };
 
-static int __cpuinit cpuid_device_create(int i)
+static __cpuinit int cpuid_device_create(int cpu)
 {
-	int err = 0;
 	struct device *dev;
 
-	dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), "cpu%d",i);
-	if (IS_ERR(dev))
-		err = PTR_ERR(dev);
-	return err;
+	dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu),
+			    "cpu%d", cpu);
+	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void cpuid_device_destroy(int cpu)
+{
+	device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
 }
 
 static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
@@ -150,18 +153,21 @@ static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
 					      void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
+	int err = 0;
 
 	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		cpuid_device_create(cpu);
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		err = cpuid_device_create(cpu);
 		break;
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		cpuid_device_destroy(cpu);
 		break;
 	}
-	return NOTIFY_OK;
+	return err ? NOTIFY_BAD : NOTIFY_OK;
 }
 
 static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier =
@@ -198,7 +204,7 @@ static int __init cpuid_init(void)
 out_class:
 	i = 0;
 	for_each_online_cpu(i) {
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, i));
+		cpuid_device_destroy(i);
 	}
 	class_destroy(cpuid_class);
 out_chrdev:
@@ -212,7 +218,7 @@ static void __exit cpuid_exit(void)
 	int cpu = 0;
 
 	for_each_online_cpu(cpu)
-		device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
+		cpuid_device_destroy(cpu);
 	class_destroy(cpuid_class);
 	unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
 	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 32e75d0731a..72d0c56c1b4 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -47,6 +47,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 		if (!kdump_buf_page) {
 			printk(KERN_WARNING "Kdump: Kdump buffer page not"
 				" allocated\n");
+			kunmap_atomic(vaddr, KM_PTE0);
 			return -EFAULT;
 		}
 		copy_page(kdump_buf_page, vaddr);
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 3c86b979a40..d58039e8de7 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -288,7 +288,8 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat
 			request_resource(res, code_resource);
 			request_resource(res, data_resource);
 #ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
+			if (crashk_res.start != crashk_res.end)
+				request_resource(res, &crashk_res);
 #endif
 		}
 	}
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index e422b8159f6..57616865d8a 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -226,7 +226,8 @@ void __init e820_reserve_resources(void)
 			request_resource(res, &code_resource);
 			request_resource(res, &data_resource);
 #ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
+			if (crashk_res.start != crashk_res.end)
+				request_resource(res, &crashk_res);
 #endif
 		}
 	}
diff --git a/arch/x86/kernel/early-quirks_64.c b/arch/x86/kernel/early-quirks.c
index 13aa4fd728f..dc34acbd54a 100644
--- a/arch/x86/kernel/early-quirks_64.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -13,9 +13,13 @@
 #include <linux/acpi.h>
 #include <linux/pci_ids.h>
 #include <asm/pci-direct.h>
-#include <asm/proto.h>
-#include <asm/iommu.h>
 #include <asm/dma.h>
+#include <asm/io_apic.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_IOMMU
+#include <asm/iommu.h>
+#endif
 
 static void __init via_bugs(void)
 {
@@ -23,7 +27,8 @@ static void __init via_bugs(void)
 	if ((end_pfn > MAX_DMA32_PFN ||  force_iommu) &&
 	    !iommu_aperture_allowed) {
 		printk(KERN_INFO
-  "Looks like a VIA chipset. Disabling IOMMU. Override with iommu=allowed\n");
+		       "Looks like a VIA chipset. Disabling IOMMU."
+		       " Override with iommu=allowed\n");
 		iommu_aperture_disabled = 1;
 	}
 #endif
@@ -40,6 +45,7 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
 static void __init nvidia_bugs(void)
 {
 #ifdef CONFIG_ACPI
+#ifdef CONFIG_X86_IO_APIC
 	/*
 	 * All timer overrides on Nvidia are
 	 * wrong unless HPET is enabled.
@@ -59,17 +65,20 @@ static void __init nvidia_bugs(void)
 			"try acpi_use_timer_override\n");
 	}
 #endif
+#endif
 	/* RED-PEN skip them on mptables too? */
 
 }
 
 static void __init ati_bugs(void)
 {
+#ifdef CONFIG_X86_IO_APIC
 	if (timer_over_8254 == 1) {
 		timer_over_8254 = 0;
 		printk(KERN_INFO
-	 	"ATI board detected. Disabling timer routing over 8254.\n");
+		"ATI board detected. Disabling timer routing over 8254.\n");
 	}
+#endif
 }
 
 struct chipset {
@@ -104,7 +113,7 @@ void __init early_quirks(void)
 				if (class == 0xffffffff)
 					break;
 
-		       		if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+				if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
 					continue;
 
 				vendor = read_pci_config(num, slot, func,
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 4ae03e3e829..ce703e21c91 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -24,10 +24,19 @@
 #include <acpi/acpi_bus.h>
 #endif
 
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly
+/*
+ * which logical CPU number maps to which CPU (physical APIC ID)
+ *
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time.  Is it zeroed when the per_cpu
+ * data area is removed.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata
 					= { [0 ... NR_CPUS-1] = BAD_APICID };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 91c7526768e..07352b74bda 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -172,7 +172,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
 	 */
 	cpu = first_cpu(cpumask);
 	if ((unsigned)cpu < NR_CPUS)
-		return x86_cpu_to_apicid[cpu];
+		return per_cpu(x86_cpu_to_apicid, cpu);
 	else
 		return BAD_APICID;
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a7eee0a4751..6b3469311e4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -58,7 +58,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 
 	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_idt((const struct desc_ptr *)&idt_descr);
 
 	early_printk("Kernel alive\n");
 
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f8367074da0..22d8f00c80d 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -69,12 +69,15 @@ static inline void hpet_clear_mapping(void)
  * HPET command line enable / disable
  */
 static int boot_hpet_disable;
+int hpet_force_user;
 
 static int __init hpet_setup(char* str)
 {
 	if (str) {
 		if (!strncmp("disable", str, 7))
 			boot_hpet_disable = 1;
+		if (!strncmp("force", str, 5))
+			hpet_force_user = 1;
 	}
 	return 1;
 }
diff --git a/arch/x86/kernel/i8259_32.c b/arch/x86/kernel/i8259_32.c
index d34a10cc13a..f634fc715c9 100644
--- a/arch/x86/kernel/i8259_32.c
+++ b/arch/x86/kernel/i8259_32.c
@@ -403,7 +403,8 @@ void __init native_init_IRQ(void)
 		int vector = FIRST_EXTERNAL_VECTOR + i;
 		if (i >= NR_IRQS)
 			break;
-		if (vector != SYSCALL_VECTOR) 
+		/* SYSCALL_VECTOR was reserved in trap_init. */
+		if (!test_bit(vector, used_vectors))
 			set_intr_gate(vector, interrupt[i]);
 	}
 
diff --git a/arch/x86/kernel/init_task_32.c b/arch/x86/kernel/init_task.c
index d26fc063a76..468c9c43784 100644
--- a/arch/x86/kernel/init_task_32.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,7 +15,6 @@ static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
 static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct mm_struct init_mm = INIT_MM(init_mm);
-
 EXPORT_SYMBOL(init_mm);
 
 /*
@@ -25,7 +24,7 @@ EXPORT_SYMBOL(init_mm);
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union thread_union init_thread_union 
+union thread_union init_thread_union
 	__attribute__((__section__(".data.init_task"))) =
 		{ INIT_THREAD_INFO(init_task) };
 
@@ -35,12 +34,14 @@ union thread_union init_thread_union
  * All other task structs will be allocated on slabs in fork.c
  */
 struct task_struct init_task = INIT_TASK(init_task);
-
 EXPORT_SYMBOL(init_task);
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's.
- */ 
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
 
diff --git a/arch/x86/kernel/init_task_64.c b/arch/x86/kernel/init_task_64.c
deleted file mode 100644
index 4ff33d4f855..00000000000
--- a/arch/x86/kernel/init_task_64.c
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-#include <linux/mqueue.h>
-
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-
-EXPORT_SYMBOL(init_mm);
-
-/*
- * Initial task structure.
- *
- * We need to make sure that this is 8192-byte aligned due to the
- * way process stacks are handled. This is done by having a special
- * "init_task" linker map entry..
- */
-union thread_union init_thread_union 
-	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_THREAD_INFO(init_task) };
-
-/*
- * Initial task structure.
- *
- * All other task structs will be allocated on slabs in fork.c
- */
-struct task_struct init_task = INIT_TASK(init_task);
-
-EXPORT_SYMBOL(init_task);
-/*
- * per-CPU TSS segments. Threads are completely 'soft' on Linux,
- * no more per-task TSS's. The TSS size is kept cacheline-aligned
- * so they are allowed to end up in the .data.cacheline_aligned
- * section. Since TSS's are completely CPU-local, we want them
- * on exact cacheline boundaries, to eliminate cacheline ping-pong.
- */ 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
-/* Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-#define ALIGN_TO_4K __attribute__((section(".data.init_task")))
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 5f10c718953..0c55e9d86f6 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1198,7 +1198,7 @@ static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }
 static int __assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
-	int vector, offset, i;
+	int vector, offset;
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
@@ -1215,11 +1215,8 @@ next:
 	}
 	if (vector == current_vector)
 		return -ENOSPC;
-	if (vector == SYSCALL_VECTOR)
+	if (test_and_set_bit(vector, used_vectors))
 		goto next;
-	for (i = 0; i < NR_IRQ_VECTORS; i++)
-		if (irq_vector[i] == vector)
-			goto next;
 
 	current_vector = vector;
 	current_offset = offset;
@@ -2295,6 +2292,12 @@ static inline void __init check_timer(void)
 
 void __init setup_IO_APIC(void)
 {
+	int i;
+
+	/* Reserve all the system vectors. */
+	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+		set_bit(i, used_vectors);
+
 	enable_IO_APIC();
 
 	if (acpi_ioapic)
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 8459ca64bc2..11b935f4f88 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -149,28 +149,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 			image->start, cpu_has_pae);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never sets it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init parse_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	size = memparse(arg, &arg);
-	if (*arg == '@') {
-		base = memparse(arg+1, &arg);
-		/* FIXME: Do I want a sanity check
-		 * to validate the memory range?
-		 */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", parse_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7450b69710b..0d8577f0542 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -231,33 +231,6 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 			image->start);
 }
 
-/* crashkernel=size@addr specifies the location to reserve for
- * a crash kernel.  By reserving this memory we guarantee
- * that linux never set's it up as a DMA target.
- * Useful for holding code to do something appropriate
- * after a kernel panic.
- */
-static int __init setup_crashkernel(char *arg)
-{
-	unsigned long size, base;
-	char *p;
-	if (!arg)
-		return -EINVAL;
-	size = memparse(arg, &p);
-	if (arg == p)
-		return -EINVAL;
-	if (*p == '@') {
-		base = memparse(p+1, &p);
-		/* FIXME: Do I want a sanity check to validate the
-		 * memory range?  Yes you do, but it's too early for
-		 * e820 -AK */
-		crashk_res.start = base;
-		crashk_res.end   = base + size - 1;
-	}
-	return 0;
-}
-early_param("crashkernel", setup_crashkernel);
-
 void arch_crash_save_vmcoreinfo(void)
 {
 #ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c
index 66e6b797b2c..2cf20de5bec 100644
--- a/arch/x86/kernel/mce_64.c
+++ b/arch/x86/kernel/mce_64.c
@@ -799,7 +799,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 {
 	int err;
 	int i;
-	if (!mce_available(&cpu_data[cpu]))
+
+	if (!mce_available(&cpu_data(cpu)))
 		return -EIO;
 
 	memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c
index 0d2afd96aca..752fb16a817 100644
--- a/arch/x86/kernel/mce_amd_64.c
+++ b/arch/x86/kernel/mce_amd_64.c
@@ -472,11 +472,11 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 	sprintf(name, "threshold_bank%i", bank);
 
 #ifdef CONFIG_SMP
-	if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) {	/* symlink */
+	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
 		i = first_cpu(per_cpu(cpu_core_map, cpu));
 
 		/* first core not up yet */
-		if (cpu_data[i].cpu_core_id)
+		if (cpu_data(i).cpu_core_id)
 			goto out;
 
 		/* already linked */
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 09cf7811035..09c315214a5 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -132,7 +132,7 @@ static struct ucode_cpu_info {
 
 static void collect_cpu_info(int cpu_num)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu_num;
+	struct cpuinfo_x86 *c = &cpu_data(cpu_num);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	unsigned int val[2];
 
@@ -522,7 +522,7 @@ static struct platform_device *microcode_pdev;
 static int cpu_request_microcode(int cpu)
 {
 	char name[30];
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	const struct firmware *firmware;
 	void *buf;
 	unsigned long size;
@@ -570,7 +570,7 @@ static int cpu_request_microcode(int cpu)
 
 static int apply_microcode_check_cpu(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	cpumask_t old;
 	unsigned int val[2];
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
index 8bf0ca03ac8..ef4aab12358 100644
--- a/arch/x86/kernel/mpparse_64.c
+++ b/arch/x86/kernel/mpparse_64.c
@@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0;
 
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
+EXPORT_SYMBOL(boot_cpu_id);
+
 /* Internal processor count */
 unsigned int num_processors __cpuinitdata = 0;
 
@@ -86,7 +88,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 	return sum & 0xFF;
 }
 
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 {
 	int cpu;
 	cpumask_t tmp_map;
@@ -123,7 +125,18 @@ static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
 		cpu = 0;
  	}
 	bios_cpu_apicid[cpu] = m->mpc_apicid;
-	x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	/*
+	 * We get called early in the the start_kernel initialization
+	 * process when the per_cpu data area is not yet setup, so we
+	 * use a static array that is removed after the per_cpu data
+	 * area is created.
+	 */
+	if (x86_cpu_to_apicid_ptr) {
+		u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
+		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+	}
 
 	cpu_set(cpu, cpu_possible_map);
 	cpu_set(cpu, cpu_present_map);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index e18e516cf54..ee6eba4ecfe 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -112,7 +112,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
 static int msr_open(struct inode *inode, struct file *file)
 {
 	unsigned int cpu = iminor(file->f_path.dentry->d_inode);
-	struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	if (cpu >= NR_CPUS || !cpu_online(cpu))
 		return -ENXIO;	/* No such CPU */
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index b2b42bdb0a1..afaf9f12c03 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -11,7 +11,7 @@
 #include <asm/iommu.h>
 #include <asm/calgary.h>
 
-int iommu_merge __read_mostly = 0;
+int iommu_merge __read_mostly = 1;
 EXPORT_SYMBOL(iommu_merge);
 
 dma_addr_t bad_dma_address __read_mostly;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 097aeafce5f..7b899584d29 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -295,34 +295,52 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
-void show_regs(struct pt_regs * regs)
+void __show_registers(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 	unsigned long d0, d1, d2, d3, d6, d7;
+	unsigned long esp;
+	unsigned short ss, gs;
+
+	if (user_mode_vm(regs)) {
+		esp = regs->esp;
+		ss = regs->xss & 0xffff;
+		savesegment(gs, gs);
+	} else {
+		esp = (unsigned long) (&regs->esp);
+		savesegment(ss, ss);
+		savesegment(gs, gs);
+	}
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+			task_pid_nr(current), current->comm,
+			print_tainted(), init_utsname()->release,
+			(int)strcspn(init_utsname()->version, " "),
+			init_utsname()->version);
+
+	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+			0xffff & regs->xcs, regs->eip, regs->eflags,
+			smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (user_mode_vm(regs))
-		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-	       regs->eflags, print_tainted(), init_utsname()->release,
-	       (int)strcspn(init_utsname()->version, " "),
-	       init_utsname()->version);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-		regs->eax,regs->ebx,regs->ecx,regs->edx);
-	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-		regs->esi, regs->edi, regs->ebp);
-	printk(" DS: %04x ES: %04x FS: %04x\n",
-	       0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+	       regs->xds & 0xffff, regs->xes & 0xffff,
+	       regs->xfs & 0xffff, gs, ss);
+
+	if (!all)
+		return;
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4_safe();
-	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+			cr0, cr2, cr3, cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
@@ -330,10 +348,16 @@ void show_regs(struct pt_regs * regs)
 	get_debugreg(d3, 3);
 	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
 			d0, d1, d2, d3);
+
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+	printk("DR6: %08lx DR7: %08lx\n",
+			d6, d7);
+}
 
+void show_regs(struct pt_regs *regs)
+{
+	__show_registers(regs, 1);
 	show_trace(NULL, regs, &regs->esp);
 }
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d769e204f94..a4ce1911efd 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -45,9 +45,12 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	if (!(config & 0x2))
 		pci_write_config_byte(dev, 0xf4, config);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7320_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7525_MCH,	quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL,	PCI_DEVICE_ID_INTEL_E7520_MCH,	quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH,
+			quirk_intel_irqbalance);
 #endif
 
 #if defined(CONFIG_HPET_TIMER)
@@ -56,7 +59,8 @@ unsigned long force_hpet_address;
 static enum {
 	NONE_FORCE_HPET_RESUME,
 	OLD_ICH_FORCE_HPET_RESUME,
-	ICH_FORCE_HPET_RESUME
+	ICH_FORCE_HPET_RESUME,
+	VT8237_FORCE_HPET_RESUME
 } force_hpet_resume_type;
 
 static void __iomem *rcba_base;
@@ -146,17 +150,17 @@ static void ich_force_enable_hpet(struct pci_dev *dev)
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
-                         ich_force_enable_hpet);
+			 ich_force_enable_hpet);
 
 
 static struct pci_dev *cached_dev;
@@ -232,10 +236,91 @@ static void old_ich_force_enable_hpet(struct pci_dev *dev)
 	printk(KERN_DEBUG "Failed to force enable HPET\n");
 }
 
+/*
+ * Undocumented chipset features. Make sure that the user enforced
+ * this.
+ */
+static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
+{
+	if (hpet_force_user)
+		old_ich_force_enable_hpet(dev);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
+			 old_ich_force_enable_hpet_user);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
-                         old_ich_force_enable_hpet);
+			 old_ich_force_enable_hpet);
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
-                         old_ich_force_enable_hpet);
+			 old_ich_force_enable_hpet);
+
+
+static void vt8237_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(cached_dev, 0x68, val);
+
+	pci_read_config_dword(cached_dev, 0x68, &val);
+	if (val & 0x80)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void vt8237_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 uninitialized_var(val);
+
+	if (!hpet_force_user || hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0x68, &val);
+	/*
+	 * Bit 7 is HPET enable bit.
+	 * Bit 31:10 is HPET base address (contrary to what datasheet claims)
+	 */
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled. Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(dev, 0x68, val);
+
+	pci_read_config_dword(dev, 0x68, &val);
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
+			 vt8237_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
+			 vt8237_force_enable_hpet);
+
 
 void force_hpet_resume(void)
 {
@@ -246,6 +331,9 @@ void force_hpet_resume(void)
 	    case OLD_ICH_FORCE_HPET_RESUME:
 		return old_ich_force_hpet_resume();
 
+	    case VT8237_FORCE_HPET_RESUME:
+		return vt8237_force_hpet_resume();
+
 	    default:
 		break;
 	}
diff --git a/arch/x86/kernel/reboot_64.c b/arch/x86/kernel/reboot_64.c
index 368db2b9c5a..776eb06b651 100644
--- a/arch/x86/kernel/reboot_64.c
+++ b/arch/x86/kernel/reboot_64.c
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <asm/io.h>
 #include <asm/delay.h>
+#include <asm/desc.h>
 #include <asm/hw_irq.h>
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -136,7 +137,7 @@ void machine_emergency_restart(void)
 		}
 
 		case BOOT_TRIPLE: 
-			__asm__ __volatile__("lidt (%0)": :"r" (&no_idt));
+			load_idt((const struct desc_ptr *)&no_idt);
 			__asm__ __volatile__("int3");
 
 			reboot_type = BOOT_KBD;
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index 8b30b26ad06..1a07bbea7be 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <asm/reboot_fixups.h>
 #include <asm/msr.h>
+#include <asm/geode.h>
 
 static void cs5530a_warm_reset(struct pci_dev *dev)
 {
@@ -24,11 +25,8 @@ static void cs5530a_warm_reset(struct pci_dev *dev)
 
 static void cs5536_warm_reset(struct pci_dev *dev)
 {
-	/*
-	 * 6.6.2.12 Soft Reset (DIVIL_SOFT_RESET)
-	 * writing 1 to the LSB of this MSR causes a hard reset.
-	 */
-	wrmsrl(0x51400017, 1ULL);
+	/* writing 1 to the LSB of this MSR causes a hard reset */
+	wrmsrl(MSR_DIVIL_SOFT_RESET, 1ULL);
 	udelay(50); /* shouldn't get here but be safe and spin a while */
 }
 
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index ba918823505..3558ac78c92 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -185,6 +185,12 @@ void __cpuinit check_efer(void)
 unsigned long kernel_eflags;
 
 /*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
+
+/*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
  * and IDT. We reload them nevertheless, this function acts as a
@@ -224,8 +230,8 @@ void __cpuinit cpu_init (void)
  		memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE);
 
 	cpu_gdt_descr[cpu].size = GDT_SIZE;
-	asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_gdt((const struct desc_ptr *)&cpu_gdt_descr[cpu]);
+	load_idt((const struct desc_ptr *)&idt_descr);
 
 	memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
 	syscall_init();
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index b87a6fd5ba4..e4f19912476 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -378,6 +378,49 @@ extern unsigned long __init setup_memory(void);
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+static inline unsigned long long get_total_mem(void)
+{
+	unsigned long long total;
+
+	total = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+	total += highend_pfn - highstart_pfn;
+#endif
+
+	return total << PAGE_SHIFT;
+}
+
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = get_total_mem();
+
+	ret = parse_crashkernel(boot_command_line, total_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size > 0) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(total_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 void __init setup_bootmem_allocator(void)
 {
 	unsigned long bootmap_size;
@@ -453,11 +496,7 @@ void __init setup_bootmem_allocator(void)
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end)
-		reserve_bootmem(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-#endif
+	reserve_crashkernel();
 }
 
 /*
@@ -622,9 +661,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 #ifdef CONFIG_PCI
-#ifdef CONFIG_X86_IO_APIC
-	check_acpi_pci();	/* Checks more than just ACPI actually */
-#endif
+	early_quirks();
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 5a19f0cc5b6..31322d42eaa 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -191,6 +191,37 @@ static inline void copy_edd(void)
 }
 #endif
 
+#ifdef CONFIG_KEXEC
+static void __init reserve_crashkernel(void)
+{
+	unsigned long long free_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+
+	ret = parse_crashkernel(boot_command_line, free_mem,
+			&crash_size, &crash_base);
+	if (ret == 0 && crash_size) {
+		if (crash_base > 0) {
+			printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+					"for crashkernel (System RAM: %ldMB)\n",
+					(unsigned long)(crash_size >> 20),
+					(unsigned long)(crash_base >> 20),
+					(unsigned long)(free_mem >> 20));
+			crashk_res.start = crash_base;
+			crashk_res.end   = crash_base + crash_size - 1;
+			reserve_bootmem(crash_base, crash_size);
+		} else
+			printk(KERN_INFO "crashkernel reservation failed - "
+					"you have to specify a base address\n");
+	}
+}
+#else
+static inline void __init reserve_crashkernel(void)
+{}
+#endif
+
 #define EBDA_ADDR_POINTER 0x40E
 
 unsigned __initdata ebda_addr;
@@ -271,6 +302,11 @@ void __init setup_arch(char **cmdline_p)
 
 	dmi_scan_machine();
 
+#ifdef CONFIG_SMP
+	/* setup to use the static apicid table during kernel startup */
+	x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init;
+#endif
+
 #ifdef CONFIG_ACPI
 	/*
 	 * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
@@ -357,13 +393,7 @@ void __init setup_arch(char **cmdline_p)
 		}
 	}
 #endif
-#ifdef CONFIG_KEXEC
-	if (crashk_res.start != crashk_res.end) {
-		reserve_bootmem_generic(crashk_res.start,
-			crashk_res.end - crashk_res.start + 1);
-	}
-#endif
-
+	reserve_crashkernel();
 	paging_init();
 
 #ifdef CONFIG_PCI
@@ -529,7 +559,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
  		   but in the same order as the HT nodeids.
  		   If that doesn't result in a usable node fall back to the
  		   path for the previous case.  */
- 		int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
+		int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
  		if (ht_nodeid >= 0 &&
  		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
  			node = apicid_to_node[ht_nodeid];
@@ -853,6 +883,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 
 #ifdef CONFIG_SMP
 	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+	c->cpu_index = 0;
 #endif
 }
 
@@ -959,6 +990,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
 	struct cpuinfo_x86 *c = v;
+	int cpu = 0;
 
 	/* 
 	 * These flag bits must match the definitions in <asm/cpufeature.h>.
@@ -1037,8 +1069,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 
 #ifdef CONFIG_SMP
-	if (!cpu_online(c-cpu_data))
+	if (!cpu_online(c->cpu_index))
 		return 0;
+	cpu = c->cpu_index;
 #endif
 
 	seq_printf(m,"processor\t: %u\n"
@@ -1046,7 +1079,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		     "cpu family\t: %d\n"
 		     "model\t\t: %d\n"
 		     "model name\t: %s\n",
-		     (unsigned)(c-cpu_data),
+		     (unsigned)cpu,
 		     c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
 		     c->x86,
 		     (int)c->x86_model,
@@ -1058,7 +1091,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
-		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+		unsigned int freq = cpufreq_quick_get((unsigned)cpu);
 		if (!freq)
 			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
@@ -1071,7 +1104,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	
 #ifdef CONFIG_SMP
 	if (smp_num_siblings * c->x86_max_cores > 1) {
-		int cpu = c - cpu_data;
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
 		seq_printf(m, "siblings\t: %d\n",
 			       cpus_weight(per_cpu(cpu_core_map, cpu)));
@@ -1129,12 +1161,16 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+	if (*pos == 0)	/* just in case, cpu 0 is not the first */
+		*pos = first_cpu(cpu_possible_map);
+	if ((*pos) < NR_CPUS && cpu_possible(*pos))
+		return &cpu_data(*pos);
+	return NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
+	*pos = next_cpu(*pos, cpu_possible_map);
 	return c_start(m, pos);
 }
 
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0d79df3c563..6dc394b8725 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -200,8 +200,8 @@ badframe:
 	if (show_unhandled_signals && printk_ratelimit())
 		printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
 		       " esp:%lx oeax:%lx\n",
-		    current->pid > 1 ? KERN_INFO : KERN_EMERG,
-		    current->comm, current->pid, frame, regs->eip,
+		    task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
+		    current->comm, task_pid_nr(current), frame, regs->eip,
 		    regs->esp, regs->orig_eax);
 
 	force_sig(SIGSEGV, current);
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
index 791d9f8036a..2621ca3b2e4 100644
--- a/arch/x86/kernel/smp_32.c
+++ b/arch/x86/kernel/smp_32.c
@@ -610,7 +610,7 @@ static void stop_this_cpu (void * dummy)
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
 	disable_local_APIC();
-	if (cpu_data[smp_processor_id()].hlt_works_ok)
+	if (cpu_data(smp_processor_id()).hlt_works_ok)
 		for(;;) halt();
 	for (;;);
 }
@@ -676,7 +676,7 @@ static int convert_apicid_to_cpu(int apic_id)
 	int i;
 
 	for (i = 0; i < NR_CPUS; i++) {
-		if (x86_cpu_to_apicid[i] == apic_id)
+		if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
 			return i;
 	}
 	return -1;
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c
index 5c2964727d1..03fa6ed559c 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/smp_64.c
@@ -322,17 +322,27 @@ void unlock_ipi_call_lock(void)
 }
 
 /*
- * this function sends a 'generic call function' IPI to one other CPU
- * in the system.
- *
- * cpu is a standard Linux logical CPU number.
+ * this function sends a 'generic call function' IPI to all other CPU
+ * of the system defined in the mask.
  */
-static void
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-				int nonatomic, int wait)
+
+static int
+__smp_call_function_mask(cpumask_t mask,
+			 void (*func)(void *), void *info,
+			 int wait)
 {
 	struct call_data_struct data;
-	int cpus = 1;
+	cpumask_t allbutself;
+	int cpus;
+
+	allbutself = cpu_online_map;
+	cpu_clear(smp_processor_id(), allbutself);
+
+	cpus_and(mask, mask, allbutself);
+	cpus = cpus_weight(mask);
+
+	if (!cpus)
+		return 0;
 
 	data.func = func;
 	data.info = info;
@@ -343,19 +353,55 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 
 	call_data = &data;
 	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+	/* Send a message to other CPUs */
+	if (cpus_equal(mask, allbutself))
+		send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+	else
+		send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
 
 	/* Wait for response */
 	while (atomic_read(&data.started) != cpus)
 		cpu_relax();
 
 	if (!wait)
-		return;
+		return 0;
 
 	while (atomic_read(&data.finished) != cpus)
 		cpu_relax();
+
+	return 0;
+}
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.  Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+			   void (*func)(void *), void *info,
+			   int wait)
+{
+	int ret;
+
+	/* Can deadlock when called with interrupts disabled */
+	WARN_ON(irqs_disabled());
+
+	spin_lock(&call_lock);
+	ret = __smp_call_function_mask(mask, func, info, wait);
+	spin_unlock(&call_lock);
+	return ret;
 }
+EXPORT_SYMBOL(smp_call_function_mask);
 
 /*
  * smp_call_function_single - Run a function on a specific CPU
@@ -374,6 +420,7 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
 	int nonatomic, int wait)
 {
 	/* prevent preemption and reschedule on another processor */
+	int ret;
 	int me = get_cpu();
 
 	/* Can deadlock when called with interrupts disabled */
@@ -387,51 +434,14 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
 		return 0;
 	}
 
-	spin_lock(&call_lock);
-	__smp_call_function_single(cpu, func, info, nonatomic, wait);
-	spin_unlock(&call_lock);
+	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
 	put_cpu();
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
 /*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-static void __smp_call_function (void (*func) (void *info), void *info,
-				int nonatomic, int wait)
-{
-	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
-
-	if (!cpus)
-		return;
-
-	data.func = func;
-	data.info = info;
-	atomic_set(&data.started, 0);
-	data.wait = wait;
-	if (wait)
-		atomic_set(&data.finished, 0);
-
-	call_data = &data;
-	wmb();
-	/* Send a message to all other CPUs and wait for them to respond */
-	send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
-	/* Wait for response */
-	while (atomic_read(&data.started) != cpus)
-		cpu_relax();
-
-	if (!wait)
-		return;
-
-	while (atomic_read(&data.finished) != cpus)
-		cpu_relax();
-}
-
-/*
  * smp_call_function - run a function on all other CPUs.
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
@@ -449,10 +459,7 @@ static void __smp_call_function (void (*func) (void *info), void *info,
 int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			int wait)
 {
-	spin_lock(&call_lock);
-	__smp_call_function(func,info,nonatomic,wait);
-	spin_unlock(&call_lock);
-	return 0;
+	return smp_call_function_mask(cpu_online_map, func, info, wait);
 }
 EXPORT_SYMBOL(smp_call_function);
 
@@ -479,7 +486,7 @@ void smp_send_stop(void)
 	/* Don't deadlock on the call lock in panic */
 	nolock = !spin_trylock(&call_lock);
 	local_irq_save(flags);
-	__smp_call_function(stop_this_cpu, NULL, 0, 0);
+	__smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
 	if (!nolock)
 		spin_unlock(&call_lock);
 	disable_local_APIC();
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index be3faac0471..7b8fdfa169d 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -67,7 +67,7 @@ int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
 
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
@@ -89,12 +89,20 @@ EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
 /* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
-u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
-			{ [0 ... NR_CPUS-1] = 0xff };
-EXPORT_SYMBOL(x86_cpu_to_apicid);
+/*
+ * The following static array is used during kernel startup
+ * and the x86_cpu_to_apicid_ptr contains the address of the
+ * array during this time.  Is it zeroed when the per_cpu
+ * data area is removed.
+ */
+u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
+			{ [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_cpu_to_apicid_ptr;
+DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 
 u8 apicid_2_node[MAX_APICID];
 
@@ -150,9 +158,10 @@ void __init smp_alloc_memory(void)
 
 void __cpuinit smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c = cpu_data + id;
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
+	c->cpu_index = id;
 	if (id!=0)
 		identify_secondary_cpu(c);
 	/*
@@ -294,7 +303,7 @@ static int cpucount;
 /* maps the cpu to the sched domain representing multi-core */
 cpumask_t cpu_coregroup_map(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
@@ -311,41 +320,41 @@ static cpumask_t cpu_sibling_setup_map;
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
 	int i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
-			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
-			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
+			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
 				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 				cpu_set(i, per_cpu(cpu_core_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c[cpu].llc_shared_map);
-				cpu_set(cpu, c[i].llc_shared_map);
+				cpu_set(i, c->llc_shared_map);
+				cpu_set(cpu, cpu_data(i).llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
-	cpu_set(cpu, c[cpu].llc_shared_map);
+	cpu_set(cpu, c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
 		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
-		c[cpu].booted_cores = 1;
+		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
-		if (cpu_llc_id[cpu] != BAD_APICID &&
-		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
-			cpu_set(i, c[cpu].llc_shared_map);
-			cpu_set(cpu, c[i].llc_shared_map);
+		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+			cpu_set(i, c->llc_shared_map);
+			cpu_set(cpu, cpu_data(i).llc_shared_map);
 		}
-		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
+		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpu_set(i, per_cpu(cpu_core_map, cpu));
 			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
@@ -357,15 +366,15 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 				 * the booted_cores for this new cpu
 				 */
 				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
-					c[cpu].booted_cores++;
+					c->booted_cores++;
 				/*
 				 * increment the core count for all
 				 * the other cpus in this package
 				 */
 				if (i != cpu)
-					c[i].booted_cores++;
-			} else if (i != cpu && !c[cpu].booted_cores)
-				c[cpu].booted_cores = c[i].booted_cores;
+					cpu_data(i).booted_cores++;
+			} else if (i != cpu && !c->booted_cores)
+				c->booted_cores = cpu_data(i).booted_cores;
 		}
 	}
 }
@@ -804,7 +813,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 
 	irq_ctx_init(cpu);
 
-	x86_cpu_to_apicid[cpu] = apicid;
+	per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	/*
 	 * This grunge runs the startup process for
 	 * the targeted processor.
@@ -844,7 +853,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 			/* number CPUs logically, starting from 1 (BSP is 0) */
 			Dprintk("OK.\n");
 			printk("CPU%d: ", cpu);
-			print_cpu_info(&cpu_data[cpu]);
+			print_cpu_info(&cpu_data(cpu));
 			Dprintk("CPU has booted.\n");
 		} else {
 			boot_error= 1;
@@ -866,7 +875,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
 		cpucount--;
 	} else {
-		x86_cpu_to_apicid[cpu] = apicid;
+		per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 		cpu_set(cpu, cpu_present_map);
 	}
 
@@ -915,7 +924,7 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	struct warm_boot_cpu_info info;
 	int	apicid, ret;
 
-	apicid = x86_cpu_to_apicid[cpu];
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
 	if (apicid == BAD_APICID) {
 		ret = -ENODEV;
 		goto exit;
@@ -961,11 +970,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
 	printk("CPU%d: ", 0);
-	print_cpu_info(&cpu_data[0]);
+	print_cpu_info(&cpu_data(0));
 
 	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
 	boot_cpu_logical_apicid = logical_smp_processor_id();
-	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;
+	per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid;
 
 	current_thread_info()->cpu = 0;
 
@@ -1008,6 +1017,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
+		map_cpu_to_logical_apicid();
 		cpu_set(0, per_cpu(cpu_sibling_map, 0));
 		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
@@ -1029,6 +1039,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		}
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
+		map_cpu_to_logical_apicid();
 		cpu_set(0, per_cpu(cpu_sibling_map, 0));
 		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
@@ -1082,7 +1093,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	Dprintk("Before bogomips.\n");
 	for (cpu = 0; cpu < NR_CPUS; cpu++)
 		if (cpu_isset(cpu, cpu_callout_map))
-			bogosum += cpu_data[cpu].loops_per_jiffy;
+			bogosum += cpu_data(cpu).loops_per_jiffy;
 	printk(KERN_INFO
 		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 		cpucount+1,
@@ -1152,7 +1163,7 @@ void __init native_smp_prepare_boot_cpu(void)
 void remove_siblinginfo(int cpu)
 {
 	int sibling;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
 		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
@@ -1160,15 +1171,15 @@ void remove_siblinginfo(int cpu)
 		 * last thread sibling in this cpu core going down
 		 */
 		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
-			c[sibling].booted_cores--;
+			cpu_data(sibling).booted_cores--;
 	}
 			
 	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
 		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
 	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 	cpus_clear(per_cpu(cpu_core_map, cpu));
-	c[cpu].phys_proc_id = 0;
-	c[cpu].cpu_core_id = 0;
+	c->phys_proc_id = 0;
+	c->cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index e351ac4ab5b..fd1fff6a35a 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -65,7 +65,7 @@ int smp_num_siblings = 1;
 EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
-u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
+DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
 
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
@@ -84,8 +84,8 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* Set when the idlers are all forked */
 int smp_threads_ready;
@@ -138,9 +138,10 @@ static unsigned long __cpuinit setup_trampoline(void)
 
 static void __cpuinit smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c = cpu_data + id;
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
+	c->cpu_index = id;
 	identify_cpu(c);
 	print_cpu_info(c);
 }
@@ -237,7 +238,7 @@ void __cpuinit smp_callin(void)
 /* maps the cpu to the sched domain representing multi-core */
 cpumask_t cpu_coregroup_map(int cpu)
 {
-	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
@@ -254,41 +255,41 @@ static cpumask_t cpu_sibling_setup_map;
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	cpu_set(cpu, cpu_sibling_setup_map);
 
 	if (smp_num_siblings > 1) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
-			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
-			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
+			if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+			    c->cpu_core_id == cpu_data(i).cpu_core_id) {
 				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 				cpu_set(i, per_cpu(cpu_core_map, cpu));
 				cpu_set(cpu, per_cpu(cpu_core_map, i));
-				cpu_set(i, c[cpu].llc_shared_map);
-				cpu_set(cpu, c[i].llc_shared_map);
+				cpu_set(i, c->llc_shared_map);
+				cpu_set(cpu, cpu_data(i).llc_shared_map);
 			}
 		}
 	} else {
 		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
-	cpu_set(cpu, c[cpu].llc_shared_map);
+	cpu_set(cpu, c->llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
 		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
-		c[cpu].booted_cores = 1;
+		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu_mask(i, cpu_sibling_setup_map) {
-		if (cpu_llc_id[cpu] != BAD_APICID &&
-		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
-			cpu_set(i, c[cpu].llc_shared_map);
-			cpu_set(cpu, c[i].llc_shared_map);
+		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+			cpu_set(i, c->llc_shared_map);
+			cpu_set(cpu, cpu_data(i).llc_shared_map);
 		}
-		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
+		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpu_set(i, per_cpu(cpu_core_map, cpu));
 			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
@@ -300,15 +301,15 @@ static inline void set_cpu_sibling_map(int cpu)
 				 * the booted_cores for this new cpu
 				 */
 				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
-					c[cpu].booted_cores++;
+					c->booted_cores++;
 				/*
 				 * increment the core count for all
 				 * the other cpus in this package
 				 */
 				if (i != cpu)
-					c[i].booted_cores++;
-			} else if (i != cpu && !c[cpu].booted_cores)
-				c[cpu].booted_cores = c[i].booted_cores;
+					cpu_data(i).booted_cores++;
+			} else if (i != cpu && !c->booted_cores)
+				c->booted_cores = cpu_data(i).booted_cores;
 		}
 	}
 }
@@ -694,7 +695,7 @@ do_rest:
 		clear_node_cpumask(cpu); /* was set by numa_add_cpu */
 		cpu_clear(cpu, cpu_present_map);
 		cpu_clear(cpu, cpu_possible_map);
-		x86_cpu_to_apicid[cpu] = BAD_APICID;
+		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
 		return -EIO;
 	}
 
@@ -841,6 +842,26 @@ static int __init smp_sanity_check(unsigned max_cpus)
 }
 
 /*
+ * Copy apicid's found by MP_processor_info from initial array to the per cpu
+ * data area.  The x86_cpu_to_apicid_init array is then expendable and the
+ * x86_cpu_to_apicid_ptr is zeroed indicating that the static array is no
+ * longer available.
+ */
+void __init smp_set_apicids(void)
+{
+	int cpu;
+
+	for_each_cpu_mask(cpu, cpu_possible_map) {
+		if (per_cpu_offset(cpu))
+			per_cpu(x86_cpu_to_apicid, cpu) =
+						x86_cpu_to_apicid_init[cpu];
+	}
+
+	/* indicate the static array will be going away soon */
+	x86_cpu_to_apicid_ptr = NULL;
+}
+
+/*
  * Prepare for SMP bootup.  The MP table or ACPI has been read
  * earlier.  Just do some sanity checking here and enable APIC mode.
  */
@@ -849,6 +870,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	nmi_watchdog_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
+	smp_set_apicids();
 	set_cpu_sibling_map(0);
 
 	if (smp_sanity_check(max_cpus) < 0) {
@@ -968,7 +990,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 static void remove_siblinginfo(int cpu)
 {
 	int sibling;
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
 		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
@@ -976,15 +998,15 @@ static void remove_siblinginfo(int cpu)
 		 * last thread sibling in this cpu core going down
 		 */
 		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
-			c[sibling].booted_cores--;
+			cpu_data(sibling).booted_cores--;
 	}
 			
 	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
 		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
 	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 	cpus_clear(per_cpu(cpu_core_map, cpu));
-	c[cpu].phys_proc_id = 0;
-	c[cpu].cpu_core_id = 0;
+	c->phys_proc_id = 0;
+	c->cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index f8fafe527ff..622bb026828 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -32,9 +32,9 @@ void __save_processor_state(struct saved_context *ctxt)
 	/*
 	 * descriptor tables
 	 */
-	asm volatile ("sgdt %0" : "=m" (ctxt->gdt_limit));
-	asm volatile ("sidt %0" : "=m" (ctxt->idt_limit));
-	asm volatile ("str %0"  : "=m" (ctxt->tr));
+	store_gdt((struct desc_ptr *)&ctxt->gdt_limit);
+	store_idt((struct desc_ptr *)&ctxt->idt_limit);
+	store_tr(ctxt->tr);
 
 	/* XMM0..XMM15 should be handled by kernel_fpu_begin(). */
 	/*
@@ -91,8 +91,9 @@ void __restore_processor_state(struct saved_context *ctxt)
 	 * now restore the descriptor tables to their proper values
 	 * ltr is done i fix_processor_context().
 	 */
-	asm volatile ("lgdt %0" :: "m" (ctxt->gdt_limit));
-	asm volatile ("lidt %0" :: "m" (ctxt->idt_limit));
+	load_gdt((const struct desc_ptr *)&ctxt->gdt_limit);
+	load_idt((const struct desc_ptr *)&ctxt->idt_limit);
+
 
 	/*
 	 * segment registers
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index b132d3957df..cc9acace7e2 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -63,6 +63,9 @@
 
 int panic_on_unrecovered_nmi;
 
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
+EXPORT_SYMBOL_GPL(used_vectors);
+
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -288,48 +291,24 @@ EXPORT_SYMBOL(dump_stack);
 void show_registers(struct pt_regs *regs)
 {
 	int i;
-	int in_kernel = 1;
-	unsigned long esp;
-	unsigned short ss, gs;
-
-	esp = (unsigned long) (&regs->esp);
-	savesegment(ss, ss);
-	savesegment(gs, gs);
-	if (user_mode_vm(regs)) {
-		in_kernel = 0;
-		esp = regs->esp;
-		ss = regs->xss & 0xffff;
-	}
+
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\n"
-		KERN_EMERG "EIP:    %04x:[<%08lx>]    %s VLI\n"
-		KERN_EMERG "EFLAGS: %08lx   (%s %.*s)\n",
-		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
-		print_tainted(), regs->eflags, init_utsname()->release,
-		(int)strcspn(init_utsname()->version, " "),
-		init_utsname()->version);
-	print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
-	printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
-		regs->eax, regs->ebx, regs->ecx, regs->edx);
-	printk(KERN_EMERG "esi: %08lx   edi: %08lx   ebp: %08lx   esp: %08lx\n",
-		regs->esi, regs->edi, regs->ebp, esp);
-	printk(KERN_EMERG "ds: %04x   es: %04x   fs: %04x  gs: %04x  ss: %04x\n",
-	       regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
+	__show_registers(regs, 0);
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
-		TASK_COMM_LEN, current->comm, current->pid,
+		TASK_COMM_LEN, current->comm, task_pid_nr(current),
 		current_thread_info(), current, task_thread_info(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
 	 */
-	if (in_kernel) {
+	if (!user_mode_vm(regs)) {
 		u8 *eip;
 		unsigned int code_prologue = code_bytes * 43 / 64;
 		unsigned int code_len = code_bytes;
 		unsigned char c;
 
 		printk("\n" KERN_EMERG "Stack: ");
-		show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG);
 
 		printk(KERN_EMERG "Code: ");
 
@@ -374,11 +353,11 @@ int is_valid_bugaddr(unsigned long eip)
 void die(const char * str, struct pt_regs * regs, long err)
 {
 	static struct {
-		spinlock_t lock;
+		raw_spinlock_t lock;
 		u32 lock_owner;
 		int lock_owner_depth;
 	} die = {
-		.lock =			__SPIN_LOCK_UNLOCKED(die.lock),
+		.lock =			__RAW_SPIN_LOCK_UNLOCKED,
 		.lock_owner =		-1,
 		.lock_owner_depth =	0
 	};
@@ -389,13 +368,14 @@ void die(const char * str, struct pt_regs * regs, long err)
 
 	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
-		spin_lock_irqsave(&die.lock, flags);
+		__raw_spin_lock(&die.lock);
+		raw_local_save_flags(flags);
 		die.lock_owner = smp_processor_id();
 		die.lock_owner_depth = 0;
 		bust_spinlocks(1);
 	}
 	else
-		local_save_flags(flags);
+		raw_local_save_flags(flags);
 
 	if (++die.lock_owner_depth < 3) {
 		unsigned long esp;
@@ -439,7 +419,8 @@ void die(const char * str, struct pt_regs * regs, long err)
 	bust_spinlocks(0);
 	die.lock_owner = -1;
 	add_taint(TAINT_DIE);
-	spin_unlock_irqrestore(&die.lock, flags);
+	__raw_spin_unlock(&die.lock);
+	raw_local_irq_restore(flags);
 
 	if (!regs)
 		return;
@@ -622,7 +603,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
 	    printk_ratelimit())
 		printk(KERN_INFO
 		    "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
-		    current->comm, current->pid,
+		    current->comm, task_pid_nr(current),
 		    regs->eip, regs->esp, error_code);
 
 	force_sig(SIGSEGV, current);
@@ -1142,6 +1123,8 @@ static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
 
 void __init trap_init(void)
 {
+	int i;
+
 #ifdef CONFIG_EISA
 	void __iomem *p = ioremap(0x0FFFD9, 4);
 	if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
@@ -1201,6 +1184,11 @@ void __init trap_init(void)
 
 	set_system_gate(SYSCALL_VECTOR,&system_call);
 
+	/* Reserve all the builtin and the syscall vector. */
+	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
+		set_bit(i, used_vectors);
+	set_bit(SYSCALL_VECTOR, used_vectors);
+
 	/*
 	 * Should be a barrier for any external CPU state.
 	 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index b4a9b3db199..df690c3fa45 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -462,7 +462,7 @@ void out_of_line_bug(void)
 EXPORT_SYMBOL(out_of_line_bug);
 #endif
 
-static DEFINE_SPINLOCK(die_lock);
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
 static int die_owner = -1;
 static unsigned int die_nest_count;
 
@@ -474,13 +474,13 @@ unsigned __kprobes long oops_begin(void)
 	oops_enter();
 
 	/* racy, but better than risking deadlock. */
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
-	if (!spin_trylock(&die_lock)) { 
+	if (!__raw_spin_trylock(&die_lock)) {
 		if (cpu == die_owner) 
 			/* nested oops. should stop eventually */;
 		else
-			spin_lock(&die_lock);
+			__raw_spin_lock(&die_lock);
 	}
 	die_nest_count++;
 	die_owner = cpu;
@@ -494,12 +494,10 @@ void __kprobes oops_end(unsigned long flags)
 	die_owner = -1;
 	bust_spinlocks(0);
 	die_nest_count--;
-	if (die_nest_count)
-		/* We still own the lock */
-		local_irq_restore(flags);
-	else
+	if (!die_nest_count)
 		/* Nest count reaches zero, release the lock. */
-		spin_unlock_irqrestore(&die_lock, flags);
+		__raw_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
 	if (panic_on_oops)
 		panic("Fatal exception");
 	oops_exit();
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e87a3939ed4..b8a7cf67143 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -181,8 +181,8 @@ int recalibrate_cpu_khz(void)
 	if (cpu_has_tsc) {
 		cpu_khz = calculate_cpu_khz();
 		tsc_khz = cpu_khz;
-		cpu_data[0].loops_per_jiffy =
-			cpufreq_scale(cpu_data[0].loops_per_jiffy,
+		cpu_data(0).loops_per_jiffy =
+			cpufreq_scale(cpu_data(0).loops_per_jiffy,
 					cpu_khz_old, cpu_khz);
 		return 0;
 	} else
@@ -215,7 +215,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
 			return 0;
 		}
 		ref_freq = freq->old;
-		loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+		loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
 		cpu_khz_ref = cpu_khz;
 	}
 
@@ -223,7 +223,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
 	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
 	    (val == CPUFREQ_RESUMECHANGE)) {
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			cpu_data[freq->cpu].loops_per_jiffy =
+			cpu_data(freq->cpu).loops_per_jiffy =
 				cpufreq_scale(loops_per_jiffy_ref,
 						ref_freq, freq->new);
 
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 9f22e542c37..9c70af45b42 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -73,13 +73,13 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	struct cpufreq_freqs *freq = data;
 	unsigned long *lpj, dummy;
 
-	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+	if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
 		return 0;
 
 	lpj = &dummy;
 	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 #ifdef CONFIG_SMP
-		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+		lpj = &cpu_data(freq->cpu).loops_per_jiffy;
 #else
 		lpj = &boot_cpu_data.loops_per_jiffy;
 #endif
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 585541ca1a7..78f2250963a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -48,7 +48,7 @@
 	({unsigned long v;  		\
 	extern char __vsyscall_0; 	\
 	  asm("" : "=r" (v) : "0" (x)); \
-	  ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
+	  ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); })
 
 /*
  * vsyscall_gtod_data contains data that is :
@@ -291,7 +291,7 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
 #ifdef CONFIG_NUMA
 	node = cpu_to_node(cpu);
 #endif
-	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
 		write_rdtscp_aux((node << 12) | cpu);
 
 	/* Store cpu number in limit so that it can be loaded quickly
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index f6edb11364d..952e7a89c2a 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -82,7 +82,7 @@ inline void __const_udelay(unsigned long xloops)
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
 		:"1" (xloops), "0"
-		(cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+		(cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
 
 	__delay(++xloops);
 }
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 2dbebd30834..0ebbfb9e7c7 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -40,7 +40,8 @@ EXPORT_SYMBOL(__delay);
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
+	__delay(((xloops * HZ *
+		cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1);
 }
 EXPORT_SYMBOL(__const_udelay);
 
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9f38b12b4af..8bab2b2efaf 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -748,7 +748,7 @@ survive:
 			retval = get_user_pages(current, current->mm,
 					(unsigned long )to, 1, 1, 0, &pg, NULL);
 
-			if (retval == -ENOMEM && is_init(current)) {
+			if (retval == -ENOMEM && is_global_init(current)) {
 				up_read(&current->mm->mmap_sem);
 				congestion_wait(WRITE, HZ/50);
 				goto survive;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index e4928aa6bdf..f93a730b44d 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -36,8 +36,8 @@ static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = { [0 ... NR
 
 /* per CPU data structure (for /proc/cpuinfo et al), visible externally
  * indexed physically */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
-EXPORT_SYMBOL(cpu_data);
+DEFINE_PER_CPU(cpuinfo_x86, cpu_info) __cacheline_aligned;
+EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 /* physical ID of the CPU used to boot the system */
 unsigned char boot_cpu_id;
@@ -430,7 +430,7 @@ find_smp_config(void)
 void __init
 smp_store_cpu_info(int id)
 {
-	struct cpuinfo_x86 *c=&cpu_data[id];
+	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
 
@@ -634,7 +634,7 @@ do_boot_cpu(__u8 cpu)
 			cpu, smp_processor_id()));
 	
 		printk("CPU%d: ", cpu);
-		print_cpu_info(&cpu_data[cpu]);
+		print_cpu_info(&cpu_data(cpu));
 		wmb();
 		cpu_set(cpu, cpu_callout_map);
 		cpu_set(cpu, cpu_present_map);
@@ -683,7 +683,7 @@ smp_boot_cpus(void)
 	 */
 	smp_store_cpu_info(boot_cpu_id);
 	printk("CPU%d: ", boot_cpu_id);
-	print_cpu_info(&cpu_data[boot_cpu_id]);
+	print_cpu_info(&cpu_data(boot_cpu_id));
 
 	if(is_cpu_quad()) {
 		/* booting on a Quad CPU */
@@ -714,7 +714,7 @@ smp_boot_cpus(void)
 		unsigned long bogosum = 0;
 		for (i = 0; i < NR_CPUS; i++)
 			if (cpu_isset(i, cpu_online_map))
-				bogosum += cpu_data[i].loops_per_jiffy;
+				bogosum += cpu_data(i).loops_per_jiffy;
 		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 			cpucount+1,
 			bogosum/(500000/HZ),
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index 6555c3d1437..b695d70e998 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -471,8 +471,8 @@ bad_area_nosemaphore:
 		    printk_ratelimit()) {
 			printk("%s%s[%d]: segfault at %08lx eip %08lx "
 			    "esp %08lx error %lx\n",
-			    tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
-			    tsk->comm, tsk->pid, address, regs->eip,
+			    task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+			    tsk->comm, task_pid_nr(tsk), address, regs->eip,
 			    regs->esp, error_code);
 		}
 		tsk->thread.cr2 = address;
@@ -564,7 +564,8 @@ no_context:
 		 * it's allocated already.
 		 */
 		if ((page >> PAGE_SHIFT) < max_low_pfn
-		    && (page & _PAGE_PRESENT)) {
+		    && (page & _PAGE_PRESENT)
+		    && !(page & _PAGE_PSE)) {
 			page &= PAGE_MASK;
 			page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
 			                                         & (PTRS_PER_PTE - 1)];
@@ -587,7 +588,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
+	if (is_global_init(tsk)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 5e0e54906c4..00be7f0a71b 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -169,7 +169,7 @@ void dump_pagetable(unsigned long address)
 	pmd = pmd_offset(pud, address);
 	if (bad_address(pmd)) goto bad;
 	printk("PMD %lx ", pmd_val(*pmd));
-	if (!pmd_present(*pmd))	goto ret;	 
+	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
 
 	pte = pte_offset_kernel(pmd, address);
 	if (bad_address(pte)) goto bad;
@@ -285,7 +285,6 @@ static int vmalloc_fault(unsigned long address)
 	return 0;
 }
 
-static int page_fault_trace;
 int show_unhandled_signals = 1;
 
 /*
@@ -354,10 +353,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if (likely(regs->eflags & X86_EFLAGS_IF))
 		local_irq_enable();
 
-	if (unlikely(page_fault_trace))
-		printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
-		       regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); 
-
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(address, regs, error_code);
 
@@ -488,7 +483,7 @@ bad_area_nosemaphore:
 		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
 		    printk_ratelimit()) {
 			printk(
-		       "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
+		       "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n",
 					tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
 					tsk->comm, tsk->pid, address, regs->rip,
 					regs->rsp, error_code);
@@ -554,7 +549,7 @@ no_context:
  */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		goto again;
 	}
@@ -621,10 +616,3 @@ void vmalloc_sync_all(void)
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == 
 				(__START_KERNEL & PGDIR_MASK)));
 }
-
-static int __init enable_pagefaulttrace(char *str)
-{
-	page_fault_trace = 1;
-	return 1;
-}
-__setup("pagefaulttrace", enable_pagefaulttrace);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5eec5e56d07..3d6926ba899 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -612,7 +612,7 @@ void __init init_cpu_to_node(void)
 {
 	int i;
  	for (i = 0; i < NR_CPUS; i++) {
-		u8 apicid = x86_cpu_to_apicid[i];
+		u8 apicid = x86_cpu_to_apicid_init[i];
 		if (apicid == BAD_APICID)
 			continue;
 		if (apicid_to_node[apicid] == NUMA_NO_NODE)
diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c
index 8a4f65bf956..c7b7dfe1d40 100644
--- a/arch/x86/mm/pageattr_64.c
+++ b/arch/x86/mm/pageattr_64.c
@@ -230,9 +230,14 @@ void global_flush_tlb(void)
 	struct page *pg, *next;
 	struct list_head l;
 
-	down_read(&init_mm.mmap_sem);
+	/*
+	 * Write-protect the semaphore, to exclude two contexts
+	 * doing a list_replace_init() call in parallel and to
+	 * exclude new additions to the deferred_pages list:
+	 */
+	down_write(&init_mm.mmap_sem);
 	list_replace_init(&deferred_pages, &l);
-	up_read(&init_mm.mmap_sem);
+	up_write(&init_mm.mmap_sem);
 
 	flush_map(&l);
 
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index c049ce414f0..0ed046a187f 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -13,25 +13,45 @@
 #include <linux/mm.h>
 #include <asm/ptrace.h>
 #include <asm/uaccess.h>
+#include <asm/stacktrace.h>
 
-struct frame_head {
-	struct frame_head * ebp;
-	unsigned long ret;
-} __attribute__((packed));
+static void backtrace_warning_symbol(void *data, char *msg,
+				     unsigned long symbol)
+{
+	/* Ignore warnings */
+}
 
-static struct frame_head *
-dump_kernel_backtrace(struct frame_head * head)
+static void backtrace_warning(void *data, char *msg)
 {
-	oprofile_add_trace(head->ret);
+	/* Ignore warnings */
+}
 
-	/* frame pointers should strictly progress back up the stack
-	 * (towards higher addresses) */
-	if (head >= head->ebp)
-		return NULL;
+static int backtrace_stack(void *data, char *name)
+{
+	/* Yes, we want all stacks */
+	return 0;
+}
+
+static void backtrace_address(void *data, unsigned long addr)
+{
+	unsigned int *depth = data;
 
-	return head->ebp;
+	if ((*depth)--)
+		oprofile_add_trace(addr);
 }
 
+static struct stacktrace_ops backtrace_ops = {
+	.warning = backtrace_warning,
+	.warning_symbol = backtrace_warning_symbol,
+	.stack = backtrace_stack,
+	.address = backtrace_address,
+};
+
+struct frame_head {
+	struct frame_head *ebp;
+	unsigned long ret;
+} __attribute__((packed));
+
 static struct frame_head *
 dump_user_backtrace(struct frame_head * head)
 {
@@ -53,72 +73,16 @@ dump_user_backtrace(struct frame_head * head)
 	return bufhead[0].ebp;
 }
 
-/*
- * |             | /\ Higher addresses
- * |             |
- * --------------- stack base (address of current_thread_info)
- * | thread info |
- * .             .
- * |    stack    |
- * --------------- saved regs->ebp value if valid (frame_head address)
- * .             .
- * --------------- saved regs->rsp value if x86_64
- * |             |
- * --------------- struct pt_regs * stored on stack if 32-bit
- * |             |
- * .             .
- * |             |
- * --------------- %esp
- * |             |
- * |             | \/ Lower addresses
- *
- * Thus, regs (or regs->rsp for x86_64) <-> stack base restricts the
- * valid(ish) ebp values. Note: (1) for x86_64, NMI and several other
- * exceptions use special stacks, maintained by the interrupt stack table
- * (IST). These stacks are set up in trap_init() in
- * arch/x86_64/kernel/traps.c. Thus, for x86_64, regs now does not point
- * to the kernel stack; instead, it points to some location on the NMI
- * stack. On the other hand, regs->rsp is the stack pointer saved when the
- * NMI occurred. (2) For 32-bit, regs->esp is not valid because the
- * processor does not save %esp on the kernel stack when interrupts occur
- * in the kernel mode.
- */
-#ifdef CONFIG_FRAME_POINTER
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
-	unsigned long headaddr = (unsigned long)head;
-#ifdef CONFIG_X86_64
-	unsigned long stack = (unsigned long)regs->rsp;
-#else
-	unsigned long stack = (unsigned long)regs;
-#endif
-	unsigned long stack_base = (stack & ~(THREAD_SIZE - 1)) + THREAD_SIZE;
-
-	return headaddr > stack && headaddr < stack_base;
-}
-#else
-/* without fp, it's just junk */
-static int valid_kernel_stack(struct frame_head * head, struct pt_regs * regs)
-{
-	return 0;
-}
-#endif
-
-
 void
 x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
-	struct frame_head *head;
-
-#ifdef CONFIG_X86_64
-	head = (struct frame_head *)regs->rbp;
-#else
-	head = (struct frame_head *)regs->ebp;
-#endif
+	struct frame_head *head = (struct frame_head *)frame_pointer(regs);
+	unsigned long stack = stack_pointer(regs);
 
 	if (!user_mode_vm(regs)) {
-		while (depth-- && valid_kernel_stack(head, regs))
-			head = dump_kernel_backtrace(head);
+		if (depth)
+			dump_trace(NULL, regs, (unsigned long *)stack,
+				   &backtrace_ops, &depth);
 		return;
 	}
 
diff --git a/arch/x86_64/.gitignore b/arch/x86_64/.gitignore
new file mode 100644
index 00000000000..36ef4c374d2
--- /dev/null
+++ b/arch/x86_64/.gitignore
@@ -0,0 +1 @@
+boot
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 78cb68f2ebb..aab25f3ba3c 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -723,7 +723,9 @@ config ARCH_HIBERNATION_HEADER
 
 source "drivers/acpi/Kconfig"
 
-source "arch/x86/kernel/cpufreq/Kconfig"
+source "arch/x86/kernel/cpu/cpufreq/Kconfig_64"
+
+source "drivers/cpuidle/Kconfig"
 
 endmenu
 
@@ -766,9 +768,9 @@ source "fs/Kconfig.binfmt"
 config IA32_EMULATION
 	bool "IA32 Emulation"
 	help
-	  Include code to run 32-bit programs under a 64-bit kernel. You should likely
-	  turn this on, unless you're 100% sure that you don't have any 32-bit programs
-	  left.
+	  Include code to run 32-bit programs under a 64-bit kernel. You should
+	  likely turn this on, unless you're 100% sure that you don't have any
+	  32-bit programs left.
 
 config IA32_AOUT
        tristate "IA32 a.out support"
@@ -799,21 +801,6 @@ source "drivers/firmware/Kconfig"
 
 source fs/Kconfig
 
-menu "Instrumentation Support"
-
-source "arch/x86/oprofile/Kconfig"
-
-config KPROBES
-	bool "Kprobes"
-	depends on KALLSYMS && MODULES
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function.  register_kprobe() establishes
-	  a probepoint and specifies the callback.  Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-endmenu
-
 source "arch/x86_64/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 03e1ede27b8..6d89ab762ff 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -74,7 +74,7 @@ KBUILD_CFLAGS += $(cflags-y)
 CFLAGS_KERNEL += $(cflags-kernel-y)
 KBUILD_AFLAGS += -m64
 
-head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task_64.o
+head-y := arch/x86/kernel/head_64.o arch/x86/kernel/head64.o arch/x86/kernel/init_task.o
 
 libs-y 					+= arch/x86/lib/
 core-y					+= arch/x86/kernel/ \
@@ -97,9 +97,9 @@ BOOTIMAGE                     := arch/x86/boot/bzImage
 KBUILD_IMAGE                  := $(BOOTIMAGE)
 
 bzImage: vmlinux
-	$(Q)mkdir -p $(objtree)/arch/x86_64/boot
-	$(Q)ln -fsn $(objtree)/arch/x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
 	$(Q)$(MAKE) $(build)=$(boot) $(BOOTIMAGE)
+	$(Q)mkdir -p $(objtree)/arch/x86_64/boot
+	$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/x86_64/boot/bzImage
 
 bzlilo: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zlilo
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 7fbb44bea37..85ffbb49149 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -251,6 +251,8 @@ config EMBEDDED_RAMDISK_IMAGE
 	  provide one yourself.
 endmenu
 
+source "kernel/Kconfig.instrumentation"
+
 source "arch/xtensa/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 8be99c777d9..397bcd6ad08 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -176,7 +176,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause)
 	printk("Caught unhandled exception in '%s' "
 	       "(pid = %d, pc = %#010lx) - should not happen\n"
 	       "\tEXCCAUSE is %ld\n",
-	       current->comm, current->pid, regs->pc, exccause);
+	       current->comm, task_pid_nr(current), regs->pc, exccause);
 	force_sig(SIGILL, current);
 }
 
@@ -228,7 +228,7 @@ do_illegal_instruction(struct pt_regs *regs)
 	/* If in user mode, send SIGILL signal to current process. */
 
 	printk("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n",
-	    current->comm, current->pid, regs->pc);
+	    current->comm, task_pid_nr(current), regs->pc);
 	force_sig(SIGILL, current);
 }
 
@@ -254,7 +254,7 @@ do_unaligned_user (struct pt_regs *regs)
 	current->thread.error_code = -3;
 	printk("Unaligned memory access to %08lx in '%s' "
 	       "(pid = %d, pc = %#010lx)\n",
-	       regs->excvaddr, current->comm, current->pid, regs->pc);
+	       regs->excvaddr, current->comm, task_pid_nr(current), regs->pc);
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code = BUS_ADRALN;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 2f842859948..33f366be323 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -145,7 +145,7 @@ bad_area:
 	 */
 out_of_memory:
 	up_read(&mm->mmap_sem);
-	if (is_init(current)) {
+	if (is_global_init(current)) {
 		yield();
 		down_read(&mm->mmap_sem);
 		goto survive;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3935469e366..8025d646ab3 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3367,7 +3367,7 @@ void submit_bio(int rw, struct bio *bio)
 		if (unlikely(block_dump)) {
 			char b[BDEVNAME_SIZE];
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-				current->comm, current->pid,
+			current->comm, task_pid_nr(current),
 				(rw & WRITE) ? "WRITE" : "READ",
 				(unsigned long long)bio->bi_sector,
 				bdevname(bio->bi_bdev,b));
@@ -3739,7 +3739,7 @@ EXPORT_SYMBOL(end_dequeued_request);
 
 /**
  * end_request - end I/O on the current segment of the request
- * @rq:		the request being processed
+ * @req:	the request being processed
  * @uptodate:	error value or 0/1 uptodate flag
  *
  * Description:
diff --git a/drivers/Makefile b/drivers/Makefile
index d2dc01cc73e..cfe38ffff28 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
 obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
+obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
 obj-$(CONFIG_NEW_LEDS)		+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 4875f0149eb..b83389145f2 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -88,7 +88,7 @@ config ACPI_PROC_EVENT
 
 config ACPI_AC
 	tristate "AC Adapter"
-	depends on X86
+	depends on X86 && POWER_SUPPLY
 	default y
 	help
 	  This driver adds support for the AC Adapter object, which indicates
@@ -97,7 +97,7 @@ config ACPI_AC
 
 config ACPI_BATTERY
 	tristate "Battery"
-	depends on X86
+	depends on X86 && POWER_SUPPLY
 	default y
 	help
 	  This driver adds support for battery information through
@@ -117,6 +117,7 @@ config ACPI_BUTTON
 config ACPI_VIDEO
 	tristate "Video"
 	depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL
+	depends on INPUT
 	help
 	  This driver implement the ACPI Extensions For Display Adapters
 	  for integrated graphics devices on motherboard, as specified in
@@ -349,12 +350,11 @@ config ACPI_HOTPLUG_MEMORY
 		$>modprobe acpi_memhotplug 
 
 config ACPI_SBS
-	tristate "Smart Battery System (EXPERIMENTAL)"
+	tristate "Smart Battery System"
 	depends on X86
-	depends on EXPERIMENTAL
+	depends on POWER_SUPPLY
 	help
-	  This driver adds support for the Smart Battery System.
-	  A "Smart Battery" is quite old and quite rare compared
-	  to today's ACPI "Control Method" battery.
+	  This driver adds support for the Smart Battery System, another
+	  type of access to battery information, found on some laptops.
 
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index d4336f1730e..54e3ab0e5fc 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -60,3 +60,4 @@ obj-$(CONFIG_ACPI_TOSHIBA)	+= toshiba_acpi.o
 obj-$(CONFIG_ACPI_HOTPLUG_MEMORY)	+= acpi_memhotplug.o
 obj-y				+= cm_sbs.o
 obj-$(CONFIG_ACPI_SBS)		+= sbs.o
+obj-$(CONFIG_ACPI_SBS)		+= sbshc.o
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 26d70702b31..e03de37a750 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/power_supply.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
@@ -72,16 +73,37 @@ static struct acpi_driver acpi_ac_driver = {
 };
 
 struct acpi_ac {
+	struct power_supply charger;
 	struct acpi_device * device;
 	unsigned long state;
 };
 
+#define to_acpi_ac(x) container_of(x, struct acpi_ac, charger);
+
 static const struct file_operations acpi_ac_fops = {
 	.open = acpi_ac_open_fs,
 	.read = seq_read,
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+static int get_ac_property(struct power_supply *psy,
+			   enum power_supply_property psp,
+			   union power_supply_propval *val)
+{
+	struct acpi_ac *ac = to_acpi_ac(psy);
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = ac->state;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static enum power_supply_property ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+};
 
 /* --------------------------------------------------------------------------
                                AC Adapter Management
@@ -208,6 +230,7 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 		acpi_bus_generate_netlink_event(device->pnp.device_class,
 						  device->dev.bus_id, event,
 						  (u32) ac->state);
+		kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
 		break;
 	default:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
@@ -244,7 +267,12 @@ static int acpi_ac_add(struct acpi_device *device)
 	result = acpi_ac_add_fs(device);
 	if (result)
 		goto end;
-
+	ac->charger.name = acpi_device_bid(device);
+	ac->charger.type = POWER_SUPPLY_TYPE_MAINS;
+	ac->charger.properties = ac_props;
+	ac->charger.num_properties = ARRAY_SIZE(ac_props);
+	ac->charger.get_property = get_ac_property;
+	power_supply_register(&ac->device->dev, &ac->charger);
 	status = acpi_install_notify_handler(device->handle,
 					     ACPI_ALL_NOTIFY, acpi_ac_notify,
 					     ac);
@@ -279,7 +307,8 @@ static int acpi_ac_remove(struct acpi_device *device, int type)
 
 	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_ALL_NOTIFY, acpi_ac_notify);
-
+	if (ac->charger.dev)
+		power_supply_unregister(&ac->charger);
 	acpi_ac_remove_fs(device);
 
 	kfree(ac);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 9b2c0f74f86..681e26b56b1 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -1,6 +1,8 @@
 /*
- *  acpi_battery.c - ACPI Battery Driver ($Revision: 37 $)
+ *  battery.c - ACPI Battery Driver (Revision: 2.0)
  *
+ *  Copyright (C) 2007 Alexey Starikovskiy <astarikovskiy@suse.de>
+ *  Copyright (C) 2004-2007 Vladimir Lebedev <vladimir.p.lebedev@intel.com>
  *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
  *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *
@@ -27,244 +29,288 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/jiffies.h>
+
+#ifdef CONFIG_ACPI_PROCFS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
+#endif
 
 #include <acpi/acpi_bus.h>
 #include <acpi/acpi_drivers.h>
 
-#define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
+#include <linux/power_supply.h>
 
-#define ACPI_BATTERY_FORMAT_BIF	"NNNNNNNNNSSSS"
-#define ACPI_BATTERY_FORMAT_BST	"NNNN"
+#define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF
 
 #define ACPI_BATTERY_COMPONENT		0x00040000
 #define ACPI_BATTERY_CLASS		"battery"
 #define ACPI_BATTERY_DEVICE_NAME	"Battery"
 #define ACPI_BATTERY_NOTIFY_STATUS	0x80
 #define ACPI_BATTERY_NOTIFY_INFO	0x81
-#define ACPI_BATTERY_UNITS_WATTS	"mW"
-#define ACPI_BATTERY_UNITS_AMPS		"mA"
 
 #define _COMPONENT		ACPI_BATTERY_COMPONENT
 
-#define ACPI_BATTERY_UPDATE_TIME	0
-
-#define ACPI_BATTERY_NONE_UPDATE	0
-#define ACPI_BATTERY_EASY_UPDATE	1
-#define ACPI_BATTERY_INIT_UPDATE	2
-
 ACPI_MODULE_NAME("battery");
 
 MODULE_AUTHOR("Paul Diefenbaugh");
+MODULE_AUTHOR("Alexey Starikovskiy <astarikovskiy@suse.de>");
 MODULE_DESCRIPTION("ACPI Battery Driver");
 MODULE_LICENSE("GPL");
 
-static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME;
-
-/* 0 - every time, > 0 - by update_time */
-module_param(update_time, uint, 0644);
+static unsigned int cache_time = 1000;
+module_param(cache_time, uint, 0644);
+MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
+#ifdef CONFIG_ACPI_PROCFS
 extern struct proc_dir_entry *acpi_lock_battery_dir(void);
 extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
 
-static int acpi_battery_add(struct acpi_device *device);
-static int acpi_battery_remove(struct acpi_device *device, int type);
-static int acpi_battery_resume(struct acpi_device *device);
+enum acpi_battery_files {
+	info_tag = 0,
+	state_tag,
+	alarm_tag,
+	ACPI_BATTERY_NUMFILES,
+};
+
+#endif
 
 static const struct acpi_device_id battery_device_ids[] = {
 	{"PNP0C0A", 0},
 	{"", 0},
 };
-MODULE_DEVICE_TABLE(acpi, battery_device_ids);
-
-static struct acpi_driver acpi_battery_driver = {
-	.name = "battery",
-	.class = ACPI_BATTERY_CLASS,
-	.ids = battery_device_ids,
-	.ops = {
-		.add = acpi_battery_add,
-		.resume = acpi_battery_resume,
-		.remove = acpi_battery_remove,
-		},
-};
 
-struct acpi_battery_state {
-	acpi_integer state;
-	acpi_integer present_rate;
-	acpi_integer remaining_capacity;
-	acpi_integer present_voltage;
-};
-
-struct acpi_battery_info {
-	acpi_integer power_unit;
-	acpi_integer design_capacity;
-	acpi_integer last_full_capacity;
-	acpi_integer battery_technology;
-	acpi_integer design_voltage;
-	acpi_integer design_capacity_warning;
-	acpi_integer design_capacity_low;
-	acpi_integer battery_capacity_granularity_1;
-	acpi_integer battery_capacity_granularity_2;
-	acpi_string model_number;
-	acpi_string serial_number;
-	acpi_string battery_type;
-	acpi_string oem_info;
-};
-
-enum acpi_battery_files{
-	ACPI_BATTERY_INFO = 0,
-	ACPI_BATTERY_STATE,
-	ACPI_BATTERY_ALARM,
-	ACPI_BATTERY_NUMFILES,
-};
+MODULE_DEVICE_TABLE(acpi, battery_device_ids);
 
-struct acpi_battery_flags {
-	u8 battery_present_prev;
-	u8 alarm_present;
-	u8 init_update;
-	u8 update[ACPI_BATTERY_NUMFILES];
-	u8 power_unit;
-};
 
 struct acpi_battery {
-	struct mutex mutex;
+	struct mutex lock;
+	struct power_supply bat;
 	struct acpi_device *device;
-	struct acpi_battery_flags flags;
-	struct acpi_buffer bif_data;
-	struct acpi_buffer bst_data;
-	unsigned long alarm;
-	unsigned long update_time[ACPI_BATTERY_NUMFILES];
+	unsigned long update_time;
+	int current_now;
+	int capacity_now;
+	int voltage_now;
+	int design_capacity;
+	int full_charge_capacity;
+	int technology;
+	int design_voltage;
+	int design_capacity_warning;
+	int design_capacity_low;
+	int capacity_granularity_1;
+	int capacity_granularity_2;
+	int alarm;
+	char model_number[32];
+	char serial_number[32];
+	char type[32];
+	char oem_info[32];
+	int state;
+	int power_unit;
+	u8 alarm_present;
 };
 
+#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
+
 inline int acpi_battery_present(struct acpi_battery *battery)
 {
 	return battery->device->status.battery_present;
 }
-inline char *acpi_battery_power_units(struct acpi_battery *battery)
-{
-	if (battery->flags.power_unit)
-		return ACPI_BATTERY_UNITS_AMPS;
-	else
-		return ACPI_BATTERY_UNITS_WATTS;
-}
 
-inline acpi_handle acpi_battery_handle(struct acpi_battery *battery)
+static int acpi_battery_technology(struct acpi_battery *battery)
 {
-	return battery->device->handle;
+	if (!strcasecmp("NiCd", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_NiCd;
+	if (!strcasecmp("NiMH", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_NiMH;
+	if (!strcasecmp("LION", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_LION;
+	if (!strcasecmp("LiP", battery->type))
+		return POWER_SUPPLY_TECHNOLOGY_LIPO;
+	return POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
 }
 
-/* --------------------------------------------------------------------------
-                               Battery Management
-   -------------------------------------------------------------------------- */
-
-static void acpi_battery_check_result(struct acpi_battery *battery, int result)
+static int acpi_battery_get_property(struct power_supply *psy,
+				     enum power_supply_property psp,
+				     union power_supply_propval *val)
 {
-	if (!battery)
-		return;
+	struct acpi_battery *battery = to_acpi_battery(psy);
 
-	if (result) {
-		battery->flags.init_update = 1;
+	if ((!acpi_battery_present(battery)) &&
+	     psp != POWER_SUPPLY_PROP_PRESENT)
+		return -ENODEV;
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (battery->state & 0x01)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (battery->state & 0x02)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else if (battery->state == 0)
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = acpi_battery_present(battery);
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = acpi_battery_technology(battery);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = battery->design_voltage * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = battery->voltage_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = battery->current_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		val->intval = battery->design_capacity * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+		val->intval = battery->full_charge_capacity * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+		val->intval = battery->capacity_now * 1000;
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = battery->model_number;
+		break;
+	case POWER_SUPPLY_PROP_MANUFACTURER:
+		val->strval = battery->oem_info;
+		break;
+	default:
+		return -EINVAL;
 	}
+	return 0;
 }
 
-static int acpi_battery_extract_package(struct acpi_battery *battery,
-					union acpi_object *package,
-					struct acpi_buffer *format,
-					struct acpi_buffer *data,
-					char *package_name)
+static enum power_supply_property charge_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+static enum power_supply_property energy_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
+
+#ifdef CONFIG_ACPI_PROCFS
+inline char *acpi_battery_units(struct acpi_battery *battery)
 {
-	acpi_status status = AE_OK;
-	struct acpi_buffer data_null = { 0, NULL };
+	return (battery->power_unit)?"mA":"mW";
+}
+#endif
 
-	status = acpi_extract_package(package, format, &data_null);
-	if (status != AE_BUFFER_OVERFLOW) {
-		ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s",
-				package_name));
-		return -ENODEV;
-	}
+/* --------------------------------------------------------------------------
+                               Battery Management
+   -------------------------------------------------------------------------- */
+struct acpi_offsets {
+	size_t offset;		/* offset inside struct acpi_sbs_battery */
+	u8 mode;		/* int or string? */
+};
 
-	if (data_null.length != data->length) {
-		kfree(data->pointer);
-		data->pointer = kzalloc(data_null.length, GFP_KERNEL);
-		if (!data->pointer) {
-			ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()"));
-			return -ENOMEM;
-		}
-		data->length = data_null.length;
-	}
+static struct acpi_offsets state_offsets[] = {
+	{offsetof(struct acpi_battery, state), 0},
+	{offsetof(struct acpi_battery, current_now), 0},
+	{offsetof(struct acpi_battery, capacity_now), 0},
+	{offsetof(struct acpi_battery, voltage_now), 0},
+};
 
-	status = acpi_extract_package(package, format, data);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, status, "Extracting %s",
-				package_name));
-		return -ENODEV;
-	}
+static struct acpi_offsets info_offsets[] = {
+	{offsetof(struct acpi_battery, power_unit), 0},
+	{offsetof(struct acpi_battery, design_capacity), 0},
+	{offsetof(struct acpi_battery, full_charge_capacity), 0},
+	{offsetof(struct acpi_battery, technology), 0},
+	{offsetof(struct acpi_battery, design_voltage), 0},
+	{offsetof(struct acpi_battery, design_capacity_warning), 0},
+	{offsetof(struct acpi_battery, design_capacity_low), 0},
+	{offsetof(struct acpi_battery, capacity_granularity_1), 0},
+	{offsetof(struct acpi_battery, capacity_granularity_2), 0},
+	{offsetof(struct acpi_battery, model_number), 1},
+	{offsetof(struct acpi_battery, serial_number), 1},
+	{offsetof(struct acpi_battery, type), 1},
+	{offsetof(struct acpi_battery, oem_info), 1},
+};
 
+static int extract_package(struct acpi_battery *battery,
+			   union acpi_object *package,
+			   struct acpi_offsets *offsets, int num)
+{
+	int i, *x;
+	union acpi_object *element;
+	if (package->type != ACPI_TYPE_PACKAGE)
+		return -EFAULT;
+	for (i = 0; i < num; ++i) {
+		if (package->package.count <= i)
+			return -EFAULT;
+		element = &package->package.elements[i];
+		if (offsets[i].mode) {
+			if (element->type != ACPI_TYPE_STRING &&
+			    element->type != ACPI_TYPE_BUFFER)
+				return -EFAULT;
+			strncpy((u8 *)battery + offsets[i].offset,
+				element->string.pointer, 32);
+		} else {
+			if (element->type != ACPI_TYPE_INTEGER)
+				return -EFAULT;
+			x = (int *)((u8 *)battery + offsets[i].offset);
+			*x = element->integer.value;
+		}
+	}
 	return 0;
 }
 
 static int acpi_battery_get_status(struct acpi_battery *battery)
 {
-	int result = 0;
-
-	result = acpi_bus_get_status(battery->device);
-	if (result) {
+	if (acpi_bus_get_status(battery->device)) {
 		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA"));
 		return -ENODEV;
 	}
-	return result;
+	return 0;
 }
 
 static int acpi_battery_get_info(struct acpi_battery *battery)
 {
-	int result = 0;
+	int result = -EFAULT;
 	acpi_status status = 0;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF),
-		ACPI_BATTERY_FORMAT_BIF
-	};
-	union acpi_object *package = NULL;
-	struct acpi_buffer *data = NULL;
-	struct acpi_battery_info *bif = NULL;
-
-	battery->update_time[ACPI_BATTERY_INFO] = get_seconds();
 
 	if (!acpi_battery_present(battery))
 		return 0;
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BIF",
+				      NULL, &buffer);
+	mutex_unlock(&battery->lock);
 
-	/* Evaluate _BIF */
-
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL,
-				 &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF"));
 		return -ENODEV;
 	}
 
-	package = buffer.pointer;
-
-	data = &battery->bif_data;
-
-	/* Extract Package Data */
-
-	result =
-	    acpi_battery_extract_package(battery, package, &format, data,
-					 "_BIF");
-	if (result)
-		goto end;
-
-      end:
-
+	result = extract_package(battery, buffer.pointer,
+				 info_offsets, ARRAY_SIZE(info_offsets));
 	kfree(buffer.pointer);
-
-	if (!result) {
-		bif = data->pointer;
-		battery->flags.power_unit = bif->power_unit;
-	}
-
 	return result;
 }
 
@@ -273,342 +319,203 @@ static int acpi_battery_get_state(struct acpi_battery *battery)
 	int result = 0;
 	acpi_status status = 0;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
-	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST),
-		ACPI_BATTERY_FORMAT_BST
-	};
-	union acpi_object *package = NULL;
-	struct acpi_buffer *data = NULL;
-
-	battery->update_time[ACPI_BATTERY_STATE] = get_seconds();
 
 	if (!acpi_battery_present(battery))
 		return 0;
 
-	/* Evaluate _BST */
+	if (battery->update_time &&
+	    time_before(jiffies, battery->update_time +
+			msecs_to_jiffies(cache_time)))
+		return 0;
+
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BST",
+				      NULL, &buffer);
+	mutex_unlock(&battery->lock);
 
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL,
-				 &buffer);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST"));
 		return -ENODEV;
 	}
 
-	package = buffer.pointer;
-
-	data = &battery->bst_data;
-
-	/* Extract Package Data */
-
-	result =
-	    acpi_battery_extract_package(battery, package, &format, data,
-					 "_BST");
-	if (result)
-		goto end;
-
-      end:
+	result = extract_package(battery, buffer.pointer,
+				 state_offsets, ARRAY_SIZE(state_offsets));
+	battery->update_time = jiffies;
 	kfree(buffer.pointer);
-
 	return result;
 }
 
-static int acpi_battery_get_alarm(struct acpi_battery *battery)
-{
-	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
-
-	return 0;
-}
-
-static int acpi_battery_set_alarm(struct acpi_battery *battery,
-				  unsigned long alarm)
+static int acpi_battery_set_alarm(struct acpi_battery *battery)
 {
 	acpi_status status = 0;
-	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	union acpi_object arg0 = { .type = ACPI_TYPE_INTEGER };
 	struct acpi_object_list arg_list = { 1, &arg0 };
 
-	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
-
-	if (!acpi_battery_present(battery))
+	if (!acpi_battery_present(battery)|| !battery->alarm_present)
 		return -ENODEV;
 
-	if (!battery->flags.alarm_present)
-		return -ENODEV;
-
-	arg0.integer.value = alarm;
+	arg0.integer.value = battery->alarm;
 
-	status =
-	    acpi_evaluate_object(acpi_battery_handle(battery), "_BTP",
+	mutex_lock(&battery->lock);
+	status = acpi_evaluate_object(battery->device->handle, "_BTP",
 				 &arg_list, NULL);
+	mutex_unlock(&battery->lock);
+
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Alarm set to %d\n", (u32) alarm));
-
-	battery->alarm = alarm;
-
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Alarm set to %d\n", battery->alarm));
 	return 0;
 }
 
 static int acpi_battery_init_alarm(struct acpi_battery *battery)
 {
-	int result = 0;
 	acpi_status status = AE_OK;
 	acpi_handle handle = NULL;
-	struct acpi_battery_info *bif = battery->bif_data.pointer;
-	unsigned long alarm = battery->alarm;
 
 	/* See if alarms are supported, and if so, set default */
-
-	status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle);
-	if (ACPI_SUCCESS(status)) {
-		battery->flags.alarm_present = 1;
-		if (!alarm && bif) {
-			alarm = bif->design_capacity_warning;
-		}
-		result = acpi_battery_set_alarm(battery, alarm);
-		if (result)
-			goto end;
-	} else {
-		battery->flags.alarm_present = 0;
+	status = acpi_get_handle(battery->device->handle, "_BTP", &handle);
+	if (ACPI_FAILURE(status)) {
+		battery->alarm_present = 0;
+		return 0;
 	}
-
-      end:
-
-	return result;
+	battery->alarm_present = 1;
+	if (!battery->alarm)
+		battery->alarm = battery->design_capacity_warning;
+	return acpi_battery_set_alarm(battery);
 }
 
-static int acpi_battery_init_update(struct acpi_battery *battery)
+static int acpi_battery_update(struct acpi_battery *battery)
 {
-	int result = 0;
-
-	result = acpi_battery_get_status(battery);
-	if (result)
+	int saved_present = acpi_battery_present(battery);
+	int result = acpi_battery_get_status(battery);
+	if (result || !acpi_battery_present(battery))
 		return result;
-
-	battery->flags.battery_present_prev = acpi_battery_present(battery);
-
-	if (acpi_battery_present(battery)) {
+	if (saved_present != acpi_battery_present(battery) ||
+	    !battery->update_time) {
+		battery->update_time = 0;
 		result = acpi_battery_get_info(battery);
 		if (result)
 			return result;
-		result = acpi_battery_get_state(battery);
-		if (result)
-			return result;
-
-		acpi_battery_init_alarm(battery);
-	}
-
-	return result;
-}
-
-static int acpi_battery_update(struct acpi_battery *battery,
-			       int update, int *update_result_ptr)
-{
-	int result = 0;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
-
-	if (!acpi_battery_present(battery)) {
-		update = 1;
-	}
-
-	if (battery->flags.init_update) {
-		result = acpi_battery_init_update(battery);
-		if (result)
-			goto end;
-		update_result = ACPI_BATTERY_INIT_UPDATE;
-	} else if (update) {
-		result = acpi_battery_get_status(battery);
-		if (result)
-			goto end;
-		if ((!battery->flags.battery_present_prev & acpi_battery_present(battery))
-		    || (battery->flags.battery_present_prev & !acpi_battery_present(battery))) {
-			result = acpi_battery_init_update(battery);
-			if (result)
-				goto end;
-			update_result = ACPI_BATTERY_INIT_UPDATE;
+		if (battery->power_unit) {
+			battery->bat.properties = charge_battery_props;
+			battery->bat.num_properties =
+				ARRAY_SIZE(charge_battery_props);
 		} else {
-			update_result = ACPI_BATTERY_EASY_UPDATE;
+			battery->bat.properties = energy_battery_props;
+			battery->bat.num_properties =
+				ARRAY_SIZE(energy_battery_props);
 		}
+		acpi_battery_init_alarm(battery);
 	}
-
-      end:
-
-	battery->flags.init_update = (result != 0);
-
-	*update_result_ptr = update_result;
-
-	return result;
-}
-
-static void acpi_battery_notify_update(struct acpi_battery *battery)
-{
-	acpi_battery_get_status(battery);
-
-	if (battery->flags.init_update) {
-		return;
-	}
-
-	if ((!battery->flags.battery_present_prev &
-	     acpi_battery_present(battery)) ||
-	    (battery->flags.battery_present_prev &
-	     !acpi_battery_present(battery))) {
-		battery->flags.init_update = 1;
-	} else {
-		battery->flags.update[ACPI_BATTERY_INFO] = 1;
-		battery->flags.update[ACPI_BATTERY_STATE] = 1;
-		battery->flags.update[ACPI_BATTERY_ALARM] = 1;
-	}
+	return acpi_battery_get_state(battery);
 }
 
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
 
+#ifdef CONFIG_ACPI_PROCFS
 static struct proc_dir_entry *acpi_battery_dir;
 
 static int acpi_battery_print_info(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	struct acpi_battery_info *bif = NULL;
-	char *units = "?";
 
 	if (result)
 		goto end;
 
-	if (acpi_battery_present(battery))
-		seq_printf(seq, "present:                 yes\n");
-	else {
-		seq_printf(seq, "present:                 no\n");
-		goto end;
-	}
-
-	bif = battery->bif_data.pointer;
-	if (!bif) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL"));
-		result = -ENODEV;
+	seq_printf(seq, "present:                 %s\n",
+		   acpi_battery_present(battery)?"yes":"no");
+	if (!acpi_battery_present(battery))
 		goto end;
-	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
-	if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "design capacity:         unknown\n");
 	else
 		seq_printf(seq, "design capacity:         %d %sh\n",
-			   (u32) bif->design_capacity, units);
+			   battery->design_capacity,
+			   acpi_battery_units(battery));
 
-	if (bif->last_full_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "last full capacity:      unknown\n");
 	else
 		seq_printf(seq, "last full capacity:      %d %sh\n",
-			   (u32) bif->last_full_capacity, units);
+			   battery->full_charge_capacity,
+			   acpi_battery_units(battery));
 
-	switch ((u32) bif->battery_technology) {
-	case 0:
-		seq_printf(seq, "battery technology:      non-rechargeable\n");
-		break;
-	case 1:
-		seq_printf(seq, "battery technology:      rechargeable\n");
-		break;
-	default:
-		seq_printf(seq, "battery technology:      unknown\n");
-		break;
-	}
+	seq_printf(seq, "battery technology:      %srechargeable\n",
+		   (!battery->technology)?"non-":"");
 
-	if (bif->design_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->design_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "design voltage:          unknown\n");
 	else
 		seq_printf(seq, "design voltage:          %d mV\n",
-			   (u32) bif->design_voltage);
+			   battery->design_voltage);
 	seq_printf(seq, "design capacity warning: %d %sh\n",
-		   (u32) bif->design_capacity_warning, units);
+		   battery->design_capacity_warning,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "design capacity low:     %d %sh\n",
-		   (u32) bif->design_capacity_low, units);
+		   battery->design_capacity_low,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "capacity granularity 1:  %d %sh\n",
-		   (u32) bif->battery_capacity_granularity_1, units);
+		   battery->capacity_granularity_1,
+		   acpi_battery_units(battery));
 	seq_printf(seq, "capacity granularity 2:  %d %sh\n",
-		   (u32) bif->battery_capacity_granularity_2, units);
-	seq_printf(seq, "model number:            %s\n", bif->model_number);
-	seq_printf(seq, "serial number:           %s\n", bif->serial_number);
-	seq_printf(seq, "battery type:            %s\n", bif->battery_type);
-	seq_printf(seq, "OEM info:                %s\n", bif->oem_info);
-
+		   battery->capacity_granularity_2,
+		   acpi_battery_units(battery));
+	seq_printf(seq, "model number:            %s\n", battery->model_number);
+	seq_printf(seq, "serial number:           %s\n", battery->serial_number);
+	seq_printf(seq, "battery type:            %s\n", battery->type);
+	seq_printf(seq, "OEM info:                %s\n", battery->oem_info);
       end:
-
 	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery info\n");
-
 	return result;
 }
 
 static int acpi_battery_print_state(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	struct acpi_battery_state *bst = NULL;
-	char *units = "?";
 
 	if (result)
 		goto end;
 
-	if (acpi_battery_present(battery))
-		seq_printf(seq, "present:                 yes\n");
-	else {
-		seq_printf(seq, "present:                 no\n");
-		goto end;
-	}
-
-	bst = battery->bst_data.pointer;
-	if (!bst) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL"));
-		result = -ENODEV;
+	seq_printf(seq, "present:                 %s\n",
+		   acpi_battery_present(battery)?"yes":"no");
+	if (!acpi_battery_present(battery))
 		goto end;
-	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
-	if (!(bst->state & 0x04))
-		seq_printf(seq, "capacity state:          ok\n");
-	else
-		seq_printf(seq, "capacity state:          critical\n");
 
-	if ((bst->state & 0x01) && (bst->state & 0x02)) {
+	seq_printf(seq, "capacity state:          %s\n",
+			(battery->state & 0x04)?"critical":"ok");
+	if ((battery->state & 0x01) && (battery->state & 0x02))
 		seq_printf(seq,
 			   "charging state:          charging/discharging\n");
-	} else if (bst->state & 0x01)
+	else if (battery->state & 0x01)
 		seq_printf(seq, "charging state:          discharging\n");
-	else if (bst->state & 0x02)
+	else if (battery->state & 0x02)
 		seq_printf(seq, "charging state:          charging\n");
-	else {
+	else
 		seq_printf(seq, "charging state:          charged\n");
-	}
 
-	if (bst->present_rate == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->current_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "present rate:            unknown\n");
 	else
 		seq_printf(seq, "present rate:            %d %s\n",
-			   (u32) bst->present_rate, units);
+			   battery->current_now, acpi_battery_units(battery));
 
-	if (bst->remaining_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+	if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "remaining capacity:      unknown\n");
 	else
 		seq_printf(seq, "remaining capacity:      %d %sh\n",
-			   (u32) bst->remaining_capacity, units);
-
-	if (bst->present_voltage == ACPI_BATTERY_VALUE_UNKNOWN)
+			   battery->capacity_now, acpi_battery_units(battery));
+	if (battery->voltage_now == ACPI_BATTERY_VALUE_UNKNOWN)
 		seq_printf(seq, "present voltage:         unknown\n");
 	else
 		seq_printf(seq, "present voltage:         %d mV\n",
-			   (u32) bst->present_voltage);
-
+			   battery->voltage_now);
       end:
-
-	if (result) {
+	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery state\n");
-	}
 
 	return result;
 }
@@ -616,7 +523,6 @@ static int acpi_battery_print_state(struct seq_file *seq, int result)
 static int acpi_battery_print_alarm(struct seq_file *seq, int result)
 {
 	struct acpi_battery *battery = seq->private;
-	char *units = "?";
 
 	if (result)
 		goto end;
@@ -625,189 +531,121 @@ static int acpi_battery_print_alarm(struct seq_file *seq, int result)
 		seq_printf(seq, "present:                 no\n");
 		goto end;
 	}
-
-	/* Battery Units */
-
-	units = acpi_battery_power_units(battery);
-
 	seq_printf(seq, "alarm:                   ");
 	if (!battery->alarm)
 		seq_printf(seq, "unsupported\n");
 	else
-		seq_printf(seq, "%lu %sh\n", battery->alarm, units);
-
+		seq_printf(seq, "%u %sh\n", battery->alarm,
+				acpi_battery_units(battery));
       end:
-
 	if (result)
 		seq_printf(seq, "ERROR: Unable to read battery alarm\n");
-
 	return result;
 }
 
-static ssize_t
-acpi_battery_write_alarm(struct file *file,
-			 const char __user * buffer,
-			 size_t count, loff_t * ppos)
+static ssize_t acpi_battery_write_alarm(struct file *file,
+					const char __user * buffer,
+					size_t count, loff_t * ppos)
 {
 	int result = 0;
 	char alarm_string[12] = { '\0' };
 	struct seq_file *m = file->private_data;
 	struct acpi_battery *battery = m->private;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
 
 	if (!battery || (count > sizeof(alarm_string) - 1))
 		return -EINVAL;
-
-	mutex_lock(&battery->mutex);
-
-	result = acpi_battery_update(battery, 1, &update_result);
 	if (result) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (!acpi_battery_present(battery)) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (copy_from_user(alarm_string, buffer, count)) {
 		result = -EFAULT;
 		goto end;
 	}
-
 	alarm_string[count] = '\0';
-
-	result = acpi_battery_set_alarm(battery,
-					simple_strtoul(alarm_string, NULL, 0));
-	if (result)
-		goto end;
-
+	battery->alarm = simple_strtol(alarm_string, NULL, 0);
+	result = acpi_battery_set_alarm(battery);
       end:
-
-	acpi_battery_check_result(battery, result);
-
 	if (!result)
-		result = count;
-
-	mutex_unlock(&battery->mutex);
-
+		return count;
 	return result;
 }
 
 typedef int(*print_func)(struct seq_file *seq, int result);
-typedef int(*get_func)(struct acpi_battery *battery);
-
-static struct acpi_read_mux {
-	print_func print;
-	get_func get;
-} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = {
-	{.get = acpi_battery_get_info, .print = acpi_battery_print_info},
-	{.get = acpi_battery_get_state, .print = acpi_battery_print_state},
-	{.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm},
+
+static print_func acpi_print_funcs[ACPI_BATTERY_NUMFILES] = {
+	acpi_battery_print_info,
+	acpi_battery_print_state,
+	acpi_battery_print_alarm,
 };
 
 static int acpi_battery_read(int fid, struct seq_file *seq)
 {
 	struct acpi_battery *battery = seq->private;
-	int result = 0;
-	int update_result = ACPI_BATTERY_NONE_UPDATE;
-	int update = 0;
-
-	mutex_lock(&battery->mutex);
-
-	update = (get_seconds() - battery->update_time[fid] >= update_time);
-	update = (update | battery->flags.update[fid]);
-
-	result = acpi_battery_update(battery, update, &update_result);
-	if (result)
-		goto end;
-
-	if (update_result == ACPI_BATTERY_EASY_UPDATE) {
-		result = acpi_read_funcs[fid].get(battery);
-		if (result)
-			goto end;
+	int result = acpi_battery_update(battery);
+	return acpi_print_funcs[fid](seq, result);
+}
+
+#define DECLARE_FILE_FUNCTIONS(_name) \
+static int acpi_battery_read_##_name(struct seq_file *seq, void *offset) \
+{ \
+	return acpi_battery_read(_name##_tag, seq); \
+} \
+static int acpi_battery_##_name##_open_fs(struct inode *inode, struct file *file) \
+{ \
+	return single_open(file, acpi_battery_read_##_name, PDE(inode)->data); \
+}
+
+DECLARE_FILE_FUNCTIONS(info);
+DECLARE_FILE_FUNCTIONS(state);
+DECLARE_FILE_FUNCTIONS(alarm);
+
+#undef DECLARE_FILE_FUNCTIONS
+
+#define FILE_DESCRIPTION_RO(_name) \
+	{ \
+	.name = __stringify(_name), \
+	.mode = S_IRUGO, \
+	.ops = { \
+		.open = acpi_battery_##_name##_open_fs, \
+		.read = seq_read, \
+		.llseek = seq_lseek, \
+		.release = single_release, \
+		.owner = THIS_MODULE, \
+		}, \
+	}
+
+#define FILE_DESCRIPTION_RW(_name) \
+	{ \
+	.name = __stringify(_name), \
+	.mode = S_IFREG | S_IRUGO | S_IWUSR, \
+	.ops = { \
+		.open = acpi_battery_##_name##_open_fs, \
+		.read = seq_read, \
+		.llseek = seq_lseek, \
+		.write = acpi_battery_write_##_name, \
+		.release = single_release, \
+		.owner = THIS_MODULE, \
+		}, \
 	}
 
-      end:
-	result = acpi_read_funcs[fid].print(seq, result);
-	acpi_battery_check_result(battery, result);
-	battery->flags.update[fid] = result;
-	mutex_unlock(&battery->mutex);
-	return result;
-}
-
-static int acpi_battery_read_info(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_INFO, seq);
-}
-
-static int acpi_battery_read_state(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_STATE, seq);
-}
-
-static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
-{
-	return acpi_battery_read(ACPI_BATTERY_ALARM, seq);
-}
-
-static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_info, PDE(inode)->data);
-}
-
-static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_state, PDE(inode)->data);
-}
-
-static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_battery_read_alarm, PDE(inode)->data);
-}
-
 static struct battery_file {
 	struct file_operations ops;
 	mode_t mode;
 	char *name;
 } acpi_battery_file[] = {
-	{
-	.name = "info",
-	.mode = S_IRUGO,
-	.ops = {
-	.open = acpi_battery_info_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
-	{
-	.name = "state",
-	.mode = S_IRUGO,
-	.ops = {
-	.open = acpi_battery_state_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
-	{
-	.name = "alarm",
-	.mode = S_IFREG | S_IRUGO | S_IWUSR,
-	.ops = {
-	.open = acpi_battery_alarm_open_fs,
-	.read = seq_read,
-	.write = acpi_battery_write_alarm,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-	},
-	},
+	FILE_DESCRIPTION_RO(info),
+	FILE_DESCRIPTION_RO(state),
+	FILE_DESCRIPTION_RW(alarm),
 };
 
+#undef FILE_DESCRIPTION_RO
+#undef FILE_DESCRIPTION_RW
+
 static int acpi_battery_add_fs(struct acpi_device *device)
 {
 	struct proc_dir_entry *entry = NULL;
@@ -832,25 +670,51 @@ static int acpi_battery_add_fs(struct acpi_device *device)
 			entry->owner = THIS_MODULE;
 		}
 	}
-
 	return 0;
 }
 
-static int acpi_battery_remove_fs(struct acpi_device *device)
+static void acpi_battery_remove_fs(struct acpi_device *device)
 {
 	int i;
-	if (acpi_device_dir(device)) {
-		for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
-			remove_proc_entry(acpi_battery_file[i].name,
+	if (!acpi_device_dir(device))
+		return;
+	for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i)
+		remove_proc_entry(acpi_battery_file[i].name,
 				  acpi_device_dir(device));
-		}
-		remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
-		acpi_device_dir(device) = NULL;
-	}
 
-	return 0;
+	remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+	acpi_device_dir(device) = NULL;
+}
+
+#endif
+
+static ssize_t acpi_battery_alarm_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	return sprintf(buf, "%d\n", battery->alarm * 1000);
+}
+
+static ssize_t acpi_battery_alarm_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned long x;
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	if (sscanf(buf, "%ld\n", &x) == 1)
+		battery->alarm = x/1000;
+	if (acpi_battery_present(battery))
+		acpi_battery_set_alarm(battery);
+	return count;
 }
 
+static struct device_attribute alarm_attr = {
+	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.show = acpi_battery_alarm_show,
+	.store = acpi_battery_alarm_store,
+};
+
 /* --------------------------------------------------------------------------
                                  Driver Interface
    -------------------------------------------------------------------------- */
@@ -858,33 +722,17 @@ static int acpi_battery_remove_fs(struct acpi_device *device)
 static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct acpi_battery *battery = data;
-	struct acpi_device *device = NULL;
-
+	struct acpi_device *device;
 	if (!battery)
 		return;
-
 	device = battery->device;
-
-	switch (event) {
-	case ACPI_BATTERY_NOTIFY_STATUS:
-	case ACPI_BATTERY_NOTIFY_INFO:
-	case ACPI_NOTIFY_BUS_CHECK:
-	case ACPI_NOTIFY_DEVICE_CHECK:
-		device = battery->device;
-		acpi_battery_notify_update(battery);
-		acpi_bus_generate_proc_event(device, event,
+	acpi_battery_update(battery);
+	acpi_bus_generate_proc_event(device, event,
+				     acpi_battery_present(battery));
+	acpi_bus_generate_netlink_event(device->pnp.device_class,
+					device->dev.bus_id, event,
 					acpi_battery_present(battery));
-		acpi_bus_generate_netlink_event(device->pnp.device_class,
-						  device->dev.bus_id, event,
-						  acpi_battery_present(battery));
-		break;
-	default:
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
-				  "Unsupported event [0x%x]\n", event));
-		break;
-	}
-
-	return;
+	kobject_uevent(&battery->bat.dev->kobj, KOBJ_CHANGE);
 }
 
 static int acpi_battery_add(struct acpi_device *device)
@@ -892,33 +740,27 @@ static int acpi_battery_add(struct acpi_device *device)
 	int result = 0;
 	acpi_status status = 0;
 	struct acpi_battery *battery = NULL;
-
 	if (!device)
 		return -EINVAL;
-
 	battery = kzalloc(sizeof(struct acpi_battery), GFP_KERNEL);
 	if (!battery)
 		return -ENOMEM;
-
-	mutex_init(&battery->mutex);
-
-	mutex_lock(&battery->mutex);
-
 	battery->device = device;
 	strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
 	acpi_driver_data(device) = battery;
-
-	result = acpi_battery_get_status(battery);
-	if (result)
-		goto end;
-
-	battery->flags.init_update = 1;
-
+	mutex_init(&battery->lock);
+	acpi_battery_update(battery);
+#ifdef CONFIG_ACPI_PROCFS
 	result = acpi_battery_add_fs(device);
 	if (result)
 		goto end;
-
+#endif
+	battery->bat.name = acpi_device_bid(device);
+	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	battery->bat.get_property = acpi_battery_get_property;
+	result = power_supply_register(&battery->device->dev, &battery->bat);
+	result = device_create_file(battery->bat.dev, &alarm_attr);
 	status = acpi_install_notify_handler(device->handle,
 					     ACPI_ALL_NOTIFY,
 					     acpi_battery_notify, battery);
@@ -927,20 +769,16 @@ static int acpi_battery_add(struct acpi_device *device)
 		result = -ENODEV;
 		goto end;
 	}
-
 	printk(KERN_INFO PREFIX "%s Slot [%s] (battery %s)\n",
 	       ACPI_BATTERY_DEVICE_NAME, acpi_device_bid(device),
 	       device->status.battery_present ? "present" : "absent");
-
       end:
-
 	if (result) {
+#ifdef CONFIG_ACPI_PROCFS
 		acpi_battery_remove_fs(device);
+#endif
 		kfree(battery);
 	}
-
-	mutex_unlock(&battery->mutex);
-
 	return result;
 }
 
@@ -951,27 +789,19 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
 
 	if (!device || !acpi_driver_data(device))
 		return -EINVAL;
-
 	battery = acpi_driver_data(device);
-
-	mutex_lock(&battery->mutex);
-
 	status = acpi_remove_notify_handler(device->handle,
 					    ACPI_ALL_NOTIFY,
 					    acpi_battery_notify);
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_battery_remove_fs(device);
-
-	kfree(battery->bif_data.pointer);
-
-	kfree(battery->bst_data.pointer);
-
-	mutex_unlock(&battery->mutex);
-
-	mutex_destroy(&battery->mutex);
-
+#endif
+	if (battery->bat.dev) {
+		device_remove_file(battery->bat.dev, &alarm_attr);
+		power_supply_unregister(&battery->bat);
+	}
+	mutex_destroy(&battery->lock);
 	kfree(battery);
-
 	return 0;
 }
 
@@ -979,44 +809,48 @@ static int acpi_battery_remove(struct acpi_device *device, int type)
 static int acpi_battery_resume(struct acpi_device *device)
 {
 	struct acpi_battery *battery;
-
 	if (!device)
 		return -EINVAL;
-
-	battery = device->driver_data;
-
-	battery->flags.init_update = 1;
-
+	battery = acpi_driver_data(device);
+	battery->update_time = 0;
 	return 0;
 }
 
+static struct acpi_driver acpi_battery_driver = {
+	.name = "battery",
+	.class = ACPI_BATTERY_CLASS,
+	.ids = battery_device_ids,
+	.ops = {
+		.add = acpi_battery_add,
+		.resume = acpi_battery_resume,
+		.remove = acpi_battery_remove,
+		},
+};
+
 static int __init acpi_battery_init(void)
 {
-	int result;
-
 	if (acpi_disabled)
 		return -ENODEV;
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir)
 		return -ENODEV;
-
-	result = acpi_bus_register_driver(&acpi_battery_driver);
-	if (result < 0) {
+#endif
+	if (acpi_bus_register_driver(&acpi_battery_driver) < 0) {
+#ifdef CONFIG_ACPI_PROCFS
 		acpi_unlock_battery_dir(acpi_battery_dir);
+#endif
 		return -ENODEV;
 	}
-
 	return 0;
 }
 
 static void __exit acpi_battery_exit(void)
 {
 	acpi_bus_unregister_driver(&acpi_battery_driver);
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_unlock_battery_dir(acpi_battery_dir);
-
-	return;
+#endif
 }
 
 module_init(acpi_battery_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index cbfc81579c9..fb2cff9a2d2 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -286,15 +286,11 @@ DECLARE_WAIT_QUEUE_HEAD(acpi_bus_event_queue);
 
 extern int event_is_open;
 
-int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
+int acpi_bus_generate_proc_event4(const char *device_class, const char *bus_id, u8 type, int data)
 {
-	struct acpi_bus_event *event = NULL;
+	struct acpi_bus_event *event;
 	unsigned long flags = 0;
 
-
-	if (!device)
-		return -EINVAL;
-
 	/* drop event on the floor if no one's listening */
 	if (!event_is_open)
 		return 0;
@@ -303,8 +299,8 @@ int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
 	if (!event)
 		return -ENOMEM;
 
-	strcpy(event->device_class, device->pnp.device_class);
-	strcpy(event->bus_id, device->pnp.bus_id);
+	strcpy(event->device_class, device_class);
+	strcpy(event->bus_id, bus_id);
 	event->type = type;
 	event->data = data;
 
@@ -315,6 +311,17 @@ int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
 	wake_up_interruptible(&acpi_bus_event_queue);
 
 	return 0;
+
+}
+
+EXPORT_SYMBOL_GPL(acpi_bus_generate_proc_event4);
+
+int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
+{
+	if (!device)
+		return -EINVAL;
+	return acpi_bus_generate_proc_event4(device->pnp.device_class,
+					     device->pnp.bus_id, type, data);
 }
 
 EXPORT_SYMBOL(acpi_bus_generate_proc_event);
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 2e79a3395ec..301e832e696 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -434,18 +434,18 @@ static int acpi_button_add(struct acpi_device *device)
 	switch (button->type) {
 	case ACPI_BUTTON_TYPE_POWER:
 	case ACPI_BUTTON_TYPE_POWERF:
-		input->evbit[0] = BIT(EV_KEY);
+		input->evbit[0] = BIT_MASK(EV_KEY);
 		set_bit(KEY_POWER, input->keybit);
 		break;
 
 	case ACPI_BUTTON_TYPE_SLEEP:
 	case ACPI_BUTTON_TYPE_SLEEPF:
-		input->evbit[0] = BIT(EV_KEY);
+		input->evbit[0] = BIT_MASK(EV_KEY);
 		set_bit(KEY_SLEEP, input->keybit);
 		break;
 
 	case ACPI_BUTTON_TYPE_LID:
-		input->evbit[0] = BIT(EV_SW);
+		input->evbit[0] = BIT_MASK(EV_SW);
 		set_bit(SW_LID, input->swbit);
 		break;
 	}
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3f7935ab0cf..7b4178393e3 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -121,6 +121,7 @@ static struct acpi_ec {
 	atomic_t event_count;
 	wait_queue_head_t wait;
 	struct list_head list;
+	u8 handlers_installed;
 } *boot_ec, *first_ec;
 
 /* --------------------------------------------------------------------------
@@ -425,7 +426,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 	handler->func = func;
 	handler->data = data;
 	mutex_lock(&ec->lock);
-	list_add_tail(&handler->node, &ec->list);
+	list_add(&handler->node, &ec->list);
 	mutex_unlock(&ec->lock);
 	return 0;
 }
@@ -440,7 +441,6 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
 		if (query_bit == handler->query_bit) {
 			list_del(&handler->node);
 			kfree(handler);
-			break;
 		}
 	}
 	mutex_unlock(&ec->lock);
@@ -680,32 +680,50 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
 	status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe);
 	if (ACPI_FAILURE(status))
 		return status;
-
 	/* Find and register all query methods */
 	acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1,
 			    acpi_ec_register_query_methods, ec, NULL);
-
 	/* Use the global lock for all EC transactions? */
 	acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
-
 	ec->handle = handle;
-
-	printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
-			  ec->gpe, ec->command_addr, ec->data_addr);
-
 	return AE_CTRL_TERMINATE;
 }
 
+static void ec_remove_handlers(struct acpi_ec *ec)
+{
+	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
+				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
+		printk(KERN_ERR PREFIX "failed to remove space handler\n");
+	if (ACPI_FAILURE(acpi_remove_gpe_handler(NULL, ec->gpe,
+				&acpi_ec_gpe_handler)))
+		printk(KERN_ERR PREFIX "failed to remove gpe handler\n");
+	ec->handlers_installed = 0;
+}
+
 static int acpi_ec_add(struct acpi_device *device)
 {
 	struct acpi_ec *ec = NULL;
 
 	if (!device)
 		return -EINVAL;
-
 	strcpy(acpi_device_name(device), ACPI_EC_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_EC_CLASS);
 
+	/* Check for boot EC */
+	if (boot_ec) {
+		if (boot_ec->handle == device->handle) {
+			/* Pre-loaded EC from DSDT, just move pointer */
+			ec = boot_ec;
+			boot_ec = NULL;
+			goto end;
+		} else if (boot_ec->handle == ACPI_ROOT_OBJECT) {
+			/* ECDT-based EC, time to shut it down */
+			ec_remove_handlers(boot_ec);
+			kfree(boot_ec);
+			first_ec = boot_ec = NULL;
+		}
+	}
+
 	ec = make_acpi_ec();
 	if (!ec)
 		return -ENOMEM;
@@ -715,25 +733,14 @@ static int acpi_ec_add(struct acpi_device *device)
 		kfree(ec);
 		return -EINVAL;
 	}
-
-	/* Check if we found the boot EC */
-	if (boot_ec) {
-		if (boot_ec->gpe == ec->gpe) {
-			/* We might have incorrect info for GL at boot time */
-			mutex_lock(&boot_ec->lock);
-			boot_ec->global_lock = ec->global_lock;
-			/* Copy handlers from new ec into boot ec */
-			list_splice(&ec->list, &boot_ec->list);
-			mutex_unlock(&boot_ec->lock);
-			kfree(ec);
-			ec = boot_ec;
-		}
-	} else
-		first_ec = ec;
 	ec->handle = device->handle;
+      end:
+	if (!first_ec)
+		first_ec = ec;
 	acpi_driver_data(device) = ec;
-
 	acpi_ec_add_fs(device);
+	printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
+			  ec->gpe, ec->command_addr, ec->data_addr);
 	return 0;
 }
 
@@ -756,10 +763,7 @@ static int acpi_ec_remove(struct acpi_device *device, int type)
 	acpi_driver_data(device) = NULL;
 	if (ec == first_ec)
 		first_ec = NULL;
-
-	/* Don't touch boot EC */
-	if (boot_ec != ec)
-		kfree(ec);
+	kfree(ec);
 	return 0;
 }
 
@@ -789,6 +793,8 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context)
 static int ec_install_handlers(struct acpi_ec *ec)
 {
 	acpi_status status;
+	if (ec->handlers_installed)
+		return 0;
 	status = acpi_install_gpe_handler(NULL, ec->gpe,
 					  ACPI_GPE_EDGE_TRIGGERED,
 					  &acpi_ec_gpe_handler, ec);
@@ -807,6 +813,7 @@ static int ec_install_handlers(struct acpi_ec *ec)
 		return -ENODEV;
 	}
 
+	ec->handlers_installed = 1;
 	return 0;
 }
 
@@ -823,41 +830,22 @@ static int acpi_ec_start(struct acpi_device *device)
 	if (!ec)
 		return -EINVAL;
 
-	/* Boot EC is already working */
-	if (ec != boot_ec)
-		ret = ec_install_handlers(ec);
+	ret = ec_install_handlers(ec);
 
 	/* EC is fully operational, allow queries */
 	atomic_set(&ec->query_pending, 0);
-
 	return ret;
 }
 
 static int acpi_ec_stop(struct acpi_device *device, int type)
 {
-	acpi_status status;
 	struct acpi_ec *ec;
-
 	if (!device)
 		return -EINVAL;
-
 	ec = acpi_driver_data(device);
 	if (!ec)
 		return -EINVAL;
-
-	/* Don't touch boot EC */
-	if (ec == boot_ec)
-		return 0;
-
-	status = acpi_remove_address_space_handler(ec->handle,
-						   ACPI_ADR_SPACE_EC,
-						   &acpi_ec_space_handler);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
-
-	status = acpi_remove_gpe_handler(NULL, ec->gpe, &acpi_ec_gpe_handler);
-	if (ACPI_FAILURE(status))
-		return -ENODEV;
+	ec_remove_handlers(ec);
 
 	return 0;
 }
@@ -877,7 +865,7 @@ int __init acpi_ec_ecdt_probe(void)
 	status = acpi_get_table(ACPI_SIG_ECDT, 1,
 				(struct acpi_table_header **)&ecdt_ptr);
 	if (ACPI_SUCCESS(status)) {
-		printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n\n");
+		printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n");
 		boot_ec->command_addr = ecdt_ptr->control.address;
 		boot_ec->data_addr = ecdt_ptr->data.address;
 		boot_ec->gpe = ecdt_ptr->gpe;
@@ -899,7 +887,6 @@ int __init acpi_ec_ecdt_probe(void)
       error:
 	kfree(boot_ec);
 	boot_ec = NULL;
-
 	return -ENODEV;
 }
 
diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c
index a1f87b5def2..e41287815ea 100644
--- a/drivers/acpi/events/evevent.c
+++ b/drivers/acpi/events/evevent.c
@@ -239,10 +239,8 @@ u32 acpi_ev_fixed_event_detect(void)
 	 * Read the fixed feature status and enable registers, as all the cases
 	 * depend on their values.  Ignore errors here.
 	 */
-	(void)acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				    ACPI_REGISTER_PM1_STATUS, &fixed_status);
-	(void)acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				    ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
+	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &fixed_status);
+	(void)acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &fixed_enable);
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INTERRUPTS,
 			  "Fixed Event Block: Enable %08X Status %08X\n",
diff --git a/drivers/acpi/hardware/hwregs.c b/drivers/acpi/hardware/hwregs.c
index 1d371fa663f..73f9c5fb1ba 100644
--- a/drivers/acpi/hardware/hwregs.c
+++ b/drivers/acpi/hardware/hwregs.c
@@ -75,8 +75,7 @@ acpi_status acpi_hw_clear_acpi_status(void)
 
 	lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1_STATUS,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
 					ACPI_BITMASK_ALL_FIXED_STATUS);
 	if (ACPI_FAILURE(status)) {
 		goto unlock_and_exit;
@@ -259,7 +258,7 @@ struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id)
  *
  ******************************************************************************/
 
-acpi_status acpi_get_register(u32 register_id, u32 * return_value)
+acpi_status acpi_get_register_unlocked(u32 register_id, u32 * return_value)
 {
 	u32 register_value = 0;
 	struct acpi_bit_register_info *bit_reg_info;
@@ -276,8 +275,7 @@ acpi_status acpi_get_register(u32 register_id, u32 * return_value)
 
 	/* Read from the register */
 
-	status = acpi_hw_register_read(ACPI_MTX_LOCK,
-				       bit_reg_info->parent_register,
+	status = acpi_hw_register_read(bit_reg_info->parent_register,
 				       &register_value);
 
 	if (ACPI_SUCCESS(status)) {
@@ -298,6 +296,16 @@ acpi_status acpi_get_register(u32 register_id, u32 * return_value)
 	return_ACPI_STATUS(status);
 }
 
+acpi_status acpi_get_register(u32 register_id, u32 * return_value)
+{
+	acpi_status status;
+	acpi_cpu_flags flags;
+	flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+	status = acpi_get_register_unlocked(register_id, return_value);
+	acpi_os_release_lock(acpi_gbl_hardware_lock, flags);
+	return status;
+}
+
 ACPI_EXPORT_SYMBOL(acpi_get_register)
 
 /*******************************************************************************
@@ -335,8 +343,7 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
 
 	/* Always do a register read first so we can insert the new bits  */
 
-	status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				       bit_reg_info->parent_register,
+	status = acpi_hw_register_read(bit_reg_info->parent_register,
 				       &register_value);
 	if (ACPI_FAILURE(status)) {
 		goto unlock_and_exit;
@@ -363,8 +370,7 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
 						   bit_reg_info->
 						   access_bit_mask);
 		if (value) {
-			status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-							ACPI_REGISTER_PM1_STATUS,
+			status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
 							(u16) value);
 			register_value = 0;
 		}
@@ -377,8 +383,7 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
 					   bit_reg_info->access_bit_mask,
 					   value);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_ENABLE,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_ENABLE,
 						(u16) register_value);
 		break;
 
@@ -397,15 +402,13 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
 					   bit_reg_info->access_bit_mask,
 					   value);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_CONTROL,
 						(u16) register_value);
 		break;
 
 	case ACPI_REGISTER_PM2_CONTROL:
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM2_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
 					       &register_value);
 		if (ACPI_FAILURE(status)) {
 			goto unlock_and_exit;
@@ -430,8 +433,7 @@ acpi_status acpi_set_register(u32 register_id, u32 value)
 						     xpm2_control_block.
 						     address)));
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM2_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM2_CONTROL,
 						(u8) (register_value));
 		break;
 
@@ -461,8 +463,7 @@ ACPI_EXPORT_SYMBOL(acpi_set_register)
  *
  * FUNCTION:    acpi_hw_register_read
  *
- * PARAMETERS:  use_lock            - Lock hardware? True/False
- *              register_id         - ACPI Register ID
+ * PARAMETERS:  register_id         - ACPI Register ID
  *              return_value        - Where the register value is returned
  *
  * RETURN:      Status and the value read.
@@ -471,19 +472,14 @@ ACPI_EXPORT_SYMBOL(acpi_set_register)
  *
  ******************************************************************************/
 acpi_status
-acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
+acpi_hw_register_read(u32 register_id, u32 * return_value)
 {
 	u32 value1 = 0;
 	u32 value2 = 0;
 	acpi_status status;
-	acpi_cpu_flags lock_flags = 0;
 
 	ACPI_FUNCTION_TRACE(hw_register_read);
 
-	if (ACPI_MTX_LOCK == use_lock) {
-		lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
-	}
-
 	switch (register_id) {
 	case ACPI_REGISTER_PM1_STATUS:	/* 16-bit access */
 
@@ -491,7 +487,7 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
 		    acpi_hw_low_level_read(16, &value1,
 					   &acpi_gbl_FADT.xpm1a_event_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -507,7 +503,7 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
 		status =
 		    acpi_hw_low_level_read(16, &value1, &acpi_gbl_xpm1a_enable);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -523,7 +519,7 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
 		    acpi_hw_low_level_read(16, &value1,
 					   &acpi_gbl_FADT.xpm1a_control_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		status =
@@ -558,10 +554,7 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
 		break;
 	}
 
-      unlock_and_exit:
-	if (ACPI_MTX_LOCK == use_lock) {
-		acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
-	}
+      exit:
 
 	if (ACPI_SUCCESS(status)) {
 		*return_value = value1;
@@ -574,8 +567,7 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
  *
  * FUNCTION:    acpi_hw_register_write
  *
- * PARAMETERS:  use_lock            - Lock hardware? True/False
- *              register_id         - ACPI Register ID
+ * PARAMETERS:  register_id         - ACPI Register ID
  *              Value               - The value to write
  *
  * RETURN:      Status
@@ -597,28 +589,22 @@ acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value)
  *
  ******************************************************************************/
 
-acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
+acpi_status acpi_hw_register_write(u32 register_id, u32 value)
 {
 	acpi_status status;
-	acpi_cpu_flags lock_flags = 0;
 	u32 read_value;
 
 	ACPI_FUNCTION_TRACE(hw_register_write);
 
-	if (ACPI_MTX_LOCK == use_lock) {
-		lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
-	}
-
 	switch (register_id) {
 	case ACPI_REGISTER_PM1_STATUS:	/* 16-bit access */
 
 		/* Perform a read first to preserve certain bits (per ACPI spec) */
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_STATUS,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS,
 					       &read_value);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* Insert the bits to be preserved */
@@ -632,7 +618,7 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
 		    acpi_hw_low_level_write(16, value,
 					    &acpi_gbl_FADT.xpm1a_event_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -647,7 +633,7 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
 		status =
 		    acpi_hw_low_level_write(16, value, &acpi_gbl_xpm1a_enable);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* PM1B is optional */
@@ -661,11 +647,10 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
 		/*
 		 * Perform a read first to preserve certain bits (per ACPI spec)
 		 */
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
 					       &read_value);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		/* Insert the bits to be preserved */
@@ -679,7 +664,7 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
 		    acpi_hw_low_level_write(16, value,
 					    &acpi_gbl_FADT.xpm1a_control_block);
 		if (ACPI_FAILURE(status)) {
-			goto unlock_and_exit;
+			goto exit;
 		}
 
 		status =
@@ -728,11 +713,7 @@ acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value)
 		break;
 	}
 
-      unlock_and_exit:
-	if (ACPI_MTX_LOCK == use_lock) {
-		acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
-	}
-
+      exit:
 	return_ACPI_STATUS(status);
 }
 
diff --git a/drivers/acpi/hardware/hwsleep.c b/drivers/acpi/hardware/hwsleep.c
index cf69c0040a3..81b24842970 100644
--- a/drivers/acpi/hardware/hwsleep.c
+++ b/drivers/acpi/hardware/hwsleep.c
@@ -234,15 +234,11 @@ acpi_status acpi_enter_sleep_state_prep(u8 sleep_state)
 				"While executing method _SST"));
 	}
 
-	/*
-	 * 1) Disable/Clear all GPEs
-	 */
+	/* Disable/Clear all GPEs */
+
 	status = acpi_hw_disable_all_gpes();
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
 
-	return_ACPI_STATUS(AE_OK);
+	return_ACPI_STATUS(status);
 }
 
 ACPI_EXPORT_SYMBOL(acpi_enter_sleep_state_prep)
@@ -313,8 +309,7 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
 
 	/* Get current value of PM1A control */
 
-	status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-				       ACPI_REGISTER_PM1_CONTROL, &PM1Acontrol);
+	status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL, &PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
@@ -341,15 +336,13 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
 
 	/* Write #1: fill in SLP_TYP data */
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1A_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 					PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1B_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 					PM1Bcontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
@@ -364,15 +357,13 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
 
 	ACPI_FLUSH_CPU_CACHE();
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1A_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 					PM1Acontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
 
-	status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-					ACPI_REGISTER_PM1B_CONTROL,
+	status = acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 					PM1Bcontrol);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
@@ -392,8 +383,7 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
 		 */
 		acpi_os_stall(10000000);
 
-		status = acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_write(ACPI_REGISTER_PM1_CONTROL,
 						sleep_enable_reg_info->
 						access_bit_mask);
 		if (ACPI_FAILURE(status)) {
@@ -404,7 +394,8 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
 	/* Wait until we enter sleep state */
 
 	do {
-		status = acpi_get_register(ACPI_BITREG_WAKE_STATUS, &in_value);
+		status = acpi_get_register_unlocked(ACPI_BITREG_WAKE_STATUS,
+						    &in_value);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -520,8 +511,7 @@ acpi_status acpi_leave_sleep_state(u8 sleep_state)
 
 		/* Get current value of PM1A control */
 
-		status = acpi_hw_register_read(ACPI_MTX_DO_NOT_LOCK,
-					       ACPI_REGISTER_PM1_CONTROL,
+		status = acpi_hw_register_read(ACPI_REGISTER_PM1_CONTROL,
 					       &PM1Acontrol);
 		if (ACPI_SUCCESS(status)) {
 
@@ -543,11 +533,9 @@ acpi_status acpi_leave_sleep_state(u8 sleep_state)
 
 			/* Just ignore any errors */
 
-			(void)acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						     ACPI_REGISTER_PM1A_CONTROL,
+			(void)acpi_hw_register_write(ACPI_REGISTER_PM1A_CONTROL,
 						     PM1Acontrol);
-			(void)acpi_hw_register_write(ACPI_MTX_DO_NOT_LOCK,
-						     ACPI_REGISTER_PM1B_CONTROL,
+			(void)acpi_hw_register_write(ACPI_REGISTER_PM1B_CONTROL,
 						     PM1Bcontrol);
 		}
 	}
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 352cf81af58..aabc6ca4a81 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1043,14 +1043,6 @@ static int __init acpi_wake_gpes_always_on_setup(char *str)
 __setup("acpi_wake_gpes_always_on", acpi_wake_gpes_always_on_setup);
 
 /*
- * max_cstate is defined in the base kernel so modules can
- * change it w/o depending on the state of the processor module.
- */
-unsigned int max_cstate = ACPI_PROCESSOR_MAX_POWER;
-
-EXPORT_SYMBOL(max_cstate);
-
-/*
  * Acquire a spinlock.
  *
  * handle is a pointer to the spinlock_t.
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9f11dc296cd..235a51e328c 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -44,6 +44,7 @@
 #include <linux/seq_file.h>
 #include <linux/dmi.h>
 #include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -421,12 +422,6 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
 	return 0;
 }
 
-#ifdef CONFIG_IA64
-#define arch_cpu_to_apicid 	ia64_cpu_to_sapicid
-#else
-#define arch_cpu_to_apicid 	x86_cpu_to_apicid
-#endif
-
 static int map_madt_entry(u32 acpi_id)
 {
 	unsigned long madt_end, entry;
@@ -500,7 +495,7 @@ static int get_cpu_id(acpi_handle handle, u32 acpi_id)
 		return apic_id;
 
 	for (i = 0; i < NR_CPUS; ++i) {
-		if (arch_cpu_to_apicid[i] == apic_id)
+		if (cpu_physical_id(i) == apic_id)
 			return i;
 	}
 	return -1;
@@ -1049,11 +1044,13 @@ static int __init acpi_processor_init(void)
 		return -ENOMEM;
 	acpi_processor_dir->owner = THIS_MODULE;
 
+	result = cpuidle_register_driver(&acpi_idle_driver);
+	if (result < 0)
+		goto out_proc;
+
 	result = acpi_bus_register_driver(&acpi_processor_driver);
-	if (result < 0) {
-		remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
-		return result;
-	}
+	if (result < 0)
+		goto out_cpuidle;
 
 	acpi_processor_install_hotplug_notify();
 
@@ -1062,11 +1059,18 @@ static int __init acpi_processor_init(void)
 	acpi_processor_ppc_init();
 
 	return 0;
+
+out_cpuidle:
+	cpuidle_unregister_driver(&acpi_idle_driver);
+
+out_proc:
+	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
+
+	return result;
 }
 
 static void __exit acpi_processor_exit(void)
 {
-
 	acpi_processor_ppc_exit();
 
 	acpi_thermal_cpufreq_exit();
@@ -1075,6 +1079,8 @@ static void __exit acpi_processor_exit(void)
 
 	acpi_bus_unregister_driver(&acpi_processor_driver);
 
+	cpuidle_unregister_driver(&acpi_idle_driver);
+
 	remove_proc_entry(ACPI_PROCESSOR_CLASS, acpi_root_dir);
 
 	return;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 1f6fb38de01..f996d0e3768 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -40,6 +40,7 @@
 #include <linux/sched.h>	/* need_resched() */
 #include <linux/latency.h>
 #include <linux/clockchips.h>
+#include <linux/cpuidle.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -64,14 +65,22 @@ ACPI_MODULE_NAME("processor_idle");
 #define ACPI_PROCESSOR_FILE_POWER	"power"
 #define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
 #define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
+#ifndef CONFIG_CPU_IDLE
 #define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 static void (*pm_idle_save) (void) __read_mostly;
-module_param(max_cstate, uint, 0644);
+#else
+#define C2_OVERHEAD			1	/* 1us */
+#define C3_OVERHEAD			1	/* 1us */
+#endif
+#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
 
+static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
+module_param(max_cstate, uint, 0000);
 static unsigned int nocst __read_mostly;
 module_param(nocst, uint, 0000);
 
+#ifndef CONFIG_CPU_IDLE
 /*
  * bm_history -- bit-mask with a bit per jiffy of bus-master activity
  * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
@@ -82,9 +91,10 @@ module_param(nocst, uint, 0000);
 static unsigned int bm_history __read_mostly =
     (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
 module_param(bm_history, uint, 0644);
-/* --------------------------------------------------------------------------
-                                Power Management
-   -------------------------------------------------------------------------- */
+
+static int acpi_processor_set_power_policy(struct acpi_processor *pr);
+
+#endif
 
 /*
  * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3.
@@ -177,6 +187,18 @@ static inline u32 ticks_elapsed(u32 t1, u32 t2)
 		return ((0xFFFFFFFF - t1) + t2);
 }
 
+static inline u32 ticks_elapsed_in_us(u32 t1, u32 t2)
+{
+	if (t2 >= t1)
+		return PM_TIMER_TICKS_TO_US(t2 - t1);
+	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+	else
+		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
+}
+
+#ifndef CONFIG_CPU_IDLE
+
 static void
 acpi_processor_power_activate(struct acpi_processor *pr,
 			      struct acpi_processor_cx *new)
@@ -248,6 +270,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
 		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
 	}
 }
+#endif /* !CONFIG_CPU_IDLE */
 
 #ifdef ARCH_APICTIMER_STOPS_ON_C3
 
@@ -330,6 +353,7 @@ int acpi_processor_resume(struct acpi_device * device)
 	return 0;
 }
 
+#ifndef CONFIG_CPU_IDLE
 static void acpi_processor_idle(void)
 {
 	struct acpi_processor *pr = NULL;
@@ -427,7 +451,7 @@ static void acpi_processor_idle(void)
 	 * an SMP system. We do it here instead of doing it at _CST/P_LVL
 	 * detection phase, to work cleanly with logical CPU hotplug.
 	 */
-	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && 
+	if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
 	    !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 		cx = &pr->power.states[ACPI_STATE_C1];
 #endif
@@ -727,6 +751,7 @@ static int acpi_processor_set_power_policy(struct acpi_processor *pr)
 
 	return 0;
 }
+#endif /* !CONFIG_CPU_IDLE */
 
 static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 {
@@ -744,7 +769,7 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
 #ifndef CONFIG_HOTPLUG_CPU
 	/*
 	 * Check for P_LVL2_UP flag before entering C2 and above on
-	 * an SMP system. 
+	 * an SMP system.
 	 */
 	if ((num_online_cpus() > 1) &&
 	    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
@@ -945,7 +970,12 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx)
 	 * Normalize the C2 latency to expidite policy
 	 */
 	cx->valid = 1;
+
+#ifndef CONFIG_CPU_IDLE
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+#else
+	cx->latency_ticks = cx->latency;
+#endif
 
 	return;
 }
@@ -1025,7 +1055,12 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr,
 	 * use this in our C3 policy
 	 */
 	cx->valid = 1;
+
+#ifndef CONFIG_CPU_IDLE
 	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+#else
+	cx->latency_ticks = cx->latency;
+#endif
 
 	return;
 }
@@ -1090,6 +1125,7 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 
 	pr->power.count = acpi_processor_power_verify(pr);
 
+#ifndef CONFIG_CPU_IDLE
 	/*
 	 * Set Default Policy
 	 * ------------------
@@ -1101,6 +1137,7 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	result = acpi_processor_set_power_policy(pr);
 	if (result)
 		return result;
+#endif
 
 	/*
 	 * if one state of type C2 or C3 is available, mark this
@@ -1117,35 +1154,6 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr)
 	return 0;
 }
 
-int acpi_processor_cst_has_changed(struct acpi_processor *pr)
-{
-	int result = 0;
-
-
-	if (!pr)
-		return -EINVAL;
-
-	if (nocst) {
-		return -ENODEV;
-	}
-
-	if (!pr->flags.power_setup_done)
-		return -ENODEV;
-
-	/* Fall back to the default idle loop */
-	pm_idle = pm_idle_save;
-	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
-
-	pr->flags.power = 0;
-	result = acpi_processor_get_power_info(pr);
-	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
-		pm_idle = acpi_processor_idle;
-
-	return result;
-}
-
-/* proc interface */
-
 static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_processor *pr = seq->private;
@@ -1227,6 +1235,35 @@ static const struct file_operations acpi_processor_power_fops = {
 	.release = single_release,
 };
 
+#ifndef CONFIG_CPU_IDLE
+
+int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+{
+	int result = 0;
+
+
+	if (!pr)
+		return -EINVAL;
+
+	if (nocst) {
+		return -ENODEV;
+	}
+
+	if (!pr->flags.power_setup_done)
+		return -ENODEV;
+
+	/* Fall back to the default idle loop */
+	pm_idle = pm_idle_save;
+	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
+
+	pr->flags.power = 0;
+	result = acpi_processor_get_power_info(pr);
+	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
+		pm_idle = acpi_processor_idle;
+
+	return result;
+}
+
 #ifdef CONFIG_SMP
 static void smp_callback(void *v)
 {
@@ -1249,7 +1286,366 @@ static int acpi_processor_latency_notify(struct notifier_block *b,
 static struct notifier_block acpi_processor_latency_notifier = {
 	.notifier_call = acpi_processor_latency_notify,
 };
+
+#endif
+
+#else /* CONFIG_CPU_IDLE */
+
+/**
+ * acpi_idle_bm_check - checks if bus master activity was detected
+ */
+static int acpi_idle_bm_check(void)
+{
+	u32 bm_status = 0;
+
+	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+	if (bm_status)
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+	/*
+	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+	 * the true state of bus mastering activity; forcing us to
+	 * manually check the BMIDEA bit of each IDE channel.
+	 */
+	else if (errata.piix4.bmisx) {
+		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
+		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
+			bm_status = 1;
+	}
+	return bm_status;
+}
+
+/**
+ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
+ * @pr: the processor
+ * @target: the new target state
+ */
+static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
+					   struct acpi_processor_cx *target)
+{
+	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+		pr->flags.bm_rld_set = 0;
+	}
+
+	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+		pr->flags.bm_rld_set = 1;
+	}
+}
+
+/**
+ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
+ * @cx: cstate data
+ */
+static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+{
+	if (cx->space_id == ACPI_CSTATE_FFH) {
+		/* Call into architectural FFH based C-state */
+		acpi_processor_ffh_cstate_enter(cx);
+	} else {
+		int unused;
+		/* IO port based C-state */
+		inb(cx->address);
+		/* Dummy wait op - must do something useless after P_LVL2 read
+		   because chipsets cannot guarantee that STPCLK# signal
+		   gets asserted in time to freeze execution properly. */
+		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	}
+}
+
+/**
+ * acpi_idle_enter_c1 - enters an ACPI C1 state-type
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * This is equivalent to the HALT instruction.
+ */
+static int acpi_idle_enter_c1(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+	if (!need_resched())
+		safe_halt();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	return 0;
+}
+
+/**
+ * acpi_idle_enter_simple - enters an ACPI state without BM handling
+ * @dev: the target CPU
+ * @state: the state data
+ */
+static int acpi_idle_enter_simple(struct cpuidle_device *dev,
+				  struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (acpi_idle_suspend)
+		return(acpi_idle_enter_c1(dev, state));
+
+	if (pr->flags.bm_check)
+		acpi_idle_update_bm_rld(pr, cx);
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	if (cx->type == ACPI_STATE_C3)
+		ACPI_FLUSH_CPU_CACHE();
+
+	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	acpi_state_timer_broadcast(pr, cx, 1);
+	acpi_idle_do_entry(cx);
+	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+	/* TSC could halt in idle, so notify users */
+	mark_tsc_unstable("TSC halts in idle");;
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+static int c3_cpu_count;
+static DEFINE_SPINLOCK(c3_lock);
+
+/**
+ * acpi_idle_enter_bm - enters C3 with proper BM handling
+ * @dev: the target CPU
+ * @state: the state data
+ *
+ * If BM is detected, the deepest non-C3 idle state is entered instead.
+ */
+static int acpi_idle_enter_bm(struct cpuidle_device *dev,
+			      struct cpuidle_state *state)
+{
+	struct acpi_processor *pr;
+	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
+	u32 t1, t2;
+	pr = processors[smp_processor_id()];
+
+	if (unlikely(!pr))
+		return 0;
+
+	if (acpi_idle_suspend)
+		return(acpi_idle_enter_c1(dev, state));
+
+	local_irq_disable();
+	current_thread_info()->status &= ~TS_POLLING;
+	/*
+	 * TS_POLLING-cleared state must be visible before we test
+	 * NEED_RESCHED:
+	 */
+	smp_mb();
+
+	if (unlikely(need_resched())) {
+		current_thread_info()->status |= TS_POLLING;
+		local_irq_enable();
+		return 0;
+	}
+
+	/*
+	 * Must be done before busmaster disable as we might need to
+	 * access HPET !
+	 */
+	acpi_state_timer_broadcast(pr, cx, 1);
+
+	if (acpi_idle_bm_check()) {
+		cx = pr->power.bm_state;
+
+		acpi_idle_update_bm_rld(pr, cx);
+
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		acpi_idle_do_entry(cx);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+	} else {
+		acpi_idle_update_bm_rld(pr, cx);
+
+		spin_lock(&c3_lock);
+		c3_cpu_count++;
+		/* Disable bus master arbitration when all CPUs are in C3 */
+		if (c3_cpu_count == num_online_cpus())
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+		spin_unlock(&c3_lock);
+
+		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		acpi_idle_do_entry(cx);
+		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+
+		spin_lock(&c3_lock);
+		/* Re-enable bus master arbitration */
+		if (c3_cpu_count == num_online_cpus())
+			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+		c3_cpu_count--;
+		spin_unlock(&c3_lock);
+	}
+
+#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86_TSC)
+	/* TSC could halt in idle, so notify users */
+	mark_tsc_unstable("TSC halts in idle");
+#endif
+
+	local_irq_enable();
+	current_thread_info()->status |= TS_POLLING;
+
+	cx->usage++;
+
+	acpi_state_timer_broadcast(pr, cx, 0);
+	cx->time += ticks_elapsed(t1, t2);
+	return ticks_elapsed_in_us(t1, t2);
+}
+
+struct cpuidle_driver acpi_idle_driver = {
+	.name =		"acpi_idle",
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * acpi_processor_setup_cpuidle - prepares and configures CPUIDLE
+ * @pr: the ACPI processor
+ */
+static int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
+{
+	int i, count = 0;
+	struct acpi_processor_cx *cx;
+	struct cpuidle_state *state;
+	struct cpuidle_device *dev = &pr->power.dev;
+
+	if (!pr->flags.power_setup_done)
+		return -EINVAL;
+
+	if (pr->flags.power == 0) {
+		return -EINVAL;
+	}
+
+	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
+		cx = &pr->power.states[i];
+		state = &dev->states[count];
+
+		if (!cx->valid)
+			continue;
+
+#ifdef CONFIG_HOTPLUG_CPU
+		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
+		    !pr->flags.has_cst &&
+		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
+			continue;
 #endif
+		cpuidle_set_statedata(state, cx);
+
+		snprintf(state->name, CPUIDLE_NAME_LEN, "C%d", i);
+		state->exit_latency = cx->latency;
+		state->target_residency = cx->latency * 6;
+		state->power_usage = cx->power;
+
+		state->flags = 0;
+		switch (cx->type) {
+			case ACPI_STATE_C1:
+			state->flags |= CPUIDLE_FLAG_SHALLOW;
+			state->enter = acpi_idle_enter_c1;
+			break;
+
+			case ACPI_STATE_C2:
+			state->flags |= CPUIDLE_FLAG_BALANCED;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->enter = acpi_idle_enter_simple;
+			break;
+
+			case ACPI_STATE_C3:
+			state->flags |= CPUIDLE_FLAG_DEEP;
+			state->flags |= CPUIDLE_FLAG_TIME_VALID;
+			state->flags |= CPUIDLE_FLAG_CHECK_BM;
+			state->enter = pr->flags.bm_check ?
+					acpi_idle_enter_bm :
+					acpi_idle_enter_simple;
+			break;
+		}
+
+		count++;
+	}
+
+	dev->state_count = count;
+
+	if (!count)
+		return -EINVAL;
+
+	/* find the deepest state that can handle active BM */
+	if (pr->flags.bm_check) {
+		for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++)
+			if (pr->power.states[i].type == ACPI_STATE_C3)
+				break;
+		pr->power.bm_state = &pr->power.states[i-1];
+	}
+
+	return 0;
+}
+
+int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+{
+	int ret;
+
+	if (!pr)
+		return -EINVAL;
+
+	if (nocst) {
+		return -ENODEV;
+	}
+
+	if (!pr->flags.power_setup_done)
+		return -ENODEV;
+
+	cpuidle_pause_and_lock();
+	cpuidle_disable_device(&pr->power.dev);
+	acpi_processor_get_power_info(pr);
+	acpi_processor_setup_cpuidle(pr);
+	ret = cpuidle_enable_device(&pr->power.dev);
+	cpuidle_resume_and_unlock();
+
+	return ret;
+}
+
+#endif /* CONFIG_CPU_IDLE */
 
 int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			      struct acpi_device *device)
@@ -1267,7 +1663,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 			       "ACPI: processor limited to max C-state %d\n",
 			       max_cstate);
 		first_run++;
-#ifdef CONFIG_SMP
+#if !defined (CONFIG_CPU_IDLE) && defined (CONFIG_SMP)
 		register_latency_notifier(&acpi_processor_latency_notifier);
 #endif
 	}
@@ -1285,6 +1681,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 	}
 
 	acpi_processor_get_power_info(pr);
+	pr->flags.power_setup_done = 1;
 
 	/*
 	 * Install the idle handler if processor power management is supported.
@@ -1292,6 +1689,13 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 	 * platforms that only support C1.
 	 */
 	if ((pr->flags.power) && (!boot_option_idle_override)) {
+#ifdef CONFIG_CPU_IDLE
+		acpi_processor_setup_cpuidle(pr);
+		pr->power.dev.cpu = pr->id;
+		if (cpuidle_register_device(&pr->power.dev))
+			return -EIO;
+#endif
+
 		printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id);
 		for (i = 1; i <= pr->power.count; i++)
 			if (pr->power.states[i].valid)
@@ -1299,10 +1703,12 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 				       pr->power.states[i].type);
 		printk(")\n");
 
+#ifndef CONFIG_CPU_IDLE
 		if (pr->id == 0) {
 			pm_idle_save = pm_idle;
 			pm_idle = acpi_processor_idle;
 		}
+#endif
 	}
 
 	/* 'power' [R] */
@@ -1316,21 +1722,24 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
 		entry->owner = THIS_MODULE;
 	}
 
-	pr->flags.power_setup_done = 1;
-
 	return 0;
 }
 
 int acpi_processor_power_exit(struct acpi_processor *pr,
 			      struct acpi_device *device)
 {
-
+#ifdef CONFIG_CPU_IDLE
+	if ((pr->flags.power) && (!boot_option_idle_override))
+		cpuidle_unregister_device(&pr->power.dev);
+#endif
 	pr->flags.power_setup_done = 0;
 
 	if (acpi_device_dir(device))
 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
 				  acpi_device_dir(device));
 
+#ifndef CONFIG_CPU_IDLE
+
 	/* Unregister the idle handler when processor #0 is removed. */
 	if (pr->id == 0) {
 		pm_idle = pm_idle_save;
@@ -1345,6 +1754,7 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
 		unregister_latency_notifier(&acpi_processor_latency_notifier);
 #endif
 	}
+#endif
 
 	return 0;
 }
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index a578986e321..90fd09c65f9 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -1,6 +1,8 @@
 /*
- *  acpi_sbs.c - ACPI Smart Battery System Driver ($Revision: 1.16 $)
+ *  sbs.c - ACPI Smart Battery System Driver ($Revision: 2.0 $)
  *
+ *  Copyright (c) 2007 Alexey Starikovskiy <astarikovskiy@suse.de>
+ *  Copyright (c) 2005-2007 Vladimir Lebedev <vladimir.p.lebedev@intel.com>
  *  Copyright (c) 2005 Rich Townsend <rhdt@bartol.udel.edu>
  *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -26,15 +28,22 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
+
+#ifdef CONFIG_ACPI_PROCFS
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <asm/uaccess.h>
+#endif
+
 #include <linux/acpi.h>
 #include <linux/timer.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
 
-#define ACPI_SBS_COMPONENT		0x00080000
+#include <linux/power_supply.h>
+
+#include "sbshc.h"
+
 #define ACPI_SBS_CLASS			"sbs"
 #define ACPI_AC_CLASS			"ac_adapter"
 #define ACPI_BATTERY_CLASS		"battery"
@@ -44,836 +53,436 @@
 #define ACPI_SBS_FILE_ALARM		"alarm"
 #define ACPI_BATTERY_DIR_NAME		"BAT%i"
 #define ACPI_AC_DIR_NAME		"AC0"
-#define ACPI_SBC_SMBUS_ADDR		0x9
-#define ACPI_SBSM_SMBUS_ADDR		0xa
-#define ACPI_SB_SMBUS_ADDR		0xb
-#define ACPI_SBS_AC_NOTIFY_STATUS	0x80
-#define ACPI_SBS_BATTERY_NOTIFY_STATUS	0x80
-#define ACPI_SBS_BATTERY_NOTIFY_INFO	0x81
 
-#define _COMPONENT			ACPI_SBS_COMPONENT
+enum acpi_sbs_device_addr {
+	ACPI_SBS_CHARGER = 0x9,
+	ACPI_SBS_MANAGER = 0xa,
+	ACPI_SBS_BATTERY = 0xb,
+};
 
-ACPI_MODULE_NAME("sbs");
+#define ACPI_SBS_NOTIFY_STATUS		0x80
+#define ACPI_SBS_NOTIFY_INFO		0x81
 
-MODULE_AUTHOR("Rich Townsend");
+MODULE_AUTHOR("Alexey Starikovskiy <astarikovskiy@suse.de>");
 MODULE_DESCRIPTION("Smart Battery System ACPI interface driver");
 MODULE_LICENSE("GPL");
 
-#define	xmsleep(t)	msleep(t)
-
-#define ACPI_EC_SMB_PRTCL	0x00	/* protocol, PEC */
-
-#define ACPI_EC_SMB_STS		0x01	/* status */
-#define ACPI_EC_SMB_ADDR	0x02	/* address */
-#define ACPI_EC_SMB_CMD		0x03	/* command */
-#define ACPI_EC_SMB_DATA	0x04	/* 32 data registers */
-#define ACPI_EC_SMB_BCNT	0x24	/* number of data bytes */
-
-#define ACPI_EC_SMB_STS_DONE	0x80
-#define ACPI_EC_SMB_STS_STATUS	0x1f
-
-#define ACPI_EC_SMB_PRTCL_WRITE		0x00
-#define ACPI_EC_SMB_PRTCL_READ		0x01
-#define ACPI_EC_SMB_PRTCL_WORD_DATA	0x08
-#define ACPI_EC_SMB_PRTCL_BLOCK_DATA	0x0a
-
-#define ACPI_EC_SMB_TRANSACTION_SLEEP	1
-#define ACPI_EC_SMB_ACCESS_SLEEP1	1
-#define ACPI_EC_SMB_ACCESS_SLEEP2	10
-
-#define	DEF_CAPACITY_UNIT	3
-#define	MAH_CAPACITY_UNIT	1
-#define	MWH_CAPACITY_UNIT	2
-#define	CAPACITY_UNIT		DEF_CAPACITY_UNIT
-
-#define	REQUEST_UPDATE_MODE	1
-#define	QUEUE_UPDATE_MODE	2
-
-#define	DATA_TYPE_COMMON	0
-#define	DATA_TYPE_INFO		1
-#define	DATA_TYPE_STATE		2
-#define	DATA_TYPE_ALARM		3
-#define	DATA_TYPE_AC_STATE	4
+static unsigned int cache_time = 1000;
+module_param(cache_time, uint, 0644);
+MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
 
 extern struct proc_dir_entry *acpi_lock_ac_dir(void);
 extern struct proc_dir_entry *acpi_lock_battery_dir(void);
 extern void acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
 extern void acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
 
-#define	MAX_SBS_BAT			4
+#define MAX_SBS_BAT			4
 #define ACPI_SBS_BLOCK_MAX		32
 
-#define ACPI_SBS_SMBUS_READ		1
-#define ACPI_SBS_SMBUS_WRITE		2
-
-#define ACPI_SBS_WORD_DATA		1
-#define ACPI_SBS_BLOCK_DATA		2
-
-#define	UPDATE_DELAY	10
-
-/* 0 - every time, > 0 - by update_time */
-static unsigned int update_time = 120;
-
-static unsigned int capacity_mode = CAPACITY_UNIT;
-
-module_param(update_time, uint, 0644);
-module_param(capacity_mode, uint, 0444);
-
-static int acpi_sbs_add(struct acpi_device *device);
-static int acpi_sbs_remove(struct acpi_device *device, int type);
-static int acpi_sbs_resume(struct acpi_device *device);
-
 static const struct acpi_device_id sbs_device_ids[] = {
-	{"ACPI0001", 0},
-	{"ACPI0005", 0},
+	{"ACPI0002", 0},
 	{"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, sbs_device_ids);
 
-static struct acpi_driver acpi_sbs_driver = {
-	.name = "sbs",
-	.class = ACPI_SBS_CLASS,
-	.ids = sbs_device_ids,
-	.ops = {
-		.add = acpi_sbs_add,
-		.remove = acpi_sbs_remove,
-		.resume = acpi_sbs_resume,
-		},
-};
-
-struct acpi_ac {
-	int ac_present;
-};
-
-struct acpi_battery_info {
-	int capacity_mode;
-	s16 full_charge_capacity;
-	s16 design_capacity;
-	s16 design_voltage;
-	int vscale;
-	int ipscale;
-	s16 serial_number;
-	char manufacturer_name[ACPI_SBS_BLOCK_MAX + 3];
-	char device_name[ACPI_SBS_BLOCK_MAX + 3];
-	char device_chemistry[ACPI_SBS_BLOCK_MAX + 3];
-};
-
-struct acpi_battery_state {
-	s16 voltage;
-	s16 amperage;
-	s16 remaining_capacity;
-	s16 battery_state;
-};
-
-struct acpi_battery_alarm {
-	s16 remaining_capacity;
-};
-
 struct acpi_battery {
-	int alive;
-	int id;
-	int init_state;
-	int battery_present;
+	struct power_supply bat;
 	struct acpi_sbs *sbs;
-	struct acpi_battery_info info;
-	struct acpi_battery_state state;
-	struct acpi_battery_alarm alarm;
-	struct proc_dir_entry *battery_entry;
+#ifdef CONFIG_ACPI_PROCFS
+	struct proc_dir_entry *proc_entry;
+#endif
+	unsigned long update_time;
+	char name[8];
+	char manufacturer_name[ACPI_SBS_BLOCK_MAX];
+	char device_name[ACPI_SBS_BLOCK_MAX];
+	char device_chemistry[ACPI_SBS_BLOCK_MAX];
+	u16 alarm_capacity;
+	u16 full_charge_capacity;
+	u16 design_capacity;
+	u16 design_voltage;
+	u16 serial_number;
+	u16 cycle_count;
+	u16 temp_now;
+	u16 voltage_now;
+	s16 current_now;
+	s16 current_avg;
+	u16 capacity_now;
+	u16 state_of_charge;
+	u16 state;
+	u16 mode;
+	u16 spec;
+	u8 id;
+	u8 present:1;
 };
 
+#define to_acpi_battery(x) container_of(x, struct acpi_battery, bat);
+
 struct acpi_sbs {
-	int base;
+	struct power_supply charger;
 	struct acpi_device *device;
-	struct mutex mutex;
-	int sbsm_present;
-	int sbsm_batteries_supported;
-	struct proc_dir_entry *ac_entry;
-	struct acpi_ac ac;
+	struct acpi_smb_hc *hc;
+	struct mutex lock;
+#ifdef CONFIG_ACPI_PROCFS
+	struct proc_dir_entry *charger_entry;
+#endif
 	struct acpi_battery battery[MAX_SBS_BAT];
-	int zombie;
-	struct timer_list update_timer;
-	int run_cnt;
-	int update_proc_flg;
+	u8 batteries_supported:4;
+	u8 manager_present:1;
+	u8 charger_present:1;
 };
 
-static int acpi_sbs_update_run(struct acpi_sbs *sbs, int id, int data_type);
-static void acpi_sbs_update_time(void *data);
+#define to_acpi_sbs(x) container_of(x, struct acpi_sbs, charger)
 
-union sbs_rw_data {
-	u16 word;
-	u8 block[ACPI_SBS_BLOCK_MAX + 2];
-};
-
-static int acpi_ec_sbs_access(struct acpi_sbs *sbs, u16 addr,
-			      char read_write, u8 command, int size,
-			      union sbs_rw_data *data);
-
-/* --------------------------------------------------------------------------
-                               SMBus Communication
-   -------------------------------------------------------------------------- */
-
-static int acpi_ec_sbs_read(struct acpi_sbs *sbs, u8 address, u8 * data)
+static inline int battery_scale(int log)
 {
-	u8 val;
-	int err;
-
-	err = ec_read(sbs->base + address, &val);
-	if (!err) {
-		*data = val;
-	}
-	xmsleep(ACPI_EC_SMB_TRANSACTION_SLEEP);
-	return (err);
-}
-
-static int acpi_ec_sbs_write(struct acpi_sbs *sbs, u8 address, u8 data)
-{
-	int err;
-
-	err = ec_write(sbs->base + address, data);
-	return (err);
-}
-
-static int
-acpi_ec_sbs_access(struct acpi_sbs *sbs, u16 addr,
-		   char read_write, u8 command, int size,
-		   union sbs_rw_data *data)
-{
-	unsigned char protocol, len = 0, temp[2] = { 0, 0 };
-	int i;
-
-	if (read_write == ACPI_SBS_SMBUS_READ) {
-		protocol = ACPI_EC_SMB_PRTCL_READ;
-	} else {
-		protocol = ACPI_EC_SMB_PRTCL_WRITE;
-	}
-
-	switch (size) {
-
-	case ACPI_SBS_WORD_DATA:
-		acpi_ec_sbs_write(sbs, ACPI_EC_SMB_CMD, command);
-		if (read_write == ACPI_SBS_SMBUS_WRITE) {
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA, data->word);
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA + 1,
-					  data->word >> 8);
-		}
-		protocol |= ACPI_EC_SMB_PRTCL_WORD_DATA;
-		break;
-	case ACPI_SBS_BLOCK_DATA:
-		acpi_ec_sbs_write(sbs, ACPI_EC_SMB_CMD, command);
-		if (read_write == ACPI_SBS_SMBUS_WRITE) {
-			len = min_t(u8, data->block[0], 32);
-			acpi_ec_sbs_write(sbs, ACPI_EC_SMB_BCNT, len);
-			for (i = 0; i < len; i++)
-				acpi_ec_sbs_write(sbs, ACPI_EC_SMB_DATA + i,
-						  data->block[i + 1]);
-		}
-		protocol |= ACPI_EC_SMB_PRTCL_BLOCK_DATA;
-		break;
-	default:
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"unsupported transaction %d", size));
-		return (-1);
-	}
-
-	acpi_ec_sbs_write(sbs, ACPI_EC_SMB_ADDR, addr << 1);
-	acpi_ec_sbs_write(sbs, ACPI_EC_SMB_PRTCL, protocol);
-
-	acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-
-	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
-		xmsleep(ACPI_EC_SMB_ACCESS_SLEEP1);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-	}
-	if (~temp[0] & ACPI_EC_SMB_STS_DONE) {
-		xmsleep(ACPI_EC_SMB_ACCESS_SLEEP2);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_STS, temp);
-	}
-	if ((~temp[0] & ACPI_EC_SMB_STS_DONE)
-	    || (temp[0] & ACPI_EC_SMB_STS_STATUS)) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"transaction %d error", size));
-		return (-1);
-	}
-
-	if (read_write == ACPI_SBS_SMBUS_WRITE) {
-		return (0);
-	}
-
-	switch (size) {
-
-	case ACPI_SBS_WORD_DATA:
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA, temp);
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA + 1, temp + 1);
-		data->word = (temp[1] << 8) | temp[0];
-		break;
-
-	case ACPI_SBS_BLOCK_DATA:
-		len = 0;
-		acpi_ec_sbs_read(sbs, ACPI_EC_SMB_BCNT, &len);
-		len = min_t(u8, len, 32);
-		for (i = 0; i < len; i++)
-			acpi_ec_sbs_read(sbs, ACPI_EC_SMB_DATA + i,
-					 data->block + i + 1);
-		data->block[0] = len;
-		break;
-	default:
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"unsupported transaction %d", size));
-		return (-1);
-	}
-
-	return (0);
+	int scale = 1;
+	while (log--)
+		scale *= 10;
+	return scale;
 }
 
-static int
-acpi_sbs_read_word(struct acpi_sbs *sbs, int addr, int func, u16 * word)
+static inline int acpi_battery_vscale(struct acpi_battery *battery)
 {
-	union sbs_rw_data data;
-	int result = 0;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_READ, func,
-				    ACPI_SBS_WORD_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	} else {
-		*word = data.word;
-	}
-
-	return result;
+	return battery_scale((battery->spec & 0x0f00) >> 8);
 }
 
-static int
-acpi_sbs_read_str(struct acpi_sbs *sbs, int addr, int func, char *str)
+static inline int acpi_battery_ipscale(struct acpi_battery *battery)
 {
-	union sbs_rw_data data;
-	int result = 0;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_READ, func,
-				    ACPI_SBS_BLOCK_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	} else {
-		strncpy(str, (const char *)data.block + 1, data.block[0]);
-		str[data.block[0]] = 0;
-	}
-
-	return result;
+	return battery_scale((battery->spec & 0xf000) >> 12);
 }
 
-static int
-acpi_sbs_write_word(struct acpi_sbs *sbs, int addr, int func, int word)
+static inline int acpi_battery_mode(struct acpi_battery *battery)
 {
-	union sbs_rw_data data;
-	int result = 0;
-
-	data.word = word;
-
-	result = acpi_ec_sbs_access(sbs, addr,
-				    ACPI_SBS_SMBUS_WRITE, func,
-				    ACPI_SBS_WORD_DATA, &data);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ec_sbs_access() failed"));
-	}
-
-	return result;
+	return (battery->mode & 0x8000);
 }
 
-static int sbs_zombie(struct acpi_sbs *sbs)
+static inline int acpi_battery_scale(struct acpi_battery *battery)
 {
-	return (sbs->zombie);
+	return (acpi_battery_mode(battery) ? 10 : 1) *
+	    acpi_battery_ipscale(battery);
 }
 
-static int sbs_mutex_lock(struct acpi_sbs *sbs)
+static int sbs_get_ac_property(struct power_supply *psy,
+			       enum power_supply_property psp,
+			       union power_supply_propval *val)
 {
-	if (sbs_zombie(sbs)) {
-		return -ENODEV;
+	struct acpi_sbs *sbs = to_acpi_sbs(psy);
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		val->intval = sbs->charger_present;
+		break;
+	default:
+		return -EINVAL;
 	}
-	mutex_lock(&sbs->mutex);
 	return 0;
 }
 
-static void sbs_mutex_unlock(struct acpi_sbs *sbs)
+static int acpi_battery_technology(struct acpi_battery *battery)
 {
-	mutex_unlock(&sbs->mutex);
+	if (!strcasecmp("NiCd", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_NiCd;
+	if (!strcasecmp("NiMH", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_NiMH;
+	if (!strcasecmp("LION", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_LION;
+	if (!strcasecmp("LiP", battery->device_chemistry))
+		return POWER_SUPPLY_TECHNOLOGY_LIPO;
+	return POWER_SUPPLY_TECHNOLOGY_UNKNOWN;
 }
 
-/* --------------------------------------------------------------------------
-                            Smart Battery System Management
-   -------------------------------------------------------------------------- */
-
-static int acpi_check_update_proc(struct acpi_sbs *sbs)
+static int acpi_sbs_battery_get_property(struct power_supply *psy,
+					 enum power_supply_property psp,
+					 union power_supply_propval *val)
 {
-	acpi_status status = AE_OK;
+	struct acpi_battery *battery = to_acpi_battery(psy);
 
-	if (update_time == 0) {
-		sbs->update_proc_flg = 0;
-		return 0;
-	}
-	if (sbs->update_proc_flg == 0) {
-		status = acpi_os_execute(OSL_GPE_HANDLER,
-					 acpi_sbs_update_time, sbs);
-		if (status != AE_OK) {
-			ACPI_EXCEPTION((AE_INFO, status,
-					"acpi_os_execute() failed"));
-			return 1;
-		}
-		sbs->update_proc_flg = 1;
+	if ((!battery->present) && psp != POWER_SUPPLY_PROP_PRESENT)
+		return -ENODEV;
+	switch (psp) {
+	case POWER_SUPPLY_PROP_STATUS:
+		if (battery->current_now < 0)
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		else if (battery->current_now > 0)
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		else
+			val->intval = POWER_SUPPLY_STATUS_FULL;
+		break;
+	case POWER_SUPPLY_PROP_PRESENT:
+		val->intval = battery->present;
+		break;
+	case POWER_SUPPLY_PROP_TECHNOLOGY:
+		val->intval = acpi_battery_technology(battery);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
+		val->intval = battery->design_voltage *
+			acpi_battery_vscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		val->intval = battery->voltage_now *
+				acpi_battery_vscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		val->intval = abs(battery->current_now) *
+				acpi_battery_ipscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_AVG:
+		val->intval = abs(battery->current_avg) *
+				acpi_battery_ipscale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CAPACITY:
+		val->intval = battery->state_of_charge;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN:
+	case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN:
+		val->intval = battery->design_capacity *
+			acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_FULL:
+	case POWER_SUPPLY_PROP_ENERGY_FULL:
+		val->intval = battery->full_charge_capacity *
+			acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_CHARGE_NOW:
+	case POWER_SUPPLY_PROP_ENERGY_NOW:
+		val->intval = battery->capacity_now *
+				acpi_battery_scale(battery) * 1000;
+		break;
+	case POWER_SUPPLY_PROP_TEMP:
+		val->intval = battery->temp_now - 2730;	// dK -> dC
+		break;
+	case POWER_SUPPLY_PROP_MODEL_NAME:
+		val->strval = battery->device_name;
+		break;
+	case POWER_SUPPLY_PROP_MANUFACTURER:
+		val->strval = battery->manufacturer_name;
+		break;
+	default:
+		return -EINVAL;
 	}
 	return 0;
 }
 
-static int acpi_sbs_generate_event(struct acpi_device *device,
-				   int event, int state, char *bid, char *class)
-{
-	char bid_saved[5];
-	char class_saved[20];
-	int result = 0;
-
-	strcpy(bid_saved, acpi_device_bid(device));
-	strcpy(class_saved, acpi_device_class(device));
-
-	strcpy(acpi_device_bid(device), bid);
-	strcpy(acpi_device_class(device), class);
-
-	result = acpi_bus_generate_proc_event(device, event, state);
-
-	strcpy(acpi_device_bid(device), bid_saved);
-	strcpy(acpi_device_class(device), class_saved);
-
-	acpi_bus_generate_netlink_event(class, bid, event, state);
-	return result;
-}
-
-static int acpi_battery_get_present(struct acpi_battery *battery)
-{
-	s16 state;
-	int result = 0;
-	int is_present = 0;
-
-	result = acpi_sbs_read_word(battery->sbs,
-				    ACPI_SBSM_SMBUS_ADDR, 0x01, &state);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-	}
-	if (!result) {
-		is_present = (state & 0x000f) & (1 << battery->id);
-	}
-	battery->battery_present = is_present;
-
-	return result;
-}
+static enum power_supply_property sbs_ac_props[] = {
+	POWER_SUPPLY_PROP_ONLINE,
+};
 
-static int acpi_battery_select(struct acpi_battery *battery)
-{
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 state;
-	int foo;
+static enum power_supply_property sbs_charge_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
+	POWER_SUPPLY_PROP_CHARGE_FULL,
+	POWER_SUPPLY_PROP_CHARGE_NOW,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
 
-	if (sbs->sbsm_present) {
+static enum power_supply_property sbs_energy_battery_props[] = {
+	POWER_SUPPLY_PROP_STATUS,
+	POWER_SUPPLY_PROP_PRESENT,
+	POWER_SUPPLY_PROP_TECHNOLOGY,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+	POWER_SUPPLY_PROP_CURRENT_AVG,
+	POWER_SUPPLY_PROP_CAPACITY,
+	POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN,
+	POWER_SUPPLY_PROP_ENERGY_FULL,
+	POWER_SUPPLY_PROP_ENERGY_NOW,
+	POWER_SUPPLY_PROP_TEMP,
+	POWER_SUPPLY_PROP_MODEL_NAME,
+	POWER_SUPPLY_PROP_MANUFACTURER,
+};
 
-		/* Take special care not to knobble other nibbles of
-		 * state (aka selector_state), since
-		 * it causes charging to halt on SBSELs */
+/* --------------------------------------------------------------------------
+                            Smart Battery System Management
+   -------------------------------------------------------------------------- */
 
-		result =
-		    acpi_sbs_read_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x01, &state);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_read_word() failed"));
-			goto end;
-		}
+struct acpi_battery_reader {
+	u8 command;		/* command for battery */
+	u8 mode;		/* word or block? */
+	size_t offset;		/* offset inside struct acpi_sbs_battery */
+};
 
-		foo = (state & 0x0fff) | (1 << (battery->id + 12));
-		result =
-		    acpi_sbs_write_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x01, foo);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_write_word() failed"));
-			goto end;
-		}
-	}
+static struct acpi_battery_reader info_readers[] = {
+	{0x01, SMBUS_READ_WORD, offsetof(struct acpi_battery, alarm_capacity)},
+	{0x03, SMBUS_READ_WORD, offsetof(struct acpi_battery, mode)},
+	{0x10, SMBUS_READ_WORD, offsetof(struct acpi_battery, full_charge_capacity)},
+	{0x17, SMBUS_READ_WORD, offsetof(struct acpi_battery, cycle_count)},
+	{0x18, SMBUS_READ_WORD, offsetof(struct acpi_battery, design_capacity)},
+	{0x19, SMBUS_READ_WORD, offsetof(struct acpi_battery, design_voltage)},
+	{0x1a, SMBUS_READ_WORD, offsetof(struct acpi_battery, spec)},
+	{0x1c, SMBUS_READ_WORD, offsetof(struct acpi_battery, serial_number)},
+	{0x20, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, manufacturer_name)},
+	{0x21, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, device_name)},
+	{0x22, SMBUS_READ_BLOCK, offsetof(struct acpi_battery, device_chemistry)},
+};
 
-      end:
-	return result;
-}
+static struct acpi_battery_reader state_readers[] = {
+	{0x08, SMBUS_READ_WORD, offsetof(struct acpi_battery, temp_now)},
+	{0x09, SMBUS_READ_WORD, offsetof(struct acpi_battery, voltage_now)},
+	{0x0a, SMBUS_READ_WORD, offsetof(struct acpi_battery, current_now)},
+	{0x0b, SMBUS_READ_WORD, offsetof(struct acpi_battery, current_avg)},
+	{0x0f, SMBUS_READ_WORD, offsetof(struct acpi_battery, capacity_now)},
+	{0x0e, SMBUS_READ_WORD, offsetof(struct acpi_battery, state_of_charge)},
+	{0x16, SMBUS_READ_WORD, offsetof(struct acpi_battery, state)},
+};
 
-static int acpi_sbsm_get_info(struct acpi_sbs *sbs)
+static int acpi_manager_get_info(struct acpi_sbs *sbs)
 {
 	int result = 0;
-	s16 battery_system_info;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SBSM_SMBUS_ADDR, 0x04,
-				    &battery_system_info);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-	sbs->sbsm_present = 1;
-	sbs->sbsm_batteries_supported = battery_system_info & 0x000f;
-
-      end:
+	u16 battery_system_info;
 
+	result = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_MANAGER,
+				 0x04, (u8 *)&battery_system_info);
+	if (!result)
+		sbs->batteries_supported = battery_system_info & 0x000f;
 	return result;
 }
 
 static int acpi_battery_get_info(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-	s16 specification_info;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x03,
-				    &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-	battery->info.capacity_mode = (battery_mode & 0x8000) >> 15;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x10,
-				    &battery->info.full_charge_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x18,
-				    &battery->info.design_capacity);
-
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x19,
-				    &battery->info.design_voltage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
+	int i, result = 0;
+
+	for (i = 0; i < ARRAY_SIZE(info_readers); ++i) {
+		result = acpi_smbus_read(battery->sbs->hc,
+					 info_readers[i].mode,
+					 ACPI_SBS_BATTERY,
+					 info_readers[i].command,
+					 (u8 *) battery +
+						info_readers[i].offset);
+		if (result)
+			break;
 	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x1a,
-				    &specification_info);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	switch ((specification_info & 0x0f00) >> 8) {
-	case 1:
-		battery->info.vscale = 10;
-		break;
-	case 2:
-		battery->info.vscale = 100;
-		break;
-	case 3:
-		battery->info.vscale = 1000;
-		break;
-	default:
-		battery->info.vscale = 1;
-	}
-
-	switch ((specification_info & 0xf000) >> 12) {
-	case 1:
-		battery->info.ipscale = 10;
-		break;
-	case 2:
-		battery->info.ipscale = 100;
-		break;
-	case 3:
-		battery->info.ipscale = 1000;
-		break;
-	default:
-		battery->info.ipscale = 1;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x1c,
-				    &battery->info.serial_number);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x20,
-				   battery->info.manufacturer_name);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x21,
-				   battery->info.device_name);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_str(sbs, ACPI_SB_SMBUS_ADDR, 0x22,
-				   battery->info.device_chemistry);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_str() failed"));
-		goto end;
-	}
-
-      end:
 	return result;
 }
 
 static int acpi_battery_get_state(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
+	int i, result = 0;
 
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x09,
-				    &battery->state.voltage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x0a,
-				    &battery->state.amperage);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x0f,
-				    &battery->state.remaining_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x16,
-				    &battery->state.battery_state);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
+	if (battery->update_time &&
+	    time_before(jiffies, battery->update_time +
+				msecs_to_jiffies(cache_time)))
+		return 0;
+	for (i = 0; i < ARRAY_SIZE(state_readers); ++i) {
+		result = acpi_smbus_read(battery->sbs->hc,
+					 state_readers[i].mode,
+					 ACPI_SBS_BATTERY,
+					 state_readers[i].command,
+				         (u8 *)battery +
+						state_readers[i].offset);
+		if (result)
+			goto end;
 	}
-
       end:
+	battery->update_time = jiffies;
 	return result;
 }
 
 static int acpi_battery_get_alarm(struct acpi_battery *battery)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01,
-				    &battery->alarm.remaining_capacity);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-      end:
-
-	return result;
+	return acpi_smbus_read(battery->sbs->hc, SMBUS_READ_WORD,
+				 ACPI_SBS_BATTERY, 0x01,
+				 (u8 *)&battery->alarm_capacity);
 }
 
-static int acpi_battery_set_alarm(struct acpi_battery *battery,
-				  unsigned long alarm)
+static int acpi_battery_set_alarm(struct acpi_battery *battery)
 {
 	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-	int foo;
+	u16 value, sel = 1 << (battery->id + 12);
 
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
-	}
+	int ret;
 
-	/* If necessary, enable the alarm */
 
-	if (alarm > 0) {
-		result =
-		    acpi_sbs_read_word(sbs, ACPI_SB_SMBUS_ADDR, 0x03,
-				       &battery_mode);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_read_word() failed"));
+	if (sbs->manager_present) {
+		ret = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_MANAGER,
+				0x01, (u8 *)&value);
+		if (ret)
 			goto end;
-		}
-
-		result =
-		    acpi_sbs_write_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01,
-					battery_mode & 0xbfff);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_write_word() failed"));
+		if ((value & 0xf000) != sel) {
+			value &= 0x0fff;
+			value |= sel;
+		ret = acpi_smbus_write(sbs->hc, SMBUS_WRITE_WORD,
+					 ACPI_SBS_MANAGER,
+					 0x01, (u8 *)&value, 2);
+		if (ret)
 			goto end;
 		}
 	}
-
-	foo = alarm / (battery->info.capacity_mode ? 10 : 1);
-	result = acpi_sbs_write_word(sbs, ACPI_SB_SMBUS_ADDR, 0x01, foo);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_write_word() failed"));
-		goto end;
-	}
-
+	ret = acpi_smbus_write(sbs->hc, SMBUS_WRITE_WORD, ACPI_SBS_BATTERY,
+				0x01, (u8 *)&battery->alarm_capacity, 2);
       end:
-
-	return result;
+	return ret;
 }
 
-static int acpi_battery_set_mode(struct acpi_battery *battery)
+static int acpi_ac_get_present(struct acpi_sbs *sbs)
 {
-	struct acpi_sbs *sbs = battery->sbs;
-	int result = 0;
-	s16 battery_mode;
-
-	if (capacity_mode == DEF_CAPACITY_UNIT) {
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs,
-				    ACPI_SB_SMBUS_ADDR, 0x03, &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	if (capacity_mode == MAH_CAPACITY_UNIT) {
-		battery_mode &= 0x7fff;
-	} else {
-		battery_mode |= 0x8000;
-	}
-	result = acpi_sbs_write_word(sbs,
-				     ACPI_SB_SMBUS_ADDR, 0x03, battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_write_word() failed"));
-		goto end;
-	}
-
-	result = acpi_sbs_read_word(sbs,
-				    ACPI_SB_SMBUS_ADDR, 0x03, &battery_mode);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
+	int result;
+	u16 status;
 
-      end:
+	result = acpi_smbus_read(sbs->hc, SMBUS_READ_WORD, ACPI_SBS_CHARGER,
+				 0x13, (u8 *) & status);
+	if (!result)
+		sbs->charger_present = (status >> 15) & 0x1;
 	return result;
 }
 
-static int acpi_battery_init(struct acpi_battery *battery)
+static ssize_t acpi_battery_alarm_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
 {
-	int result = 0;
-
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_set_mode(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_set_mode() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_info(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_info() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_state(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_state() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_alarm(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_alarm() failed"));
-		goto end;
-	}
-
-      end:
-	return result;
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	acpi_battery_get_alarm(battery);
+	return sprintf(buf, "%d\n", battery->alarm_capacity *
+				acpi_battery_scale(battery) * 1000);
 }
 
-static int acpi_ac_get_present(struct acpi_sbs *sbs)
+static ssize_t acpi_battery_alarm_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
 {
-	int result = 0;
-	s16 charger_status;
-
-	result = acpi_sbs_read_word(sbs, ACPI_SBC_SMBUS_ADDR, 0x13,
-				    &charger_status);
-
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_read_word() failed"));
-		goto end;
-	}
-
-	sbs->ac.ac_present = (charger_status & 0x8000) >> 15;
-
-      end:
-
-	return result;
+	unsigned long x;
+	struct acpi_battery *battery = to_acpi_battery(dev_get_drvdata(dev));
+	if (sscanf(buf, "%ld\n", &x) == 1)
+		battery->alarm_capacity = x /
+			(1000 * acpi_battery_scale(battery));
+	if (battery->present)
+		acpi_battery_set_alarm(battery);
+	return count;
 }
 
+static struct device_attribute alarm_attr = {
+	.attr = {.name = "alarm", .mode = 0644, .owner = THIS_MODULE},
+	.show = acpi_battery_alarm_show,
+	.store = acpi_battery_alarm_store,
+};
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc/acpi)
    -------------------------------------------------------------------------- */
 
+#ifdef CONFIG_ACPI_PROCFS
 /* Generic Routines */
-
 static int
-acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
-			struct proc_dir_entry *parent_dir,
-			char *dir_name,
-			struct file_operations *info_fops,
-			struct file_operations *state_fops,
-			struct file_operations *alarm_fops, void *data)
+acpi_sbs_add_fs(struct proc_dir_entry **dir,
+		struct proc_dir_entry *parent_dir,
+		char *dir_name,
+		struct file_operations *info_fops,
+		struct file_operations *state_fops,
+		struct file_operations *alarm_fops, void *data)
 {
 	struct proc_dir_entry *entry = NULL;
 
 	if (!*dir) {
 		*dir = proc_mkdir(dir_name, parent_dir);
 		if (!*dir) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"proc_mkdir() failed"));
 			return -ENODEV;
 		}
 		(*dir)->owner = THIS_MODULE;
@@ -882,10 +491,7 @@ acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
 	/* 'info' [R] */
 	if (info_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_INFO, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = info_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
@@ -895,10 +501,7 @@ acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
 	/* 'state' [R] */
 	if (state_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_STATE, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = state_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
@@ -908,24 +511,19 @@ acpi_sbs_generic_add_fs(struct proc_dir_entry **dir,
 	/* 'alarm' [R/W] */
 	if (alarm_fops) {
 		entry = create_proc_entry(ACPI_SBS_FILE_ALARM, S_IRUGO, *dir);
-		if (!entry) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"create_proc_entry() failed"));
-		} else {
+		if (entry) {
 			entry->proc_fops = alarm_fops;
 			entry->data = data;
 			entry->owner = THIS_MODULE;
 		}
 	}
-
 	return 0;
 }
 
 static void
-acpi_sbs_generic_remove_fs(struct proc_dir_entry **dir,
+acpi_sbs_remove_fs(struct proc_dir_entry **dir,
 			   struct proc_dir_entry *parent_dir)
 {
-
 	if (*dir) {
 		remove_proc_entry(ACPI_SBS_FILE_INFO, *dir);
 		remove_proc_entry(ACPI_SBS_FILE_STATE, *dir);
@@ -933,82 +531,52 @@ acpi_sbs_generic_remove_fs(struct proc_dir_entry **dir,
 		remove_proc_entry((*dir)->name, parent_dir);
 		*dir = NULL;
 	}
-
 }
 
 /* Smart Battery Interface */
-
 static struct proc_dir_entry *acpi_battery_dir = NULL;
 
+static inline char *acpi_battery_units(struct acpi_battery *battery)
+{
+	return acpi_battery_mode(battery) ? " mWh" : " mAh";
+}
+
+
 static int acpi_battery_read_info(struct seq_file *seq, void *offset)
 {
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
-	int cscale;
 	int result = 0;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	result = acpi_check_update_proc(sbs);
-	if (result)
-		goto end;
-
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_INFO);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
+	mutex_lock(&sbs->lock);
 
-	if (battery->battery_present) {
-		seq_printf(seq, "present:                 yes\n");
-	} else {
-		seq_printf(seq, "present:                 no\n");
+	seq_printf(seq, "present:                 %s\n",
+		   (battery->present) ? "yes" : "no");
+	if (!battery->present)
 		goto end;
-	}
 
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
 	seq_printf(seq, "design capacity:         %i%s\n",
-		   battery->info.design_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->design_capacity * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "last full capacity:      %i%s\n",
-		   battery->info.full_charge_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->full_charge_capacity * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "battery technology:      rechargeable\n");
-
 	seq_printf(seq, "design voltage:          %i mV\n",
-		   battery->info.design_voltage * battery->info.vscale);
-
+		   battery->design_voltage * acpi_battery_vscale(battery));
 	seq_printf(seq, "design capacity warning: unknown\n");
 	seq_printf(seq, "design capacity low:     unknown\n");
 	seq_printf(seq, "capacity granularity 1:  unknown\n");
 	seq_printf(seq, "capacity granularity 2:  unknown\n");
-
-	seq_printf(seq, "model number:            %s\n",
-		   battery->info.device_name);
-
+	seq_printf(seq, "model number:            %s\n", battery->device_name);
 	seq_printf(seq, "serial number:           %i\n",
-		   battery->info.serial_number);
-
+		   battery->serial_number);
 	seq_printf(seq, "battery type:            %s\n",
-		   battery->info.device_chemistry);
-
+		   battery->device_chemistry);
 	seq_printf(seq, "OEM info:                %s\n",
-		   battery->info.manufacturer_name);
-
+		   battery->manufacturer_name);
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1022,73 +590,29 @@ static int acpi_battery_read_state(struct seq_file *seq, void *offset)
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	int result = 0;
-	int cscale;
-	int foo;
-
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
 
-	result = acpi_check_update_proc(sbs);
-	if (result)
+	mutex_lock(&sbs->lock);
+	seq_printf(seq, "present:                 %s\n",
+		   (battery->present) ? "yes" : "no");
+	if (!battery->present)
 		goto end;
 
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_STATE);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
-
-	if (battery->battery_present) {
-		seq_printf(seq, "present:                 yes\n");
-	} else {
-		seq_printf(seq, "present:                 no\n");
-		goto end;
-	}
-
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
-
-	if (battery->state.battery_state & 0x0010) {
-		seq_printf(seq, "capacity state:          critical\n");
-	} else {
-		seq_printf(seq, "capacity state:          ok\n");
-	}
-
-	foo = (s16) battery->state.amperage * battery->info.ipscale;
-	if (battery->info.capacity_mode) {
-		foo = foo * battery->info.design_voltage / 1000;
-	}
-	if (battery->state.amperage < 0) {
-		seq_printf(seq, "charging state:          discharging\n");
-		seq_printf(seq, "present rate:            %d %s\n",
-			   -foo, battery->info.capacity_mode ? "mW" : "mA");
-	} else if (battery->state.amperage > 0) {
-		seq_printf(seq, "charging state:          charging\n");
-		seq_printf(seq, "present rate:            %d %s\n",
-			   foo, battery->info.capacity_mode ? "mW" : "mA");
-	} else {
-		seq_printf(seq, "charging state:          charged\n");
-		seq_printf(seq, "present rate:            0 %s\n",
-			   battery->info.capacity_mode ? "mW" : "mA");
-	}
-
+	acpi_battery_get_state(battery);
+	seq_printf(seq, "capacity state:          %s\n",
+		   (battery->state & 0x0010) ? "critical" : "ok");
+	seq_printf(seq, "charging state:          %s\n",
+		   (battery->current_now < 0) ? "discharging" :
+		   ((battery->current_now > 0) ? "charging" : "charged"));
+	seq_printf(seq, "present rate:            %d mA\n",
+		   abs(battery->current_now) * acpi_battery_ipscale(battery));
 	seq_printf(seq, "remaining capacity:      %i%s\n",
-		   battery->state.remaining_capacity * cscale,
-		   battery->info.capacity_mode ? "0 mWh" : " mAh");
-
+		   battery->capacity_now * acpi_battery_scale(battery),
+		   acpi_battery_units(battery));
 	seq_printf(seq, "present voltage:         %i mV\n",
-		   battery->state.voltage * battery->info.vscale);
+		   battery->voltage_now * acpi_battery_vscale(battery));
 
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1102,48 +626,25 @@ static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	int result = 0;
-	int cscale;
-
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	result = acpi_check_update_proc(sbs);
-	if (result)
-		goto end;
 
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, battery->id, DATA_TYPE_ALARM);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
+	mutex_lock(&sbs->lock);
 
-	if (!battery->battery_present) {
+	if (!battery->present) {
 		seq_printf(seq, "present:                 no\n");
 		goto end;
 	}
 
-	if (battery->info.capacity_mode) {
-		cscale = battery->info.vscale * battery->info.ipscale;
-	} else {
-		cscale = battery->info.ipscale;
-	}
-
+	acpi_battery_get_alarm(battery);
 	seq_printf(seq, "alarm:                   ");
-	if (battery->alarm.remaining_capacity) {
+	if (battery->alarm_capacity)
 		seq_printf(seq, "%i%s\n",
-			   battery->alarm.remaining_capacity * cscale,
-			   battery->info.capacity_mode ? "0 mWh" : " mAh");
-	} else {
+			   battery->alarm_capacity *
+			   acpi_battery_scale(battery),
+			   acpi_battery_units(battery));
+	else
 		seq_printf(seq, "disabled\n");
-	}
-
       end:
-
-	sbs_mutex_unlock(sbs);
-
+	mutex_unlock(&sbs->lock);
 	return result;
 }
 
@@ -1155,59 +656,29 @@ acpi_battery_write_alarm(struct file *file, const char __user * buffer,
 	struct acpi_battery *battery = seq->private;
 	struct acpi_sbs *sbs = battery->sbs;
 	char alarm_string[12] = { '\0' };
-	int result, old_alarm, new_alarm;
-
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
-
-	result = acpi_check_update_proc(sbs);
-	if (result)
-		goto end;
-
-	if (!battery->battery_present) {
+	int result = 0;
+	mutex_lock(&sbs->lock);
+	if (!battery->present) {
 		result = -ENODEV;
 		goto end;
 	}
-
 	if (count > sizeof(alarm_string) - 1) {
 		result = -EINVAL;
 		goto end;
 	}
-
 	if (copy_from_user(alarm_string, buffer, count)) {
 		result = -EFAULT;
 		goto end;
 	}
-
 	alarm_string[count] = 0;
-
-	old_alarm = battery->alarm.remaining_capacity;
-	new_alarm = simple_strtoul(alarm_string, NULL, 0);
-
-	result = acpi_battery_set_alarm(battery, new_alarm);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_set_alarm() failed"));
-		acpi_battery_set_alarm(battery, old_alarm);
-		goto end;
-	}
-	result = acpi_battery_get_alarm(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_alarm() failed"));
-		acpi_battery_set_alarm(battery, old_alarm);
-		goto end;
-	}
-
+	battery->alarm_capacity = simple_strtoul(alarm_string, NULL, 0) /
+					acpi_battery_scale(battery);
+	acpi_battery_set_alarm(battery);
       end:
-	sbs_mutex_unlock(sbs);
-
-	if (result) {
+	mutex_unlock(&sbs->lock);
+	if (result)
 		return result;
-	} else {
-		return count;
-	}
+	return count;
 }
 
 static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
@@ -1246,26 +717,15 @@ static struct proc_dir_entry *acpi_ac_dir = NULL;
 
 static int acpi_ac_read_state(struct seq_file *seq, void *offset)
 {
-	struct acpi_sbs *sbs = seq->private;
-	int result;
 
-	if (sbs_mutex_lock(sbs)) {
-		return -ENODEV;
-	}
+	struct acpi_sbs *sbs = seq->private;
 
-	if (update_time == 0) {
-		result = acpi_sbs_update_run(sbs, -1, DATA_TYPE_AC_STATE);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_update_run() failed"));
-		}
-	}
+	mutex_lock(&sbs->lock);
 
 	seq_printf(seq, "state:                   %s\n",
-		   sbs->ac.ac_present ? "on-line" : "off-line");
-
-	sbs_mutex_unlock(sbs);
+		   sbs->charger_present ? "on-line" : "off-line");
 
+	mutex_unlock(&sbs->lock);
 	return 0;
 }
 
@@ -1282,429 +742,203 @@ static struct file_operations acpi_ac_state_fops = {
 	.owner = THIS_MODULE,
 };
 
+#endif
+
 /* --------------------------------------------------------------------------
                                  Driver Interface
    -------------------------------------------------------------------------- */
+static int acpi_battery_read(struct acpi_battery *battery)
+{
+	int result = 0, saved_present = battery->present;
+	u16 state;
+
+	if (battery->sbs->manager_present) {
+		result = acpi_smbus_read(battery->sbs->hc, SMBUS_READ_WORD,
+				ACPI_SBS_MANAGER, 0x01, (u8 *)&state);
+		if (!result)
+			battery->present = state & (1 << battery->id);
+		state &= 0x0fff;
+		state |= 1 << (battery->id + 12);
+		acpi_smbus_write(battery->sbs->hc, SMBUS_WRITE_WORD,
+				  ACPI_SBS_MANAGER, 0x01, (u8 *)&state, 2);
+	} else if (battery->id == 0)
+		battery->present = 1;
+	if (result || !battery->present)
+		return result;
 
-/* Smart Battery */
+	if (saved_present != battery->present) {
+		battery->update_time = 0;
+		result = acpi_battery_get_info(battery);
+		if (result)
+			return result;
+	}
+	result = acpi_battery_get_state(battery);
+	return result;
+}
 
+/* Smart Battery */
 static int acpi_battery_add(struct acpi_sbs *sbs, int id)
 {
-	int is_present;
+	struct acpi_battery *battery = &sbs->battery[id];
 	int result;
-	char dir_name[32];
-	struct acpi_battery *battery;
-
-	battery = &sbs->battery[id];
-
-	battery->alive = 0;
 
-	battery->init_state = 0;
 	battery->id = id;
 	battery->sbs = sbs;
+	result = acpi_battery_read(battery);
+	if (result)
+		return result;
 
-	result = acpi_battery_select(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_select() failed"));
-		goto end;
-	}
-
-	result = acpi_battery_get_present(battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_battery_get_present() failed"));
-		goto end;
-	}
-
-	is_present = battery->battery_present;
-
-	if (is_present) {
-		result = acpi_battery_init(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_init() failed"));
-			goto end;
-		}
-		battery->init_state = 1;
-	}
-
-	sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
-
-	result = acpi_sbs_generic_add_fs(&battery->battery_entry,
-					 acpi_battery_dir,
-					 dir_name,
-					 &acpi_battery_info_fops,
-					 &acpi_battery_state_fops,
-					 &acpi_battery_alarm_fops, battery);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_generic_add_fs() failed"));
-		goto end;
+	sprintf(battery->name, ACPI_BATTERY_DIR_NAME, id);
+#ifdef CONFIG_ACPI_PROCFS
+	acpi_sbs_add_fs(&battery->proc_entry, acpi_battery_dir,
+			battery->name, &acpi_battery_info_fops,
+			&acpi_battery_state_fops, &acpi_battery_alarm_fops,
+			battery);
+#endif
+	battery->bat.name = battery->name;
+	battery->bat.type = POWER_SUPPLY_TYPE_BATTERY;
+	if (!acpi_battery_mode(battery)) {
+		battery->bat.properties = sbs_charge_battery_props;
+		battery->bat.num_properties =
+		    ARRAY_SIZE(sbs_charge_battery_props);
+	} else {
+		battery->bat.properties = sbs_energy_battery_props;
+		battery->bat.num_properties =
+		    ARRAY_SIZE(sbs_energy_battery_props);
 	}
-	battery->alive = 1;
-
+	battery->bat.get_property = acpi_sbs_battery_get_property;
+	result = power_supply_register(&sbs->device->dev, &battery->bat);
+	device_create_file(battery->bat.dev, &alarm_attr);
 	printk(KERN_INFO PREFIX "%s [%s]: Battery Slot [%s] (battery %s)\n",
-	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device), dir_name,
-	       sbs->battery->battery_present ? "present" : "absent");
-
-      end:
+	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
+	       battery->name, sbs->battery->present ? "present" : "absent");
 	return result;
 }
 
 static void acpi_battery_remove(struct acpi_sbs *sbs, int id)
 {
-
-	if (sbs->battery[id].battery_entry) {
-		acpi_sbs_generic_remove_fs(&(sbs->battery[id].battery_entry),
-					   acpi_battery_dir);
-	}
+	if (sbs->battery[id].bat.dev)
+		device_remove_file(sbs->battery[id].bat.dev, &alarm_attr);
+		power_supply_unregister(&sbs->battery[id].bat);
+#ifdef CONFIG_ACPI_PROCFS
+	if (sbs->battery[id].proc_entry) {
+		acpi_sbs_remove_fs(&(sbs->battery[id].proc_entry),
+				   acpi_battery_dir);
+	}
+#endif
 }
 
-static int acpi_ac_add(struct acpi_sbs *sbs)
+static int acpi_charger_add(struct acpi_sbs *sbs)
 {
 	int result;
 
 	result = acpi_ac_get_present(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ac_get_present() failed"));
+	if (result)
 		goto end;
-	}
-
-	result = acpi_sbs_generic_add_fs(&sbs->ac_entry,
-					 acpi_ac_dir,
-					 ACPI_AC_DIR_NAME,
-					 NULL, &acpi_ac_state_fops, NULL, sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_generic_add_fs() failed"));
+#ifdef CONFIG_ACPI_PROCFS
+	result = acpi_sbs_add_fs(&sbs->charger_entry, acpi_ac_dir,
+				 ACPI_AC_DIR_NAME, NULL,
+				 &acpi_ac_state_fops, NULL, sbs);
+	if (result)
 		goto end;
-	}
-
+#endif
+	sbs->charger.name = "sbs-charger";
+	sbs->charger.type = POWER_SUPPLY_TYPE_MAINS;
+	sbs->charger.properties = sbs_ac_props;
+	sbs->charger.num_properties = ARRAY_SIZE(sbs_ac_props);
+	sbs->charger.get_property = sbs_get_ac_property;
+	power_supply_register(&sbs->device->dev, &sbs->charger);
 	printk(KERN_INFO PREFIX "%s [%s]: AC Adapter [%s] (%s)\n",
 	       ACPI_SBS_DEVICE_NAME, acpi_device_bid(sbs->device),
-	       ACPI_AC_DIR_NAME, sbs->ac.ac_present ? "on-line" : "off-line");
-
+	       ACPI_AC_DIR_NAME, sbs->charger_present ? "on-line" : "off-line");
       end:
-
 	return result;
 }
 
-static void acpi_ac_remove(struct acpi_sbs *sbs)
+static void acpi_charger_remove(struct acpi_sbs *sbs)
 {
-
-	if (sbs->ac_entry) {
-		acpi_sbs_generic_remove_fs(&sbs->ac_entry, acpi_ac_dir);
-	}
+	if (sbs->charger.dev)
+		power_supply_unregister(&sbs->charger);
+#ifdef CONFIG_ACPI_PROCFS
+	if (sbs->charger_entry)
+		acpi_sbs_remove_fs(&sbs->charger_entry, acpi_ac_dir);
+#endif
 }
 
-static void acpi_sbs_update_time_run(unsigned long data)
+void acpi_sbs_callback(void *context)
 {
-	acpi_os_execute(OSL_GPE_HANDLER, acpi_sbs_update_time, (void *)data);
-}
-
-static int acpi_sbs_update_run(struct acpi_sbs *sbs, int id, int data_type)
-{
-	struct acpi_battery *battery;
-	int result = 0, cnt;
-	int old_ac_present = -1;
-	int old_battery_present = -1;
-	int new_ac_present = -1;
-	int new_battery_present = -1;
-	int id_min = 0, id_max = MAX_SBS_BAT - 1;
-	char dir_name[32];
-	int do_battery_init = 0, do_ac_init = 0;
-	int old_remaining_capacity = 0;
-	int update_battery = 1;
-	int up_tm = update_time;
-
-	if (sbs_zombie(sbs)) {
-		goto end;
-	}
-
-	if (id >= 0) {
-		id_min = id_max = id;
-	}
-
-	if (data_type == DATA_TYPE_COMMON && up_tm > 0) {
-		cnt = up_tm / (up_tm > UPDATE_DELAY ? UPDATE_DELAY : up_tm);
-		if (sbs->run_cnt % cnt != 0) {
-			update_battery = 0;
-		}
-	}
-
-	sbs->run_cnt++;
-
-	old_ac_present = sbs->ac.ac_present;
-
-	result = acpi_ac_get_present(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_ac_get_present() failed"));
-	}
-
-	new_ac_present = sbs->ac.ac_present;
-
-	do_ac_init = (old_ac_present != new_ac_present);
-	if (sbs->run_cnt == 1 && data_type == DATA_TYPE_COMMON) {
-		do_ac_init = 1;
-	}
-
-	if (do_ac_init) {
-		result = acpi_sbs_generate_event(sbs->device,
-						 ACPI_SBS_AC_NOTIFY_STATUS,
-						 new_ac_present,
-						 ACPI_AC_DIR_NAME,
-						 ACPI_AC_CLASS);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_generate_event() failed"));
-		}
-	}
-
-	if (data_type == DATA_TYPE_COMMON) {
-		if (!do_ac_init && !update_battery) {
-			goto end;
-		}
-	}
-
-	if (data_type == DATA_TYPE_AC_STATE && !do_ac_init) {
-		goto end;
-	}
-
-	for (id = id_min; id <= id_max; id++) {
-		battery = &sbs->battery[id];
-		if (battery->alive == 0) {
-			continue;
-		}
-
-		old_remaining_capacity = battery->state.remaining_capacity;
-
-		old_battery_present = battery->battery_present;
-
-		result = acpi_battery_select(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_select() failed"));
-		}
-
-		result = acpi_battery_get_present(battery);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_get_present() failed"));
-		}
-
-		new_battery_present = battery->battery_present;
-
-		do_battery_init = ((old_battery_present != new_battery_present)
-				   && new_battery_present);
-		if (!new_battery_present)
-			goto event;
-		if (do_ac_init || do_battery_init) {
-			result = acpi_battery_init(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_init() "
-						"failed"));
-			}
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_INFO)
-		    && new_battery_present) {
-			result = acpi_battery_get_info(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_info() failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_INFO) {
-			continue;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_STATE)
-		    && new_battery_present) {
-			result = acpi_battery_get_state(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_state() failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_STATE) {
-			goto event;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-		if ((data_type == DATA_TYPE_COMMON
-		     || data_type == DATA_TYPE_ALARM)
-		    && new_battery_present) {
-			result = acpi_battery_get_alarm(battery);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_battery_get_alarm() "
-						"failed"));
-			}
-		}
-		if (data_type == DATA_TYPE_ALARM) {
-			continue;
-		}
-		if (sbs_zombie(sbs)) {
-			goto end;
-		}
-
-	      event:
-
-		if (old_battery_present != new_battery_present || do_ac_init ||
-		    old_remaining_capacity !=
-		    battery->state.remaining_capacity) {
-			sprintf(dir_name, ACPI_BATTERY_DIR_NAME, id);
-			result = acpi_sbs_generate_event(sbs->device,
-							 ACPI_SBS_BATTERY_NOTIFY_STATUS,
-							 new_battery_present,
-							 dir_name,
-							 ACPI_BATTERY_CLASS);
-			if (result) {
-				ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-						"acpi_sbs_generate_event() "
-						"failed"));
-			}
+	int id;
+	struct acpi_sbs *sbs = context;
+	struct acpi_battery *bat;
+	u8 saved_charger_state = sbs->charger_present;
+	u8 saved_battery_state;
+	acpi_ac_get_present(sbs);
+	if (sbs->charger_present != saved_charger_state) {
+#ifdef CONFIG_ACPI_PROC_EVENT
+		acpi_bus_generate_proc_event4(ACPI_AC_CLASS, ACPI_AC_DIR_NAME,
+					      ACPI_SBS_NOTIFY_STATUS,
+					      sbs->charger_present);
+#endif
+		kobject_uevent(&sbs->charger.dev->kobj, KOBJ_CHANGE);
+	}
+	if (sbs->manager_present) {
+		for (id = 0; id < MAX_SBS_BAT; ++id) {
+			if (!(sbs->batteries_supported & (1 << id)))
+				continue;
+			bat = &sbs->battery[id];
+			saved_battery_state = bat->present;
+			acpi_battery_read(bat);
+			if (saved_battery_state == bat->present)
+				continue;
+#ifdef CONFIG_ACPI_PROC_EVENT
+			acpi_bus_generate_proc_event4(ACPI_BATTERY_CLASS,
+						      bat->name,
+						      ACPI_SBS_NOTIFY_STATUS,
+						      bat->present);
+#endif
+			kobject_uevent(&bat->bat.dev->kobj, KOBJ_CHANGE);
 		}
 	}
-
-      end:
-
-	return result;
 }
 
-static void acpi_sbs_update_time(void *data)
-{
-	struct acpi_sbs *sbs = data;
-	unsigned long delay = -1;
-	int result;
-	unsigned int up_tm = update_time;
-
-	if (sbs_mutex_lock(sbs))
-		return;
-
-	result = acpi_sbs_update_run(sbs, -1, DATA_TYPE_COMMON);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_sbs_update_run() failed"));
-	}
-
-	if (sbs_zombie(sbs)) {
-		goto end;
-	}
-
-	if (!up_tm) {
-		if (timer_pending(&sbs->update_timer))
-			del_timer(&sbs->update_timer);
-	} else {
-		delay = (up_tm > UPDATE_DELAY ? UPDATE_DELAY : up_tm);
-		delay = jiffies + HZ * delay;
-		if (timer_pending(&sbs->update_timer)) {
-			mod_timer(&sbs->update_timer, delay);
-		} else {
-			sbs->update_timer.data = (unsigned long)data;
-			sbs->update_timer.function = acpi_sbs_update_time_run;
-			sbs->update_timer.expires = delay;
-			add_timer(&sbs->update_timer);
-		}
-	}
-
-      end:
-
-	sbs_mutex_unlock(sbs);
-}
+static int acpi_sbs_remove(struct acpi_device *device, int type);
 
 static int acpi_sbs_add(struct acpi_device *device)
 {
-	struct acpi_sbs *sbs = NULL;
-	int result = 0, remove_result = 0;
+	struct acpi_sbs *sbs;
+	int result = 0;
 	int id;
-	acpi_status status = AE_OK;
-	unsigned long val;
-
-	status =
-	    acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
-	if (ACPI_FAILURE(status)) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC"));
-		return -EIO;
-	}
 
 	sbs = kzalloc(sizeof(struct acpi_sbs), GFP_KERNEL);
 	if (!sbs) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "kzalloc() failed"));
 		result = -ENOMEM;
 		goto end;
 	}
 
-	mutex_init(&sbs->mutex);
-
-	sbs_mutex_lock(sbs);
+	mutex_init(&sbs->lock);
 
-	sbs->base = 0xff & (val >> 8);
+	sbs->hc = acpi_driver_data(device->parent);
 	sbs->device = device;
-
 	strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_SBS_CLASS);
 	acpi_driver_data(device) = sbs;
 
-	result = acpi_ac_add(sbs);
-	if (result) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed"));
-		goto end;
-	}
-
-	acpi_sbsm_get_info(sbs);
-
-	if (!sbs->sbsm_present) {
-		result = acpi_battery_add(sbs, 0);
-		if (result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_battery_add() failed"));
-			goto end;
-		}
-	} else {
-		for (id = 0; id < MAX_SBS_BAT; id++) {
-			if ((sbs->sbsm_batteries_supported & (1 << id))) {
-				result = acpi_battery_add(sbs, id);
-				if (result) {
-					ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-							"acpi_battery_add() failed"));
-					goto end;
-				}
-			}
-		}
-	}
-
-	init_timer(&sbs->update_timer);
-	result = acpi_check_update_proc(sbs);
+	result = acpi_charger_add(sbs);
 	if (result)
 		goto end;
 
+	result = acpi_manager_get_info(sbs);
+	if (!result) {
+		sbs->manager_present = 1;
+		for (id = 0; id < MAX_SBS_BAT; ++id)
+			if ((sbs->batteries_supported & (1 << id)))
+				acpi_battery_add(sbs, id);
+	} else
+		acpi_battery_add(sbs, 0);
+	acpi_smbus_register_callback(sbs->hc, acpi_sbs_callback, sbs);
       end:
-
-	sbs_mutex_unlock(sbs);
-
-	if (result) {
-		remove_result = acpi_sbs_remove(device, 0);
-		if (remove_result) {
-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-					"acpi_sbs_remove() failed"));
-		}
-	}
-
+	if (result)
+		acpi_sbs_remove(device, 0);
 	return result;
 }
 
@@ -1713,39 +947,25 @@ static int acpi_sbs_remove(struct acpi_device *device, int type)
 	struct acpi_sbs *sbs;
 	int id;
 
-	if (!device) {
+	if (!device)
 		return -EINVAL;
-	}
-
 	sbs = acpi_driver_data(device);
-	if (!sbs) {
+	if (!sbs)
 		return -EINVAL;
-	}
-
-	sbs_mutex_lock(sbs);
-
-	sbs->zombie = 1;
-	del_timer_sync(&sbs->update_timer);
-	acpi_os_wait_events_complete(NULL);
-	del_timer_sync(&sbs->update_timer);
-
-	for (id = 0; id < MAX_SBS_BAT; id++) {
+	mutex_lock(&sbs->lock);
+	acpi_smbus_unregister_callback(sbs->hc);
+	for (id = 0; id < MAX_SBS_BAT; ++id)
 		acpi_battery_remove(sbs, id);
-	}
-
-	acpi_ac_remove(sbs);
-
-	sbs_mutex_unlock(sbs);
-
-	mutex_destroy(&sbs->mutex);
-
+	acpi_charger_remove(sbs);
+	mutex_unlock(&sbs->lock);
+	mutex_destroy(&sbs->lock);
 	kfree(sbs);
-
 	return 0;
 }
 
 static void acpi_sbs_rmdirs(void)
 {
+#ifdef CONFIG_ACPI_PROCFS
 	if (acpi_ac_dir) {
 		acpi_unlock_ac_dir(acpi_ac_dir);
 		acpi_ac_dir = NULL;
@@ -1754,69 +974,58 @@ static void acpi_sbs_rmdirs(void)
 		acpi_unlock_battery_dir(acpi_battery_dir);
 		acpi_battery_dir = NULL;
 	}
+#endif
 }
 
 static int acpi_sbs_resume(struct acpi_device *device)
 {
 	struct acpi_sbs *sbs;
-
 	if (!device)
 		return -EINVAL;
-
 	sbs = device->driver_data;
-
-	sbs->run_cnt = 0;
-
+	acpi_sbs_callback(sbs);
 	return 0;
 }
 
+static struct acpi_driver acpi_sbs_driver = {
+	.name = "sbs",
+	.class = ACPI_SBS_CLASS,
+	.ids = sbs_device_ids,
+	.ops = {
+		.add = acpi_sbs_add,
+		.remove = acpi_sbs_remove,
+		.resume = acpi_sbs_resume,
+		},
+};
+
 static int __init acpi_sbs_init(void)
 {
 	int result = 0;
 
 	if (acpi_disabled)
 		return -ENODEV;
-
-	if (capacity_mode != DEF_CAPACITY_UNIT
-	    && capacity_mode != MAH_CAPACITY_UNIT
-	    && capacity_mode != MWH_CAPACITY_UNIT) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"invalid capacity_mode = %d", capacity_mode));
-		return -EINVAL;
-	}
-
+#ifdef CONFIG_ACPI_PROCFS
 	acpi_ac_dir = acpi_lock_ac_dir();
-	if (!acpi_ac_dir) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_lock_ac_dir() failed"));
+	if (!acpi_ac_dir)
 		return -ENODEV;
-	}
-
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_lock_battery_dir() failed"));
 		acpi_sbs_rmdirs();
 		return -ENODEV;
 	}
-
+#endif
 	result = acpi_bus_register_driver(&acpi_sbs_driver);
 	if (result < 0) {
-		ACPI_EXCEPTION((AE_INFO, AE_ERROR,
-				"acpi_bus_register_driver() failed"));
 		acpi_sbs_rmdirs();
 		return -ENODEV;
 	}
-
 	return 0;
 }
 
 static void __exit acpi_sbs_exit(void)
 {
 	acpi_bus_unregister_driver(&acpi_sbs_driver);
-
 	acpi_sbs_rmdirs();
-
 	return;
 }
 
diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c
new file mode 100644
index 00000000000..046d7c3ed35
--- /dev/null
+++ b/drivers/acpi/sbshc.c
@@ -0,0 +1,309 @@
+/*
+ * SMBus driver for ACPI Embedded Controller (v0.1)
+ *
+ * Copyright (c) 2007 Alexey Starikovskiy
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2.
+ */
+
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/actypes.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include "sbshc.h"
+
+#define ACPI_SMB_HC_CLASS	"smbus_host_controller"
+#define ACPI_SMB_HC_DEVICE_NAME	"ACPI SMBus HC"
+
+struct acpi_smb_hc {
+	struct acpi_ec *ec;
+	struct mutex lock;
+	wait_queue_head_t wait;
+	u8 offset;
+	u8 query_bit;
+	smbus_alarm_callback callback;
+	void *context;
+};
+
+static int acpi_smbus_hc_add(struct acpi_device *device);
+static int acpi_smbus_hc_remove(struct acpi_device *device, int type);
+
+static const struct acpi_device_id sbs_device_ids[] = {
+	{"ACPI0001", 0},
+	{"ACPI0005", 0},
+	{"", 0},
+};
+
+MODULE_DEVICE_TABLE(acpi, sbs_device_ids);
+
+static struct acpi_driver acpi_smb_hc_driver = {
+	.name = "smbus_hc",
+	.class = ACPI_SMB_HC_CLASS,
+	.ids = sbs_device_ids,
+	.ops = {
+		.add = acpi_smbus_hc_add,
+		.remove = acpi_smbus_hc_remove,
+		},
+};
+
+union acpi_smb_status {
+	u8 raw;
+	struct {
+		u8 status:5;
+		u8 reserved:1;
+		u8 alarm:1;
+		u8 done:1;
+	} fields;
+};
+
+enum acpi_smb_status_codes {
+	SMBUS_OK = 0,
+	SMBUS_UNKNOWN_FAILURE = 0x07,
+	SMBUS_DEVICE_ADDRESS_NACK = 0x10,
+	SMBUS_DEVICE_ERROR = 0x11,
+	SMBUS_DEVICE_COMMAND_ACCESS_DENIED = 0x12,
+	SMBUS_UNKNOWN_ERROR = 0x13,
+	SMBUS_DEVICE_ACCESS_DENIED = 0x17,
+	SMBUS_TIMEOUT = 0x18,
+	SMBUS_HOST_UNSUPPORTED_PROTOCOL = 0x19,
+	SMBUS_BUSY = 0x1a,
+	SMBUS_PEC_ERROR = 0x1f,
+};
+
+enum acpi_smb_offset {
+	ACPI_SMB_PROTOCOL = 0,	/* protocol, PEC */
+	ACPI_SMB_STATUS = 1,	/* status */
+	ACPI_SMB_ADDRESS = 2,	/* address */
+	ACPI_SMB_COMMAND = 3,	/* command */
+	ACPI_SMB_DATA = 4,	/* 32 data registers */
+	ACPI_SMB_BLOCK_COUNT = 0x24,	/* number of data bytes */
+	ACPI_SMB_ALARM_ADDRESS = 0x25,	/* alarm address */
+	ACPI_SMB_ALARM_DATA = 0x26,	/* 2 bytes alarm data */
+};
+
+static inline int smb_hc_read(struct acpi_smb_hc *hc, u8 address, u8 *data)
+{
+	return ec_read(hc->offset + address, data);
+}
+
+static inline int smb_hc_write(struct acpi_smb_hc *hc, u8 address, u8 data)
+{
+	return ec_write(hc->offset + address, data);
+}
+
+static inline int smb_check_done(struct acpi_smb_hc *hc)
+{
+	union acpi_smb_status status = {.raw = 0};
+	smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw);
+	return status.fields.done && (status.fields.status == SMBUS_OK);
+}
+
+static int wait_transaction_complete(struct acpi_smb_hc *hc, int timeout)
+{
+	if (wait_event_timeout(hc->wait, smb_check_done(hc),
+			       msecs_to_jiffies(timeout)))
+		return 0;
+	else
+		return -ETIME;
+}
+
+int acpi_smbus_transaction(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		    u8 command, u8 *data, u8 length)
+{
+	int ret = -EFAULT, i;
+	u8 temp, sz = 0;
+
+	mutex_lock(&hc->lock);
+	if (smb_hc_read(hc, ACPI_SMB_PROTOCOL, &temp))
+		goto end;
+	if (temp) {
+		ret = -EBUSY;
+		goto end;
+	}
+	smb_hc_write(hc, ACPI_SMB_COMMAND, command);
+	smb_hc_write(hc, ACPI_SMB_COMMAND, command);
+	if (!(protocol & 0x01)) {
+		smb_hc_write(hc, ACPI_SMB_BLOCK_COUNT, length);
+		for (i = 0; i < length; ++i)
+			smb_hc_write(hc, ACPI_SMB_DATA + i, data[i]);
+	}
+	smb_hc_write(hc, ACPI_SMB_ADDRESS, address << 1);
+	smb_hc_write(hc, ACPI_SMB_PROTOCOL, protocol);
+	/*
+	 * Wait for completion. Save the status code, data size,
+	 * and data into the return package (if required by the protocol).
+	 */
+	ret = wait_transaction_complete(hc, 1000);
+	if (ret || !(protocol & 0x01))
+		goto end;
+	switch (protocol) {
+	case SMBUS_RECEIVE_BYTE:
+	case SMBUS_READ_BYTE:
+		sz = 1;
+		break;
+	case SMBUS_READ_WORD:
+		sz = 2;
+		break;
+	case SMBUS_READ_BLOCK:
+		if (smb_hc_read(hc, ACPI_SMB_BLOCK_COUNT, &sz)) {
+			ret = -EFAULT;
+			goto end;
+		}
+		sz &= 0x1f;
+		break;
+	}
+	for (i = 0; i < sz; ++i)
+		smb_hc_read(hc, ACPI_SMB_DATA + i, &data[i]);
+      end:
+	mutex_unlock(&hc->lock);
+	return ret;
+}
+
+int acpi_smbus_read(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		    u8 command, u8 *data)
+{
+	return acpi_smbus_transaction(hc, protocol, address, command, data, 0);
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_read);
+
+int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+		     u8 command, u8 *data, u8 length)
+{
+	return acpi_smbus_transaction(hc, protocol, address, command, data, length);
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_write);
+
+int acpi_smbus_register_callback(struct acpi_smb_hc *hc,
+			         smbus_alarm_callback callback, void *context)
+{
+	mutex_lock(&hc->lock);
+	hc->callback = callback;
+	hc->context = context;
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_register_callback);
+
+int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc)
+{
+	mutex_lock(&hc->lock);
+	hc->callback = NULL;
+	hc->context = NULL;
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(acpi_smbus_unregister_callback);
+
+static void acpi_smbus_callback(void *context)
+{
+	struct acpi_smb_hc *hc = context;
+
+	if (hc->callback)
+		hc->callback(hc->context);
+}
+
+static int smbus_alarm(void *context)
+{
+	struct acpi_smb_hc *hc = context;
+	union acpi_smb_status status;
+	if (smb_hc_read(hc, ACPI_SMB_STATUS, &status.raw))
+		return 0;
+	/* Check if it is only a completion notify */
+	if (status.fields.done)
+		wake_up(&hc->wait);
+	if (!status.fields.alarm)
+		return 0;
+	mutex_lock(&hc->lock);
+	smb_hc_write(hc, ACPI_SMB_STATUS, status.raw);
+	if (hc->callback)
+		acpi_os_execute(OSL_GPE_HANDLER, acpi_smbus_callback, hc);
+	mutex_unlock(&hc->lock);
+	return 0;
+}
+
+typedef int (*acpi_ec_query_func) (void *data);
+
+extern int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
+			      acpi_handle handle, acpi_ec_query_func func,
+			      void *data);
+
+static int acpi_smbus_hc_add(struct acpi_device *device)
+{
+	int status;
+	unsigned long val;
+	struct acpi_smb_hc *hc;
+
+	if (!device)
+		return -EINVAL;
+
+	status = acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
+	if (ACPI_FAILURE(status)) {
+		printk(KERN_ERR PREFIX "error obtaining _EC.\n");
+		return -EIO;
+	}
+
+	strcpy(acpi_device_name(device), ACPI_SMB_HC_DEVICE_NAME);
+	strcpy(acpi_device_class(device), ACPI_SMB_HC_CLASS);
+
+	hc = kzalloc(sizeof(struct acpi_smb_hc), GFP_KERNEL);
+	if (!hc)
+		return -ENOMEM;
+	mutex_init(&hc->lock);
+	init_waitqueue_head(&hc->wait);
+
+	hc->ec = acpi_driver_data(device->parent);
+	hc->offset = (val >> 8) & 0xff;
+	hc->query_bit = val & 0xff;
+	acpi_driver_data(device) = hc;
+
+	acpi_ec_add_query_handler(hc->ec, hc->query_bit, NULL, smbus_alarm, hc);
+	printk(KERN_INFO PREFIX "SBS HC: EC = 0x%p, offset = 0x%0x, query_bit = 0x%0x\n",
+		hc->ec, hc->offset, hc->query_bit);
+
+	return 0;
+}
+
+extern void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
+
+static int acpi_smbus_hc_remove(struct acpi_device *device, int type)
+{
+	struct acpi_smb_hc *hc;
+
+	if (!device)
+		return -EINVAL;
+
+	hc = acpi_driver_data(device);
+	acpi_ec_remove_query_handler(hc->ec, hc->query_bit);
+	kfree(hc);
+	return 0;
+}
+
+static int __init acpi_smb_hc_init(void)
+{
+	int result;
+
+	result = acpi_bus_register_driver(&acpi_smb_hc_driver);
+	if (result < 0)
+		return -ENODEV;
+	return 0;
+}
+
+static void __exit acpi_smb_hc_exit(void)
+{
+	acpi_bus_unregister_driver(&acpi_smb_hc_driver);
+}
+
+module_init(acpi_smb_hc_init);
+module_exit(acpi_smb_hc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alexey Starikovskiy");
+MODULE_DESCRIPTION("ACPI SMBus HC driver");
diff --git a/drivers/acpi/sbshc.h b/drivers/acpi/sbshc.h
new file mode 100644
index 00000000000..3bda3491a97
--- /dev/null
+++ b/drivers/acpi/sbshc.h
@@ -0,0 +1,27 @@
+struct acpi_smb_hc;
+enum acpi_smb_protocol {
+	SMBUS_WRITE_QUICK = 2,
+	SMBUS_READ_QUICK = 3,
+	SMBUS_SEND_BYTE = 4,
+	SMBUS_RECEIVE_BYTE = 5,
+	SMBUS_WRITE_BYTE = 6,
+	SMBUS_READ_BYTE = 7,
+	SMBUS_WRITE_WORD  = 8,
+	SMBUS_READ_WORD  = 9,
+	SMBUS_WRITE_BLOCK = 0xa,
+	SMBUS_READ_BLOCK = 0xb,
+	SMBUS_PROCESS_CALL = 0xc,
+	SMBUS_BLOCK_PROCESS_CALL = 0xd,
+};
+
+static const u8 SMBUS_PEC = 0x80;
+
+typedef void (*smbus_alarm_callback)(void *context);
+
+extern int acpi_smbus_read(struct acpi_smb_hc *hc, u8 protocol, u8 address,
+	       u8 command, u8 * data);
+extern int acpi_smbus_write(struct acpi_smb_hc *hc, u8 protocol, u8 slave_address,
+		u8 command, u8 * data, u8 length);
+extern int acpi_smbus_register_callback(struct acpi_smb_hc *hc,
+			         smbus_alarm_callback callback, void *context);
+extern int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc);
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index 048295ec370..f3d3867303e 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -44,7 +44,6 @@ int acpi_sleep_prepare(u32 acpi_state)
 	ACPI_FLUSH_CPU_CACHE();
 	acpi_enable_wakeup_device_prep(acpi_state);
 #endif
-	acpi_gpe_sleep_prepare(acpi_state);
 	acpi_enter_sleep_state_prep(acpi_state);
 	return 0;
 }
@@ -268,6 +267,11 @@ static void acpi_hibernation_leave(void)
 
 static void acpi_hibernation_finish(void)
 {
+	/*
+	 * If ACPI is not enabled by the BIOS and the boot kernel, we need to
+	 * enable it here.
+	 */
+	acpi_enable();
 	acpi_leave_sleep_state(ACPI_STATE_S4);
 	acpi_disable_wakeup_device(ACPI_STATE_S4);
 
diff --git a/drivers/acpi/sleep/sleep.h b/drivers/acpi/sleep/sleep.h
index ff1f8504f49..a2ea125ae2d 100644
--- a/drivers/acpi/sleep/sleep.h
+++ b/drivers/acpi/sleep/sleep.h
@@ -5,6 +5,5 @@ extern int acpi_suspend (u32 state);
 extern void acpi_enable_wakeup_device_prep(u8 sleep_state);
 extern void acpi_enable_wakeup_device(u8 sleep_state);
 extern void acpi_disable_wakeup_device(u8 sleep_state);
-extern void acpi_gpe_sleep_prepare(u32 sleep_state);
 
 extern int acpi_sleep_prepare(u32 acpi_state);
diff --git a/drivers/acpi/sleep/wakeup.c b/drivers/acpi/sleep/wakeup.c
index 97c27ddb144..ed8e41becf0 100644
--- a/drivers/acpi/sleep/wakeup.c
+++ b/drivers/acpi/sleep/wakeup.c
@@ -64,36 +64,29 @@ void acpi_enable_wakeup_device(u8 sleep_state)
 	ACPI_FUNCTION_TRACE("acpi_enable_wakeup_device");
 	spin_lock(&acpi_device_lock);
 	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
-
+		struct acpi_device *dev =
+			container_of(node, struct acpi_device, wakeup_list);
+		if (!dev->wakeup.flags.valid)
+			continue;
 		/* If users want to disable run-wake GPE,
 		 * we only disable it for wake and leave it for runtime
 		 */
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_RUNTIME);
-			/* Re-enable it, since set_gpe_type will disable it */
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_ISR);
-			spin_lock(&acpi_device_lock);
+		if (!dev->wakeup.state.enabled ||
+		    sleep_state > (u32) dev->wakeup.sleep_state) {
+			if (dev->wakeup.flags.run_wake) {
+				spin_unlock(&acpi_device_lock);
+				/* set_gpe_type will disable GPE, leave it like that */
+				acpi_set_gpe_type(dev->wakeup.gpe_device,
+						  dev->wakeup.gpe_number,
+						  ACPI_GPE_TYPE_RUNTIME);
+				spin_lock(&acpi_device_lock);
+			}
 			continue;
 		}
-
-		if (!dev->wakeup.flags.valid ||
-		    !dev->wakeup.state.enabled ||
-		    (sleep_state > (u32) dev->wakeup.sleep_state))
-			continue;
-
 		spin_unlock(&acpi_device_lock);
-		/* run-wake GPE has been enabled */
 		if (!dev->wakeup.flags.run_wake)
 			acpi_enable_gpe(dev->wakeup.gpe_device,
 					dev->wakeup.gpe_number, ACPI_ISR);
-		dev->wakeup.state.active = 1;
 		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
@@ -112,26 +105,25 @@ void acpi_disable_wakeup_device(u8 sleep_state)
 
 	spin_lock(&acpi_device_lock);
 	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
+		struct acpi_device *dev =
+			container_of(node, struct acpi_device, wakeup_list);
 
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_WAKE_RUN);
-			/* Re-enable it, since set_gpe_type will disable it */
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_NOT_ISR);
-			spin_lock(&acpi_device_lock);
+		if (!dev->wakeup.flags.valid)
 			continue;
-		}
-
-		if (!dev->wakeup.flags.valid ||
-		    !dev->wakeup.state.active ||
-		    (sleep_state > (u32) dev->wakeup.sleep_state))
+		if (!dev->wakeup.state.enabled ||
+		    sleep_state > (u32) dev->wakeup.sleep_state) {
+			if (dev->wakeup.flags.run_wake) {
+				spin_unlock(&acpi_device_lock);
+				acpi_set_gpe_type(dev->wakeup.gpe_device,
+						  dev->wakeup.gpe_number,
+						  ACPI_GPE_TYPE_WAKE_RUN);
+				/* Re-enable it, since set_gpe_type will disable it */
+				acpi_enable_gpe(dev->wakeup.gpe_device,
+						dev->wakeup.gpe_number, ACPI_NOT_ISR);
+				spin_lock(&acpi_device_lock);
+			}
 			continue;
+		}
 
 		spin_unlock(&acpi_device_lock);
 		acpi_disable_wakeup_device_power(dev);
@@ -142,7 +134,6 @@ void acpi_disable_wakeup_device(u8 sleep_state)
 			acpi_clear_gpe(dev->wakeup.gpe_device,
 				       dev->wakeup.gpe_number, ACPI_NOT_ISR);
 		}
-		dev->wakeup.state.active = 0;
 		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
@@ -160,48 +151,20 @@ static int __init acpi_wakeup_device_init(void)
 		struct acpi_device *dev = container_of(node,
 						       struct acpi_device,
 						       wakeup_list);
-
 		/* In case user doesn't load button driver */
-		if (dev->wakeup.flags.run_wake && !dev->wakeup.state.enabled) {
-			spin_unlock(&acpi_device_lock);
-			acpi_set_gpe_type(dev->wakeup.gpe_device,
-					  dev->wakeup.gpe_number,
-					  ACPI_GPE_TYPE_WAKE_RUN);
-			acpi_enable_gpe(dev->wakeup.gpe_device,
-					dev->wakeup.gpe_number, ACPI_NOT_ISR);
-			dev->wakeup.state.enabled = 1;
-			spin_lock(&acpi_device_lock);
-		}
+		if (!dev->wakeup.flags.run_wake || dev->wakeup.state.enabled)
+			continue;
+		spin_unlock(&acpi_device_lock);
+		acpi_set_gpe_type(dev->wakeup.gpe_device,
+				  dev->wakeup.gpe_number,
+				  ACPI_GPE_TYPE_WAKE_RUN);
+		acpi_enable_gpe(dev->wakeup.gpe_device,
+				dev->wakeup.gpe_number, ACPI_NOT_ISR);
+		dev->wakeup.state.enabled = 1;
+		spin_lock(&acpi_device_lock);
 	}
 	spin_unlock(&acpi_device_lock);
-
 	return 0;
 }
 
 late_initcall(acpi_wakeup_device_init);
-
-/*
- * Disable all wakeup GPEs before entering requested sleep state.
- *	@sleep_state:	ACPI state
- * Since acpi_enter_sleep_state() will disable all
- * RUNTIME GPEs, we simply mark all GPES that
- * are not enabled for wakeup from requested state as RUNTIME.
- */
-void acpi_gpe_sleep_prepare(u32 sleep_state)
-{
-	struct list_head *node, *next;
-
-	list_for_each_safe(node, next, &acpi_wakeup_device_list) {
-		struct acpi_device *dev = container_of(node,
-						       struct acpi_device,
-						       wakeup_list);
-
-		/* The GPE can wakeup system from this state, don't touch it */
-		if ((u32) dev->wakeup.sleep_state >= sleep_state)
-			continue;
-		/* acpi_set_gpe_type will automatically disable GPE */
-		acpi_set_gpe_type(dev->wakeup.gpe_device,
-				  dev->wakeup.gpe_number,
-				  ACPI_GPE_TYPE_RUNTIME);
-	}
-}
diff --git a/drivers/acpi/tables/tbutils.c b/drivers/acpi/tables/tbutils.c
index 8cc9492ffbf..5f1d85f2ffe 100644
--- a/drivers/acpi/tables/tbutils.c
+++ b/drivers/acpi/tables/tbutils.c
@@ -400,7 +400,7 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags)
 	u32 table_count;
 	struct acpi_table_header *table;
 	acpi_physical_address address;
-	acpi_physical_address rsdt_address;
+	acpi_physical_address uninitialized_var(rsdt_address);
 	u32 length;
 	u8 *table_entry;
 	acpi_status status;
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index ad898e10c1a..5f79b445121 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -195,6 +195,7 @@ struct acpi_thermal {
 	struct acpi_thermal_trips trips;
 	struct acpi_handle_list devices;
 	struct timer_list timer;
+	struct mutex lock;
 };
 
 static const struct file_operations acpi_thermal_state_fops = {
@@ -711,6 +712,7 @@ static void acpi_thermal_check(void *data)
 	int result = 0;
 	struct acpi_thermal *tz = data;
 	unsigned long sleep_time = 0;
+	unsigned long timeout_jiffies = 0;
 	int i = 0;
 	struct acpi_thermal_state state;
 
@@ -720,11 +722,15 @@ static void acpi_thermal_check(void *data)
 		return;
 	}
 
+	/* Check if someone else is already running */
+	if (!mutex_trylock(&tz->lock))
+		return;
+
 	state = tz->state;
 
 	result = acpi_thermal_get_temperature(tz);
 	if (result)
-		return;
+		goto unlock;
 
 	memset(&tz->state, 0, sizeof(tz->state));
 
@@ -787,10 +793,13 @@ static void acpi_thermal_check(void *data)
 	 * a thermal event occurs).  Note that _TSP and _TZD values are
 	 * given in 1/10th seconds (we must covert to milliseconds).
 	 */
-	if (tz->state.passive)
+	if (tz->state.passive) {
 		sleep_time = tz->trips.passive.tsp * 100;
-	else if (tz->polling_frequency > 0)
+		timeout_jiffies =  jiffies + (HZ * sleep_time) / 1000;
+	} else if (tz->polling_frequency > 0) {
 		sleep_time = tz->polling_frequency * 100;
+		timeout_jiffies =  round_jiffies(jiffies + (HZ * sleep_time) / 1000);
+	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s: temperature[%lu] sleep[%lu]\n",
 			  tz->name, tz->temperature, sleep_time));
@@ -804,17 +813,16 @@ static void acpi_thermal_check(void *data)
 			del_timer(&(tz->timer));
 	} else {
 		if (timer_pending(&(tz->timer)))
-			mod_timer(&(tz->timer),
-					jiffies + (HZ * sleep_time) / 1000);
+			mod_timer(&(tz->timer), timeout_jiffies);
 		else {
 			tz->timer.data = (unsigned long)tz;
 			tz->timer.function = acpi_thermal_run;
-			tz->timer.expires = jiffies + (HZ * sleep_time) / 1000;
+			tz->timer.expires = timeout_jiffies;
 			add_timer(&(tz->timer));
 		}
 	}
-
-	return;
+      unlock:
+	mutex_unlock(&tz->lock);
 }
 
 /* --------------------------------------------------------------------------
@@ -1251,7 +1259,7 @@ static int acpi_thermal_add(struct acpi_device *device)
 	strcpy(acpi_device_name(device), ACPI_THERMAL_DEVICE_NAME);
 	strcpy(acpi_device_class(device), ACPI_THERMAL_CLASS);
 	acpi_driver_data(device) = tz;
-
+	mutex_init(&tz->lock);
 	result = acpi_thermal_get_info(tz);
 	if (result)
 		goto end;
@@ -1321,7 +1329,7 @@ static int acpi_thermal_remove(struct acpi_device *device, int type)
 	}
 
 	acpi_thermal_remove_fs(device);
-
+	mutex_destroy(&tz->lock);
 	kfree(tz);
 	return 0;
 }
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index b8a2095cb5e..bac956b30c5 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -409,14 +409,17 @@ acpi_video_device_lcd_query_levels(struct acpi_video_device *device,
 static int
 acpi_video_device_lcd_set_level(struct acpi_video_device *device, int level)
 {
-	int status;
+	int status = AE_OK;
 	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
 	struct acpi_object_list args = { 1, &arg0 };
 
 
 	arg0.integer.value = level;
-	status = acpi_evaluate_object(device->dev->handle, "_BCM", &args, NULL);
 
+	if (device->cap._BCM)
+		status = acpi_evaluate_object(device->dev->handle, "_BCM",
+					      &args, NULL);
+	device->brightness->curr = level;
 	return status;
 }
 
@@ -424,11 +427,11 @@ static int
 acpi_video_device_lcd_get_level_current(struct acpi_video_device *device,
 					unsigned long *level)
 {
-	int status;
-
-	status = acpi_evaluate_integer(device->dev->handle, "_BQC", NULL, level);
-
-	return status;
+	if (device->cap._BQC)
+		return acpi_evaluate_integer(device->dev->handle, "_BQC", NULL,
+					     level);
+	*level = device->brightness->curr;
+	return AE_OK;
 }
 
 static int
@@ -1633,9 +1636,20 @@ static int
 acpi_video_get_next_level(struct acpi_video_device *device,
 			  u32 level_current, u32 event)
 {
-	int min, max, min_above, max_below, i, l;
+	int min, max, min_above, max_below, i, l, delta = 255;
 	max = max_below = 0;
 	min = min_above = 255;
+	/* Find closest level to level_current */
+	for (i = 0; i < device->brightness->count; i++) {
+		l = device->brightness->levels[i];
+		if (abs(l - level_current) < abs(delta)) {
+			delta = l - level_current;
+			if (!delta)
+				break;
+		}
+	}
+	/* Ajust level_current to closest available level */
+	level_current += delta;
 	for (i = 0; i < device->brightness->count; i++) {
 		l = device->brightness->levels[i];
 		if (l < min)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index cb136a919f2..ac4a0cb217a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -188,7 +188,7 @@ static int sock_xmit(struct nbd_device *lo, int send, void *buf, int size,
 		if (signal_pending(current)) {
 			siginfo_t info;
 			printk(KERN_WARNING "nbd (pid %d: %s) got signal %d\n",
-				current->pid, current->comm,
+				task_pid_nr(current), current->comm,
 				dequeue_signal_lock(current, &current->blocked, &info));
 			result = -EINTR;
 			sock_shutdown(lo, !send);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index d70745c8425..af056105316 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1107,7 +1107,7 @@ int open_for_data(struct cdrom_device_info * cdi)
 		       is the default case! */
 		    cdinfo(CD_OPEN, "bummer. wrong media type.\n"); 
 		    cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n",
-					(unsigned int)current->pid); 
+					(unsigned int)task_pid_nr(current));
 		    ret=-EMEDIUMTYPE;
 		    goto clean_up_and_return;
 		}
diff --git a/drivers/char/drm/drm_bufs.c b/drivers/char/drm/drm_bufs.c
index 856774fbe02..d24a6c2c2c2 100644
--- a/drivers/char/drm/drm_bufs.c
+++ b/drivers/char/drm/drm_bufs.c
@@ -1456,7 +1456,7 @@ int drm_freebufs(struct drm_device *dev, void *data,
 		buf = dma->buflist[idx];
 		if (buf->file_priv != file_priv) {
 			DRM_ERROR("Process %d freeing buffer not owned\n",
-				  current->pid);
+				  task_pid_nr(current));
 			return -EINVAL;
 		}
 		drm_free_buffer(dev, buf);
diff --git a/drivers/char/drm/drm_drv.c b/drivers/char/drm/drm_drv.c
index 72668b15e5c..44a46268b02 100644
--- a/drivers/char/drm/drm_drv.c
+++ b/drivers/char/drm/drm_drv.c
@@ -463,7 +463,7 @@ int drm_ioctl(struct inode *inode, struct file *filp,
 	++file_priv->ioctl_count;
 
 	DRM_DEBUG("pid=%d, cmd=0x%02x, nr=0x%02x, dev 0x%lx, auth=%d\n",
-		  current->pid, cmd, nr,
+		  task_pid_nr(current), cmd, nr,
 		  (long)old_encode_dev(file_priv->head->device),
 		  file_priv->authenticated);
 
diff --git a/drivers/char/drm/drm_fops.c b/drivers/char/drm/drm_fops.c
index f383fc37190..3992f73299c 100644
--- a/drivers/char/drm/drm_fops.c
+++ b/drivers/char/drm/drm_fops.c
@@ -234,7 +234,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	if (!drm_cpu_valid())
 		return -EINVAL;
 
-	DRM_DEBUG("pid = %d, minor = %d\n", current->pid, minor);
+	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor);
 
 	priv = drm_alloc(sizeof(*priv), DRM_MEM_FILES);
 	if (!priv)
@@ -244,7 +244,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	filp->private_data = priv;
 	priv->filp = filp;
 	priv->uid = current->euid;
-	priv->pid = current->pid;
+	priv->pid = task_pid_nr(current);
 	priv->minor = minor;
 	priv->head = drm_heads[minor];
 	priv->ioctl_count = 0;
@@ -339,7 +339,8 @@ int drm_release(struct inode *inode, struct file *filp)
 	 */
 
 	DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
-		  current->pid, (long)old_encode_dev(file_priv->head->device),
+		  task_pid_nr(current),
+		  (long)old_encode_dev(file_priv->head->device),
 		  dev->open_count);
 
 	if (dev->driver->reclaim_buffers_locked && dev->lock.hw_lock) {
diff --git a/drivers/char/drm/drm_lock.c b/drivers/char/drm/drm_lock.c
index c6b73e744d6..bea2a7d5b2b 100644
--- a/drivers/char/drm/drm_lock.c
+++ b/drivers/char/drm/drm_lock.c
@@ -58,12 +58,12 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
 	if (lock->context == DRM_KERNEL_CONTEXT) {
 		DRM_ERROR("Process %d using kernel context %d\n",
-			  current->pid, lock->context);
+			  task_pid_nr(current), lock->context);
 		return -EINVAL;
 	}
 
 	DRM_DEBUG("%d (pid %d) requests lock (0x%08x), flags = 0x%08x\n",
-		  lock->context, current->pid,
+		  lock->context, task_pid_nr(current),
 		  dev->lock.hw_lock->lock, lock->flags);
 
 	if (drm_core_check_feature(dev, DRIVER_DMA_QUEUE))
@@ -153,7 +153,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
 	if (lock->context == DRM_KERNEL_CONTEXT) {
 		DRM_ERROR("Process %d using kernel context %d\n",
-			  current->pid, lock->context);
+			  task_pid_nr(current), lock->context);
 		return -EINVAL;
 	}
 
diff --git a/drivers/char/drm/drm_os_linux.h b/drivers/char/drm/drm_os_linux.h
index 114e54e0f61..76e44ac94fb 100644
--- a/drivers/char/drm/drm_os_linux.h
+++ b/drivers/char/drm/drm_os_linux.h
@@ -7,7 +7,7 @@
 #include <linux/delay.h>
 
 /** Current process ID */
-#define DRM_CURRENTPID			current->pid
+#define DRM_CURRENTPID			task_pid_nr(current)
 #define DRM_SUSER(p)			capable(CAP_SYS_ADMIN)
 #define DRM_UDELAY(d)			udelay(d)
 /** Read a byte from a MMIO region */
diff --git a/drivers/char/drm/i810_dma.c b/drivers/char/drm/i810_dma.c
index 8e841bdee6d..eb381a7c5be 100644
--- a/drivers/char/drm/i810_dma.c
+++ b/drivers/char/drm/i810_dma.c
@@ -1024,7 +1024,7 @@ static int i810_getbuf(struct drm_device *dev, void *data,
 	retcode = i810_dma_get_buffer(dev, d, file_priv);
 
 	DRM_DEBUG("i810_dma: %d returning %d, granted = %d\n",
-		  current->pid, retcode, d->granted);
+		  task_pid_nr(current), retcode, d->granted);
 
 	sarea_priv->last_dispatch = (int)hw_status[5];
 
diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c
index 43a1f78712d..69a363edb0d 100644
--- a/drivers/char/drm/i830_dma.c
+++ b/drivers/char/drm/i830_dma.c
@@ -1409,7 +1409,7 @@ static int i830_getbuf(struct drm_device *dev, void *data,
 	retcode = i830_dma_get_buffer(dev, d, file_priv);
 
 	DRM_DEBUG("i830_dma: %d returning %d, granted = %d\n",
-		  current->pid, retcode, d->granted);
+		  task_pid_nr(current), retcode, d->granted);
 
 	sarea_priv->last_dispatch = (int)hw_status[5];
 
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index 2e7ae42a550..0f8fb135da5 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -58,10 +58,10 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 #include <asm/dma.h>
 #include <linux/slab.h>
diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index 212276affa1..fc54d234507 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -42,6 +42,7 @@
 #include <linux/sysrq.h>
 #include <linux/input.h>
 #include <linux/reboot.h>
+#include <linux/notifier.h>
 
 extern void ctrl_alt_del(void);
 
@@ -81,7 +82,8 @@ void compute_shiftstate(void);
 typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
 			    char up_flag);
 static k_handler_fn K_HANDLERS;
-static k_handler_fn *k_handler[16] = { K_HANDLERS };
+k_handler_fn *k_handler[16] = { K_HANDLERS };
+EXPORT_SYMBOL_GPL(k_handler);
 
 #define FN_HANDLERS\
 	fn_null,	fn_enter,	fn_show_ptregs,	fn_show_mem,\
@@ -127,7 +129,7 @@ int shift_state = 0;
  */
 
 static struct input_handler kbd_handler;
-static unsigned long key_down[NBITS(KEY_MAX)];		/* keyboard key bitmap */
+static unsigned long key_down[BITS_TO_LONGS(KEY_CNT)];	/* keyboard key bitmap */
 static unsigned char shift_down[NR_SHIFT];		/* shift state counters.. */
 static int dead_key_next;
 static int npadch = -1;					/* -1 or number assembled on pad */
@@ -160,6 +162,23 @@ static int sysrq_alt_use;
 static int sysrq_alt;
 
 /*
+ * Notifier list for console keyboard events
+ */
+static ATOMIC_NOTIFIER_HEAD(keyboard_notifier_list);
+
+int register_keyboard_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_keyboard_notifier);
+
+int unregister_keyboard_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&keyboard_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_keyboard_notifier);
+
+/*
  * Translation of scancodes to keycodes. We set them on only the first
  * keyboard in the list that accepts the scancode and keycode.
  * Explanation for not choosing the first attached keyboard anymore:
@@ -1130,6 +1149,7 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 	unsigned char type, raw_mode;
 	struct tty_struct *tty;
 	int shift_final;
+	struct keyboard_notifier_param param = { .vc = vc, .value = keycode, .down = down };
 
 	tty = vc->vc_tty;
 
@@ -1217,10 +1237,11 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 		return;
 	}
 
-	shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
+	param.shift = shift_final = (shift_state | kbd->slockstate) ^ kbd->lockstate;
 	key_map = key_maps[shift_final];
 
-	if (!key_map) {
+	if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYCODE, &param) == NOTIFY_STOP || !key_map) {
+		atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNBOUND_KEYCODE, &param);
 		compute_shiftstate();
 		kbd->slockstate = 0;
 		return;
@@ -1237,6 +1258,9 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 	type = KTYP(keysym);
 
 	if (type < 0xf0) {
+		param.value = keysym;
+		if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_UNICODE, &param) == NOTIFY_STOP)
+			return;
 		if (down && !raw_mode)
 			to_utf8(vc, keysym);
 		return;
@@ -1244,9 +1268,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 
 	type -= 0xf0;
 
-	if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
-		return;
-
 	if (type == KT_LETTER) {
 		type = KT_LATIN;
 		if (vc_kbd_led(kbd, VC_CAPSLOCK)) {
@@ -1255,9 +1276,18 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 				keysym = key_map[keycode];
 		}
 	}
+	param.value = keysym;
+
+	if (atomic_notifier_call_chain(&keyboard_notifier_list, KBD_KEYSYM, &param) == NOTIFY_STOP)
+		return;
+
+	if (raw_mode && type != KT_SPEC && type != KT_SHIFT)
+		return;
 
 	(*k_handler[type])(vc, keysym & 0xff, !down);
 
+	atomic_notifier_call_chain(&keyboard_notifier_list, KBD_POST_KEYSYM, &param);
+
 	if (type != KT_SLOCK)
 		kbd->slockstate = 0;
 }
@@ -1347,12 +1377,12 @@ static void kbd_start(struct input_handle *handle)
 static const struct input_device_id kbd_ids[] = {
 	{
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_KEY) },
+                .evbit = { BIT_MASK(EV_KEY) },
         },
 
 	{
                 .flags = INPUT_DEVICE_ID_MATCH_EVBIT,
-                .evbit = { BIT(EV_SND) },
+                .evbit = { BIT_MASK(EV_SND) },
         },
 
 	{ },    /* Terminating entry */
diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c
index 661aca0e155..fd0abef7ee0 100644
--- a/drivers/char/mxser.c
+++ b/drivers/char/mxser.c
@@ -56,11 +56,11 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser.h"
diff --git a/drivers/char/mxser_new.c b/drivers/char/mxser_new.c
index 854dbf59eb6..081c84c7b54 100644
--- a/drivers/char/mxser_new.c
+++ b/drivers/char/mxser_new.c
@@ -39,11 +39,11 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include "mxser_new.h"
diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c
index 859858561ab..9782cb4d30d 100644
--- a/drivers/char/sonypi.c
+++ b/drivers/char/sonypi.c
@@ -1178,9 +1178,9 @@ static int __devinit sonypi_create_input_devices(void)
 	jog_dev->id.bustype = BUS_ISA;
 	jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-	jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-	jog_dev->relbit[0] = BIT(REL_WHEEL);
+	jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+	jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
 	sonypi_device.input_key_dev = key_dev = input_allocate_device();
 	if (!key_dev) {
@@ -1193,7 +1193,7 @@ static int __devinit sonypi_create_input_devices(void)
 	key_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
 	/* Initialize the Input Drivers: special keys */
-	key_dev->evbit[0] = BIT(EV_KEY);
+	key_dev->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; sonypi_inputkeys[i].sonypiev; i++)
 		if (sonypi_inputkeys[i].inputev)
 			set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
diff --git a/drivers/char/sx.c b/drivers/char/sx.c
index 85a23283dff..a6e1c9ba121 100644
--- a/drivers/char/sx.c
+++ b/drivers/char/sx.c
@@ -1467,7 +1467,7 @@ static int sx_open(struct tty_struct *tty, struct file *filp)
 
 	line = tty->index;
 	sx_dprintk(SX_DEBUG_OPEN, "%d: opening line %d. tty=%p ctty=%p, "
-			"np=%d)\n", current->pid, line, tty,
+			"np=%d)\n", task_pid_nr(current), line, tty,
 			current->signal->tty, sx_nports);
 
 	if ((line < 0) || (line >= SX_NPORTS) || (line >= sx_nports))
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 78d14935f2b..de60e1ea4fb 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -251,7 +251,7 @@ static void send_sig_all(int sig)
 	struct task_struct *p;
 
 	for_each_process(p) {
-		if (p->mm && !is_init(p))
+		if (p->mm && !is_global_init(p))
 			/* Not swapper, init nor kernel thread */
 			force_sig(sig, p);
 	}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 9c867cf6de6..13a53575a01 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -103,6 +103,7 @@
 #include <linux/selection.h>
 
 #include <linux/kmod.h>
+#include <linux/nsproxy.h>
 
 #undef TTY_DEBUG_HANGUP
 
@@ -3107,7 +3108,7 @@ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	 */
 	if (tty == real_tty && current->signal->tty != real_tty)
 		return -ENOTTY;
-	return put_user(pid_nr(real_tty->pgrp), p);
+	return put_user(pid_vnr(real_tty->pgrp), p);
 }
 
 /**
@@ -3141,7 +3142,7 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t
 	if (pgrp_nr < 0)
 		return -EINVAL;
 	rcu_read_lock();
-	pgrp = find_pid(pgrp_nr);
+	pgrp = find_vpid(pgrp_nr);
 	retval = -ESRCH;
 	if (!pgrp)
 		goto out_unlock;
@@ -3178,7 +3179,7 @@ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t _
 		return -ENOTTY;
 	if (!real_tty->session)
 		return -ENOTTY;
-	return put_user(pid_nr(real_tty->session), p);
+	return put_user(pid_vnr(real_tty->session), p);
 }
 
 /**
@@ -3528,8 +3529,8 @@ void __do_SAK(struct tty_struct *tty)
 	/* Kill the entire session */
 	do_each_pid_task(session, PIDTYPE_SID, p) {
 		printk(KERN_NOTICE "SAK: killed process %d"
-			" (%s): process_session(p)==tty->session\n",
-			p->pid, p->comm);
+			" (%s): task_session_nr(p)==tty->session\n",
+			task_pid_nr(p), p->comm);
 		send_sig(SIGKILL, p, 1);
 	} while_each_pid_task(session, PIDTYPE_SID, p);
 	/* Now kill any processes that happen to have the
@@ -3538,8 +3539,8 @@ void __do_SAK(struct tty_struct *tty)
 	do_each_thread(g, p) {
 		if (p->signal->tty == tty) {
 			printk(KERN_NOTICE "SAK: killed process %d"
-			    " (%s): process_session(p)==tty->session\n",
-			    p->pid, p->comm);
+			    " (%s): task_session_nr(p)==tty->session\n",
+			    task_pid_nr(p), p->comm);
 			send_sig(SIGKILL, p, 1);
 			continue;
 		}
@@ -3559,7 +3560,7 @@ void __do_SAK(struct tty_struct *tty)
 				    filp->private_data == tty) {
 					printk(KERN_NOTICE "SAK: killed process %d"
 					    " (%s): fd#%d opened to the tty\n",
-					    p->pid, p->comm, i);
+					    task_pid_nr(p), p->comm, i);
 					force_sig(SIGKILL, p);
 					break;
 				}
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index 1764c67b585..7a5badfb7d8 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -99,6 +99,7 @@
 #include <linux/pm.h>
 #include <linux/font.h>
 #include <linux/bitops.h>
+#include <linux/notifier.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -223,6 +224,35 @@ enum {
 };
 
 /*
+ * Notifier list for console events.
+ */
+static ATOMIC_NOTIFIER_HEAD(vt_notifier_list);
+
+int register_vt_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_vt_notifier);
+
+int unregister_vt_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&vt_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_vt_notifier);
+
+static void notify_write(struct vc_data *vc, unsigned int unicode)
+{
+	struct vt_notifier_param param = { .vc = vc, unicode = unicode };
+	atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param);
+}
+
+static void notify_update(struct vc_data *vc)
+{
+	struct vt_notifier_param param = { .vc = vc };
+	atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param);
+}
+
+/*
  *	Low-Level Functions
  */
 
@@ -718,6 +748,7 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 		return -ENXIO;
 	if (!vc_cons[currcons].d) {
 	    struct vc_data *vc;
+	    struct vt_notifier_param param;
 
 	    /* prevent users from taking too much memory */
 	    if (currcons >= MAX_NR_USER_CONSOLES && !capable(CAP_SYS_RESOURCE))
@@ -729,7 +760,7 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 	    /* although the numbers above are not valid since long ago, the
 	       point is still up-to-date and the comment still has its value
 	       even if only as a historical artifact.  --mj, July 1998 */
-	    vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
+	    param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL);
 	    if (!vc)
 		return -ENOMEM;
 	    vc_cons[currcons].d = vc;
@@ -746,6 +777,7 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 	    }
 	    vc->vc_kmalloced = 1;
 	    vc_init(vc, vc->vc_rows, vc->vc_cols, 1);
+	    atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param);
 	}
 	return 0;
 }
@@ -907,6 +939,8 @@ void vc_deallocate(unsigned int currcons)
 
 	if (vc_cons_allocated(currcons)) {
 		struct vc_data *vc = vc_cons[currcons].d;
+		struct vt_notifier_param param = { .vc = vc };
+		atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
 		vc->vc_sw->con_deinit(vc);
 		put_pid(vc->vt_pid);
 		module_put(vc->vc_sw->owner);
@@ -1019,6 +1053,7 @@ static void lf(struct vc_data *vc)
 		vc->vc_pos += vc->vc_size_row;
 	}
 	vc->vc_need_wrap = 0;
+	notify_write(vc, '\n');
 }
 
 static void ri(struct vc_data *vc)
@@ -1039,6 +1074,7 @@ static inline void cr(struct vc_data *vc)
 {
 	vc->vc_pos -= vc->vc_x << 1;
 	vc->vc_need_wrap = vc->vc_x = 0;
+	notify_write(vc, '\r');
 }
 
 static inline void bs(struct vc_data *vc)
@@ -1047,6 +1083,7 @@ static inline void bs(struct vc_data *vc)
 		vc->vc_pos -= 2;
 		vc->vc_x--;
 		vc->vc_need_wrap = 0;
+		notify_write(vc, '\b');
 	}
 }
 
@@ -1593,6 +1630,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 				break;
 		}
 		vc->vc_pos += (vc->vc_x << 1);
+		notify_write(vc, '\t');
 		return;
 	case 10: case 11: case 12:
 		lf(vc);
@@ -2252,6 +2290,7 @@ rescan_last_byte:
 				tc = conv_uni_to_pc(vc, ' '); /* A space is printed in the second column */
 				if (tc < 0) tc = ' ';
 			}
+			notify_write(vc, c);
 
 			if (inverse) {
 				FLUSH
@@ -2274,6 +2313,7 @@ rescan_last_byte:
 	release_console_sem();
 
 out:
+	notify_update(vc);
 	return n;
 #undef FLUSH
 }
@@ -2317,6 +2357,7 @@ static void console_callback(struct work_struct *ignored)
 		do_blank_screen(0);
 		blank_timer_expired = 0;
 	}
+	notify_update(vc_cons[fg_console].d);
 
 	release_console_sem();
 }
@@ -2418,6 +2459,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
 				continue;
 		}
 		scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
+		notify_write(vc, c);
 		cnt++;
 		if (myx == vc->vc_cols - 1) {
 			vc->vc_need_wrap = 1;
@@ -2436,6 +2478,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
 		}
 	}
 	set_cursor(vc);
+	notify_update(vc);
 
 quit:
 	clear_bit(0, &printing);
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
new file mode 100644
index 00000000000..3bed4127d4a
--- /dev/null
+++ b/drivers/cpuidle/Kconfig
@@ -0,0 +1,20 @@
+
+config CPU_IDLE
+	bool "CPU idle PM support"
+	help
+	  CPU idle is a generic framework for supporting software-controlled
+	  idle processor power management.  It includes modular cross-platform
+	  governors that can be swapped during runtime.
+
+	  If you're using a mobile platform that supports CPU idle PM (e.g.
+	  an ACPI-capable notebook), you should say Y here.
+
+config CPU_IDLE_GOV_LADDER
+	bool
+	depends on CPU_IDLE
+	default y
+
+config CPU_IDLE_GOV_MENU
+	bool
+	depends on CPU_IDLE && NO_HZ
+	default y
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
new file mode 100644
index 00000000000..5634f88379d
--- /dev/null
+++ b/drivers/cpuidle/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for cpuidle.
+#
+
+obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
new file mode 100644
index 00000000000..fdf4106b817
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.c
@@ -0,0 +1,295 @@
+/*
+ * cpuidle.c - core cpuidle infrastructure
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/latency.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices);
+
+DEFINE_MUTEX(cpuidle_lock);
+LIST_HEAD(cpuidle_detected_devices);
+static void (*pm_idle_old)(void);
+
+static int enabled_devices;
+
+/**
+ * cpuidle_idle_call - the main idle loop
+ *
+ * NOTE: no locks or semaphores should be used here
+ */
+static void cpuidle_idle_call(void)
+{
+	struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
+	struct cpuidle_state *target_state;
+	int next_state;
+
+	/* check if the device is ready */
+	if (!dev || !dev->enabled) {
+		if (pm_idle_old)
+			pm_idle_old();
+		else
+			local_irq_enable();
+		return;
+	}
+
+	/* ask the governor for the next state */
+	next_state = cpuidle_curr_governor->select(dev);
+	if (need_resched())
+		return;
+	target_state = &dev->states[next_state];
+
+	/* enter the state and update stats */
+	dev->last_residency = target_state->enter(dev, target_state);
+	dev->last_state = target_state;
+	target_state->time += dev->last_residency;
+	target_state->usage++;
+
+	/* give the governor an opportunity to reflect on the outcome */
+	if (cpuidle_curr_governor->reflect)
+		cpuidle_curr_governor->reflect(dev);
+}
+
+/**
+ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
+ */
+void cpuidle_install_idle_handler(void)
+{
+	if (enabled_devices && (pm_idle != cpuidle_idle_call)) {
+		/* Make sure all changes finished before we switch to new idle */
+		smp_wmb();
+		pm_idle = cpuidle_idle_call;
+	}
+}
+
+/**
+ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
+ */
+void cpuidle_uninstall_idle_handler(void)
+{
+	if (enabled_devices && (pm_idle != pm_idle_old)) {
+		pm_idle = pm_idle_old;
+		cpu_idle_wait();
+	}
+}
+
+/**
+ * cpuidle_pause_and_lock - temporarily disables CPUIDLE
+ */
+void cpuidle_pause_and_lock(void)
+{
+	mutex_lock(&cpuidle_lock);
+	cpuidle_uninstall_idle_handler();
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
+
+/**
+ * cpuidle_resume_and_unlock - resumes CPUIDLE operation
+ */
+void cpuidle_resume_and_unlock(void)
+{
+	cpuidle_install_idle_handler();
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
+
+/**
+ * cpuidle_enable_device - enables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+int cpuidle_enable_device(struct cpuidle_device *dev)
+{
+	int ret, i;
+
+	if (dev->enabled)
+		return 0;
+	if (!cpuidle_curr_driver || !cpuidle_curr_governor)
+		return -EIO;
+	if (!dev->state_count)
+		return -EINVAL;
+
+	if ((ret = cpuidle_add_state_sysfs(dev)))
+		return ret;
+
+	if (cpuidle_curr_governor->enable &&
+	    (ret = cpuidle_curr_governor->enable(dev)))
+		goto fail_sysfs;
+
+	for (i = 0; i < dev->state_count; i++) {
+		dev->states[i].usage = 0;
+		dev->states[i].time = 0;
+	}
+	dev->last_residency = 0;
+	dev->last_state = NULL;
+
+	smp_wmb();
+
+	dev->enabled = 1;
+
+	enabled_devices++;
+	return 0;
+
+fail_sysfs:
+	cpuidle_remove_state_sysfs(dev);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_enable_device);
+
+/**
+ * cpuidle_disable_device - disables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+void cpuidle_disable_device(struct cpuidle_device *dev)
+{
+	if (!dev->enabled)
+		return;
+	if (!cpuidle_curr_driver || !cpuidle_curr_governor)
+		return;
+
+	dev->enabled = 0;
+
+	if (cpuidle_curr_governor->disable)
+		cpuidle_curr_governor->disable(dev);
+
+	cpuidle_remove_state_sysfs(dev);
+	enabled_devices--;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_disable_device);
+
+/**
+ * cpuidle_register_device - registers a CPU's idle PM feature
+ * @dev: the cpu
+ */
+int cpuidle_register_device(struct cpuidle_device *dev)
+{
+	int ret;
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+
+	if (!sys_dev)
+		return -EINVAL;
+	if (!try_module_get(cpuidle_curr_driver->owner))
+		return -EINVAL;
+
+	init_completion(&dev->kobj_unregister);
+
+	mutex_lock(&cpuidle_lock);
+
+	per_cpu(cpuidle_devices, dev->cpu) = dev;
+	list_add(&dev->device_list, &cpuidle_detected_devices);
+	if ((ret = cpuidle_add_sysfs(sys_dev))) {
+		mutex_unlock(&cpuidle_lock);
+		module_put(cpuidle_curr_driver->owner);
+		return ret;
+	}
+
+	cpuidle_enable_device(dev);
+	cpuidle_install_idle_handler();
+
+	mutex_unlock(&cpuidle_lock);
+
+	return 0;
+
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_device);
+
+/**
+ * cpuidle_unregister_device - unregisters a CPU's idle PM feature
+ * @dev: the cpu
+ */
+void cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+	struct sys_device *sys_dev = get_cpu_sysdev((unsigned long)dev->cpu);
+
+	cpuidle_pause_and_lock();
+
+	cpuidle_disable_device(dev);
+
+	cpuidle_remove_sysfs(sys_dev);
+	list_del(&dev->device_list);
+	wait_for_completion(&dev->kobj_unregister);
+	per_cpu(cpuidle_devices, dev->cpu) = NULL;
+
+	cpuidle_resume_and_unlock();
+
+	module_put(cpuidle_curr_driver->owner);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
+
+#ifdef CONFIG_SMP
+
+static void smp_callback(void *v)
+{
+	/* we already woke the CPU up, nothing more to do */
+}
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement.  This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int cpuidle_latency_notify(struct notifier_block *b,
+		unsigned long l, void *v)
+{
+	smp_call_function(smp_callback, NULL, 0, 1);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_latency_notifier = {
+	.notifier_call = cpuidle_latency_notify,
+};
+
+#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
+
+#else /* CONFIG_SMP */
+
+#define latency_notifier_init(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+/**
+ * cpuidle_init - core initializer
+ */
+static int __init cpuidle_init(void)
+{
+	int ret;
+
+	pm_idle_old = pm_idle;
+
+	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
+	if (ret)
+		return ret;
+
+	latency_notifier_init(&cpuidle_latency_notifier);
+
+	return 0;
+}
+
+core_initcall(cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
new file mode 100644
index 00000000000..9476ba33ee2
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.h
@@ -0,0 +1,33 @@
+/*
+ * cpuidle.h - The internal header file
+ */
+
+#ifndef __DRIVER_CPUIDLE_H
+#define __DRIVER_CPUIDLE_H
+
+#include <linux/sysdev.h>
+
+/* For internal use only */
+extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_driver *cpuidle_curr_driver;
+extern struct list_head cpuidle_governors;
+extern struct list_head cpuidle_detected_devices;
+extern struct mutex cpuidle_lock;
+extern spinlock_t cpuidle_driver_lock;
+
+/* idle loop */
+extern void cpuidle_install_idle_handler(void);
+extern void cpuidle_uninstall_idle_handler(void);
+
+/* governors */
+extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
+
+/* sysfs */
+extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
+extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
+extern int cpuidle_add_state_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_sysfs(struct sys_device *sysdev);
+extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
+
+#endif /* __DRIVER_CPUIDLE_H */
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
new file mode 100644
index 00000000000..2257004fe33
--- /dev/null
+++ b/drivers/cpuidle/driver.c
@@ -0,0 +1,56 @@
+/*
+ * driver.c - driver support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+struct cpuidle_driver *cpuidle_curr_driver;
+DEFINE_SPINLOCK(cpuidle_driver_lock);
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: the driver
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return -EINVAL;
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_curr_driver) {
+		spin_unlock(&cpuidle_driver_lock);
+		return -EBUSY;
+	}
+	cpuidle_curr_driver = drv;
+	spin_unlock(&cpuidle_driver_lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
+
+/**
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: the driver
+ */
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+	if (!drv)
+		return;
+
+	spin_lock(&cpuidle_driver_lock);
+	cpuidle_curr_driver = NULL;
+	spin_unlock(&cpuidle_driver_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
new file mode 100644
index 00000000000..bb699cb2dc5
--- /dev/null
+++ b/drivers/cpuidle/governor.c
@@ -0,0 +1,141 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+/**
+ * __cpuidle_find_governor - finds a governor of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock aquired.
+ */
+static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+{
+	struct cpuidle_governor *gov;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list)
+		if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
+			return gov;
+
+	return NULL;
+}
+
+/**
+ * cpuidle_switch_governor - changes the governor
+ * @gov: the new target governor
+ *
+ * NOTE: "gov" can be NULL to specify disabled
+ * Must be called with cpuidle_lock aquired.
+ */
+int cpuidle_switch_governor(struct cpuidle_governor *gov)
+{
+	struct cpuidle_device *dev;
+
+	if (gov == cpuidle_curr_governor)
+		return 0;
+
+	cpuidle_uninstall_idle_handler();
+
+	if (cpuidle_curr_governor) {
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_disable_device(dev);
+		module_put(cpuidle_curr_governor->owner);
+	}
+
+	cpuidle_curr_governor = gov;
+
+	if (gov) {
+		if (!try_module_get(cpuidle_curr_governor->owner))
+			return -EINVAL;
+		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+			cpuidle_enable_device(dev);
+		cpuidle_install_idle_handler();
+		printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
+	}
+
+	return 0;
+}
+
+/**
+ * cpuidle_register_governor - registers a governor
+ * @gov: the governor
+ */
+int cpuidle_register_governor(struct cpuidle_governor *gov)
+{
+	int ret = -EEXIST;
+
+	if (!gov || !gov->select)
+		return -EINVAL;
+
+	mutex_lock(&cpuidle_lock);
+	if (__cpuidle_find_governor(gov->name) == NULL) {
+		ret = 0;
+		list_add_tail(&gov->governor_list, &cpuidle_governors);
+		if (!cpuidle_curr_governor ||
+		    cpuidle_curr_governor->rating < gov->rating)
+			cpuidle_switch_governor(gov);
+	}
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_governor);
+
+/**
+ * cpuidle_replace_governor - find a replacement governor
+ * @exclude_rating: the rating that will be skipped while looking for
+ * new governor.
+ */
+static struct cpuidle_governor *cpuidle_replace_governor(int exclude_rating)
+{
+	struct cpuidle_governor *gov;
+	struct cpuidle_governor *ret_gov = NULL;
+	unsigned int max_rating = 0;
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (gov->rating == exclude_rating)
+			continue;
+		if (gov->rating > max_rating) {
+			max_rating = gov->rating;
+			ret_gov = gov;
+		}
+	}
+
+	return ret_gov;
+}
+
+/**
+ * cpuidle_unregister_governor - unregisters a governor
+ * @gov: the governor
+ */
+void cpuidle_unregister_governor(struct cpuidle_governor *gov)
+{
+	if (!gov)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (gov == cpuidle_curr_governor) {
+		struct cpuidle_governor *new_gov;
+		new_gov = cpuidle_replace_governor(gov->rating);
+		cpuidle_switch_governor(new_gov);
+	}
+	list_del(&gov->governor_list);
+	mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_governor);
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
new file mode 100644
index 00000000000..1b512722689
--- /dev/null
+++ b/drivers/cpuidle/governors/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for cpuidle governors.
+#
+
+obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
+obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
new file mode 100644
index 00000000000..eb666ecae7c
--- /dev/null
+++ b/drivers/cpuidle/governors/ladder.c
@@ -0,0 +1,166 @@
+/*
+ * ladder.c - the residency ladder algorithm
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *               Shaohua Li <shaohua.li@intel.com>
+ *               Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/moduleparam.h>
+#include <linux/jiffies.h>
+
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#define PROMOTION_COUNT 4
+#define DEMOTION_COUNT 1
+
+struct ladder_device_state {
+	struct {
+		u32 promotion_count;
+		u32 demotion_count;
+		u32 promotion_time;
+		u32 demotion_time;
+	} threshold;
+	struct {
+		int promotion_count;
+		int demotion_count;
+	} stats;
+};
+
+struct ladder_device {
+	struct ladder_device_state states[CPUIDLE_STATE_MAX];
+	int last_state_idx;
+};
+
+static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+				       int old_idx, int new_idx)
+{
+	ldev->states[old_idx].stats.promotion_count = 0;
+	ldev->states[old_idx].stats.demotion_count = 0;
+	ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @dev: the CPU
+ */
+static int ladder_select_state(struct cpuidle_device *dev)
+{
+	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
+	struct ladder_device_state *last_state;
+	int last_residency, last_idx = ldev->last_state_idx;
+
+	if (unlikely(!ldev))
+		return 0;
+
+	last_state = &ldev->states[last_idx];
+
+	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
+		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
+	else
+		last_residency = last_state->threshold.promotion_time + 1;
+
+	/* consider promotion */
+	if (last_idx < dev->state_count - 1 &&
+	    last_residency > last_state->threshold.promotion_time &&
+	    dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
+		last_state->stats.promotion_count++;
+		last_state->stats.demotion_count = 0;
+		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			return last_idx + 1;
+		}
+	}
+
+	/* consider demotion */
+	if (last_idx > 0 &&
+	    last_residency < last_state->threshold.demotion_time) {
+		last_state->stats.demotion_count++;
+		last_state->stats.promotion_count = 0;
+		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
+			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			return last_idx - 1;
+		}
+	}
+
+	/* otherwise remain at the current state */
+	return last_idx;
+}
+
+/**
+ * ladder_enable_device - setup for the governor
+ * @dev: the CPU
+ */
+static int ladder_enable_device(struct cpuidle_device *dev)
+{
+	int i;
+	struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
+	struct ladder_device_state *lstate;
+	struct cpuidle_state *state;
+
+	ldev->last_state_idx = 0;
+
+	for (i = 0; i < dev->state_count; i++) {
+		state = &dev->states[i];
+		lstate = &ldev->states[i];
+
+		lstate->stats.promotion_count = 0;
+		lstate->stats.demotion_count = 0;
+
+		lstate->threshold.promotion_count = PROMOTION_COUNT;
+		lstate->threshold.demotion_count = DEMOTION_COUNT;
+
+		if (i < dev->state_count - 1)
+			lstate->threshold.promotion_time = state->exit_latency;
+		if (i > 0)
+			lstate->threshold.demotion_time = state->exit_latency;
+	}
+
+	return 0;
+}
+
+static struct cpuidle_governor ladder_governor = {
+	.name =		"ladder",
+	.rating =	10,
+	.enable =	ladder_enable_device,
+	.select =	ladder_select_state,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_ladder - initializes the governor
+ */
+static int __init init_ladder(void)
+{
+	return cpuidle_register_governor(&ladder_governor);
+}
+
+/**
+ * exit_ladder - exits the governor
+ */
+static void __exit exit_ladder(void)
+{
+	cpuidle_unregister_governor(&ladder_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_ladder);
+module_exit(exit_ladder);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
new file mode 100644
index 00000000000..299d45c3bdd
--- /dev/null
+++ b/drivers/cpuidle/governors/menu.c
@@ -0,0 +1,137 @@
+/*
+ * menu.c - the menu idle governor
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/latency.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#define BREAK_FUZZ	4	/* 4 us */
+
+struct menu_device {
+	int		last_state_idx;
+
+	unsigned int	expected_us;
+	unsigned int	predicted_us;
+	unsigned int	last_measured_us;
+	unsigned int	elapsed_us;
+};
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+/**
+ * menu_select - selects the next idle state to enter
+ * @dev: the CPU
+ */
+static int menu_select(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int i;
+
+	/* determine the expected residency time */
+	data->expected_us =
+		(u32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
+
+	/* find the deepest idle state that satisfies our constraints */
+	for (i = 1; i < dev->state_count; i++) {
+		struct cpuidle_state *s = &dev->states[i];
+
+		if (s->target_residency > data->expected_us)
+			break;
+		if (s->target_residency > data->predicted_us)
+			break;
+		if (s->exit_latency > system_latency_constraint())
+			break;
+	}
+
+	data->last_state_idx = i - 1;
+	return i - 1;
+}
+
+/**
+ * menu_reflect - attempts to guess what happened after entry
+ * @dev: the CPU
+ *
+ * NOTE: it's important to be fast here because this operation will add to
+ *       the overall exit latency.
+ */
+static void menu_reflect(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &__get_cpu_var(menu_devices);
+	int last_idx = data->last_state_idx;
+	unsigned int measured_us =
+		cpuidle_get_last_residency(dev) + data->elapsed_us;
+	struct cpuidle_state *target = &dev->states[last_idx];
+
+	/*
+	 * Ugh, this idle state doesn't support residency measurements, so we
+	 * are basically lost in the dark.  As a compromise, assume we slept
+	 * for one full standard timer tick.  However, be aware that this
+	 * could potentially result in a suboptimal state transition.
+	 */
+	if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
+		measured_us = USEC_PER_SEC / HZ;
+
+	/* Predict time remaining until next break event */
+	if (measured_us + BREAK_FUZZ < data->expected_us - target->exit_latency) {
+		data->predicted_us = max(measured_us, data->last_measured_us);
+		data->last_measured_us = measured_us;
+		data->elapsed_us = 0;
+	} else {
+		if (data->elapsed_us < data->elapsed_us + measured_us)
+			data->elapsed_us = measured_us;
+		else
+			data->elapsed_us = -1;
+		data->predicted_us = max(measured_us, data->last_measured_us);
+	}
+}
+
+/**
+ * menu_enable_device - scans a CPU's states and does setup
+ * @dev: the CPU
+ */
+static int menu_enable_device(struct cpuidle_device *dev)
+{
+	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+
+	memset(data, 0, sizeof(struct menu_device));
+
+	return 0;
+}
+
+static struct cpuidle_governor menu_governor = {
+	.name =		"menu",
+	.rating =	20,
+	.enable =	menu_enable_device,
+	.select =	menu_select,
+	.reflect =	menu_reflect,
+	.owner =	THIS_MODULE,
+};
+
+/**
+ * init_menu - initializes the governor
+ */
+static int __init init_menu(void)
+{
+	return cpuidle_register_governor(&menu_governor);
+}
+
+/**
+ * exit_menu - exits the governor
+ */
+static void __exit exit_menu(void)
+{
+	cpuidle_unregister_governor(&menu_governor);
+}
+
+MODULE_LICENSE("GPL");
+module_init(init_menu);
+module_exit(exit_menu);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
new file mode 100644
index 00000000000..0f3515e77d4
--- /dev/null
+++ b/drivers/cpuidle/sysfs.c
@@ -0,0 +1,361 @@
+/*
+ * sysfs.c - sysfs support
+ *
+ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+
+#include "cpuidle.h"
+
+static unsigned int sysfs_switch;
+static int __init cpuidle_sysfs_setup(char *unused)
+{
+	sysfs_switch = 1;
+	return 1;
+}
+__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
+
+static ssize_t show_available_governors(struct sys_device *dev, char *buf)
+{
+	ssize_t i = 0;
+	struct cpuidle_governor *tmp;
+
+	mutex_lock(&cpuidle_lock);
+	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+		if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+			goto out;
+		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+	}
+
+out:
+	i+= sprintf(&buf[i], "\n");
+	mutex_unlock(&cpuidle_lock);
+	return i;
+}
+
+static ssize_t show_current_driver(struct sys_device *dev, char *buf)
+{
+	ssize_t ret;
+
+	spin_lock(&cpuidle_driver_lock);
+	if (cpuidle_curr_driver)
+		ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name);
+	else
+		ret = sprintf(buf, "none\n");
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+static ssize_t show_current_governor(struct sys_device *dev, char *buf)
+{
+	ssize_t ret;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_curr_governor)
+		ret = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
+	else
+		ret = sprintf(buf, "none\n");
+	mutex_unlock(&cpuidle_lock);
+
+	return ret;
+}
+
+static ssize_t store_current_governor(struct sys_device *dev,
+	const char *buf, size_t count)
+{
+	char gov_name[CPUIDLE_NAME_LEN];
+	int ret = -EINVAL;
+	size_t len = count;
+	struct cpuidle_governor *gov;
+
+	if (!len || len >= sizeof(gov_name))
+		return -EINVAL;
+
+	memcpy(gov_name, buf, len);
+	gov_name[len] = '\0';
+	if (gov_name[len - 1] == '\n')
+		gov_name[--len] = '\0';
+
+	mutex_lock(&cpuidle_lock);
+
+	list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+		if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+			ret = cpuidle_switch_governor(gov);
+			break;
+		}
+	}
+
+	mutex_unlock(&cpuidle_lock);
+
+	if (ret)
+		return ret;
+	else
+		return count;
+}
+
+static SYSDEV_ATTR(current_driver, 0444, show_current_driver, NULL);
+static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
+
+static struct attribute *cpuclass_default_attrs[] = {
+	&attr_current_driver.attr,
+	&attr_current_governor_ro.attr,
+	NULL
+};
+
+static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL);
+static SYSDEV_ATTR(current_governor, 0644, show_current_governor,
+	store_current_governor);
+
+static struct attribute *cpuclass_switch_attrs[] = {
+	&attr_available_governors.attr,
+	&attr_current_driver.attr,
+	&attr_current_governor.attr,
+	NULL
+};
+
+static struct attribute_group cpuclass_attr_group = {
+	.attrs = cpuclass_default_attrs,
+	.name = "cpuidle",
+};
+
+/**
+ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
+ */
+int cpuidle_add_class_sysfs(struct sysdev_class *cls)
+{
+	if (sysfs_switch)
+		cpuclass_attr_group.attrs = cpuclass_switch_attrs;
+
+	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+/**
+ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
+ */
+void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
+{
+	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
+}
+
+struct cpuidle_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_device *, char *);
+	ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
+};
+
+#define define_one_ro(_name, show) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_one_rw(_name, show, store) \
+	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
+#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
+static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->show) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->show(dev, buf);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
+		     const char * buf, size_t count)
+{
+	int ret = -EIO;
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+
+	if (cattr->store) {
+		mutex_lock(&cpuidle_lock);
+		ret = cattr->store(dev, buf, count);
+		mutex_unlock(&cpuidle_lock);
+	}
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_sysfs_ops = {
+	.show = cpuidle_show,
+	.store = cpuidle_store,
+};
+
+static void cpuidle_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+
+	complete(&dev->kobj_unregister);
+}
+
+static struct kobj_type ktype_cpuidle = {
+	.sysfs_ops = &cpuidle_sysfs_ops,
+	.release = cpuidle_sysfs_release,
+};
+
+struct cpuidle_state_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct cpuidle_state *, char *);
+	ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
+};
+
+#define define_one_state_ro(_name, show) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+
+#define define_show_state_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
+{ \
+	return sprintf(buf, "%u\n", state->_name);\
+}
+
+static ssize_t show_state_name(struct cpuidle_state *state, char *buf)
+{
+	return sprintf(buf, "%s\n", state->name);
+}
+
+define_show_state_function(exit_latency)
+define_show_state_function(power_usage)
+define_show_state_function(usage)
+define_show_state_function(time)
+define_one_state_ro(name, show_state_name);
+define_one_state_ro(latency, show_state_exit_latency);
+define_one_state_ro(power, show_state_power_usage);
+define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(time, show_state_time);
+
+static struct attribute *cpuidle_state_default_attrs[] = {
+	&attr_name.attr,
+	&attr_latency.attr,
+	&attr_power.attr,
+	&attr_usage.attr,
+	&attr_time.attr,
+	NULL
+};
+
+#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
+#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
+#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
+static ssize_t cpuidle_state_show(struct kobject * kobj,
+	struct attribute * attr ,char * buf)
+{
+	int ret = -EIO;
+	struct cpuidle_state *state = kobj_to_state(kobj);
+	struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+
+	if (cattr->show)
+		ret = cattr->show(state, buf);
+
+	return ret;
+}
+
+static struct sysfs_ops cpuidle_state_sysfs_ops = {
+	.show = cpuidle_state_show,
+};
+
+static void cpuidle_state_sysfs_release(struct kobject *kobj)
+{
+	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
+
+	complete(&state_obj->kobj_unregister);
+}
+
+static struct kobj_type ktype_state_cpuidle = {
+	.sysfs_ops = &cpuidle_state_sysfs_ops,
+	.default_attrs = cpuidle_state_default_attrs,
+	.release = cpuidle_state_sysfs_release,
+};
+
+static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
+{
+	kobject_unregister(&device->kobjs[i]->kobj);
+	wait_for_completion(&device->kobjs[i]->kobj_unregister);
+	kfree(device->kobjs[i]);
+	device->kobjs[i] = NULL;
+}
+
+/**
+ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+{
+	int i, ret = -ENOMEM;
+	struct cpuidle_state_kobj *kobj;
+
+	/* state statistics */
+	for (i = 0; i < device->state_count; i++) {
+		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
+		if (!kobj)
+			goto error_state;
+		kobj->state = &device->states[i];
+		init_completion(&kobj->kobj_unregister);
+
+		kobj->kobj.parent = &device->kobj;
+		kobj->kobj.ktype = &ktype_state_cpuidle;
+		kobject_set_name(&kobj->kobj, "state%d", i);
+		ret = kobject_register(&kobj->kobj);
+		if (ret) {
+			kfree(kobj);
+			goto error_state;
+		}
+		device->kobjs[i] = kobj;
+	}
+
+	return 0;
+
+error_state:
+	for (i = i - 1; i >= 0; i--)
+		cpuidle_free_state_kobj(device, i);
+	return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
+ * @device: the target device
+ */
+void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
+{
+	int i;
+
+	for (i = 0; i < device->state_count; i++)
+		cpuidle_free_state_kobj(device, i);
+}
+
+/**
+ * cpuidle_add_sysfs - creates a sysfs instance for the target device
+ * @sysdev: the target device
+ */
+int cpuidle_add_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	dev->kobj.parent = &sysdev->kobj;
+	dev->kobj.ktype = &ktype_cpuidle;
+	kobject_set_name(&dev->kobj, "%s", "cpuidle");
+	return kobject_register(&dev->kobj);
+}
+
+/**
+ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
+ * @sysdev: the target device
+ */
+void cpuidle_remove_sysfs(struct sys_device *sysdev)
+{
+	int cpu = sysdev->id;
+	struct cpuidle_device *dev;
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	kobject_unregister(&dev->kobj);
+}
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index e80af67664c..2d23e304f5e 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -94,8 +94,6 @@ extern int edac_debug_level;
 
 #endif				/* !CONFIG_EDAC_DEBUG */
 
-#define BIT(x) (1 << (x))
-
 #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
 	PCI_DEVICE_ID_ ## vend ## _ ## dev
 
diff --git a/drivers/edac/pasemi_edac.c b/drivers/edac/pasemi_edac.c
index e66cdd42a39..9007d067722 100644
--- a/drivers/edac/pasemi_edac.c
+++ b/drivers/edac/pasemi_edac.c
@@ -270,6 +270,7 @@ static void __devexit pasemi_edac_remove(struct pci_dev *pdev)
 
 static const struct pci_device_id pasemi_edac_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_PASEMI, 0xa00a) },
+	{ }
 };
 
 MODULE_DEVICE_TABLE(pci, pasemi_edac_pci_tbl);
diff --git a/drivers/firmware/dcdbas.h b/drivers/firmware/dcdbas.h
index dcdba0f1b32..87bc3417de2 100644
--- a/drivers/firmware/dcdbas.h
+++ b/drivers/firmware/dcdbas.h
@@ -17,7 +17,6 @@
 #define _DCDBAS_H_
 
 #include <linux/device.h>
-#include <linux/input.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index a702e2f6da7..1ca6f4635ee 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -113,13 +113,13 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
 
 	if (count > HID_MIN_BUFFER_SIZE) {
 		printk(KERN_WARNING "hidraw: pid %d passed too large report\n",
-				current->pid);
+				task_pid_nr(current));
 		return -EINVAL;
 	}
 
 	if (count < 2) {
 		printk(KERN_WARNING "hidraw: pid %d passed too short report\n",
-				current->pid);
+				task_pid_nr(current));
 		return -EINVAL;
 	}
 
diff --git a/drivers/hid/usbhid/usbkbd.c b/drivers/hid/usbhid/usbkbd.c
index b76b02f7b52..775a1ef28a2 100644
--- a/drivers/hid/usbhid/usbkbd.c
+++ b/drivers/hid/usbhid/usbkbd.c
@@ -274,8 +274,11 @@ static int usb_kbd_probe(struct usb_interface *iface,
 
 	input_set_drvdata(input_dev, kbd);
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-	input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL) | BIT(LED_COMPOSE) | BIT(LED_KANA);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+		BIT_MASK(EV_REP);
+	input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_COMPOSE) |
+		BIT_MASK(LED_KANA);
 
 	for (i = 0; i < 255; i++)
 		set_bit(usb_kbd_keycode[i], input_dev->keybit);
diff --git a/drivers/hid/usbhid/usbmouse.c b/drivers/hid/usbhid/usbmouse.c
index 5345c73bcf6..f8ad6910d3d 100644
--- a/drivers/hid/usbhid/usbmouse.c
+++ b/drivers/hid/usbhid/usbmouse.c
@@ -173,11 +173,13 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &intf->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+		BIT_MASK(BTN_EXTRA);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
 
 	input_set_drvdata(input_dev, mouse);
 
diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c
index 4879125b4cd..1001d2e122a 100644
--- a/drivers/hwmon/applesmc.c
+++ b/drivers/hwmon/applesmc.c
@@ -1099,7 +1099,7 @@ static int applesmc_create_accelerometer(void)
 	idev->name = "applesmc";
 	idev->id.bustype = BUS_HOST;
 	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT(EV_ABS);
+	idev->evbit[0] = BIT_MASK(EV_ABS);
 	input_set_abs_params(idev, ABS_X,
 			-256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT);
 	input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 6f66551d9e5..5c82ec7f8bb 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -150,7 +150,7 @@ static struct coretemp_data *coretemp_update_device(struct device *dev)
 static int __devinit coretemp_probe(struct platform_device *pdev)
 {
 	struct coretemp_data *data;
-	struct cpuinfo_x86 *c = &(cpu_data)[pdev->id];
+	struct cpuinfo_x86 *c = &cpu_data(pdev->id);
 	int err;
 	u32 eax, edx;
 
@@ -359,7 +359,7 @@ static int __init coretemp_init(void)
 	struct pdev_entry *p, *n;
 
 	/* quick check if we run Intel */
-	if (cpu_data[0].x86_vendor != X86_VENDOR_INTEL)
+	if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL)
 		goto exit;
 
 	err = platform_driver_register(&coretemp_driver);
@@ -367,7 +367,7 @@ static int __init coretemp_init(void)
 		goto exit;
 
 	for_each_online_cpu(i) {
-		struct cpuinfo_x86 *c = &(cpu_data)[i];
+		struct cpuinfo_x86 *c = &cpu_data(i);
 
 		/* check if family 6, models e, f, 16 */
 		if ((c->cpuid_level < 0) || (c->x86 != 0x6) ||
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index 8a7ae03aeee..bab5fd2e4df 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -574,7 +574,7 @@ static int __init hdaps_init(void)
 	idev = hdaps_idev->input;
 	idev->name = "hdaps";
 	idev->dev.parent = &pdev->dev;
-	idev->evbit[0] = BIT(EV_ABS);
+	idev->evbit[0] = BIT_MASK(EV_ABS);
 	input_set_abs_params(idev, ABS_X,
 			-256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT);
 	input_set_abs_params(idev, ABS_Y,
diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index f17e771e42f..3330667280b 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -200,7 +200,7 @@ static u8 find_vrm(u8 eff_family, u8 eff_model, u8 eff_stepping, u8 vendor)
 
 u8 vid_which_vrm(void)
 {
-	struct cpuinfo_x86 *c = cpu_data;
+	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 eax;
 	u8 eff_family, eff_model, eff_stepping, vrm_ret;
 
diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c
index 00fad11733a..6426a61f8d4 100644
--- a/drivers/i2c/busses/i2c-pxa.c
+++ b/drivers/i2c/busses/i2c-pxa.c
@@ -85,7 +85,7 @@ struct bits {
 	const char *set;
 	const char *unset;
 };
-#define BIT(m, s, u)	{ .mask = m, .set = s, .unset = u }
+#define PXA_BIT(m, s, u)	{ .mask = m, .set = s, .unset = u }
 
 static inline void
 decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
@@ -100,17 +100,17 @@ decode_bits(const char *prefix, const struct bits *bits, int num, u32 val)
 }
 
 static const struct bits isr_bits[] = {
-	BIT(ISR_RWM,	"RX",		"TX"),
-	BIT(ISR_ACKNAK,	"NAK",		"ACK"),
-	BIT(ISR_UB,	"Bsy",		"Rdy"),
-	BIT(ISR_IBB,	"BusBsy",	"BusRdy"),
-	BIT(ISR_SSD,	"SlaveStop",	NULL),
-	BIT(ISR_ALD,	"ALD",		NULL),
-	BIT(ISR_ITE,	"TxEmpty",	NULL),
-	BIT(ISR_IRF,	"RxFull",	NULL),
-	BIT(ISR_GCAD,	"GenCall",	NULL),
-	BIT(ISR_SAD,	"SlaveAddr",	NULL),
-	BIT(ISR_BED,	"BusErr",	NULL),
+	PXA_BIT(ISR_RWM,	"RX",		"TX"),
+	PXA_BIT(ISR_ACKNAK,	"NAK",		"ACK"),
+	PXA_BIT(ISR_UB,		"Bsy",		"Rdy"),
+	PXA_BIT(ISR_IBB,	"BusBsy",	"BusRdy"),
+	PXA_BIT(ISR_SSD,	"SlaveStop",	NULL),
+	PXA_BIT(ISR_ALD,	"ALD",		NULL),
+	PXA_BIT(ISR_ITE,	"TxEmpty",	NULL),
+	PXA_BIT(ISR_IRF,	"RxFull",	NULL),
+	PXA_BIT(ISR_GCAD,	"GenCall",	NULL),
+	PXA_BIT(ISR_SAD,	"SlaveAddr",	NULL),
+	PXA_BIT(ISR_BED,	"BusErr",	NULL),
 };
 
 static void decode_ISR(unsigned int val)
@@ -120,21 +120,21 @@ static void decode_ISR(unsigned int val)
 }
 
 static const struct bits icr_bits[] = {
-	BIT(ICR_START,  "START",	NULL),
-	BIT(ICR_STOP,   "STOP",		NULL),
-	BIT(ICR_ACKNAK, "ACKNAK",	NULL),
-	BIT(ICR_TB,     "TB",		NULL),
-	BIT(ICR_MA,     "MA",		NULL),
-	BIT(ICR_SCLE,   "SCLE",		"scle"),
-	BIT(ICR_IUE,    "IUE",		"iue"),
-	BIT(ICR_GCD,    "GCD",		NULL),
-	BIT(ICR_ITEIE,  "ITEIE",	NULL),
-	BIT(ICR_IRFIE,  "IRFIE",	NULL),
-	BIT(ICR_BEIE,   "BEIE",		NULL),
-	BIT(ICR_SSDIE,  "SSDIE",	NULL),
-	BIT(ICR_ALDIE,  "ALDIE",	NULL),
-	BIT(ICR_SADIE,  "SADIE",	NULL),
-	BIT(ICR_UR,     "UR",		"ur"),
+	PXA_BIT(ICR_START,  "START",	NULL),
+	PXA_BIT(ICR_STOP,   "STOP",	NULL),
+	PXA_BIT(ICR_ACKNAK, "ACKNAK",	NULL),
+	PXA_BIT(ICR_TB,     "TB",	NULL),
+	PXA_BIT(ICR_MA,     "MA",	NULL),
+	PXA_BIT(ICR_SCLE,   "SCLE",	"scle"),
+	PXA_BIT(ICR_IUE,    "IUE",	"iue"),
+	PXA_BIT(ICR_GCD,    "GCD",	NULL),
+	PXA_BIT(ICR_ITEIE,  "ITEIE",	NULL),
+	PXA_BIT(ICR_IRFIE,  "IRFIE",	NULL),
+	PXA_BIT(ICR_BEIE,   "BEIE",	NULL),
+	PXA_BIT(ICR_SSDIE,  "SSDIE",	NULL),
+	PXA_BIT(ICR_ALDIE,  "ALDIE",	NULL),
+	PXA_BIT(ICR_SADIE,  "SADIE",	NULL),
+	PXA_BIT(ICR_UR,     "UR",		"ur"),
 };
 
 static void decode_ICR(unsigned int val)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 6d9fd92763f..6eaece96524 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -1056,6 +1056,9 @@ endif
 config BLK_DEV_IDEDMA
 	def_bool BLK_DEV_IDEDMA_PCI || BLK_DEV_IDEDMA_PMAC || BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
 
+config IDE_ARCH_OBSOLETE_INIT
+	def_bool ALPHA || (ARM && !ARCH_L7200) || BLACKFIN || X86 || IA64 || M32R || MIPS || PARISC || PPC || (SUPERH64 && BLK_DEV_IDEPCI) || SPARC
+
 endif
 
 config BLK_DEV_HD_ONLY
diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c
index f7449d04114..48db6167bb9 100644
--- a/drivers/ide/arm/bast-ide.c
+++ b/drivers/ide/arm/bast-ide.c
@@ -45,7 +45,7 @@ bastide_register(unsigned int base, unsigned int aux, int irq,
 	hw.io_ports[IDE_CONTROL_OFFSET] = aux + (6 * 0x20);
 	hw.irq = irq;
 
-	ide_register_hw(&hw, 0, hwif);
+	ide_register_hw(&hw, NULL, 0, hwif);
 
 	return 0;
 }
diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c
index 3af33fbf1f8..410a0d13e35 100644
--- a/drivers/ide/arm/icside.c
+++ b/drivers/ide/arm/icside.c
@@ -316,27 +316,29 @@ static int icside_dma_end(ide_drive_t *drive)
 
 	drive->waiting_for_dma = 0;
 
-	disable_dma(hwif->hw.dma);
+	disable_dma(state->dev->dma);
 
 	/* Teardown mappings after DMA has completed. */
 	dma_unmap_sg(state->dev, hwif->sg_table, hwif->sg_nents,
 		     hwif->sg_dma_direction);
 
-	return get_dma_residue(hwif->hw.dma) != 0;
+	return get_dma_residue(state->dev->dma) != 0;
 }
 
 static void icside_dma_start(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = HWIF(drive);
+	struct icside_state *state = hwif->hwif_data;
 
 	/* We can not enable DMA on both channels simultaneously. */
-	BUG_ON(dma_channel_active(hwif->hw.dma));
-	enable_dma(hwif->hw.dma);
+	BUG_ON(dma_channel_active(state->dev->dma));
+	enable_dma(state->dev->dma);
 }
 
 static int icside_dma_setup(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = HWIF(drive);
+	struct icside_state *state = hwif->hwif_data;
 	struct request *rq = hwif->hwgroup->rq;
 	unsigned int dma_mode;
 
@@ -348,7 +350,7 @@ static int icside_dma_setup(ide_drive_t *drive)
 	/*
 	 * We can not enable DMA on both channels.
 	 */
-	BUG_ON(dma_channel_active(hwif->hw.dma));
+	BUG_ON(dma_channel_active(state->dev->dma));
 
 	icside_build_sglist(drive, rq);
 
@@ -365,14 +367,14 @@ static int icside_dma_setup(ide_drive_t *drive)
 	/*
 	 * Select the correct timing for this drive.
 	 */
-	set_dma_speed(hwif->hw.dma, drive->drive_data);
+	set_dma_speed(state->dev->dma, drive->drive_data);
 
 	/*
 	 * Tell the DMA engine about the SG table and
 	 * data direction.
 	 */
-	set_dma_sg(hwif->hw.dma, hwif->sg_table, hwif->sg_nents);
-	set_dma_mode(hwif->hw.dma, dma_mode);
+	set_dma_sg(state->dev->dma, hwif->sg_table, hwif->sg_nents);
+	set_dma_mode(state->dev->dma, dma_mode);
 
 	drive->waiting_for_dma = 1;
 
@@ -438,40 +440,16 @@ static void icside_dma_init(ide_hwif_t *hwif)
 #define icside_dma_init(hwif)	(0)
 #endif
 
-static ide_hwif_t *icside_find_hwif(unsigned long dataport)
-{
-	ide_hwif_t *hwif;
-	int index;
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = &ide_hwifs[index];
-		if (hwif->io_ports[IDE_DATA_OFFSET] == dataport)
-			goto found;
-	}
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = &ide_hwifs[index];
-		if (!hwif->io_ports[IDE_DATA_OFFSET])
-			goto found;
-	}
-
-	hwif = NULL;
-found:
-	return hwif;
-}
-
 static ide_hwif_t *
 icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *ec)
 {
 	unsigned long port = (unsigned long)base + info->dataoffset;
 	ide_hwif_t *hwif;
 
-	hwif = icside_find_hwif(port);
+	hwif = ide_find_port(port);
 	if (hwif) {
 		int i;
 
-		memset(&hwif->hw, 0, sizeof(hw_regs_t));
-
 		/*
 		 * Ensure we're using MMIO
 		 */
@@ -479,13 +457,10 @@ icside_setup(void __iomem *base, struct cardinfo *info, struct expansion_card *e
 		hwif->mmio = 1;
 
 		for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
-			hwif->hw.io_ports[i] = port;
 			hwif->io_ports[i] = port;
 			port += 1 << info->stepping;
 		}
-		hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)base + info->ctrloffset;
 		hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)base + info->ctrloffset;
-		hwif->hw.irq  = ec->irq;
 		hwif->irq     = ec->irq;
 		hwif->noprobe = 0;
 		hwif->chipset = ide_acorn;
@@ -500,6 +475,7 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 {
 	ide_hwif_t *hwif;
 	void __iomem *base;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
 	if (!base)
@@ -523,9 +499,9 @@ icside_register_v5(struct icside_state *state, struct expansion_card *ec)
 
 	state->hwif[0] = hwif;
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	return 0;
 }
@@ -537,6 +513,7 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	void __iomem *ioc_base, *easi_base;
 	unsigned int sel = 0;
 	int ret;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
 	if (!ioc_base) {
@@ -592,7 +569,6 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	hwif->serialized  = 1;
 	hwif->config_data = (unsigned long)ioc_base;
 	hwif->select_data = sel;
-	hwif->hw.dma      = ec->dma;
 
 	mate->maskproc    = icside_maskproc;
 	mate->channel     = 1;
@@ -601,18 +577,16 @@ icside_register_v6(struct icside_state *state, struct expansion_card *ec)
 	mate->serialized  = 1;
 	mate->config_data = (unsigned long)ioc_base;
 	mate->select_data = sel | 1;
-	mate->hw.dma      = ec->dma;
 
 	if (ec->dma != NO_DMA && !request_dma(ec->dma, hwif->name)) {
 		icside_dma_init(hwif);
 		icside_dma_init(mate);
 	}
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
+	idx[0] = hwif->index;
+	idx[1] = mate->index;
 
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 
diff --git a/drivers/ide/arm/ide_arm.c b/drivers/ide/arm/ide_arm.c
index bce2bec8141..8957cbadf5c 100644
--- a/drivers/ide/arm/ide_arm.c
+++ b/drivers/ide/arm/ide_arm.c
@@ -31,5 +31,5 @@ void __init ide_arm_init(void)
 	memset(&hw, 0, sizeof(hw));
 	ide_std_init_ports(&hw, IDE_ARM_IO, IDE_ARM_IO + 0x206);
 	hw.irq = IDE_ARM_IRQ;
-	ide_register_hw(&hw, 1, NULL);
+	ide_register_hw(&hw, NULL, 1, NULL);
 }
diff --git a/drivers/ide/arm/rapide.c b/drivers/ide/arm/rapide.c
index 83811af1161..0775a3afef4 100644
--- a/drivers/ide/arm/rapide.c
+++ b/drivers/ide/arm/rapide.c
@@ -13,42 +13,25 @@
 
 #include <asm/ecard.h>
 
-/*
- * Something like this really should be in generic code, but isn't.
- */
 static ide_hwif_t *
 rapide_locate_hwif(void __iomem *base, void __iomem *ctrl, unsigned int sz, int irq)
 {
 	unsigned long port = (unsigned long)base;
-	ide_hwif_t *hwif;
-	int index, i;
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == port)
-			goto found;
-	}
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
-			goto found;
-	}
+	ide_hwif_t *hwif = ide_find_port(port);
+	int i;
 
-	return NULL;
+	if (hwif == NULL)
+		goto out;
 
- found:
 	for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
-		hwif->hw.io_ports[i] = port;
 		hwif->io_ports[i] = port;
 		port += sz;
 	}
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
 	hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
-	hwif->hw.irq = hwif->irq = irq;
+	hwif->irq = irq;
 	hwif->mmio = 1;
 	default_hwif_mmiops(hwif);
-
+out:
 	return hwif;
 }
 
@@ -58,6 +41,7 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 	ide_hwif_t *hwif;
 	void __iomem *base;
 	int ret;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ret = ecard_request_resources(ec);
 	if (ret)
@@ -74,8 +58,11 @@ rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
 		hwif->hwif_data = base;
 		hwif->gendev.parent = &ec->dev;
 		hwif->noprobe = 0;
-		probe_hwif_init(hwif);
-		ide_proc_register_port(hwif);
+
+		idx[0] = hwif->index;
+
+		ide_device_add(idx);
+
 		ecard_set_drvdata(ec, hwif);
 		goto out;
 	}
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index 9a96a10ba9d..ff20377b4c8 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -782,7 +782,7 @@ init_e100_ide (void)
 		                ide_offsets,
 		                0, 0, cris_ide_ack_intr,
 		                ide_default_irq(0));
-		ide_register_hw(&hw, 1, &hwif);
+		ide_register_hw(&hw, NULL, 1, &hwif);
 		hwif->mmio = 1;
 		hwif->chipset = ide_etrax100;
 		hwif->set_pio_mode = &cris_set_pio_mode;
diff --git a/drivers/ide/h8300/ide-h8300.c b/drivers/ide/h8300/ide-h8300.c
index 6d26ad7360d..4a49b5c59ac 100644
--- a/drivers/ide/h8300/ide-h8300.c
+++ b/drivers/ide/h8300/ide-h8300.c
@@ -68,7 +68,6 @@ static inline void hw_setup(hw_regs_t *hw)
 		hw->io_ports[i] = CONFIG_H8300_IDE_BASE + H8300_IDE_GAP*i;
 	hw->io_ports[IDE_CONTROL_OFFSET] = CONFIG_H8300_IDE_ALT;
 	hw->irq = EXT_IRQ0 + CONFIG_H8300_IDE_IRQ;
-	hw->dma = NO_DMA;
 	hw->chipset = ide_generic;
 }
 
@@ -101,7 +100,7 @@ void __init h8300_ide_init(void)
 	hw_setup(&hw);
 
 	/* register if */
-	idx = ide_register_hw(&hw, 1, &hwif);
+	idx = ide_register_hw(&hw, NULL, 1, &hwif);
 	if (idx == -1) {
 		printk(KERN_ERR "ide-h8300: IDE I/F register failed\n");
 		return;
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 1d5f6823101..89df48fdc69 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -350,7 +350,7 @@ static int taskfile_load_raw(ide_drive_t *drive,
 
 	memset(&args, 0, sizeof(ide_task_t));
 	args.command_type = IDE_DRIVE_TASK_NO_DATA;
-	args.data_phase   = TASKFILE_IN;
+	args.data_phase   = TASKFILE_NO_DATA;
 	args.handler      = &task_no_data_intr;
 
 	/* convert gtf to IDE Taskfile */
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 2722d9165b6..00123d99527 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -593,28 +593,12 @@ static int smart_enable(ide_drive_t *drive)
 	return ide_raw_taskfile(drive, &args, NULL);
 }
 
-static int get_smart_values(ide_drive_t *drive, u8 *buf)
+static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd)
 {
 	ide_task_t args;
 
 	memset(&args, 0, sizeof(ide_task_t));
-	args.tfRegister[IDE_FEATURE_OFFSET]	= SMART_READ_VALUES;
-	args.tfRegister[IDE_NSECTOR_OFFSET]	= 0x01;
-	args.tfRegister[IDE_LCYL_OFFSET]	= SMART_LCYL_PASS;
-	args.tfRegister[IDE_HCYL_OFFSET]	= SMART_HCYL_PASS;
-	args.tfRegister[IDE_COMMAND_OFFSET]	= WIN_SMART;
-	args.command_type			= IDE_DRIVE_TASK_IN;
-	args.data_phase				= TASKFILE_IN;
-	args.handler				= &task_in_intr;
-	(void) smart_enable(drive);
-	return ide_raw_taskfile(drive, &args, buf);
-}
-
-static int get_smart_thresholds(ide_drive_t *drive, u8 *buf)
-{
-	ide_task_t args;
-	memset(&args, 0, sizeof(ide_task_t));
-	args.tfRegister[IDE_FEATURE_OFFSET]	= SMART_READ_THRESHOLDS;
+	args.tfRegister[IDE_FEATURE_OFFSET]	= sub_cmd;
 	args.tfRegister[IDE_NSECTOR_OFFSET]	= 0x01;
 	args.tfRegister[IDE_LCYL_OFFSET]	= SMART_LCYL_PASS;
 	args.tfRegister[IDE_HCYL_OFFSET]	= SMART_HCYL_PASS;
@@ -656,7 +640,7 @@ static int proc_idedisk_read_smart_thresholds
 	ide_drive_t	*drive = (ide_drive_t *)data;
 	int		len = 0, i = 0;
 
-	if (!get_smart_thresholds(drive, page)) {
+	if (get_smart_data(drive, page, SMART_READ_THRESHOLDS) == 0) {
 		unsigned short *val = (unsigned short *) page;
 		char *out = ((char *)val) + (SECTOR_WORDS * 4);
 		page = out;
@@ -675,7 +659,7 @@ static int proc_idedisk_read_smart_values
 	ide_drive_t	*drive = (ide_drive_t *)data;
 	int		len = 0, i = 0;
 
-	if (!get_smart_values(drive, page)) {
+	if (get_smart_data(drive, page, SMART_READ_VALUES) == 0) {
 		unsigned short *val = (unsigned short *) page;
 		char *out = ((char *)val) + (SECTOR_WORDS * 4);
 		page = out;
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 80b4f17f394..428f7a8a00b 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -901,10 +901,7 @@ void ide_dma_timeout (ide_drive_t *drive)
 
 EXPORT_SYMBOL(ide_dma_timeout);
 
-/*
- * Needed for allowing full modular support of ide-driver
- */
-static int ide_release_dma_engine(ide_hwif_t *hwif)
+static void ide_release_dma_engine(ide_hwif_t *hwif)
 {
 	if (hwif->dmatable_cpu) {
 		pci_free_consistent(hwif->pci_dev,
@@ -913,7 +910,6 @@ static int ide_release_dma_engine(ide_hwif_t *hwif)
 				    hwif->dmatable_dma);
 		hwif->dmatable_cpu = NULL;
 	}
-	return 1;
 }
 
 static int ide_release_iomio_dma(ide_hwif_t *hwif)
@@ -956,12 +952,6 @@ static int ide_mapped_mmio_dma(ide_hwif_t *hwif, unsigned long base, unsigned in
 {
 	printk(KERN_INFO "    %s: MMIO-DMA ", hwif->name);
 
- 	hwif->dma_base = base;
-
-	if(hwif->mate)
-		hwif->dma_master = (hwif->channel) ? hwif->mate->dma_base : base;
-	else
-		hwif->dma_master = base;
 	return 0;
 }
 
@@ -975,8 +965,6 @@ static int ide_iomio_dma(ide_hwif_t *hwif, unsigned long base, unsigned int port
 		return 1;
 	}
 
-	hwif->dma_base = base;
-
 	if (hwif->cds->extra) {
 		hwif->extra_base = base + (hwif->channel ? 8 : 16);
 
@@ -991,10 +979,6 @@ static int ide_iomio_dma(ide_hwif_t *hwif, unsigned long base, unsigned int port
 		}
 	}
 
-	if(hwif->mate)
-		hwif->dma_master = (hwif->channel) ? hwif->mate->dma_base:base;
-	else
-		hwif->dma_master = base;
 	return 0;
 }
 
@@ -1006,12 +990,9 @@ static int ide_dma_iobase(ide_hwif_t *hwif, unsigned long base, unsigned int por
 	return ide_iomio_dma(hwif, base, ports);
 }
 
-/*
- * This can be called for a dynamically installed interface. Don't __init it
- */
-void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_ports)
+void ide_setup_dma(ide_hwif_t *hwif, unsigned long base, unsigned num_ports)
 {
-	if (ide_dma_iobase(hwif, dma_base, num_ports))
+	if (ide_dma_iobase(hwif, base, num_ports))
 		return;
 
 	if (ide_allocate_dma_engine(hwif)) {
@@ -1019,6 +1000,13 @@ void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_p
 		return;
 	}
 
+	hwif->dma_base = base;
+
+	if (hwif->mate)
+		hwif->dma_master = hwif->channel ? hwif->mate->dma_base : base;
+	else
+		hwif->dma_master = base;
+
 	if (!(hwif->dma_command))
 		hwif->dma_command	= hwif->dma_base;
 	if (!(hwif->dma_vendor1))
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 5c8b008676f..c89f0d3058e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -47,15 +47,15 @@
 #include <linux/device.h>
 #include <linux/kmod.h>
 #include <linux/scatterlist.h>
+#include <linux/bitops.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 
 static int __ide_end_request(ide_drive_t *drive, struct request *rq,
-			     int uptodate, unsigned int nr_bytes)
+			     int uptodate, unsigned int nr_bytes, int dequeue)
 {
 	int ret = 1;
 
@@ -80,9 +80,11 @@ static int __ide_end_request(ide_drive_t *drive, struct request *rq,
 
 	if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
 		add_disk_randomness(rq->rq_disk);
-		if (!list_empty(&rq->queuelist))
-			blkdev_dequeue_request(rq);
-		HWGROUP(drive)->rq = NULL;
+		if (dequeue) {
+			if (!list_empty(&rq->queuelist))
+				blkdev_dequeue_request(rq);
+			HWGROUP(drive)->rq = NULL;
+		}
 		end_that_request_last(rq, uptodate);
 		ret = 0;
 	}
@@ -122,7 +124,7 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 			nr_bytes = rq->hard_cur_sectors << 9;
 	}
 
-	ret = __ide_end_request(drive, rq, uptodate, nr_bytes);
+	ret = __ide_end_request(drive, rq, uptodate, nr_bytes, 1);
 
 	spin_unlock_irqrestore(&ide_lock, flags);
 	return ret;
@@ -255,39 +257,13 @@ int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 			     int uptodate, int nr_sectors)
 {
 	unsigned long flags;
-	int ret = 1;
+	int ret;
 
 	spin_lock_irqsave(&ide_lock, flags);
-
 	BUG_ON(!blk_rq_started(rq));
-
-	/*
-	 * if failfast is set on a request, override number of sectors and
-	 * complete the whole request right now
-	 */
-	if (blk_noretry_request(rq) && end_io_error(uptodate))
-		nr_sectors = rq->hard_nr_sectors;
-
-	if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors)
-		rq->errors = -EIO;
-
-	/*
-	 * decide whether to reenable DMA -- 3 is a random magic for now,
-	 * if we DMA timeout more than 3 times, just stay in PIO
-	 */
-	if (drive->state == DMA_PIO_RETRY && drive->retry_pio <= 3) {
-		drive->state = 0;
-		HWGROUP(drive)->hwif->ide_dma_on(drive);
-	}
-
-	if (!end_that_request_first(rq, uptodate, nr_sectors)) {
-		add_disk_randomness(rq->rq_disk);
-		if (blk_rq_tagged(rq))
-			blk_queue_end_tag(drive->queue, rq);
-		end_that_request_last(rq, uptodate);
-		ret = 0;
-	}
+	ret = __ide_end_request(drive, rq, uptodate, nr_sectors << 9, 0);
 	spin_unlock_irqrestore(&ide_lock, flags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ide_end_dequeued_request);
@@ -800,7 +776,20 @@ static ide_startstop_t do_special (ide_drive_t *drive)
 		s->b.set_tune = 0;
 
 		if (set_pio_mode_abuse(drive->hwif, req_pio)) {
-			if (hwif->set_pio_mode)
+
+			if (hwif->set_pio_mode == NULL)
+				return ide_stopped;
+
+			/*
+			 * take ide_lock for drive->[no_]unmask/[no_]io_32bit
+			 */
+			if (req_pio == 8 || req_pio == 9) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&ide_lock, flags);
+				hwif->set_pio_mode(drive, req_pio);
+				spin_unlock_irqrestore(&ide_lock, flags);
+			} else
 				hwif->set_pio_mode(drive, req_pio);
 		} else {
 			int keep_dma = drive->using_dma;
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index d4d790f91f9..95168833d06 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -693,35 +693,16 @@ static u8 ide_auto_reduce_xfer (ide_drive_t *drive)
 }
 #endif /* CONFIG_BLK_DEV_IDEDMA */
 
-/*
- * Update the 
- */
-int ide_driveid_update (ide_drive_t *drive)
+int ide_driveid_update(ide_drive_t *drive)
 {
-	ide_hwif_t *hwif	= HWIF(drive);
+	ide_hwif_t *hwif = drive->hwif;
 	struct hd_driveid *id;
-#if 0
-	id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC);
-	if (!id)
-		return 0;
-
-	taskfile_lib_get_identify(drive, (char *)&id);
+	unsigned long timeout, flags;
 
-	ide_fix_driveid(id);
-	if (id) {
-		drive->id->dma_ultra = id->dma_ultra;
-		drive->id->dma_mword = id->dma_mword;
-		drive->id->dma_1word = id->dma_1word;
-		/* anything more ? */
-		kfree(id);
-	}
-	return 1;
-#else
 	/*
 	 * Re-read drive->id for possible DMA mode
 	 * change (copied from ide-probe.c)
 	 */
-	unsigned long timeout, flags;
 
 	SELECT_MASK(drive, 1);
 	if (IDE_CONTROL_REG)
@@ -763,7 +744,6 @@ int ide_driveid_update (ide_drive_t *drive)
 	}
 
 	return 1;
-#endif
 }
 
 int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
index 2b8009c50e9..e245521af7b 100644
--- a/drivers/ide/ide-pnp.c
+++ b/drivers/ide/ide-pnp.c
@@ -40,9 +40,8 @@ static int idepnp_probe(struct pnp_dev * dev, const struct pnp_device_id *dev_id
 	ide_std_init_ports(&hw, pnp_port_start(dev, 0),
 				pnp_port_start(dev, 1));
 	hw.irq = pnp_irq(dev, 0);
-	hw.dma = NO_DMA;
 
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 
 	if (index != -1) {
 	    	printk(KERN_INFO "ide%d: generic PnP IDE interface\n", index);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index e294c7415c2..d5146c57e5b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -717,7 +717,7 @@ EXPORT_SYMBOL_GPL(ide_undecoded_slave);
  * This routine only knows how to look for drive units 0 and 1
  * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
  */
-static void probe_hwif(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
+static void probe_hwif(ide_hwif_t *hwif)
 {
 	unsigned long flags;
 	unsigned int irqd;
@@ -819,8 +819,8 @@ static void probe_hwif(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
 		return;
 	}
 
-	if (fixup)
-		fixup(hwif);
+	if (hwif->fixup)
+		hwif->fixup(hwif);
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
 		ide_drive_t *drive = &hwif->drives[unit];
@@ -859,10 +859,11 @@ static void probe_hwif(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
 }
 
 static int hwif_init(ide_hwif_t *hwif);
+static void hwif_register_devices(ide_hwif_t *hwif);
 
-int probe_hwif_init_with_fixup(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif))
+static int probe_hwif_init(ide_hwif_t *hwif)
 {
-	probe_hwif(hwif, fixup);
+	probe_hwif(hwif);
 
 	if (!hwif_init(hwif)) {
 		printk(KERN_INFO "%s: failed to initialize IDE interface\n",
@@ -870,34 +871,12 @@ int probe_hwif_init_with_fixup(ide_hwif_t *hwif, void (*fixup)(ide_hwif_t *hwif)
 		return -1;
 	}
 
-	if (hwif->present) {
-		u16 unit = 0;
-		int ret;
+	if (hwif->present)
+		hwif_register_devices(hwif);
 
-		for (unit = 0; unit < MAX_DRIVES; ++unit) {
-			ide_drive_t *drive = &hwif->drives[unit];
-			/* For now don't attach absent drives, we may
-			   want them on default or a new "empty" class
-			   for hotplug reprobing ? */
-			if (drive->present) {
-				ret = device_register(&drive->gendev);
-				if (ret < 0)
-					printk(KERN_WARNING "IDE: %s: "
-						"device_register error: %d\n",
-						__FUNCTION__, ret);
-			}
-		}
-	}
 	return 0;
 }
 
-int probe_hwif_init(ide_hwif_t *hwif)
-{
-	return probe_hwif_init_with_fixup(hwif, NULL);
-}
-
-EXPORT_SYMBOL(probe_hwif_init);
-
 #if MAX_HWIFS > 1
 /*
  * save_match() is used to simplify logic in init_irq() below.
@@ -1379,6 +1358,24 @@ out:
 	return 0;
 }
 
+static void hwif_register_devices(ide_hwif_t *hwif)
+{
+	unsigned int i;
+
+	for (i = 0; i < MAX_DRIVES; i++) {
+		ide_drive_t *drive = &hwif->drives[i];
+
+		if (drive->present) {
+			int ret = device_register(&drive->gendev);
+
+			if (ret < 0)
+				printk(KERN_WARNING "IDE: %s: "
+					"device_register error: %d\n",
+					__FUNCTION__, ret);
+		}
+	}
+}
+
 int ideprobe_init (void)
 {
 	unsigned int index;
@@ -1390,27 +1387,18 @@ int ideprobe_init (void)
 
 	for (index = 0; index < MAX_HWIFS; ++index)
 		if (probe[index])
-			probe_hwif(&ide_hwifs[index], NULL);
+			probe_hwif(&ide_hwifs[index]);
 	for (index = 0; index < MAX_HWIFS; ++index)
 		if (probe[index])
 			hwif_init(&ide_hwifs[index]);
 	for (index = 0; index < MAX_HWIFS; ++index) {
 		if (probe[index]) {
 			ide_hwif_t *hwif = &ide_hwifs[index];
-			int unit;
 			if (!hwif->present)
 				continue;
 			if (hwif->chipset == ide_unknown || hwif->chipset == ide_forced)
 				hwif->chipset = ide_generic;
-			for (unit = 0; unit < MAX_DRIVES; ++unit)
-				if (hwif->drives[unit].present) {
-					int ret = device_register(
-						&hwif->drives[unit].gendev);
-					if (ret < 0)
-						printk(KERN_WARNING "IDE: %s: "
-							"device_register error: %d\n",
-							__FUNCTION__, ret);
-				}
+			hwif_register_devices(hwif);
 		}
 	}
 	for (index = 0; index < MAX_HWIFS; ++index)
@@ -1420,3 +1408,22 @@ int ideprobe_init (void)
 }
 
 EXPORT_SYMBOL_GPL(ideprobe_init);
+
+int ide_device_add(u8 idx[4])
+{
+	int i, rc = 0;
+
+	for (i = 0; i < 4; i++) {
+		if (idx[i] != 0xff)
+			rc |= probe_hwif_init(&ide_hwifs[idx[i]]);
+	}
+
+	for (i = 0; i < 4; i++) {
+		if (idx[i] != 0xff)
+			ide_proc_register_port(&ide_hwifs[idx[i]]);
+	}
+
+	return rc;
+}
+
+EXPORT_SYMBOL_GPL(ide_device_add);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index fc1d8ae6a80..a4007d30da5 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -804,8 +804,6 @@ void ide_proc_register_port(ide_hwif_t *hwif)
 	create_proc_ide_drives(hwif);
 }
 
-EXPORT_SYMBOL_GPL(ide_proc_register_port);
-
 #ifdef CONFIG_BLK_DEV_IDEPCI
 void ide_pci_create_host_proc(const char *name, get_info_t *get_info)
 {
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index 1fa57947bca..e5a86a962b2 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -621,7 +621,6 @@ typedef struct os_dat_s {
  */
 #define USE_IOTRACE	0
 #if USE_IOTRACE
-#include <linux/io_trace.h>
 #define IO_IDETAPE_FIFO	500
 #endif
 
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 2a3c8d49834..73ef6bf5fbc 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -8,23 +8,6 @@
  *  Copyright (C) 2003-2004	Bartlomiej Zolnierkiewicz
  *
  *  The big the bad and the ugly.
- *
- *  Problems to be fixed because of BH interface or the lack therefore.
- *
- *  Fill me in stupid !!!
- *
- *  HOST:
- *	General refers to the Controller and Driver "pair".
- *  DATA HANDLER:
- *	Under the context of Linux it generally refers to an interrupt handler.
- *	However, it correctly describes the 'HOST'
- *  DATA BLOCK:
- *	The amount of data needed to be transfered as predefined in the
- *	setup of the device.
- *  STORAGE ATOMIC:
- *	The 'DATA BLOCK' associated to the 'DATA HANDLER', and can be as
- *	small as a single sector or as large as the entire command block
- *	request.
  */
 
 #include <linux/module.h>
@@ -695,9 +678,6 @@ int ide_wait_cmd (ide_drive_t *drive, u8 cmd, u8 nsect, u8 feature, u8 sectors,
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
 
-/*
- * FIXME : this needs to map into at taskfile. <andre@linux-ide.org>
- */
 int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
 {
 	int err = 0;
@@ -761,9 +741,6 @@ static int ide_wait_cmd_task(ide_drive_t *drive, u8 *buf)
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
 
-/*
- * FIXME : this needs to map into at taskfile. <andre@linux-ide.org>
- */
 int ide_task_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg)
 {
 	void __user *p = (void __user *)arg;
@@ -860,9 +837,14 @@ ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
 		case TASKFILE_OUT_DMA:
 		case TASKFILE_IN_DMAQ:
 		case TASKFILE_IN_DMA:
-			hwif->dma_setup(drive);
-			hwif->dma_exec_cmd(drive, taskfile->command);
-			hwif->dma_start(drive);
+			if (!drive->using_dma)
+				break;
+
+			if (!hwif->dma_setup(drive)) {
+				hwif->dma_exec_cmd(drive, taskfile->command);
+				hwif->dma_start(drive);
+				return ide_started;
+			}
 			break;
 
 	        default:
@@ -876,7 +858,8 @@ ide_startstop_t flagged_taskfile (ide_drive_t *drive, ide_task_t *task)
 				return task->prehandler(drive, task->rq);
 			}
 			ide_execute_command(drive, taskfile->command, task->handler, WAIT_WORSTCASE, NULL);
+			return ide_started;
 	}
 
-	return ide_started;
+	return ide_stopped;
 }
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
index 961e6c89728..674a65c1a13 100644
--- a/drivers/ide/ide.c
+++ b/drivers/ide/ide.c
@@ -168,7 +168,6 @@ static void init_hwif_default(ide_hwif_t *hwif, unsigned int index)
 
 	ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq);
 
-	memcpy(&hwif->hw, &hw, sizeof(hw));
 	memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports));
 
 	hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
@@ -214,7 +213,7 @@ static void __init init_ide_data (void)
 		init_hwif_data(hwif, index);
 		init_hwif_default(hwif, index);
 #if !defined(CONFIG_PPC32) || !defined(CONFIG_PCI)
-		hwif->irq = hwif->hw.irq =
+		hwif->irq =
 			ide_init_default_irq(hwif->io_ports[IDE_DATA_OFFSET]);
 #endif
 	}
@@ -265,6 +264,30 @@ static int ide_system_bus_speed(void)
 	return system_bus_speed;
 }
 
+ide_hwif_t * ide_find_port(unsigned long base)
+{
+	ide_hwif_t *hwif;
+	int i;
+
+	for (i = 0; i < MAX_HWIFS; i++) {
+		hwif = &ide_hwifs[i];
+		if (hwif->io_ports[IDE_DATA_OFFSET] == base)
+			goto found;
+	}
+
+	for (i = 0; i < MAX_HWIFS; i++) {
+		hwif = &ide_hwifs[i];
+		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
+			goto found;
+	}
+
+	hwif = NULL;
+found:
+	return hwif;
+}
+
+EXPORT_SYMBOL_GPL(ide_find_port);
+
 static struct resource* hwif_request_region(ide_hwif_t *hwif,
 					    unsigned long addr, int num)
 {
@@ -391,6 +414,8 @@ static void ide_hwif_restore(ide_hwif_t *hwif, ide_hwif_t *tmp_hwif)
 	hwif->cds			= tmp_hwif->cds;
 #endif
 
+	hwif->fixup			= tmp_hwif->fixup;
+
 	hwif->set_pio_mode		= tmp_hwif->set_pio_mode;
 	hwif->set_dma_mode		= tmp_hwif->set_dma_mode;
 	hwif->mdma_filter		= tmp_hwif->mdma_filter;
@@ -652,7 +677,6 @@ void ide_setup_ports (	hw_regs_t *hw,
 		}
 	}
 	hw->irq = irq;
-	hw->dma = NO_DMA;
 	hw->ack_intr = ack_intr;
 /*
  *	hw->iops = iops;
@@ -660,11 +684,11 @@ void ide_setup_ports (	hw_regs_t *hw,
 }
 
 /**
- *	ide_register_hw_with_fixup	-	register IDE interface
+ *	ide_register_hw		-	register IDE interface
  *	@hw: hardware registers
+ *	@fixup: fixup function
  *	@initializing: set while initializing built-in drivers
  *	@hwifp: pointer to returned hwif
- *	@fixup: fixup function
  *
  *	Register an IDE interface, specifying exactly the registers etc.
  *	Set init=1 iff calling before probes have taken place.
@@ -672,9 +696,8 @@ void ide_setup_ports (	hw_regs_t *hw,
  *	Returns -1 on error.
  */
 
-int ide_register_hw_with_fixup(hw_regs_t *hw, int initializing,
-			       ide_hwif_t **hwifp,
-			       void(*fixup)(ide_hwif_t *hwif))
+int ide_register_hw(hw_regs_t *hw, void (*fixup)(ide_hwif_t *),
+		    int initializing, ide_hwif_t **hwifp)
 {
 	int index, retry = 1;
 	ide_hwif_t *hwif;
@@ -682,7 +705,7 @@ int ide_register_hw_with_fixup(hw_regs_t *hw, int initializing,
 	do {
 		for (index = 0; index < MAX_HWIFS; ++index) {
 			hwif = &ide_hwifs[index];
-			if (hwif->hw.io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
+			if (hwif->io_ports[IDE_DATA_OFFSET] == hw->io_ports[IDE_DATA_OFFSET])
 				goto found;
 		}
 		for (index = 0; index < MAX_HWIFS; ++index) {
@@ -690,7 +713,7 @@ int ide_register_hw_with_fixup(hw_regs_t *hw, int initializing,
 			if (hwif->hold)
 				continue;
 			if ((!hwif->present && !hwif->mate && !initializing) ||
-			    (!hwif->hw.io_ports[IDE_DATA_OFFSET] && initializing))
+			    (!hwif->io_ports[IDE_DATA_OFFSET] && initializing))
 				goto found;
 		}
 		for (index = 0; index < MAX_HWIFS; index++)
@@ -706,16 +729,18 @@ found:
 	}
 	if (hwif->present)
 		return -1;
-	memcpy(&hwif->hw, hw, sizeof(*hw));
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+	memcpy(hwif->io_ports, hw->io_ports, sizeof(hwif->io_ports));
 	hwif->irq = hw->irq;
 	hwif->noprobe = 0;
+	hwif->fixup = fixup;
 	hwif->chipset = hw->chipset;
 	hwif->gendev.parent = hw->dev;
+	hwif->ack_intr = hw->ack_intr;
+
+	if (initializing == 0) {
+		u8 idx[4] = { index, 0xff, 0xff, 0xff };
 
-	if (!initializing) {
-		probe_hwif_init_with_fixup(hwif, fixup);
-		ide_proc_register_port(hwif);
+		ide_device_add(idx);
 	}
 
 	if (hwifp)
@@ -724,13 +749,6 @@ found:
 	return (initializing || hwif->present) ? index : -1;
 }
 
-EXPORT_SYMBOL(ide_register_hw_with_fixup);
-
-int ide_register_hw(hw_regs_t *hw, int initializing, ide_hwif_t **hwifp)
-{
-	return ide_register_hw_with_fixup(hw, initializing, hwifp, NULL);
-}
-
 EXPORT_SYMBOL(ide_register_hw);
 
 /*
@@ -1046,7 +1064,7 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device
 			ide_init_hwif_ports(&hw, (unsigned long) args[0],
 					    (unsigned long) args[1], NULL);
 			hw.irq = args[2];
-			if (ide_register_hw(&hw, 0, NULL) == -1)
+			if (ide_register_hw(&hw, NULL, 0, NULL) == -1)
 				return -EIO;
 			return 0;
 		}
@@ -1397,6 +1415,9 @@ static int __init ide_setup(char *s)
 			"reset", "minus6", "ata66", "minus8", "minus9",
 			"minus10", "four", "qd65xx", "ht6560b", "cmd640_vlb",
 			"dtc2278", "umc8672", "ali14xx", NULL };
+
+		hw_regs_t hwregs;
+
 		hw = s[3] - '0';
 		hwif = &ide_hwifs[hw];
 		i = match_parm(&s[4], ide_words, vals, 3);
@@ -1506,9 +1527,9 @@ static int __init ide_setup(char *s)
 			case 2: /* base,ctl */
 				vals[2] = 0;	/* default irq = probe for it */
 			case 3: /* base,ctl,irq */
-				hwif->hw.irq = vals[2];
-				ide_init_hwif_ports(&hwif->hw, (unsigned long) vals[0], (unsigned long) vals[1], &hwif->irq);
-				memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+				memset(&hwregs, 0, sizeof(hwregs));
+				ide_init_hwif_ports(&hwregs, vals[0], vals[1], &hwif->irq);
+				memcpy(hwif->io_ports, hwregs.io_ports, sizeof(hwif->io_ports));
 				hwif->irq      = vals[2];
 				hwif->noprobe  = 0;
 				hwif->chipset  = ide_forced;
diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c
index 2f0ef9b4403..10311ecc674 100644
--- a/drivers/ide/legacy/ali14xx.c
+++ b/drivers/ide/legacy/ali14xx.c
@@ -102,6 +102,8 @@ static void outReg (u8 data, u8 reg)
 	outb_p(data, dataPort);
 }
 
+static DEFINE_SPINLOCK(ali14xx_lock);
+
 /*
  * Set PIO mode for the specified drive.
  * This function computes timing parameters
@@ -129,14 +131,14 @@ static void ali14xx_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 	/* stuff timing parameters into controller registers */
 	driveNum = (HWIF(drive)->index << 1) + drive->select.b.unit;
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&ali14xx_lock, flags);
 	outb_p(regOn, basePort);
 	outReg(param1, regTab[driveNum].reg1);
 	outReg(param2, regTab[driveNum].reg2);
 	outReg(param3, regTab[driveNum].reg3);
 	outReg(param4, regTab[driveNum].reg4);
 	outb_p(regOff, basePort);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&ali14xx_lock, flags);
 }
 
 /*
@@ -193,6 +195,7 @@ static int __init initRegisters (void) {
 static int __init ali14xx_probe(void)
 {
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	printk(KERN_DEBUG "ali14xx: base=0x%03x, regOn=0x%02x.\n",
 			  basePort, regOn);
@@ -217,11 +220,7 @@ static int __init ali14xx_probe(void)
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c
index 101aee1711c..4a0be251a05 100644
--- a/drivers/ide/legacy/buddha.c
+++ b/drivers/ide/legacy/buddha.c
@@ -212,8 +212,8 @@ fail_base2:
 //						xsurf_iops,
 						IRQ_AMIGA_PORTS);
 			}	
-			
-			index = ide_register_hw(&hw, 1, &hwif);
+
+			index = ide_register_hw(&hw, NULL, 1, &hwif);
 			if (index != -1) {
 				hwif->mmio = 1;
 				printk("ide%d: ", index);
diff --git a/drivers/ide/legacy/dtc2278.c b/drivers/ide/legacy/dtc2278.c
index f1652125486..24a845d45bd 100644
--- a/drivers/ide/legacy/dtc2278.c
+++ b/drivers/ide/legacy/dtc2278.c
@@ -67,20 +67,24 @@ static void sub22 (char b, char c)
 	}
 }
 
+static DEFINE_SPINLOCK(dtc2278_lock);
+
 static void dtc2278_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	unsigned long flags;
 
 	if (pio >= 3) {
-		spin_lock_irqsave(&ide_lock, flags);
+		spin_lock_irqsave(&dtc2278_lock, flags);
 		/*
 		 * This enables PIO mode4 (3?) on the first interface
 		 */
 		sub22(1,0xc3);
 		sub22(0,0xa0);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&dtc2278_lock, flags);
 	} else {
 		/* we don't know how to set it back again.. */
+		/* Actually we do - there is a data sheet available for the
+		   Winbond but does anyone actually care */
 	}
 
 	/*
@@ -94,6 +98,7 @@ static int __init dtc2278_probe(void)
 {
 	unsigned long flags;
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	hwif = &ide_hwifs[0];
 	mate = &ide_hwifs[1];
@@ -129,16 +134,13 @@ static int __init dtc2278_probe(void)
 
 	mate->serialized = 1;
 	mate->chipset = ide_dtc2278;
+	mate->pio_mask = ATA_PIO4;
 	mate->drives[0].no_unmask = 1;
 	mate->drives[1].no_unmask = 1;
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c
index f0829b83e97..7d7936f1b90 100644
--- a/drivers/ide/legacy/falconide.c
+++ b/drivers/ide/legacy/falconide.c
@@ -72,7 +72,7 @@ void __init falconide_init(void)
 			0, 0, NULL,
 //			falconide_iops,
 			IRQ_MFP_IDE);
-	index = ide_register_hw(&hw, 1, NULL);
+	index = ide_register_hw(&hw, NULL, 1, NULL);
 
 	if (index != -1)
 	    printk("ide%d: Falcon IDE interface\n", index);
diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c
index 0830a021bbb..53331ee1e95 100644
--- a/drivers/ide/legacy/gayle.c
+++ b/drivers/ide/legacy/gayle.c
@@ -165,7 +165,7 @@ found:
 //			&gayle_iops,
 			IRQ_AMIGA_PORTS);
 
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 	if (index != -1) {
 	    hwif->mmio = 1;
 	    switch (i) {
diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c
index 2e5a9cc5c0f..a4245d13f11 100644
--- a/drivers/ide/legacy/ht6560b.c
+++ b/drivers/ide/legacy/ht6560b.c
@@ -247,6 +247,8 @@ static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
 	}
 }
 
+static DEFINE_SPINLOCK(ht6560b_lock);
+
 /*
  *  Enable/Disable so called prefetch mode
  */
@@ -254,9 +256,9 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state)
 {
 	unsigned long flags;
 	int t = HT_PREFETCH_MODE << 8;
-	
-	spin_lock_irqsave(&ide_lock, flags);
-	
+
+	spin_lock_irqsave(&ht6560b_lock, flags);
+
 	/*
 	 *  Prefetch mode and unmask irq seems to conflict
 	 */
@@ -268,9 +270,9 @@ static void ht_set_prefetch(ide_drive_t *drive, u8 state)
 		drive->drive_data &= ~t;  /* disable prefetch mode */
 		drive->no_unmask = 0;
 	}
-	
-	spin_unlock_irqrestore(&ide_lock, flags);
-	
+
+	spin_unlock_irqrestore(&ht6560b_lock, flags);
+
 #ifdef DEBUG
 	printk("ht6560b: drive %s prefetch mode %sabled\n", drive->name, (state ? "en" : "dis"));
 #endif
@@ -287,16 +289,14 @@ static void ht6560b_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		ht_set_prefetch(drive, pio & 1);
 		return;
 	}
-	
+
 	timing = ht_pio2timings(drive, pio);
-	
-	spin_lock_irqsave(&ide_lock, flags);
-	
+
+	spin_lock_irqsave(&ht6560b_lock, flags);
 	drive->drive_data &= 0xff00;
 	drive->drive_data |= timing;
-	
-	spin_unlock_irqrestore(&ide_lock, flags);
-	
+	spin_unlock_irqrestore(&ht6560b_lock, flags);
+
 #ifdef DEBUG
 	printk("ht6560b: drive %s tuned to pio mode %#x timing=%#x\n", drive->name, pio, timing);
 #endif
@@ -311,6 +311,7 @@ MODULE_PARM_DESC(probe, "probe for HT6560B chipset");
 int __init ht6560b_init(void)
 {
 	ide_hwif_t *hwif, *mate;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 	int t;
 
 	if (probe_ht6560b == 0)
@@ -359,11 +360,7 @@ int __init ht6560b_init(void)
 	mate->drives[0].drive_data = t;
 	mate->drives[1].drive_data = t;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 
diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c
index e8e360c2619..03715c05866 100644
--- a/drivers/ide/legacy/ide-cs.c
+++ b/drivers/ide/legacy/ide-cs.c
@@ -153,7 +153,7 @@ static int idecs_register(unsigned long io, unsigned long ctl, unsigned long irq
     hw.irq = irq;
     hw.chipset = ide_pci;
     hw.dev = &handle->dev;
-    return ide_register_hw_with_fixup(&hw, 0, NULL, ide_undecoded_slave);
+    return ide_register_hw(&hw, &ide_undecoded_slave, 0, NULL);
 }
 
 /*======================================================================
diff --git a/drivers/ide/legacy/ide_platform.c b/drivers/ide/legacy/ide_platform.c
index b992b2b91fe..7bb79f53dac 100644
--- a/drivers/ide/legacy/ide_platform.c
+++ b/drivers/ide/legacy/ide_platform.c
@@ -33,39 +33,24 @@ static ide_hwif_t *__devinit plat_ide_locate_hwif(void __iomem *base,
 	    int mmio)
 {
 	unsigned long port = (unsigned long)base;
-	ide_hwif_t *hwif;
-	int index, i;
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == port)
-			goto found;
-	}
-
-	for (index = 0; index < MAX_HWIFS; ++index) {
-		hwif = ide_hwifs + index;
-		if (hwif->io_ports[IDE_DATA_OFFSET] == 0)
-			goto found;
-	}
+	ide_hwif_t *hwif = ide_find_port(port);
+	int i;
 
-	return NULL;
-
-found:
+	if (hwif == NULL)
+		goto out;
 
-	hwif->hw.io_ports[IDE_DATA_OFFSET] = port;
+	hwif->io_ports[IDE_DATA_OFFSET] = port;
 
 	port += (1 << pdata->ioport_shift);
 	for (i = IDE_ERROR_OFFSET; i <= IDE_STATUS_OFFSET;
 	     i++, port += (1 << pdata->ioport_shift))
-		hwif->hw.io_ports[i] = port;
+		hwif->io_ports[i] = port;
 
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
+	hwif->io_ports[IDE_CONTROL_OFFSET] = (unsigned long)ctrl;
 
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
-	hwif->hw.irq = hwif->irq = irq;
+	hwif->irq = irq;
 
-	hwif->hw.dma = NO_DMA;
-	hwif->chipset = hwif->hw.chipset = ide_generic;
+	hwif->chipset = ide_generic;
 
 	if (mmio) {
 		hwif->mmio = 1;
@@ -73,8 +58,8 @@ found:
 	}
 
 	hwif_prop.hwif = hwif;
-	hwif_prop.index = index;
-
+	hwif_prop.index = hwif->index;
+out:
 	return hwif;
 }
 
@@ -83,6 +68,7 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	struct resource *res_base, *res_alt, *res_irq;
 	ide_hwif_t *hwif;
 	struct pata_platform_info *pdata;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	int ret = 0;
 	int mmio = 0;
 
@@ -130,10 +116,11 @@ static int __devinit plat_ide_probe(struct platform_device *pdev)
 	hwif->gendev.parent = &pdev->dev;
 	hwif->noprobe = 0;
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
+
+	ide_device_add(idx);
 
 	platform_set_drvdata(pdev, hwif);
-	ide_proc_register_port(hwif);
 
 	return 0;
 
diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c
index b557c45a5a9..e87cd2f1643 100644
--- a/drivers/ide/legacy/macide.c
+++ b/drivers/ide/legacy/macide.c
@@ -93,21 +93,21 @@ void macide_init(void)
 				0, 0, macide_ack_intr,
 //				quadra_ide_iops,
 				IRQ_NUBUS_F);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		break;
 	case MAC_IDE_PB:
 		ide_setup_ports(&hw, IDE_BASE, macide_offsets,
 				0, 0, macide_ack_intr,
 //				macide_pb_iops,
 				IRQ_NUBUS_C);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		break;
 	case MAC_IDE_BABOON:
 		ide_setup_ports(&hw, BABOON_BASE, macide_offsets,
 				0, 0, NULL,
 //				macide_baboon_iops,
 				IRQ_BABOON_1);
-		index = ide_register_hw(&hw, 1, &hwif);
+		index = ide_register_hw(&hw, NULL, 1, &hwif);
 		if (index == -1) break;
 		if (macintosh_config->ident == MAC_MODEL_PB190) {
 
diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c
index e628a983ce3..44cdb745a6f 100644
--- a/drivers/ide/legacy/q40ide.c
+++ b/drivers/ide/legacy/q40ide.c
@@ -89,9 +89,8 @@ void q40_ide_setup_ports ( hw_regs_t *hw,
 		else
 			hw->io_ports[i] = Q40_ISA_IO_B(base + offsets[i]);
 	}
-	
+
 	hw->irq = irq;
-	hw->dma = NO_DMA;
 	hw->ack_intr = ack_intr;
 /*
  *	hw->iops = iops;
@@ -142,7 +141,7 @@ void q40ide_init(void)
 			0, NULL,
 //			m68kide_iops,
 			q40ide_default_irq(pcide_bases[i]));
-	index = ide_register_hw(&hw, 1, &hwif);
+	index = ide_register_hw(&hw, NULL, 1, &hwif);
 	// **FIXME**
 	if (index != -1)
 		hwif->mmio = 1;
diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c
index 0c81d2d0b94..912e73853fa 100644
--- a/drivers/ide/legacy/qd65xx.c
+++ b/drivers/ide/legacy/qd65xx.c
@@ -89,26 +89,6 @@
 
 static int timings[4]={-1,-1,-1,-1}; /* stores current timing for each timer */
 
-static void qd_write_reg (u8 content, unsigned long reg)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&ide_lock, flags);
-	outb(content,reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
-}
-
-static u8 __init qd_read_reg (unsigned long reg)
-{
-	unsigned long flags;
-	u8 read;
-
-	spin_lock_irqsave(&ide_lock, flags);
-	read = inb(reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
-	return read;
-}
-
 /*
  * qd_select:
  *
@@ -121,7 +101,7 @@ static void qd_select (ide_drive_t *drive)
 			(QD_TIMREG(drive) & 0x02);
 
 	if (timings[index] != QD_TIMING(drive))
-		qd_write_reg(timings[index] = QD_TIMING(drive), QD_TIMREG(drive));
+		outb(timings[index] = QD_TIMING(drive), QD_TIMREG(drive));
 }
 
 /*
@@ -284,7 +264,7 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	}
 
 	if (!HWIF(drive)->channel && drive->media != ide_disk) {
-		qd_write_reg(0x5f, QD_CONTROL_PORT);
+		outb(0x5f, QD_CONTROL_PORT);
 		printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
 			"and post-write buffer on %s.\n",
 			drive->name, HWIF(drive)->name);
@@ -301,16 +281,15 @@ static void qd6580_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static int __init qd_testreg(int port)
 {
-	u8 savereg;
-	u8 readreg;
 	unsigned long flags;
+	u8 savereg, readreg;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	local_irq_save(flags);
 	savereg = inb_p(port);
 	outb_p(QD_TESTVAL, port);	/* safe value */
 	readreg = inb_p(port);
 	outb(savereg, port);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	local_irq_restore(flags);
 
 	if (savereg == QD_TESTVAL) {
 		printk(KERN_ERR "Outch ! the probe for qd65xx isn't reliable !\n");
@@ -364,13 +343,13 @@ static void __exit qd_unsetup(ide_hwif_t *hwif)
 
 	if (set_pio_mode == (void *)qd6500_set_pio_mode) {
 		// will do it for both
-		qd_write_reg(QD6500_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+		outb(QD6500_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
 	} else if (set_pio_mode == (void *)qd6580_set_pio_mode) {
 		if (QD_CONTROL(hwif) & QD_CONTR_SEC_DISABLED) {
-			qd_write_reg(QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
-			qd_write_reg(QD6580_DEF_DATA2, QD_TIMREG(&hwif->drives[1]));
+			outb(QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+			outb(QD6580_DEF_DATA2, QD_TIMREG(&hwif->drives[1]));
 		} else {
-			qd_write_reg(hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
+			outb(hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA, QD_TIMREG(&hwif->drives[0]));
 		}
 	} else {
 		printk(KERN_WARNING "Unknown qd65xx tuning fonction !\n");
@@ -389,10 +368,11 @@ static void __exit qd_unsetup(ide_hwif_t *hwif)
 static int __init qd_probe(int base)
 {
 	ide_hwif_t *hwif;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	u8 config;
 	u8 unit;
 
-	config = qd_read_reg(QD_CONFIG_PORT);
+	config = inb(QD_CONFIG_PORT);
 
 	if (! ((config & QD_CONFIG_BASEPORT) >> 1 == (base == 0xb0)) )
 		return 1;
@@ -419,9 +399,9 @@ static int __init qd_probe(int base)
 
 		hwif->set_pio_mode = &qd6500_set_pio_mode;
 
-		probe_hwif_init(hwif);
+		idx[0] = unit;
 
-		ide_proc_register_port(hwif);
+		ide_device_add(idx);
 
 		return 1;
 	}
@@ -436,7 +416,7 @@ static int __init qd_probe(int base)
 
 		/* qd6580 found */
 
-		control = qd_read_reg(QD_CONTROL_PORT);
+		control = inb(QD_CONTROL_PORT);
 
 		printk(KERN_NOTICE "qd6580 at %#x\n", base);
 		printk(KERN_DEBUG "qd6580: config=%#x, control=%#x, ID3=%u\n",
@@ -453,11 +433,11 @@ static int __init qd_probe(int base)
 
 			hwif->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(hwif);
+			idx[0] = unit;
 
-			qd_write_reg(QD_DEF_CONTR,QD_CONTROL_PORT);
+			ide_device_add(idx);
 
-			ide_proc_register_port(hwif);
+			outb(QD_DEF_CONTR, QD_CONTROL_PORT);
 
 			return 1;
 		} else {
@@ -474,19 +454,17 @@ static int __init qd_probe(int base)
 
 			hwif->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(hwif);
-
 			qd_setup(mate, base, config | (control << 8),
 				 QD6580_DEF_DATA2, QD6580_DEF_DATA2);
 
 			mate->set_pio_mode = &qd6580_set_pio_mode;
 
-			probe_hwif_init(mate);
+			idx[0] = 0;
+			idx[1] = 1;
 
-			qd_write_reg(QD_DEF_CONTR,QD_CONTROL_PORT);
+			ide_device_add(idx);
 
-			ide_proc_register_port(hwif);
-			ide_proc_register_port(mate);
+			outb(QD_DEF_CONTR, QD_CONTROL_PORT);
 
 			return 0; /* no other qd65xx possible */
 		}
diff --git a/drivers/ide/legacy/umc8672.c b/drivers/ide/legacy/umc8672.c
index 1151c92dd53..79577b91687 100644
--- a/drivers/ide/legacy/umc8672.c
+++ b/drivers/ide/legacy/umc8672.c
@@ -124,8 +124,9 @@ static void umc_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static int __init umc8672_probe(void)
 {
-	unsigned long flags;
 	ide_hwif_t *hwif, *mate;
+	unsigned long flags;
+	static u8 idx[4] = { 0, 1, 0xff, 0xff };
 
 	if (!request_region(0x108, 2, "umc8672")) {
 		printk(KERN_ERR "umc8672: ports 0x108-0x109 already in use.\n");
@@ -158,11 +159,7 @@ static int __init umc8672_probe(void)
 	mate->mate = hwif;
 	mate->channel = 1;
 
-	probe_hwif_init(hwif);
-	probe_hwif_init(mate);
-
-	ide_proc_register_port(hwif);
-	ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index 2f322d7e881..1de58566e5b 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -601,8 +601,9 @@ static int au_ide_probe(struct device *dev)
 	_auide_hwif *ahwif = &auide_hwif;
 	ide_hwif_t *hwif;
 	struct resource *res;
-	hw_regs_t *hw;
 	int ret = 0;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
+	hw_regs_t hw;
 
 #if defined(CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA)
 	char *mode = "MWDMA2";
@@ -644,12 +645,12 @@ static int au_ide_probe(struct device *dev)
 	/* FIXME:  This might possibly break PCMCIA IDE devices */
 
 	hwif                            = &ide_hwifs[pdev->id];
-	hw 				= &hwif->hw;
-	hwif->irq = hw->irq             = ahwif->irq;
+	hwif->irq			= ahwif->irq;
 	hwif->chipset                   = ide_au1xxx;
 
-	auide_setup_ports(hw, ahwif);
-	memcpy(hwif->io_ports, hw->io_ports, sizeof(hwif->io_ports));
+	memset(&hw, 0, sizeof(hw));
+	auide_setup_ports(&hw, ahwif);
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 
 	hwif->ultra_mask                = 0x0;  /* Disable Ultra DMA */
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
@@ -706,8 +707,10 @@ static int au_ide_probe(struct device *dev)
 	hwif->config_data               = 0;    /* no chipset-specific code */
 
 	hwif->drives[0].autotune        = 1;    /* 1=autotune, 2=noautotune, 0=default */
+	hwif->drives[1].autotune	= 1;
 #endif
-	hwif->drives[0].no_io_32bit     = 1;   
+	hwif->drives[0].no_io_32bit	= 1;
+	hwif->drives[1].no_io_32bit	= 1;
 
 	auide_hwif.hwif                 = hwif;
 	hwif->hwif_data                 = &auide_hwif;
@@ -717,9 +720,9 @@ static int au_ide_probe(struct device *dev)
 	dbdma_init_done = 1;
 #endif
 
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	dev_set_drvdata(dev, hwif);
 
diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c
index c2e29571b00..521edd41b57 100644
--- a/drivers/ide/mips/swarm.c
+++ b/drivers/ide/mips/swarm.c
@@ -71,6 +71,7 @@ static int __devinit swarm_ide_probe(struct device *dev)
 	u8 __iomem *base;
 	phys_t offset, size;
 	int i;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	if (!SIBYTE_HAVE_IDE)
 		return -ENODEV;
@@ -119,18 +120,15 @@ static int __devinit swarm_ide_probe(struct device *dev)
 	hwif->noprobe = 0;
 
 	for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++)
-		hwif->hw.io_ports[i] =
+		hwif->io_ports[i] =
 				(unsigned long)(base + ((0x1f0 + i) << 5));
-	hwif->hw.io_ports[IDE_CONTROL_OFFSET] =
+	hwif->io_ports[IDE_CONTROL_OFFSET] =
 				(unsigned long)(base + (0x3f6 << 5));
-	hwif->hw.irq = K_INT_GB_IDE;
+	hwif->irq = K_INT_GB_IDE;
 
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
-	hwif->irq = hwif->hw.irq;
+	idx[0] = hwif->index;
 
-	probe_hwif_init(hwif);
-
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	dev_set_drvdata(dev, hwif);
 
diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c
index b3dc12a70d5..19ec421f7b9 100644
--- a/drivers/ide/pci/aec62xx.c
+++ b/drivers/ide/pci/aec62xx.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/aec62xx.c		Version 0.26	Sep 1, 2007
+ * linux/drivers/ide/pci/aec62xx.c		Version 0.27	Sep 16, 2007
  *
  * Copyright (C) 1999-2002	Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2007		MontaVista Software, Inc. <source@mvista.com>
@@ -141,19 +141,6 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	drive->hwif->set_dma_mode(drive, pio + XFER_PIO_0);
 }
 
-static void aec62xx_dma_lost_irq (ide_drive_t *drive)
-{
-	switch (HWIF(drive)->pci_dev->device) {
-		case PCI_DEVICE_ID_ARTOP_ATP860:
-		case PCI_DEVICE_ID_ARTOP_ATP860R:
-		case PCI_DEVICE_ID_ARTOP_ATP865:
-		case PCI_DEVICE_ID_ARTOP_ATP865R:
-			printk(" AEC62XX time out ");
-		default:
-			break;
-	}
-}
-
 static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev, const char *name)
 {
 	int bus_speed = system_bus_clock();
@@ -195,8 +182,6 @@ static void __devinit init_hwif_aec62xx(ide_hwif_t *hwif)
 	if (hwif->dma_base == 0)
 		return;
 
-	hwif->dma_lost_irq	= &aec62xx_dma_lost_irq;
-
 	if (dev->device == PCI_DEVICE_ID_ARTOP_ATP850UF)
 		return;
 
@@ -209,7 +194,7 @@ static void __devinit init_hwif_aec62xx(ide_hwif_t *hwif)
 	}
 }
 
-static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
+static const struct ide_port_info aec62xx_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "AEC6210",
 		.init_chipset	= init_chipset_aec62xx,
@@ -268,12 +253,12 @@ static ide_pci_device_t aec62xx_chipsets[] __devinitdata = {
  *	finds a device matching our IDE device tables.
  *
  *	NOTE: since we're going to modify the 'name' field for AEC-6[26]80[R]
- *	chips, pass a local copy of 'struct pci_device_id' down the call chain.
+ *	chips, pass a local copy of 'struct ide_port_info' down the call chain.
  */
- 
+
 static int __devinit aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = aec62xx_chipsets[idx];
diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c
index 8ee2b48d105..a607dd31a64 100644
--- a/drivers/ide/pci/alim15x3.c
+++ b/drivers/ide/pci/alim15x3.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/alim15x3.c		Version 0.27	Aug 27 2007
+ * linux/drivers/ide/pci/alim15x3.c		Version 0.29	Sep 16 2007
  *
  *  Copyright (C) 1998-2000 Michel Aubry, Maintainer
  *  Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer
@@ -492,6 +492,13 @@ static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const c
 		 * clear bit 7
 		 */
 		pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F);
+		/*
+		 * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
+		 */
+		if (m5229_revision >= 0x20 && isa_dev) {
+			pci_read_config_byte(isa_dev, 0x5e, &tmpbyte);
+			chip_is_1543c_e = ((tmpbyte & 0x1e) == 0x12) ? 1: 0;
+		}
 		goto out;
 	}
 
@@ -537,7 +544,30 @@ static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const c
 			pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02);
 		}
 	}
+
 out:
+	/*
+	 * CD_ROM DMA on (m5229, 0x53, bit0)
+	 *      Enable this bit even if we want to use PIO.
+	 * PIO FIFO off (m5229, 0x53, bit1)
+	 *      The hardware will use 0x54h and 0x55h to control PIO FIFO.
+	 *	(Not on later devices it seems)
+	 *
+	 *	0x53 changes meaning on later revs - we must no touch
+	 *	bit 1 on them.  Need to check if 0x20 is the right break.
+	 */
+	if (m5229_revision >= 0x20) {
+		pci_read_config_byte(dev, 0x53, &tmpbyte);
+
+		if (m5229_revision <= 0x20)
+			tmpbyte = (tmpbyte & (~0x02)) | 0x01;
+		else if (m5229_revision == 0xc7 || m5229_revision == 0xc8)
+			tmpbyte |= 0x03;
+		else
+			tmpbyte |= 0x01;
+
+		pci_write_config_byte(dev, 0x53, tmpbyte);
+	}
 	pci_dev_put(north);
 	pci_dev_put(isa_dev);
 	local_irq_restore(flags);
@@ -616,36 +646,8 @@ static u8 __devinit ata66_ali15x3(ide_hwif_t *hwif)
 			if ((tmpbyte & (1 << hwif->channel)) == 0)
 				cbl = ATA_CBL_PATA80;
 		}
-	} else {
-		/*
-		 * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
-		 */
-		pci_read_config_byte(isa_dev, 0x5e, &tmpbyte);
-		chip_is_1543c_e = ((tmpbyte & 0x1e) == 0x12) ? 1: 0;
 	}
 
-	/*
-	 * CD_ROM DMA on (m5229, 0x53, bit0)
-	 *      Enable this bit even if we want to use PIO
-	 * PIO FIFO off (m5229, 0x53, bit1)
-	 *      The hardware will use 0x54h and 0x55h to control PIO FIFO
-	 *	(Not on later devices it seems)
-	 *
-	 *	0x53 changes meaning on later revs - we must no touch
-	 *	bit 1 on them. Need to check if 0x20 is the right break
-	 */
-	 
-	pci_read_config_byte(dev, 0x53, &tmpbyte);
-	
-	if(m5229_revision <= 0x20)
-		tmpbyte = (tmpbyte & (~0x02)) | 0x01;
-	else if (m5229_revision == 0xc7 || m5229_revision == 0xc8)
-		tmpbyte |= 0x03;
-	else
-		tmpbyte |= 0x01;
-
-	pci_write_config_byte(dev, 0x53, tmpbyte);
-
 	local_irq_restore(flags);
 
 	return cbl;
@@ -664,31 +666,9 @@ static void __devinit init_hwif_common_ali15x3 (ide_hwif_t *hwif)
 	hwif->set_dma_mode = &ali_set_dma_mode;
 	hwif->udma_filter = &ali_udma_filter;
 
-	/* don't use LBA48 DMA on ALi devices before rev 0xC5 */
-	if (m5229_revision <= 0xC4)
-		hwif->host_flags |= IDE_HFLAG_NO_LBA48_DMA;
-
 	if (hwif->dma_base == 0)
 		return;
 
-	/*
-	 * check in ->init_dma guarantees m5229_revision >= 0x20 here
-	 */
-
-	if (m5229_revision == 0x20)
-		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-
-	if (m5229_revision <= 0x20)
-		hwif->ultra_mask = 0x00; /* no udma */
-	else if (m5229_revision < 0xC2)
-		hwif->ultra_mask = ATA_UDMA2;
-	else if (m5229_revision == 0xC2 || m5229_revision == 0xC3)
-		hwif->ultra_mask = ATA_UDMA4;
-	else if (m5229_revision == 0xC4)
-		hwif->ultra_mask = ATA_UDMA5;
-	else
-		hwif->ultra_mask = ATA_UDMA6;
-
 	hwif->dma_setup = &ali15x3_dma_setup;
 
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
@@ -766,7 +746,7 @@ static void __devinit init_dma_ali15x3 (ide_hwif_t *hwif, unsigned long dmabase)
 	ide_setup_dma(hwif, dmabase, 8);
 }
 
-static ide_pci_device_t ali15x3_chipset __devinitdata = {
+static const struct ide_port_info ali15x3_chipset __devinitdata = {
 	.name		= "ALI15X3",
 	.init_chipset	= init_chipset_ali15x3,
 	.init_hwif	= init_hwif_ali15x3,
@@ -792,15 +772,34 @@ static int __devinit alim15x3_init_one(struct pci_dev *dev, const struct pci_dev
 		{ },
 	};
 
-	ide_pci_device_t *d = &ali15x3_chipset;
+	struct ide_port_info d = ali15x3_chipset;
+	u8 rev = dev->revision;
 
 	if (pci_dev_present(ati_rs100))
 		printk(KERN_WARNING "alim15x3: ATI Radeon IGP Northbridge is not yet fully tested.\n");
 
+	/* don't use LBA48 DMA on ALi devices before rev 0xC5 */
+	if (rev <= 0xC4)
+		d.host_flags |= IDE_HFLAG_NO_LBA48_DMA;
+
+	if (rev >= 0x20) {
+		if (rev == 0x20)
+			d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
+
+		if (rev < 0xC2)
+			d.udma_mask = ATA_UDMA2;
+		else if (rev == 0xC2 || rev == 0xC3)
+			d.udma_mask = ATA_UDMA4;
+		else if (rev == 0xC4)
+			d.udma_mask = ATA_UDMA5;
+		else
+			d.udma_mask = ATA_UDMA6;
+	}
+
 #if defined(CONFIG_SPARC64)
-	d->init_hwif = init_hwif_common_ali15x3;
+	d.init_hwif = init_hwif_common_ali15x3;
 #endif /* CONFIG_SPARC64 */
-	return ide_setup_pci_device(dev, d);
+	return ide_setup_pci_device(dev, &d);
 }
 
 
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 7cafefbf6c1..8d4125ec252 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -77,7 +77,7 @@ static struct amd_ide_chip {
 };
 
 static struct amd_ide_chip *amd_config;
-static ide_pci_device_t *amd_chipset;
+static const struct ide_port_info *amd_chipset;
 static unsigned int amd_80w;
 static unsigned int amd_clock;
 
@@ -242,19 +242,12 @@ static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev, const ch
 
 static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
 {
-	int i;
-
 	if (hwif->irq == 0) /* 0 is bogus but will do for now */
 		hwif->irq = pci_get_legacy_ide_irq(hwif->pci_dev, hwif->channel);
 
 	hwif->set_pio_mode = &amd_set_pio_mode;
 	hwif->set_dma_mode = &amd_set_drive;
 
-	for (i = 0; i < 2; i++) {
-		hwif->drives[i].io_32bit = 1;
-		hwif->drives[i].unmask = 1;
-	}
-
 	if (!hwif->dma_base)
 		return;
 
@@ -270,16 +263,21 @@ static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
 	}
 }
 
+#define IDE_HFLAGS_AMD \
+	(IDE_HFLAG_PIO_NO_BLACKLIST | \
+	 IDE_HFLAG_PIO_NO_DOWNGRADE | \
+	 IDE_HFLAG_POST_SET_MODE | \
+	 IDE_HFLAG_IO_32BIT | \
+	 IDE_HFLAG_UNMASK_IRQS | \
+	 IDE_HFLAG_BOOTABLE)
+
 #define DECLARE_AMD_DEV(name_str)					\
 	{								\
 		.name		= name_str,				\
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},	\
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |		\
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |		\
-				  IDE_HFLAG_POST_SET_MODE |		\
-				  IDE_HFLAG_BOOTABLE,			\
+		.host_flags	= IDE_HFLAGS_AMD,			\
 		.pio_mask	= ATA_PIO5,				\
 		.swdma_mask	= ATA_SWDMA2,				\
 		.mwdma_mask	= ATA_MWDMA2,				\
@@ -291,16 +289,13 @@ static void __devinit init_hwif_amd74xx(ide_hwif_t *hwif)
 		.init_chipset	= init_chipset_amd74xx,			\
 		.init_hwif	= init_hwif_amd74xx,			\
 		.enablebits	= {{0x50,0x02,0x02}, {0x50,0x01,0x01}},	\
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |		\
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |		\
-				  IDE_HFLAG_POST_SET_MODE |		\
-				  IDE_HFLAG_BOOTABLE,			\
+		.host_flags	= IDE_HFLAGS_AMD,			\
 		.pio_mask	= ATA_PIO5,				\
 		.swdma_mask	= ATA_SWDMA2,				\
 		.mwdma_mask	= ATA_MWDMA2,				\
 	}
 
-static ide_pci_device_t amd74xx_chipsets[] __devinitdata = {
+static const struct ide_port_info amd74xx_chipsets[] __devinitdata = {
 	/*  0 */ DECLARE_AMD_DEV("AMD7401"),
 	/*  1 */ DECLARE_AMD_DEV("AMD7409"),
 	/*  2 */ DECLARE_AMD_DEV("AMD7411"),
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index 30784305307..ef8e0164ef7 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -189,8 +189,7 @@ static void __devinit init_hwif_atiixp(ide_hwif_t *hwif)
 	hwif->dma_host_off = &atiixp_dma_host_off;
 }
 
-
-static ide_pci_device_t atiixp_pci_info[] __devinitdata = {
+static const struct ide_port_info atiixp_pci_info[] __devinitdata = {
 	{	/* 0 */
 		.name		= "ATIIXP",
 		.init_hwif	= init_hwif_atiixp,
diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c
index f369645e4d1..4aa48104e0c 100644
--- a/drivers/ide/pci/cmd640.c
+++ b/drivers/ide/pci/cmd640.c
@@ -185,6 +185,8 @@ static u8 recovery_counts[4] = {16, 16, 16, 16}; /* Recovery count (encoded) */
 
 #endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
 
+static DEFINE_SPINLOCK(cmd640_lock);
+
 /*
  * These are initialized to point at the devices we control
  */
@@ -258,12 +260,12 @@ static u8 get_cmd640_reg_vlb (u16 reg)
 
 static u8 get_cmd640_reg(u16 reg)
 {
-	u8 b;
 	unsigned long flags;
+	u8 b;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	b = __get_cmd640_reg(reg);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return b;
 }
 
@@ -271,9 +273,9 @@ static void put_cmd640_reg(u16 reg, u8 val)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	__put_cmd640_reg(reg,val);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 static int __init match_pci_cmd640_device (void)
@@ -351,7 +353,7 @@ static int __init secondary_port_responding (void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 
 	outb_p(0x0a, 0x170 + IDE_SELECT_OFFSET);	/* select drive0 */
 	udelay(100);
@@ -359,11 +361,11 @@ static int __init secondary_port_responding (void)
 		outb_p(0x1a, 0x170 + IDE_SELECT_OFFSET); /* select drive1 */
 		udelay(100);
 		if ((inb_p(0x170 + IDE_SELECT_OFFSET) & 0x1f) != 0x1a) {
-			spin_unlock_irqrestore(&ide_lock, flags);
+			spin_unlock_irqrestore(&cmd640_lock, flags);
 			return 0; /* nothing responded */
 		}
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 1; /* success */
 }
 
@@ -440,11 +442,11 @@ static void __init setup_device_ptrs (void)
 static void set_prefetch_mode (unsigned int index, int mode)
 {
 	ide_drive_t *drive = cmd_drives[index];
+	unsigned long flags;
 	int reg = prefetch_regs[index];
 	u8 b;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	b = __get_cmd640_reg(reg);
 	if (mode) {	/* want prefetch on? */
 #if CMD640_PREFETCH_MASKS
@@ -460,7 +462,7 @@ static void set_prefetch_mode (unsigned int index, int mode)
 		b |= prefetch_masks[index];	/* disable prefetch */
 	}
 	__put_cmd640_reg(reg, b);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 /*
@@ -561,7 +563,7 @@ static void program_drive_counts (unsigned int index)
 	/*
 	 * Now that everything is ready, program the new timings
 	 */
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	/*
 	 * Program the address_setup clocks into ARTTIM reg,
 	 * and then the active/recovery counts into the DRWTIM reg
@@ -570,7 +572,7 @@ static void program_drive_counts (unsigned int index)
 	setup_count |= __get_cmd640_reg(arttim_regs[index]) & 0x3f;
 	__put_cmd640_reg(arttim_regs[index], setup_count);
 	__put_cmd640_reg(drwtim_regs[index], pack_nibbles(active_count, recovery_count));
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 }
 
 /*
@@ -670,20 +672,20 @@ static void cmd640_set_pio_mode(ide_drive_t *drive, const u8 pio)
 
 static int pci_conf1(void)
 {
-	u32 tmp;
 	unsigned long flags;
+	u32 tmp;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	outb(0x01, 0xCFB);
 	tmp = inl(0xCF8);
 	outl(0x80000000, 0xCF8);
 	if (inl(0xCF8) == 0x80000000) {
 		outl(tmp, 0xCF8);
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&cmd640_lock, flags);
 		return 1;
 	}
 	outl(tmp, 0xCF8);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 0;
 }
 
@@ -691,15 +693,15 @@ static int pci_conf2(void)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&cmd640_lock, flags);
 	outb(0x00, 0xCFB);
 	outb(0x00, 0xCF8);
 	outb(0x00, 0xCFA);
 	if (inb(0xCF8) == 0x00 && inb(0xCF8) == 0x00) {
-		spin_unlock_irqrestore(&ide_lock, flags);
+		spin_unlock_irqrestore(&cmd640_lock, flags);
 		return 1;
 	}
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&cmd640_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c
index adee2ef6fd7..ea0143ef5fe 100644
--- a/drivers/ide/pci/cmd64x.c
+++ b/drivers/ide/pci/cmd64x.c
@@ -535,7 +535,6 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
 		hwif->ide_dma_test_irq	= &cmd648_ide_dma_test_irq;
 		break;
 	case PCI_DEVICE_ID_CMD_646:
-		hwif->chipset = ide_cmd646;
 		if (dev->revision == 0x01) {
 			hwif->ide_dma_end = &cmd646_1_ide_dma_end;
 			break;
@@ -549,7 +548,7 @@ static void __devinit init_hwif_cmd64x(ide_hwif_t *hwif)
 	}
 }
 
-static ide_pci_device_t cmd64x_chipsets[] __devinitdata = {
+static const struct ide_port_info cmd64x_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "CMD643",
 		.init_chipset	= init_chipset_cmd64x,
@@ -573,6 +572,7 @@ static ide_pci_device_t cmd64x_chipsets[] __devinitdata = {
 		.init_chipset	= init_chipset_cmd64x,
 		.init_hwif	= init_hwif_cmd64x,
 		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
+		.chipset	= ide_cmd646,
 		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH | IDE_HFLAG_BOOTABLE,
 		.pio_mask	= ATA_PIO5,
 		.mwdma_mask	= ATA_MWDMA2,
@@ -591,7 +591,7 @@ static ide_pci_device_t cmd64x_chipsets[] __devinitdata = {
 
 static int __devinit cmd64x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = cmd64x_chipsets[idx];
diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c
index aa98e817d38..0466462fd21 100644
--- a/drivers/ide/pci/cs5520.c
+++ b/drivers/ide/pci/cs5520.c
@@ -141,7 +141,7 @@ static void __devinit init_hwif_cs5520(ide_hwif_t *hwif)
 		.pio_mask	= ATA_PIO4,			\
 	}
 
-static ide_pci_device_t cyrix_chipsets[] __devinitdata = {
+static const struct ide_port_info cyrix_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_CS_DEV("Cyrix 5510"),
 	/* 1 */ DECLARE_CS_DEV("Cyrix 5520")
 };
@@ -154,9 +154,8 @@ static ide_pci_device_t cyrix_chipsets[] __devinitdata = {
  
 static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_hwif_t *hwif = NULL, *mate = NULL;
-	ata_index_t index;
-	ide_pci_device_t *d = &cyrix_chipsets[id->driver_data];
+	const struct ide_port_info *d = &cyrix_chipsets[id->driver_data];
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	ide_setup_pci_noise(dev, d);
 
@@ -172,29 +171,14 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic
 		return -ENODEV;
 	}
 
-	index.all = 0xf0f0;
-
 	/*
 	 *	Now the chipset is configured we can let the core
 	 *	do all the device setup for us
 	 */
 
-	ide_pci_setup_ports(dev, d, 14, &index);
-
-	if ((index.b.low & 0xf0) != 0xf0)
-		hwif = &ide_hwifs[index.b.low];
-	if ((index.b.high & 0xf0) != 0xf0)
-		mate = &ide_hwifs[index.b.high];
-
-	if (hwif)
-		probe_hwif_init(hwif);
-	if (mate)
-		probe_hwif_init(mate);
+	ide_pci_setup_ports(dev, d, 14, &idx[0]);
 
-	if (hwif)
-		ide_proc_register_port(hwif);
-	if (mate)
-		ide_proc_register_port(mate);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c
index ba0c6eba024..599408952bd 100644
--- a/drivers/ide/pci/cs5530.c
+++ b/drivers/ide/pci/cs5530.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/cs5530.c		Version 0.76	Aug 3 2007
+ * linux/drivers/ide/pci/cs5530.c		Version 0.77	Sep 24 2007
  *
  * Copyright (C) 2000			Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2000			Mark Lord <mlord@pobox.com>
@@ -146,7 +146,6 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode)
 static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const char *name)
 {
 	struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
-	unsigned long flags;
 
 	if (pci_resource_start(dev, 4) == 0)
 		return -EFAULT;
@@ -171,9 +170,6 @@ static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const ch
 		goto out;
 	}
 
-	spin_lock_irqsave(&ide_lock, flags);
-		/* all CPUs (there should only be one CPU with this chipset) */
-
 	/*
 	 * Enable BusMaster and MemoryWriteAndInvalidate for the cs5530:
 	 * -->  OR 0x14 into 16-bit PCI COMMAND reg of function 0 of the cs5530
@@ -224,8 +220,6 @@ static unsigned int __devinit init_chipset_cs5530 (struct pci_dev *dev, const ch
 	pci_write_config_byte(master_0, 0x42, 0x00);
 	pci_write_config_byte(master_0, 0x43, 0xc1);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
-
 out:
 	pci_dev_put(master_0);
 	pci_dev_put(cs5530_0);
@@ -261,7 +255,7 @@ static void __devinit init_hwif_cs5530 (ide_hwif_t *hwif)
 	hwif->udma_filter = cs5530_udma_filter;
 }
 
-static ide_pci_device_t cs5530_chipset __devinitdata = {
+static const struct ide_port_info cs5530_chipset __devinitdata = {
 	.name		= "CS5530",
 	.init_chipset	= init_chipset_cs5530,
 	.init_hwif	= init_hwif_cs5530,
diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c
index 5ac82ffa5c0..9094916e378 100644
--- a/drivers/ide/pci/cs5535.c
+++ b/drivers/ide/pci/cs5535.c
@@ -186,7 +186,7 @@ static void __devinit init_hwif_cs5535(ide_hwif_t *hwif)
 	hwif->cbl = cs5535_cable_detect(hwif->pci_dev);
 }
 
-static ide_pci_device_t cs5535_chipset __devinitdata = {
+static const struct ide_port_info cs5535_chipset __devinitdata = {
 	.name		= "CS5535",
 	.init_hwif	= init_hwif_cs5535,
 	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_POST_SET_MODE |
diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c
index efc20bd97fd..3ef4fc10fe2 100644
--- a/drivers/ide/pci/cy82c693.c
+++ b/drivers/ide/pci/cy82c693.c
@@ -428,7 +428,6 @@ static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev, const c
  */
 static void __devinit init_hwif_cy82c693(ide_hwif_t *hwif)
 {
-	hwif->chipset = ide_cy82c693;
 	hwif->set_pio_mode = &cy82c693_set_pio_mode;
 
 	if (hwif->dma_base == 0)
@@ -449,11 +448,12 @@ static void __devinit init_iops_cy82c693(ide_hwif_t *hwif)
 	}
 }
 
-static ide_pci_device_t cy82c693_chipset __devinitdata = {
+static const struct ide_port_info cy82c693_chipset __devinitdata = {
 	.name		= "CY82C693",
 	.init_chipset	= init_chipset_cy82c693,
 	.init_iops	= init_iops_cy82c693,
 	.init_hwif	= init_hwif_cy82c693,
+	.chipset	= ide_cy82c693,
 	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 			  IDE_HFLAG_BOOTABLE,
 	.pio_mask	= ATA_PIO4,
diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c
index 46f4a888c03..83829081640 100644
--- a/drivers/ide/pci/delkin_cb.c
+++ b/drivers/ide/pci/delkin_cb.c
@@ -80,7 +80,7 @@ delkin_cb_probe (struct pci_dev *dev, const struct pci_device_id *id)
 	hw.irq = dev->irq;
 	hw.chipset = ide_pci;		/* this enables IRQ sharing */
 
-	rc = ide_register_hw_with_fixup(&hw, 0, &hwif, ide_undecoded_slave);
+	rc = ide_register_hw(&hw, &ide_undecoded_slave, 0, &hwif);
 	if (rc < 0) {
 		printk(KERN_ERR "delkin_cb: ide_register_hw failed (%d)\n", rc);
 		pci_disable_device(dev);
diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c
index 51165832e7f..f44d70852c3 100644
--- a/drivers/ide/pci/generic.c
+++ b/drivers/ide/pci/generic.c
@@ -54,37 +54,24 @@ __setup("all-generic-ide", ide_generic_all_on);
 module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
 MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
 
-static void __devinit init_hwif_generic (ide_hwif_t *hwif)
-{
-	switch(hwif->pci_dev->device) {
-		case PCI_DEVICE_ID_UMC_UM8673F:
-		case PCI_DEVICE_ID_UMC_UM8886A:
-		case PCI_DEVICE_ID_UMC_UM8886BF:
-			hwif->irq = hwif->channel ? 15 : 14;
-			break;
-		default:
-			break;
-	}
-}
+#define IDE_HFLAGS_UMC (IDE_HFLAG_NO_DMA | IDE_HFLAG_FORCE_LEGACY_IRQS)
 
-#define DECLARE_GENERIC_PCI_DEV(name_str, dma_setting) \
+#define DECLARE_GENERIC_PCI_DEV(name_str, extra_flags) \
 	{ \
 		.name		= name_str, \
-		.init_hwif	= init_hwif_generic, \
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA | \
-				  dma_setting | \
+				  extra_flags | \
 				  IDE_HFLAG_BOOTABLE, \
 		.swdma_mask	= ATA_SWDMA2, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= ATA_UDMA6, \
 	}
 
-static ide_pci_device_t generic_chipsets[] __devinitdata = {
+static const struct ide_port_info generic_chipsets[] __devinitdata = {
 	/*  0 */ DECLARE_GENERIC_PCI_DEV("Unknown",	0),
 
 	{	/* 1 */
 		.name		= "NS87410",
-		.init_hwif	= init_hwif_generic,
 		.enablebits	= {{0x43,0x08,0x08}, {0x47,0x08,0x08}},
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_BOOTABLE,
@@ -95,16 +82,15 @@ static ide_pci_device_t generic_chipsets[] __devinitdata = {
 
 	/*  2 */ DECLARE_GENERIC_PCI_DEV("SAMURAI",	0),
 	/*  3 */ DECLARE_GENERIC_PCI_DEV("HT6565",	0),
-	/*  4 */ DECLARE_GENERIC_PCI_DEV("UM8673F",	IDE_HFLAG_NO_DMA),
-	/*  5 */ DECLARE_GENERIC_PCI_DEV("UM8886A",	IDE_HFLAG_NO_DMA),
-	/*  6 */ DECLARE_GENERIC_PCI_DEV("UM8886BF",	IDE_HFLAG_NO_DMA),
+	/*  4 */ DECLARE_GENERIC_PCI_DEV("UM8673F",	IDE_HFLAGS_UMC),
+	/*  5 */ DECLARE_GENERIC_PCI_DEV("UM8886A",	IDE_HFLAGS_UMC),
+	/*  6 */ DECLARE_GENERIC_PCI_DEV("UM8886BF",	IDE_HFLAGS_UMC),
 	/*  7 */ DECLARE_GENERIC_PCI_DEV("HINT_IDE",	0),
 	/*  8 */ DECLARE_GENERIC_PCI_DEV("VIA_IDE",	IDE_HFLAG_NO_AUTODMA),
 	/*  9 */ DECLARE_GENERIC_PCI_DEV("OPTI621V",	IDE_HFLAG_NO_AUTODMA),
 
 	{	/* 10 */
 		.name		= "VIA8237SATA",
-		.init_hwif	= init_hwif_generic,
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
 		.swdma_mask	= ATA_SWDMA2,
@@ -118,7 +104,6 @@ static ide_pci_device_t generic_chipsets[] __devinitdata = {
 
 	{	/* 14 */
 		.name		= "Revolution",
-		.init_hwif	= init_hwif_generic,
 		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
 				  IDE_HFLAG_OFF_BOARD,
 		.swdma_mask	= ATA_SWDMA2,
@@ -138,7 +123,7 @@ static ide_pci_device_t generic_chipsets[] __devinitdata = {
  
 static int __devinit generic_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &generic_chipsets[id->driver_data];
+	const struct ide_port_info *d = &generic_chipsets[id->driver_data];
 	int ret = -ENODEV;
 
 	/* Don't use the generic entry unless instructed to do so */
diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c
index 67af1a7dde3..ae6307fae4f 100644
--- a/drivers/ide/pci/hpt34x.c
+++ b/drivers/ide/pci/hpt34x.c
@@ -129,7 +129,7 @@ static void __devinit init_hwif_hpt34x(ide_hwif_t *hwif)
 	hwif->set_dma_mode = &hpt34x_set_mode;
 }
 
-static ide_pci_device_t hpt34x_chipsets[] __devinitdata = {
+static const struct ide_port_info hpt34x_chipsets[] __devinitdata = {
 	{ /* 0 */
 		.name		= "HPT343",
 		.init_chipset	= init_chipset_hpt34x,
@@ -158,7 +158,7 @@ static ide_pci_device_t hpt34x_chipsets[] __devinitdata = {
 
 static int __devinit hpt34x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	u16 pcicmd = 0;
 
 	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c
index 18f5b7ddaee..612b795241b 100644
--- a/drivers/ide/pci/hpt366.c
+++ b/drivers/ide/pci/hpt366.c
@@ -1425,7 +1425,7 @@ static int __devinit hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
 	return 0;
 }
 
-static ide_pci_device_t hpt366_chipsets[] __devinitdata = {
+static const struct ide_port_info hpt366_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "HPT36x",
 		.init_chipset	= init_chipset_hpt366,
@@ -1510,7 +1510,7 @@ static int __devinit hpt366_init_one(struct pci_dev *dev, const struct pci_devic
 {
 	struct hpt_info *info = NULL;
 	struct pci_dev *dev2 = NULL;
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 	u8 rev = dev->revision;
 
diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c
index dfbe605120c..90b52ed37bf 100644
--- a/drivers/ide/pci/it8213.c
+++ b/drivers/ide/pci/it8213.c
@@ -193,7 +193,7 @@ static void __devinit init_hwif_it8213(ide_hwif_t *hwif)
 		.udma_mask	= ATA_UDMA6,		\
 	}
 
-static ide_pci_device_t it8213_chipsets[] __devinitdata = {
+static const struct ide_port_info it8213_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_ITE_DEV("IT8213"),
 };
 
diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c
index ec45b724720..1a7ddd12e65 100644
--- a/drivers/ide/pci/it821x.c
+++ b/drivers/ide/pci/it821x.c
@@ -638,7 +638,7 @@ static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev, const cha
 		.pio_mask	= ATA_PIO4,		\
 	}
 
-static ide_pci_device_t it821x_chipsets[] __devinitdata = {
+static const struct ide_port_info it821x_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_ITE_DEV("IT8212"),
 };
 
diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c
index 2eeff670d9a..bdf64d99770 100644
--- a/drivers/ide/pci/jmicron.c
+++ b/drivers/ide/pci/jmicron.c
@@ -118,7 +118,7 @@ static void __devinit init_hwif_jmicron(ide_hwif_t *hwif)
 		hwif->cbl = ata66_jmicron(hwif);
 }
 
-static ide_pci_device_t jmicron_chipset __devinitdata = {
+static const struct ide_port_info jmicron_chipset __devinitdata = {
 	.name		= "JMB",
 	.init_hwif	= init_hwif_jmicron,
 	.host_flags	= IDE_HFLAG_BOOTABLE,
diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c
index d21b5892382..d4df4642dbb 100644
--- a/drivers/ide/pci/ns87415.c
+++ b/drivers/ide/pci/ns87415.c
@@ -260,7 +260,7 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif)
 	hwif->ide_dma_end = &ns87415_ide_dma_end;
 }
 
-static ide_pci_device_t ns87415_chipset __devinitdata = {
+static const struct ide_port_info ns87415_chipset __devinitdata = {
 	.name		= "NS87415",
 #ifdef CONFIG_SUPERIO
 	.init_iops	= init_iops_ns87415,
diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c
index 3573ffeaaa3..8953d9c3926 100644
--- a/drivers/ide/pci/opti621.c
+++ b/drivers/ide/pci/opti621.c
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/ide/pci/opti621.c		Version 0.8	Aug 27, 2007
+ *  linux/drivers/ide/pci/opti621.c		Version 0.9	Sep 24, 2007
  *
  *  Copyright (C) 1996-1998  Linus Torvalds & authors (see below)
  */
@@ -133,6 +133,8 @@ static int reg_base;
 #define PIO_NOT_EXIST 254
 #define PIO_DONT_KNOW 255
 
+static DEFINE_SPINLOCK(opti621_lock);
+
 /* there are stored pio numbers from other calls of opti621_set_pio_mode */
 static void compute_pios(ide_drive_t *drive, const u8 pio)
 /* Store values into drive->drive_data
@@ -278,7 +280,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
 		second.recovery_time, drdy);
 #endif
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&opti621_lock, flags);
 
      	reg_base = hwif->io_ports[IDE_DATA_OFFSET];
 
@@ -317,7 +319,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	/*  and read prefetch for both drives */
 	write_reg(misc, MISC_REG);
 
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&opti621_lock, flags);
 }
 
 /*
@@ -331,7 +333,7 @@ static void __devinit init_hwif_opti621 (ide_hwif_t *hwif)
 	hwif->set_pio_mode = &opti621_set_pio_mode;
 }
 
-static ide_pci_device_t opti621_chipsets[] __devinitdata = {
+static const struct ide_port_info opti621_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "OPTI621",
 		.init_hwif	= init_hwif_opti621,
diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c
index d1e7823454f..4234efeba60 100644
--- a/drivers/ide/pci/pdc202xx_new.c
+++ b/drivers/ide/pci/pdc202xx_new.c
@@ -513,7 +513,7 @@ static struct pci_dev * __devinit pdc20270_get_dev2(struct pci_dev *dev)
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t pdcnew_chipsets[] __devinitdata = {
+static const struct ide_port_info pdcnew_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_PDCNEW_DEV("PDC20268", ATA_UDMA5),
 	/* 1 */ DECLARE_PDCNEW_DEV("PDC20269", ATA_UDMA6),
 	/* 2 */ DECLARE_PDCNEW_DEV("PDC20270", ATA_UDMA5),
@@ -534,7 +534,7 @@ static ide_pci_device_t pdcnew_chipsets[] __devinitdata = {
  
 static int __devinit pdc202new_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	struct pci_dev *bridge = dev->bus->self;
 	u8 idx = id->driver_data;
 
diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c
index 29306121dc4..e09742e2ba5 100644
--- a/drivers/ide/pci/pdc202xx_old.c
+++ b/drivers/ide/pci/pdc202xx_old.c
@@ -302,13 +302,6 @@ static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev,
 
 static void __devinit init_hwif_pdc202xx(ide_hwif_t *hwif)
 {
-	struct pci_dev *dev = hwif->pci_dev;
-
-	/* PDC20265 has problems with large LBA48 requests */
-	if ((dev->device == PCI_DEVICE_ID_PROMISE_20267) ||
-	    (dev->device == PCI_DEVICE_ID_PROMISE_20265))
-		hwif->rqsize = 256;
-
 	hwif->set_pio_mode = &pdc202xx_set_pio_mode;
 	hwif->set_dma_mode = &pdc202xx_set_mode;
 
@@ -382,7 +375,7 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 	}
 }
 
-#define DECLARE_PDC2026X_DEV(name_str, udma) \
+#define DECLARE_PDC2026X_DEV(name_str, udma, extra_flags) \
 	{ \
 		.name		= name_str, \
 		.init_chipset	= init_chipset_pdc202xx, \
@@ -390,13 +383,14 @@ static void __devinit pdc202ata4_fixup_irq(struct pci_dev *dev,
 		.init_dma	= init_dma_pdc202xx, \
 		.extra		= 48, \
 		.host_flags	= IDE_HFLAG_ERROR_STOPS_FIFO | \
+				  extra_flags | \
 				  IDE_HFLAG_OFF_BOARD, \
 		.pio_mask	= ATA_PIO4, \
 		.mwdma_mask	= ATA_MWDMA2, \
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t pdc202xx_chipsets[] __devinitdata = {
+static const struct ide_port_info pdc202xx_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "PDC20246",
 		.init_chipset	= init_chipset_pdc202xx,
@@ -410,10 +404,10 @@ static ide_pci_device_t pdc202xx_chipsets[] __devinitdata = {
 		.udma_mask	= ATA_UDMA2,
 	},
 
-	/* 1 */ DECLARE_PDC2026X_DEV("PDC20262", ATA_UDMA4),
-	/* 2 */ DECLARE_PDC2026X_DEV("PDC20263", ATA_UDMA4),
-	/* 3 */ DECLARE_PDC2026X_DEV("PDC20265", ATA_UDMA5),
-	/* 4 */ DECLARE_PDC2026X_DEV("PDC20267", ATA_UDMA5),
+	/* 1 */ DECLARE_PDC2026X_DEV("PDC20262", ATA_UDMA4, 0),
+	/* 2 */ DECLARE_PDC2026X_DEV("PDC20263", ATA_UDMA4, 0),
+	/* 3 */ DECLARE_PDC2026X_DEV("PDC20265", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
+	/* 4 */ DECLARE_PDC2026X_DEV("PDC20267", ATA_UDMA5, IDE_HFLAG_RQSIZE_256),
 };
 
 /**
@@ -427,7 +421,7 @@ static ide_pci_device_t pdc202xx_chipsets[] __devinitdata = {
  
 static int __devinit pdc202xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d;
+	const struct ide_port_info *d;
 	u8 idx = id->driver_data;
 
 	d = &pdc202xx_chipsets[idx];
diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c
index ec0c6e96a21..9329d4a810e 100644
--- a/drivers/ide/pci/piix.c
+++ b/drivers/ide/pci/piix.c
@@ -396,7 +396,7 @@ static void __devinit init_hwif_ich(ide_hwif_t *hwif)
 		.udma_mask	= udma, \
 	}
 
-static ide_pci_device_t piix_pci_info[] __devinitdata = {
+static const struct ide_port_info piix_pci_info[] __devinitdata = {
 	/*  0 */ DECLARE_PIIX_DEV("PIIXa",	0x00),	/* no udma */
 	/*  1 */ DECLARE_PIIX_DEV("PIIXb",	0x00),	/* no udma */
 
@@ -449,9 +449,7 @@ static ide_pci_device_t piix_pci_info[] __devinitdata = {
  
 static int __devinit piix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &piix_pci_info[id->driver_data];
-
-	return ide_setup_pci_device(dev, d);
+	return ide_setup_pci_device(dev, &piix_pci_info[id->driver_data]);
 }
 
 /**
diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c
index dd2583ef1ad..6b10ae260fa 100644
--- a/drivers/ide/pci/rz1000.c
+++ b/drivers/ide/pci/rz1000.c
@@ -35,13 +35,13 @@ static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
 	u16 reg;
 	struct pci_dev *dev = hwif->pci_dev;
 
-	hwif->chipset = ide_rz1000;
 	if (!pci_read_config_word (dev, 0x40, &reg) &&
 	    !pci_write_config_word(dev, 0x40, reg & 0xdfff)) {
 		printk(KERN_INFO "%s: disabled chipset read-ahead "
 			"(buggy RZ1000/RZ1001)\n", hwif->name);
 	} else {
-		hwif->serialized = 1;
+		if (hwif->mate)
+			hwif->mate->serialized = hwif->serialized = 1;
 		hwif->drives[0].no_unmask = 1;
 		hwif->drives[1].no_unmask = 1;
 		printk(KERN_INFO "%s: serialized, disabled unmasking "
@@ -49,9 +49,10 @@ static void __devinit init_hwif_rz1000 (ide_hwif_t *hwif)
 	}
 }
 
-static ide_pci_device_t rz1000_chipset __devinitdata = {
+static const struct ide_port_info rz1000_chipset __devinitdata = {
 	.name		= "RZ100x",
 	.init_hwif	= init_hwif_rz1000,
+	.chipset	= ide_rz1000,
 	.host_flags	= IDE_HFLAG_NO_DMA | IDE_HFLAG_BOOTABLE,
 };
 
diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c
index b2423e03bf3..d2c8b5524f2 100644
--- a/drivers/ide/pci/sc1200.c
+++ b/drivers/ide/pci/sc1200.c
@@ -372,7 +372,7 @@ static void __devinit init_hwif_sc1200 (ide_hwif_t *hwif)
 	hwif->ide_dma_end   = &sc1200_ide_dma_end;
 }
 
-static ide_pci_device_t sc1200_chipset __devinitdata = {
+static const struct ide_port_info sc1200_chipset __devinitdata = {
 	.name		= "SC1200",
 	.init_hwif	= init_hwif_sc1200,
 	.host_flags	= IDE_HFLAG_SERIALIZE |
diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c
index ae9b50331d2..ebb7132b9b8 100644
--- a/drivers/ide/pci/scc_pata.c
+++ b/drivers/ide/pci/scc_pata.c
@@ -538,12 +538,13 @@ static int setup_mmio_scc (struct pci_dev *dev, const char *name)
 /**
  *	init_setup_scc	-	set up an SCC PATA Controller
  *	@dev: PCI device
- *	@d: IDE PCI device
+ *	@d: IDE port info
  *
  *	Perform the initial set up for this device.
  */
 
-static int __devinit init_setup_scc(struct pci_dev *dev, ide_pci_device_t *d)
+static int __devinit init_setup_scc(struct pci_dev *dev,
+				    const struct ide_port_info *d)
 {
 	unsigned long ctl_base;
 	unsigned long dma_base;
@@ -702,7 +703,7 @@ static void __devinit init_hwif_scc(ide_hwif_t *hwif)
       .pio_mask		= ATA_PIO4,			\
   }
 
-static ide_pci_device_t scc_chipsets[] __devinitdata = {
+static const struct ide_port_info scc_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_SCC_DEV("sccIDE"),
 };
 
@@ -717,9 +718,7 @@ static ide_pci_device_t scc_chipsets[] __devinitdata = {
 
 static int __devinit scc_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t *d = &scc_chipsets[id->driver_data];
-
-	return init_setup_scc(dev, d);
+	return init_setup_scc(dev, &scc_chipsets[id->driver_data]);
 }
 
 /**
diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c
index a3d880e21d0..a7280311357 100644
--- a/drivers/ide/pci/serverworks.c
+++ b/drivers/ide/pci/serverworks.c
@@ -158,13 +158,6 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed)
 
 	u8 ultra_enable	 = 0, ultra_timing = 0, dma_timing = 0;
 
-	/* If we are about to put a disk into UDMA mode we screwed up.
-	   Our code assumes we never _ever_ do this on an OSB4 */
-	   
-	if(dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4 &&
-		drive->media == ide_disk && speed >= XFER_UDMA_0)
-			BUG();
-
 	pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
 	pci_read_config_byte(dev, 0x54, &ultra_enable);
 
@@ -373,7 +366,7 @@ static void __devinit init_hwif_svwks (ide_hwif_t *hwif)
 	}
 }
 
-static ide_pci_device_t serverworks_chipsets[] __devinitdata = {
+static const struct ide_port_info serverworks_chipsets[] __devinitdata = {
 	{	/* 0 */
 		.name		= "SvrWks OSB4",
 		.init_chipset	= init_chipset_svwks,
@@ -430,7 +423,7 @@ static ide_pci_device_t serverworks_chipsets[] __devinitdata = {
  
 static int __devinit svwks_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
-	ide_pci_device_t d;
+	struct ide_port_info d;
 	u8 idx = id->driver_data;
 
 	d = serverworks_chipsets[idx];
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 5af74ea1d46..de820aa58cd 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -614,6 +614,7 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	void __iomem *virt_base;
 	ide_hwif_t *hwif;
 	int h;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	/*
 	 * Find an empty HWIF; if none available, return -ENOMEM.
@@ -654,10 +655,12 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 	}
 
 	if (hwif->io_ports[IDE_DATA_OFFSET] != cmd_base) {
+		hw_regs_t hw;
+
 		/* Initialize the IO registers */
-		sgiioc4_init_hwif_ports(&hwif->hw, cmd_base, ctl, irqport);
-		memcpy(hwif->io_ports, hwif->hw.io_ports,
-		       sizeof (hwif->io_ports));
+		memset(&hw, 0, sizeof(hw));
+		sgiioc4_init_hwif_ports(&hw, cmd_base, ctl, irqport);
+		memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 		hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
 	}
 
@@ -679,11 +682,10 @@ sgiioc4_ide_setup_pci_device(struct pci_dev *dev)
 
 	ide_init_sgiioc4(hwif);
 
-	if (probe_hwif_init(hwif))
-		return -EIO;
+	idx[0] = hwif->index;
 
-	/* Create /proc/ide entries */
-	ide_proc_register_port(hwif);
+	if (ide_device_add(idx))
+		return -EIO;
 
 	return 0;
 }
diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c
index 689786df1ed..dc915cb22be 100644
--- a/drivers/ide/pci/siimage.c
+++ b/drivers/ide/pci/siimage.c
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/ide/pci/siimage.c		Version 1.17	Oct 18 2007
+ * linux/drivers/ide/pci/siimage.c		Version 1.18	Oct 18 2007
  *
  * Copyright (C) 2001-2002	Andre Hedrick <andre@linux-ide.org>
  * Copyright (C) 2003		Red Hat <alan@redhat.com>
@@ -57,8 +57,8 @@
  
 static int pdev_is_sata(struct pci_dev *pdev)
 {
-	switch(pdev->device)
-	{
+#ifdef CONFIG_BLK_DEV_IDE_SATA
+	switch(pdev->device) {
 		case PCI_DEVICE_ID_SII_3112:
 		case PCI_DEVICE_ID_SII_1210SA:
 			return 1;
@@ -66,9 +66,10 @@ static int pdev_is_sata(struct pci_dev *pdev)
 			return 0;
 	}
 	BUG();
+#endif
 	return 0;
 }
- 
+
 /**
  *	is_sata			-	check if hwif is SATA
  *	@hwif:	interface to check
@@ -136,7 +137,7 @@ static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
  *	SI3112 SATA controller life is a bit simpler.
  */
 
-static u8 sil_udma_filter(ide_drive_t *drive)
+static u8 sil_pata_udma_filter(ide_drive_t *drive)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	unsigned long base = (unsigned long) hwif->hwif_data;
@@ -147,23 +148,23 @@ static u8 sil_udma_filter(ide_drive_t *drive)
 	else
 		pci_read_config_byte(hwif->pci_dev, 0x8A, &scsc);
 
-	if (is_sata(hwif)) {
-		mask = strstr(drive->id->model, "Maxtor") ? 0x3f : 0x7f;
-		goto out;
-	}
-
 	if ((scsc & 0x30) == 0x10)	/* 133 */
-		mask = 0x7f;
+		mask = ATA_UDMA6;
 	else if ((scsc & 0x30) == 0x20)	/* 2xPCI */
-		mask = 0x7f;
+		mask = ATA_UDMA6;
 	else if ((scsc & 0x30) == 0x00)	/* 100 */
-		mask = 0x3f;
+		mask = ATA_UDMA5;
 	else 	/* Disabled ? */
 		BUG();
-out:
+
 	return mask;
 }
 
+static u8 sil_sata_udma_filter(ide_drive_t *drive)
+{
+	return strstr(drive->id->model, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6;
+}
+
 /**
  *	sil_set_pio_mode	-	set host controller for PIO mode
  *	@drive: drive
@@ -340,10 +341,11 @@ static int siimage_io_ide_dma_test_irq (ide_drive_t *drive)
 static int siimage_mmio_ide_dma_test_irq (ide_drive_t *drive)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
-	unsigned long base	= (unsigned long)hwif->hwif_data;
 	unsigned long addr	= siimage_selreg(hwif, 0x1);
 
 	if (SATA_ERROR_REG) {
+		unsigned long base = (unsigned long)hwif->hwif_data;
+
 		u32 ext_stat = readl((void __iomem *)(base + 0x10));
 		u8 watchdog = 0;
 		if (ext_stat & ((hwif->channel) ? 0x40 : 0x10)) {
@@ -376,7 +378,7 @@ static int siimage_mmio_ide_dma_test_irq (ide_drive_t *drive)
 }
 
 /**
- *	siimage_busproc		-	bus isolation ioctl
+ *	sil_sata_busproc	-	bus isolation IOCTL
  *	@drive: drive to isolate/restore
  *	@state: bus state to set
  *
@@ -384,8 +386,8 @@ static int siimage_mmio_ide_dma_test_irq (ide_drive_t *drive)
  *	SATA controller the work required is quite limited, we 
  *	just have to clean up the statistics
  */
- 
-static int siimage_busproc (ide_drive_t * drive, int state)
+
+static int sil_sata_busproc(ide_drive_t * drive, int state)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
 	u32 stat_config		= 0;
@@ -417,14 +419,14 @@ static int siimage_busproc (ide_drive_t * drive, int state)
 }
 
 /**
- *	siimage_reset_poll	-	wait for sata reset
+ *	sil_sata_reset_poll	-	wait for SATA reset
  *	@drive: drive we are resetting
  *
  *	Poll the SATA phy and see whether it has come back from the dead
  *	yet.
  */
- 
-static int siimage_reset_poll (ide_drive_t *drive)
+
+static int sil_sata_reset_poll(ide_drive_t *drive)
 {
 	if (SATA_STATUS_REG) {
 		ide_hwif_t *hwif	= HWIF(drive);
@@ -436,27 +438,22 @@ static int siimage_reset_poll (ide_drive_t *drive)
 			HWGROUP(drive)->polling = 0;
 			return ide_started;
 		}
-		return 0;
-	} else {
-		return 0;
 	}
+
+	return 0;
 }
 
 /**
- *	siimage_pre_reset	-	reset hook
+ *	sil_sata_pre_reset	-	reset hook
  *	@drive: IDE device being reset
  *
  *	For the SATA devices we need to handle recalibration/geometry
  *	differently
  */
- 
-static void siimage_pre_reset (ide_drive_t *drive)
-{
-	if (drive->media != ide_disk)
-		return;
 
-	if (is_sata(HWIF(drive)))
-	{
+static void sil_sata_pre_reset(ide_drive_t *drive)
+{
+	if (drive->media == ide_disk) {
 		drive->special.b.set_geometry = 0;
 		drive->special.b.recalibrate = 0;
 	}
@@ -502,7 +499,6 @@ static void siimage_reset (ide_drive_t *drive)
 			drive->failures++;
 		}
 	}
-
 }
 
 /**
@@ -758,16 +754,11 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif)
 		hwif->sata_misc[SATA_IEN_OFFSET]	= base + 0x148;
 	}
 
-	hw.irq				= hwif->pci_dev->irq;
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 
-	memcpy(&hwif->hw, &hw, sizeof(hw));
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports));
+	hwif->irq = dev->irq;
 
-	hwif->irq			= hw.irq;
-
-       	base = (unsigned long) addr;
-
-	hwif->dma_base			= base + (ch ? 0x08 : 0x00);
+	hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00);
 
 	hwif->mmio = 1;
 }
@@ -864,28 +855,31 @@ static u8 __devinit ata66_siimage(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_siimage(ide_hwif_t *hwif)
 {
+	u8 sata = is_sata(hwif);
+
 	hwif->resetproc = &siimage_reset;
 	hwif->set_pio_mode = &sil_set_pio_mode;
 	hwif->set_dma_mode = &sil_set_dma_mode;
-	hwif->reset_poll = &siimage_reset_poll;
-	hwif->pre_reset = &siimage_pre_reset;
-	hwif->udma_filter = &sil_udma_filter;
 
-	if(is_sata(hwif)) {
+	if (sata) {
 		static int first = 1;
 
-		hwif->busproc   = &siimage_busproc;
+		hwif->busproc = &sil_sata_busproc;
+		hwif->reset_poll = &sil_sata_reset_poll;
+		hwif->pre_reset = &sil_sata_pre_reset;
+		hwif->udma_filter = &sil_sata_udma_filter;
 
 		if (first) {
 			printk(KERN_INFO "siimage: For full SATA support you should use the libata sata_sil module.\n");
 			first = 0;
 		}
-	}
+	} else
+		hwif->udma_filter = &sil_pata_udma_filter;
 
 	if (hwif->dma_base == 0)
 		return;
 
-	if (is_sata(hwif))
+	if (sata)
 		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
 
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
@@ -911,7 +905,7 @@ static void __devinit init_hwif_siimage(ide_hwif_t *hwif)
 		.udma_mask	= ATA_UDMA6,		\
 	}
 
-static ide_pci_device_t siimage_chipsets[] __devinitdata = {
+static const struct ide_port_info siimage_chipsets[] __devinitdata = {
 	/* 0 */ DECLARE_SII_DEV("SiI680"),
 	/* 1 */ DECLARE_SII_DEV("SiI3112 Serial ATA"),
 	/* 2 */ DECLARE_SII_DEV("Adaptec AAR-1210SA")
diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c
index c1d280b0639..6b7bb53acef 100644
--- a/drivers/ide/pci/sis5513.c
+++ b/drivers/ide/pci/sis5513.c
@@ -264,7 +264,7 @@ static void sis_ata133_program_timings(ide_drive_t *drive, const u8 mode)
 	if (mode >= XFER_MW_DMA_0) {
 		t1 &= ~0x04;	/* disable UDMA */
 		idx = mode - XFER_MW_DMA_0 + 5;
-	}
+	} else
 		idx = mode - XFER_PIO_0;
 	t1 |= ini_time_value[clk][idx] << 12;
 	t1 |= act_time_value[clk][idx] << 16;
@@ -579,7 +579,7 @@ static void __devinit init_hwif_sis5513 (ide_hwif_t *hwif)
 		hwif->cbl = ata66_sis5513(hwif);
 }
 
-static ide_pci_device_t sis5513_chipset __devinitdata = {
+static const struct ide_port_info sis5513_chipset __devinitdata = {
 	.name		= "SIS5513",
 	.init_chipset	= init_chipset_sis5513,
 	.init_hwif	= init_hwif_sis5513,
diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c
index 0dce459b126..147d783f752 100644
--- a/drivers/ide/pci/sl82c105.c
+++ b/drivers/ide/pci/sl82c105.c
@@ -361,13 +361,6 @@ static void __devinit init_hwif_sl82c105(ide_hwif_t *hwif)
 	hwif->selectproc	= &sl82c105_selectproc;
 	hwif->resetproc 	= &sl82c105_resetproc;
 
-	/*
-	 * We support 32-bit I/O on this interface, and
-	 * it doesn't have problems with interrupts.
-	 */
-	hwif->drives[0].io_32bit = hwif->drives[1].io_32bit = 1;
-	hwif->drives[0].unmask   = hwif->drives[1].unmask   = 1;
-
 	if (!hwif->dma_base)
 		return;
 
@@ -394,12 +387,15 @@ static void __devinit init_hwif_sl82c105(ide_hwif_t *hwif)
 		hwif->serialized = hwif->mate->serialized = 1;
 }
 
-static ide_pci_device_t sl82c105_chipset __devinitdata = {
+static const struct ide_port_info sl82c105_chipset __devinitdata = {
 	.name		= "W82C105",
 	.init_chipset	= init_chipset_sl82c105,
 	.init_hwif	= init_hwif_sl82c105,
 	.enablebits	= {{0x40,0x01,0x01}, {0x40,0x10,0x10}},
-	.host_flags	= IDE_HFLAG_NO_AUTODMA | IDE_HFLAG_BOOTABLE,
+	.host_flags	= IDE_HFLAG_IO_32BIT |
+			  IDE_HFLAG_UNMASK_IRQS |
+			  IDE_HFLAG_NO_AUTODMA |
+			  IDE_HFLAG_BOOTABLE,
 	.pio_mask	= ATA_PIO5,
 };
 
diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c
index 4f22dffdf8e..eb4445b229e 100644
--- a/drivers/ide/pci/slc90e66.c
+++ b/drivers/ide/pci/slc90e66.c
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/ide/pci/slc90e66.c	Version 0.18	Aug 9, 2007
+ *  linux/drivers/ide/pci/slc90e66.c	Version 0.19	Sep 24, 2007
  *
  *  Copyright (C) 2000-2002 Andre Hedrick <andre@linux-ide.org>
  *  Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
@@ -21,6 +21,8 @@
 
 #include <asm/io.h>
 
+static DEFINE_SPINLOCK(slc90e66_lock);
+
 static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 {
 	ide_hwif_t *hwif	= HWIF(drive);
@@ -40,7 +42,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 					{ 2, 1 },
 					{ 2, 3 }, };
 
-	spin_lock_irqsave(&ide_lock, flags);
+	spin_lock_irqsave(&slc90e66_lock, flags);
 	pci_read_config_word(dev, master_port, &master_data);
 
 	if (pio > 1)
@@ -71,7 +73,7 @@ static void slc90e66_set_pio_mode(ide_drive_t *drive, const u8 pio)
 	pci_write_config_word(dev, master_port, master_data);
 	if (is_slave)
 		pci_write_config_byte(dev, slave_port, slave_data);
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irqrestore(&slc90e66_lock, flags);
 }
 
 static void slc90e66_set_dma_mode(ide_drive_t *drive, const u8 speed)
@@ -146,7 +148,7 @@ static void __devinit init_hwif_slc90e66 (ide_hwif_t *hwif)
 		hwif->cbl = (reg47 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
 }
 
-static ide_pci_device_t slc90e66_chipset __devinitdata = {
+static const struct ide_port_info slc90e66_chipset __devinitdata = {
 	.name		= "SLC90E66",
 	.init_hwif	= init_hwif_slc90e66,
 	.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}},
diff --git a/drivers/ide/pci/tc86c001.c b/drivers/ide/pci/tc86c001.c
index 631506e9b5d..a66ebd14664 100644
--- a/drivers/ide/pci/tc86c001.c
+++ b/drivers/ide/pci/tc86c001.c
@@ -218,7 +218,7 @@ static unsigned int __devinit init_chipset_tc86c001(struct pci_dev *dev,
 	return err;
 }
 
-static ide_pci_device_t tc86c001_chipset __devinitdata = {
+static const struct ide_port_info tc86c001_chipset __devinitdata = {
 	.name		= "TC86C001",
 	.init_chipset	= init_chipset_tc86c001,
 	.init_hwif	= init_hwif_tc86c001,
diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c
index 30b52f62699..a227c41d23a 100644
--- a/drivers/ide/pci/triflex.c
+++ b/drivers/ide/pci/triflex.c
@@ -102,7 +102,7 @@ static void __devinit init_hwif_triflex(ide_hwif_t *hwif)
 	hwif->set_dma_mode = &triflex_set_mode;
 }
 
-static ide_pci_device_t triflex_device __devinitdata = {
+static const struct ide_port_info triflex_device __devinitdata = {
 	.name		= "TRIFLEX",
 	.init_hwif	= init_hwif_triflex,
 	.enablebits	= {{0x80, 0x01, 0x01}, {0x80, 0x02, 0x02}},
diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c
index 140d486f623..5011ba22e36 100644
--- a/drivers/ide/pci/trm290.c
+++ b/drivers/ide/pci/trm290.c
@@ -250,7 +250,6 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif)
 	u8 reg = 0;
 	struct pci_dev *dev = hwif->pci_dev;
 
-	hwif->chipset = ide_trm290;
 	cfgbase = pci_resource_start(dev, 4);
 	if ((dev->class & 5) && cfgbase) {
 		hwif->config_data = cfgbase;
@@ -320,9 +319,10 @@ static void __devinit init_hwif_trm290(ide_hwif_t *hwif)
 #endif
 }
 
-static ide_pci_device_t trm290_chipset __devinitdata = {
+static const struct ide_port_info trm290_chipset __devinitdata = {
 	.name		= "TRM290",
 	.init_hwif	= init_hwif_trm290,
+	.chipset	= ide_trm290,
 	.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
 #if 0 /* play it safe for now */
 			  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index c8022a92a0e..a0d3c16b68e 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -1,6 +1,6 @@
 /*
  *
- * Version 3.49
+ * Version 3.50
  *
  * VIA IDE driver for Linux. Supported southbridges:
  *
@@ -422,65 +422,40 @@ static u8 __devinit via82cxxx_cable_detect(ide_hwif_t *hwif)
 
 static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
 {
-	struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
-	int i;
-
 	hwif->set_pio_mode = &via_set_pio_mode;
 	hwif->set_dma_mode = &via_set_drive;
 
-#ifdef CONFIG_PPC_CHRP
-	if(machine_is(chrp) && _chrp_type == _CHRP_Pegasos) {
-		hwif->irq = hwif->channel ? 15 : 14;
-	}
-#endif
-
-	for (i = 0; i < 2; i++) {
-		hwif->drives[i].io_32bit = 1;
-		hwif->drives[i].unmask = (vdev->via_config->flags & VIA_NO_UNMASK) ? 0 : 1;
-	}
-
 	if (!hwif->dma_base)
 		return;
 
-	hwif->ultra_mask = vdev->via_config->udma_mask;
-
 	if (hwif->cbl != ATA_CBL_PATA40_SHORT)
 		hwif->cbl = via82cxxx_cable_detect(hwif);
 }
 
-static ide_pci_device_t via82cxxx_chipsets[] __devinitdata = {
-	{	/* 0 */
-		.name		= "VP_IDE",
-		.init_chipset	= init_chipset_via82cxxx,
-		.init_hwif	= init_hwif_via82cxxx,
-		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |
-				  IDE_HFLAG_POST_SET_MODE |
-				  IDE_HFLAG_NO_AUTODMA |
-				  IDE_HFLAG_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-	},{	/* 1 */
-		.name		= "VP_IDE",
-		.init_chipset	= init_chipset_via82cxxx,
-		.init_hwif	= init_hwif_via82cxxx,
-		.enablebits	= {{0x00,0x00,0x00}, {0x00,0x00,0x00}},
-		.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
-				  IDE_HFLAG_PIO_NO_DOWNGRADE |
-				  IDE_HFLAG_POST_SET_MODE |
-				  IDE_HFLAG_BOOTABLE,
-		.pio_mask	= ATA_PIO5,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-	}
+static const struct ide_port_info via82cxxx_chipset __devinitdata = {
+	.name		= "VP_IDE",
+	.init_chipset	= init_chipset_via82cxxx,
+	.init_hwif	= init_hwif_via82cxxx,
+	.enablebits	= { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } },
+	.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
+			  IDE_HFLAG_PIO_NO_DOWNGRADE |
+			  IDE_HFLAG_POST_SET_MODE |
+			  IDE_HFLAG_IO_32BIT |
+			  IDE_HFLAG_BOOTABLE,
+	.pio_mask	= ATA_PIO5,
+	.swdma_mask	= ATA_SWDMA2,
+	.mwdma_mask	= ATA_MWDMA2,
 };
 
 static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct pci_dev *isa = NULL;
 	struct via_isa_bridge *via_config;
+	u8 idx = id->driver_data;
+	struct ide_port_info d;
+
+	d = via82cxxx_chipset;
+
 	/*
 	 * Find the ISA bridge and check we know what it is.
 	 */
@@ -490,7 +465,23 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i
 		printk(KERN_WARNING "VP_IDE: Unknown VIA SouthBridge, disabling DMA.\n");
 		return -ENODEV;
 	}
-	return ide_setup_pci_device(dev, &via82cxxx_chipsets[id->driver_data]);
+
+	if (idx == 0)
+		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
+	else
+		d.enablebits[1].reg = d.enablebits[0].reg = 0;
+
+	if ((via_config->flags & VIA_NO_UNMASK) == 0)
+		d.host_flags |= IDE_HFLAG_UNMASK_IRQS;
+
+#ifdef CONFIG_PPC_CHRP
+	if (machine_is(chrp) && _chrp_type == _CHRP_Pegasos)
+		d.host_flags |= IDE_HFLAG_FORCE_LEGACY_IRQS;
+#endif
+
+	d.udma_mask = via_config->udma_mask;
+
+	return ide_setup_pci_device(dev, &d);
 }
 
 static const struct pci_device_id via_pci_tbl[] = {
diff --git a/drivers/ide/ppc/mpc8xx.c b/drivers/ide/ppc/mpc8xx.c
index df2e92034f5..5f0da35ab5a 100644
--- a/drivers/ide/ppc/mpc8xx.c
+++ b/drivers/ide/ppc/mpc8xx.c
@@ -316,8 +316,8 @@ m8xx_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
 
 	ide_hwifs[data_port].pio_mask = ATA_PIO4;
 	ide_hwifs[data_port].set_pio_mode = m8xx_ide_set_pio_mode;
+	ide_hwifs[data_port].ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
 
-	hw->ack_intr = (ide_ack_intr_t *) ide_interrupt_ack;
 	/* Enable Harddisk Interrupt,
 	 * and make it edge sensitive
 	 */
@@ -402,8 +402,8 @@ void m8xx_ide_init_hwif_ports (hw_regs_t *hw,
 
 	ide_hwifs[data_port].pio_mask = ATA_PIO4;
 	ide_hwifs[data_port].set_pio_mode = m8xx_ide_set_pio_mode;
+	ide_hwifs[data_port].ack_intr = (ide_ack_intr_t *)ide_interrupt_ack;
 
-	hw->ack_intr = (ide_ack_intr_t *) ide_interrupt_ack;
 	/* Enable Harddisk Interrupt,
 	 * and make it edge sensitive
 	 */
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index c5547935676..816b5311dad 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1039,6 +1039,8 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 {
 	struct device_node *np = pmif->node;
 	const int *bidp;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
+	hw_regs_t hw;
 
 	pmif->cable_80 = 0;
 	pmif->broken_dma = pmif->broken_dma_warn = 0;
@@ -1124,8 +1126,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 	/* Tell common code _not_ to mess with resources */
 	hwif->mmio = 1;
 	hwif->hwif_data = pmif;
-	pmac_ide_init_hwif_ports(&hwif->hw, pmif->regbase, 0, &hwif->irq);
-	memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+	memset(&hw, 0, sizeof(hw));
+	pmac_ide_init_hwif_ports(&hw, pmif->regbase, 0, &hwif->irq);
+	memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 	hwif->chipset = ide_pmac;
 	hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET] || pmif->mediabay;
 	hwif->hold = pmif->mediabay;
@@ -1163,10 +1166,9 @@ pmac_ide_setup_device(pmac_ide_hwif_t *pmif, ide_hwif_t *hwif)
 		pmac_ide_setup_dma(pmif, hwif);
 #endif /* CONFIG_BLK_DEV_IDEDMA_PMAC */
 
-	/* We probe the hwif now */
-	probe_hwif_init(hwif);
+	idx[0] = hwif->index;
 
-	ide_proc_register_port(hwif);
+	ide_device_add(idx);
 
 	return 0;
 }
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
index fff567bcedb..02d14bf85ab 100644
--- a/drivers/ide/setup-pci.c
+++ b/drivers/ide/setup-pci.c
@@ -147,15 +147,15 @@ static int ide_setup_pci_baseregs (struct pci_dev *dev, const char *name)
 #ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 /**
  *	ide_get_or_set_dma_base		-	setup BMIBA
- *	@d: IDE pci device data
- *	@hwif: Interface
+ *	@d: IDE port info
+ *	@hwif: IDE interface
  *
  *	Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space.
  *	Where a device has a partner that is already in DMA mode we check
  *	and enforce IDE simplex rules.
  */
 
-static unsigned long ide_get_or_set_dma_base(ide_pci_device_t *d, ide_hwif_t *hwif)
+static unsigned long ide_get_or_set_dma_base(const struct ide_port_info *d, ide_hwif_t *hwif)
 {
 	unsigned long	dma_base = 0;
 	struct pci_dev	*dev = hwif->pci_dev;
@@ -225,10 +225,11 @@ static unsigned long ide_get_or_set_dma_base(ide_pci_device_t *d, ide_hwif_t *hw
 }
 #endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
 
-void ide_setup_pci_noise (struct pci_dev *dev, ide_pci_device_t *d)
+void ide_setup_pci_noise(struct pci_dev *dev, const struct ide_port_info *d)
 {
-	printk(KERN_INFO "%s: IDE controller at PCI slot %s\n",
-			 d->name, pci_name(dev));
+	printk(KERN_INFO "%s: IDE controller (0x%04x:0x%04x rev 0x%02x) at "
+			 " PCI slot %s\n", d->name, dev->vendor, dev->device,
+			 dev->revision, pci_name(dev));
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_pci_noise);
@@ -237,15 +238,15 @@ EXPORT_SYMBOL_GPL(ide_setup_pci_noise);
 /**
  *	ide_pci_enable	-	do PCI enables
  *	@dev: PCI device
- *	@d: IDE pci device data
+ *	@d: IDE port info
  *
  *	Enable the IDE PCI device. We attempt to enable the device in full
  *	but if that fails then we only need BAR4 so we will enable that.
  *	
  *	Returns zero on success or an error code
  */
- 
-static int ide_pci_enable(struct pci_dev *dev, ide_pci_device_t *d)
+
+static int ide_pci_enable(struct pci_dev *dev, const struct ide_port_info *d)
 {
 	int ret;
 
@@ -260,9 +261,9 @@ static int ide_pci_enable(struct pci_dev *dev, ide_pci_device_t *d)
 	}
 
 	/*
-	 * assume all devices can do 32-bit dma for now. we can add a
-	 * dma mask field to the ide_pci_device_t if we need it (or let
-	 * lower level driver set the dma mask)
+	 * assume all devices can do 32-bit DMA for now, we can add
+	 * a DMA mask field to the struct ide_port_info if we need it
+	 * (or let lower level driver set the DMA mask)
 	 */
 	ret = pci_set_dma_mask(dev, DMA_32BIT_MASK);
 	if (ret < 0) {
@@ -284,13 +285,13 @@ out:
 /**
  *	ide_pci_configure	-	configure an unconfigured device
  *	@dev: PCI device
- *	@d: IDE pci device data
+ *	@d: IDE port info
  *
  *	Enable and configure the PCI device we have been passed.
  *	Returns zero on success or an error code.
  */
- 
-static int ide_pci_configure(struct pci_dev *dev, ide_pci_device_t *d)
+
+static int ide_pci_configure(struct pci_dev *dev, const struct ide_port_info *d)
 {
 	u16 pcicmd = 0;
 	/*
@@ -318,15 +319,15 @@ static int ide_pci_configure(struct pci_dev *dev, ide_pci_device_t *d)
 
 /**
  *	ide_pci_check_iomem	-	check a register is I/O
- *	@dev: pci device
- *	@d: ide_pci_device
- *	@bar: bar number
+ *	@dev: PCI device
+ *	@d: IDE port info
+ *	@bar: BAR number
  *
  *	Checks if a BAR is configured and points to MMIO space. If so
  *	print an error and return an error code. Otherwise return 0
  */
- 
-static int ide_pci_check_iomem(struct pci_dev *dev, ide_pci_device_t *d, int bar)
+
+static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *d, int bar)
 {
 	ulong flags = pci_resource_flags(dev, bar);
 	
@@ -348,7 +349,7 @@ static int ide_pci_check_iomem(struct pci_dev *dev, ide_pci_device_t *d, int bar
 /**
  *	ide_hwif_configure	-	configure an IDE interface
  *	@dev: PCI device holding interface
- *	@d: IDE pci data
+ *	@d: IDE port info
  *	@mate: Paired interface if any
  *
  *	Perform the initial set up for the hardware interface structure. This
@@ -357,8 +358,8 @@ static int ide_pci_check_iomem(struct pci_dev *dev, ide_pci_device_t *d, int bar
  *
  *	Returns the new hardware interface structure, or NULL on a failure
  */
- 
-static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *mate, int port, int irq)
+
+static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, const struct ide_port_info *d, ide_hwif_t *mate, int port, int irq)
 {
 	unsigned long ctl = 0, base = 0;
 	ide_hwif_t *hwif;
@@ -387,19 +388,20 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_pci_device_t *d,
 		return NULL;	/* no room in ide_hwifs[] */
 	if (hwif->io_ports[IDE_DATA_OFFSET] != base ||
 	    hwif->io_ports[IDE_CONTROL_OFFSET] != (ctl | 2)) {
-		memset(&hwif->hw, 0, sizeof(hwif->hw));
-#ifndef IDE_ARCH_OBSOLETE_INIT
-		ide_std_init_ports(&hwif->hw, base, (ctl | 2));
-		hwif->hw.io_ports[IDE_IRQ_OFFSET] = 0;
+		hw_regs_t hw;
+
+		memset(&hw, 0, sizeof(hw));
+#ifndef CONFIG_IDE_ARCH_OBSOLETE_INIT
+		ide_std_init_ports(&hw, base, ctl | 2);
 #else
-		ide_init_hwif_ports(&hwif->hw, base, (ctl | 2), NULL);
+		ide_init_hwif_ports(&hw, base, ctl | 2, NULL);
 #endif
-		memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->io_ports));
+		memcpy(hwif->io_ports, hw.io_ports, sizeof(hwif->io_ports));
 		hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET];
 	}
-	hwif->chipset = ide_pci;
+	hwif->chipset = d->chipset ? d->chipset : ide_pci;
 	hwif->pci_dev = dev;
-	hwif->cds = (struct ide_pci_device_s *) d;
+	hwif->cds = d;
 	hwif->channel = port;
 
 	if (!hwif->irq)
@@ -414,21 +416,17 @@ static ide_hwif_t *ide_hwif_configure(struct pci_dev *dev, ide_pci_device_t *d,
 /**
  *	ide_hwif_setup_dma	-	configure DMA interface
  *	@dev: PCI device
- *	@d: IDE pci data
- *	@hwif: Hardware interface we are configuring
+ *	@d: IDE port info
+ *	@hwif: IDE interface
  *
  *	Set up the DMA base for the interface. Enable the master bits as
  *	necessary and attempt to bring the device DMA into a ready to use
  *	state
  */
- 
-#ifndef CONFIG_BLK_DEV_IDEDMA_PCI
-static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *hwif)
-{
-}
-#else
-static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwif_t *hwif)
+
+static void ide_hwif_setup_dma(struct pci_dev *dev, const struct ide_port_info *d, ide_hwif_t *hwif)
 {
+#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
 	u16 pcicmd;
 
 	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
@@ -460,13 +458,13 @@ static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwi
 				"(BIOS)\n", hwif->name, d->name);
 		}
 	}
-}
 #endif /* CONFIG_BLK_DEV_IDEDMA_PCI*/
+}
 
 /**
  *	ide_setup_pci_controller	-	set up IDE PCI
  *	@dev: PCI device
- *	@d: IDE PCI data
+ *	@d: IDE port info
  *	@noisy: verbose flag
  *	@config: returned as 1 if we configured the hardware
  *
@@ -474,8 +472,8 @@ static void ide_hwif_setup_dma(struct pci_dev *dev, ide_pci_device_t *d, ide_hwi
  *	up the PCI side of the device, checks that the device is enabled
  *	and enables it if need be
  */
- 
-static int ide_setup_pci_controller(struct pci_dev *dev, ide_pci_device_t *d, int noisy, int *config)
+
+static int ide_setup_pci_controller(struct pci_dev *dev, const struct ide_port_info *d, int noisy, int *config)
 {
 	int ret;
 	u16 pcicmd;
@@ -500,9 +498,6 @@ static int ide_setup_pci_controller(struct pci_dev *dev, ide_pci_device_t *d, in
 		printk(KERN_INFO "%s: device enabled (Linux)\n", d->name);
 	}
 
-	if (noisy)
-		printk(KERN_INFO "%s: chipset revision %d\n",
-				 d->name, dev->revision);
 out:
 	return ret;
 }
@@ -510,9 +505,9 @@ out:
 /**
  *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
  *	@dev: PCI device
- *	@d: IDE pci device info
+ *	@d: IDE port info
  *	@pciirq: IRQ line
- *	@index: ata index to update
+ *	@idx: ATA index table to update
  *
  *	Scan the interfaces attached to this device and do any
  *	necessary per port setup. Attach the devices and ask the
@@ -522,26 +517,25 @@ out:
  *	but is also used directly as a helper function by some controllers
  *	where the chipset setup is not the default PCI IDE one.
  */
- 
-void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, ata_index_t *index)
+
+void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d, int pciirq, u8 *idx)
 {
 	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
-	int at_least_one_hwif_enabled = 0;
 	ide_hwif_t *hwif, *mate = NULL;
 	u8 tmp;
 
-	index->all = 0xf0f0;
-
 	/*
 	 * Set up the IDE ports
 	 */
-	 
+
 	for (port = 0; port < channels; ++port) {
-		ide_pci_enablebit_t *e = &(d->enablebits[port]);
-	
+		const ide_pci_enablebit_t *e = &(d->enablebits[port]);
+
 		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
-		    (tmp & e->mask) != e->val))
+		    (tmp & e->mask) != e->val)) {
+			printk(KERN_INFO "%s: IDE port disabled\n", d->name);
 			continue;	/* port not enabled */
+		}
 
 		if ((hwif = ide_hwif_configure(dev, d, mate, port, pciirq)) == NULL)
 			continue;
@@ -549,11 +543,7 @@ void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, a
 		/* setup proper ancestral information */
 		hwif->gendev.parent = &dev->dev;
 
-		if (hwif->channel) {
-			index->b.high = hwif->index;
-		} else {
-			index->b.low = hwif->index;
-		}
+		*(idx + port) = hwif->index;
 
 		
 		if (d->init_iops)
@@ -562,15 +552,28 @@ void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, a
 		if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0)
 			ide_hwif_setup_dma(dev, d, hwif);
 
-		if ((d->host_flags & IDE_HFLAG_LEGACY_IRQS) && hwif->irq == 0)
+		if ((!hwif->irq && (d->host_flags & IDE_HFLAG_LEGACY_IRQS)) ||
+		    (d->host_flags & IDE_HFLAG_FORCE_LEGACY_IRQS))
 			hwif->irq = port ? 15 : 14;
 
+		hwif->fixup = d->fixup;
+
 		hwif->host_flags = d->host_flags;
 		hwif->pio_mask = d->pio_mask;
 
 		if ((d->host_flags & IDE_HFLAG_SERIALIZE) && hwif->mate)
 			hwif->mate->serialized = hwif->serialized = 1;
 
+		if (d->host_flags & IDE_HFLAG_IO_32BIT) {
+			hwif->drives[0].io_32bit = 1;
+			hwif->drives[1].io_32bit = 1;
+		}
+
+		if (d->host_flags & IDE_HFLAG_UNMASK_IRQS) {
+			hwif->drives[0].unmask = 1;
+			hwif->drives[1].unmask = 1;
+		}
+
 		if (hwif->dma_base) {
 			hwif->swdma_mask = d->swdma_mask;
 			hwif->mwdma_mask = d->mwdma_mask;
@@ -580,6 +583,9 @@ void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, a
 		hwif->drives[0].autotune = 1;
 		hwif->drives[1].autotune = 1;
 
+		if (d->host_flags & IDE_HFLAG_RQSIZE_256)
+			hwif->rqsize = 256;
+
 		if (d->init_hwif)
 			/* Call chipset-specific routine
 			 * for each enabled hwif
@@ -587,10 +593,7 @@ void ide_pci_setup_ports(struct pci_dev *dev, ide_pci_device_t *d, int pciirq, a
 			d->init_hwif(hwif);
 
 		mate = hwif;
-		at_least_one_hwif_enabled = 1;
 	}
-	if (!at_least_one_hwif_enabled)
-		printk(KERN_INFO "%s: neither IDE port enabled (BIOS)\n", d->name);
 }
 
 EXPORT_SYMBOL_GPL(ide_pci_setup_ports);
@@ -602,13 +605,13 @@ EXPORT_SYMBOL_GPL(ide_pci_setup_ports);
  *
  * One thing that is not standardized is the location of the
  * primary/secondary interface "enable/disable" bits.  For chipsets that
- * we "know" about, this information is in the ide_pci_device_t struct;
+ * we "know" about, this information is in the struct ide_port_info;
  * for all other chipsets, we just assume both interfaces are enabled.
  */
-static int do_ide_setup_pci_device(struct pci_dev *dev, ide_pci_device_t *d,
-				   ata_index_t *index, u8 noisy)
+static int do_ide_setup_pci_device(struct pci_dev *dev,
+				   const struct ide_port_info *d,
+				   u8 *idx, u8 noisy)
 {
-	static ata_index_t ata_index = { .b = { .low = 0xff, .high = 0xff } };
 	int tried_config = 0;
 	int pciirq, ret;
 
@@ -658,51 +661,35 @@ static int do_ide_setup_pci_device(struct pci_dev *dev, ide_pci_device_t *d,
 
 	/* FIXME: silent failure can happen */
 
-	*index = ata_index;
-	ide_pci_setup_ports(dev, d, pciirq, index);
+	ide_pci_setup_ports(dev, d, pciirq, idx);
 out:
 	return ret;
 }
 
-int ide_setup_pci_device(struct pci_dev *dev, ide_pci_device_t *d)
+int ide_setup_pci_device(struct pci_dev *dev, const struct ide_port_info *d)
 {
-	ide_hwif_t *hwif = NULL, *mate = NULL;
-	ata_index_t index_list;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 	int ret;
 
-	ret = do_ide_setup_pci_device(dev, d, &index_list, 1);
-	if (ret < 0)
-		goto out;
+	ret = do_ide_setup_pci_device(dev, d, &idx[0], 1);
 
-	if ((index_list.b.low & 0xf0) != 0xf0)
-		hwif = &ide_hwifs[index_list.b.low];
-	if ((index_list.b.high & 0xf0) != 0xf0)
-		mate = &ide_hwifs[index_list.b.high];
+	if (ret >= 0)
+		ide_device_add(idx);
 
-	if (hwif)
-		probe_hwif_init_with_fixup(hwif, d->fixup);
-	if (mate)
-		probe_hwif_init_with_fixup(mate, d->fixup);
-
-	if (hwif)
-		ide_proc_register_port(hwif);
-	if (mate)
-		ide_proc_register_port(mate);
-out:
 	return ret;
 }
 
 EXPORT_SYMBOL_GPL(ide_setup_pci_device);
 
 int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
-			  ide_pci_device_t *d)
+			  const struct ide_port_info *d)
 {
 	struct pci_dev *pdev[] = { dev1, dev2 };
-	ata_index_t index_list[2];
 	int ret, i;
+	u8 idx[4] = { 0xff, 0xff, 0xff, 0xff };
 
 	for (i = 0; i < 2; i++) {
-		ret = do_ide_setup_pci_device(pdev[i], d, index_list + i, !i);
+		ret = do_ide_setup_pci_device(pdev[i], d, &idx[i*2], !i);
 		/*
 		 * FIXME: Mom, mom, they stole me the helper function to undo
 		 * do_ide_setup_pci_device() on the first device!
@@ -711,25 +698,7 @@ int ide_setup_pci_devices(struct pci_dev *dev1, struct pci_dev *dev2,
 			goto out;
 	}
 
-	for (i = 0; i < 2; i++) {
-		u8 idx[2] = { index_list[i].b.low, index_list[i].b.high };
-		int j;
-
-		for (j = 0; j < 2; j++) {
-			if ((idx[j] & 0xf0) != 0xf0)
-				probe_hwif_init(ide_hwifs + idx[j]);
-		}
-	}
-
-	for (i = 0; i < 2; i++) {
-		u8 idx[2] = { index_list[i].b.low, index_list[i].b.high };
-		int j;
-
-		for (j = 0; j < 2; j++) {
-			if ((idx[j] & 0xf0) != 0xf0)
-				ide_proc_register_port(ide_hwifs + idx[j]);
-		}
-	}
+	ide_device_add(idx);
 out:
 	return ret;
 }
@@ -754,9 +723,6 @@ static LIST_HEAD(ide_pci_drivers);
  *	hands the controllers off to the core PCI code to do the rest of
  *	the work.
  *
- *	The driver_data of the driver table must point to an ide_pci_device_t
- *	describing the interface.
- *
  *	Returns are the same as for pci_register_driver
  */
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f82592..d08fb30768b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2797,11 +2797,12 @@ static void cma_remove_one(struct ib_device *device)
 
 static int cma_init(void)
 {
-	int ret, low, high;
+	int ret, low, high, remaining;
 
 	get_random_bytes(&next_port, sizeof next_port);
 	inet_get_local_port_range(&low, &high);
-	next_port = ((unsigned int) next_port % (high - low)) + low;
+	remaining = (high - low) + 1;
+	next_port = ((unsigned int) next_port % remaining) + low;
 
 	cma_wq = create_singlethread_workqueue("rdma_cm");
 	if (!cma_wq)
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 1d62c8b88e1..e5b4e9bfbdc 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -495,7 +495,7 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
 #ifdef CONFIG_COMPAT
 
 #define BITS_PER_LONG_COMPAT (sizeof(compat_long_t) * 8)
-#define NBITS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
+#define BITS_TO_LONGS_COMPAT(x) ((((x) - 1) / BITS_PER_LONG_COMPAT) + 1)
 
 #ifdef __BIG_ENDIAN
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
@@ -504,7 +504,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 	int len, i;
 
 	if (compat) {
-		len = NBITS_COMPAT(maxbit) * sizeof(compat_long_t);
+		len = BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t);
 		if (len > maxlen)
 			len = maxlen;
 
@@ -515,7 +515,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 					 sizeof(compat_long_t)))
 				return -EFAULT;
 	} else {
-		len = NBITS(maxbit) * sizeof(long);
+		len = BITS_TO_LONGS(maxbit) * sizeof(long);
 		if (len > maxlen)
 			len = maxlen;
 
@@ -530,8 +530,8 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 			unsigned int maxlen, void __user *p, int compat)
 {
 	int len = compat ?
-			NBITS_COMPAT(maxbit) * sizeof(compat_long_t) :
-			NBITS(maxbit) * sizeof(long);
+			BITS_TO_LONGS_COMPAT(maxbit) * sizeof(compat_long_t) :
+			BITS_TO_LONGS(maxbit) * sizeof(long);
 
 	if (len > maxlen)
 		len = maxlen;
@@ -545,7 +545,7 @@ static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 static int bits_to_user(unsigned long *bits, unsigned int maxbit,
 			unsigned int maxlen, void __user *p, int compat)
 {
-	int len = NBITS(maxbit) * sizeof(long);
+	int len = BITS_TO_LONGS(maxbit) * sizeof(long);
 
 	if (len > maxlen)
 		len = maxlen;
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index ec1b6cfefcd..bfc6061f155 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -136,7 +136,8 @@ static int gameport_measure_speed(struct gameport *gameport)
 	}
 
 	gameport_close(gameport);
-	return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
+	return (cpu_data(raw_smp_processor_id()).loops_per_jiffy *
+		(unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
 
 #else
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 2f2b020cd62..307c7b5c2b3 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -584,10 +584,10 @@ static int input_default_setkeycode(struct input_dev *dev,
 
 
 #define MATCH_BIT(bit, max) \
-		for (i = 0; i < NBITS(max); i++) \
+		for (i = 0; i < BITS_TO_LONGS(max); i++) \
 			if ((id->bit[i] & dev->bit[i]) != id->bit[i]) \
 				break; \
-		if (i != NBITS(max)) \
+		if (i != BITS_TO_LONGS(max)) \
 			continue;
 
 static const struct input_device_id *input_match_device(const struct input_device_id *id,
@@ -698,7 +698,7 @@ static void input_seq_print_bitmap(struct seq_file *seq, const char *name,
 {
 	int i;
 
-	for (i = NBITS(max) - 1; i > 0; i--)
+	for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
 		if (bitmap[i])
 			break;
 
@@ -892,7 +892,7 @@ static int input_print_modalias_bits(char *buf, int size,
 
 	len += snprintf(buf, max(size, 0), "%c", name);
 	for (i = min_bit; i < max_bit; i++)
-		if (bm[LONG(i)] & BIT(i))
+		if (bm[BIT_WORD(i)] & BIT_MASK(i))
 			len += snprintf(buf + len, max(size - len, 0), "%X,", i);
 	return len;
 }
@@ -991,7 +991,7 @@ static int input_print_bitmap(char *buf, int buf_size, unsigned long *bitmap,
 	int i;
 	int len = 0;
 
-	for (i = NBITS(max) - 1; i > 0; i--)
+	for (i = BITS_TO_LONGS(max) - 1; i > 0; i--)
 		if (bitmap[i])
 			break;
 
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 2b201f9aa02..22b2789ef58 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -844,8 +844,8 @@ static const struct input_device_id joydev_blacklist[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
 	},	/* Avoid itouchpads, touchscreens and tablets */
 	{ }	/* Terminating entry */
 };
@@ -854,20 +854,20 @@ static const struct input_device_id joydev_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_X) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_X) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_WHEEL) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_WHEEL) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_ABS) },
-		.absbit = { BIT(ABS_THROTTLE) },
+		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_THROTTLE) },
 	},
 	{ }	/* Terminating entry */
 };
diff --git a/drivers/input/joystick/a3d.c b/drivers/input/joystick/a3d.c
index ff701ab10d7..52ba16f487c 100644
--- a/drivers/input/joystick/a3d.c
+++ b/drivers/input/joystick/a3d.c
@@ -326,14 +326,19 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
 
 		a3d->length = 33;
 
-		input_dev->evbit[0] |= BIT(EV_ABS) | BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-		input_dev->absbit[0] |= BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_THROTTLE) | BIT(ABS_RUDDER)
-					| BIT(ABS_HAT0X) | BIT(ABS_HAT0Y) | BIT(ABS_HAT1X) | BIT(ABS_HAT1Y);
-		input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE)
-							| BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-		input_dev->keybit[LONG(BTN_JOYSTICK)] |= BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP)
-							| BIT(BTN_PINKIE);
+		input_dev->evbit[0] |= BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY) |
+			BIT_MASK(EV_REL);
+		input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input_dev->absbit[0] |= BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+			BIT_MASK(ABS_THROTTLE) | BIT_MASK(ABS_RUDDER) |
+			BIT_MASK(ABS_HAT0X) | BIT_MASK(ABS_HAT0Y) |
+			BIT_MASK(ABS_HAT1X) | BIT_MASK(ABS_HAT1Y);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+			BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE) |
+			BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+		input_dev->keybit[BIT_WORD(BTN_JOYSTICK)] |=
+			BIT_MASK(BTN_TRIGGER) | BIT_MASK(BTN_THUMB) |
+			BIT_MASK(BTN_TOP) | BIT_MASK(BTN_PINKIE);
 
 		a3d_read(a3d, data);
 
@@ -348,9 +353,10 @@ static int a3d_connect(struct gameport *gameport, struct gameport_driver *drv)
 	} else {
 		a3d->length = 29;
 
-		input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->relbit[0] |= BIT(REL_X) | BIT(REL_Y);
-		input_dev->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_RIGHT) | BIT(BTN_LEFT) | BIT(BTN_MIDDLE);
+		input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input_dev->relbit[0] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_RIGHT) |
+			BIT_MASK(BTN_LEFT) | BIT_MASK(BTN_MIDDLE);
 
 		a3d_read(a3d, data);
 
diff --git a/drivers/input/joystick/adi.c b/drivers/input/joystick/adi.c
index 28140c4a110..d1ca8a14950 100644
--- a/drivers/input/joystick/adi.c
+++ b/drivers/input/joystick/adi.c
@@ -431,7 +431,7 @@ static int adi_init_input(struct adi *adi, struct adi_port *port, int half)
 	input_dev->open = adi_open;
 	input_dev->close = adi_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < adi->axes10 + adi->axes8 + (adi->hats + (adi->pad != -1)) * 2; i++)
 		set_bit(adi->abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/amijoy.c b/drivers/input/joystick/amijoy.c
index b0f5541ec3e..5cf9f3610e6 100644
--- a/drivers/input/joystick/amijoy.c
+++ b/drivers/input/joystick/amijoy.c
@@ -137,9 +137,10 @@ static int __init amijoy_init(void)
 		amijoy_dev[i]->open = amijoy_open;
 		amijoy_dev[i]->close = amijoy_close;
 
-		amijoy_dev[i]->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		amijoy_dev[i]->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-		amijoy_dev[i]->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+		amijoy_dev[i]->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		amijoy_dev[i]->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+		amijoy_dev[i]->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 		for (j = 0; j < 2; j++) {
 			amijoy_dev[i]->absmin[ABS_X + j] = -1;
 			amijoy_dev[i]->absmax[ABS_X + j] = 1;
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index bdd157c1ebf..15739880afc 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -456,7 +456,7 @@ static int analog_init_device(struct analog_port *port, struct analog *analog, i
 	input_dev->open = analog_open;
 	input_dev->close = analog_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = j = 0; i < 4; i++)
 		if (analog->mask & (1 << i)) {
diff --git a/drivers/input/joystick/cobra.c b/drivers/input/joystick/cobra.c
index d3352a849b8..55646a6d89f 100644
--- a/drivers/input/joystick/cobra.c
+++ b/drivers/input/joystick/cobra.c
@@ -218,7 +218,7 @@ static int cobra_connect(struct gameport *gameport, struct gameport_driver *drv)
 		input_dev->open = cobra_open;
 		input_dev->close = cobra_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
 		input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
 		for (j = 0; cobra_btn[j]; j++)
diff --git a/drivers/input/joystick/db9.c b/drivers/input/joystick/db9.c
index b069ee18e35..27fc475bd3a 100644
--- a/drivers/input/joystick/db9.c
+++ b/drivers/input/joystick/db9.c
@@ -631,7 +631,7 @@ static struct db9 __init *db9_probe(int parport, int mode)
 		input_dev->open = db9_open;
 		input_dev->close = db9_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		for (j = 0; j < db9_mode->n_buttons; j++)
 			set_bit(db9_mode->buttons[j], input_dev->keybit);
 		for (j = 0; j < db9_mode->n_axis; j++) {
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 1a452e0e5f2..df2a9d02ca6 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -653,12 +653,12 @@ static int __init gc_setup_pad(struct gc *gc, int idx, int pad_type)
 	input_dev->close = gc_close;
 
 	if (pad_type != GC_SNESMOUSE) {
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (i = 0; i < 2; i++)
 			input_set_abs_params(input_dev, ABS_X + i, -1, 1, 0, 0);
 	} else
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
 
 	gc->pads[0] |= gc_status_bit[idx];
 	gc->pads[pad_type] |= gc_status_bit[idx];
diff --git a/drivers/input/joystick/gf2k.c b/drivers/input/joystick/gf2k.c
index d514aebf755..1f6302c0eb3 100644
--- a/drivers/input/joystick/gf2k.c
+++ b/drivers/input/joystick/gf2k.c
@@ -315,7 +315,7 @@ static int gf2k_connect(struct gameport *gameport, struct gameport_driver *drv)
 	input_dev->open = gf2k_open;
 	input_dev->close = gf2k_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < gf2k_axes[gf2k->id]; i++)
 		set_bit(gf2k_abs[i], input_dev->absbit);
diff --git a/drivers/input/joystick/grip.c b/drivers/input/joystick/grip.c
index 73eb5ab6f14..fd3853ab1aa 100644
--- a/drivers/input/joystick/grip.c
+++ b/drivers/input/joystick/grip.c
@@ -370,7 +370,7 @@ static int grip_connect(struct gameport *gameport, struct gameport_driver *drv)
 		input_dev->open = grip_open;
 		input_dev->close = grip_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (j = 0; (t = grip_abs[grip->mode[i]][j]) >= 0; j++) {
 
diff --git a/drivers/input/joystick/grip_mp.c b/drivers/input/joystick/grip_mp.c
index 4ed3a3eadf1..c57e21d68c0 100644
--- a/drivers/input/joystick/grip_mp.c
+++ b/drivers/input/joystick/grip_mp.c
@@ -606,7 +606,7 @@ static int register_slot(int slot, struct grip_mp *grip)
 	input_dev->open = grip_open;
 	input_dev->close = grip_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (j = 0; (t = grip_abs[port->mode][j]) >= 0; j++)
 		input_set_abs_params(input_dev, t, -1, 1, 0, 0);
diff --git a/drivers/input/joystick/guillemot.c b/drivers/input/joystick/guillemot.c
index d4e8073caf2..aa6bfb3fb8c 100644
--- a/drivers/input/joystick/guillemot.c
+++ b/drivers/input/joystick/guillemot.c
@@ -238,7 +238,7 @@ static int guillemot_connect(struct gameport *gameport, struct gameport_driver *
 	input_dev->open = guillemot_open;
 	input_dev->close = guillemot_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; (t = guillemot->type->abs[i]) >= 0; i++)
 		input_set_abs_params(input_dev, t, 0, 255, 0, 0);
diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c
index 682244b1c04..6f826b37d9a 100644
--- a/drivers/input/joystick/iforce/iforce-main.c
+++ b/drivers/input/joystick/iforce/iforce-main.c
@@ -389,7 +389,8 @@ int iforce_init_device(struct iforce *iforce)
  * Set input device bitfields and ranges.
  */
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_FF_STATUS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_FF_STATUS);
 
 	for (i = 0; iforce->type->btn[i] >= 0; i++)
 		set_bit(iforce->type->btn[i], input_dev->keybit);
diff --git a/drivers/input/joystick/iforce/iforce.h b/drivers/input/joystick/iforce/iforce.h
index 40a853ac21c..a964a7cfd21 100644
--- a/drivers/input/joystick/iforce/iforce.h
+++ b/drivers/input/joystick/iforce/iforce.h
@@ -62,13 +62,13 @@
 #define FF_CORE_IS_PLAYED	3	/* Effect is currently being played */
 #define FF_CORE_SHOULD_PLAY	4	/* User wants the effect to be played */
 #define FF_CORE_UPDATE		5	/* Effect is being updated */
-#define FF_MODCORE_MAX		5
+#define FF_MODCORE_CNT		6
 
 struct iforce_core_effect {
 	/* Information about where modifiers are stored in the device's memory */
 	struct resource mod1_chunk;
 	struct resource mod2_chunk;
-	unsigned long flags[NBITS(FF_MODCORE_MAX)];
+	unsigned long flags[BITS_TO_LONGS(FF_MODCORE_CNT)];
 };
 
 #define FF_CMD_EFFECT		0x010e
diff --git a/drivers/input/joystick/interact.c b/drivers/input/joystick/interact.c
index 1aec1e9d7c5..bc8ea95dfd0 100644
--- a/drivers/input/joystick/interact.c
+++ b/drivers/input/joystick/interact.c
@@ -269,7 +269,7 @@ static int interact_connect(struct gameport *gameport, struct gameport_driver *d
 	input_dev->open = interact_open;
 	input_dev->close = interact_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; (t = interact_type[interact->type].abs[i]) >= 0; i++) {
 		set_bit(t, input_dev->absbit);
diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c
index b35604ee43a..54e676948eb 100644
--- a/drivers/input/joystick/magellan.c
+++ b/drivers/input/joystick/magellan.c
@@ -170,7 +170,7 @@ static int magellan_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < 9; i++)
 		set_bit(magellan_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/sidewinder.c b/drivers/input/joystick/sidewinder.c
index 2adf73f63c9..7b4865fdee5 100644
--- a/drivers/input/joystick/sidewinder.c
+++ b/drivers/input/joystick/sidewinder.c
@@ -758,7 +758,7 @@ static int sw_connect(struct gameport *gameport, struct gameport_driver *drv)
 		input_dev->open = sw_open;
 		input_dev->close = sw_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 		for (j = 0; (bits = sw_bit[sw->type][j]); j++) {
 			code = sw_abs[sw->type][j];
diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c
index abb7c4cf54a..d4087fd4965 100644
--- a/drivers/input/joystick/spaceball.c
+++ b/drivers/input/joystick/spaceball.c
@@ -228,18 +228,23 @@ static int spaceball_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	switch (id) {
 		case SPACEBALL_4000FLX:
 		case SPACEBALL_4000FLX_L:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_9);
-			input_dev->keybit[LONG(BTN_A)] |= BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_MODE);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_9);
+			input_dev->keybit[BIT_WORD(BTN_A)] |= BIT_MASK(BTN_A) |
+				BIT_MASK(BTN_B) | BIT_MASK(BTN_C) |
+				BIT_MASK(BTN_MODE);
 		default:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_2) | BIT(BTN_3) | BIT(BTN_4)
-				| BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7) | BIT(BTN_8);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_2) |
+				BIT_MASK(BTN_3) | BIT_MASK(BTN_4) |
+				BIT_MASK(BTN_5) | BIT_MASK(BTN_6) |
+				BIT_MASK(BTN_7) | BIT_MASK(BTN_8);
 		case SPACEBALL_3003C:
-			input_dev->keybit[LONG(BTN_0)] |= BIT(BTN_1) | BIT(BTN_8);
+			input_dev->keybit[BIT_WORD(BTN_0)] |= BIT_MASK(BTN_1) |
+				BIT_MASK(BTN_8);
 	}
 
 	for (i = 0; i < 3; i++) {
diff --git a/drivers/input/joystick/spaceorb.c b/drivers/input/joystick/spaceorb.c
index c4937f1e837..f7ce4004f4b 100644
--- a/drivers/input/joystick/spaceorb.c
+++ b/drivers/input/joystick/spaceorb.c
@@ -185,7 +185,7 @@ static int spaceorb_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < 6; i++)
 		set_bit(spaceorb_buttons[i], input_dev->keybit);
diff --git a/drivers/input/joystick/stinger.c b/drivers/input/joystick/stinger.c
index 8581ee991d4..baa10b2f7ba 100644
--- a/drivers/input/joystick/stinger.c
+++ b/drivers/input/joystick/stinger.c
@@ -156,10 +156,11 @@ static int stinger_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_A)] = BIT(BTN_A) | BIT(BTN_B) | BIT(BTN_C) | BIT(BTN_X) |
-					 BIT(BTN_Y) | BIT(BTN_Z) | BIT(BTN_TL) | BIT(BTN_TR) |
-					 BIT(BTN_START) | BIT(BTN_SELECT);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_A)] = BIT_MASK(BTN_A) | BIT_MASK(BTN_B) |
+		BIT_MASK(BTN_C) | BIT_MASK(BTN_X) | BIT_MASK(BTN_Y) |
+		BIT_MASK(BTN_Z) | BIT_MASK(BTN_TL) | BIT_MASK(BTN_TR) |
+		BIT_MASK(BTN_START) | BIT_MASK(BTN_SELECT);
 	input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 4);
 	input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 4);
 
diff --git a/drivers/input/joystick/tmdc.c b/drivers/input/joystick/tmdc.c
index 3b36ee04f72..0feeb8acb53 100644
--- a/drivers/input/joystick/tmdc.c
+++ b/drivers/input/joystick/tmdc.c
@@ -333,7 +333,7 @@ static int tmdc_setup_port(struct tmdc *tmdc, int idx, unsigned char *data)
 	input_dev->open = tmdc_open;
 	input_dev->close = tmdc_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	for (i = 0; i < port->absc && i < TMDC_ABS; i++)
 		if (port->abs[i] >= 0)
diff --git a/drivers/input/joystick/turbografx.c b/drivers/input/joystick/turbografx.c
index 8381c6f1437..bbebd4e2ad7 100644
--- a/drivers/input/joystick/turbografx.c
+++ b/drivers/input/joystick/turbografx.c
@@ -229,7 +229,7 @@ static struct tgfx __init *tgfx_probe(int parport, int *n_buttons, int n_devs)
 		input_dev->open = tgfx_open;
 		input_dev->close = tgfx_close;
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		input_set_abs_params(input_dev, ABS_X, -1, 1, 0, 0);
 		input_set_abs_params(input_dev, ABS_Y, -1, 1, 0, 0);
 
diff --git a/drivers/input/joystick/twidjoy.c b/drivers/input/joystick/twidjoy.c
index c91504ec38e..1085c841fec 100644
--- a/drivers/input/joystick/twidjoy.c
+++ b/drivers/input/joystick/twidjoy.c
@@ -207,7 +207,7 @@ static int twidjoy_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 	input_set_abs_params(input_dev, ABS_X, -50, 50, 4, 4);
 	input_set_abs_params(input_dev, ABS_Y, -50, 50, 4, 4);
 
diff --git a/drivers/input/joystick/warrior.c b/drivers/input/joystick/warrior.c
index 4e85f72eefd..e928b6e3724 100644
--- a/drivers/input/joystick/warrior.c
+++ b/drivers/input/joystick/warrior.c
@@ -162,9 +162,11 @@ static int warrior_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TRIGGER)] = BIT(BTN_TRIGGER) | BIT(BTN_THUMB) | BIT(BTN_TOP) | BIT(BTN_TOP2);
-	input_dev->relbit[0] = BIT(REL_DIAL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+		BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TRIGGER)] = BIT_MASK(BTN_TRIGGER) |
+		BIT_MASK(BTN_THUMB) | BIT_MASK(BTN_TOP) | BIT_MASK(BTN_TOP2);
+	input_dev->relbit[0] = BIT_MASK(REL_DIAL);
 	input_set_abs_params(input_dev, ABS_X, -64, 64, 0, 8);
 	input_set_abs_params(input_dev, ABS_Y, -64, 64, 0, 8);
 	input_set_abs_params(input_dev, ABS_THROTTLE, -112, 112, 0, 0);
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 623629a69b0..6dd375825a1 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -658,7 +658,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 	input_dev->open = xpad_open;
 	input_dev->close = xpad_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 
 	/* set up buttons */
 	for (i = 0; xpad_btn[i] >= 0; i++)
diff --git a/drivers/input/keyboard/aaed2000_kbd.c b/drivers/input/keyboard/aaed2000_kbd.c
index 63d6ead6b87..72abc196ce6 100644
--- a/drivers/input/keyboard/aaed2000_kbd.c
+++ b/drivers/input/keyboard/aaed2000_kbd.c
@@ -125,7 +125,7 @@ static int __devinit aaedkbd_probe(struct platform_device *pdev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = aaedkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(aaedkbd_keycode);
diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index c67e84ec2d6..81bf7562aca 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c
@@ -209,7 +209,7 @@ static int __init amikbd_init(void)
 	amikbd_dev->id.product = 0x0001;
 	amikbd_dev->id.version = 0x0100;
 
-	amikbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	amikbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 
 	for (i = 0; i < 0x78; i++)
 		set_bit(i, amikbd_dev->keybit);
diff --git a/drivers/input/keyboard/atakbd.c b/drivers/input/keyboard/atakbd.c
index a1800151b6c..4e92100c56a 100644
--- a/drivers/input/keyboard/atakbd.c
+++ b/drivers/input/keyboard/atakbd.c
@@ -237,7 +237,7 @@ static int __init atakbd_init(void)
 	atakbd_dev->id.product = 0x0001;
 	atakbd_dev->id.version = 0x0100;
 
-	atakbd_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	atakbd_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	atakbd_dev->keycode = atakbd_keycode;
 	atakbd_dev->keycodesize = sizeof(unsigned char);
 	atakbd_dev->keycodemax = ARRAY_SIZE(atakbd_keycode);
diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 41fc3d03b6e..b39c5b31e62 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -900,27 +900,32 @@ static void atkbd_set_device_attrs(struct atkbd *atkbd)
 
 	input_set_drvdata(input_dev, atkbd);
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_MSC);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_MSC);
 
 	if (atkbd->write) {
-		input_dev->evbit[0] |= BIT(EV_LED);
-		input_dev->ledbit[0] = BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+		input_dev->evbit[0] |= BIT_MASK(EV_LED);
+		input_dev->ledbit[0] = BIT_MASK(LED_NUML) |
+			BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL);
 	}
 
 	if (atkbd->extra)
-		input_dev->ledbit[0] |= BIT(LED_COMPOSE) | BIT(LED_SUSPEND) |
-					BIT(LED_SLEEP) | BIT(LED_MUTE) | BIT(LED_MISC);
+		input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) |
+			BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) |
+			BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC);
 
 	if (!atkbd->softrepeat) {
 		input_dev->rep[REP_DELAY] = 250;
 		input_dev->rep[REP_PERIOD] = 33;
 	}
 
-	input_dev->mscbit[0] = atkbd->softraw ? BIT(MSC_SCAN) : BIT(MSC_RAW) | BIT(MSC_SCAN);
+	input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) :
+		BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN);
 
 	if (atkbd->scroll) {
-		input_dev->evbit[0] |= BIT(EV_REL);
-		input_dev->relbit[0] = BIT(REL_WHEEL) | BIT(REL_HWHEEL);
+		input_dev->evbit[0] |= BIT_MASK(EV_REL);
+		input_dev->relbit[0] = BIT_MASK(REL_WHEEL) |
+			BIT_MASK(REL_HWHEEL);
 		set_bit(BTN_MIDDLE, input_dev->keybit);
 	}
 
diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c
index 6578bfff644..790fed368aa 100644
--- a/drivers/input/keyboard/corgikbd.c
+++ b/drivers/input/keyboard/corgikbd.c
@@ -325,7 +325,8 @@ static int __init corgikbd_probe(struct platform_device *pdev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
 	input_dev->keycode = corgikbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(corgikbd_keycode);
diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c
index e2a3293bc67..3eddf52a0bb 100644
--- a/drivers/input/keyboard/gpio_keys.c
+++ b/drivers/input/keyboard/gpio_keys.c
@@ -62,7 +62,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, input);
 
-	input->evbit[0] = BIT(EV_KEY);
+	input->evbit[0] = BIT_MASK(EV_KEY);
 
 	input->name = pdev->name;
 	input->phys = "gpio-keys/input0";
diff --git a/drivers/input/keyboard/hil_kbd.c b/drivers/input/keyboard/hil_kbd.c
index cdd254f2e6c..adbf29f0169 100644
--- a/drivers/input/keyboard/hil_kbd.c
+++ b/drivers/input/keyboard/hil_kbd.c
@@ -323,8 +323,9 @@ static int hil_kbd_connect(struct serio *serio, struct serio_driver *drv)
 		goto bail2;
 	}
 
-	kbd->dev->evbit[0]	= BIT(EV_KEY) | BIT(EV_REP);
-	kbd->dev->ledbit[0]	= BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+	kbd->dev->evbit[0]	= BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+	kbd->dev->ledbit[0]	= BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL);
 	kbd->dev->keycodemax	= HIL_KEYCODES_SET1_TBLSIZE;
 	kbd->dev->keycodesize	= sizeof(hil_kbd_set1[0]);
 	kbd->dev->keycode	= hil_kbd_set1;
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 499b6974457..50d80ecf0b8 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -266,8 +266,9 @@ hil_keyb_init(void)
 		if (hphilkeyb_keycode[i] != KEY_RESERVED)
 			set_bit(hphilkeyb_keycode[i], hil_dev.dev->keybit);
 
-	hil_dev.dev->evbit[0]	= BIT(EV_KEY) | BIT(EV_REP);
-	hil_dev.dev->ledbit[0]	= BIT(LED_NUML) | BIT(LED_CAPSL) | BIT(LED_SCROLLL);
+	hil_dev.dev->evbit[0]	= BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
+	hil_dev.dev->ledbit[0]	= BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) |
+		BIT_MASK(LED_SCROLLL);
 	hil_dev.dev->keycodemax	= HIL_KEYCODES_SET1_TBLSIZE;
 	hil_dev.dev->keycodesize= sizeof(hphilkeyb_keycode[0]);
 	hil_dev.dev->keycode	= hphilkeyb_keycode;
diff --git a/drivers/input/keyboard/locomokbd.c b/drivers/input/keyboard/locomokbd.c
index 7a41b271f22..5a0ca18d675 100644
--- a/drivers/input/keyboard/locomokbd.c
+++ b/drivers/input/keyboard/locomokbd.c
@@ -233,7 +233,7 @@ static int locomokbd_probe(struct locomo_dev *dev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = locomokbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(locomokbd_keycode);
diff --git a/drivers/input/keyboard/newtonkbd.c b/drivers/input/keyboard/newtonkbd.c
index b97a41e3ee5..48d1cab0aa1 100644
--- a/drivers/input/keyboard/newtonkbd.c
+++ b/drivers/input/keyboard/newtonkbd.c
@@ -106,7 +106,7 @@ static int nkbd_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = nkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode);
diff --git a/drivers/input/keyboard/pxa27x_keyboard.c b/drivers/input/keyboard/pxa27x_keyboard.c
index b7061aa3881..bdd64ee4c5c 100644
--- a/drivers/input/keyboard/pxa27x_keyboard.c
+++ b/drivers/input/keyboard/pxa27x_keyboard.c
@@ -183,8 +183,9 @@ static int __devinit pxakbd_probe(struct platform_device *pdev)
 	input_dev->close = pxakbd_close;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-	input_dev->relbit[LONG(REL_WHEEL)] = BIT(REL_WHEEL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_REL);
+	input_dev->relbit[BIT_WORD(REL_WHEEL)] = BIT_MASK(REL_WHEEL);
 	for (row = 0; row < pdata->nr_rows; row++) {
 		for (col = 0; col < pdata->nr_cols; col++) {
 			int code = pdata->keycodes[row][col];
diff --git a/drivers/input/keyboard/spitzkbd.c b/drivers/input/keyboard/spitzkbd.c
index 41b80385476..410d78a774d 100644
--- a/drivers/input/keyboard/spitzkbd.c
+++ b/drivers/input/keyboard/spitzkbd.c
@@ -381,7 +381,8 @@ static int __init spitzkbd_probe(struct platform_device *dev)
 	input_dev->id.product = 0x0001;
 	input_dev->id.version = 0x0100;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) |
+		BIT_MASK(EV_PWR) | BIT_MASK(EV_SW);
 	input_dev->keycode = spitzkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(spitzkbd_keycode);
diff --git a/drivers/input/keyboard/stowaway.c b/drivers/input/keyboard/stowaway.c
index b44b0684d54..7437219370b 100644
--- a/drivers/input/keyboard/stowaway.c
+++ b/drivers/input/keyboard/stowaway.c
@@ -110,7 +110,7 @@ static int skbd_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = skbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(skbd_keycode);
diff --git a/drivers/input/keyboard/sunkbd.c b/drivers/input/keyboard/sunkbd.c
index 1d4e39624cf..be0f5d19d02 100644
--- a/drivers/input/keyboard/sunkbd.c
+++ b/drivers/input/keyboard/sunkbd.c
@@ -277,9 +277,11 @@ static int sunkbd_connect(struct serio *serio, struct serio_driver *drv)
 
 	input_dev->event = sunkbd_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_SND) | BIT(EV_REP);
-	input_dev->ledbit[0] = BIT(LED_CAPSL) | BIT(LED_COMPOSE) | BIT(LED_SCROLLL) | BIT(LED_NUML);
-	input_dev->sndbit[0] = BIT(SND_CLICK) | BIT(SND_BELL);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+		BIT_MASK(EV_SND) | BIT_MASK(EV_REP);
+	input_dev->ledbit[0] = BIT_MASK(LED_CAPSL) | BIT_MASK(LED_COMPOSE) |
+		BIT_MASK(LED_SCROLLL) | BIT_MASK(LED_NUML);
+	input_dev->sndbit[0] = BIT_MASK(SND_CLICK) | BIT_MASK(SND_BELL);
 
 	input_dev->keycode = sunkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
diff --git a/drivers/input/keyboard/xtkbd.c b/drivers/input/keyboard/xtkbd.c
index f3a56eb58ed..152a2c07050 100644
--- a/drivers/input/keyboard/xtkbd.c
+++ b/drivers/input/keyboard/xtkbd.c
@@ -110,7 +110,7 @@ static int xtkbd_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	input_dev->keycode = xtkbd->keycode;
 	input_dev->keycodesize = sizeof(unsigned char);
 	input_dev->keycodemax = ARRAY_SIZE(xtkbd_keycode);
diff --git a/drivers/input/misc/ati_remote.c b/drivers/input/misc/ati_remote.c
index 471aab20644..3a7937481ad 100644
--- a/drivers/input/misc/ati_remote.c
+++ b/drivers/input/misc/ati_remote.c
@@ -662,10 +662,10 @@ static void ati_remote_input_init(struct ati_remote *ati_remote)
 	struct input_dev *idev = ati_remote->idev;
 	int i;
 
-	idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	idev->keybit[LONG(BTN_MOUSE)] = ( BIT(BTN_LEFT) | BIT(BTN_RIGHT) |
-					  BIT(BTN_SIDE) | BIT(BTN_EXTRA) );
-	idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+	idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 	for (i = 0; ati_remote_tbl[i].kind != KIND_END; i++)
 		if (ati_remote_tbl[i].type == EV_KEY)
 			set_bit(ati_remote_tbl[i].code, idev->keybit);
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index 1031543e5c3..f2709b82485 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -346,9 +346,10 @@ static int ati_remote2_input_init(struct ati_remote2 *ar2)
 	ar2->idev = idev;
 	input_set_drvdata(idev, ar2);
 
-	idev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_REL);
-	idev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-	idev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	idev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_REL);
+	idev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
+	idev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 	for (i = 0; ati_remote2_key_table[i].key_code != KEY_RESERVED; i++)
 		set_bit(ati_remote2_key_table[i].key_code, idev->keybit);
 
diff --git a/drivers/input/misc/atlas_btns.c b/drivers/input/misc/atlas_btns.c
index e43e92fd9e2..4e3ad657ed8 100644
--- a/drivers/input/misc/atlas_btns.c
+++ b/drivers/input/misc/atlas_btns.c
@@ -81,7 +81,7 @@ static int atlas_acpi_button_add(struct acpi_device *device)
 	input_dev->name = "Atlas ACPI button driver";
 	input_dev->phys = "ASIM0000/atlas/input0";
 	input_dev->id.bustype = BUS_HOST;
-	input_dev->evbit[LONG(EV_KEY)] = BIT(EV_KEY);
+	input_dev->evbit[BIT_WORD(EV_KEY)] = BIT_MASK(EV_KEY);
 
 	set_bit(KEY_F1, input_dev->keybit);
 	set_bit(KEY_F2, input_dev->keybit);
diff --git a/drivers/input/misc/cobalt_btns.c b/drivers/input/misc/cobalt_btns.c
index 064b0793601..1aef97ed5e8 100644
--- a/drivers/input/misc/cobalt_btns.c
+++ b/drivers/input/misc/cobalt_btns.c
@@ -104,7 +104,7 @@ static int __devinit cobalt_buttons_probe(struct platform_device *pdev)
 	input->id.bustype = BUS_HOST;
 	input->cdev.dev = &pdev->dev;
 
-	input->evbit[0] = BIT(EV_KEY);
+	input->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; i < ARRAY_SIZE(buttons_map); i++) {
 		set_bit(buttons_map[i].keycode, input->keybit);
 		buttons_map[i].count = 0;
diff --git a/drivers/input/misc/ixp4xx-beeper.c b/drivers/input/misc/ixp4xx-beeper.c
index e759944041a..d2ade7443b7 100644
--- a/drivers/input/misc/ixp4xx-beeper.c
+++ b/drivers/input/misc/ixp4xx-beeper.c
@@ -109,8 +109,8 @@ static int __devinit ixp4xx_spkr_probe(struct platform_device *dev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = ixp4xx_spkr_event;
 
 	err = request_irq(IRQ_IXP4XX_TIMER2, &ixp4xx_spkr_interrupt,
diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c
index 1bffc9fa98c..fd74347047d 100644
--- a/drivers/input/misc/keyspan_remote.c
+++ b/drivers/input/misc/keyspan_remote.c
@@ -497,7 +497,7 @@ static int keyspan_probe(struct usb_interface *interface, const struct usb_devic
 	usb_to_input_id(udev, &input_dev->id);
 	input_dev->dev.parent = &interface->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);		/* We will only report KEY events. */
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);		/* We will only report KEY events. */
 	for (i = 0; i < ARRAY_SIZE(keyspan_key_table); i++)
 		if (keyspan_key_table[i] != KEY_RESERVED)
 			set_bit(keyspan_key_table[i], input_dev->keybit);
diff --git a/drivers/input/misc/m68kspkr.c b/drivers/input/misc/m68kspkr.c
index e9f26e766b4..0c64d9bb718 100644
--- a/drivers/input/misc/m68kspkr.c
+++ b/drivers/input/misc/m68kspkr.c
@@ -65,8 +65,8 @@ static int __devinit m68kspkr_probe(struct platform_device *dev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = m68kspkr_event;
 
 	err = input_register_device(input_dev);
diff --git a/drivers/input/misc/pcspkr.c b/drivers/input/misc/pcspkr.c
index c19f77fbaf2..4941a9e61e9 100644
--- a/drivers/input/misc/pcspkr.c
+++ b/drivers/input/misc/pcspkr.c
@@ -86,8 +86,8 @@ static int __devinit pcspkr_probe(struct platform_device *dev)
 	pcspkr_dev->id.version = 0x0100;
 	pcspkr_dev->dev.parent = &dev->dev;
 
-	pcspkr_dev->evbit[0] = BIT(EV_SND);
-	pcspkr_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	pcspkr_dev->evbit[0] = BIT_MASK(EV_SND);
+	pcspkr_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	pcspkr_dev->event = pcspkr_event;
 
 	err = input_register_device(pcspkr_dev);
diff --git a/drivers/input/misc/powermate.c b/drivers/input/misc/powermate.c
index 448a470d28f..7a7b8c7b963 100644
--- a/drivers/input/misc/powermate.c
+++ b/drivers/input/misc/powermate.c
@@ -363,10 +363,11 @@ static int powermate_probe(struct usb_interface *intf, const struct usb_device_i
 
 	input_dev->event = powermate_input_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_MSC);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
-	input_dev->relbit[LONG(REL_DIAL)] = BIT(REL_DIAL);
-	input_dev->mscbit[LONG(MSC_PULSELED)] = BIT(MSC_PULSELED);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) |
+		BIT_MASK(EV_MSC);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
+	input_dev->relbit[BIT_WORD(REL_DIAL)] = BIT_MASK(REL_DIAL);
+	input_dev->mscbit[BIT_WORD(MSC_PULSELED)] = BIT_MASK(MSC_PULSELED);
 
 	/* get a handle to the interrupt data pipe */
 	pipe = usb_rcvintpipe(udev, endpoint->bEndpointAddress);
diff --git a/drivers/input/misc/sparcspkr.c b/drivers/input/misc/sparcspkr.c
index e36ec1d92be..a3637d87088 100644
--- a/drivers/input/misc/sparcspkr.c
+++ b/drivers/input/misc/sparcspkr.c
@@ -115,8 +115,8 @@ static int __devinit sparcspkr_probe(struct device *dev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = dev;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 
 	input_dev->event = state->event;
 
diff --git a/drivers/input/misc/yealink.c b/drivers/input/misc/yealink.c
index ab15880fd56..46279ef2b64 100644
--- a/drivers/input/misc/yealink.c
+++ b/drivers/input/misc/yealink.c
@@ -945,7 +945,7 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	/* input_dev->event = input_ev;	TODO */
 
 	/* register available key events */
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	for (i = 0; i < 256; i++) {
 		int k = map_p1k_to_key(i);
 		if (k >= 0) {
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 64d70a9b714..2b5ed119c9a 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -455,24 +455,25 @@ int alps_init(struct psmouse *psmouse)
 	if (alps_hw_init(psmouse, &version))
 		goto init_fail;
 
-	dev1->evbit[LONG(EV_KEY)] |= BIT(EV_KEY);
-	dev1->keybit[LONG(BTN_TOUCH)] |= BIT(BTN_TOUCH);
-	dev1->keybit[LONG(BTN_TOOL_FINGER)] |= BIT(BTN_TOOL_FINGER);
-	dev1->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	dev1->evbit[BIT_WORD(EV_KEY)] |= BIT_MASK(EV_KEY);
+	dev1->keybit[BIT_WORD(BTN_TOUCH)] |= BIT_MASK(BTN_TOUCH);
+	dev1->keybit[BIT_WORD(BTN_TOOL_FINGER)] |= BIT_MASK(BTN_TOOL_FINGER);
+	dev1->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
-	dev1->evbit[LONG(EV_ABS)] |= BIT(EV_ABS);
+	dev1->evbit[BIT_WORD(EV_ABS)] |= BIT_MASK(EV_ABS);
 	input_set_abs_params(dev1, ABS_X, 0, 1023, 0, 0);
 	input_set_abs_params(dev1, ABS_Y, 0, 767, 0, 0);
 	input_set_abs_params(dev1, ABS_PRESSURE, 0, 127, 0, 0);
 
 	if (priv->i->flags & ALPS_WHEEL) {
-		dev1->evbit[LONG(EV_REL)] |= BIT(EV_REL);
-		dev1->relbit[LONG(REL_WHEEL)] |= BIT(REL_WHEEL);
+		dev1->evbit[BIT_WORD(EV_REL)] |= BIT_MASK(EV_REL);
+		dev1->relbit[BIT_WORD(REL_WHEEL)] |= BIT_MASK(REL_WHEEL);
 	}
 
 	if (priv->i->flags & (ALPS_FW_BK_1 | ALPS_FW_BK_2)) {
-		dev1->keybit[LONG(BTN_FORWARD)] |= BIT(BTN_FORWARD);
-		dev1->keybit[LONG(BTN_BACK)] |= BIT(BTN_BACK);
+		dev1->keybit[BIT_WORD(BTN_FORWARD)] |= BIT_MASK(BTN_FORWARD);
+		dev1->keybit[BIT_WORD(BTN_BACK)] |= BIT_MASK(BTN_BACK);
 	}
 
 	snprintf(priv->phys, sizeof(priv->phys), "%s/input1", psmouse->ps2dev.serio->phys);
@@ -483,9 +484,10 @@ int alps_init(struct psmouse *psmouse)
 	dev2->id.product = PSMOUSE_ALPS;
 	dev2->id.version = 0x0000;
 
-	dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	dev2->relbit[LONG(REL_X)] |= BIT(REL_X) | BIT(REL_Y);
-	dev2->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	dev2->relbit[BIT_WORD(REL_X)] |= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	dev2->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 
 	if (input_register_device(priv->dev2))
 		goto init_fail;
diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c
index 239a0e16d91..a185ac78a42 100644
--- a/drivers/input/mouse/amimouse.c
+++ b/drivers/input/mouse/amimouse.c
@@ -111,9 +111,10 @@ static int __init amimouse_init(void)
 	amimouse_dev->id.product = 0x0002;
 	amimouse_dev->id.version = 0x0100;
 
-	amimouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	amimouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	amimouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	amimouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	amimouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	amimouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 	amimouse_dev->open = amimouse_open;
 	amimouse_dev->close = amimouse_close;
 
diff --git a/drivers/input/mouse/atarimouse.c b/drivers/input/mouse/atarimouse.c
index c8c7244b48a..98a3561d4b0 100644
--- a/drivers/input/mouse/atarimouse.c
+++ b/drivers/input/mouse/atarimouse.c
@@ -137,9 +137,10 @@ static int __init atamouse_init(void)
 	atamouse_dev->id.product = 0x0002;
 	atamouse_dev->id.version = 0x0100;
 
-	atamouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	atamouse_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-	atamouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
+	atamouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	atamouse_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	atamouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
 	atamouse_dev->open = atamouse_open;
 	atamouse_dev->close = atamouse_close;
 
diff --git a/drivers/input/mouse/hil_ptr.c b/drivers/input/mouse/hil_ptr.c
index 449bf4dcbbc..27f88fbb713 100644
--- a/drivers/input/mouse/hil_ptr.c
+++ b/drivers/input/mouse/hil_ptr.c
@@ -298,12 +298,12 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
 	idd = ptr->idd + 1;
 	txt = "unknown";
 	if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_REL) {
-		ptr->dev->evbit[0] = BIT(EV_REL);
+		ptr->dev->evbit[0] = BIT_MASK(EV_REL);
 		txt = "relative";
 	}
 
 	if ((did & HIL_IDD_DID_TYPE_MASK) == HIL_IDD_DID_TYPE_ABS) {
-		ptr->dev->evbit[0] = BIT(EV_ABS);
+		ptr->dev->evbit[0] = BIT_MASK(EV_ABS);
 		txt = "absolute";
 	}
 	if (!ptr->dev->evbit[0])
@@ -311,7 +311,7 @@ static int hil_ptr_connect(struct serio *serio, struct serio_driver *driver)
 
 	ptr->nbtn = HIL_IDD_NUM_BUTTONS(idd);
 	if (ptr->nbtn)
-		ptr->dev->evbit[0] |= BIT(EV_KEY);
+		ptr->dev->evbit[0] |= BIT_MASK(EV_KEY);
 
 	naxsets = HIL_IDD_NUM_AXSETS(*idd);
 	ptr->naxes = HIL_IDD_NUM_AXES_PER_SET(*idd);
diff --git a/drivers/input/mouse/inport.c b/drivers/input/mouse/inport.c
index 79b624fe899..655a3921743 100644
--- a/drivers/input/mouse/inport.c
+++ b/drivers/input/mouse/inport.c
@@ -163,9 +163,10 @@ static int __init inport_init(void)
 	inport_dev->id.product = 0x0001;
 	inport_dev->id.version = 0x0100;
 
-	inport_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	inport_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	inport_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	inport_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	inport_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	inport_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	inport_dev->open  = inport_open;
 	inport_dev->close = inport_close;
diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c
index d7de4c53b3d..9ec57d80186 100644
--- a/drivers/input/mouse/lifebook.c
+++ b/drivers/input/mouse/lifebook.c
@@ -270,9 +270,10 @@ static int lifebook_create_relative_device(struct psmouse *psmouse)
 	dev2->id.version = 0x0000;
 	dev2->dev.parent = &psmouse->ps2dev.serio->dev;
 
-	dev2->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	dev2->relbit[LONG(REL_X)] = BIT(REL_X) | BIT(REL_Y);
-	dev2->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
+	dev2->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	dev2->relbit[BIT_WORD(REL_X)] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+	dev2->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
 
 	error = input_register_device(priv->dev2);
 	if (error)
@@ -295,9 +296,9 @@ int lifebook_init(struct psmouse *psmouse)
 	if (lifebook_absolute_mode(psmouse))
 		return -1;
 
-	dev1->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
+	dev1->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
 	dev1->relbit[0] = 0;
-	dev1->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	dev1->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(dev1, ABS_X, 0, max_coord, 0, 0);
 	input_set_abs_params(dev1, ABS_Y, 0, max_coord, 0, 0);
 
diff --git a/drivers/input/mouse/logibm.c b/drivers/input/mouse/logibm.c
index 26c3b2e2ca9..b23a4f3ea5c 100644
--- a/drivers/input/mouse/logibm.c
+++ b/drivers/input/mouse/logibm.c
@@ -156,9 +156,10 @@ static int __init logibm_init(void)
 	logibm_dev->id.product = 0x0001;
 	logibm_dev->id.version = 0x0100;
 
-	logibm_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	logibm_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	logibm_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	logibm_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	logibm_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	logibm_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	logibm_dev->open  = logibm_open;
 	logibm_dev->close = logibm_close;
diff --git a/drivers/input/mouse/pc110pad.c b/drivers/input/mouse/pc110pad.c
index 05d992e514f..8991ab0b4fe 100644
--- a/drivers/input/mouse/pc110pad.c
+++ b/drivers/input/mouse/pc110pad.c
@@ -144,9 +144,9 @@ static int __init pc110pad_init(void)
 	pc110pad_dev->id.product = 0x0001;
 	pc110pad_dev->id.version = 0x0100;
 
-	pc110pad_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	pc110pad_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y);
-	pc110pad_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	pc110pad_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	pc110pad_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y);
+	pc110pad_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	pc110pad_dev->absmax[ABS_X] = 0x1ff;
 	pc110pad_dev->absmax[ABS_Y] = 0x0ff;
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 07352575653..da316d13d7f 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -1115,9 +1115,10 @@ static int psmouse_switch_protocol(struct psmouse *psmouse, const struct psmouse
 
 	input_dev->dev.parent = &psmouse->ps2dev.serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	psmouse->set_rate = psmouse_set_rate;
 	psmouse->set_resolution = psmouse_set_resolution;
diff --git a/drivers/input/mouse/rpcmouse.c b/drivers/input/mouse/rpcmouse.c
index 355efd0423e..18a48636ba4 100644
--- a/drivers/input/mouse/rpcmouse.c
+++ b/drivers/input/mouse/rpcmouse.c
@@ -78,9 +78,10 @@ static int __init rpcmouse_init(void)
 	rpcmouse_dev->id.product = 0x0001;
 	rpcmouse_dev->id.version = 0x0100;
 
-	rpcmouse_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	rpcmouse_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	rpcmouse_dev->relbit[0]	= BIT(REL_X) | BIT(REL_Y);
+	rpcmouse_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	rpcmouse_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	rpcmouse_dev->relbit[0]	= BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	rpcmouse_lastx = (short) iomd_readl(IOMD_MOUSEX);
 	rpcmouse_lasty = (short) iomd_readl(IOMD_MOUSEY);
diff --git a/drivers/input/mouse/sermouse.c b/drivers/input/mouse/sermouse.c
index 77b8ee2b965..ed917bfd086 100644
--- a/drivers/input/mouse/sermouse.c
+++ b/drivers/input/mouse/sermouse.c
@@ -268,9 +268,10 @@ static int sermouse_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT);
-	input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT);
+	input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	if (c & 0x01) set_bit(BTN_MIDDLE, input_dev->keybit);
 	if (c & 0x02) set_bit(BTN_SIDE, input_dev->keybit);
diff --git a/drivers/input/mouse/touchkit_ps2.c b/drivers/input/mouse/touchkit_ps2.c
index 7b977fd2357..3fadb2accac 100644
--- a/drivers/input/mouse/touchkit_ps2.c
+++ b/drivers/input/mouse/touchkit_ps2.c
@@ -85,7 +85,7 @@ int touchkit_ps2_detect(struct psmouse *psmouse, int set_properties)
 		return -ENODEV;
 
 	if (set_properties) {
-		dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
+		dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
 		set_bit(BTN_TOUCH, dev->keybit);
 		input_set_abs_params(dev, ABS_X, 0, TOUCHKIT_MAX_XC, 0, 0);
 		input_set_abs_params(dev, ABS_Y, 0, TOUCHKIT_MAX_YC, 0, 0);
diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c
index 79146d6ed2a..78c3ea75da2 100644
--- a/drivers/input/mousedev.c
+++ b/drivers/input/mousedev.c
@@ -998,34 +998,36 @@ static const struct input_device_id mousedev_ids[] = {
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_RELBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-		.keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) },
-		.relbit = { BIT(REL_X) | BIT(REL_Y) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+		.keybit = { [BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) },
+		.relbit = { BIT_MASK(REL_X) | BIT_MASK(REL_Y) },
 	},	/* A mouse like device, at least one button,
 		   two relative axes */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_RELBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_REL) },
-		.relbit = { BIT(REL_WHEEL) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_REL) },
+		.relbit = { BIT_MASK(REL_WHEEL) },
 	},	/* A separate scrollwheel */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-		.keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) },
-		.absbit = { BIT(ABS_X) | BIT(ABS_Y) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+		.keybit = { [BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH) },
+		.absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) },
 	},	/* A tablet like device, at least touch detection,
 		   two absolute axes */
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_KEYBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
-		.evbit = { BIT(EV_KEY) | BIT(EV_ABS) },
-		.keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) },
-		.absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) |
-				BIT(ABS_TOOL_WIDTH) },
+		.evbit = { BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) },
+		.keybit = { [BIT_WORD(BTN_TOOL_FINGER)] =
+				BIT_MASK(BTN_TOOL_FINGER) },
+		.absbit = { BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+				BIT_MASK(ABS_PRESSURE) |
+				BIT_MASK(ABS_TOOL_WIDTH) },
 	},	/* A touchpad */
 
 	{ },	/* Terminating entry */
diff --git a/drivers/input/tablet/acecad.c b/drivers/input/tablet/acecad.c
index dd2310458c4..b973d0ef6d1 100644
--- a/drivers/input/tablet/acecad.c
+++ b/drivers/input/tablet/acecad.c
@@ -192,10 +192,14 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_
 	input_dev->open = usb_acecad_open;
 	input_dev->close = usb_acecad_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE);
-	input_dev->keybit[LONG(BTN_LEFT)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] = BIT(BTN_TOOL_PEN) |BIT(BTN_TOUCH) | BIT(BTN_STYLUS) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+		BIT_MASK(ABS_PRESSURE);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] = BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS) |
+		BIT_MASK(BTN_STYLUS2);
 
 	switch (id->driver_info) {
 		case 0:
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index b2ca10f2fe0..d2c6da26472 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -573,10 +573,12 @@ static void gtco_setup_caps(struct input_dev *inputdev)
 	struct gtco *device = input_get_drvdata(inputdev);
 
 	/* Which events */
-	inputdev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
+	inputdev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_MSC);
 
 	/* Misc event menu block */
-	inputdev->mscbit[0] = BIT(MSC_SCAN)|BIT(MSC_SERIAL)|BIT(MSC_RAW) ;
+	inputdev->mscbit[0] = BIT_MASK(MSC_SCAN) | BIT_MASK(MSC_SERIAL) |
+		BIT_MASK(MSC_RAW);
 
 	/* Absolute values based on HID report info */
 	input_set_abs_params(inputdev, ABS_X, device->min_X, device->max_X,
diff --git a/drivers/input/tablet/kbtab.c b/drivers/input/tablet/kbtab.c
index 91e6d00d4a4..1182fc13316 100644
--- a/drivers/input/tablet/kbtab.c
+++ b/drivers/input/tablet/kbtab.c
@@ -153,10 +153,13 @@ static int kbtab_probe(struct usb_interface *intf, const struct usb_device_id *i
 	input_dev->open = kbtab_open;
 	input_dev->close = kbtab_close;
 
-	input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_MSC);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
+	input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_MSC);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
 	input_set_abs_params(input_dev, ABS_X, 0, 0x2000, 4, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, 0x1750, 4, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, 0, 0xff, 0, 0);
diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c
index 064e123c9b7..d64b1ea136b 100644
--- a/drivers/input/tablet/wacom_sys.c
+++ b/drivers/input/tablet/wacom_sys.c
@@ -140,48 +140,58 @@ static void wacom_close(struct input_dev *dev)
 
 void input_dev_mo(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_1) | BIT(BTN_5);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_1) |
+		BIT_MASK(BTN_5);
 	input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0);
 }
 
 void input_dev_g4(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_MSC);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_4);
+	input_dev->evbit[0] |= BIT_MASK(EV_MSC);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+		BIT_MASK(BTN_4);
 }
 
 void input_dev_g(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_REL);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] |= BIT_MASK(EV_REL);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+		BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_STYLUS2);
 	input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
 }
 
 void input_dev_i3s(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_FINGER);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_0) | BIT(BTN_1) | BIT(BTN_2) | BIT(BTN_3);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_FINGER);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_0) |
+		BIT_MASK(BTN_1) | BIT_MASK(BTN_2) | BIT_MASK(BTN_3);
 	input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0);
 }
 
 void input_dev_i3(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_4) | BIT(BTN_5) | BIT(BTN_6) | BIT(BTN_7);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_4) |
+		BIT_MASK(BTN_5) | BIT_MASK(BTN_6) | BIT_MASK(BTN_7);
 	input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0);
 }
 
 void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->evbit[0] |= BIT(EV_MSC) | BIT(EV_REL);
-	input_dev->mscbit[0] |= BIT(MSC_SERIAL);
-	input_dev->relbit[0] |= BIT(REL_WHEEL);
-	input_dev->keybit[LONG(BTN_LEFT)] |= BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE) | BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER) | BIT(BTN_TOOL_MOUSE)	| BIT(BTN_TOOL_BRUSH)
-		| BIT(BTN_TOOL_PENCIL) | BIT(BTN_TOOL_AIRBRUSH) | BIT(BTN_TOOL_LENS) | BIT(BTN_STYLUS2);
+	input_dev->evbit[0] |= BIT_MASK(EV_MSC) | BIT_MASK(EV_REL);
+	input_dev->mscbit[0] |= BIT_MASK(MSC_SERIAL);
+	input_dev->relbit[0] |= BIT_MASK(REL_WHEEL);
+	input_dev->keybit[BIT_WORD(BTN_LEFT)] |= BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE) |
+		BIT_MASK(BTN_SIDE) | BIT_MASK(BTN_EXTRA);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER) |
+		BIT_MASK(BTN_TOOL_MOUSE) | BIT_MASK(BTN_TOOL_BRUSH) |
+		BIT_MASK(BTN_TOOL_PENCIL) | BIT_MASK(BTN_TOOL_AIRBRUSH) |
+		BIT_MASK(BTN_TOOL_LENS) | BIT_MASK(BTN_STYLUS2);
 	input_set_abs_params(input_dev, ABS_DISTANCE, 0, wacom_wac->features->distance_max, 0, 0);
 	input_set_abs_params(input_dev, ABS_WHEEL, 0, 1023, 0, 0);
 	input_set_abs_params(input_dev, ABS_TILT_X, 0, 127, 0, 0);
@@ -192,12 +202,13 @@ void input_dev_i(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 
 void input_dev_pl(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_STYLUS2) | BIT(BTN_TOOL_RUBBER);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_STYLUS2) |
+		BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 void input_dev_pt(struct input_dev *input_dev, struct wacom_wac *wacom_wac)
 {
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_RUBBER);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_RUBBER);
 }
 
 static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -243,12 +254,13 @@ static int wacom_probe(struct usb_interface *intf, const struct usb_device_id *i
 	input_dev->open = wacom_open;
 	input_dev->close = wacom_close;
 
-	input_dev->evbit[0] |= BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_DIGI)] |= BIT(BTN_TOOL_PEN) | BIT(BTN_TOUCH) | BIT(BTN_STYLUS);
+	input_dev->evbit[0] |= BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_DIGI)] |= BIT_MASK(BTN_TOOL_PEN) |
+		BIT_MASK(BTN_TOUCH) | BIT_MASK(BTN_STYLUS);
 	input_set_abs_params(input_dev, ABS_X, 0, wacom_wac->features->x_max, 4, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, wacom_wac->features->y_max, 4, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, 0, wacom_wac->features->pressure_max, 0, 0);
-	input_dev->absbit[LONG(ABS_MISC)] |= BIT(ABS_MISC);
+	input_dev->absbit[BIT_WORD(ABS_MISC)] |= BIT_MASK(ABS_MISC);
 
 	wacom_init_input_dev(input_dev, wacom_wac);
 
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index 51ae4fb7d12..f59aecf5ec1 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -917,8 +917,8 @@ static int __devinit ads7846_probe(struct spi_device *spi)
 	input_dev->phys = ts->phys;
 	input_dev->dev.parent = &spi->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X,
 			pdata->x_min ? : 0,
 			pdata->x_max ? : MAX_12BIT,
diff --git a/drivers/input/touchscreen/corgi_ts.c b/drivers/input/touchscreen/corgi_ts.c
index e6a31d11878..b1b2e07bf08 100644
--- a/drivers/input/touchscreen/corgi_ts.c
+++ b/drivers/input/touchscreen/corgi_ts.c
@@ -302,8 +302,8 @@ static int __init corgits_probe(struct platform_device *pdev)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &pdev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, X_AXIS_MIN, X_AXIS_MAX, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, Y_AXIS_MIN, Y_AXIS_MAX, 0, 0);
 	input_set_abs_params(input_dev, ABS_PRESSURE, PRESSURE_MIN, PRESSURE_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/elo.c b/drivers/input/touchscreen/elo.c
index 557d781719f..d20689cdbd5 100644
--- a/drivers/input/touchscreen/elo.c
+++ b/drivers/input/touchscreen/elo.c
@@ -320,8 +320,8 @@ static int elo_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	serio_set_drvdata(serio, elo);
 	err = serio_open(serio, drv);
diff --git a/drivers/input/touchscreen/fujitsu_ts.c b/drivers/input/touchscreen/fujitsu_ts.c
index daf7a4afc93..80b21800355 100644
--- a/drivers/input/touchscreen/fujitsu_ts.c
+++ b/drivers/input/touchscreen/fujitsu_ts.c
@@ -122,8 +122,8 @@ static int fujitsu_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.vendor = SERIO_FUJITSU;
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	input_set_abs_params(input_dev, ABS_X, 0, 4096, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 0, 4096, 0, 0);
diff --git a/drivers/input/touchscreen/gunze.c b/drivers/input/touchscreen/gunze.c
index 39d602600d7..a48a15868c4 100644
--- a/drivers/input/touchscreen/gunze.c
+++ b/drivers/input/touchscreen/gunze.c
@@ -137,8 +137,8 @@ static int gunze_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.product = 0x0051;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, 24, 1000, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 24, 1000, 0, 0);
 
diff --git a/drivers/input/touchscreen/h3600_ts_input.c b/drivers/input/touchscreen/h3600_ts_input.c
index 09ed7803cb8..2ae6c6016a8 100644
--- a/drivers/input/touchscreen/h3600_ts_input.c
+++ b/drivers/input/touchscreen/h3600_ts_input.c
@@ -373,8 +373,9 @@ static int h3600ts_connect(struct serio *serio, struct serio_driver *drv)
 
 	input_dev->event = h3600ts_event;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS) | BIT(EV_LED) | BIT(EV_PWR);
-	input_dev->ledbit[0] = BIT(LED_SLEEP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) |
+		BIT_MASK(EV_LED) | BIT_MASK(EV_PWR);
+	input_dev->ledbit[0] = BIT_MASK(LED_SLEEP);
 	input_set_abs_params(input_dev, ABS_X, 60, 985, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, 35, 1024, 0, 0);
 
diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c
index 1a15475aedf..c38d4e0f95c 100644
--- a/drivers/input/touchscreen/hp680_ts_input.c
+++ b/drivers/input/touchscreen/hp680_ts_input.c
@@ -81,8 +81,8 @@ static int __init hp680_ts_init(void)
 	if (!hp680_ts_dev)
 		return -ENOMEM;
 
-	hp680_ts_dev->evbit[0] = BIT(EV_ABS) | BIT(EV_KEY);
-	hp680_ts_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	hp680_ts_dev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY);
+	hp680_ts_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 
 	input_set_abs_params(hp680_ts_dev, ABS_X,
 		HP680_TS_ABS_X_MIN, HP680_TS_ABS_X_MAX, 0, 0);
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index 44140feeffc..80a65886870 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -186,8 +186,8 @@ static int __init mk712_init(void)
 	mk712_dev->open    = mk712_open;
 	mk712_dev->close   = mk712_close;
 
-	mk712_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	mk712_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	mk712_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mk712_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(mk712_dev, ABS_X, 0, 0xfff, 88, 0);
 	input_set_abs_params(mk712_dev, ABS_Y, 0, 0xfff, 88, 0);
 
diff --git a/drivers/input/touchscreen/mtouch.c b/drivers/input/touchscreen/mtouch.c
index 4ec3b1f940c..9077228418b 100644
--- a/drivers/input/touchscreen/mtouch.c
+++ b/drivers/input/touchscreen/mtouch.c
@@ -151,8 +151,8 @@ static int mtouch_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(mtouch->dev, ABS_X, MTOUCH_MIN_XC, MTOUCH_MAX_XC, 0, 0);
 	input_set_abs_params(mtouch->dev, ABS_Y, MTOUCH_MIN_YC, MTOUCH_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/penmount.c b/drivers/input/touchscreen/penmount.c
index f2c0d3c7149..c7f9cebebbb 100644
--- a/drivers/input/touchscreen/penmount.c
+++ b/drivers/input/touchscreen/penmount.c
@@ -113,8 +113,8 @@ static int pm_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
 
-        input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-        input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+        input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+        input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
         input_set_abs_params(pm->dev, ABS_X, 0, 0x3ff, 0, 0);
         input_set_abs_params(pm->dev, ABS_Y, 0, 0x3ff, 0, 0);
 
diff --git a/drivers/input/touchscreen/touchright.c b/drivers/input/touchscreen/touchright.c
index 3def7bb1df4..3a5c142c2a7 100644
--- a/drivers/input/touchscreen/touchright.c
+++ b/drivers/input/touchscreen/touchright.c
@@ -125,8 +125,8 @@ static int tr_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(tr->dev, ABS_X, TR_MIN_XC, TR_MAX_XC, 0, 0);
 	input_set_abs_params(tr->dev, ABS_Y, TR_MIN_YC, TR_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/touchwin.c b/drivers/input/touchscreen/touchwin.c
index ac4bdcf1866..763a656a59f 100644
--- a/drivers/input/touchscreen/touchwin.c
+++ b/drivers/input/touchscreen/touchwin.c
@@ -132,8 +132,8 @@ static int tw_connect(struct serio *serio, struct serio_driver *drv)
 	input_dev->id.product = 0;
 	input_dev->id.version = 0x0100;
 	input_dev->dev.parent = &serio->dev;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(tw->dev, ABS_X, TW_MIN_XC, TW_MAX_XC, 0, 0);
 	input_set_abs_params(tw->dev, ABS_Y, TW_MIN_YC, TW_MAX_YC, 0, 0);
 
diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c
index 89373b01d8f..7549939b953 100644
--- a/drivers/input/touchscreen/ucb1400_ts.c
+++ b/drivers/input/touchscreen/ucb1400_ts.c
@@ -517,7 +517,7 @@ static int ucb1400_ts_probe(struct device *dev)
 	idev->id.product	= id;
 	idev->open		= ucb1400_ts_open;
 	idev->close		= ucb1400_ts_close;
-	idev->evbit[0]		= BIT(EV_ABS);
+	idev->evbit[0]		= BIT_MASK(EV_ABS);
 
 	ucb1400_adc_enable(ucb);
 	x_res = ucb1400_ts_read_xres(ucb);
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index 9fb3d5c3099..5f34b78d5dd 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -868,8 +868,8 @@ static int usbtouch_probe(struct usb_interface *intf,
 	input_dev->open = usbtouch_open;
 	input_dev->close = usbtouch_close;
 
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-	input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
 	input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0);
 	input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0);
 	if (type->max_press)
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 428872b653e..669f6f67449 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -486,11 +486,13 @@ static void b1dma_handle_rx(avmcard *card)
 					card->name);
 		} else {
 			memcpy(skb_put(skb, MsgLen), card->msgbuf, MsgLen);
-			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF)
+			if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_CONF) {
+				spin_lock(&card->lock);
 				capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
-						     CAPIMSG_NCCI(skb->data),
-						     CAPIMSG_MSGID(skb->data));
-
+					CAPIMSG_NCCI(skb->data),
+					CAPIMSG_MSGID(skb->data));
+				spin_unlock(&card->lock);
+			}
 			capi_ctr_handle_message(ctrl, ApplId, skb);
 		}
 		break;
@@ -500,9 +502,9 @@ static void b1dma_handle_rx(avmcard *card)
 		ApplId = _get_word(&p);
 		NCCI = _get_word(&p);
 		WindowSize = _get_word(&p);
-
+		spin_lock(&card->lock);
 		capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+		spin_unlock(&card->lock);
 		break;
 
 	case RECEIVE_FREE_NCCI:
@@ -510,9 +512,11 @@ static void b1dma_handle_rx(avmcard *card)
 		ApplId = _get_word(&p);
 		NCCI = _get_word(&p);
 
-		if (NCCI != 0xffffffff)
+		if (NCCI != 0xffffffff) {
+			spin_lock(&card->lock);
 			capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+			spin_unlock(&card->lock);
+		}
 		break;
 
 	case RECEIVE_START:
@@ -751,10 +755,10 @@ void b1dma_reset_ctr(struct capi_ctr *ctrl)
 
 	spin_lock_irqsave(&card->lock, flags);
  	b1dma_reset(card);
-	spin_unlock_irqrestore(&card->lock, flags);
 
 	memset(cinfo->version, 0, sizeof(cinfo->version));
 	capilib_release(&cinfo->ncci_head);
+	spin_unlock_irqrestore(&card->lock, flags);
 	capi_ctr_reseted(ctrl);
 }
 
@@ -803,8 +807,11 @@ void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl)
 	avmcard *card = cinfo->card;
 	struct sk_buff *skb;
 	void *p;
+	unsigned long flags;
 
+	spin_lock_irqsave(&card->lock, flags);
 	capilib_release_appl(&cinfo->ncci_head, appl);
+	spin_unlock_irqrestore(&card->lock, flags);
 
 	skb = alloc_skb(7, GFP_ATOMIC);
 	if (!skb) {
@@ -832,10 +839,13 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 	u16 retval = CAPI_NOERROR;
 
  	if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
+		unsigned long flags;
+		spin_lock_irqsave(&card->lock, flags);
 		retval = capilib_data_b3_req(&cinfo->ncci_head,
 					     CAPIMSG_APPID(skb->data),
 					     CAPIMSG_NCCI(skb->data),
 					     CAPIMSG_MSGID(skb->data));
+		spin_unlock_irqrestore(&card->lock, flags);
 	}
 	if (retval == CAPI_NOERROR) 
 		b1dma_queue_tx(card, skb);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 8710cf6214d..4bbbbe68807 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -678,7 +678,9 @@ static irqreturn_t c4_handle_interrupt(avmcard *card)
                 for (i=0; i < card->nr_controllers; i++) {
 			avmctrl_info *cinfo = &card->ctrlinfo[i];
 			memset(cinfo->version, 0, sizeof(cinfo->version));
+			spin_lock_irqsave(&card->lock, flags);
 			capilib_release(&cinfo->ncci_head);
+			spin_unlock_irqrestore(&card->lock, flags);
 			capi_ctr_reseted(&cinfo->capi_ctrl);
 		}
 		card->nlogcontr = 0;
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index c925020fe9b..6130724e46e 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -180,8 +180,8 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 
 			ApplId = (unsigned) b1_get_word(card->port);
 			MsgLen = t1_get_slice(card->port, card->msgbuf);
-			spin_unlock_irqrestore(&card->lock, flags);
 			if (!(skb = alloc_skb(MsgLen, GFP_ATOMIC))) {
+				spin_unlock_irqrestore(&card->lock, flags);
 				printk(KERN_ERR "%s: incoming packet dropped\n",
 						card->name);
 			} else {
@@ -190,7 +190,7 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 					capilib_data_b3_conf(&cinfo->ncci_head, ApplId,
 							     CAPIMSG_NCCI(skb->data),
 							     CAPIMSG_MSGID(skb->data));
-
+				spin_unlock_irqrestore(&card->lock, flags);
 				capi_ctr_handle_message(ctrl, ApplId, skb);
 			}
 			break;
@@ -200,21 +200,17 @@ static irqreturn_t t1isa_interrupt(int interrupt, void *devptr)
 			ApplId = b1_get_word(card->port);
 			NCCI = b1_get_word(card->port);
 			WindowSize = b1_get_word(card->port);
-			spin_unlock_irqrestore(&card->lock, flags);
-
 			capilib_new_ncci(&cinfo->ncci_head, ApplId, NCCI, WindowSize);
-
+			spin_unlock_irqrestore(&card->lock, flags);
 			break;
 
 		case RECEIVE_FREE_NCCI:
 
 			ApplId = b1_get_word(card->port);
 			NCCI = b1_get_word(card->port);
-			spin_unlock_irqrestore(&card->lock, flags);
-
 			if (NCCI != 0xffffffff)
 				capilib_free_ncci(&cinfo->ncci_head, ApplId, NCCI);
-
+			spin_unlock_irqrestore(&card->lock, flags);
 			break;
 
 		case RECEIVE_START:
@@ -333,13 +329,16 @@ static void t1isa_reset_ctr(struct capi_ctr *ctrl)
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
 	avmcard *card = cinfo->card;
 	unsigned int port = card->port;
+	unsigned long flags;
 
 	t1_disable_irq(port);
 	b1_reset(port);
 	b1_reset(port);
 
 	memset(cinfo->version, 0, sizeof(cinfo->version));
+	spin_lock_irqsave(&card->lock, flags);
 	capilib_release(&cinfo->ncci_head);
+	spin_unlock_irqrestore(&card->lock, flags);
 	capi_ctr_reseted(ctrl);
 }
 
@@ -466,29 +465,26 @@ static u16 t1isa_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 	u8 subcmd = CAPIMSG_SUBCOMMAND(skb->data);
 	u16 dlen, retval;
 
+	spin_lock_irqsave(&card->lock, flags);
 	if (CAPICMD(cmd, subcmd) == CAPI_DATA_B3_REQ) {
 		retval = capilib_data_b3_req(&cinfo->ncci_head,
 					     CAPIMSG_APPID(skb->data),
 					     CAPIMSG_NCCI(skb->data),
 					     CAPIMSG_MSGID(skb->data));
-		if (retval != CAPI_NOERROR) 
+		if (retval != CAPI_NOERROR) {
+			spin_unlock_irqrestore(&card->lock, flags);
 			return retval;
-
+		}
 		dlen = CAPIMSG_DATALEN(skb->data);
 
-		spin_lock_irqsave(&card->lock, flags);
 		b1_put_byte(port, SEND_DATA_B3_REQ);
 		t1_put_slice(port, skb->data, len);
 		t1_put_slice(port, skb->data + len, dlen);
-		spin_unlock_irqrestore(&card->lock, flags);
 	} else {
-
-		spin_lock_irqsave(&card->lock, flags);
 		b1_put_byte(port, SEND_MESSAGE);
 		t1_put_slice(port, skb->data, len);
-		spin_unlock_irqrestore(&card->lock, flags);
 	}
-
+	spin_unlock_irqrestore(&card->lock, flags);
 	dev_kfree_skb_any(skb);
 	return CAPI_NOERROR;
 }
diff --git a/drivers/isdn/sc/debug.h b/drivers/isdn/sc/debug.h
deleted file mode 100644
index e9db96ede4b..00000000000
--- a/drivers/isdn/sc/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* $Id: debug.h,v 1.2.8.1 2001/09/23 22:24:59 kai Exp $
- *
- * Copyright (C) 1996  SpellCaster Telecommunications Inc.
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- *
- * For more information, please contact gpl-info@spellcast.com or write:
- *
- *     SpellCaster Telecommunications Inc.
- *     5621 Finch Avenue East, Unit #3
- *     Scarborough, Ontario  Canada
- *     M1B 2T9
- *     +1 (416) 297-8565
- *     +1 (416) 297-6433 Facsimile
- */
-
-#define REQUEST_IRQ(a,b,c,d,e) request_irq(a,b,c,d,e)
-#define FREE_IRQ(a,b) free_irq(a,b)
diff --git a/drivers/isdn/sc/includes.h b/drivers/isdn/sc/includes.h
index 5286e0c810a..4766e5b7737 100644
--- a/drivers/isdn/sc/includes.h
+++ b/drivers/isdn/sc/includes.h
@@ -14,4 +14,3 @@
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/isdnif.h>
-#include "debug.h"
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index 0bf76344a0d..d09c854cfac 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -404,7 +404,7 @@ static void __exit sc_exit(void)
 		/*
 		 * Release the IRQ
 		 */
-		FREE_IRQ(sc_adapter[i]->interrupt, NULL);
+		free_irq(sc_adapter[i]->interrupt, NULL);
 
 		/*
 		 * Reset for a clean start
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c
index 2766e4fc4ea..883da72b536 100644
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -791,8 +791,10 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 			if (hid->keycode[i])
 				set_bit(hid->keycode[i], input_dev->keybit);
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_LED) | BIT(EV_REP);
-		input_dev->ledbit[0] = BIT(LED_SCROLLL) | BIT(LED_CAPSL) | BIT(LED_NUML);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_LED) |
+			BIT_MASK(EV_REP);
+		input_dev->ledbit[0] = BIT_MASK(LED_SCROLLL) |
+			BIT_MASK(LED_CAPSL) | BIT_MASK(LED_NUML);
 		input_dev->event = adbhid_kbd_event;
 		input_dev->keycodemax = KEY_FN;
 		input_dev->keycodesize = sizeof(hid->keycode[0]);
@@ -801,16 +803,18 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 	case ADB_MOUSE:
 		sprintf(hid->name, "ADB mouse");
 
-		input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-		input_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-		input_dev->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+		input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+		input_dev->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 		break;
 
 	case ADB_MISC:
 		switch (original_handler_id) {
 		case 0x02: /* Adjustable keyboard button device */
 			sprintf(hid->name, "ADB adjustable keyboard buttons");
-			input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+			input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+				BIT_MASK(EV_REP);
 			set_bit(KEY_SOUND, input_dev->keybit);
 			set_bit(KEY_MUTE, input_dev->keybit);
 			set_bit(KEY_VOLUMEUP, input_dev->keybit);
@@ -818,7 +822,8 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
 			break;
 		case 0x1f: /* Powerbook button device */
 			sprintf(hid->name, "ADB Powerbook buttons");
-			input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+			input_dev->evbit[0] = BIT_MASK(EV_KEY) |
+				BIT_MASK(EV_REP);
 			set_bit(KEY_MUTE, input_dev->keybit);
 			set_bit(KEY_VOLUMEUP, input_dev->keybit);
 			set_bit(KEY_VOLUMEDOWN, input_dev->keybit);
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 33dee3a773e..89302309da9 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -117,9 +117,10 @@ static int emumousebtn_input_register(void)
 	emumousebtn->id.product = 0x0001;
 	emumousebtn->id.version = 0x0100;
 
-	emumousebtn->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	emumousebtn->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_MIDDLE) | BIT(BTN_RIGHT);
-	emumousebtn->relbit[0] = BIT(REL_X) | BIT(REL_Y);
+	emumousebtn->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	emumousebtn->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_MIDDLE) | BIT_MASK(BTN_RIGHT);
+	emumousebtn->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
 
 	ret = input_register_device(emumousebtn);
 	if (ret)
diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c
index c803d2bba65..48d647abea4 100644
--- a/drivers/macintosh/mediabay.c
+++ b/drivers/macintosh/mediabay.c
@@ -563,7 +563,7 @@ static void media_bay_step(int i)
 				ide_init_hwif_ports(&hw, (unsigned long) bay->cd_base, (unsigned long) 0, NULL);
 				hw.irq = bay->cd_irq;
 				hw.chipset = ide_pmac;
-				bay->cd_index = ide_register_hw(&hw, 0, NULL);
+				bay->cd_index = ide_register_hw(&hw, NULL, 0, NULL);
 				pmu_resume();
 			}
 			if (bay->cd_index == -1) {
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 34a8c60a254..9b6fbf044fd 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -267,6 +267,12 @@ config DM_MULTIPATH_RDAC
 	---help---
 	  Multipath support for LSI/Engenio RDAC.
 
+config DM_MULTIPATH_HP
+        tristate "HP MSA multipath support (EXPERIMENTAL)"
+        depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL
+        ---help---
+          Multipath support for HP MSA (Active/Passive) series hardware.
+
 config DM_DELAY
 	tristate "I/O delaying target (EXPERIMENTAL)"
 	depends on BLK_DEV_DM && EXPERIMENTAL
@@ -276,4 +282,10 @@ config DM_DELAY
 
 	If unsure, say N.
 
+config DM_UEVENT
+	bool "DM uevents (EXPERIMENTAL)"
+	depends on BLK_DEV_DM && EXPERIMENTAL
+	---help---
+	Generate udev events for DM events.
+
 endif # MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index c49366cdc05..d9aa7edb878 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -8,6 +8,7 @@ dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o
 dm-snapshot-objs := dm-snap.o dm-exception-store.o
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 dm-rdac-objs	:= dm-mpath-rdac.o
+dm-hp-sw-objs	:= dm-mpath-hp-sw.o
 md-mod-objs     := md.o bitmap.o
 raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
 		   raid6int1.o raid6int2.o raid6int4.o \
@@ -35,6 +36,7 @@ obj-$(CONFIG_DM_CRYPT)		+= dm-crypt.o
 obj-$(CONFIG_DM_DELAY)		+= dm-delay.o
 obj-$(CONFIG_DM_MULTIPATH)	+= dm-multipath.o dm-round-robin.o
 obj-$(CONFIG_DM_MULTIPATH_EMC)	+= dm-emc.o
+obj-$(CONFIG_DM_MULTIPATH_HP)	+= dm-hp-sw.o
 obj-$(CONFIG_DM_MULTIPATH_RDAC)	+= dm-rdac.o
 obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
@@ -48,6 +50,10 @@ ifeq ($(CONFIG_ALTIVEC),y)
 altivec_flags := -maltivec -mabi=altivec
 endif
 
+ifeq ($(CONFIG_DM_UEVENT),y)
+dm-mod-objs			+= dm-uevent.o
+endif
+
 targets += raid6int1.c
 $(obj)/raid6int1.c:   UNROLL := 1
 $(obj)/raid6int1.c:   $(src)/raid6int.uc $(src)/unroll.pl FORCE
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index 3f7b827649e..d4509be0fe6 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -21,11 +21,6 @@ static inline int bio_list_empty(const struct bio_list *bl)
 	return bl->head == NULL;
 }
 
-#define BIO_LIST_INIT { .head = NULL, .tail = NULL }
-
-#define BIO_LIST(bl) \
-	struct bio_list bl = BIO_LIST_INIT
-
 static inline void bio_list_init(struct bio_list *bl)
 {
 	bl->head = bl->tail = NULL;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 64fee90bb68..b41f945df8a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -36,7 +36,6 @@ struct dm_crypt_io {
 	struct work_struct work;
 	atomic_t pending;
 	int error;
-	int post_process;
 };
 
 /*
@@ -57,7 +56,7 @@ struct crypt_config;
 
 struct crypt_iv_operations {
 	int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
-	           const char *opts);
+		   const char *opts);
 	void (*dtr)(struct crypt_config *cc);
 	const char *(*status)(struct crypt_config *cc);
 	int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
@@ -80,6 +79,8 @@ struct crypt_config {
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
+	struct workqueue_struct *io_queue;
+	struct workqueue_struct *crypt_queue;
 	/*
 	 * crypto related data
 	 */
@@ -137,7 +138,7 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
 }
 
 static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
-	                      const char *opts)
+			      const char *opts)
 {
 	struct crypto_cipher *essiv_tfm;
 	struct crypto_hash *hash_tfm;
@@ -175,6 +176,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 
 	if (err) {
 		ti->error = "Error calculating hash in ESSIV";
+		kfree(salt);
 		return err;
 	}
 
@@ -188,7 +190,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
 	if (crypto_cipher_blocksize(essiv_tfm) !=
 	    crypto_blkcipher_ivsize(cc->tfm)) {
 		ti->error = "Block size of ESSIV cipher does "
-			        "not match IV size of block cipher";
+			    "not match IV size of block cipher";
 		crypto_free_cipher(essiv_tfm);
 		kfree(salt);
 		return -EINVAL;
@@ -319,10 +321,10 @@ crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
 	return r;
 }
 
-static void
-crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
-                   struct bio *bio_out, struct bio *bio_in,
-                   sector_t sector, int write)
+static void crypt_convert_init(struct crypt_config *cc,
+			       struct convert_context *ctx,
+			       struct bio *bio_out, struct bio *bio_in,
+			       sector_t sector, int write)
 {
 	ctx->bio_in = bio_in;
 	ctx->bio_out = bio_out;
@@ -338,7 +340,7 @@ crypt_convert_init(struct crypt_config *cc, struct convert_context *ctx,
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
 static int crypt_convert(struct crypt_config *cc,
-                         struct convert_context *ctx)
+			 struct convert_context *ctx)
 {
 	int r = 0;
 
@@ -370,7 +372,7 @@ static int crypt_convert(struct crypt_config *cc,
 		}
 
 		r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
-		                              ctx->write, ctx->sector);
+					      ctx->write, ctx->sector);
 		if (r < 0)
 			break;
 
@@ -380,13 +382,13 @@ static int crypt_convert(struct crypt_config *cc,
 	return r;
 }
 
- static void dm_crypt_bio_destructor(struct bio *bio)
- {
+static void dm_crypt_bio_destructor(struct bio *bio)
+{
 	struct dm_crypt_io *io = bio->bi_private;
 	struct crypt_config *cc = io->target->private;
 
 	bio_free(bio, cc->bs);
- }
+}
 
 /*
  * Generate a new unfragmented bio with the given size
@@ -458,7 +460,7 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
  */
-static void dec_pending(struct dm_crypt_io *io, int error)
+static void crypt_dec_pending(struct dm_crypt_io *io, int error)
 {
 	struct crypt_config *cc = (struct crypt_config *) io->target->private;
 
@@ -474,18 +476,36 @@ static void dec_pending(struct dm_crypt_io *io, int error)
 }
 
 /*
- * kcryptd:
+ * kcryptd/kcryptd_io:
  *
  * Needed because it would be very unwise to do decryption in an
  * interrupt context.
+ *
+ * kcryptd performs the actual encryption or decryption.
+ *
+ * kcryptd_io performs the IO submission.
+ *
+ * They must be separated as otherwise the final stages could be
+ * starved by new requests which can block in the first stages due
+ * to memory allocation.
  */
-static struct workqueue_struct *_kcryptd_workqueue;
 static void kcryptd_do_work(struct work_struct *work);
+static void kcryptd_do_crypt(struct work_struct *work);
 
 static void kcryptd_queue_io(struct dm_crypt_io *io)
 {
+	struct crypt_config *cc = io->target->private;
+
 	INIT_WORK(&io->work, kcryptd_do_work);
-	queue_work(_kcryptd_workqueue, &io->work);
+	queue_work(cc->io_queue, &io->work);
+}
+
+static void kcryptd_queue_crypt(struct dm_crypt_io *io)
+{
+	struct crypt_config *cc = io->target->private;
+
+	INIT_WORK(&io->work, kcryptd_do_crypt);
+	queue_work(cc->crypt_queue, &io->work);
 }
 
 static void crypt_endio(struct bio *clone, int error)
@@ -508,13 +528,12 @@ static void crypt_endio(struct bio *clone, int error)
 	}
 
 	bio_put(clone);
-	io->post_process = 1;
-	kcryptd_queue_io(io);
+	kcryptd_queue_crypt(io);
 	return;
 
 out:
 	bio_put(clone);
-	dec_pending(io, error);
+	crypt_dec_pending(io, error);
 }
 
 static void clone_init(struct dm_crypt_io *io, struct bio *clone)
@@ -544,7 +563,7 @@ static void process_read(struct dm_crypt_io *io)
 	 */
 	clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
 	if (unlikely(!clone)) {
-		dec_pending(io, -ENOMEM);
+		crypt_dec_pending(io, -ENOMEM);
 		return;
 	}
 
@@ -579,7 +598,7 @@ static void process_write(struct dm_crypt_io *io)
 	while (remaining) {
 		clone = crypt_alloc_buffer(io, remaining);
 		if (unlikely(!clone)) {
-			dec_pending(io, -ENOMEM);
+			crypt_dec_pending(io, -ENOMEM);
 			return;
 		}
 
@@ -589,7 +608,7 @@ static void process_write(struct dm_crypt_io *io)
 		if (unlikely(crypt_convert(cc, &ctx) < 0)) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
-			dec_pending(io, -EIO);
+			crypt_dec_pending(io, -EIO);
 			return;
 		}
 
@@ -624,17 +643,23 @@ static void process_read_endio(struct dm_crypt_io *io)
 	crypt_convert_init(cc, &ctx, io->base_bio, io->base_bio,
 			   io->base_bio->bi_sector - io->target->begin, 0);
 
-	dec_pending(io, crypt_convert(cc, &ctx));
+	crypt_dec_pending(io, crypt_convert(cc, &ctx));
 }
 
 static void kcryptd_do_work(struct work_struct *work)
 {
 	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
 
-	if (io->post_process)
-		process_read_endio(io);
-	else if (bio_data_dir(io->base_bio) == READ)
+	if (bio_data_dir(io->base_bio) == READ)
 		process_read(io);
+}
+
+static void kcryptd_do_crypt(struct work_struct *work)
+{
+	struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		process_read_endio(io);
 	else
 		process_write(io);
 }
@@ -690,7 +715,7 @@ static int crypt_set_key(struct crypt_config *cc, char *key)
 	cc->key_size = key_size; /* initial settings */
 
 	if ((!key_size && strcmp(key, "-")) ||
-	    (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
+	   (key_size && crypt_decode_key(cc->key, key, key_size) < 0))
 		return -EINVAL;
 
 	set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
@@ -746,7 +771,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
  	if (crypt_set_key(cc, argv[1])) {
 		ti->error = "Error decoding key";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	/* Compatiblity mode for old dm-crypt cipher strings */
@@ -757,19 +782,19 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (strcmp(chainmode, "ecb") && !ivmode) {
 		ti->error = "This chaining mode requires an IV mechanism";
-		goto bad1;
+		goto bad_cipher;
 	}
 
-	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)", chainmode, 
-		     cipher) >= CRYPTO_MAX_ALG_NAME) {
+	if (snprintf(cc->cipher, CRYPTO_MAX_ALG_NAME, "%s(%s)",
+		     chainmode, cipher) >= CRYPTO_MAX_ALG_NAME) {
 		ti->error = "Chain mode + cipher name is too long";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	tfm = crypto_alloc_blkcipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(tfm)) {
 		ti->error = "Error allocating crypto tfm";
-		goto bad1;
+		goto bad_cipher;
 	}
 
 	strcpy(cc->cipher, cipher);
@@ -793,18 +818,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		cc->iv_gen_ops = &crypt_iv_null_ops;
 	else {
 		ti->error = "Invalid IV mode";
-		goto bad2;
+		goto bad_ivmode;
 	}
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->ctr &&
 	    cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
-		goto bad2;
+		goto bad_ivmode;
 
 	cc->iv_size = crypto_blkcipher_ivsize(tfm);
 	if (cc->iv_size)
 		/* at least a 64 bit sector number should fit in our buffer */
 		cc->iv_size = max(cc->iv_size,
-		                  (unsigned int)(sizeof(u64) / sizeof(u8)));
+				  (unsigned int)(sizeof(u64) / sizeof(u8)));
 	else {
 		if (cc->iv_gen_ops) {
 			DMWARN("Selected cipher does not support IVs");
@@ -817,13 +842,13 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	cc->io_pool = mempool_create_slab_pool(MIN_IOS, _crypt_io_pool);
 	if (!cc->io_pool) {
 		ti->error = "Cannot allocate crypt io mempool";
-		goto bad3;
+		goto bad_slab_pool;
 	}
 
 	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
-		goto bad4;
+		goto bad_page_pool;
 	}
 
 	cc->bs = bioset_create(MIN_IOS, MIN_IOS);
@@ -834,25 +859,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (crypto_blkcipher_setkey(tfm, cc->key, key_size) < 0) {
 		ti->error = "Error setting key";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (sscanf(argv[2], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid iv_offset sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->iv_offset = tmpll;
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid device sector";
-		goto bad5;
+		goto bad_device;
 	}
 	cc->start = tmpll;
 
 	if (dm_get_device(ti, argv[3], cc->start, ti->len,
-	                  dm_table_get_mode(ti->table), &cc->dev)) {
+			  dm_table_get_mode(ti->table), &cc->dev)) {
 		ti->error = "Device lookup failed";
-		goto bad5;
+		goto bad_device;
 	}
 
 	if (ivmode && cc->iv_gen_ops) {
@@ -861,27 +886,45 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		cc->iv_mode = kmalloc(strlen(ivmode) + 1, GFP_KERNEL);
 		if (!cc->iv_mode) {
 			ti->error = "Error kmallocing iv_mode string";
-			goto bad5;
+			goto bad_ivmode_string;
 		}
 		strcpy(cc->iv_mode, ivmode);
 	} else
 		cc->iv_mode = NULL;
 
+	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
+	if (!cc->io_queue) {
+		ti->error = "Couldn't create kcryptd io queue";
+		goto bad_io_queue;
+	}
+
+	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
+	if (!cc->crypt_queue) {
+		ti->error = "Couldn't create kcryptd queue";
+		goto bad_crypt_queue;
+	}
+
 	ti->private = cc;
 	return 0;
 
-bad5:
+bad_crypt_queue:
+	destroy_workqueue(cc->io_queue);
+bad_io_queue:
+	kfree(cc->iv_mode);
+bad_ivmode_string:
+	dm_put_device(ti, cc->dev);
+bad_device:
 	bioset_free(cc->bs);
 bad_bs:
 	mempool_destroy(cc->page_pool);
-bad4:
+bad_page_pool:
 	mempool_destroy(cc->io_pool);
-bad3:
+bad_slab_pool:
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
 		cc->iv_gen_ops->dtr(cc);
-bad2:
+bad_ivmode:
 	crypto_free_blkcipher(tfm);
-bad1:
+bad_cipher:
 	/* Must zero key material before freeing */
 	memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
 	kfree(cc);
@@ -892,7 +935,8 @@ static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 
-	flush_workqueue(_kcryptd_workqueue);
+	destroy_workqueue(cc->io_queue);
+	destroy_workqueue(cc->crypt_queue);
 
 	bioset_free(cc->bs);
 	mempool_destroy(cc->page_pool);
@@ -918,9 +962,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
 	io = mempool_alloc(cc->io_pool, GFP_NOIO);
 	io->target = ti;
 	io->base_bio = bio;
-	io->error = io->post_process = 0;
+	io->error = 0;
 	atomic_set(&io->pending, 0);
-	kcryptd_queue_io(io);
+
+	if (bio_data_dir(io->base_bio) == READ)
+		kcryptd_queue_io(io);
+	else
+		kcryptd_queue_crypt(io);
 
 	return DM_MAPIO_SUBMITTED;
 }
@@ -1037,25 +1085,12 @@ static int __init dm_crypt_init(void)
 	if (!_crypt_io_pool)
 		return -ENOMEM;
 
-	_kcryptd_workqueue = create_workqueue("kcryptd");
-	if (!_kcryptd_workqueue) {
-		r = -ENOMEM;
-		DMERR("couldn't create kcryptd");
-		goto bad1;
-	}
-
 	r = dm_register_target(&crypt_target);
 	if (r < 0) {
 		DMERR("register failed %d", r);
-		goto bad2;
+		kmem_cache_destroy(_crypt_io_pool);
 	}
 
-	return 0;
-
-bad2:
-	destroy_workqueue(_kcryptd_workqueue);
-bad1:
-	kmem_cache_destroy(_crypt_io_pool);
 	return r;
 }
 
@@ -1066,7 +1101,6 @@ static void __exit dm_crypt_exit(void)
 	if (r < 0)
 		DMERR("unregister failed %d", r);
 
-	destroy_workqueue(_kcryptd_workqueue);
 	kmem_cache_destroy(_crypt_io_pool);
 }
 
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 6928c136d3c..bdd37f881c4 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -83,7 +83,7 @@ static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
 	struct dm_delay_info *delayed, *next;
 	unsigned long next_expires = 0;
 	int start_timer = 0;
-	BIO_LIST(flush_bios);
+	struct bio_list flush_bios = { };
 
 	mutex_lock(&delayed_bios_lock);
 	list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
@@ -163,34 +163,32 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
-	if (argc == 3) {
-		dc->dev_write = NULL;
+	dc->dev_write = NULL;
+	if (argc == 3)
 		goto out;
-	}
 
 	if (sscanf(argv[4], "%llu", &tmpll) != 1) {
 		ti->error = "Invalid write device sector";
-		goto bad;
+		goto bad_dev_read;
 	}
 	dc->start_write = tmpll;
 
 	if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
 		ti->error = "Invalid write delay";
-		goto bad;
+		goto bad_dev_read;
 	}
 
 	if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
 			  dm_table_get_mode(ti->table), &dc->dev_write)) {
 		ti->error = "Write device lookup failed";
-		dm_put_device(ti, dc->dev_read);
-		goto bad;
+		goto bad_dev_read;
 	}
 
 out:
 	dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
 	if (!dc->delayed_pool) {
 		DMERR("Couldn't create delayed bio pool.");
-		goto bad;
+		goto bad_dev_write;
 	}
 
 	setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
@@ -203,6 +201,11 @@ out:
 	ti->private = dc;
 	return 0;
 
+bad_dev_write:
+	if (dc->dev_write)
+		dm_put_device(ti, dc->dev_write);
+bad_dev_read:
+	dm_put_device(ti, dc->dev_read);
 bad:
 	kfree(dc);
 	return -EINVAL;
@@ -305,7 +308,7 @@ static int delay_status(struct dm_target *ti, status_type_t type,
 		       (unsigned long long) dc->start_read,
 		       dc->read_delay);
 		if (dc->dev_write)
-			DMEMIT("%s %llu %u", dc->dev_write->name,
+			DMEMIT(" %s %llu %u", dc->dev_write->name,
 			       (unsigned long long) dc->start_write,
 			       dc->write_delay);
 		break;
diff --git a/drivers/md/dm-emc.c b/drivers/md/dm-emc.c
index 342517261ec..6b91b9ab1d4 100644
--- a/drivers/md/dm-emc.c
+++ b/drivers/md/dm-emc.c
@@ -81,7 +81,7 @@ static struct bio *get_failover_bio(struct dm_path *path, unsigned data_size)
 	}
 
 	if (bio_add_page(bio, page, data_size, 0) != data_size) {
-		DMERR("get_failover_bio: alloc_page() failed.");
+		DMERR("get_failover_bio: bio_add_page() failed.");
 		__free_page(page);
 		bio_put(bio);
 		return NULL;
@@ -211,12 +211,10 @@ fail_path:
 
 static struct emc_handler *alloc_emc_handler(void)
 {
-	struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
+	struct emc_handler *h = kzalloc(sizeof(*h), GFP_KERNEL);
 
-	if (h) {
-		memset(h, 0, sizeof(*h));
+	if (h)
 		spin_lock_init(&h->lock);
-	}
 
 	return h;
 }
diff --git a/drivers/md/dm-hw-handler.c b/drivers/md/dm-hw-handler.c
index baafaaba4d4..2ee84d8aa0b 100644
--- a/drivers/md/dm-hw-handler.c
+++ b/drivers/md/dm-hw-handler.c
@@ -91,12 +91,10 @@ void dm_put_hw_handler(struct hw_handler_type *hwht)
 
 static struct hwh_internal *_alloc_hw_handler(struct hw_handler_type *hwht)
 {
-	struct hwh_internal *hwhi = kmalloc(sizeof(*hwhi), GFP_KERNEL);
+	struct hwh_internal *hwhi = kzalloc(sizeof(*hwhi), GFP_KERNEL);
 
-	if (hwhi) {
-		memset(hwhi, 0, sizeof(*hwhi));
+	if (hwhi)
 		hwhi->hwht = *hwht;
-	}
 
 	return hwhi;
 }
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
index e0832e6fcf3..46809dcb121 100644
--- a/drivers/md/dm-hw-handler.h
+++ b/drivers/md/dm-hw-handler.h
@@ -58,5 +58,6 @@ unsigned dm_scsi_err_handler(struct hw_handler *hwh, struct bio *bio);
 #define MP_FAIL_PATH 1
 #define MP_BYPASS_PG 2
 #define MP_ERROR_IO  4	/* Don't retry this I/O */
+#define MP_RETRY 8
 
 #endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b441d82c338..138200bf5e0 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -700,7 +700,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 	int r;
 	char *new_name = (char *) param + param->data_start;
 
-	if (new_name < (char *) (param + 1) ||
+	if (new_name < (char *) param->data ||
 	    invalid_str(new_name, (void *) param + param_size)) {
 		DMWARN("Invalid new logical volume name supplied.");
 		return -EINVAL;
@@ -726,7 +726,7 @@ static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
 	if (!md)
 		return -ENXIO;
 
-	if (geostr < (char *) (param + 1) ||
+	if (geostr < (char *) param->data ||
 	    invalid_str(geostr, (void *) param + param_size)) {
 		DMWARN("Invalid geometry supplied.");
 		goto out;
@@ -1233,7 +1233,7 @@ static int target_message(struct dm_ioctl *param, size_t param_size)
 	if (r)
 		goto out;
 
-	if (tmsg < (struct dm_target_msg *) (param + 1) ||
+	if (tmsg < (struct dm_target_msg *) param->data ||
 	    invalid_str(tmsg->message, (void *) param + param_size)) {
 		DMWARN("Invalid target message parameters.");
 		r = -EINVAL;
@@ -1358,7 +1358,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param)
 	if (tmp.data_size < sizeof(tmp))
 		return -EINVAL;
 
-	dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
+	dmi = vmalloc(tmp.data_size);
 	if (!dmi)
 		return -ENOMEM;
 
@@ -1515,3 +1515,35 @@ void dm_interface_exit(void)
 
 	dm_hash_exit();
 }
+
+/**
+ * dm_copy_name_and_uuid - Copy mapped device name & uuid into supplied buffers
+ * @md: Pointer to mapped_device
+ * @name: Buffer (size DM_NAME_LEN) for name
+ * @uuid: Buffer (size DM_UUID_LEN) for uuid or empty string if uuid not defined
+ */
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
+{
+	int r = 0;
+	struct hash_cell *hc;
+
+	if (!md)
+		return -ENXIO;
+
+	dm_get(md);
+	down_read(&_hash_lock);
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	strcpy(name, hc->name);
+	strcpy(uuid, hc->uuid ? : "");
+
+out:
+	up_read(&_hash_lock);
+	dm_put(md);
+
+	return r;
+}
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a66428d860f..072ee4353ea 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -696,7 +696,7 @@ static struct dirty_log_type _disk_type = {
 	.module = THIS_MODULE,
 	.ctr = disk_ctr,
 	.dtr = disk_dtr,
-	.suspend = disk_flush,
+	.postsuspend = disk_flush,
 	.resume = disk_resume,
 	.get_region_size = core_get_region_size,
 	.is_clean = core_is_clean,
diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h
index 86a301c8daf..3fae87eb596 100644
--- a/drivers/md/dm-log.h
+++ b/drivers/md/dm-log.h
@@ -32,7 +32,8 @@ struct dirty_log_type {
 	 * There are times when we don't want the log to touch
 	 * the disk.
 	 */
-	int (*suspend)(struct dirty_log *log);
+	int (*presuspend)(struct dirty_log *log);
+	int (*postsuspend)(struct dirty_log *log);
 	int (*resume)(struct dirty_log *log);
 
 	/*
diff --git a/drivers/md/dm-mpath-hp-sw.c b/drivers/md/dm-mpath-hp-sw.c
new file mode 100644
index 00000000000..204bf42c944
--- /dev/null
+++ b/drivers/md/dm-mpath-hp-sw.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2005 Mike Christie, All rights reserved.
+ * Copyright (C) 2007 Red Hat, Inc. All rights reserved.
+ * Authors: Mike Christie
+ *          Dave Wysochanski
+ *
+ * This file is released under the GPL.
+ *
+ * This module implements the specific path activation code for
+ * HP StorageWorks and FSC FibreCat Asymmetric (Active/Passive)
+ * storage arrays.
+ * These storage arrays have controller-based failover, not
+ * LUN-based failover.  However, LUN-based failover is the design
+ * of dm-multipath. Thus, this module is written for LUN-based failover.
+ */
+#include <linux/blkdev.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_dbg.h>
+
+#include "dm.h"
+#include "dm-hw-handler.h"
+
+#define DM_MSG_PREFIX "multipath hp-sw"
+#define DM_HP_HWH_NAME "hp-sw"
+#define DM_HP_HWH_VER "1.0.0"
+
+struct hp_sw_context {
+	unsigned char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
+/*
+ * hp_sw_error_is_retryable - Is an HP-specific check condition retryable?
+ * @req: path activation request
+ *
+ * Examine error codes of request and determine whether the error is retryable.
+ * Some error codes are already retried by scsi-ml (see
+ * scsi_decide_disposition), but some HP specific codes are not.
+ * The intent of this routine is to supply the logic for the HP specific
+ * check conditions.
+ *
+ * Returns:
+ *  1 - command completed with retryable error
+ *  0 - command completed with non-retryable error
+ *
+ * Possible optimizations
+ * 1. More hardware-specific error codes
+ */
+static int hp_sw_error_is_retryable(struct request *req)
+{
+	/*
+	 * NOT_READY is known to be retryable
+	 * For now we just dump out the sense data and call it retryable
+	 */
+	if (status_byte(req->errors) == CHECK_CONDITION)
+		__scsi_print_sense(DM_HP_HWH_NAME, req->sense, req->sense_len);
+
+	/*
+	 * At this point we don't have complete information about all the error
+	 * codes from this hardware, so we are just conservative and retry
+	 * when in doubt.
+	 */
+	return 1;
+}
+
+/*
+ * hp_sw_end_io - Completion handler for HP path activation.
+ * @req: path activation request
+ * @error: scsi-ml error
+ *
+ *  Check sense data, free request structure, and notify dm that
+ *  pg initialization has completed.
+ *
+ * Context: scsi-ml softirq
+ *
+ */
+static void hp_sw_end_io(struct request *req, int error)
+{
+	struct dm_path *path = req->end_io_data;
+	unsigned err_flags = 0;
+
+	if (!error) {
+		DMDEBUG("%s path activation command - success",
+			path->dev->name);
+		goto out;
+	}
+
+	if (hp_sw_error_is_retryable(req)) {
+		DMDEBUG("%s path activation command - retry",
+			path->dev->name);
+		err_flags = MP_RETRY;
+		goto out;
+	}
+
+	DMWARN("%s path activation fail - error=0x%x",
+	       path->dev->name, error);
+	err_flags = MP_FAIL_PATH;
+
+out:
+	req->end_io_data = NULL;
+	__blk_put_request(req->q, req);
+	dm_pg_init_complete(path, err_flags);
+}
+
+/*
+ * hp_sw_get_request - Allocate an HP specific path activation request
+ * @path: path on which request will be sent (needed for request queue)
+ *
+ * The START command is used for path activation request.
+ * These arrays are controller-based failover, not LUN based.
+ * One START command issued to a single path will fail over all
+ * LUNs for the same controller.
+ *
+ * Possible optimizations
+ * 1. Make timeout configurable
+ * 2. Preallocate request
+ */
+static struct request *hp_sw_get_request(struct dm_path *path)
+{
+	struct request *req;
+	struct block_device *bdev = path->dev->bdev;
+	struct request_queue *q = bdev_get_queue(bdev);
+	struct hp_sw_context *h = path->hwhcontext;
+
+	req = blk_get_request(q, WRITE, GFP_NOIO);
+	if (!req)
+		goto out;
+
+	req->timeout = 60 * HZ;
+
+	req->errors = 0;
+	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	req->cmd_flags |= REQ_FAILFAST | REQ_NOMERGE;
+	req->end_io_data = path;
+	req->sense = h->sense;
+	memset(req->sense, 0, SCSI_SENSE_BUFFERSIZE);
+
+	memset(&req->cmd, 0, BLK_MAX_CDB);
+	req->cmd[0] = START_STOP;
+	req->cmd[4] = 1;
+	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
+
+out:
+	return req;
+}
+
+/*
+ * hp_sw_pg_init - HP path activation implementation.
+ * @hwh: hardware handler specific data
+ * @bypassed: unused; is the path group bypassed? (see dm-mpath.c)
+ * @path: path to send initialization command
+ *
+ * Send an HP-specific path activation command on 'path'.
+ * Do not try to optimize in any way, just send the activation command.
+ * More than one path activation command may be sent to the same controller.
+ * This seems to work fine for basic failover support.
+ *
+ * Possible optimizations
+ * 1. Detect an in-progress activation request and avoid submitting another one
+ * 2. Model the controller and only send a single activation request at a time
+ * 3. Determine the state of a path before sending an activation request
+ *
+ * Context: kmpathd (see process_queued_ios() in dm-mpath.c)
+ */
+static void hp_sw_pg_init(struct hw_handler *hwh, unsigned bypassed,
+			  struct dm_path *path)
+{
+	struct request *req;
+	struct hp_sw_context *h;
+
+	path->hwhcontext = hwh->context;
+	h = hwh->context;
+
+	req = hp_sw_get_request(path);
+	if (!req) {
+		DMERR("%s path activation command - allocation fail",
+		      path->dev->name);
+		goto retry;
+	}
+
+	DMDEBUG("%s path activation command - sent", path->dev->name);
+
+	blk_execute_rq_nowait(req->q, NULL, req, 1, hp_sw_end_io);
+	return;
+
+retry:
+	dm_pg_init_complete(path, MP_RETRY);
+}
+
+static int hp_sw_create(struct hw_handler *hwh, unsigned argc, char **argv)
+{
+	struct hp_sw_context *h;
+
+	h = kmalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return -ENOMEM;
+
+	hwh->context = h;
+
+	return 0;
+}
+
+static void hp_sw_destroy(struct hw_handler *hwh)
+{
+	struct hp_sw_context *h = hwh->context;
+
+	kfree(h);
+}
+
+static struct hw_handler_type hp_sw_hwh = {
+	.name = DM_HP_HWH_NAME,
+	.module = THIS_MODULE,
+	.create = hp_sw_create,
+	.destroy = hp_sw_destroy,
+	.pg_init = hp_sw_pg_init,
+};
+
+static int __init hp_sw_init(void)
+{
+	int r;
+
+	r = dm_register_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("register failed %d", r);
+	else
+		DMINFO("version " DM_HP_HWH_VER " loaded");
+
+	return r;
+}
+
+static void __exit hp_sw_exit(void)
+{
+	int r;
+
+	r = dm_unregister_hw_handler(&hp_sw_hwh);
+	if (r < 0)
+		DMERR("unregister failed %d", r);
+}
+
+module_init(hp_sw_init);
+module_exit(hp_sw_exit);
+
+MODULE_DESCRIPTION("DM Multipath HP StorageWorks / FSC FibreCat (A/P) support");
+MODULE_AUTHOR("Mike Christie, Dave Wysochanski <dm-devel@redhat.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DM_HP_HWH_VER);
diff --git a/drivers/md/dm-mpath-rdac.c b/drivers/md/dm-mpath-rdac.c
index 16b16134577..e04eb5c697f 100644
--- a/drivers/md/dm-mpath-rdac.c
+++ b/drivers/md/dm-mpath-rdac.c
@@ -664,20 +664,21 @@ static struct hw_handler_type rdac_handler = {
 
 static int __init rdac_init(void)
 {
-	int r = dm_register_hw_handler(&rdac_handler);
-
-	if (r < 0) {
-		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
-		return r;
-	}
+	int r;
 
 	rdac_wkqd = create_singlethread_workqueue("rdac_wkqd");
 	if (!rdac_wkqd) {
 		DMERR("Failed to create workqueue rdac_wkqd.");
-		dm_unregister_hw_handler(&rdac_handler);
 		return -ENOMEM;
 	}
 
+	r = dm_register_hw_handler(&rdac_handler);
+	if (r < 0) {
+		DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r);
+		destroy_workqueue(rdac_wkqd);
+		return r;
+	}
+
 	DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER);
 	return 0;
 }
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 31056abca89..24b2b1e32fa 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -10,6 +10,7 @@
 #include "dm-hw-handler.h"
 #include "dm-bio-list.h"
 #include "dm-bio-record.h"
+#include "dm-uevent.h"
 
 #include <linux/ctype.h>
 #include <linux/init.h>
@@ -75,6 +76,8 @@ struct multipath {
 	unsigned queue_io;		/* Must we queue all I/O? */
 	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
 	unsigned saved_queue_if_no_path;/* Saved state during suspension */
+	unsigned pg_init_retries;	/* Number of times to retry pg_init */
+	unsigned pg_init_count;		/* Number of times pg_init called */
 
 	struct work_struct process_queued_ios;
 	struct bio_list queued_ios;
@@ -225,6 +228,8 @@ static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 		m->pg_init_required = 0;
 		m->queue_io = 0;
 	}
+
+	m->pg_init_count = 0;
 }
 
 static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
@@ -424,6 +429,7 @@ static void process_queued_ios(struct work_struct *work)
 		must_queue = 0;
 
 	if (m->pg_init_required && !m->pg_init_in_progress) {
+		m->pg_init_count++;
 		m->pg_init_required = 0;
 		m->pg_init_in_progress = 1;
 		init_required = 1;
@@ -689,9 +695,11 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 	int r;
 	unsigned argc;
 	struct dm_target *ti = m->ti;
+	const char *param_name;
 
 	static struct param _params[] = {
-		{0, 1, "invalid number of feature args"},
+		{0, 3, "invalid number of feature args"},
+		{1, 50, "pg_init_retries must be between 1 and 50"},
 	};
 
 	r = read_param(_params, shift(as), &argc, &ti->error);
@@ -701,12 +709,28 @@ static int parse_features(struct arg_set *as, struct multipath *m)
 	if (!argc)
 		return 0;
 
-	if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
-		return queue_if_no_path(m, 1, 0);
-	else {
+	do {
+		param_name = shift(as);
+		argc--;
+
+		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
+			r = queue_if_no_path(m, 1, 0);
+			continue;
+		}
+
+		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
+		    (argc >= 1)) {
+			r = read_param(_params + 1, shift(as),
+				       &m->pg_init_retries, &ti->error);
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
-		return -EINVAL;
-	}
+		r = -EINVAL;
+	} while (argc && !r);
+
+	return r;
 }
 
 static int multipath_ctr(struct dm_target *ti, unsigned int argc,
@@ -834,6 +858,9 @@ static int fail_path(struct pgpath *pgpath)
 	if (pgpath == m->current_pgpath)
 		m->current_pgpath = NULL;
 
+	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
@@ -873,6 +900,9 @@ static int reinstate_path(struct pgpath *pgpath)
 	if (!m->nr_valid_paths++ && m->queue_size)
 		queue_work(kmultipathd, &m->process_queued_ios);
 
+	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
+		      pgpath->path.dev->name, m->nr_valid_paths);
+
 	queue_work(kmultipathd, &m->trigger_event);
 
 out:
@@ -976,6 +1006,26 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
 }
 
 /*
+ * Should we retry pg_init immediately?
+ */
+static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
+{
+	unsigned long flags;
+	int limit_reached = 0;
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	if (m->pg_init_count <= m->pg_init_retries)
+		m->pg_init_required = 1;
+	else
+		limit_reached = 1;
+
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	return limit_reached;
+}
+
+/*
  * pg_init must call this when it has completed its initialisation
  */
 void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
@@ -985,8 +1035,14 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
 	struct multipath *m = pg->m;
 	unsigned long flags;
 
-	/* We insist on failing the path if the PG is already bypassed. */
-	if (err_flags && pg->bypassed)
+	/*
+	 * If requested, retry pg_init until maximum number of retries exceeded.
+	 * If retry not requested and PG already bypassed, always fail the path.
+	 */
+	if (err_flags & MP_RETRY) {
+		if (pg_init_limit_reached(m, pgpath))
+			err_flags |= MP_FAIL_PATH;
+	} else if (err_flags && pg->bypassed)
 		err_flags |= MP_FAIL_PATH;
 
 	if (err_flags & MP_FAIL_PATH)
@@ -996,7 +1052,7 @@ void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
 		bypass_pg(m, pg, 1);
 
 	spin_lock_irqsave(&m->lock, flags);
-	if (err_flags) {
+	if (err_flags & ~MP_RETRY) {
 		m->current_pgpath = NULL;
 		m->current_pg = NULL;
 	} else if (!m->pg_init_required)
@@ -1148,11 +1204,15 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
 
 	/* Features */
 	if (type == STATUSTYPE_INFO)
-		DMEMIT("1 %u ", m->queue_size);
-	else if (m->queue_if_no_path)
-		DMEMIT("1 queue_if_no_path ");
-	else
-		DMEMIT("0 ");
+		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
+	else {
+		DMEMIT("%u ", m->queue_if_no_path +
+			      (m->pg_init_retries > 0) * 2);
+		if (m->queue_if_no_path)
+			DMEMIT("queue_if_no_path ");
+		if (m->pg_init_retries)
+			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
+	}
 
 	if (hwh->type && hwh->type->status)
 		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c
index f10a0c89b3f..ca1bb636a3e 100644
--- a/drivers/md/dm-path-selector.c
+++ b/drivers/md/dm-path-selector.c
@@ -94,12 +94,10 @@ out:
 
 static struct ps_internal *_alloc_path_selector(struct path_selector_type *pst)
 {
-	struct ps_internal *psi = kmalloc(sizeof(*psi), GFP_KERNEL);
+	struct ps_internal *psi = kzalloc(sizeof(*psi), GFP_KERNEL);
 
-	if (psi) {
-		memset(psi, 0, sizeof(*psi));
+	if (psi)
 		psi->pst = *pst;
-	}
 
 	return psi;
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d09ff15490a..31123d4a6b9 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -19,6 +19,7 @@
 #include <linux/time.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "raid1"
 #define DM_IO_PAGES 64
@@ -113,6 +114,7 @@ struct region {
  * Mirror set structures.
  *---------------------------------------------------------------*/
 struct mirror {
+	struct mirror_set *ms;
 	atomic_t error_count;
 	struct dm_dev *dev;
 	sector_t offset;
@@ -974,6 +976,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
 
 	if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
 		ti->error = "Error creating dirty region hash";
+		dm_io_client_destroy(ms->io_client);
 		kfree(ms);
 		return NULL;
 	}
@@ -994,7 +997,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
 
 static inline int _check_region_size(struct dm_target *ti, uint32_t size)
 {
-	return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) ||
+	return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) ||
 		 size > ti->len);
 }
 
@@ -1015,6 +1018,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
 		return -ENXIO;
 	}
 
+	ms->mirror[mirror].ms = ms;
 	ms->mirror[mirror].offset = offset;
 
 	return 0;
@@ -1163,16 +1167,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
 	if (!ms->kmirrord_wq) {
 		DMERR("couldn't start kmirrord");
-		free_context(ms, ti, m);
-		return -ENOMEM;
+		r = -ENOMEM;
+		goto err_free_context;
 	}
 	INIT_WORK(&ms->kmirrord_work, do_mirror);
 
 	r = parse_features(ms, argc, argv, &args_used);
-	if (r) {
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	argv += args_used;
 	argc -= args_used;
@@ -1188,19 +1190,22 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (argc) {
 		ti->error = "Too many mirror arguments";
-		free_context(ms, ti, ms->nr_mirrors);
-		return -EINVAL;
+		r = -EINVAL;
+		goto err_destroy_wq;
 	}
 
 	r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
-	if (r) {
-		destroy_workqueue(ms->kmirrord_wq);
-		free_context(ms, ti, ms->nr_mirrors);
-		return r;
-	}
+	if (r)
+		goto err_destroy_wq;
 
 	wake(ms);
 	return 0;
+
+err_destroy_wq:
+	destroy_workqueue(ms->kmirrord_wq);
+err_free_context:
+	free_context(ms, ti, ms->nr_mirrors);
+	return r;
 }
 
 static void mirror_dtr(struct dm_target *ti)
@@ -1302,7 +1307,7 @@ static void mirror_postsuspend(struct dm_target *ti)
 	wait_event(_kmirrord_recovery_stopped,
 		   !atomic_read(&ms->rh.recovery_in_flight));
 
-	if (log->type->suspend && log->type->suspend(log))
+	if (log->type->postsuspend && log->type->postsuspend(log))
 		/* FIXME: need better error handling */
 		DMWARN("log suspend failed");
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 98a633f3d6b..cee16fadd9e 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/log2.h>
 
 #include "dm-snap.h"
 #include "dm-bio-list.h"
@@ -415,7 +416,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
 	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
 
 	/* Check chunk_size is a power of 2 */
-	if (chunk_size & (chunk_size - 1)) {
+	if (!is_power_of_2(chunk_size)) {
 		*error = "Chunk size is not a power of 2";
 		return -EINVAL;
 	}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 51f5e076001..969944a8aba 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -11,6 +11,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/slab.h>
+#include <linux/log2.h>
 
 #define DM_MSG_PREFIX "striped"
 
@@ -99,7 +100,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	/*
 	 * chunk_size is a power of two
 	 */
-	if (!chunk_size || (chunk_size & (chunk_size - 1)) ||
+	if (!is_power_of_2(chunk_size) ||
 	    (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) {
 		ti->error = "Invalid chunk size";
 		return -EINVAL;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index fbe477bb2c6..8939e610508 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -213,12 +213,11 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
 int dm_table_create(struct dm_table **result, int mode,
 		    unsigned num_targets, struct mapped_device *md)
 {
-	struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
+	struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
 
 	if (!t)
 		return -ENOMEM;
 
-	memset(t, 0, sizeof(*t));
 	INIT_LIST_HEAD(&t->devices);
 	atomic_set(&t->holders, 1);
 
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 477a041a41c..835cf95b857 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -88,12 +88,10 @@ void dm_put_target_type(struct target_type *t)
 
 static struct tt_internal *alloc_target(struct target_type *t)
 {
-	struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
+	struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
 
-	if (ti) {
-		memset(ti, 0, sizeof(*ti));
+	if (ti)
 		ti->tt = *t;
-	}
 
 	return ti;
 }
diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c
new file mode 100644
index 00000000000..50377e5dc2a
--- /dev/null
+++ b/drivers/md/dm-uevent.c
@@ -0,0 +1,222 @@
+/*
+ * Device Mapper Uevent Support (dm-uevent)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/kobject.h>
+#include <linux/dm-ioctl.h>
+
+#include "dm.h"
+#include "dm-uevent.h"
+
+#define DM_MSG_PREFIX "uevent"
+
+static const struct {
+	enum dm_uevent_type type;
+	enum kobject_action action;
+	char *name;
+} _dm_uevent_type_names[] = {
+	{DM_UEVENT_PATH_FAILED, KOBJ_CHANGE, "PATH_FAILED"},
+	{DM_UEVENT_PATH_REINSTATED, KOBJ_CHANGE, "PATH_REINSTATED"},
+};
+
+static struct kmem_cache *_dm_event_cache;
+
+struct dm_uevent {
+	struct mapped_device *md;
+	enum kobject_action action;
+	struct kobj_uevent_env ku_env;
+	struct list_head elist;
+	char name[DM_NAME_LEN];
+	char uuid[DM_UUID_LEN];
+};
+
+static void dm_uevent_free(struct dm_uevent *event)
+{
+	kmem_cache_free(_dm_event_cache, event);
+}
+
+static struct dm_uevent *dm_uevent_alloc(struct mapped_device *md)
+{
+	struct dm_uevent *event;
+
+	event = kmem_cache_zalloc(_dm_event_cache, GFP_ATOMIC);
+	if (!event)
+		return NULL;
+
+	INIT_LIST_HEAD(&event->elist);
+	event->md = md;
+
+	return event;
+}
+
+static struct dm_uevent *dm_build_path_uevent(struct mapped_device *md,
+					      struct dm_target *ti,
+					      enum kobject_action action,
+					      const char *dm_action,
+					      const char *path,
+					      unsigned nr_valid_paths)
+{
+	struct dm_uevent *event;
+
+	event = dm_uevent_alloc(md);
+	if (!event) {
+		DMERR("%s: dm_uevent_alloc() failed", __FUNCTION__);
+		goto err_nomem;
+	}
+
+	event->action = action;
+
+	if (add_uevent_var(&event->ku_env, "DM_TARGET=%s", ti->type->name)) {
+		DMERR("%s: add_uevent_var() for DM_TARGET failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_ACTION=%s", dm_action)) {
+		DMERR("%s: add_uevent_var() for DM_ACTION failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_SEQNUM=%u",
+			   dm_next_uevent_seq(md))) {
+		DMERR("%s: add_uevent_var() for DM_SEQNUM failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_PATH=%s", path)) {
+		DMERR("%s: add_uevent_var() for DM_PATH failed", __FUNCTION__);
+		goto err_add;
+	}
+
+	if (add_uevent_var(&event->ku_env, "DM_NR_VALID_PATHS=%d",
+			   nr_valid_paths)) {
+		DMERR("%s: add_uevent_var() for DM_NR_VALID_PATHS failed",
+		      __FUNCTION__);
+		goto err_add;
+	}
+
+	return event;
+
+err_add:
+	dm_uevent_free(event);
+err_nomem:
+	return ERR_PTR(-ENOMEM);
+}
+
+/**
+ * dm_send_uevents - send uevents for given list
+ *
+ * @events:	list of events to send
+ * @kobj:	kobject generating event
+ *
+ */
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+{
+	int r;
+	struct dm_uevent *event, *next;
+
+	list_for_each_entry_safe(event, next, events, elist) {
+		list_del_init(&event->elist);
+
+		/*
+		 * Need to call dm_copy_name_and_uuid from here for now.
+		 * Context of previous var adds and locking used for
+		 * hash_cell not compatable.
+		 */
+		if (dm_copy_name_and_uuid(event->md, event->name,
+					  event->uuid)) {
+			DMERR("%s: dm_copy_name_and_uuid() failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_NAME=%s", event->name)) {
+			DMERR("%s: add_uevent_var() for DM_NAME failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		if (add_uevent_var(&event->ku_env, "DM_UUID=%s", event->uuid)) {
+			DMERR("%s: add_uevent_var() for DM_UUID failed",
+			      __FUNCTION__);
+			goto uevent_free;
+		}
+
+		r = kobject_uevent_env(kobj, event->action, event->ku_env.envp);
+		if (r)
+			DMERR("%s: kobject_uevent_env failed", __FUNCTION__);
+uevent_free:
+		dm_uevent_free(event);
+	}
+}
+EXPORT_SYMBOL_GPL(dm_send_uevents);
+
+/**
+ * dm_path_uevent - called to create a new path event and queue it
+ *
+ * @event_type:	path event type enum
+ * @ti:			pointer to a dm_target
+ * @path:		string containing pathname
+ * @nr_valid_paths:	number of valid paths remaining
+ *
+ */
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+		   const char *path, unsigned nr_valid_paths)
+{
+	struct mapped_device *md = dm_table_get_md(ti->table);
+	struct dm_uevent *event;
+
+	if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) {
+		DMERR("%s: Invalid event_type %d", __FUNCTION__, event_type);
+		goto out;
+	}
+
+	event = dm_build_path_uevent(md, ti,
+				     _dm_uevent_type_names[event_type].action,
+				     _dm_uevent_type_names[event_type].name,
+				     path, nr_valid_paths);
+	if (IS_ERR(event))
+		goto out;
+
+	dm_uevent_add(md, &event->elist);
+
+out:
+	dm_put(md);
+}
+EXPORT_SYMBOL_GPL(dm_path_uevent);
+
+int dm_uevent_init(void)
+{
+	_dm_event_cache = KMEM_CACHE(dm_uevent, 0);
+	if (!_dm_event_cache)
+		return -ENOMEM;
+
+	DMINFO("version 1.0.3");
+
+	return 0;
+}
+
+void dm_uevent_exit(void)
+{
+	kmem_cache_destroy(_dm_event_cache);
+}
diff --git a/drivers/md/dm-uevent.h b/drivers/md/dm-uevent.h
new file mode 100644
index 00000000000..2eccc8bd671
--- /dev/null
+++ b/drivers/md/dm-uevent.h
@@ -0,0 +1,59 @@
+/*
+ * Device Mapper Uevent Support
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2007
+ * 	Author: Mike Anderson <andmike@linux.vnet.ibm.com>
+ */
+#ifndef DM_UEVENT_H
+#define DM_UEVENT_H
+
+enum dm_uevent_type {
+	DM_UEVENT_PATH_FAILED,
+	DM_UEVENT_PATH_REINSTATED,
+};
+
+#ifdef CONFIG_DM_UEVENT
+
+extern int dm_uevent_init(void);
+extern void dm_uevent_exit(void);
+extern void dm_send_uevents(struct list_head *events, struct kobject *kobj);
+extern void dm_path_uevent(enum dm_uevent_type event_type,
+			   struct dm_target *ti, const char *path,
+			   unsigned nr_valid_paths);
+
+#else
+
+static inline int dm_uevent_init(void)
+{
+	return 0;
+}
+static inline void dm_uevent_exit(void)
+{
+}
+static inline void dm_send_uevents(struct list_head *events,
+				   struct kobject *kobj)
+{
+}
+static inline void dm_path_uevent(enum dm_uevent_type event_type,
+				  struct dm_target *ti, const char *path,
+				  unsigned nr_valid_paths)
+{
+}
+
+#endif	/* CONFIG_DM_UEVENT */
+
+#endif	/* DM_UEVENT_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d837d37f620..07cbbb8eb3e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -7,6 +7,7 @@
 
 #include "dm.h"
 #include "dm-bio-list.h"
+#include "dm-uevent.h"
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -112,6 +113,9 @@ struct mapped_device {
 	 */
 	atomic_t event_nr;
 	wait_queue_head_t eventq;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
 
 	/*
 	 * freeze/thaw support require holding onto a super block
@@ -143,11 +147,19 @@ static int __init local_init(void)
 		return -ENOMEM;
 	}
 
+	r = dm_uevent_init();
+	if (r) {
+		kmem_cache_destroy(_tio_cache);
+		kmem_cache_destroy(_io_cache);
+		return r;
+	}
+
 	_major = major;
 	r = register_blkdev(_major, _name);
 	if (r < 0) {
 		kmem_cache_destroy(_tio_cache);
 		kmem_cache_destroy(_io_cache);
+		dm_uevent_exit();
 		return r;
 	}
 
@@ -162,6 +174,7 @@ static void local_exit(void)
 	kmem_cache_destroy(_tio_cache);
 	kmem_cache_destroy(_io_cache);
 	unregister_blkdev(_major, _name);
+	dm_uevent_exit();
 
 	_major = 0;
 
@@ -751,15 +764,13 @@ static void __clone_and_map(struct clone_info *ci)
 /*
  * Split the bio into several clones.
  */
-static void __split_bio(struct mapped_device *md, struct bio *bio)
+static int __split_bio(struct mapped_device *md, struct bio *bio)
 {
 	struct clone_info ci;
 
 	ci.map = dm_get_table(md);
-	if (!ci.map) {
-		bio_io_error(bio);
-		return;
-	}
+	if (unlikely(!ci.map))
+		return -EIO;
 
 	ci.md = md;
 	ci.bio = bio;
@@ -779,6 +790,8 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
 	/* drop the extra reference count */
 	dec_pending(ci.io, 0);
 	dm_table_put(ci.map);
+
+	return 0;
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -790,7 +803,7 @@ static void __split_bio(struct mapped_device *md, struct bio *bio)
  */
 static int dm_request(struct request_queue *q, struct bio *bio)
 {
-	int r;
+	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
 
@@ -815,18 +828,11 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
 		up_read(&md->io_lock);
 
-		if (bio_rw(bio) == READA) {
-			bio_io_error(bio);
-			return 0;
-		}
-
-		r = queue_io(md, bio);
-		if (r < 0) {
-			bio_io_error(bio);
-			return 0;
+		if (bio_rw(bio) != READA)
+			r = queue_io(md, bio);
 
-		} else if (r == 0)
-			return 0;	/* deferred successfully */
+		if (r <= 0)
+			goto out_req;
 
 		/*
 		 * We're in a while loop, because someone could suspend
@@ -835,8 +841,13 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 		down_read(&md->io_lock);
 	}
 
-	__split_bio(md, bio);
+	r = __split_bio(md, bio);
 	up_read(&md->io_lock);
+
+out_req:
+	if (r < 0)
+		bio_io_error(bio);
+
 	return 0;
 }
 
@@ -977,6 +988,9 @@ static struct mapped_device *alloc_dev(int minor)
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
+	atomic_set(&md->uevent_seq, 0);
+	INIT_LIST_HEAD(&md->uevent_list);
+	spin_lock_init(&md->uevent_lock);
 
 	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
@@ -1044,12 +1058,14 @@ static struct mapped_device *alloc_dev(int minor)
 	return NULL;
 }
 
+static void unlock_fs(struct mapped_device *md);
+
 static void free_dev(struct mapped_device *md)
 {
 	int minor = md->disk->first_minor;
 
 	if (md->suspended_bdev) {
-		thaw_bdev(md->suspended_bdev, NULL);
+		unlock_fs(md);
 		bdput(md->suspended_bdev);
 	}
 	mempool_destroy(md->tio_pool);
@@ -1073,8 +1089,16 @@ static void free_dev(struct mapped_device *md)
  */
 static void event_callback(void *context)
 {
+	unsigned long flags;
+	LIST_HEAD(uevents);
 	struct mapped_device *md = (struct mapped_device *) context;
 
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_splice_init(&md->uevent_list, &uevents);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+
+	dm_send_uevents(&uevents, &md->disk->kobj);
+
 	atomic_inc(&md->event_nr);
 	wake_up(&md->eventq);
 }
@@ -1233,7 +1257,8 @@ static void __flush_deferred_io(struct mapped_device *md, struct bio *c)
 	while (c) {
 		n = c->bi_next;
 		c->bi_next = NULL;
-		__split_bio(md, c);
+		if (__split_bio(md, c))
+			bio_io_error(c);
 		c = n;
 	}
 }
@@ -1491,6 +1516,11 @@ out:
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
+uint32_t dm_next_uevent_seq(struct mapped_device *md)
+{
+	return atomic_add_return(1, &md->uevent_seq);
+}
+
 uint32_t dm_get_event_nr(struct mapped_device *md)
 {
 	return atomic_read(&md->event_nr);
@@ -1502,6 +1532,15 @@ int dm_wait_event(struct mapped_device *md, int event_nr)
 			(event_nr != atomic_read(&md->event_nr)));
 }
 
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&md->uevent_lock, flags);
+	list_add(elist, &md->uevent_list);
+	spin_unlock_irqrestore(&md->uevent_lock, flags);
+}
+
 /*
  * The gendisk is only valid as long as you have a reference
  * count on 'md'.
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index 7e052378c47..f3831f31223 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -198,7 +198,7 @@ struct kcopyd_job {
 	 * These fields are only used if the job has been split
 	 * into more manageable parts.
 	 */
-	struct semaphore lock;
+	struct mutex lock;
 	atomic_t sub_jobs;
 	sector_t progress;
 };
@@ -456,7 +456,7 @@ static void segment_complete(int read_err,
 	sector_t count = 0;
 	struct kcopyd_job *job = (struct kcopyd_job *) context;
 
-	down(&job->lock);
+	mutex_lock(&job->lock);
 
 	/* update the error */
 	if (read_err)
@@ -480,7 +480,7 @@ static void segment_complete(int read_err,
 			job->progress += count;
 		}
 	}
-	up(&job->lock);
+	mutex_unlock(&job->lock);
 
 	if (count) {
 		int i;
@@ -562,7 +562,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
 		dispatch_job(job);
 
 	else {
-		init_MUTEX(&job->lock);
+		mutex_init(&job->lock);
 		job->progress = 0;
 		split_job(job);
 	}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c059ae6f37e..808cd954945 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4717,7 +4717,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
 
 void md_unregister_thread(mdk_thread_t *thread)
 {
-	dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid);
+	dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
 
 	kthread_stop(thread->tsk);
 	kfree(thread);
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index 5a12b567955..154a7ce7cb8 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -820,7 +820,7 @@ static int cinergyt2_register_rc(struct cinergyt2 *cinergyt2)
 
 	input_dev->name = DRIVER_NAME " remote control";
 	input_dev->phys = cinergyt2->phys;
-	input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
 	for (i = 0; i < ARRAY_SIZE(rc_keys); i += 3)
 		set_bit(rc_keys[i + 2], input_dev->keybit);
 	input_dev->keycodesize = 0;
diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
index 7b9f35bfb4f..c0c2c22ddd8 100644
--- a/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
+++ b/drivers/media/dvb/dvb-usb/dvb-usb-remote.c
@@ -106,7 +106,7 @@ int dvb_usb_remote_init(struct dvb_usb_device *d)
 	if (!input_dev)
 		return -ENOMEM;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	input_dev->name = "IR-receiver inside an USB DVB receiver";
 	input_dev->phys = d->rc_phys;
 	usb_to_input_id(d->udev, &input_dev->id);
diff --git a/drivers/media/dvb/ttpci/av7110_ir.c b/drivers/media/dvb/ttpci/av7110_ir.c
index 5d19c402dad..a283e1de83f 100644
--- a/drivers/media/dvb/ttpci/av7110_ir.c
+++ b/drivers/media/dvb/ttpci/av7110_ir.c
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/kernel.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #include "av7110.h"
 #include "av7110_hw.h"
diff --git a/drivers/media/dvb/ttusb-dec/ttusb_dec.c b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
index 5e691fd7990..1ec981d98b9 100644
--- a/drivers/media/dvb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/dvb/ttusb-dec/ttusb_dec.c
@@ -1198,7 +1198,7 @@ static int ttusb_init_rc( struct ttusb_dec *dec)
 
 	input_dev->name = "ttusb_dec remote control";
 	input_dev->phys = dec->rc_phys;
-	input_dev->evbit[0] = BIT(EV_KEY);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
 	input_dev->keycodesize = sizeof(u16);
 	input_dev->keycodemax = 0x1a;
 	input_dev->keycode = rc_keys;
diff --git a/drivers/media/video/usbvideo/konicawc.c b/drivers/media/video/usbvideo/konicawc.c
index 491505d6fde..3e93f805877 100644
--- a/drivers/media/video/usbvideo/konicawc.c
+++ b/drivers/media/video/usbvideo/konicawc.c
@@ -238,8 +238,8 @@ static void konicawc_register_input(struct konicawc *cam, struct usb_device *dev
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	input_dev->private = cam;
 
diff --git a/drivers/media/video/usbvideo/quickcam_messenger.c b/drivers/media/video/usbvideo/quickcam_messenger.c
index dd1a6d6bbc9..d847273eeba 100644
--- a/drivers/media/video/usbvideo/quickcam_messenger.c
+++ b/drivers/media/video/usbvideo/quickcam_messenger.c
@@ -102,8 +102,8 @@ static void qcm_register_input(struct qcm *cam, struct usb_device *dev)
 	usb_to_input_id(dev, &input_dev->id);
 	input_dev->dev.parent = &dev->dev;
 
-	input_dev->evbit[0] = BIT(EV_KEY);
-	input_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
+	input_dev->evbit[0] = BIT_MASK(EV_KEY);
+	input_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
 
 	input_dev->private = cam;
 
diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c
index 1c14fa2bd41..419e5af7853 100644
--- a/drivers/media/video/zoran_driver.c
+++ b/drivers/media/video/zoran_driver.c
@@ -1285,7 +1285,7 @@ zoran_open (struct inode *inode,
 	}
 
 	dprintk(1, KERN_INFO "%s: zoran_open(%s, pid=[%d]), users(-)=%d\n",
-		ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+		ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
 	/* now, create the open()-specific file_ops struct */
 	fh = kzalloc(sizeof(struct zoran_fh), GFP_KERNEL);
@@ -1358,7 +1358,7 @@ zoran_close (struct inode *inode,
 	struct zoran *zr = fh->zr;
 
 	dprintk(1, KERN_INFO "%s: zoran_close(%s, pid=[%d]), users(+)=%d\n",
-		ZR_DEVNAME(zr), current->comm, current->pid, zr->user);
+		ZR_DEVNAME(zr), current->comm, task_pid_nr(current), zr->user);
 
 	/* kernel locks (fs/device.c), so don't do that ourselves
 	 * (prevents deadlocks) */
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 346c44eff95..cf02ddc3436 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -111,6 +111,21 @@ config ASUS_LAPTOP
 
 	  If you have an ACPI-compatible ASUS laptop, say Y or M here.
 
+config FUJITSU_LAPTOP
+        tristate "Fujitsu Laptop Extras"
+        depends on X86
+        depends on ACPI
+        depends on BACKLIGHT_CLASS_DEVICE
+        ---help---
+	  This is a driver for laptops built by Fujitsu:
+
+	    * P2xxx/P5xxx/S6xxx/S7xxx series Lifebooks
+	    * Possibly other Fujitsu laptop models
+
+	  It adds support for LCD brightness control.
+
+	  If you have a Fujitsu laptop, say Y or M here.
+
 config MSI_LAPTOP
         tristate "MSI Laptop Extras"
         depends on X86
@@ -134,6 +149,7 @@ config SONY_LAPTOP
 	tristate "Sony Laptop Extras"
 	depends on X86 && ACPI
 	select BACKLIGHT_CLASS_DEVICE
+	depends on INPUT
 	  ---help---
 	  This mini-driver drives the SNC and SPIC devices present in the ACPI
 	  BIOS of the Sony Vaio laptops.
@@ -156,6 +172,7 @@ config THINKPAD_ACPI
 	select BACKLIGHT_CLASS_DEVICE
 	select HWMON
 	select NVRAM
+	depends on INPUT
 	---help---
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index a24c61475c2..87f2685d728 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -15,4 +15,5 @@ obj-$(CONFIG_PHANTOM)		+= phantom.o
 obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
 obj-$(CONFIG_SONY_LAPTOP)	+= sony-laptop.o
 obj-$(CONFIG_THINKPAD_ACPI)	+= thinkpad_acpi.o
+obj-$(CONFIG_FUJITSU_LAPTOP)	+= fujitsu-laptop.o
 obj-$(CONFIG_EEPROM_93CX6)	+= eeprom_93cx6.o
diff --git a/drivers/misc/fujitsu-laptop.c b/drivers/misc/fujitsu-laptop.c
new file mode 100644
index 00000000000..d366a6cc1fd
--- /dev/null
+++ b/drivers/misc/fujitsu-laptop.c
@@ -0,0 +1,358 @@
+/*-*-linux-c-*-*/
+
+/*
+  Copyright (C) 2007 Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
+  Based on earlier work:
+    Copyright (C) 2003 Shane Spencer <shane@bogomip.com>
+    Adrian Yee <brewt-fujitsu@brewt.org>
+
+  Templated from msi-laptop.c which is copyright by its respective authors.
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+  02110-1301, USA.
+ */
+
+/*
+ * fujitsu-laptop.c - Fujitsu laptop support, providing access to additional
+ * features made available on a range of Fujitsu laptops including the
+ * P2xxx/P5xxx/S6xxx/S7xxx series.
+ *
+ * This driver exports a few files in /sys/devices/platform/fujitsu-laptop/;
+ * others may be added at a later date.
+ *
+ *   lcd_level - Screen brightness: contains a single integer in the
+ *   range 0..7. (rw)
+ *
+ * In addition to these platform device attributes the driver
+ * registers itself in the Linux backlight control subsystem and is
+ * available to userspace under /sys/class/backlight/fujitsu-laptop/.
+ *
+ * This driver has been tested on a Fujitsu Lifebook S7020.  It should
+ * work on most P-series and S-series Lifebooks, but YMMV.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/backlight.h>
+#include <linux/platform_device.h>
+#include <linux/autoconf.h>
+
+#define FUJITSU_DRIVER_VERSION "0.3"
+
+#define FUJITSU_LCD_N_LEVELS 8
+
+#define ACPI_FUJITSU_CLASS              "fujitsu"
+#define ACPI_FUJITSU_HID                "FUJ02B1"
+#define ACPI_FUJITSU_DRIVER_NAME        "Fujitsu laptop FUJ02B1 ACPI extras driver"
+#define ACPI_FUJITSU_DEVICE_NAME        "Fujitsu FUJ02B1"
+
+struct fujitsu_t {
+	acpi_handle acpi_handle;
+	struct backlight_device *bl_device;
+	struct platform_device *pf_device;
+
+	unsigned long fuj02b1_state;
+	unsigned int brightness_changed;
+	unsigned int brightness_level;
+};
+
+static struct fujitsu_t *fujitsu;
+
+/* Hardware access */
+
+static int set_lcd_level(int level)
+{
+	acpi_status status = AE_OK;
+	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+	struct acpi_object_list arg_list = { 1, &arg0 };
+	acpi_handle handle = NULL;
+
+	if (level < 0 || level >= FUJITSU_LCD_N_LEVELS)
+		return -EINVAL;
+
+	if (!fujitsu)
+		return -EINVAL;
+
+	status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle);
+	if (ACPI_FAILURE(status)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SBLL not present\n"));
+		return -ENODEV;
+	}
+
+	arg0.integer.value = level;
+
+	status = acpi_evaluate_object(handle, NULL, &arg_list, NULL);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int get_lcd_level(void)
+{
+	unsigned long state = 0;
+	acpi_status status = AE_OK;
+
+	// Get the Brightness
+	status =
+	    acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state);
+	if (status < 0)
+		return status;
+
+	fujitsu->fuj02b1_state = state;
+	fujitsu->brightness_level = state & 0x0fffffff;
+
+	if (state & 0x80000000)
+		fujitsu->brightness_changed = 1;
+	else
+		fujitsu->brightness_changed = 0;
+
+	if (status < 0)
+		return status;
+
+	return fujitsu->brightness_level;
+}
+
+/* Backlight device stuff */
+
+static int bl_get_brightness(struct backlight_device *b)
+{
+	return get_lcd_level();
+}
+
+static int bl_update_status(struct backlight_device *b)
+{
+	return set_lcd_level(b->props.brightness);
+}
+
+static struct backlight_ops fujitsubl_ops = {
+	.get_brightness = bl_get_brightness,
+	.update_status = bl_update_status,
+};
+
+/* Platform device */
+
+static ssize_t show_lcd_level(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+
+	int ret;
+
+	ret = get_lcd_level();
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%i\n", ret);
+}
+
+static ssize_t store_lcd_level(struct device *dev,
+			       struct device_attribute *attr, const char *buf,
+			       size_t count)
+{
+
+	int level, ret;
+
+	if (sscanf(buf, "%i", &level) != 1
+	    || (level < 0 || level >= FUJITSU_LCD_N_LEVELS))
+		return -EINVAL;
+
+	ret = set_lcd_level(level);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level);
+
+static struct attribute *fujitsupf_attributes[] = {
+	&dev_attr_lcd_level.attr,
+	NULL
+};
+
+static struct attribute_group fujitsupf_attribute_group = {
+	.attrs = fujitsupf_attributes
+};
+
+static struct platform_driver fujitsupf_driver = {
+	.driver = {
+		   .name = "fujitsu-laptop",
+		   .owner = THIS_MODULE,
+		   }
+};
+
+/* ACPI device */
+
+int acpi_fujitsu_add(struct acpi_device *device)
+{
+	int result = 0;
+	int state = 0;
+
+	ACPI_FUNCTION_TRACE("acpi_fujitsu_add");
+
+	if (!device)
+		return -EINVAL;
+
+	fujitsu->acpi_handle = device->handle;
+	sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME);
+	sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS);
+	acpi_driver_data(device) = fujitsu;
+
+	result = acpi_bus_get_power(fujitsu->acpi_handle, &state);
+	if (result) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
+				  "Error reading power state\n"));
+		goto end;
+	}
+
+	printk(KERN_INFO PREFIX "%s [%s] (%s)\n",
+	       acpi_device_name(device), acpi_device_bid(device),
+	       !device->power.state ? "on" : "off");
+
+      end:
+
+	return result;
+}
+
+int acpi_fujitsu_remove(struct acpi_device *device, int type)
+{
+	ACPI_FUNCTION_TRACE("acpi_fujitsu_remove");
+
+	if (!device || !acpi_driver_data(device))
+		return -EINVAL;
+	fujitsu->acpi_handle = 0;
+
+	return 0;
+}
+
+static const struct acpi_device_id fujitsu_device_ids[] = {
+	{ACPI_FUJITSU_HID, 0},
+	{"", 0},
+};
+
+static struct acpi_driver acpi_fujitsu_driver = {
+	.name = ACPI_FUJITSU_DRIVER_NAME,
+	.class = ACPI_FUJITSU_CLASS,
+	.ids = fujitsu_device_ids,
+	.ops = {
+		.add = acpi_fujitsu_add,
+		.remove = acpi_fujitsu_remove,
+		},
+};
+
+/* Initialization */
+
+static int __init fujitsu_init(void)
+{
+	int ret, result;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	fujitsu = kmalloc(sizeof(struct fujitsu_t), GFP_KERNEL);
+	if (!fujitsu)
+		return -ENOMEM;
+	memset(fujitsu, 0, sizeof(struct fujitsu_t));
+
+	result = acpi_bus_register_driver(&acpi_fujitsu_driver);
+	if (result < 0) {
+		ret = -ENODEV;
+		goto fail_acpi;
+	}
+
+	/* Register backlight stuff */
+
+	fujitsu->bl_device =
+	    backlight_device_register("fujitsu-laptop", NULL, NULL,
+				      &fujitsubl_ops);
+	if (IS_ERR(fujitsu->bl_device))
+		return PTR_ERR(fujitsu->bl_device);
+
+	fujitsu->bl_device->props.max_brightness = FUJITSU_LCD_N_LEVELS - 1;
+	ret = platform_driver_register(&fujitsupf_driver);
+	if (ret)
+		goto fail_backlight;
+
+	/* Register platform stuff */
+
+	fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1);
+	if (!fujitsu->pf_device) {
+		ret = -ENOMEM;
+		goto fail_platform_driver;
+	}
+
+	ret = platform_device_add(fujitsu->pf_device);
+	if (ret)
+		goto fail_platform_device1;
+
+	ret =
+	    sysfs_create_group(&fujitsu->pf_device->dev.kobj,
+			       &fujitsupf_attribute_group);
+	if (ret)
+		goto fail_platform_device2;
+
+	printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION
+	       " successfully loaded.\n");
+
+	return 0;
+
+      fail_platform_device2:
+
+	platform_device_del(fujitsu->pf_device);
+
+      fail_platform_device1:
+
+	platform_device_put(fujitsu->pf_device);
+
+      fail_platform_driver:
+
+	platform_driver_unregister(&fujitsupf_driver);
+
+      fail_backlight:
+
+	backlight_device_unregister(fujitsu->bl_device);
+
+      fail_acpi:
+
+	kfree(fujitsu);
+
+	return ret;
+}
+
+static void __exit fujitsu_cleanup(void)
+{
+	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
+			   &fujitsupf_attribute_group);
+	platform_device_unregister(fujitsu->pf_device);
+	platform_driver_unregister(&fujitsupf_driver);
+	backlight_device_unregister(fujitsu->bl_device);
+
+	acpi_bus_unregister_driver(&acpi_fujitsu_driver);
+
+	kfree(fujitsu);
+
+	printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n");
+}
+
+module_init(fujitsu_init);
+module_exit(fujitsu_cleanup);
+
+MODULE_AUTHOR("Jonathan Woithe");
+MODULE_DESCRIPTION("Fujitsu laptop extras support");
+MODULE_VERSION(FUJITSU_DRIVER_VERSION);
+MODULE_LICENSE("GPL");
diff --git a/drivers/misc/ibmasm/remote.c b/drivers/misc/ibmasm/remote.c
index 0550ce075fc..1d9defb1a10 100644
--- a/drivers/misc/ibmasm/remote.c
+++ b/drivers/misc/ibmasm/remote.c
@@ -226,9 +226,9 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
 	mouse_dev->id.product = pdev->device;
 	mouse_dev->id.version = 1;
 	mouse_dev->dev.parent = sp->dev;
-	mouse_dev->evbit[0]  = BIT(EV_KEY) | BIT(EV_ABS);
-	mouse_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) |
-		BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
+	mouse_dev->evbit[0]  = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	mouse_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+		BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
 	set_bit(BTN_TOUCH, mouse_dev->keybit);
 	mouse_dev->name = "ibmasm RSA I remote mouse";
 	input_set_abs_params(mouse_dev, ABS_X, 0, MOUSE_X_MAX, 0, 0);
@@ -239,7 +239,7 @@ int ibmasm_init_remote_input_dev(struct service_processor *sp)
 	keybd_dev->id.product = pdev->device;
 	keybd_dev->id.version = 2;
 	keybd_dev->dev.parent = sp->dev;
-	keybd_dev->evbit[0]  = BIT(EV_KEY);
+	keybd_dev->evbit[0]  = BIT_MASK(EV_KEY);
 	keybd_dev->name = "ibmasm RSA I remote keyboard";
 
 	for (i = 0; i < XLATE_SIZE; i++) {
diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c
index 5108b7c576d..cd221fd0fb9 100644
--- a/drivers/misc/phantom.c
+++ b/drivers/misc/phantom.c
@@ -9,6 +9,7 @@
  *  You need an userspace library to cooperate with this driver. It (and other
  *  info) may be obtained here:
  *  http://www.fi.muni.cz/~xslaby/phantom.html
+ *  or alternatively, you might use OpenHaptics provided by Sensable.
  */
 
 #include <linux/kernel.h>
@@ -24,13 +25,14 @@
 #include <asm/atomic.h>
 #include <asm/io.h>
 
-#define PHANTOM_VERSION		"n0.9.5"
+#define PHANTOM_VERSION		"n0.9.7"
 
 #define PHANTOM_MAX_MINORS	8
 
 #define PHN_IRQCTL		0x4c    /* irq control in caddr space */
 
 #define PHB_RUNNING		1
+#define PHB_NOT_OH		2
 
 static struct class *phantom_class;
 static int phantom_major;
@@ -47,7 +49,11 @@ struct phantom_device {
 	struct cdev cdev;
 
 	struct mutex open_lock;
-	spinlock_t ioctl_lock;
+	spinlock_t regs_lock;
+
+	/* used in NOT_OH mode */
+	struct phm_regs oregs;
+	u32 ctl_reg;
 };
 
 static unsigned char phantom_devices[PHANTOM_MAX_MINORS];
@@ -82,6 +88,7 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
 	struct phm_regs rs;
 	struct phm_reg r;
 	void __user *argp = (void __user *)arg;
+	unsigned long flags;
 	unsigned int i;
 
 	if (_IOC_TYPE(cmd) != PH_IOC_MAGIC ||
@@ -96,32 +103,45 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
 		if (r.reg > 7)
 			return -EINVAL;
 
-		spin_lock(&dev->ioctl_lock);
+		spin_lock_irqsave(&dev->regs_lock, flags);
 		if (r.reg == PHN_CONTROL && (r.value & PHN_CTL_IRQ) &&
 				phantom_status(dev, dev->status | PHB_RUNNING)){
-			spin_unlock(&dev->ioctl_lock);
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
 			return -ENODEV;
 		}
 
 		pr_debug("phantom: writing %x to %u\n", r.value, r.reg);
+
+		/* preserve amp bit (don't allow to change it when in NOT_OH) */
+		if (r.reg == PHN_CONTROL && (dev->status & PHB_NOT_OH)) {
+			r.value &= ~PHN_CTL_AMP;
+			r.value |= dev->ctl_reg & PHN_CTL_AMP;
+			dev->ctl_reg = r.value;
+		}
+
 		iowrite32(r.value, dev->iaddr + r.reg);
 		ioread32(dev->iaddr); /* PCI posting */
 
 		if (r.reg == PHN_CONTROL && !(r.value & PHN_CTL_IRQ))
 			phantom_status(dev, dev->status & ~PHB_RUNNING);
-		spin_unlock(&dev->ioctl_lock);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 		break;
 	case PHN_SET_REGS:
 		if (copy_from_user(&rs, argp, sizeof(rs)))
 			return -EFAULT;
 
 		pr_debug("phantom: SRS %u regs %x\n", rs.count, rs.mask);
-		spin_lock(&dev->ioctl_lock);
-		for (i = 0; i < min(rs.count, 8U); i++)
-			if ((1 << i) & rs.mask)
-				iowrite32(rs.values[i], dev->oaddr + i);
-		ioread32(dev->iaddr); /* PCI posting */
-		spin_unlock(&dev->ioctl_lock);
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_NOT_OH)
+			memcpy(&dev->oregs, &rs, sizeof(rs));
+		else {
+			u32 m = min(rs.count, 8U);
+			for (i = 0; i < m; i++)
+				if (rs.mask & BIT(i))
+					iowrite32(rs.values[i], dev->oaddr + i);
+			ioread32(dev->iaddr); /* PCI posting */
+		}
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 		break;
 	case PHN_GET_REG:
 		if (copy_from_user(&r, argp, sizeof(r)))
@@ -135,20 +155,35 @@ static long phantom_ioctl(struct file *file, unsigned int cmd,
 		if (copy_to_user(argp, &r, sizeof(r)))
 			return -EFAULT;
 		break;
-	case PHN_GET_REGS:
+	case PHN_GET_REGS: {
+		u32 m;
+
 		if (copy_from_user(&rs, argp, sizeof(rs)))
 			return -EFAULT;
 
+		m = min(rs.count, 8U);
+
 		pr_debug("phantom: GRS %u regs %x\n", rs.count, rs.mask);
-		spin_lock(&dev->ioctl_lock);
-		for (i = 0; i < min(rs.count, 8U); i++)
-			if ((1 << i) & rs.mask)
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		for (i = 0; i < m; i++)
+			if (rs.mask & BIT(i))
 				rs.values[i] = ioread32(dev->iaddr + i);
-		spin_unlock(&dev->ioctl_lock);
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
 
 		if (copy_to_user(argp, &rs, sizeof(rs)))
 			return -EFAULT;
 		break;
+	} case PHN_NOT_OH:
+		spin_lock_irqsave(&dev->regs_lock, flags);
+		if (dev->status & PHB_RUNNING) {
+			printk(KERN_ERR "phantom: you need to set NOT_OH "
+					"before you start the device!\n");
+			spin_unlock_irqrestore(&dev->regs_lock, flags);
+			return -EINVAL;
+		}
+		dev->status |= PHB_NOT_OH;
+		spin_unlock_irqrestore(&dev->regs_lock, flags);
+		break;
 	default:
 		return -ENOTTY;
 	}
@@ -171,8 +206,11 @@ static int phantom_open(struct inode *inode, struct file *file)
 		return -EINVAL;
 	}
 
+	WARN_ON(dev->status & PHB_NOT_OH);
+
 	file->private_data = dev;
 
+	atomic_set(&dev->counter, 0);
 	dev->opened++;
 	mutex_unlock(&dev->open_lock);
 
@@ -187,6 +225,7 @@ static int phantom_release(struct inode *inode, struct file *file)
 
 	dev->opened = 0;
 	phantom_status(dev, dev->status & ~PHB_RUNNING);
+	dev->status &= ~PHB_NOT_OH;
 
 	mutex_unlock(&dev->open_lock);
 
@@ -220,12 +259,32 @@ static struct file_operations phantom_file_ops = {
 static irqreturn_t phantom_isr(int irq, void *data)
 {
 	struct phantom_device *dev = data;
+	unsigned int i;
+	u32 ctl;
 
-	if (!(ioread32(dev->iaddr + PHN_CONTROL) & PHN_CTL_IRQ))
+	spin_lock(&dev->regs_lock);
+	ctl = ioread32(dev->iaddr + PHN_CONTROL);
+	if (!(ctl & PHN_CTL_IRQ)) {
+		spin_unlock(&dev->regs_lock);
 		return IRQ_NONE;
+	}
 
 	iowrite32(0, dev->iaddr);
 	iowrite32(0xc0, dev->iaddr);
+
+	if (dev->status & PHB_NOT_OH) {
+		struct phm_regs *r = &dev->oregs;
+		u32 m = min(r->count, 8U);
+
+		for (i = 0; i < m; i++)
+			if (r->mask & BIT(i))
+				iowrite32(r->values[i], dev->oaddr + i);
+
+		dev->ctl_reg ^= PHN_CTL_AMP;
+		iowrite32(dev->ctl_reg, dev->iaddr + PHN_CONTROL);
+	}
+	spin_unlock(&dev->regs_lock);
+
 	ioread32(dev->iaddr); /* PCI posting */
 
 	atomic_inc(&dev->counter);
@@ -297,7 +356,7 @@ static int __devinit phantom_probe(struct pci_dev *pdev,
 	}
 
 	mutex_init(&pht->open_lock);
-	spin_lock_init(&pht->ioctl_lock);
+	spin_lock_init(&pht->regs_lock);
 	init_waitqueue_head(&pht->wait);
 	cdev_init(&pht->cdev, &phantom_file_ops);
 	pht->cdev.owner = THIS_MODULE;
@@ -378,6 +437,8 @@ static int phantom_suspend(struct pci_dev *pdev, pm_message_t state)
 	iowrite32(0, dev->caddr + PHN_IRQCTL);
 	ioread32(dev->caddr + PHN_IRQCTL); /* PCI posting */
 
+	synchronize_irq(pdev->irq);
+
 	return 0;
 }
 
diff --git a/drivers/misc/sony-laptop.c b/drivers/misc/sony-laptop.c
index e73a71f04bb..1bfbb87e579 100644
--- a/drivers/misc/sony-laptop.c
+++ b/drivers/misc/sony-laptop.c
@@ -411,9 +411,9 @@ static int sony_laptop_setup_input(void)
 	jog_dev->id.bustype = BUS_ISA;
 	jog_dev->id.vendor = PCI_VENDOR_ID_SONY;
 
-	jog_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-	jog_dev->keybit[LONG(BTN_MOUSE)] = BIT(BTN_MIDDLE);
-	jog_dev->relbit[0] = BIT(REL_WHEEL);
+	jog_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+	jog_dev->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_MIDDLE);
+	jog_dev->relbit[0] = BIT_MASK(REL_WHEEL);
 
 	error = input_register_device(jog_dev);
 	if (error)
@@ -1173,7 +1173,8 @@ static struct acpi_driver sony_nc_driver = {
 #define SONYPI_TYPE3_OFFSET	0x12
 
 struct sony_pic_ioport {
-	struct acpi_resource_io	io;
+	struct acpi_resource_io	io1;
+	struct acpi_resource_io	io2;
 	struct list_head	list;
 };
 
@@ -1443,11 +1444,11 @@ static u8 sony_pic_call1(u8 dev)
 {
 	u8 v1, v2;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum + 4);
-	v2 = inb_p(spic_dev.cur_ioport->io.minimum);
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum + 4);
+	v2 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call1: 0x%.4x\n", (v2 << 8) | v1);
 	return v2;
 }
@@ -1456,13 +1457,13 @@ static u8 sony_pic_call2(u8 dev, u8 fn)
 {
 	u8 v1;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2,
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2,
 			ITERATIONS_LONG);
-	outb(fn, spic_dev.cur_ioport->io.minimum);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum);
+	outb(fn, spic_dev.cur_ioport->io1.minimum);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call2: 0x%.4x\n", v1);
 	return v1;
 }
@@ -1471,13 +1472,13 @@ static u8 sony_pic_call3(u8 dev, u8 fn, u8 v)
 {
 	u8 v1;
 
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(dev, spic_dev.cur_ioport->io.minimum + 4);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(fn, spic_dev.cur_ioport->io.minimum);
-	wait_on_command(inb_p(spic_dev.cur_ioport->io.minimum + 4) & 2, ITERATIONS_LONG);
-	outb(v, spic_dev.cur_ioport->io.minimum);
-	v1 = inb_p(spic_dev.cur_ioport->io.minimum);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(dev, spic_dev.cur_ioport->io1.minimum + 4);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(fn, spic_dev.cur_ioport->io1.minimum);
+	wait_on_command(inb_p(spic_dev.cur_ioport->io1.minimum + 4) & 2, ITERATIONS_LONG);
+	outb(v, spic_dev.cur_ioport->io1.minimum);
+	v1 = inb_p(spic_dev.cur_ioport->io1.minimum);
 	dprintk("sony_pic_call3: 0x%.4x\n", v1);
 	return v1;
 }
@@ -2074,7 +2075,18 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context)
 
 	switch (resource->type) {
 	case ACPI_RESOURCE_TYPE_START_DEPENDENT:
+		{
+			/* start IO enumeration */
+			struct sony_pic_ioport *ioport = kzalloc(sizeof(*ioport), GFP_KERNEL);
+			if (!ioport)
+				return AE_ERROR;
+
+			list_add(&ioport->list, &dev->ioports);
+			return AE_OK;
+		}
+
 	case ACPI_RESOURCE_TYPE_END_DEPENDENT:
+		/* end IO enumeration */
 		return AE_OK;
 
 	case ACPI_RESOURCE_TYPE_IRQ:
@@ -2101,7 +2113,7 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context)
 				if (!interrupt)
 					return AE_ERROR;
 
-				list_add_tail(&interrupt->list, &dev->interrupts);
+				list_add(&interrupt->list, &dev->interrupts);
 				interrupt->irq.triggering = p->triggering;
 				interrupt->irq.polarity = p->polarity;
 				interrupt->irq.sharable = p->sharable;
@@ -2113,18 +2125,27 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context)
 	case ACPI_RESOURCE_TYPE_IO:
 		{
 			struct acpi_resource_io *io = &resource->data.io;
-			struct sony_pic_ioport *ioport = NULL;
+			struct sony_pic_ioport *ioport =
+				list_first_entry(&dev->ioports, struct sony_pic_ioport, list);
 			if (!io) {
 				dprintk("Blank IO resource\n");
 				return AE_OK;
 			}
 
-			ioport = kzalloc(sizeof(*ioport), GFP_KERNEL);
-			if (!ioport)
+			if (!ioport->io1.minimum) {
+				memcpy(&ioport->io1, io, sizeof(*io));
+				dprintk("IO1 at 0x%.4x (0x%.2x)\n", ioport->io1.minimum,
+						ioport->io1.address_length);
+			}
+			else if (!ioport->io2.minimum) {
+				memcpy(&ioport->io2, io, sizeof(*io));
+				dprintk("IO2 at 0x%.4x (0x%.2x)\n", ioport->io2.minimum,
+						ioport->io2.address_length);
+			}
+			else {
+				printk(KERN_ERR DRV_PFX "Unknown SPIC Type, more than 2 IO Ports\n");
 				return AE_ERROR;
-
-			list_add_tail(&ioport->list, &dev->ioports);
-			memcpy(&ioport->io, io, sizeof(*io));
+			}
 			return AE_OK;
 		}
 	default:
@@ -2199,10 +2220,22 @@ static int sony_pic_enable(struct acpi_device *device,
 {
 	acpi_status status;
 	int result = 0;
+	/* Type 1 resource layout is:
+	 *    IO
+	 *    IO
+	 *    IRQNoFlags
+	 *    End
+	 *
+	 * Type 2 and 3 resource layout is:
+	 *    IO
+	 *    IRQNoFlags
+	 *    End
+	 */
 	struct {
-		struct acpi_resource io_res;
-		struct acpi_resource irq_res;
-		struct acpi_resource end;
+		struct acpi_resource res1;
+		struct acpi_resource res2;
+		struct acpi_resource res3;
+		struct acpi_resource res4;
 	} *resource;
 	struct acpi_buffer buffer = { 0, NULL };
 
@@ -2217,21 +2250,49 @@ static int sony_pic_enable(struct acpi_device *device,
 	buffer.length = sizeof(*resource) + 1;
 	buffer.pointer = resource;
 
-	/* setup io resource */
-	resource->io_res.type = ACPI_RESOURCE_TYPE_IO;
-	resource->io_res.length = sizeof(struct acpi_resource);
-	memcpy(&resource->io_res.data.io, &ioport->io,
-			sizeof(struct acpi_resource_io));
+	/* setup Type 1 resources */
+	if (spic_dev.model == SONYPI_DEVICE_TYPE1) {
 
-	/* setup irq resource */
-	resource->irq_res.type = ACPI_RESOURCE_TYPE_IRQ;
-	resource->irq_res.length = sizeof(struct acpi_resource);
-	memcpy(&resource->irq_res.data.irq, &irq->irq,
-			sizeof(struct acpi_resource_irq));
-	/* we requested a shared irq */
-	resource->irq_res.data.irq.sharable = ACPI_SHARED;
+		/* setup io resources */
+		resource->res1.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res1.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res1.data.io, &ioport->io1,
+				sizeof(struct acpi_resource_io));
 
-	resource->end.type = ACPI_RESOURCE_TYPE_END_TAG;
+		resource->res2.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res2.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res2.data.io, &ioport->io2,
+				sizeof(struct acpi_resource_io));
+
+		/* setup irq resource */
+		resource->res3.type = ACPI_RESOURCE_TYPE_IRQ;
+		resource->res3.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res3.data.irq, &irq->irq,
+				sizeof(struct acpi_resource_irq));
+		/* we requested a shared irq */
+		resource->res3.data.irq.sharable = ACPI_SHARED;
+
+		resource->res4.type = ACPI_RESOURCE_TYPE_END_TAG;
+
+	}
+	/* setup Type 2/3 resources */
+	else {
+		/* setup io resource */
+		resource->res1.type = ACPI_RESOURCE_TYPE_IO;
+		resource->res1.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res1.data.io, &ioport->io1,
+				sizeof(struct acpi_resource_io));
+
+		/* setup irq resource */
+		resource->res2.type = ACPI_RESOURCE_TYPE_IRQ;
+		resource->res2.length = sizeof(struct acpi_resource);
+		memcpy(&resource->res2.data.irq, &irq->irq,
+				sizeof(struct acpi_resource_irq));
+		/* we requested a shared irq */
+		resource->res2.data.irq.sharable = ACPI_SHARED;
+
+		resource->res3.type = ACPI_RESOURCE_TYPE_END_TAG;
+	}
 
 	/* Attempt to set the resource */
 	dprintk("Evaluating _SRS\n");
@@ -2239,7 +2300,7 @@ static int sony_pic_enable(struct acpi_device *device,
 
 	/* check for total failure */
 	if (ACPI_FAILURE(status)) {
-		printk(KERN_ERR DRV_PFX "Error evaluating _SRS");
+		printk(KERN_ERR DRV_PFX "Error evaluating _SRS\n");
 		result = -ENODEV;
 		goto end;
 	}
@@ -2268,11 +2329,14 @@ static irqreturn_t sony_pic_irq(int irq, void *dev_id)
 
 	struct sony_pic_dev *dev = (struct sony_pic_dev *) dev_id;
 
-	ev = inb_p(dev->cur_ioport->io.minimum);
-	data_mask = inb_p(dev->cur_ioport->io.minimum + dev->evport_offset);
+	ev = inb_p(dev->cur_ioport->io1.minimum);
+	if (dev->cur_ioport->io2.minimum)
+		data_mask = inb_p(dev->cur_ioport->io2.minimum);
+	else
+		data_mask = inb_p(dev->cur_ioport->io1.minimum + dev->evport_offset);
 
 	dprintk("event ([%.2x] [%.2x]) at port 0x%.4x(+0x%.2x)\n",
-			ev, data_mask, dev->cur_ioport->io.minimum, dev->evport_offset);
+			ev, data_mask, dev->cur_ioport->io1.minimum, dev->evport_offset);
 
 	if (ev == 0x00 || ev == 0xff)
 		return IRQ_HANDLED;
@@ -2323,8 +2387,11 @@ static int sony_pic_remove(struct acpi_device *device, int type)
 	}
 
 	free_irq(spic_dev.cur_irq->irq.interrupts[0], &spic_dev);
-	release_region(spic_dev.cur_ioport->io.minimum,
-			spic_dev.cur_ioport->io.address_length);
+	release_region(spic_dev.cur_ioport->io1.minimum,
+			spic_dev.cur_ioport->io1.address_length);
+	if (spic_dev.cur_ioport->io2.minimum)
+		release_region(spic_dev.cur_ioport->io2.minimum,
+				spic_dev.cur_ioport->io2.address_length);
 
 	sonypi_compat_exit();
 
@@ -2397,14 +2464,36 @@ static int sony_pic_add(struct acpi_device *device)
 		goto err_remove_input;
 
 	/* request io port */
-	list_for_each_entry(io, &spic_dev.ioports, list) {
-		if (request_region(io->io.minimum, io->io.address_length,
+	list_for_each_entry_reverse(io, &spic_dev.ioports, list) {
+		if (request_region(io->io1.minimum, io->io1.address_length,
 					"Sony Programable I/O Device")) {
-			dprintk("I/O port: 0x%.4x (0x%.4x) + 0x%.2x\n",
-					io->io.minimum, io->io.maximum,
-					io->io.address_length);
-			spic_dev.cur_ioport = io;
-			break;
+			dprintk("I/O port1: 0x%.4x (0x%.4x) + 0x%.2x\n",
+					io->io1.minimum, io->io1.maximum,
+					io->io1.address_length);
+			/* Type 1 have 2 ioports */
+			if (io->io2.minimum) {
+				if (request_region(io->io2.minimum,
+						io->io2.address_length,
+						"Sony Programable I/O Device")) {
+					dprintk("I/O port2: 0x%.4x (0x%.4x) + 0x%.2x\n",
+							io->io2.minimum, io->io2.maximum,
+							io->io2.address_length);
+					spic_dev.cur_ioport = io;
+					break;
+				}
+				else {
+					dprintk("Unable to get I/O port2: "
+							"0x%.4x (0x%.4x) + 0x%.2x\n",
+							io->io2.minimum, io->io2.maximum,
+							io->io2.address_length);
+					release_region(io->io1.minimum,
+							io->io1.address_length);
+				}
+			}
+			else {
+				spic_dev.cur_ioport = io;
+				break;
+			}
 		}
 	}
 	if (!spic_dev.cur_ioport) {
@@ -2414,7 +2503,7 @@ static int sony_pic_add(struct acpi_device *device)
 	}
 
 	/* request IRQ */
-	list_for_each_entry(irq, &spic_dev.interrupts, list) {
+	list_for_each_entry_reverse(irq, &spic_dev.interrupts, list) {
 		if (!request_irq(irq->irq.interrupts[0], sony_pic_irq,
 					IRQF_SHARED, "sony-laptop", &spic_dev)) {
 			dprintk("IRQ: %d - triggering: %d - "
@@ -2462,8 +2551,11 @@ err_free_irq:
 	free_irq(spic_dev.cur_irq->irq.interrupts[0], &spic_dev);
 
 err_release_region:
-	release_region(spic_dev.cur_ioport->io.minimum,
-			spic_dev.cur_ioport->io.address_length);
+	release_region(spic_dev.cur_ioport->io1.minimum,
+			spic_dev.cur_ioport->io1.address_length);
+	if (spic_dev.cur_ioport->io2.minimum)
+		release_region(spic_dev.cur_ioport->io2.minimum,
+				spic_dev.cur_ioport->io2.address_length);
 
 err_remove_compat:
 	sonypi_compat_exit();
diff --git a/drivers/misc/thinkpad_acpi.c b/drivers/misc/thinkpad_acpi.c
index 81e068fa7ac..e953276664a 100644
--- a/drivers/misc/thinkpad_acpi.c
+++ b/drivers/misc/thinkpad_acpi.c
@@ -22,7 +22,7 @@
  */
 
 #define IBM_VERSION "0.16"
-#define TPACPI_SYSFS_VERSION 0x010000
+#define TPACPI_SYSFS_VERSION 0x020000
 
 /*
  *  Changelog:
@@ -117,6 +117,12 @@ IBM_BIOS_MODULE_ALIAS("K[U,X-Z]");
 
 #define __unused __attribute__ ((unused))
 
+static enum {
+	TPACPI_LIFE_INIT = 0,
+	TPACPI_LIFE_RUNNING,
+	TPACPI_LIFE_EXITING,
+} tpacpi_lifecycle;
+
 /****************************************************************************
  ****************************************************************************
  *
@@ -342,6 +348,9 @@ static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data)
 {
 	struct ibm_struct *ibm = data;
 
+	if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+		return;
+
 	if (!ibm || !ibm->acpi || !ibm->acpi->notify)
 		return;
 
@@ -517,8 +526,10 @@ static char *next_cmd(char **cmds)
  ****************************************************************************/
 
 static struct platform_device *tpacpi_pdev;
+static struct platform_device *tpacpi_sensors_pdev;
 static struct device *tpacpi_hwmon;
 static struct input_dev *tpacpi_inputdev;
+static struct mutex tpacpi_inputdev_send_mutex;
 
 
 static int tpacpi_resume_handler(struct platform_device *pdev)
@@ -543,6 +554,12 @@ static struct platform_driver tpacpi_pdriver = {
 	.resume = tpacpi_resume_handler,
 };
 
+static struct platform_driver tpacpi_hwmon_pdriver = {
+	.driver = {
+		.name = IBM_HWMON_DRVR_NAME,
+		.owner = THIS_MODULE,
+	},
+};
 
 /*************************************************************************
  * thinkpad-acpi driver attributes
@@ -692,6 +709,8 @@ static int parse_strtoul(const char *buf,
 {
 	char *endp;
 
+	while (*buf && isspace(*buf))
+		buf++;
 	*value = simple_strtoul(buf, &endp, 0);
 	while (*endp && isspace(*endp))
 		endp++;
@@ -989,6 +1008,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 
 	int res, i;
 	int status;
+	int hkeyv;
 
 	vdbg_printk(TPACPI_DBG_INIT, "initializing hotkey subdriver\n");
 
@@ -1014,18 +1034,35 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 			return res;
 
 		/* mask not supported on 570, 600e/x, 770e, 770x, A21e, A2xm/p,
-		   A30, R30, R31, T20-22, X20-21, X22-24 */
-		tp_features.hotkey_mask =
-			acpi_evalf(hkey_handle, NULL, "DHKN", "qv");
+		   A30, R30, R31, T20-22, X20-21, X22-24.  Detected by checking
+		   for HKEY interface version 0x100 */
+		if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
+			if ((hkeyv >> 8) != 1) {
+				printk(IBM_ERR "unknown version of the "
+				       "HKEY interface: 0x%x\n", hkeyv);
+				printk(IBM_ERR "please report this to %s\n",
+				       IBM_MAIL);
+			} else {
+				/*
+				 * MHKV 0x100 in A31, R40, R40e,
+				 * T4x, X31, and later
+				 * */
+				tp_features.hotkey_mask = 1;
+			}
+		}
 
 		vdbg_printk(TPACPI_DBG_INIT, "hotkey masks are %s\n",
 			str_supported(tp_features.hotkey_mask));
 
 		if (tp_features.hotkey_mask) {
-			/* MHKA available in A31, R40, R40e, T4x, X31, and later */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
-					"MHKA", "qd"))
+					"MHKA", "qd")) {
+				printk(IBM_ERR
+				       "missing MHKA handler, "
+				       "please report this to %s\n",
+				       IBM_MAIL);
 				hotkey_all_mask = 0x080cU; /* FN+F12, FN+F4, FN+F3 */
+			}
 		}
 
 		res = hotkey_get(&hotkey_orig_status, &hotkey_orig_mask);
@@ -1131,6 +1168,8 @@ static void tpacpi_input_send_key(unsigned int scancode,
 				  unsigned int keycode)
 {
 	if (keycode != KEY_RESERVED) {
+		mutex_lock(&tpacpi_inputdev_send_mutex);
+
 		input_report_key(tpacpi_inputdev, keycode, 1);
 		if (keycode == KEY_UNKNOWN)
 			input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
@@ -1142,6 +1181,8 @@ static void tpacpi_input_send_key(unsigned int scancode,
 			input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
 				    scancode);
 		input_sync(tpacpi_inputdev);
+
+		mutex_unlock(&tpacpi_inputdev_send_mutex);
 	}
 }
 
@@ -1149,18 +1190,47 @@ static void tpacpi_input_send_radiosw(void)
 {
 	int wlsw;
 
-	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw))
+	mutex_lock(&tpacpi_inputdev_send_mutex);
+
+	if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) {
 		input_report_switch(tpacpi_inputdev,
 				    SW_RADIO, !!wlsw);
+		input_sync(tpacpi_inputdev);
+	}
+
+	mutex_unlock(&tpacpi_inputdev_send_mutex);
 }
 
 static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 {
 	u32 hkey;
 	unsigned int keycode, scancode;
-	int send_acpi_ev = 0;
+	int send_acpi_ev;
+	int ignore_acpi_ev;
+
+	if (event != 0x80) {
+		printk(IBM_ERR "unknown HKEY notification event %d\n", event);
+		/* forward it to userspace, maybe it knows how to handle it */
+		acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+						ibm->acpi->device->dev.bus_id,
+						event, 0);
+		return;
+	}
+
+	while (1) {
+		if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
+			printk(IBM_ERR "failed to retrieve HKEY event\n");
+			return;
+		}
+
+		if (hkey == 0) {
+			/* queue empty */
+			return;
+		}
+
+		send_acpi_ev = 0;
+		ignore_acpi_ev = 0;
 
-	if (event == 0x80 && acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) {
 		switch (hkey >> 12) {
 		case 1:
 			/* 0x1000-0x1FFF: key presses */
@@ -1182,9 +1252,11 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 			 * eat up known LID events */
 			if (hkey != 0x5001 && hkey != 0x5002) {
 				printk(IBM_ERR
-					"unknown LID-related hotkey event: 0x%04x\n",
-					hkey);
+				       "unknown LID-related HKEY event: 0x%04x\n",
+				       hkey);
 				send_acpi_ev = 1;
+			} else {
+				ignore_acpi_ev = 1;
 			}
 			break;
 		case 7:
@@ -1202,21 +1274,18 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 			printk(IBM_NOTICE "unhandled HKEY event 0x%04x\n", hkey);
 			send_acpi_ev = 1;
 		}
-	} else {
-		printk(IBM_ERR "unknown hotkey notification event %d\n", event);
-		hkey = 0;
-		send_acpi_ev = 1;
-	}
 
-	/* Legacy events */
-	if (send_acpi_ev || hotkey_report_mode < 2)
-		acpi_bus_generate_proc_event(ibm->acpi->device, event, hkey);
+		/* Legacy events */
+		if (!ignore_acpi_ev && (send_acpi_ev || hotkey_report_mode < 2)) {
+			acpi_bus_generate_proc_event(ibm->acpi->device, event, hkey);
+		}
 
-	/* netlink events */
-	if (send_acpi_ev) {
-		acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
-						ibm->acpi->device->dev.bus_id,
-						event, hkey);
+		/* netlink events */
+		if (!ignore_acpi_ev && send_acpi_ev) {
+			acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class,
+							ibm->acpi->device->dev.bus_id,
+							event, hkey);
+		}
 	}
 }
 
@@ -2812,7 +2881,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm)
 
 	switch(thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 				&thermal_temp_input16_group);
 		if (res)
 			return res;
@@ -2820,7 +2889,7 @@ static int __init thermal_init(struct ibm_init_struct *iibm)
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		res = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 				&thermal_temp_input8_group);
 		if (res)
 			return res;
@@ -2837,13 +2906,13 @@ static void thermal_exit(void)
 {
 	switch(thermal_read_mode) {
 	case TPACPI_THERMAL_TPEC_16:
-		sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
 				   &thermal_temp_input16_group);
 		break;
 	case TPACPI_THERMAL_TPEC_8:
 	case TPACPI_THERMAL_ACPI_TMP07:
 	case TPACPI_THERMAL_ACPI_UPDT:
-		sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+		sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj,
 				   &thermal_temp_input16_group);
 		break;
 	case TPACPI_THERMAL_NONE:
@@ -3626,7 +3695,7 @@ static struct device_attribute dev_attr_fan_fan1_input =
 	__ATTR(fan1_input, S_IRUGO,
 		fan_fan1_input_show, NULL);
 
-/* sysfs fan fan_watchdog (driver) ------------------------------------- */
+/* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
 				     char *buf)
 {
@@ -3768,10 +3837,10 @@ static int __init fan_init(struct ibm_init_struct *iibm)
 
 	if (fan_status_access_mode != TPACPI_FAN_NONE ||
 	    fan_control_access_mode != TPACPI_FAN_WR_NONE) {
-		rc = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+		rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
 					 &fan_attr_group);
 		if (!(rc < 0))
-			rc = driver_create_file(&tpacpi_pdriver.driver,
+			rc = driver_create_file(&tpacpi_hwmon_pdriver.driver,
 					&driver_attr_fan_watchdog);
 		if (rc < 0)
 			return rc;
@@ -3854,8 +3923,8 @@ static void fan_exit(void)
 	vdbg_printk(TPACPI_DBG_EXIT, "cancelling any pending fan watchdog tasks\n");
 
 	/* FIXME: can we really do this unconditionally? */
-	sysfs_remove_group(&tpacpi_pdev->dev.kobj, &fan_attr_group);
-	driver_remove_file(&tpacpi_pdriver.driver, &driver_attr_fan_watchdog);
+	sysfs_remove_group(&tpacpi_sensors_pdev->dev.kobj, &fan_attr_group);
+	driver_remove_file(&tpacpi_hwmon_pdriver.driver, &driver_attr_fan_watchdog);
 
 	cancel_delayed_work(&fan_watchdog_task);
 	flush_scheduled_work();
@@ -3888,6 +3957,9 @@ static void fan_watchdog_fire(struct work_struct *ignored)
 {
 	int rc;
 
+	if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING)
+		return;
+
 	printk(IBM_NOTICE "fan watchdog: enabling fan\n");
 	rc = fan_set_enable();
 	if (rc < 0) {
@@ -3908,7 +3980,8 @@ static void fan_watchdog_reset(void)
 	if (fan_watchdog_active)
 		cancel_delayed_work(&fan_watchdog_task);
 
-	if (fan_watchdog_maxinterval > 0) {
+	if (fan_watchdog_maxinterval > 0 &&
+	    tpacpi_lifecycle != TPACPI_LIFE_EXITING) {
 		fan_watchdog_active = 1;
 		if (!schedule_delayed_work(&fan_watchdog_task,
 				msecs_to_jiffies(fan_watchdog_maxinterval
@@ -4302,6 +4375,19 @@ static struct ibm_struct fan_driver_data = {
  ****************************************************************************
  ****************************************************************************/
 
+/* sysfs name ---------------------------------------------------------- */
+static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", IBM_NAME);
+}
+
+static struct device_attribute dev_attr_thinkpad_acpi_pdev_name =
+	__ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
+
+/* --------------------------------------------------------------------- */
+
 /* /proc support */
 static struct proc_dir_entry *proc_dir;
 
@@ -4674,6 +4760,8 @@ static int __init thinkpad_acpi_module_init(void)
 {
 	int ret, i;
 
+	tpacpi_lifecycle = TPACPI_LIFE_INIT;
+
 	/* Parameter checking */
 	if (hotkey_report_mode > 2)
 		return -EINVAL;
@@ -4702,19 +4790,31 @@ static int __init thinkpad_acpi_module_init(void)
 
 	ret = platform_driver_register(&tpacpi_pdriver);
 	if (ret) {
-		printk(IBM_ERR "unable to register platform driver\n");
+		printk(IBM_ERR "unable to register main platform driver\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
 	tp_features.platform_drv_registered = 1;
 
+	ret = platform_driver_register(&tpacpi_hwmon_pdriver);
+	if (ret) {
+		printk(IBM_ERR "unable to register hwmon platform driver\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	tp_features.sensors_pdrv_registered = 1;
+
 	ret = tpacpi_create_driver_attributes(&tpacpi_pdriver.driver);
+	if (!ret) {
+		tp_features.platform_drv_attrs_registered = 1;
+		ret = tpacpi_create_driver_attributes(&tpacpi_hwmon_pdriver.driver);
+	}
 	if (ret) {
 		printk(IBM_ERR "unable to create sysfs driver attributes\n");
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
-	tp_features.platform_drv_attrs_registered = 1;
+	tp_features.sensors_pdrv_attrs_registered = 1;
 
 
 	/* Device initialization */
@@ -4727,7 +4827,26 @@ static int __init thinkpad_acpi_module_init(void)
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
-	tpacpi_hwmon = hwmon_device_register(&tpacpi_pdev->dev);
+	tpacpi_sensors_pdev = platform_device_register_simple(
+							IBM_HWMON_DRVR_NAME,
+							-1, NULL, 0);
+	if (IS_ERR(tpacpi_sensors_pdev)) {
+		ret = PTR_ERR(tpacpi_sensors_pdev);
+		tpacpi_sensors_pdev = NULL;
+		printk(IBM_ERR "unable to register hwmon platform device\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	ret = device_create_file(&tpacpi_sensors_pdev->dev,
+				 &dev_attr_thinkpad_acpi_pdev_name);
+	if (ret) {
+		printk(IBM_ERR
+			"unable to create sysfs hwmon device attributes\n");
+		thinkpad_acpi_module_exit();
+		return ret;
+	}
+	tp_features.sensors_pdev_attrs_registered = 1;
+	tpacpi_hwmon = hwmon_device_register(&tpacpi_sensors_pdev->dev);
 	if (IS_ERR(tpacpi_hwmon)) {
 		ret = PTR_ERR(tpacpi_hwmon);
 		tpacpi_hwmon = NULL;
@@ -4735,6 +4854,7 @@ static int __init thinkpad_acpi_module_init(void)
 		thinkpad_acpi_module_exit();
 		return ret;
 	}
+	mutex_init(&tpacpi_inputdev_send_mutex);
 	tpacpi_inputdev = input_allocate_device();
 	if (!tpacpi_inputdev) {
 		printk(IBM_ERR "unable to allocate input device\n");
@@ -4769,6 +4889,7 @@ static int __init thinkpad_acpi_module_init(void)
 		tp_features.input_device_registered = 1;
 	}
 
+	tpacpi_lifecycle = TPACPI_LIFE_RUNNING;
 	return 0;
 }
 
@@ -4776,6 +4897,8 @@ static void thinkpad_acpi_module_exit(void)
 {
 	struct ibm_struct *ibm, *itmp;
 
+	tpacpi_lifecycle = TPACPI_LIFE_EXITING;
+
 	list_for_each_entry_safe_reverse(ibm, itmp,
 					 &tpacpi_all_drivers,
 					 all_drivers) {
@@ -4794,12 +4917,22 @@ static void thinkpad_acpi_module_exit(void)
 	if (tpacpi_hwmon)
 		hwmon_device_unregister(tpacpi_hwmon);
 
+	if (tp_features.sensors_pdev_attrs_registered)
+		device_remove_file(&tpacpi_sensors_pdev->dev,
+				   &dev_attr_thinkpad_acpi_pdev_name);
+	if (tpacpi_sensors_pdev)
+		platform_device_unregister(tpacpi_sensors_pdev);
 	if (tpacpi_pdev)
 		platform_device_unregister(tpacpi_pdev);
 
+	if (tp_features.sensors_pdrv_attrs_registered)
+		tpacpi_remove_driver_attributes(&tpacpi_hwmon_pdriver.driver);
 	if (tp_features.platform_drv_attrs_registered)
 		tpacpi_remove_driver_attributes(&tpacpi_pdriver.driver);
 
+	if (tp_features.sensors_pdrv_registered)
+		platform_driver_unregister(&tpacpi_hwmon_pdriver);
+
 	if (tp_features.platform_drv_registered)
 		platform_driver_unregister(&tpacpi_pdriver);
 
diff --git a/drivers/misc/thinkpad_acpi.h b/drivers/misc/thinkpad_acpi.h
index acd5835ec88..3abcc812063 100644
--- a/drivers/misc/thinkpad_acpi.h
+++ b/drivers/misc/thinkpad_acpi.h
@@ -58,13 +58,14 @@
 
 #define IBM_NAME "thinkpad"
 #define IBM_DESC "ThinkPad ACPI Extras"
-#define IBM_FILE "thinkpad_acpi"
+#define IBM_FILE IBM_NAME "_acpi"
 #define IBM_URL "http://ibm-acpi.sf.net/"
 #define IBM_MAIL "ibm-acpi-devel@lists.sourceforge.net"
 
 #define IBM_PROC_DIR "ibm"
 #define IBM_ACPI_EVENT_PREFIX "ibm"
 #define IBM_DRVR_NAME IBM_FILE
+#define IBM_HWMON_DRVR_NAME IBM_NAME "_hwmon"
 
 #define IBM_LOG IBM_FILE ": "
 #define IBM_ERR	   KERN_ERR    IBM_LOG
@@ -171,6 +172,7 @@ static int parse_strtoul(const char *buf, unsigned long max,
 
 /* Device model */
 static struct platform_device *tpacpi_pdev;
+static struct platform_device *tpacpi_sensors_pdev;
 static struct device *tpacpi_hwmon;
 static struct platform_driver tpacpi_pdriver;
 static struct input_dev *tpacpi_inputdev;
@@ -233,22 +235,25 @@ struct ibm_init_struct {
 
 static struct {
 #ifdef CONFIG_THINKPAD_ACPI_BAY
-	u16 bay_status:1;
-	u16 bay_eject:1;
-	u16 bay_status2:1;
-	u16 bay_eject2:1;
+	u32 bay_status:1;
+	u32 bay_eject:1;
+	u32 bay_status2:1;
+	u32 bay_eject2:1;
 #endif
-	u16 bluetooth:1;
-	u16 hotkey:1;
-	u16 hotkey_mask:1;
-	u16 hotkey_wlsw:1;
-	u16 light:1;
-	u16 light_status:1;
-	u16 wan:1;
-	u16 fan_ctrl_status_undef:1;
-	u16 input_device_registered:1;
-	u16 platform_drv_registered:1;
-	u16 platform_drv_attrs_registered:1;
+	u32 bluetooth:1;
+	u32 hotkey:1;
+	u32 hotkey_mask:1;
+	u32 hotkey_wlsw:1;
+	u32 light:1;
+	u32 light_status:1;
+	u32 wan:1;
+	u32 fan_ctrl_status_undef:1;
+	u32 input_device_registered:1;
+	u32 platform_drv_registered:1;
+	u32 platform_drv_attrs_registered:1;
+	u32 sensors_pdrv_registered:1;
+	u32 sensors_pdrv_attrs_registered:1;
+	u32 sensors_pdev_attrs_registered:1;
 } tp_features;
 
 struct thinkpad_id_data {
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a4f1bf33164..6330c8cc72b 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1309,7 +1309,7 @@ static int ubi_thread(void *u)
 	struct ubi_device *ubi = u;
 
 	ubi_msg("background thread \"%s\" started, PID %d",
-		ubi->bgt_name, current->pid);
+		ubi->bgt_name, task_pid_nr(current));
 
 	set_freezable();
 	for (;;) {
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index 862f47223fd..6f8e7d4cf74 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -1491,7 +1491,7 @@ vortex_up(struct net_device *dev)
 	struct vortex_private *vp = netdev_priv(dev);
 	void __iomem *ioaddr = vp->ioaddr;
 	unsigned int config;
-	int i, mii_reg1, mii_reg5, err;
+	int i, mii_reg1, mii_reg5, err = 0;
 
 	if (VORTEX_PCI(vp)) {
 		pci_set_power_state(VORTEX_PCI(vp), PCI_D0);	/* Go active */
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 96cee4badd2..da767d3d5af 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -26,7 +26,7 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/dma-mapping.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <linux/delay.h>
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 7a045a37056..084f0292ea6 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -126,7 +126,7 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator);
 
 // ================= main 802.3ad protocol functions ==================
 static int ad_lacpdu_send(struct port *port);
-static int ad_marker_send(struct port *port, struct marker *marker);
+static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
@@ -139,8 +139,8 @@ static void ad_initialize_port(struct port *port, int lacp_fast);
 static void ad_initialize_lacpdu(struct lacpdu *Lacpdu);
 static void ad_enable_collecting_distributing(struct port *port);
 static void ad_disable_collecting_distributing(struct port *port);
-static void ad_marker_info_received(struct marker *marker_info, struct port *port);
-static void ad_marker_response_received(struct marker *marker, struct port *port);
+static void ad_marker_info_received(struct bond_marker *marker_info, struct port *port);
+static void ad_marker_response_received(struct bond_marker *marker, struct port *port);
 
 
 /////////////////////////////////////////////////////////////////////////////////
@@ -889,12 +889,12 @@ static int ad_lacpdu_send(struct port *port)
  * Returns:   0 on success
  *          < 0 on error
  */
-static int ad_marker_send(struct port *port, struct marker *marker)
+static int ad_marker_send(struct port *port, struct bond_marker *marker)
 {
 	struct slave *slave = port->slave;
 	struct sk_buff *skb;
-	struct marker_header *marker_header;
-	int length = sizeof(struct marker_header);
+	struct bond_marker_header *marker_header;
+	int length = sizeof(struct bond_marker_header);
 	struct mac_addr lacpdu_multicast_address = AD_MULTICAST_LACPDU_ADDR;
 
 	skb = dev_alloc_skb(length + 16);
@@ -909,7 +909,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
-	marker_header = (struct marker_header *)skb_put(skb, length);
+	marker_header = (struct bond_marker_header *)skb_put(skb, length);
 
 	marker_header->ad_header.destination_address = lacpdu_multicast_address;
 	/* Note: source addres is set to be the member's PERMANENT address, because we use it
@@ -1709,7 +1709,7 @@ static void ad_disable_collecting_distributing(struct port *port)
  */
 static void ad_marker_info_send(struct port *port)
 {
-	struct marker marker;
+	struct bond_marker marker;
 	u16 index;
 
 	// fill the marker PDU with the appropriate values
@@ -1742,13 +1742,14 @@ static void ad_marker_info_send(struct port *port)
  * @port: the port we're looking at
  *
  */
-static void ad_marker_info_received(struct marker *marker_info,struct port *port)
+static void ad_marker_info_received(struct bond_marker *marker_info,
+	struct port *port)
 {
-	struct marker marker;
+	struct bond_marker marker;
 
 	// copy the received marker data to the response marker
 	//marker = *marker_info;
-	memcpy(&marker, marker_info, sizeof(struct marker));
+	memcpy(&marker, marker_info, sizeof(struct bond_marker));
 	// change the marker subtype to marker response
 	marker.tlv_type=AD_MARKER_RESPONSE_SUBTYPE;
 	// send the marker response
@@ -1767,7 +1768,8 @@ static void ad_marker_info_received(struct marker *marker_info,struct port *port
  * response for marker PDU's, in this stage, but only to respond to marker
  * information.
  */
-static void ad_marker_response_received(struct marker *marker, struct port *port)
+static void ad_marker_response_received(struct bond_marker *marker,
+	struct port *port)
 {
 	marker=NULL; // just to satisfy the compiler
 	port=NULL;  // just to satisfy the compiler
@@ -2164,15 +2166,15 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
 		case AD_TYPE_MARKER:
 			// No need to convert fields to Little Endian since we don't use the marker's fields.
 
-			switch (((struct marker *)lacpdu)->tlv_type) {
+			switch (((struct bond_marker *)lacpdu)->tlv_type) {
 			case AD_MARKER_INFORMATION_SUBTYPE:
 				dprintk("Received Marker Information on port %d\n", port->actor_port_number);
-				ad_marker_info_received((struct marker *)lacpdu, port);
+				ad_marker_info_received((struct bond_marker *)lacpdu, port);
 				break;
 
 			case AD_MARKER_RESPONSE_SUBTYPE:
 				dprintk("Received Marker Response on port %d\n", port->actor_port_number);
-				ad_marker_response_received((struct marker *)lacpdu, port);
+				ad_marker_response_received((struct bond_marker *)lacpdu, port);
 				break;
 
 			default:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 862952fa6fd..f1655726494 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -92,7 +92,7 @@ typedef enum {
 typedef enum {
 	AD_MARKER_INFORMATION_SUBTYPE = 1, // marker imformation subtype
 	AD_MARKER_RESPONSE_SUBTYPE     // marker response subtype
-} marker_subtype_t;
+} bond_marker_subtype_t;
 
 // timers types(43.4.9 in the 802.3ad standard)
 typedef enum {
@@ -148,7 +148,7 @@ typedef struct lacpdu_header {
 } lacpdu_header_t;
 
 // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard)
-typedef struct marker {
+typedef struct bond_marker {
 	u8 subtype;		 //  = 0x02  (marker PDU)
 	u8 version_number;	 //  = 0x01
 	u8 tlv_type;		 //  = 0x01  (marker information)
@@ -161,12 +161,12 @@ typedef struct marker {
 	u8 tlv_type_terminator;	     //  = 0x00
 	u8 terminator_length;	     //  = 0x00
 	u8 reserved_90[90];	     //  = 0
-} marker_t;
+} bond_marker_t;
 
-typedef struct marker_header {
+typedef struct bond_marker_header {
 	struct ad_header ad_header;
-	struct marker marker;
-} marker_header_t;
+	struct bond_marker marker;
+} bond_marker_header_t;
 
 #pragma pack()
 
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 314b2f68f78..edd6828f0a7 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -234,6 +234,7 @@
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #include <linux/if.h>
 #include <linux/mii.h>
@@ -247,7 +248,6 @@
 #include <asm/irq.h>
 #include <asm/dma.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/ethernet.h>
 #include <asm/cache.h>
 
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 04426170338..2a3df145850 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -41,9 +41,9 @@
 #include <linux/timer.h>
 #include <linux/cache.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 #include "t3cdev.h"
 #include <asm/semaphore.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 
 typedef irqreturn_t(*intr_handler_t) (int, void *);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 243fc6b354b..e3dd8b13690 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -170,7 +170,6 @@ static char *version =
 
 
 /* Few macros */
-#define BIT(a)		       ( (1 << (a)) )
 #define BITSET(ioaddr, bnum)   ((outb(((inb(ioaddr)) | (bnum)), ioaddr)))
 #define BITCLR(ioaddr, bnum)   ((outb(((inb(ioaddr)) & (~(bnum))), ioaddr)))
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index cfbb7aacfe9..70ddf1acfd8 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -992,7 +992,7 @@ static void nv_enable_irq(struct net_device *dev)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			enable_irq(dev->irq);
+			enable_irq(np->pci_dev->irq);
 	} else {
 		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
@@ -1008,7 +1008,7 @@ static void nv_disable_irq(struct net_device *dev)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			disable_irq(dev->irq);
+			disable_irq(np->pci_dev->irq);
 	} else {
 		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_TX].vector);
@@ -1607,7 +1607,7 @@ static void nv_do_rx_refill(unsigned long data)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			disable_irq(dev->irq);
+			disable_irq(np->pci_dev->irq);
 	} else {
 		disable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 	}
@@ -1625,7 +1625,7 @@ static void nv_do_rx_refill(unsigned long data)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			enable_irq(dev->irq);
+			enable_irq(np->pci_dev->irq);
 	} else {
 		enable_irq(np->msi_x_entry[NV_MSI_X_VECTOR_RX].vector);
 	}
@@ -2408,13 +2408,13 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
 	struct fe_priv *np = netdev_priv(dev);
 	u32 flags;
 	u32 vlanflags = 0;
-	u32 rx_processed_cnt = 0;
+	int rx_work = 0;
 	struct sk_buff *skb;
 	int len;
 
 	while((np->get_rx.ex != np->put_rx.ex) &&
 	      !((flags = le32_to_cpu(np->get_rx.ex->flaglen)) & NV_RX2_AVAIL) &&
-	      (rx_processed_cnt++ < limit)) {
+	      (rx_work < limit)) {
 
 		dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
 					dev->name, flags);
@@ -2517,9 +2517,11 @@ next_pkt:
 			np->get_rx.ex = np->first_rx.ex;
 		if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
 			np->get_rx_ctx = np->first_rx_ctx;
+
+		rx_work++;
 	}
 
-	return rx_processed_cnt;
+	return rx_work;
 }
 
 static void set_bufsize(struct net_device *dev)
@@ -3558,10 +3560,12 @@ static int nv_request_irq(struct net_device *dev, int intr_test)
 	if (ret != 0 && np->msi_flags & NV_MSI_CAPABLE) {
 		if ((ret = pci_enable_msi(np->pci_dev)) == 0) {
 			np->msi_flags |= NV_MSI_ENABLED;
+			dev->irq = np->pci_dev->irq;
 			if (request_irq(np->pci_dev->irq, handler, IRQF_SHARED, dev->name, dev) != 0) {
 				printk(KERN_INFO "forcedeth: request_irq failed %d\n", ret);
 				pci_disable_msi(np->pci_dev);
 				np->msi_flags &= ~NV_MSI_ENABLED;
+				dev->irq = np->pci_dev->irq;
 				goto out_err;
 			}
 
@@ -3624,7 +3628,7 @@ static void nv_do_nic_poll(unsigned long data)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			disable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			disable_irq_lockdep(dev->irq);
+			disable_irq_lockdep(np->pci_dev->irq);
 		mask = np->irqmask;
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
@@ -3642,6 +3646,8 @@ static void nv_do_nic_poll(unsigned long data)
 	}
 	np->nic_poll_irq = 0;
 
+	/* disable_irq() contains synchronize_irq, thus no irq handler can run now */
+
 	if (np->recover_error) {
 		np->recover_error = 0;
 		printk(KERN_INFO "forcedeth: MAC in recoverable error state\n");
@@ -3678,7 +3684,6 @@ static void nv_do_nic_poll(unsigned long data)
 		}
 	}
 
-	/* FIXME: Do we need synchronize_irq(dev->irq) here? */
 
 	writel(mask, base + NvRegIrqMask);
 	pci_push(base);
@@ -3691,7 +3696,7 @@ static void nv_do_nic_poll(unsigned long data)
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			enable_irq_lockdep(np->msi_x_entry[NV_MSI_X_VECTOR_ALL].vector);
 		else
-			enable_irq_lockdep(dev->irq);
+			enable_irq_lockdep(np->pci_dev->irq);
 	} else {
 		if (np->nic_poll_irq & NVREG_IRQ_RX_ALL) {
 			nv_nic_irq_rx(0, dev);
@@ -4948,7 +4953,7 @@ static int nv_close(struct net_device *dev)
 #ifdef CONFIG_FORCEDETH_NAPI
 	napi_disable(&np->napi);
 #endif
-	synchronize_irq(dev->irq);
+	synchronize_irq(np->pci_dev->irq);
 
 	del_timer_sync(&np->oom_kick);
 	del_timer_sync(&np->nic_poll);
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 04c6faec88d..f2a4d399a6e 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -88,7 +88,7 @@ static void skb_align(struct sk_buff *skb, int align)
 static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 {
 	struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi);
-	struct net_device *dev = to_net_dev(fep->dev);
+	struct net_device *dev = fep->ndev;
 	const struct fs_platform_info *fpi = fep->fpi;
 	cbd_t __iomem *bdp;
 	struct sk_buff *skb, *skbn, *skbt;
@@ -217,7 +217,7 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 
 	fep->cur_rx = bdp;
 
-	if (received >= budget) {
+	if (received < budget) {
 		/* done */
 		netif_rx_complete(dev, napi);
 		(*fep->ops->napi_enable_rx)(dev);
@@ -807,20 +807,23 @@ static int fs_enet_open(struct net_device *dev)
 	int r;
 	int err;
 
-	napi_enable(&fep->napi);
+	if (fep->fpi->use_napi)
+		napi_enable(&fep->napi);
 
 	/* Install our interrupt handler. */
 	r = fs_request_irq(dev, fep->interrupt, "fs_enet-mac", fs_enet_interrupt);
 	if (r != 0) {
 		printk(KERN_ERR DRV_MODULE_NAME
 		       ": %s Could not allocate FS_ENET IRQ!", dev->name);
-		napi_disable(&fep->napi);
+		if (fep->fpi->use_napi)
+			napi_disable(&fep->napi);
 		return -EINVAL;
 	}
 
 	err = fs_init_phy(dev);
-	if(err) {
-		napi_disable(&fep->napi);
+	if (err) {
+		if (fep->fpi->use_napi)
+			napi_disable(&fep->napi);
 		return err;
 	}
 	phy_start(fep->phydev);
@@ -1232,7 +1235,7 @@ static int __devinit fs_enet_probe(struct of_device *ofdev,
 	fpi->rx_ring = 32;
 	fpi->tx_ring = 32;
 	fpi->rx_copybreak = 240;
-	fpi->use_napi = 0;
+	fpi->use_napi = 1;
 	fpi->napi_weight = 17;
 
 	ret = find_phy(ofdev->node, fpi);
@@ -1249,11 +1252,11 @@ static int __devinit fs_enet_probe(struct of_device *ofdev,
 		goto out_free_fpi;
 	}
 
-	SET_MODULE_OWNER(ndev);
 	dev_set_drvdata(&ofdev->dev, ndev);
 
 	fep = netdev_priv(ndev);
 	fep->dev = &ofdev->dev;
+	fep->ndev = ndev;
 	fep->fpi = fpi;
 	fep->ops = match->data;
 
@@ -1288,10 +1291,11 @@ static int __devinit fs_enet_probe(struct of_device *ofdev,
 	ndev->stop = fs_enet_close;
 	ndev->get_stats = fs_enet_get_stats;
 	ndev->set_multicast_list = fs_set_multicast_list;
-	if (fpi->use_napi) {
-		ndev->poll = fs_enet_rx_napi;
-		ndev->weight = fpi->napi_weight;
-	}
+
+	if (fpi->use_napi)
+		netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi,
+		               fpi->napi_weight);
+
 	ndev->ethtool_ops = &fs_ethtool_ops;
 	ndev->do_ioctl = fs_ioctl;
 
diff --git a/drivers/net/fs_enet/fs_enet.h b/drivers/net/fs_enet/fs_enet.h
index baf6477165a..c675e29aadc 100644
--- a/drivers/net/fs_enet/fs_enet.h
+++ b/drivers/net/fs_enet/fs_enet.h
@@ -75,6 +75,7 @@ struct phy_info {
 struct fs_enet_private {
 	struct napi_struct napi;
 	struct device *dev;	/* pointer back to the device (must be initialized first) */
+	struct net_device *ndev;
 	spinlock_t lock;	/* during all ops except TX pckt processing */
 	spinlock_t tx_lock;	/* during fs_start_xmit and fs_tx         */
 	struct fs_platform_info *fpi;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index cc288d8f6a5..38268d7335a 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -956,10 +956,12 @@ static int gfar_enet_open(struct net_device *dev)
 	}
 
 	err = startup_gfar(dev);
-	if (err)
+	if (err) {
 #ifdef CONFIG_GFAR_NAPI
 		napi_disable(&priv->napi);
 #endif
+		return err;
+	}
 
 	netif_start_queue(dev);
 
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index c16cc8b946a..46cd7735e6f 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -749,7 +749,6 @@ struct gfar_private {
 	uint32_t msg_enable;
 
 	/* Network Statistics */
-	struct net_device_stats stats;
 	struct gfar_extra_stats extra_stats;
 };
 
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index bc02e469480..11b83dae00a 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -21,6 +21,7 @@
 
 
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/if_arp.h>
@@ -35,7 +36,6 @@
 #include <linux/sockios.h>
 #include <linux/workqueue.h>
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/dma.h>
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c
index a680eb05ba6..9a88f71db00 100644
--- a/drivers/net/ibm_newemac/mal.c
+++ b/drivers/net/ibm_newemac/mal.c
@@ -322,7 +322,7 @@ void mal_poll_disable(struct mal_instance *mal, struct mal_commac *commac)
 		msleep(1);
 
 	/* Synchronize with the MAL NAPI poller */
-	__napi_synchronize(&mal->napi);
+	napi_synchronize(&mal->napi);
 }
 
 void mal_poll_enable(struct mal_instance *mal, struct mal_commac *commac)
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index 30854f09496..a19b5958cee 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -99,9 +99,9 @@ static char *version =
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/delay.h>
+#include <linux/bitops.h>
 
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/hwtest.h>
 #include <asm/macints.h>
diff --git a/drivers/net/meth.h b/drivers/net/meth.h
index ea3b8fc86d1..a78dc1ca8c2 100644
--- a/drivers/net/meth.h
+++ b/drivers/net/meth.h
@@ -28,9 +28,6 @@
 #define RX_BUFFER_OFFSET (sizeof(rx_status_vector)+2) /* staus vector + 2 bytes of padding */
 #define RX_BUCKET_SIZE 256
 
-#undef BIT
-#define BIT(x)	(1UL << (x))
-
 /* For more detailed explanations of what each field menas,
    see Nick's great comments to #defines below (or docs, if
    you are lucky enough toget hold of them :)*/
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 5f994b5beda..ff92aca0a7b 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -282,7 +282,6 @@ struct pcnet32_private {
 
 	struct net_device	*dev;
 	struct napi_struct	napi;
-	struct net_device_stats	stats;
 	char			tx_full;
 	char			phycount;	/* number of phys found */
 	int			options;
@@ -442,7 +441,9 @@ static struct pcnet32_access pcnet32_dwio = {
 
 static void pcnet32_netif_stop(struct net_device *dev)
 {
+#ifdef CONFIG_PCNET32_NAPI
 	struct pcnet32_private *lp = netdev_priv(dev);
+#endif
 	dev->trans_start = jiffies;
 #ifdef CONFIG_PCNET32_NAPI
 	napi_disable(&lp->napi);
@@ -452,7 +453,9 @@ static void pcnet32_netif_stop(struct net_device *dev)
 
 static void pcnet32_netif_start(struct net_device *dev)
 {
+#ifdef CONFIG_PCNET32_NAPI
 	struct pcnet32_private *lp = netdev_priv(dev);
+#endif
 	netif_wake_queue(dev);
 #ifdef CONFIG_PCNET32_NAPI
 	napi_enable(&lp->napi);
@@ -1178,15 +1181,15 @@ static void pcnet32_rx_entry(struct net_device *dev,
 		 * buffers, with only the last correctly noting the error.
 		 */
 		if (status & 0x01)	/* Only count a general error at the */
-			lp->stats.rx_errors++;	/* end of a packet. */
+			dev->stats.rx_errors++;	/* end of a packet. */
 		if (status & 0x20)
-			lp->stats.rx_frame_errors++;
+			dev->stats.rx_frame_errors++;
 		if (status & 0x10)
-			lp->stats.rx_over_errors++;
+			dev->stats.rx_over_errors++;
 		if (status & 0x08)
-			lp->stats.rx_crc_errors++;
+			dev->stats.rx_crc_errors++;
 		if (status & 0x04)
-			lp->stats.rx_fifo_errors++;
+			dev->stats.rx_fifo_errors++;
 		return;
 	}
 
@@ -1197,13 +1200,13 @@ static void pcnet32_rx_entry(struct net_device *dev,
 		if (netif_msg_drv(lp))
 			printk(KERN_ERR "%s: Impossible packet size %d!\n",
 			       dev->name, pkt_len);
-		lp->stats.rx_errors++;
+		dev->stats.rx_errors++;
 		return;
 	}
 	if (pkt_len < 60) {
 		if (netif_msg_rx_err(lp))
 			printk(KERN_ERR "%s: Runt packet!\n", dev->name);
-		lp->stats.rx_errors++;
+		dev->stats.rx_errors++;
 		return;
 	}
 
@@ -1237,7 +1240,7 @@ static void pcnet32_rx_entry(struct net_device *dev,
 			printk(KERN_ERR
 			       "%s: Memory squeeze, dropping packet.\n",
 			       dev->name);
-		lp->stats.rx_dropped++;
+		dev->stats.rx_dropped++;
 		return;
 	}
 	skb->dev = dev;
@@ -1256,7 +1259,7 @@ static void pcnet32_rx_entry(struct net_device *dev,
 					       pkt_len,
 					       PCI_DMA_FROMDEVICE);
 	}
-	lp->stats.rx_bytes += skb->len;
+	dev->stats.rx_bytes += skb->len;
 	skb->protocol = eth_type_trans(skb, dev);
 #ifdef CONFIG_PCNET32_NAPI
 	netif_receive_skb(skb);
@@ -1264,7 +1267,7 @@ static void pcnet32_rx_entry(struct net_device *dev,
 	netif_rx(skb);
 #endif
 	dev->last_rx = jiffies;
-	lp->stats.rx_packets++;
+	dev->stats.rx_packets++;
 	return;
 }
 
@@ -1312,21 +1315,21 @@ static int pcnet32_tx(struct net_device *dev)
 		if (status & 0x4000) {
 			/* There was a major error, log it. */
 			int err_status = le32_to_cpu(lp->tx_ring[entry].misc);
-			lp->stats.tx_errors++;
+			dev->stats.tx_errors++;
 			if (netif_msg_tx_err(lp))
 				printk(KERN_ERR
 				       "%s: Tx error status=%04x err_status=%08x\n",
 				       dev->name, status,
 				       err_status);
 			if (err_status & 0x04000000)
-				lp->stats.tx_aborted_errors++;
+				dev->stats.tx_aborted_errors++;
 			if (err_status & 0x08000000)
-				lp->stats.tx_carrier_errors++;
+				dev->stats.tx_carrier_errors++;
 			if (err_status & 0x10000000)
-				lp->stats.tx_window_errors++;
+				dev->stats.tx_window_errors++;
 #ifndef DO_DXSUFLO
 			if (err_status & 0x40000000) {
-				lp->stats.tx_fifo_errors++;
+				dev->stats.tx_fifo_errors++;
 				/* Ackk!  On FIFO errors the Tx unit is turned off! */
 				/* Remove this verbosity later! */
 				if (netif_msg_tx_err(lp))
@@ -1337,7 +1340,7 @@ static int pcnet32_tx(struct net_device *dev)
 			}
 #else
 			if (err_status & 0x40000000) {
-				lp->stats.tx_fifo_errors++;
+				dev->stats.tx_fifo_errors++;
 				if (!lp->dxsuflo) {	/* If controller doesn't recover ... */
 					/* Ackk!  On FIFO errors the Tx unit is turned off! */
 					/* Remove this verbosity later! */
@@ -1351,8 +1354,8 @@ static int pcnet32_tx(struct net_device *dev)
 #endif
 		} else {
 			if (status & 0x1800)
-				lp->stats.collisions++;
-			lp->stats.tx_packets++;
+				dev->stats.collisions++;
+			dev->stats.tx_packets++;
 		}
 
 		/* We must free the original skb */
@@ -1849,6 +1852,9 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 	lp->mii_if.mdio_read = mdio_read;
 	lp->mii_if.mdio_write = mdio_write;
 
+	/* napi.weight is used in both the napi and non-napi cases */
+	lp->napi.weight = lp->rx_ring_size / 2;
+
 #ifdef CONFIG_PCNET32_NAPI
 	netif_napi_add(dev, &lp->napi, pcnet32_poll, lp->rx_ring_size / 2);
 #endif
@@ -2471,7 +2477,7 @@ static void pcnet32_tx_timeout(struct net_device *dev)
 		       "%s: transmit timed out, status %4.4x, resetting.\n",
 		       dev->name, lp->a.read_csr(ioaddr, CSR0));
 	lp->a.write_csr(ioaddr, CSR0, CSR0_STOP);
-	lp->stats.tx_errors++;
+	dev->stats.tx_errors++;
 	if (netif_msg_tx_err(lp)) {
 		int i;
 		printk(KERN_DEBUG
@@ -2541,7 +2547,7 @@ static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	lp->tx_ring[entry].status = cpu_to_le16(status);
 
 	lp->cur_tx++;
-	lp->stats.tx_bytes += skb->len;
+	dev->stats.tx_bytes += skb->len;
 
 	/* Trigger an immediate send poll. */
 	lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN | CSR0_TXPOLL);
@@ -2586,7 +2592,7 @@ pcnet32_interrupt(int irq, void *dev_id)
 
 		/* Log misc errors. */
 		if (csr0 & 0x4000)
-			lp->stats.tx_errors++;	/* Tx babble. */
+			dev->stats.tx_errors++;	/* Tx babble. */
 		if (csr0 & 0x1000) {
 			/*
 			 * This happens when our receive ring is full. This
@@ -2599,7 +2605,7 @@ pcnet32_interrupt(int irq, void *dev_id)
 			 * don't get a rx interrupt, but a missed frame
 			 * interrupt sooner or later.
 			 */
-			lp->stats.rx_errors++;	/* Missed a Rx frame. */
+			dev->stats.rx_errors++;	/* Missed a Rx frame. */
 		}
 		if (csr0 & 0x0800) {
 			if (netif_msg_drv(lp))
@@ -2661,7 +2667,7 @@ static int pcnet32_close(struct net_device *dev)
 
 	spin_lock_irqsave(&lp->lock, flags);
 
-	lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
+	dev->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
 
 	if (netif_msg_ifdown(lp))
 		printk(KERN_DEBUG
@@ -2698,10 +2704,10 @@ static struct net_device_stats *pcnet32_get_stats(struct net_device *dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&lp->lock, flags);
-	lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
+	dev->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
 	spin_unlock_irqrestore(&lp->lock, flags);
 
-	return &lp->stats;
+	return &dev->stats;
 }
 
 /* taken from the sunlance driver, which it took from the depca driver */
diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h
index aef66e2d98d..01f08d726ac 100644
--- a/drivers/net/s2io-regs.h
+++ b/drivers/net/s2io-regs.h
@@ -20,17 +20,17 @@ struct XENA_dev_config {
 
 /* General Control-Status Registers */
 	u64 general_int_status;
-#define GEN_INTR_TXPIC             BIT(0)
-#define GEN_INTR_TXDMA             BIT(1)
-#define GEN_INTR_TXMAC             BIT(2)
-#define GEN_INTR_TXXGXS            BIT(3)
-#define GEN_INTR_TXTRAFFIC         BIT(8)
-#define GEN_INTR_RXPIC             BIT(32)
-#define GEN_INTR_RXDMA             BIT(33)
-#define GEN_INTR_RXMAC             BIT(34)
-#define GEN_INTR_MC                BIT(35)
-#define GEN_INTR_RXXGXS            BIT(36)
-#define GEN_INTR_RXTRAFFIC         BIT(40)
+#define GEN_INTR_TXPIC             s2BIT(0)
+#define GEN_INTR_TXDMA             s2BIT(1)
+#define GEN_INTR_TXMAC             s2BIT(2)
+#define GEN_INTR_TXXGXS            s2BIT(3)
+#define GEN_INTR_TXTRAFFIC         s2BIT(8)
+#define GEN_INTR_RXPIC             s2BIT(32)
+#define GEN_INTR_RXDMA             s2BIT(33)
+#define GEN_INTR_RXMAC             s2BIT(34)
+#define GEN_INTR_MC                s2BIT(35)
+#define GEN_INTR_RXXGXS            s2BIT(36)
+#define GEN_INTR_RXTRAFFIC         s2BIT(40)
 #define GEN_ERROR_INTR             GEN_INTR_TXPIC | GEN_INTR_RXPIC | \
                                    GEN_INTR_TXDMA | GEN_INTR_RXDMA | \
                                    GEN_INTR_TXMAC | GEN_INTR_RXMAC | \
@@ -54,36 +54,36 @@ struct XENA_dev_config {
 
 
 	u64 adapter_status;
-#define ADAPTER_STATUS_TDMA_READY          BIT(0)
-#define ADAPTER_STATUS_RDMA_READY          BIT(1)
-#define ADAPTER_STATUS_PFC_READY           BIT(2)
-#define ADAPTER_STATUS_TMAC_BUF_EMPTY      BIT(3)
-#define ADAPTER_STATUS_PIC_QUIESCENT       BIT(5)
-#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   BIT(6)
-#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    BIT(7)
+#define ADAPTER_STATUS_TDMA_READY          s2BIT(0)
+#define ADAPTER_STATUS_RDMA_READY          s2BIT(1)
+#define ADAPTER_STATUS_PFC_READY           s2BIT(2)
+#define ADAPTER_STATUS_TMAC_BUF_EMPTY      s2BIT(3)
+#define ADAPTER_STATUS_PIC_QUIESCENT       s2BIT(5)
+#define ADAPTER_STATUS_RMAC_REMOTE_FAULT   s2BIT(6)
+#define ADAPTER_STATUS_RMAC_LOCAL_FAULT    s2BIT(7)
 #define ADAPTER_STATUS_RMAC_PCC_IDLE       vBIT(0xFF,8,8)
 #define ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE  vBIT(0x0F,8,8)
 #define ADAPTER_STATUS_RC_PRC_QUIESCENT    vBIT(0xFF,16,8)
-#define ADAPTER_STATUS_MC_DRAM_READY       BIT(24)
-#define ADAPTER_STATUS_MC_QUEUES_READY     BIT(25)
-#define ADAPTER_STATUS_M_PLL_LOCK          BIT(30)
-#define ADAPTER_STATUS_P_PLL_LOCK          BIT(31)
+#define ADAPTER_STATUS_MC_DRAM_READY       s2BIT(24)
+#define ADAPTER_STATUS_MC_QUEUES_READY     s2BIT(25)
+#define ADAPTER_STATUS_M_PLL_LOCK          s2BIT(30)
+#define ADAPTER_STATUS_P_PLL_LOCK          s2BIT(31)
 
 	u64 adapter_control;
-#define ADAPTER_CNTL_EN                    BIT(7)
-#define ADAPTER_EOI_TX_ON                  BIT(15)
-#define ADAPTER_LED_ON                     BIT(23)
+#define ADAPTER_CNTL_EN                    s2BIT(7)
+#define ADAPTER_EOI_TX_ON                  s2BIT(15)
+#define ADAPTER_LED_ON                     s2BIT(23)
 #define ADAPTER_UDPI(val)                  vBIT(val,36,4)
-#define ADAPTER_WAIT_INT                   BIT(48)
-#define ADAPTER_ECC_EN                     BIT(55)
+#define ADAPTER_WAIT_INT                   s2BIT(48)
+#define ADAPTER_ECC_EN                     s2BIT(55)
 
 	u64 serr_source;
-#define SERR_SOURCE_PIC			BIT(0)
-#define SERR_SOURCE_TXDMA		BIT(1)
-#define SERR_SOURCE_RXDMA		BIT(2)
-#define SERR_SOURCE_MAC                 BIT(3)
-#define SERR_SOURCE_MC                  BIT(4)
-#define SERR_SOURCE_XGXS                BIT(5)
+#define SERR_SOURCE_PIC			s2BIT(0)
+#define SERR_SOURCE_TXDMA		s2BIT(1)
+#define SERR_SOURCE_RXDMA		s2BIT(2)
+#define SERR_SOURCE_MAC                 s2BIT(3)
+#define SERR_SOURCE_MC                  s2BIT(4)
+#define SERR_SOURCE_XGXS                s2BIT(5)
 #define	SERR_SOURCE_ANY			(SERR_SOURCE_PIC	| \
 					SERR_SOURCE_TXDMA	| \
 					SERR_SOURCE_RXDMA	| \
@@ -101,41 +101,41 @@ struct XENA_dev_config {
 #define	PCI_MODE_PCIX_M2_66		0x5
 #define	PCI_MODE_PCIX_M2_100		0x6
 #define	PCI_MODE_PCIX_M2_133		0x7
-#define	PCI_MODE_UNSUPPORTED		BIT(0)
-#define	PCI_MODE_32_BITS		BIT(8)
-#define	PCI_MODE_UNKNOWN_MODE		BIT(9)
+#define	PCI_MODE_UNSUPPORTED		s2BIT(0)
+#define	PCI_MODE_32_BITS		s2BIT(8)
+#define	PCI_MODE_UNKNOWN_MODE		s2BIT(9)
 
 	u8 unused_0[0x800 - 0x128];
 
 /* PCI-X Controller registers */
 	u64 pic_int_status;
 	u64 pic_int_mask;
-#define PIC_INT_TX                     BIT(0)
-#define PIC_INT_FLSH                   BIT(1)
-#define PIC_INT_MDIO                   BIT(2)
-#define PIC_INT_IIC                    BIT(3)
-#define PIC_INT_GPIO                   BIT(4)
-#define PIC_INT_RX                     BIT(32)
+#define PIC_INT_TX                     s2BIT(0)
+#define PIC_INT_FLSH                   s2BIT(1)
+#define PIC_INT_MDIO                   s2BIT(2)
+#define PIC_INT_IIC                    s2BIT(3)
+#define PIC_INT_GPIO                   s2BIT(4)
+#define PIC_INT_RX                     s2BIT(32)
 
 	u64 txpic_int_reg;
 	u64 txpic_int_mask;
-#define PCIX_INT_REG_ECC_SG_ERR                BIT(0)
-#define PCIX_INT_REG_ECC_DB_ERR                BIT(1)
-#define PCIX_INT_REG_FLASHR_R_FSM_ERR          BIT(8)
-#define PCIX_INT_REG_FLASHR_W_FSM_ERR          BIT(9)
-#define PCIX_INT_REG_INI_TX_FSM_SERR           BIT(10)
-#define PCIX_INT_REG_INI_TXO_FSM_ERR           BIT(11)
-#define PCIX_INT_REG_TRT_FSM_SERR              BIT(13)
-#define PCIX_INT_REG_SRT_FSM_SERR              BIT(14)
-#define PCIX_INT_REG_PIFR_FSM_SERR             BIT(15)
-#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      BIT(21)
-#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       BIT(23)
-#define PCIX_INT_REG_INI_RX_FSM_SERR           BIT(48)
-#define PCIX_INT_REG_RA_RX_FSM_SERR            BIT(50)
+#define PCIX_INT_REG_ECC_SG_ERR                s2BIT(0)
+#define PCIX_INT_REG_ECC_DB_ERR                s2BIT(1)
+#define PCIX_INT_REG_FLASHR_R_FSM_ERR          s2BIT(8)
+#define PCIX_INT_REG_FLASHR_W_FSM_ERR          s2BIT(9)
+#define PCIX_INT_REG_INI_TX_FSM_SERR           s2BIT(10)
+#define PCIX_INT_REG_INI_TXO_FSM_ERR           s2BIT(11)
+#define PCIX_INT_REG_TRT_FSM_SERR              s2BIT(13)
+#define PCIX_INT_REG_SRT_FSM_SERR              s2BIT(14)
+#define PCIX_INT_REG_PIFR_FSM_SERR             s2BIT(15)
+#define PCIX_INT_REG_WRC_TX_SEND_FSM_SERR      s2BIT(21)
+#define PCIX_INT_REG_RRC_TX_REQ_FSM_SERR       s2BIT(23)
+#define PCIX_INT_REG_INI_RX_FSM_SERR           s2BIT(48)
+#define PCIX_INT_REG_RA_RX_FSM_SERR            s2BIT(50)
 /*
-#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      BIT(52)
-#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       BIT(54)
-#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     BIT(58)
+#define PCIX_INT_REG_WRC_RX_SEND_FSM_SERR      s2BIT(52)
+#define PCIX_INT_REG_RRC_RX_REQ_FSM_SERR       s2BIT(54)
+#define PCIX_INT_REG_RRC_RX_SPLIT_FSM_SERR     s2BIT(58)
 */
 	u64 txpic_alarms;
 	u64 rxpic_int_reg;
@@ -144,92 +144,92 @@ struct XENA_dev_config {
 
 	u64 flsh_int_reg;
 	u64 flsh_int_mask;
-#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         BIT(63)
-#define PIC_FLSH_INT_REG_ERR                   BIT(62)
+#define PIC_FLSH_INT_REG_CYCLE_FSM_ERR         s2BIT(63)
+#define PIC_FLSH_INT_REG_ERR                   s2BIT(62)
 	u64 flash_alarms;
 
 	u64 mdio_int_reg;
 	u64 mdio_int_mask;
-#define MDIO_INT_REG_MDIO_BUS_ERR              BIT(0)
-#define MDIO_INT_REG_DTX_BUS_ERR               BIT(8)
-#define MDIO_INT_REG_LASI                      BIT(39)
+#define MDIO_INT_REG_MDIO_BUS_ERR              s2BIT(0)
+#define MDIO_INT_REG_DTX_BUS_ERR               s2BIT(8)
+#define MDIO_INT_REG_LASI                      s2BIT(39)
 	u64 mdio_alarms;
 
 	u64 iic_int_reg;
 	u64 iic_int_mask;
-#define IIC_INT_REG_BUS_FSM_ERR                BIT(4)
-#define IIC_INT_REG_BIT_FSM_ERR                BIT(5)
-#define IIC_INT_REG_CYCLE_FSM_ERR              BIT(6)
-#define IIC_INT_REG_REQ_FSM_ERR                BIT(7)
-#define IIC_INT_REG_ACK_ERR                    BIT(8)
+#define IIC_INT_REG_BUS_FSM_ERR                s2BIT(4)
+#define IIC_INT_REG_BIT_FSM_ERR                s2BIT(5)
+#define IIC_INT_REG_CYCLE_FSM_ERR              s2BIT(6)
+#define IIC_INT_REG_REQ_FSM_ERR                s2BIT(7)
+#define IIC_INT_REG_ACK_ERR                    s2BIT(8)
 	u64 iic_alarms;
 
 	u8 unused4[0x08];
 
 	u64 gpio_int_reg;
-#define GPIO_INT_REG_DP_ERR_INT                BIT(0)
-#define GPIO_INT_REG_LINK_DOWN                 BIT(1)
-#define GPIO_INT_REG_LINK_UP                   BIT(2)
+#define GPIO_INT_REG_DP_ERR_INT                s2BIT(0)
+#define GPIO_INT_REG_LINK_DOWN                 s2BIT(1)
+#define GPIO_INT_REG_LINK_UP                   s2BIT(2)
 	u64 gpio_int_mask;
-#define GPIO_INT_MASK_LINK_DOWN                BIT(1)
-#define GPIO_INT_MASK_LINK_UP                  BIT(2)
+#define GPIO_INT_MASK_LINK_DOWN                s2BIT(1)
+#define GPIO_INT_MASK_LINK_UP                  s2BIT(2)
 	u64 gpio_alarms;
 
 	u8 unused5[0x38];
 
 	u64 tx_traffic_int;
-#define TX_TRAFFIC_INT_n(n)                    BIT(n)
+#define TX_TRAFFIC_INT_n(n)                    s2BIT(n)
 	u64 tx_traffic_mask;
 
 	u64 rx_traffic_int;
-#define RX_TRAFFIC_INT_n(n)                    BIT(n)
+#define RX_TRAFFIC_INT_n(n)                    s2BIT(n)
 	u64 rx_traffic_mask;
 
 /* PIC Control registers */
 	u64 pic_control;
-#define PIC_CNTL_RX_ALARM_MAP_1                BIT(0)
+#define PIC_CNTL_RX_ALARM_MAP_1                s2BIT(0)
 #define PIC_CNTL_SHARED_SPLITS(n)              vBIT(n,11,5)
 
 	u64 swapper_ctrl;
-#define SWAPPER_CTRL_PIF_R_FE                  BIT(0)
-#define SWAPPER_CTRL_PIF_R_SE                  BIT(1)
-#define SWAPPER_CTRL_PIF_W_FE                  BIT(8)
-#define SWAPPER_CTRL_PIF_W_SE                  BIT(9)
-#define SWAPPER_CTRL_TXP_FE                    BIT(16)
-#define SWAPPER_CTRL_TXP_SE                    BIT(17)
-#define SWAPPER_CTRL_TXD_R_FE                  BIT(18)
-#define SWAPPER_CTRL_TXD_R_SE                  BIT(19)
-#define SWAPPER_CTRL_TXD_W_FE                  BIT(20)
-#define SWAPPER_CTRL_TXD_W_SE                  BIT(21)
-#define SWAPPER_CTRL_TXF_R_FE                  BIT(22)
-#define SWAPPER_CTRL_TXF_R_SE                  BIT(23)
-#define SWAPPER_CTRL_RXD_R_FE                  BIT(32)
-#define SWAPPER_CTRL_RXD_R_SE                  BIT(33)
-#define SWAPPER_CTRL_RXD_W_FE                  BIT(34)
-#define SWAPPER_CTRL_RXD_W_SE                  BIT(35)
-#define SWAPPER_CTRL_RXF_W_FE                  BIT(36)
-#define SWAPPER_CTRL_RXF_W_SE                  BIT(37)
-#define SWAPPER_CTRL_XMSI_FE                   BIT(40)
-#define SWAPPER_CTRL_XMSI_SE                   BIT(41)
-#define SWAPPER_CTRL_STATS_FE                  BIT(48)
-#define SWAPPER_CTRL_STATS_SE                  BIT(49)
+#define SWAPPER_CTRL_PIF_R_FE                  s2BIT(0)
+#define SWAPPER_CTRL_PIF_R_SE                  s2BIT(1)
+#define SWAPPER_CTRL_PIF_W_FE                  s2BIT(8)
+#define SWAPPER_CTRL_PIF_W_SE                  s2BIT(9)
+#define SWAPPER_CTRL_TXP_FE                    s2BIT(16)
+#define SWAPPER_CTRL_TXP_SE                    s2BIT(17)
+#define SWAPPER_CTRL_TXD_R_FE                  s2BIT(18)
+#define SWAPPER_CTRL_TXD_R_SE                  s2BIT(19)
+#define SWAPPER_CTRL_TXD_W_FE                  s2BIT(20)
+#define SWAPPER_CTRL_TXD_W_SE                  s2BIT(21)
+#define SWAPPER_CTRL_TXF_R_FE                  s2BIT(22)
+#define SWAPPER_CTRL_TXF_R_SE                  s2BIT(23)
+#define SWAPPER_CTRL_RXD_R_FE                  s2BIT(32)
+#define SWAPPER_CTRL_RXD_R_SE                  s2BIT(33)
+#define SWAPPER_CTRL_RXD_W_FE                  s2BIT(34)
+#define SWAPPER_CTRL_RXD_W_SE                  s2BIT(35)
+#define SWAPPER_CTRL_RXF_W_FE                  s2BIT(36)
+#define SWAPPER_CTRL_RXF_W_SE                  s2BIT(37)
+#define SWAPPER_CTRL_XMSI_FE                   s2BIT(40)
+#define SWAPPER_CTRL_XMSI_SE                   s2BIT(41)
+#define SWAPPER_CTRL_STATS_FE                  s2BIT(48)
+#define SWAPPER_CTRL_STATS_SE                  s2BIT(49)
 
 	u64 pif_rd_swapper_fb;
 #define IF_RD_SWAPPER_FB                            0x0123456789ABCDEF
 
 	u64 scheduled_int_ctrl;
-#define SCHED_INT_CTRL_TIMER_EN                BIT(0)
-#define SCHED_INT_CTRL_ONE_SHOT                BIT(1)
+#define SCHED_INT_CTRL_TIMER_EN                s2BIT(0)
+#define SCHED_INT_CTRL_ONE_SHOT                s2BIT(1)
 #define SCHED_INT_CTRL_INT2MSI(val)		vBIT(val,10,6)
 #define SCHED_INT_PERIOD                       TBD
 
 	u64 txreqtimeout;
 #define TXREQTO_VAL(val)						vBIT(val,0,32)
-#define TXREQTO_EN								BIT(63)
+#define TXREQTO_EN								s2BIT(63)
 
 	u64 statsreqtimeout;
 #define STATREQTO_VAL(n)                       TBD
-#define STATREQTO_EN                           BIT(63)
+#define STATREQTO_EN                           s2BIT(63)
 
 	u64 read_retry_delay;
 	u64 read_retry_acceleration;
@@ -255,10 +255,10 @@ struct XENA_dev_config {
 
 	/* Automated statistics collection */
 	u64 stat_cfg;
-#define STAT_CFG_STAT_EN           BIT(0)
-#define STAT_CFG_ONE_SHOT_EN       BIT(1)
-#define STAT_CFG_STAT_NS_EN        BIT(8)
-#define STAT_CFG_STAT_RO           BIT(9)
+#define STAT_CFG_STAT_EN           s2BIT(0)
+#define STAT_CFG_ONE_SHOT_EN       s2BIT(1)
+#define STAT_CFG_STAT_NS_EN        s2BIT(8)
+#define STAT_CFG_STAT_RO           s2BIT(9)
 #define STAT_TRSF_PER(n)           TBD
 #define	PER_SEC					   0x208d5
 #define	SET_UPDT_PERIOD(n)		   vBIT((PER_SEC*n),32,32)
@@ -290,18 +290,18 @@ struct XENA_dev_config {
 #define	I2C_CONTROL_DEV_ID(id)		vBIT(id,1,3)
 #define	I2C_CONTROL_ADDR(addr)		vBIT(addr,5,11)
 #define	I2C_CONTROL_BYTE_CNT(cnt)	vBIT(cnt,22,2)
-#define	I2C_CONTROL_READ			BIT(24)
-#define	I2C_CONTROL_NACK			BIT(25)
+#define	I2C_CONTROL_READ			s2BIT(24)
+#define	I2C_CONTROL_NACK			s2BIT(25)
 #define	I2C_CONTROL_CNTL_START		vBIT(0xE,28,4)
 #define	I2C_CONTROL_CNTL_END(val)	(val & vBIT(0x1,28,4))
 #define	I2C_CONTROL_GET_DATA(val)	(u32)(val & 0xFFFFFFFF)
 #define	I2C_CONTROL_SET_DATA(val)	vBIT(val,32,32)
 
 	u64 gpio_control;
-#define GPIO_CTRL_GPIO_0		BIT(8)
+#define GPIO_CTRL_GPIO_0		s2BIT(8)
 	u64 misc_control;
-#define FAULT_BEHAVIOUR			BIT(0)
-#define EXT_REQ_EN			BIT(1)
+#define FAULT_BEHAVIOUR			s2BIT(0)
+#define EXT_REQ_EN			s2BIT(1)
 #define MISC_LINK_STABILITY_PRD(val)   vBIT(val,29,3)
 
 	u8 unused7_1[0x230 - 0x208];
@@ -317,29 +317,29 @@ struct XENA_dev_config {
 /* TxDMA registers */
 	u64 txdma_int_status;
 	u64 txdma_int_mask;
-#define TXDMA_PFC_INT                  BIT(0)
-#define TXDMA_TDA_INT                  BIT(1)
-#define TXDMA_PCC_INT                  BIT(2)
-#define TXDMA_TTI_INT                  BIT(3)
-#define TXDMA_LSO_INT                  BIT(4)
-#define TXDMA_TPA_INT                  BIT(5)
-#define TXDMA_SM_INT                   BIT(6)
+#define TXDMA_PFC_INT                  s2BIT(0)
+#define TXDMA_TDA_INT                  s2BIT(1)
+#define TXDMA_PCC_INT                  s2BIT(2)
+#define TXDMA_TTI_INT                  s2BIT(3)
+#define TXDMA_LSO_INT                  s2BIT(4)
+#define TXDMA_TPA_INT                  s2BIT(5)
+#define TXDMA_SM_INT                   s2BIT(6)
 	u64 pfc_err_reg;
-#define PFC_ECC_SG_ERR			BIT(7)
-#define PFC_ECC_DB_ERR			BIT(15)
-#define PFC_SM_ERR_ALARM		BIT(23)
-#define PFC_MISC_0_ERR			BIT(31)
-#define PFC_MISC_1_ERR			BIT(32)
-#define PFC_PCIX_ERR			BIT(39)
+#define PFC_ECC_SG_ERR			s2BIT(7)
+#define PFC_ECC_DB_ERR			s2BIT(15)
+#define PFC_SM_ERR_ALARM		s2BIT(23)
+#define PFC_MISC_0_ERR			s2BIT(31)
+#define PFC_MISC_1_ERR			s2BIT(32)
+#define PFC_PCIX_ERR			s2BIT(39)
 	u64 pfc_err_mask;
 	u64 pfc_err_alarm;
 
 	u64 tda_err_reg;
 #define TDA_Fn_ECC_SG_ERR		vBIT(0xff,0,8)
 #define TDA_Fn_ECC_DB_ERR		vBIT(0xff,8,8)
-#define TDA_SM0_ERR_ALARM		BIT(22)
-#define TDA_SM1_ERR_ALARM		BIT(23)
-#define TDA_PCIX_ERR			BIT(39)
+#define TDA_SM0_ERR_ALARM		s2BIT(22)
+#define TDA_SM1_ERR_ALARM		s2BIT(23)
+#define TDA_PCIX_ERR			s2BIT(39)
 	u64 tda_err_mask;
 	u64 tda_err_alarm;
 
@@ -351,40 +351,40 @@ struct XENA_dev_config {
 #define PCC_SM_ERR_ALARM		vBIT(0xff,32,8)
 #define PCC_WR_ERR_ALARM		vBIT(0xff,40,8)
 #define PCC_N_SERR			vBIT(0xff,48,8)
-#define PCC_6_COF_OV_ERR		BIT(56)
-#define PCC_7_COF_OV_ERR		BIT(57)
-#define PCC_6_LSO_OV_ERR		BIT(58)
-#define PCC_7_LSO_OV_ERR		BIT(59)
+#define PCC_6_COF_OV_ERR		s2BIT(56)
+#define PCC_7_COF_OV_ERR		s2BIT(57)
+#define PCC_6_LSO_OV_ERR		s2BIT(58)
+#define PCC_7_LSO_OV_ERR		s2BIT(59)
 #define PCC_ENABLE_FOUR			vBIT(0x0F,0,8)
 	u64 pcc_err_mask;
 	u64 pcc_err_alarm;
 
 	u64 tti_err_reg;
-#define TTI_ECC_SG_ERR			BIT(7)
-#define TTI_ECC_DB_ERR			BIT(15)
-#define TTI_SM_ERR_ALARM		BIT(23)
+#define TTI_ECC_SG_ERR			s2BIT(7)
+#define TTI_ECC_DB_ERR			s2BIT(15)
+#define TTI_SM_ERR_ALARM		s2BIT(23)
 	u64 tti_err_mask;
 	u64 tti_err_alarm;
 
 	u64 lso_err_reg;
-#define LSO6_SEND_OFLOW			BIT(12)
-#define LSO7_SEND_OFLOW			BIT(13)
-#define LSO6_ABORT			BIT(14)
-#define LSO7_ABORT			BIT(15)
-#define LSO6_SM_ERR_ALARM		BIT(22)
-#define LSO7_SM_ERR_ALARM		BIT(23)
+#define LSO6_SEND_OFLOW			s2BIT(12)
+#define LSO7_SEND_OFLOW			s2BIT(13)
+#define LSO6_ABORT			s2BIT(14)
+#define LSO7_ABORT			s2BIT(15)
+#define LSO6_SM_ERR_ALARM		s2BIT(22)
+#define LSO7_SM_ERR_ALARM		s2BIT(23)
 	u64 lso_err_mask;
 	u64 lso_err_alarm;
 
 	u64 tpa_err_reg;
-#define TPA_TX_FRM_DROP			BIT(7)
-#define TPA_SM_ERR_ALARM		BIT(23)
+#define TPA_TX_FRM_DROP			s2BIT(7)
+#define TPA_SM_ERR_ALARM		s2BIT(23)
 
 	u64 tpa_err_mask;
 	u64 tpa_err_alarm;
 
 	u64 sm_err_reg;
-#define SM_SM_ERR_ALARM			BIT(15)
+#define SM_SM_ERR_ALARM			s2BIT(15)
 	u64 sm_err_mask;
 	u64 sm_err_alarm;
 
@@ -397,7 +397,7 @@ struct XENA_dev_config {
 #define X_MAX_FIFOS                        8
 #define X_FIFO_MAX_LEN                     0x1FFF	/*8191 */
 	u64 tx_fifo_partition_0;
-#define TX_FIFO_PARTITION_EN               BIT(0)
+#define TX_FIFO_PARTITION_EN               s2BIT(0)
 #define TX_FIFO_PARTITION_0_PRI(val)       vBIT(val,5,3)
 #define TX_FIFO_PARTITION_0_LEN(val)       vBIT(val,19,13)
 #define TX_FIFO_PARTITION_1_PRI(val)       vBIT(val,37,3)
@@ -437,16 +437,16 @@ struct XENA_dev_config {
 	u64 tx_w_round_robin_4;
 
 	u64 tti_command_mem;
-#define TTI_CMD_MEM_WE                     BIT(7)
-#define TTI_CMD_MEM_STROBE_NEW_CMD         BIT(15)
-#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  BIT(15)
+#define TTI_CMD_MEM_WE                     s2BIT(7)
+#define TTI_CMD_MEM_STROBE_NEW_CMD         s2BIT(15)
+#define TTI_CMD_MEM_STROBE_BEING_EXECUTED  s2BIT(15)
 #define TTI_CMD_MEM_OFFSET(n)              vBIT(n,26,6)
 
 	u64 tti_data1_mem;
 #define TTI_DATA1_MEM_TX_TIMER_VAL(n)      vBIT(n,6,26)
 #define TTI_DATA1_MEM_TX_TIMER_AC_CI(n)    vBIT(n,38,2)
-#define TTI_DATA1_MEM_TX_TIMER_AC_EN       BIT(38)
-#define TTI_DATA1_MEM_TX_TIMER_CI_EN       BIT(39)
+#define TTI_DATA1_MEM_TX_TIMER_AC_EN       s2BIT(38)
+#define TTI_DATA1_MEM_TX_TIMER_CI_EN       s2BIT(39)
 #define TTI_DATA1_MEM_TX_URNG_A(n)         vBIT(n,41,7)
 #define TTI_DATA1_MEM_TX_URNG_B(n)         vBIT(n,49,7)
 #define TTI_DATA1_MEM_TX_URNG_C(n)         vBIT(n,57,7)
@@ -459,11 +459,11 @@ struct XENA_dev_config {
 
 /* Tx Protocol assist */
 	u64 tx_pa_cfg;
-#define TX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define TX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define TX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define	TX_PA_CFG_IGNORE_L2_ERR			   BIT(6)
-#define RX_PA_CFG_STRIP_VLAN_TAG		BIT(15)
+#define TX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define TX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define TX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define	TX_PA_CFG_IGNORE_L2_ERR			   s2BIT(6)
+#define RX_PA_CFG_STRIP_VLAN_TAG		s2BIT(15)
 
 /* Recent add, used only debug purposes. */
 	u64 pcc_enable;
@@ -477,31 +477,31 @@ struct XENA_dev_config {
 /* RxDMA Registers */
 	u64 rxdma_int_status;
 	u64 rxdma_int_mask;
-#define RXDMA_INT_RC_INT_M             BIT(0)
-#define RXDMA_INT_RPA_INT_M            BIT(1)
-#define RXDMA_INT_RDA_INT_M            BIT(2)
-#define RXDMA_INT_RTI_INT_M            BIT(3)
+#define RXDMA_INT_RC_INT_M             s2BIT(0)
+#define RXDMA_INT_RPA_INT_M            s2BIT(1)
+#define RXDMA_INT_RDA_INT_M            s2BIT(2)
+#define RXDMA_INT_RTI_INT_M            s2BIT(3)
 
 	u64 rda_err_reg;
 #define RDA_RXDn_ECC_SG_ERR		vBIT(0xFF,0,8)
 #define RDA_RXDn_ECC_DB_ERR		vBIT(0xFF,8,8)
-#define RDA_FRM_ECC_SG_ERR		BIT(23)
-#define RDA_FRM_ECC_DB_N_AERR		BIT(31)
-#define RDA_SM1_ERR_ALARM		BIT(38)
-#define RDA_SM0_ERR_ALARM		BIT(39)
-#define RDA_MISC_ERR			BIT(47)
-#define RDA_PCIX_ERR			BIT(55)
-#define RDA_RXD_ECC_DB_SERR		BIT(63)
+#define RDA_FRM_ECC_SG_ERR		s2BIT(23)
+#define RDA_FRM_ECC_DB_N_AERR		s2BIT(31)
+#define RDA_SM1_ERR_ALARM		s2BIT(38)
+#define RDA_SM0_ERR_ALARM		s2BIT(39)
+#define RDA_MISC_ERR			s2BIT(47)
+#define RDA_PCIX_ERR			s2BIT(55)
+#define RDA_RXD_ECC_DB_SERR		s2BIT(63)
 	u64 rda_err_mask;
 	u64 rda_err_alarm;
 
 	u64 rc_err_reg;
 #define RC_PRCn_ECC_SG_ERR		vBIT(0xFF,0,8)
 #define RC_PRCn_ECC_DB_ERR		vBIT(0xFF,8,8)
-#define RC_FTC_ECC_SG_ERR		BIT(23)
-#define RC_FTC_ECC_DB_ERR		BIT(31)
+#define RC_FTC_ECC_SG_ERR		s2BIT(23)
+#define RC_FTC_ECC_DB_ERR		s2BIT(31)
 #define RC_PRCn_SM_ERR_ALARM		vBIT(0xFF,32,8)
-#define RC_FTC_SM_ERR_ALARM		BIT(47)
+#define RC_FTC_SM_ERR_ALARM		s2BIT(47)
 #define RC_RDA_FAIL_WR_Rn		vBIT(0xFF,48,8)
 	u64 rc_err_mask;
 	u64 rc_err_alarm;
@@ -517,18 +517,18 @@ struct XENA_dev_config {
 	u64 prc_pcix_err_alarm;
 
 	u64 rpa_err_reg;
-#define RPA_ECC_SG_ERR			BIT(7)
-#define RPA_ECC_DB_ERR			BIT(15)
-#define RPA_FLUSH_REQUEST		BIT(22)
-#define RPA_SM_ERR_ALARM		BIT(23)
-#define RPA_CREDIT_ERR			BIT(31)
+#define RPA_ECC_SG_ERR			s2BIT(7)
+#define RPA_ECC_DB_ERR			s2BIT(15)
+#define RPA_FLUSH_REQUEST		s2BIT(22)
+#define RPA_SM_ERR_ALARM		s2BIT(23)
+#define RPA_CREDIT_ERR			s2BIT(31)
 	u64 rpa_err_mask;
 	u64 rpa_err_alarm;
 
 	u64 rti_err_reg;
-#define RTI_ECC_SG_ERR			BIT(7)
-#define RTI_ECC_DB_ERR			BIT(15)
-#define RTI_SM_ERR_ALARM		BIT(23)
+#define RTI_ECC_SG_ERR			s2BIT(7)
+#define RTI_ECC_DB_ERR			s2BIT(15)
+#define RTI_SM_ERR_ALARM		s2BIT(23)
 	u64 rti_err_mask;
 	u64 rti_err_alarm;
 
@@ -568,49 +568,49 @@ struct XENA_dev_config {
 #endif
 	u64 prc_rxd0_n[RX_MAX_RINGS];
 	u64 prc_ctrl_n[RX_MAX_RINGS];
-#define PRC_CTRL_RC_ENABLED                    BIT(7)
-#define PRC_CTRL_RING_MODE                     (BIT(14)|BIT(15))
+#define PRC_CTRL_RC_ENABLED                    s2BIT(7)
+#define PRC_CTRL_RING_MODE                     (s2BIT(14)|s2BIT(15))
 #define PRC_CTRL_RING_MODE_1                   vBIT(0,14,2)
 #define PRC_CTRL_RING_MODE_3                   vBIT(1,14,2)
 #define PRC_CTRL_RING_MODE_5                   vBIT(2,14,2)
 #define PRC_CTRL_RING_MODE_x                   vBIT(3,14,2)
-#define PRC_CTRL_NO_SNOOP                      (BIT(22)|BIT(23))
-#define PRC_CTRL_NO_SNOOP_DESC                 BIT(22)
-#define PRC_CTRL_NO_SNOOP_BUFF                 BIT(23)
-#define PRC_CTRL_BIMODAL_INTERRUPT             BIT(37)
-#define PRC_CTRL_GROUP_READS                   BIT(38)
+#define PRC_CTRL_NO_SNOOP                      (s2BIT(22)|s2BIT(23))
+#define PRC_CTRL_NO_SNOOP_DESC                 s2BIT(22)
+#define PRC_CTRL_NO_SNOOP_BUFF                 s2BIT(23)
+#define PRC_CTRL_BIMODAL_INTERRUPT             s2BIT(37)
+#define PRC_CTRL_GROUP_READS                   s2BIT(38)
 #define PRC_CTRL_RXD_BACKOFF_INTERVAL(val)     vBIT(val,40,24)
 
 	u64 prc_alarm_action;
-#define PRC_ALARM_ACTION_RR_R0_STOP            BIT(3)
-#define PRC_ALARM_ACTION_RW_R0_STOP            BIT(7)
-#define PRC_ALARM_ACTION_RR_R1_STOP            BIT(11)
-#define PRC_ALARM_ACTION_RW_R1_STOP            BIT(15)
-#define PRC_ALARM_ACTION_RR_R2_STOP            BIT(19)
-#define PRC_ALARM_ACTION_RW_R2_STOP            BIT(23)
-#define PRC_ALARM_ACTION_RR_R3_STOP            BIT(27)
-#define PRC_ALARM_ACTION_RW_R3_STOP            BIT(31)
-#define PRC_ALARM_ACTION_RR_R4_STOP            BIT(35)
-#define PRC_ALARM_ACTION_RW_R4_STOP            BIT(39)
-#define PRC_ALARM_ACTION_RR_R5_STOP            BIT(43)
-#define PRC_ALARM_ACTION_RW_R5_STOP            BIT(47)
-#define PRC_ALARM_ACTION_RR_R6_STOP            BIT(51)
-#define PRC_ALARM_ACTION_RW_R6_STOP            BIT(55)
-#define PRC_ALARM_ACTION_RR_R7_STOP            BIT(59)
-#define PRC_ALARM_ACTION_RW_R7_STOP            BIT(63)
+#define PRC_ALARM_ACTION_RR_R0_STOP            s2BIT(3)
+#define PRC_ALARM_ACTION_RW_R0_STOP            s2BIT(7)
+#define PRC_ALARM_ACTION_RR_R1_STOP            s2BIT(11)
+#define PRC_ALARM_ACTION_RW_R1_STOP            s2BIT(15)
+#define PRC_ALARM_ACTION_RR_R2_STOP            s2BIT(19)
+#define PRC_ALARM_ACTION_RW_R2_STOP            s2BIT(23)
+#define PRC_ALARM_ACTION_RR_R3_STOP            s2BIT(27)
+#define PRC_ALARM_ACTION_RW_R3_STOP            s2BIT(31)
+#define PRC_ALARM_ACTION_RR_R4_STOP            s2BIT(35)
+#define PRC_ALARM_ACTION_RW_R4_STOP            s2BIT(39)
+#define PRC_ALARM_ACTION_RR_R5_STOP            s2BIT(43)
+#define PRC_ALARM_ACTION_RW_R5_STOP            s2BIT(47)
+#define PRC_ALARM_ACTION_RR_R6_STOP            s2BIT(51)
+#define PRC_ALARM_ACTION_RW_R6_STOP            s2BIT(55)
+#define PRC_ALARM_ACTION_RR_R7_STOP            s2BIT(59)
+#define PRC_ALARM_ACTION_RW_R7_STOP            s2BIT(63)
 
 /* Receive traffic interrupts */
 	u64 rti_command_mem;
-#define RTI_CMD_MEM_WE                          BIT(7)
-#define RTI_CMD_MEM_STROBE                      BIT(15)
-#define RTI_CMD_MEM_STROBE_NEW_CMD              BIT(15)
-#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTI_CMD_MEM_WE                          s2BIT(7)
+#define RTI_CMD_MEM_STROBE                      s2BIT(15)
+#define RTI_CMD_MEM_STROBE_NEW_CMD              s2BIT(15)
+#define RTI_CMD_MEM_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTI_CMD_MEM_OFFSET(n)                   vBIT(n,29,3)
 
 	u64 rti_data1_mem;
 #define RTI_DATA1_MEM_RX_TIMER_VAL(n)      vBIT(n,3,29)
-#define RTI_DATA1_MEM_RX_TIMER_AC_EN       BIT(38)
-#define RTI_DATA1_MEM_RX_TIMER_CI_EN       BIT(39)
+#define RTI_DATA1_MEM_RX_TIMER_AC_EN       s2BIT(38)
+#define RTI_DATA1_MEM_RX_TIMER_CI_EN       s2BIT(39)
 #define RTI_DATA1_MEM_RX_URNG_A(n)         vBIT(n,41,7)
 #define RTI_DATA1_MEM_RX_URNG_B(n)         vBIT(n,49,7)
 #define RTI_DATA1_MEM_RX_URNG_C(n)         vBIT(n,57,7)
@@ -622,10 +622,10 @@ struct XENA_dev_config {
 #define RTI_DATA2_MEM_RX_UFC_D(n)          vBIT(n,48,16)
 
 	u64 rx_pa_cfg;
-#define RX_PA_CFG_IGNORE_FRM_ERR           BIT(1)
-#define RX_PA_CFG_IGNORE_SNAP_OUI          BIT(2)
-#define RX_PA_CFG_IGNORE_LLC_CTRL          BIT(3)
-#define RX_PA_CFG_IGNORE_L2_ERR            BIT(6)
+#define RX_PA_CFG_IGNORE_FRM_ERR           s2BIT(1)
+#define RX_PA_CFG_IGNORE_SNAP_OUI          s2BIT(2)
+#define RX_PA_CFG_IGNORE_LLC_CTRL          s2BIT(3)
+#define RX_PA_CFG_IGNORE_L2_ERR            s2BIT(6)
 
 	u64 unused_11_1;
 
@@ -641,64 +641,64 @@ struct XENA_dev_config {
 /* Media Access Controller Register */
 	u64 mac_int_status;
 	u64 mac_int_mask;
-#define MAC_INT_STATUS_TMAC_INT            BIT(0)
-#define MAC_INT_STATUS_RMAC_INT            BIT(1)
+#define MAC_INT_STATUS_TMAC_INT            s2BIT(0)
+#define MAC_INT_STATUS_RMAC_INT            s2BIT(1)
 
 	u64 mac_tmac_err_reg;
-#define TMAC_ECC_SG_ERR				BIT(7)
-#define TMAC_ECC_DB_ERR				BIT(15)
-#define TMAC_TX_BUF_OVRN			BIT(23)
-#define TMAC_TX_CRI_ERR				BIT(31)
-#define TMAC_TX_SM_ERR				BIT(39)
-#define TMAC_DESC_ECC_SG_ERR			BIT(47)
-#define TMAC_DESC_ECC_DB_ERR			BIT(55)
+#define TMAC_ECC_SG_ERR				s2BIT(7)
+#define TMAC_ECC_DB_ERR				s2BIT(15)
+#define TMAC_TX_BUF_OVRN			s2BIT(23)
+#define TMAC_TX_CRI_ERR				s2BIT(31)
+#define TMAC_TX_SM_ERR				s2BIT(39)
+#define TMAC_DESC_ECC_SG_ERR			s2BIT(47)
+#define TMAC_DESC_ECC_DB_ERR			s2BIT(55)
 
 	u64 mac_tmac_err_mask;
 	u64 mac_tmac_err_alarm;
 
 	u64 mac_rmac_err_reg;
-#define RMAC_RX_BUFF_OVRN			BIT(0)
-#define RMAC_FRM_RCVD_INT			BIT(1)
-#define RMAC_UNUSED_INT				BIT(2)
-#define RMAC_RTS_PNUM_ECC_SG_ERR		BIT(5)
-#define RMAC_RTS_DS_ECC_SG_ERR			BIT(6)
-#define RMAC_RD_BUF_ECC_SG_ERR			BIT(7)
-#define RMAC_RTH_MAP_ECC_SG_ERR			BIT(8)
-#define RMAC_RTH_SPDM_ECC_SG_ERR		BIT(9)
-#define RMAC_RTS_VID_ECC_SG_ERR			BIT(10)
-#define RMAC_DA_SHADOW_ECC_SG_ERR		BIT(11)
-#define RMAC_RTS_PNUM_ECC_DB_ERR		BIT(13)
-#define RMAC_RTS_DS_ECC_DB_ERR			BIT(14)
-#define RMAC_RD_BUF_ECC_DB_ERR			BIT(15)
-#define RMAC_RTH_MAP_ECC_DB_ERR			BIT(16)
-#define RMAC_RTH_SPDM_ECC_DB_ERR		BIT(17)
-#define RMAC_RTS_VID_ECC_DB_ERR			BIT(18)
-#define RMAC_DA_SHADOW_ECC_DB_ERR		BIT(19)
-#define RMAC_LINK_STATE_CHANGE_INT		BIT(31)
-#define RMAC_RX_SM_ERR				BIT(39)
-#define RMAC_SINGLE_ECC_ERR			(BIT(5) | BIT(6) | BIT(7) |\
-						BIT(8)  | BIT(9) | BIT(10)|\
-						BIT(11))
-#define RMAC_DOUBLE_ECC_ERR			(BIT(13) | BIT(14) | BIT(15) |\
-						BIT(16)  | BIT(17) | BIT(18)|\
-						BIT(19))
+#define RMAC_RX_BUFF_OVRN			s2BIT(0)
+#define RMAC_FRM_RCVD_INT			s2BIT(1)
+#define RMAC_UNUSED_INT				s2BIT(2)
+#define RMAC_RTS_PNUM_ECC_SG_ERR		s2BIT(5)
+#define RMAC_RTS_DS_ECC_SG_ERR			s2BIT(6)
+#define RMAC_RD_BUF_ECC_SG_ERR			s2BIT(7)
+#define RMAC_RTH_MAP_ECC_SG_ERR			s2BIT(8)
+#define RMAC_RTH_SPDM_ECC_SG_ERR		s2BIT(9)
+#define RMAC_RTS_VID_ECC_SG_ERR			s2BIT(10)
+#define RMAC_DA_SHADOW_ECC_SG_ERR		s2BIT(11)
+#define RMAC_RTS_PNUM_ECC_DB_ERR		s2BIT(13)
+#define RMAC_RTS_DS_ECC_DB_ERR			s2BIT(14)
+#define RMAC_RD_BUF_ECC_DB_ERR			s2BIT(15)
+#define RMAC_RTH_MAP_ECC_DB_ERR			s2BIT(16)
+#define RMAC_RTH_SPDM_ECC_DB_ERR		s2BIT(17)
+#define RMAC_RTS_VID_ECC_DB_ERR			s2BIT(18)
+#define RMAC_DA_SHADOW_ECC_DB_ERR		s2BIT(19)
+#define RMAC_LINK_STATE_CHANGE_INT		s2BIT(31)
+#define RMAC_RX_SM_ERR				s2BIT(39)
+#define RMAC_SINGLE_ECC_ERR			(s2BIT(5) | s2BIT(6) | s2BIT(7) |\
+						s2BIT(8)  | s2BIT(9) | s2BIT(10)|\
+						s2BIT(11))
+#define RMAC_DOUBLE_ECC_ERR			(s2BIT(13) | s2BIT(14) | s2BIT(15) |\
+						s2BIT(16)  | s2BIT(17) | s2BIT(18)|\
+						s2BIT(19))
 	u64 mac_rmac_err_mask;
 	u64 mac_rmac_err_alarm;
 
 	u8 unused14[0x100 - 0x40];
 
 	u64 mac_cfg;
-#define MAC_CFG_TMAC_ENABLE             BIT(0)
-#define MAC_CFG_RMAC_ENABLE             BIT(1)
-#define MAC_CFG_LAN_NOT_WAN             BIT(2)
-#define MAC_CFG_TMAC_LOOPBACK           BIT(3)
-#define MAC_CFG_TMAC_APPEND_PAD         BIT(4)
-#define MAC_CFG_RMAC_STRIP_FCS          BIT(5)
-#define MAC_CFG_RMAC_STRIP_PAD          BIT(6)
-#define MAC_CFG_RMAC_PROM_ENABLE        BIT(7)
-#define MAC_RMAC_DISCARD_PFRM           BIT(8)
-#define MAC_RMAC_BCAST_ENABLE           BIT(9)
-#define MAC_RMAC_ALL_ADDR_ENABLE        BIT(10)
+#define MAC_CFG_TMAC_ENABLE             s2BIT(0)
+#define MAC_CFG_RMAC_ENABLE             s2BIT(1)
+#define MAC_CFG_LAN_NOT_WAN             s2BIT(2)
+#define MAC_CFG_TMAC_LOOPBACK           s2BIT(3)
+#define MAC_CFG_TMAC_APPEND_PAD         s2BIT(4)
+#define MAC_CFG_RMAC_STRIP_FCS          s2BIT(5)
+#define MAC_CFG_RMAC_STRIP_PAD          s2BIT(6)
+#define MAC_CFG_RMAC_PROM_ENABLE        s2BIT(7)
+#define MAC_RMAC_DISCARD_PFRM           s2BIT(8)
+#define MAC_RMAC_BCAST_ENABLE           s2BIT(9)
+#define MAC_RMAC_ALL_ADDR_ENABLE        s2BIT(10)
 #define MAC_RMAC_INVLD_IPG_THR(val)     vBIT(val,16,8)
 
 	u64 tmac_avg_ipg;
@@ -710,14 +710,14 @@ struct XENA_dev_config {
 #define RMAC_MAX_PYLD_LEN_JUMBO_DEF vBIT(9600,2,14)
 
 	u64 rmac_err_cfg;
-#define RMAC_ERR_FCS                    BIT(0)
-#define RMAC_ERR_FCS_ACCEPT             BIT(1)
-#define RMAC_ERR_TOO_LONG               BIT(1)
-#define RMAC_ERR_TOO_LONG_ACCEPT        BIT(1)
-#define RMAC_ERR_RUNT                   BIT(2)
-#define RMAC_ERR_RUNT_ACCEPT            BIT(2)
-#define RMAC_ERR_LEN_MISMATCH           BIT(3)
-#define RMAC_ERR_LEN_MISMATCH_ACCEPT    BIT(3)
+#define RMAC_ERR_FCS                    s2BIT(0)
+#define RMAC_ERR_FCS_ACCEPT             s2BIT(1)
+#define RMAC_ERR_TOO_LONG               s2BIT(1)
+#define RMAC_ERR_TOO_LONG_ACCEPT        s2BIT(1)
+#define RMAC_ERR_RUNT                   s2BIT(2)
+#define RMAC_ERR_RUNT_ACCEPT            s2BIT(2)
+#define RMAC_ERR_LEN_MISMATCH           s2BIT(3)
+#define RMAC_ERR_LEN_MISMATCH_ACCEPT    s2BIT(3)
 
 	u64 rmac_cfg_key;
 #define RMAC_CFG_KEY(val)               vBIT(val,0,16)
@@ -728,15 +728,15 @@ struct XENA_dev_config {
 #define MAC_MC_ADDR_START_OFFSET    16
 #define MAC_MC_ALL_MC_ADDR_OFFSET   63	/* enables all multicast pkts */
 	u64 rmac_addr_cmd_mem;
-#define RMAC_ADDR_CMD_MEM_WE                    BIT(7)
+#define RMAC_ADDR_CMD_MEM_WE                    s2BIT(7)
 #define RMAC_ADDR_CMD_MEM_RD                    0
-#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        BIT(15)
-#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_NEW_CMD        s2BIT(15)
+#define RMAC_ADDR_CMD_MEM_STROBE_CMD_EXECUTING  s2BIT(15)
 #define RMAC_ADDR_CMD_MEM_OFFSET(n)             vBIT(n,26,6)
 
 	u64 rmac_addr_data0_mem;
 #define RMAC_ADDR_DATA0_MEM_ADDR(n)    vBIT(n,0,48)
-#define RMAC_ADDR_DATA0_MEM_USER       BIT(48)
+#define RMAC_ADDR_DATA0_MEM_USER       s2BIT(48)
 
 	u64 rmac_addr_data1_mem;
 #define RMAC_ADDR_DATA1_MEM_MASK(n)    vBIT(n,0,48)
@@ -753,10 +753,10 @@ struct XENA_dev_config {
 	u64 tmac_ipg_cfg;
 
 	u64 rmac_pause_cfg;
-#define RMAC_PAUSE_GEN             BIT(0)
-#define RMAC_PAUSE_GEN_ENABLE      BIT(0)
-#define RMAC_PAUSE_RX              BIT(1)
-#define RMAC_PAUSE_RX_ENABLE       BIT(1)
+#define RMAC_PAUSE_GEN             s2BIT(0)
+#define RMAC_PAUSE_GEN_ENABLE      s2BIT(0)
+#define RMAC_PAUSE_RX              s2BIT(1)
+#define RMAC_PAUSE_RX_ENABLE       s2BIT(1)
 #define RMAC_PAUSE_HG_PTIME_DEF    vBIT(0xFFFF,16,16)
 #define RMAC_PAUSE_HG_PTIME(val)    vBIT(val,16,16)
 
@@ -787,29 +787,29 @@ struct XENA_dev_config {
 #define MAX_DIX_MAP                         4
 	u64 rts_dix_map_n[MAX_DIX_MAP];
 #define RTS_DIX_MAP_ETYPE(val)             vBIT(val,0,16)
-#define RTS_DIX_MAP_SCW(val)               BIT(val,21)
+#define RTS_DIX_MAP_SCW(val)               s2BIT(val,21)
 
 	u64 rts_q_alternates;
 	u64 rts_default_q;
 
 	u64 rts_ctrl;
-#define RTS_CTRL_IGNORE_SNAP_OUI           BIT(2)
-#define RTS_CTRL_IGNORE_LLC_CTRL           BIT(3)
+#define RTS_CTRL_IGNORE_SNAP_OUI           s2BIT(2)
+#define RTS_CTRL_IGNORE_LLC_CTRL           s2BIT(3)
 
 	u64 rts_pn_cam_ctrl;
-#define RTS_PN_CAM_CTRL_WE                 BIT(7)
-#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   BIT(15)
+#define RTS_PN_CAM_CTRL_WE                 s2BIT(7)
+#define RTS_PN_CAM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_PN_CAM_CTRL_STROBE_BEING_EXECUTED   s2BIT(15)
 #define RTS_PN_CAM_CTRL_OFFSET(n)          vBIT(n,24,8)
 	u64 rts_pn_cam_data;
-#define RTS_PN_CAM_DATA_TCP_SELECT         BIT(7)
+#define RTS_PN_CAM_DATA_TCP_SELECT         s2BIT(7)
 #define RTS_PN_CAM_DATA_PORT(val)          vBIT(val,8,16)
 #define RTS_PN_CAM_DATA_SCW(val)           vBIT(val,24,8)
 
 	u64 rts_ds_mem_ctrl;
-#define RTS_DS_MEM_CTRL_WE                 BIT(7)
-#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     BIT(15)
-#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   BIT(15)
+#define RTS_DS_MEM_CTRL_WE                 s2BIT(7)
+#define RTS_DS_MEM_CTRL_STROBE_NEW_CMD     s2BIT(15)
+#define RTS_DS_MEM_CTRL_STROBE_CMD_BEING_EXECUTED   s2BIT(15)
 #define RTS_DS_MEM_CTRL_OFFSET(n)          vBIT(n,26,6)
 	u64 rts_ds_mem_data;
 #define RTS_DS_MEM_DATA(n)                 vBIT(n,0,8)
@@ -823,23 +823,23 @@ struct XENA_dev_config {
 
 /* memory controller registers */
 	u64 mc_int_status;
-#define MC_INT_STATUS_MC_INT               BIT(0)
+#define MC_INT_STATUS_MC_INT               s2BIT(0)
 	u64 mc_int_mask;
-#define MC_INT_MASK_MC_INT                 BIT(0)
+#define MC_INT_MASK_MC_INT                 s2BIT(0)
 
 	u64 mc_err_reg;
-#define MC_ERR_REG_ECC_DB_ERR_L            BIT(14)
-#define MC_ERR_REG_ECC_DB_ERR_U            BIT(15)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       BIT(18)
-#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       BIT(20)
-#define MC_ERR_REG_MIRI_CRI_ERR_0          BIT(22)
-#define MC_ERR_REG_MIRI_CRI_ERR_1          BIT(23)
-#define MC_ERR_REG_SM_ERR                  BIT(31)
-#define MC_ERR_REG_ECC_ALL_SNG		   (BIT(2) | BIT(3) | BIT(4) | BIT(5) |\
-					BIT(17) | BIT(19))
-#define MC_ERR_REG_ECC_ALL_DBL		   (BIT(10) | BIT(11) | BIT(12) |\
-					BIT(13) | BIT(18) | BIT(20))
-#define PLL_LOCK_N			BIT(39)
+#define MC_ERR_REG_ECC_DB_ERR_L            s2BIT(14)
+#define MC_ERR_REG_ECC_DB_ERR_U            s2BIT(15)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_0       s2BIT(18)
+#define MC_ERR_REG_MIRI_ECC_DB_ERR_1       s2BIT(20)
+#define MC_ERR_REG_MIRI_CRI_ERR_0          s2BIT(22)
+#define MC_ERR_REG_MIRI_CRI_ERR_1          s2BIT(23)
+#define MC_ERR_REG_SM_ERR                  s2BIT(31)
+#define MC_ERR_REG_ECC_ALL_SNG		   (s2BIT(2) | s2BIT(3) | s2BIT(4) | s2BIT(5) |\
+					s2BIT(17) | s2BIT(19))
+#define MC_ERR_REG_ECC_ALL_DBL		   (s2BIT(10) | s2BIT(11) | s2BIT(12) |\
+					s2BIT(13) | s2BIT(18) | s2BIT(20))
+#define PLL_LOCK_N			s2BIT(39)
 	u64 mc_err_mask;
 	u64 mc_err_alarm;
 
@@ -857,8 +857,8 @@ struct XENA_dev_config {
 #define RX_QUEUE_CFG_Q7_SZ(n)              vBIT(n,56,8)
 
 	u64 mc_rldram_mrs;
-#define	MC_RLDRAM_QUEUE_SIZE_ENABLE			BIT(39)
-#define	MC_RLDRAM_MRS_ENABLE				BIT(47)
+#define	MC_RLDRAM_QUEUE_SIZE_ENABLE			s2BIT(39)
+#define	MC_RLDRAM_MRS_ENABLE				s2BIT(47)
 
 	u64 mc_rldram_interleave;
 
@@ -871,11 +871,11 @@ struct XENA_dev_config {
 	u64 mc_rldram_ref_per;
 	u8 unused20[0x220 - 0x208];
 	u64 mc_rldram_test_ctrl;
-#define MC_RLDRAM_TEST_MODE		BIT(47)
-#define MC_RLDRAM_TEST_WRITE	BIT(7)
-#define MC_RLDRAM_TEST_GO		BIT(15)
-#define MC_RLDRAM_TEST_DONE		BIT(23)
-#define MC_RLDRAM_TEST_PASS		BIT(31)
+#define MC_RLDRAM_TEST_MODE		s2BIT(47)
+#define MC_RLDRAM_TEST_WRITE	s2BIT(7)
+#define MC_RLDRAM_TEST_GO		s2BIT(15)
+#define MC_RLDRAM_TEST_DONE		s2BIT(23)
+#define MC_RLDRAM_TEST_PASS		s2BIT(31)
 
 	u8 unused21[0x240 - 0x228];
 	u64 mc_rldram_test_add;
@@ -888,7 +888,7 @@ struct XENA_dev_config {
 
 	u8 unused24_1[0x360 - 0x308];
 	u64 mc_rldram_ctrl;
-#define	MC_RLDRAM_ENABLE_ODT		BIT(7)
+#define	MC_RLDRAM_ENABLE_ODT		s2BIT(7)
 
 	u8 unused24_2[0x640 - 0x368];
 	u64 mc_rldram_ref_per_herc;
@@ -906,24 +906,24 @@ struct XENA_dev_config {
 	/* XGXS control registers */
 
 	u64 xgxs_int_status;
-#define XGXS_INT_STATUS_TXGXS              BIT(0)
-#define XGXS_INT_STATUS_RXGXS              BIT(1)
+#define XGXS_INT_STATUS_TXGXS              s2BIT(0)
+#define XGXS_INT_STATUS_RXGXS              s2BIT(1)
 	u64 xgxs_int_mask;
-#define XGXS_INT_MASK_TXGXS                BIT(0)
-#define XGXS_INT_MASK_RXGXS                BIT(1)
+#define XGXS_INT_MASK_TXGXS                s2BIT(0)
+#define XGXS_INT_MASK_RXGXS                s2BIT(1)
 
 	u64 xgxs_txgxs_err_reg;
-#define TXGXS_ECC_SG_ERR		BIT(7)
-#define TXGXS_ECC_DB_ERR		BIT(15)
-#define TXGXS_ESTORE_UFLOW		BIT(31)
-#define TXGXS_TX_SM_ERR			BIT(39)
+#define TXGXS_ECC_SG_ERR		s2BIT(7)
+#define TXGXS_ECC_DB_ERR		s2BIT(15)
+#define TXGXS_ESTORE_UFLOW		s2BIT(31)
+#define TXGXS_TX_SM_ERR			s2BIT(39)
 
 	u64 xgxs_txgxs_err_mask;
 	u64 xgxs_txgxs_err_alarm;
 
 	u64 xgxs_rxgxs_err_reg;
-#define RXGXS_ESTORE_OFLOW		BIT(7)
-#define RXGXS_RX_SM_ERR			BIT(39)
+#define RXGXS_ESTORE_OFLOW		s2BIT(7)
+#define RXGXS_RX_SM_ERR			s2BIT(39)
 	u64 xgxs_rxgxs_err_mask;
 	u64 xgxs_rxgxs_err_alarm;
 
@@ -942,10 +942,10 @@ struct XENA_dev_config {
 #define SPI_CONTROL_BYTECNT(cnt)	vBIT(cnt,29,3)
 #define SPI_CONTROL_CMD(cmd)		vBIT(cmd,32,8)
 #define SPI_CONTROL_ADDR(addr)		vBIT(addr,40,24)
-#define SPI_CONTROL_SEL1		BIT(4)
-#define SPI_CONTROL_REQ			BIT(7)
-#define SPI_CONTROL_NACK		BIT(5)
-#define SPI_CONTROL_DONE		BIT(6)
+#define SPI_CONTROL_SEL1		s2BIT(4)
+#define SPI_CONTROL_REQ			s2BIT(7)
+#define SPI_CONTROL_NACK		s2BIT(5)
+#define SPI_CONTROL_DONE		s2BIT(6)
 	u64 spi_data;
 #define SPI_DATA_WRITE(data,len)	vBIT(data,0,len)
 };
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 22e4054d4fc..b8c0e7b4ca1 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -1716,7 +1716,7 @@ static int init_nic(struct s2io_nic *nic)
 			MISC_LINK_STABILITY_PRD(3);
 		writeq(val64, &bar0->misc_control);
 		val64 = readq(&bar0->pic_control2);
-		val64 &= ~(BIT(13)|BIT(14)|BIT(15));
+		val64 &= ~(s2BIT(13)|s2BIT(14)|s2BIT(15));
 		writeq(val64, &bar0->pic_control2);
 	}
 	if (strstr(nic->product_name, "CX4")) {
@@ -2427,7 +2427,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 		}
 		if ((rxdp->Control_1 & RXD_OWN_XENA) &&
 			((nic->rxd_mode == RXD_MODE_3B) &&
-				(rxdp->Control_2 & BIT(0)))) {
+				(rxdp->Control_2 & s2BIT(0)))) {
 			mac_control->rings[ring_no].rx_curr_put_info.
 					offset = off;
 			goto end;
@@ -2540,7 +2540,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 				rxdp->Control_2 |= SET_BUFFER2_SIZE_3
 								(dev->mtu + 4);
 			}
-			rxdp->Control_2 |= BIT(0);
+			rxdp->Control_2 |= s2BIT(0);
 		}
 		rxdp->Host_Control = (unsigned long) (skb);
 		if (alloc_tab & ((1 << rxsync_frequency) - 1))
@@ -3377,7 +3377,7 @@ static void s2io_reset(struct s2io_nic * sp)
 		pci_write_config_dword(sp->pdev, 0x68, 0x7C);
 
 		/* Clearing PCI_STATUS error reflected here */
-		writeq(BIT(62), &bar0->txpic_int_reg);
+		writeq(s2BIT(62), &bar0->txpic_int_reg);
 	}
 
 	/* Reset device statistics maintained by OS */
@@ -3575,7 +3575,7 @@ static int wait_for_msix_trans(struct s2io_nic *nic, int i)
 
 	do {
 		val64 = readq(&bar0->xmsi_access);
-		if (!(val64 & BIT(15)))
+		if (!(val64 & s2BIT(15)))
 			break;
 		mdelay(1);
 		cnt++;
@@ -3597,7 +3597,7 @@ static void restore_xmsi_data(struct s2io_nic *nic)
 	for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
 		writeq(nic->msix_info[i].addr, &bar0->xmsi_address);
 		writeq(nic->msix_info[i].data, &bar0->xmsi_data);
-		val64 = (BIT(7) | BIT(15) | vBIT(i, 26, 6));
+		val64 = (s2BIT(7) | s2BIT(15) | vBIT(i, 26, 6));
 		writeq(val64, &bar0->xmsi_access);
 		if (wait_for_msix_trans(nic, i)) {
 			DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -3614,7 +3614,7 @@ static void store_xmsi_data(struct s2io_nic *nic)
 
 	/* Store and display */
 	for (i=0; i < MAX_REQUESTED_MSI_X; i++) {
-		val64 = (BIT(15) | vBIT(i, 26, 6));
+		val64 = (s2BIT(15) | vBIT(i, 26, 6));
 		writeq(val64, &bar0->xmsi_access);
 		if (wait_for_msix_trans(nic, i)) {
 			DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__);
@@ -4634,7 +4634,7 @@ static void s2io_updt_stats(struct s2io_nic *sp)
 		do {
 			udelay(100);
 			val64 = readq(&bar0->stat_cfg);
-			if (!(val64 & BIT(0)))
+			if (!(val64 & s2BIT(0)))
 				break;
 			cnt++;
 			if (cnt == 5)
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index f6b45565304..cc1797a071a 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -14,7 +14,7 @@
 #define _S2IO_H
 
 #define TBD 0
-#define BIT(loc)		(0x8000000000000000ULL >> (loc))
+#define s2BIT(loc)		(0x8000000000000000ULL >> (loc))
 #define vBIT(val, loc, sz)	(((u64)val) << (64-loc-sz))
 #define INV(d)  ((d&0xff)<<24) | (((d>>8)&0xff)<<16) | (((d>>16)&0xff)<<8)| ((d>>24)&0xff)
 
@@ -473,42 +473,42 @@ struct TxFIFO_element {
 
 	u64 List_Control;
 #define TX_FIFO_LAST_TXD_NUM( val)     vBIT(val,0,8)
-#define TX_FIFO_FIRST_LIST             BIT(14)
-#define TX_FIFO_LAST_LIST              BIT(15)
+#define TX_FIFO_FIRST_LIST             s2BIT(14)
+#define TX_FIFO_LAST_LIST              s2BIT(15)
 #define TX_FIFO_FIRSTNLAST_LIST        vBIT(3,14,2)
-#define TX_FIFO_SPECIAL_FUNC           BIT(23)
-#define TX_FIFO_DS_NO_SNOOP            BIT(31)
-#define TX_FIFO_BUFF_NO_SNOOP          BIT(30)
+#define TX_FIFO_SPECIAL_FUNC           s2BIT(23)
+#define TX_FIFO_DS_NO_SNOOP            s2BIT(31)
+#define TX_FIFO_BUFF_NO_SNOOP          s2BIT(30)
 };
 
 /* Tx descriptor structure */
 struct TxD {
 	u64 Control_1;
 /* bit mask */
-#define TXD_LIST_OWN_XENA       BIT(7)
-#define TXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define TXD_LIST_OWN_XENA       s2BIT(7)
+#define TXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define TXD_T_CODE_OK(val)      (|(val & TXD_T_CODE))
 #define GET_TXD_T_CODE(val)     ((val & TXD_T_CODE)<<12)
-#define TXD_GATHER_CODE         (BIT(22) | BIT(23))
-#define TXD_GATHER_CODE_FIRST   BIT(22)
-#define TXD_GATHER_CODE_LAST    BIT(23)
-#define TXD_TCP_LSO_EN          BIT(30)
-#define TXD_UDP_COF_EN          BIT(31)
-#define TXD_UFO_EN		BIT(31) | BIT(30)
+#define TXD_GATHER_CODE         (s2BIT(22) | s2BIT(23))
+#define TXD_GATHER_CODE_FIRST   s2BIT(22)
+#define TXD_GATHER_CODE_LAST    s2BIT(23)
+#define TXD_TCP_LSO_EN          s2BIT(30)
+#define TXD_UDP_COF_EN          s2BIT(31)
+#define TXD_UFO_EN		s2BIT(31) | s2BIT(30)
 #define TXD_TCP_LSO_MSS(val)    vBIT(val,34,14)
 #define TXD_UFO_MSS(val)	vBIT(val,34,14)
 #define TXD_BUFFER0_SIZE(val)   vBIT(val,48,16)
 
 	u64 Control_2;
-#define TXD_TX_CKO_CONTROL      (BIT(5)|BIT(6)|BIT(7))
-#define TXD_TX_CKO_IPV4_EN      BIT(5)
-#define TXD_TX_CKO_TCP_EN       BIT(6)
-#define TXD_TX_CKO_UDP_EN       BIT(7)
-#define TXD_VLAN_ENABLE         BIT(15)
+#define TXD_TX_CKO_CONTROL      (s2BIT(5)|s2BIT(6)|s2BIT(7))
+#define TXD_TX_CKO_IPV4_EN      s2BIT(5)
+#define TXD_TX_CKO_TCP_EN       s2BIT(6)
+#define TXD_TX_CKO_UDP_EN       s2BIT(7)
+#define TXD_VLAN_ENABLE         s2BIT(15)
 #define TXD_VLAN_TAG(val)       vBIT(val,16,16)
 #define TXD_INT_NUMBER(val)     vBIT(val,34,6)
-#define TXD_INT_TYPE_PER_LIST   BIT(47)
-#define TXD_INT_TYPE_UTILZ      BIT(46)
+#define TXD_INT_TYPE_PER_LIST   s2BIT(47)
+#define TXD_INT_TYPE_UTILZ      s2BIT(46)
 #define TXD_SET_MARKER         vBIT(0x6,0,4)
 
 	u64 Buffer_Pointer;
@@ -525,14 +525,14 @@ struct list_info_hold {
 struct RxD_t {
 	u64 Host_Control;	/* reserved for host */
 	u64 Control_1;
-#define RXD_OWN_XENA            BIT(7)
-#define RXD_T_CODE              (BIT(12)|BIT(13)|BIT(14)|BIT(15))
+#define RXD_OWN_XENA            s2BIT(7)
+#define RXD_T_CODE              (s2BIT(12)|s2BIT(13)|s2BIT(14)|s2BIT(15))
 #define RXD_FRAME_PROTO         vBIT(0xFFFF,24,8)
-#define RXD_FRAME_PROTO_IPV4    BIT(27)
-#define RXD_FRAME_PROTO_IPV6    BIT(28)
-#define RXD_FRAME_IP_FRAG	BIT(29)
-#define RXD_FRAME_PROTO_TCP     BIT(30)
-#define RXD_FRAME_PROTO_UDP     BIT(31)
+#define RXD_FRAME_PROTO_IPV4    s2BIT(27)
+#define RXD_FRAME_PROTO_IPV6    s2BIT(28)
+#define RXD_FRAME_IP_FRAG	s2BIT(29)
+#define RXD_FRAME_PROTO_TCP     s2BIT(30)
+#define RXD_FRAME_PROTO_UDP     s2BIT(31)
 #define TCP_OR_UDP_FRAME        (RXD_FRAME_PROTO_TCP | RXD_FRAME_PROTO_UDP)
 #define RXD_GET_L3_CKSUM(val)   ((u16)(val>> 16) & 0xFFFF)
 #define RXD_GET_L4_CKSUM(val)   ((u16)(val) & 0xFFFF)
@@ -998,26 +998,26 @@ static inline void SPECIAL_REG_WRITE(u64 val, void __iomem *addr, int order)
 /*  Interrupt masks for the general interrupt mask register */
 #define DISABLE_ALL_INTRS   0xFFFFFFFFFFFFFFFFULL
 
-#define TXPIC_INT_M         BIT(0)
-#define TXDMA_INT_M         BIT(1)
-#define TXMAC_INT_M         BIT(2)
-#define TXXGXS_INT_M        BIT(3)
-#define TXTRAFFIC_INT_M     BIT(8)
-#define PIC_RX_INT_M        BIT(32)
-#define RXDMA_INT_M         BIT(33)
-#define RXMAC_INT_M         BIT(34)
-#define MC_INT_M            BIT(35)
-#define RXXGXS_INT_M        BIT(36)
-#define RXTRAFFIC_INT_M     BIT(40)
+#define TXPIC_INT_M         s2BIT(0)
+#define TXDMA_INT_M         s2BIT(1)
+#define TXMAC_INT_M         s2BIT(2)
+#define TXXGXS_INT_M        s2BIT(3)
+#define TXTRAFFIC_INT_M     s2BIT(8)
+#define PIC_RX_INT_M        s2BIT(32)
+#define RXDMA_INT_M         s2BIT(33)
+#define RXMAC_INT_M         s2BIT(34)
+#define MC_INT_M            s2BIT(35)
+#define RXXGXS_INT_M        s2BIT(36)
+#define RXTRAFFIC_INT_M     s2BIT(40)
 
 /*  PIC level Interrupts TODO*/
 
 /*  DMA level Inressupts */
-#define TXDMA_PFC_INT_M     BIT(0)
-#define TXDMA_PCC_INT_M     BIT(2)
+#define TXDMA_PFC_INT_M     s2BIT(0)
+#define TXDMA_PCC_INT_M     s2BIT(2)
 
 /*  PFC block interrupts */
-#define PFC_MISC_ERR_1      BIT(0)	/* Interrupt to indicate FIFO full */
+#define PFC_MISC_ERR_1      s2BIT(0)	/* Interrupt to indicate FIFO full */
 
 /* PCC block interrupts. */
 #define	PCC_FB_ECC_ERR	   vBIT(0xff, 16, 8)	/* Interrupt to indicate
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 70d7e478a3e..24cfb6275d9 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1384,13 +1384,9 @@ static int sky2_up(struct net_device *dev)
 	sky2_prefetch_init(hw, txqaddr[port], sky2->tx_le_map,
 			   TX_RING_SIZE - 1);
 
-	napi_enable(&hw->napi);
-
 	err = sky2_rx_start(sky2);
-	if (err) {
-		napi_disable(&hw->napi);
+	if (err)
 		goto err_out;
-	}
 
 	/* Enable interrupts from phy/mac for port */
 	imask = sky2_read32(hw, B0_IMSK);
@@ -1679,13 +1675,13 @@ static int sky2_down(struct net_device *dev)
 	/* Stop more packets from being queued */
 	netif_stop_queue(dev);
 
-	napi_disable(&hw->napi);
-
 	/* Disable port IRQ */
 	imask = sky2_read32(hw, B0_IMSK);
 	imask &= ~portirq_msk[port];
 	sky2_write32(hw, B0_IMSK, imask);
 
+	synchronize_irq(hw->pdev->irq);
+
 	sky2_gmac_reset(hw, port);
 
 	/* Stop transmitter */
@@ -1699,6 +1695,9 @@ static int sky2_down(struct net_device *dev)
 	ctrl &= ~(GM_GPCR_TX_ENA | GM_GPCR_RX_ENA);
 	gma_write16(hw, port, GM_GP_CTRL, ctrl);
 
+	/* Make sure no packets are pending */
+	napi_synchronize(&hw->napi);
+
 	sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_SET);
 
 	/* Workaround shared GMAC reset */
@@ -1736,8 +1735,6 @@ static int sky2_down(struct net_device *dev)
 	/* turn off LED's */
 	sky2_write16(hw, B0_Y2LED, LED_STAT_OFF);
 
-	synchronize_irq(hw->pdev->irq);
-
 	sky2_tx_clean(dev);
 	sky2_rx_clean(sky2);
 
@@ -2048,9 +2045,6 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
 	err = sky2_rx_start(sky2);
 	sky2_write32(hw, B0_IMSK, imask);
 
-	/* Unconditionally re-enable NAPI because even if we
-	 * call dev_close() that will do a napi_disable().
-	 */
 	napi_enable(&hw->napi);
 
 	if (err)
@@ -2915,6 +2909,7 @@ static void sky2_restart(struct work_struct *work)
 	rtnl_lock();
 	sky2_write32(hw, B0_IMSK, 0);
 	sky2_read32(hw, B0_IMSK);
+	napi_disable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		dev = hw->dev[i];
@@ -2924,6 +2919,7 @@ static void sky2_restart(struct work_struct *work)
 
 	sky2_reset(hw);
 	sky2_write32(hw, B0_IMSK, Y2_IS_BASE);
+	napi_enable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		dev = hw->dev[i];
@@ -4191,7 +4187,6 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 		err = -ENOMEM;
 		goto err_out_free_pci;
 	}
-	netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT);
 
 	if (!disable_msi && pci_enable_msi(pdev) == 0) {
 		err = sky2_test_msi(hw);
@@ -4207,6 +4202,8 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 		goto err_out_free_netdev;
 	}
 
+	netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT);
+
 	err = request_irq(pdev->irq, sky2_intr,
 			  (hw->flags & SKY2_HW_USE_MSI) ? 0 : IRQF_SHARED,
 			  dev->name, hw);
@@ -4215,6 +4212,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 		goto err_out_unregister;
 	}
 	sky2_write32(hw, B0_IMSK, Y2_IS_BASE);
+	napi_enable(&hw->napi);
 
 	sky2_show_addr(dev);
 
@@ -4265,23 +4263,18 @@ err_out:
 static void __devexit sky2_remove(struct pci_dev *pdev)
 {
 	struct sky2_hw *hw = pci_get_drvdata(pdev);
-	struct net_device *dev0, *dev1;
+	int i;
 
 	if (!hw)
 		return;
 
 	del_timer_sync(&hw->watchdog_timer);
+	cancel_work_sync(&hw->restart_work);
 
-	flush_scheduled_work();
+	for (i = hw->ports; i >= 0; --i)
+		unregister_netdev(hw->dev[i]);
 
 	sky2_write32(hw, B0_IMSK, 0);
-	synchronize_irq(hw->pdev->irq);
-
-	dev0 = hw->dev[0];
-	dev1 = hw->dev[1];
-	if (dev1)
-		unregister_netdev(dev1);
-	unregister_netdev(dev0);
 
 	sky2_power_aux(hw);
 
@@ -4296,9 +4289,9 @@ static void __devexit sky2_remove(struct pci_dev *pdev)
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 
-	if (dev1)
-		free_netdev(dev1);
-	free_netdev(dev0);
+	for (i = hw->ports; i >= 0; --i)
+		free_netdev(hw->dev[i]);
+
 	iounmap(hw->regs);
 	kfree(hw);
 
@@ -4328,6 +4321,7 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state)
 	}
 
 	sky2_write32(hw, B0_IMSK, 0);
+	napi_disable(&hw->napi);
 	sky2_power_aux(hw);
 
 	pci_save_state(pdev);
@@ -4362,8 +4356,8 @@ static int sky2_resume(struct pci_dev *pdev)
 		pci_write_config_dword(pdev, PCI_DEV_REG3, 0);
 
 	sky2_reset(hw);
-
 	sky2_write32(hw, B0_IMSK, Y2_IS_BASE);
+	napi_enable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		struct net_device *dev = hw->dev[i];
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index fab055ffcc9..571060a3c91 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -46,7 +46,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/pci-bridge.h>
 #include <net/checksum.h>
 
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 76e55612430..a7afeea156b 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -34,9 +34,9 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 
 #include <asm/processor.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
index 8f198befba3..cb51dc51cce 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
@@ -29,7 +29,7 @@
 #include "bcm43xx_radio.h"
 #include "bcm43xx.h"
 
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 
 static void bcm43xx_led_changestate(struct bcm43xx_led *led)
diff --git a/drivers/net/wireless/hostap/hostap_common.h b/drivers/net/wireless/hostap/hostap_common.h
index ceb7f1e5e9e..517f8984514 100644
--- a/drivers/net/wireless/hostap/hostap_common.h
+++ b/drivers/net/wireless/hostap/hostap_common.h
@@ -4,9 +4,6 @@
 #include <linux/types.h>
 #include <linux/if_ether.h>
 
-#define BIT(x) (1 << (x))
-
-
 /* IEEE 802.11 defines */
 
 /* Information Element IDs */
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index 40f516d42c5..d8f5efcfcab 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -2920,7 +2920,7 @@ static int prism2_ioctl_priv_monitor(struct net_device *dev, int *i)
 
 	printk(KERN_DEBUG "%s: process %d (%s) used deprecated iwpriv monitor "
 	       "- update software to use iwconfig mode monitor\n",
-	       dev->name, current->pid, current->comm);
+	       dev->name, task_pid_nr(current), current->comm);
 
 	/* Backward compatibility code - this can be removed at some point */
 
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index 2d46a16c094..c144e3cdb89 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -1858,14 +1858,6 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 
 	modify_acceptable_latency("ipw2100", INFINITE_LATENCY);
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	if (priv->config & CFG_C3_DISABLED) {
-		IPW_DEBUG_INFO(": Resetting C3 transitions.\n");
-		acpi_set_cstate_limit(priv->cstate_limit);
-		priv->config &= ~CFG_C3_DISABLED;
-	}
-#endif
-
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
 		wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL);
@@ -2091,14 +2083,6 @@ static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
 	/* RF_KILL is now enabled (else we wouldn't be here) */
 	priv->status |= STATUS_RF_KILL_HW;
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	if (priv->config & CFG_C3_DISABLED) {
-		IPW_DEBUG_INFO(": Resetting C3 transitions.\n");
-		acpi_set_cstate_limit(priv->cstate_limit);
-		priv->config &= ~CFG_C3_DISABLED;
-	}
-#endif
-
 	/* Make sure the RF Kill check timer is running */
 	priv->stop_rf_kill = 0;
 	cancel_delayed_work(&priv->rf_kill);
@@ -2329,23 +2313,10 @@ static void ipw2100_corruption_detected(struct ipw2100_priv *priv, int i)
 	u32 match, reg;
 	int j;
 #endif
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	int limit;
-#endif
 
 	IPW_DEBUG_INFO(": PCI latency error detected at 0x%04zX.\n",
 		       i * sizeof(struct ipw2100_status));
 
-#ifdef ACPI_CSTATE_LIMIT_DEFINED
-	IPW_DEBUG_INFO(": Disabling C3 transitions.\n");
-	limit = acpi_get_cstate_limit();
-	if (limit > 2) {
-		priv->cstate_limit = limit;
-		acpi_set_cstate_limit(2);
-		priv->config |= CFG_C3_DISABLED;
-	}
-#endif
-
 #ifdef IPW2100_DEBUG_C3
 	/* Halt the fimrware so we can get a good image */
 	write_register(priv->net_dev, IPW_REG_RESET_REG,
diff --git a/drivers/net/wireless/ipw2100.h b/drivers/net/wireless/ipw2100.h
index de7d384d38a..2b8be2418fa 100644
--- a/drivers/net/wireless/ipw2100.h
+++ b/drivers/net/wireless/ipw2100.h
@@ -479,7 +479,6 @@ enum {
 #define CFG_ASSOCIATE           (1<<6)
 #define CFG_FIXED_RATE          (1<<7)
 #define CFG_ADHOC_CREATE        (1<<8)
-#define CFG_C3_DISABLED         (1<<9)
 #define CFG_PASSIVE_SCAN        (1<<10)
 #ifdef CONFIG_IPW2100_MONITOR
 #define CFG_CRC_CHECK           (1<<11)
@@ -508,7 +507,6 @@ struct ipw2100_priv {
 	u8 bssid[ETH_ALEN];
 	u8 channel;
 	int last_mode;
-	int cstate_limit;
 
 	unsigned long connect_start;
 	unsigned long last_reset;
diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c
index 67d28ee80f2..c5e0d89c3ec 100644
--- a/drivers/pcmcia/m32r_pcc.c
+++ b/drivers/pcmcia/m32r_pcc.c
@@ -22,9 +22,9 @@
 #include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/bitops.h>
 #include <asm/irq.h>
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/addrspace.h>
 
diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c
index b0198549846..d182760f035 100644
--- a/drivers/pcmcia/m8xx_pcmcia.c
+++ b/drivers/pcmcia/m8xx_pcmcia.c
@@ -48,9 +48,9 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/fsl_devices.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/system.h>
 #include <asm/time.h>
 #include <asm/mpc8xx.h>
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 397f4ce849d..87b3493d88e 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -729,7 +729,7 @@ static void ps3av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *
 
 static const struct ps3av_monitor_quirk {
 	const char *monitor_name;
-	u32 clear_60, clear_50, clear_vesa;
+	u32 clear_60;
 } ps3av_monitor_quirks[] = {
 	{
 		.monitor_name	= "DELL 2007WFP",
@@ -757,10 +757,6 @@ static void ps3av_fixup_monitor_info(struct ps3av_info_monitor *info)
 				quirk->monitor_name);
 			info->res_60.res_bits &= ~quirk->clear_60;
 			info->res_60.native &= ~quirk->clear_60;
-			info->res_50.res_bits &= ~quirk->clear_50;
-			info->res_50.native &= ~quirk->clear_50;
-			info->res_vesa.res_bits &= ~quirk->clear_vesa;
-			info->res_vesa.native &= ~quirk->clear_vesa;
 			break;
 		}
 	}
diff --git a/drivers/ps3/vuart.c b/drivers/ps3/vuart.c
index bea25a1391e..9dea585ef80 100644
--- a/drivers/ps3/vuart.c
+++ b/drivers/ps3/vuart.c
@@ -22,11 +22,11 @@
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
+#include <linux/bitops.h>
 #include <asm/ps3.h>
 
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
-#include <asm/bitops.h>
 
 #include "vuart.h"
 
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index e4bf68ca96f..2fd49edcc71 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -21,11 +21,11 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
 #include <linux/amba/bus.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 0918b787c4d..6f1e9a9804b 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -29,8 +29,8 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/pm.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/hardware.h>
 #include <asm/irq.h>
 #include <asm/rtc.h>
diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c
index 16ea828e99f..ef7bc0a125e 100644
--- a/drivers/s390/cio/idset.c
+++ b/drivers/s390/cio/idset.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/slab.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include "idset.h"
 #include "css.h"
 
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 399695f7b1a..3561982749e 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -59,13 +59,13 @@
  *    1.15  Changed for 2.6 Kernel  No longer compiles on 2.4 or lower
  *    1.25  Added Packing support
  */
-#include <asm/bitops.h>
 #include <asm/ccwdev.h>
 #include <asm/ccwgroup.h>
 #include <asm/debug.h>
 #include <asm/idals.h>
 #include <asm/io.h>
 
+#include <linux/bitops.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
diff --git a/drivers/scsi/FlashPoint.c b/drivers/scsi/FlashPoint.c
index a7f916c0c9c..1c9078191d9 100644
--- a/drivers/scsi/FlashPoint.c
+++ b/drivers/scsi/FlashPoint.c
@@ -25,9 +25,6 @@
 
 #define FAILURE         0xFFFFFFFFL
 
-#define BIT(x)          ((unsigned char)(1<<(x)))	/* single-bit mask in bit position x */
-#define BITW(x)          ((unsigned short)(1<<(x)))	/* single-bit mask in bit position x */
-
 struct sccb;
 typedef void (*CALL_BK_FN) (struct sccb *);
 
@@ -374,9 +371,9 @@ typedef struct SCCBscam_info {
 #define  SCAM_ENABLED   BIT(2)
 #define  SCAM_LEVEL2    BIT(3)
 
-#define	RENEGO_ENA		BITW(10)
-#define	CONNIO_ENA		BITW(11)
-#define  GREEN_PC_ENA   BITW(12)
+#define	RENEGO_ENA		BIT(10)
+#define	CONNIO_ENA		BIT(11)
+#define  GREEN_PC_ENA   BIT(12)
 
 #define  AUTO_RATE_00   00
 #define  AUTO_RATE_05   01
@@ -511,23 +508,23 @@ typedef struct SCCBscam_info {
 
 #define  hp_intena		 0x40
 
-#define  RESET		 BITW(7)
-#define  PROG_HLT		 BITW(6)
-#define  PARITY		 BITW(5)
-#define  FIFO		 BITW(4)
-#define  SEL		 BITW(3)
-#define  SCAM_SEL		 BITW(2)
-#define  RSEL		 BITW(1)
-#define  TIMEOUT		 BITW(0)
-#define  BUS_FREE		 BITW(15)
-#define  XFER_CNT_0	 BITW(14)
-#define  PHASE		 BITW(13)
-#define  IUNKWN		 BITW(12)
-#define  ICMD_COMP	 BITW(11)
-#define  ITICKLE		 BITW(10)
-#define  IDO_STRT		 BITW(9)
-#define  ITAR_DISC	 BITW(8)
-#define  AUTO_INT		 (BITW(12)+BITW(11)+BITW(10)+BITW(9)+BITW(8))
+#define  RESET		 BIT(7)
+#define  PROG_HLT		 BIT(6)
+#define  PARITY		 BIT(5)
+#define  FIFO		 BIT(4)
+#define  SEL		 BIT(3)
+#define  SCAM_SEL		 BIT(2)
+#define  RSEL		 BIT(1)
+#define  TIMEOUT		 BIT(0)
+#define  BUS_FREE		 BIT(15)
+#define  XFER_CNT_0	 BIT(14)
+#define  PHASE		 BIT(13)
+#define  IUNKWN		 BIT(12)
+#define  ICMD_COMP	 BIT(11)
+#define  ITICKLE		 BIT(10)
+#define  IDO_STRT		 BIT(9)
+#define  ITAR_DISC	 BIT(8)
+#define  AUTO_INT		 (BIT(12)+BIT(11)+BIT(10)+BIT(9)+BIT(8))
 #define  CLR_ALL_INT	 0xFFFF
 #define  CLR_ALL_INT_1	 0xFF00
 
@@ -674,37 +671,37 @@ typedef struct SCCBscam_info {
 #define  BIOS_DATA_OFFSET     0x60
 #define  BIOS_RELATIVE_CARD   0x64
 
-#define  AR3      (BITW(9) + BITW(8))
-#define  SDATA    BITW(10)
+#define  AR3      (BIT(9) + BIT(8))
+#define  SDATA    BIT(10)
 
-#define  CRD_OP   BITW(11)	/* Cmp Reg. w/ Data */
+#define  CRD_OP   BIT(11)	/* Cmp Reg. w/ Data */
 
-#define  CRR_OP   BITW(12)	/* Cmp Reg. w. Reg. */
+#define  CRR_OP   BIT(12)	/* Cmp Reg. w. Reg. */
 
-#define  CPE_OP   (BITW(14)+BITW(11))	/* Cmp SCSI phs & Branch EQ */
+#define  CPE_OP   (BIT(14)+BIT(11))	/* Cmp SCSI phs & Branch EQ */
 
-#define  CPN_OP   (BITW(14)+BITW(12))	/* Cmp SCSI phs & Branch NOT EQ */
+#define  CPN_OP   (BIT(14)+BIT(12))	/* Cmp SCSI phs & Branch NOT EQ */
 
 #define  ADATA_OUT   0x00
-#define  ADATA_IN    BITW(8)
-#define  ACOMMAND    BITW(10)
-#define  ASTATUS     (BITW(10)+BITW(8))
-#define  AMSG_OUT    (BITW(10)+BITW(9))
-#define  AMSG_IN     (BITW(10)+BITW(9)+BITW(8))
+#define  ADATA_IN    BIT(8)
+#define  ACOMMAND    BIT(10)
+#define  ASTATUS     (BIT(10)+BIT(8))
+#define  AMSG_OUT    (BIT(10)+BIT(9))
+#define  AMSG_IN     (BIT(10)+BIT(9)+BIT(8))
 
-#define  BRH_OP   BITW(13)	/* Branch */
+#define  BRH_OP   BIT(13)	/* Branch */
 
 #define  ALWAYS   0x00
-#define  EQUAL    BITW(8)
-#define  NOT_EQ   BITW(9)
+#define  EQUAL    BIT(8)
+#define  NOT_EQ   BIT(9)
 
-#define  TCB_OP   (BITW(13)+BITW(11))	/* Test condition & branch */
+#define  TCB_OP   (BIT(13)+BIT(11))	/* Test condition & branch */
 
-#define  FIFO_0      BITW(10)
+#define  FIFO_0      BIT(10)
 
-#define  MPM_OP   BITW(15)	/* Match phase and move data */
+#define  MPM_OP   BIT(15)	/* Match phase and move data */
 
-#define  MRR_OP   BITW(14)	/* Move DReg. to Reg. */
+#define  MRR_OP   BIT(14)	/* Move DReg. to Reg. */
 
 #define  S_IDREG  (BIT(2)+BIT(1)+BIT(0))
 
@@ -712,9 +709,9 @@ typedef struct SCCBscam_info {
 #define  D_AR1    BIT(0)
 #define  D_BUCKET (BIT(2) + BIT(1) + BIT(0))
 
-#define  RAT_OP      (BITW(14)+BITW(13)+BITW(11))
+#define  RAT_OP      (BIT(14)+BIT(13)+BIT(11))
 
-#define  SSI_OP      (BITW(15)+BITW(11))
+#define  SSI_OP      (BIT(15)+BIT(11))
 
 #define  SSI_ITAR_DISC	(ITAR_DISC >> 8)
 #define  SSI_IDO_STRT	(IDO_STRT >> 8)
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 30905cebefb..a5763c6e936 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -521,7 +521,7 @@ config SCSI_DPT_I2O
 
 config SCSI_ADVANSYS
 	tristate "AdvanSys SCSI support"
-	depends on SCSI
+	depends on SCSI && VIRT_TO_BUS
 	depends on ISA || EISA || PCI
 	help
 	  This is a driver for all SCSI host adapters manufactured by
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index fa7ba64483f..252d1806467 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -47,9 +47,9 @@
 #include <linux/scatterlist.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 7ef0afc3cd6..5f3a0d7b18d 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -285,7 +285,7 @@ static void sas_discover_domain(struct work_struct *work)
 	dev = port->port_dev;
 
 	SAS_DPRINTK("DOING DISCOVERY on port %d, pid:%d\n", port->id,
-		    current->pid);
+		    task_pid_nr(current));
 
 	switch (dev->dev_type) {
 	case SAS_END_DEV:
@@ -320,7 +320,7 @@ static void sas_discover_domain(struct work_struct *work)
 	}
 
 	SAS_DPRINTK("DONE DISCOVERY on port %d, pid:%d, result:%d\n", port->id,
-		    current->pid, error);
+		    task_pid_nr(current), error);
 }
 
 static void sas_revalidate_domain(struct work_struct *work)
@@ -334,12 +334,12 @@ static void sas_revalidate_domain(struct work_struct *work)
 			&port->disc.pending);
 
 	SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
-		    current->pid);
+		    task_pid_nr(current));
 	if (port->port_dev)
 		res = sas_ex_revalidate_domain(port->port_dev);
 
 	SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
-		    port->id, current->pid, res);
+		    port->id, task_pid_nr(current), res);
 }
 
 /* ---------- Events ---------- */
diff --git a/drivers/scsi/nsp32.h b/drivers/scsi/nsp32.h
index a976e8193d1..6715ecb3bfc 100644
--- a/drivers/scsi/nsp32.h
+++ b/drivers/scsi/nsp32.h
@@ -69,11 +69,6 @@ typedef u32 u32_le;
 typedef u16 u16_le;
 
 /*
- * MACRO
- */
-#define BIT(x)      (1UL << (x))
-
-/*
  * BASIC Definitions
  */
 #ifndef TRUE
diff --git a/drivers/scsi/pcmcia/nsp_cs.h b/drivers/scsi/pcmcia/nsp_cs.h
index b7f0fa24641..98397559c53 100644
--- a/drivers/scsi/pcmcia/nsp_cs.h
+++ b/drivers/scsi/pcmcia/nsp_cs.h
@@ -24,7 +24,6 @@
 /************************************
  * Some useful macros...
  */
-#define BIT(x)      (1L << (x))
 
 /* SCSI initiator must be ID 7 */
 #define NSP_INITIATOR_ID  7
diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h
index 9bb3d1d2a92..fe415ec8565 100644
--- a/drivers/scsi/qla4xxx/ql4_fw.h
+++ b/drivers/scsi/qla4xxx/ql4_fw.h
@@ -671,7 +671,7 @@ struct continuation_t1_entry {
 #define ET_CONTINUE	ET_CONT_T1
 
 /* Marker entry structure*/
-struct marker_entry {
+struct qla4_marker_entry {
 	struct qla4_header hdr;	/* 00-03 */
 
 	uint32_t system_defined; /* 04-07 */
diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c
index 5006ecb3ef5..e4461b5d767 100644
--- a/drivers/scsi/qla4xxx/ql4_iocb.c
+++ b/drivers/scsi/qla4xxx/ql4_iocb.c
@@ -69,7 +69,7 @@ static int qla4xxx_get_req_pkt(struct scsi_qla_host *ha,
 static int qla4xxx_send_marker_iocb(struct scsi_qla_host *ha,
 				    struct ddb_entry *ddb_entry, int lun)
 {
-	struct marker_entry *marker_entry;
+	struct qla4_marker_entry *marker_entry;
 	unsigned long flags = 0;
 	uint8_t status = QLA_SUCCESS;
 
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 72229df9dc1..40604a09292 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -263,15 +263,15 @@ static unsigned int pl01x_get_mctrl(struct uart_port *port)
 	unsigned int result = 0;
 	unsigned int status = readw(uap->port.membase + UART01x_FR);
 
-#define BIT(uartbit, tiocmbit)		\
+#define TIOCMBIT(uartbit, tiocmbit)	\
 	if (status & uartbit)		\
 		result |= tiocmbit
 
-	BIT(UART01x_FR_DCD, TIOCM_CAR);
-	BIT(UART01x_FR_DSR, TIOCM_DSR);
-	BIT(UART01x_FR_CTS, TIOCM_CTS);
-	BIT(UART011_FR_RI, TIOCM_RNG);
-#undef BIT
+	TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
+	TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
+	TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
+	TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
+#undef TIOCMBIT
 	return result;
 }
 
@@ -282,18 +282,18 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl)
 
 	cr = readw(uap->port.membase + UART011_CR);
 
-#define	BIT(tiocmbit, uartbit)		\
+#define	TIOCMBIT(tiocmbit, uartbit)		\
 	if (mctrl & tiocmbit)		\
 		cr |= uartbit;		\
 	else				\
 		cr &= ~uartbit
 
-	BIT(TIOCM_RTS, UART011_CR_RTS);
-	BIT(TIOCM_DTR, UART011_CR_DTR);
-	BIT(TIOCM_OUT1, UART011_CR_OUT1);
-	BIT(TIOCM_OUT2, UART011_CR_OUT2);
-	BIT(TIOCM_LOOP, UART011_CR_LBE);
-#undef BIT
+	TIOCMBIT(TIOCM_RTS, UART011_CR_RTS);
+	TIOCMBIT(TIOCM_DTR, UART011_CR_DTR);
+	TIOCMBIT(TIOCM_OUT1, UART011_CR_OUT1);
+	TIOCMBIT(TIOCM_OUT2, UART011_CR_OUT2);
+	TIOCMBIT(TIOCM_LOOP, UART011_CR_LBE);
+#undef TIOCMBIT
 
 	writew(cr, uap->port.membase + UART011_CR);
 }
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 7e8724d3571..f523cdf4b02 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -442,11 +442,11 @@ static char *serial_version = "$Revision: 1.25 $";
 #include <asm/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
 #include <linux/delay.h>
 
 #include <asm/arch/svinto.h>
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index f013b4012c9..1f4f6d02fe2 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -460,7 +460,7 @@ static int checkintf(struct dev_state *ps, unsigned int ifnum)
 		return 0;
 	/* if not yet claimed, claim it for the driver */
 	dev_warn(&ps->dev->dev, "usbfs: process %d (%s) did not claim interface %u before use\n",
-	       current->pid, current->comm, ifnum);
+	       task_pid_nr(current), current->comm, ifnum);
 	return claimintf(ps, ifnum);
 }
 
diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c
index 73726c570a6..1d174dcb3ac 100644
--- a/drivers/usb/gadget/file_storage.c
+++ b/drivers/usb/gadget/file_storage.c
@@ -4006,7 +4006,7 @@ static int __init fsg_bind(struct usb_gadget *gadget)
 	DBG(fsg, "removable=%d, stall=%d, buflen=%u\n",
 			mod_data.removable, mod_data.can_stall,
 			mod_data.buflen);
-	DBG(fsg, "I/O thread pid: %d\n", fsg->thread_task->pid);
+	DBG(fsg, "I/O thread pid: %d\n", task_pid_nr(fsg->thread_task));
 
 	set_bit(REGISTERED, &fsg->atomic_bitflags);
 
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 9bb2cbfe4a3..5fb8675e0d6 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -62,7 +62,7 @@ struct cfb_info {
 	struct display_switch	*dispsw;
 	struct display		*display;
 	struct pci_dev		*dev;
-	unsigned char 		__iomem *region;
+	unsigned char		__iomem *region;
 	unsigned char		__iomem *regs;
 	u_int			id;
 	int			func_use_count;
@@ -97,11 +97,11 @@ MODULE_PARM_DESC(default_font, "Default font name");
 /*
  * Our access methods.
  */
-#define cyber2000fb_writel(val,reg,cfb)	writel(val, (cfb)->regs + (reg))
-#define cyber2000fb_writew(val,reg,cfb)	writew(val, (cfb)->regs + (reg))
-#define cyber2000fb_writeb(val,reg,cfb)	writeb(val, (cfb)->regs + (reg))
+#define cyber2000fb_writel(val, reg, cfb)	writel(val, (cfb)->regs + (reg))
+#define cyber2000fb_writew(val, reg, cfb)	writew(val, (cfb)->regs + (reg))
+#define cyber2000fb_writeb(val, reg, cfb)	writeb(val, (cfb)->regs + (reg))
 
-#define cyber2000fb_readb(reg,cfb)	readb((cfb)->regs + (reg))
+#define cyber2000fb_readb(reg, cfb)		readb((cfb)->regs + (reg))
 
 static inline void
 cyber2000_crtcw(unsigned int reg, unsigned int val, struct cfb_info *cfb)
@@ -221,12 +221,8 @@ cyber2000fb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 static void
 cyber2000fb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
-//	struct cfb_info *cfb = (struct cfb_info *)info;
-
-//	if (!(cfb->fb.var.accel_flags & FB_ACCELF_TEXT)) {
-		cfb_imageblit(info, image);
-		return;
-//	}
+	cfb_imageblit(info, image);
+	return;
 }
 
 static int cyber2000fb_sync(struct fb_info *info)
@@ -277,12 +273,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
 	/*
 	 * Pseudocolour:
-	 *         8     8
+	 *	   8     8
 	 * pixel --/--+--/-->  red lut  --> red dac
-	 *            |  8
-	 *            +--/--> green lut --> green dac
-	 *            |  8
-	 *            +--/-->  blue lut --> blue dac
+	 *	      |  8
+	 *	      +--/--> green lut --> green dac
+	 *	      |  8
+	 *	      +--/-->  blue lut --> blue dac
 	 */
 	case FB_VISUAL_PSEUDOCOLOR:
 		if (regno >= NR_PALETTE)
@@ -292,9 +288,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		green >>= 8;
 		blue >>= 8;
 
-		cfb->palette[regno].red   = red;
+		cfb->palette[regno].red = red;
 		cfb->palette[regno].green = green;
-		cfb->palette[regno].blue  = blue;
+		cfb->palette[regno].blue = blue;
 
 		cyber2000fb_writeb(regno, 0x3c8, cfb);
 		cyber2000fb_writeb(red, 0x3c9, cfb);
@@ -304,12 +300,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
 	/*
 	 * Direct colour:
-	 *          n     rl
-	 *  pixel --/--+--/-->  red lut  --> red dac
-	 *             |  gl
-	 *             +--/--> green lut --> green dac
-	 *             |  bl
-	 *             +--/-->  blue lut --> blue dac
+	 *	   n     rl
+	 * pixel --/--+--/-->  red lut  --> red dac
+	 *	      |  gl
+	 *	      +--/--> green lut --> green dac
+	 *	      |  bl
+	 *	      +--/-->  blue lut --> blue dac
 	 * n = bpp, rl = red length, gl = green length, bl = blue length
 	 */
 	case FB_VISUAL_DIRECTCOLOR:
@@ -325,9 +321,11 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 			 * to the high 6 bits of the LUT.
 			 */
 			cyber2000fb_writeb(regno << 2, 0x3c8, cfb);
-			cyber2000fb_writeb(cfb->palette[regno >> 1].red, 0x3c9, cfb);
+			cyber2000fb_writeb(cfb->palette[regno >> 1].red,
+					   0x3c9, cfb);
 			cyber2000fb_writeb(green, 0x3c9, cfb);
-			cyber2000fb_writeb(cfb->palette[regno >> 1].blue, 0x3c9, cfb);
+			cyber2000fb_writeb(cfb->palette[regno >> 1].blue,
+					   0x3c9, cfb);
 
 			green = cfb->palette[regno << 3].green;
 
@@ -335,9 +333,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		}
 
 		if (var->green.length >= 5 && regno < 32) {
-			cfb->palette[regno << 3].red   = red;
+			cfb->palette[regno << 3].red = red;
 			cfb->palette[regno << 3].green = green;
-			cfb->palette[regno << 3].blue  = blue;
+			cfb->palette[regno << 3].blue = blue;
 
 			/*
 			 * The 5 bits of each colour component are
@@ -351,9 +349,9 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 		}
 
 		if (var->green.length == 4 && regno < 16) {
-			cfb->palette[regno << 4].red   = red;
+			cfb->palette[regno << 4].red = red;
 			cfb->palette[regno << 4].green = green;
-			cfb->palette[regno << 4].blue  = blue;
+			cfb->palette[regno << 4].blue = blue;
 
 			/*
 			 * The 5 bits of each colour component are
@@ -377,12 +375,12 @@ cyber2000fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
 
 	/*
 	 * True colour:
-	 *          n     rl
-	 *  pixel --/--+--/--> red dac
-	 *             |  gl
-	 *             +--/--> green dac
-	 *             |  bl
-	 *             +--/--> blue dac
+	 *	   n     rl
+	 * pixel --/--+--/--> red dac
+	 *	      |  gl
+	 *	      +--/--> green dac
+	 *	      |  bl
+	 *	      +--/--> blue dac
 	 * n = bpp, rl = red length, gl = green length, bl = blue length
 	 */
 	case FB_VISUAL_TRUECOLOR:
@@ -494,9 +492,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
 
 	/* PLL registers */
 	cyber2000_grphw(EXT_DCLK_MULT, hw->clock_mult, cfb);
-	cyber2000_grphw(EXT_DCLK_DIV,  hw->clock_div, cfb);
+	cyber2000_grphw(EXT_DCLK_DIV, hw->clock_div, cfb);
 	cyber2000_grphw(EXT_MCLK_MULT, cfb->mclk_mult, cfb);
-	cyber2000_grphw(EXT_MCLK_DIV,  cfb->mclk_div, cfb);
+	cyber2000_grphw(EXT_MCLK_DIV, cfb->mclk_div, cfb);
 	cyber2000_grphw(0x90, 0x01, cfb);
 	cyber2000_grphw(0xb9, 0x80, cfb);
 	cyber2000_grphw(0xb9, 0x00, cfb);
@@ -515,8 +513,8 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
 	/*
 	 * Set up accelerator registers
 	 */
-	cyber2000fb_writew(hw->width,     CO_REG_SRC_WIDTH,  cfb);
-	cyber2000fb_writew(hw->width,     CO_REG_DEST_WIDTH, cfb);
+	cyber2000fb_writew(hw->width, CO_REG_SRC_WIDTH, cfb);
+	cyber2000fb_writew(hw->width, CO_REG_DEST_WIDTH, cfb);
 	cyber2000fb_writeb(hw->co_pixfmt, CO_REG_PIXFMT, cfb);
 }
 
@@ -549,15 +547,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
 {
 	u_int Htotal, Hblankend, Hsyncend;
 	u_int Vtotal, Vdispend, Vblankstart, Vblankend, Vsyncstart, Vsyncend;
-#define BIT(v,b1,m,b2) (((v >> b1) & m) << b2)
+#define ENCODE_BIT(v, b1, m, b2) ((((v) >> (b1)) & (m)) << (b2))
 
 	hw->crtc[13] = hw->pitch;
 	hw->crtc[17] = 0xe3;
 	hw->crtc[14] = 0;
 	hw->crtc[8]  = 0;
 
-	Htotal      = var->xres + var->right_margin +
-		      var->hsync_len + var->left_margin;
+	Htotal     = var->xres + var->right_margin +
+		     var->hsync_len + var->left_margin;
 
 	if (Htotal > 2080)
 		return -EINVAL;
@@ -567,15 +565,15 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
 	hw->crtc[2] = var->xres >> 3;
 	hw->crtc[4] = (var->xres + var->right_margin) >> 3;
 
-	Hblankend   = (Htotal - 4*8) >> 3;
+	Hblankend   = (Htotal - 4 * 8) >> 3;
 
-	hw->crtc[3] = BIT(Hblankend,  0, 0x1f,  0) |
-		      BIT(1,          0, 0x01,  7);
+	hw->crtc[3] = ENCODE_BIT(Hblankend,  0, 0x1f,  0) |
+		      ENCODE_BIT(1,          0, 0x01,  7);
 
 	Hsyncend    = (var->xres + var->right_margin + var->hsync_len) >> 3;
 
-	hw->crtc[5] = BIT(Hsyncend,   0, 0x1f,  0) |
-		      BIT(Hblankend,  5, 0x01,  7);
+	hw->crtc[5] = ENCODE_BIT(Hsyncend,   0, 0x1f,  0) |
+		      ENCODE_BIT(Hblankend,  5, 0x01,  7);
 
 	Vdispend    = var->yres - 1;
 	Vsyncstart  = var->yres + var->lower_margin;
@@ -590,20 +588,20 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
 	Vblankend   = Vtotal - 10;
 
 	hw->crtc[6]  = Vtotal;
-	hw->crtc[7]  = BIT(Vtotal,     8, 0x01,  0) |
-			BIT(Vdispend,   8, 0x01,  1) |
-			BIT(Vsyncstart, 8, 0x01,  2) |
-			BIT(Vblankstart,8, 0x01,  3) |
-			BIT(1,          0, 0x01,  4) |
-	        	BIT(Vtotal,     9, 0x01,  5) |
-			BIT(Vdispend,   9, 0x01,  6) |
-			BIT(Vsyncstart, 9, 0x01,  7);
-	hw->crtc[9]  = BIT(0,          0, 0x1f,  0) |
-		        BIT(Vblankstart,9, 0x01,  5) |
-			BIT(1,          0, 0x01,  6);
+	hw->crtc[7]  = ENCODE_BIT(Vtotal,     8, 0x01,  0) |
+			ENCODE_BIT(Vdispend,   8, 0x01,  1) |
+			ENCODE_BIT(Vsyncstart, 8, 0x01,  2) |
+			ENCODE_BIT(Vblankstart, 8, 0x01,  3) |
+			ENCODE_BIT(1,          0, 0x01,  4) |
+			ENCODE_BIT(Vtotal,     9, 0x01,  5) |
+			ENCODE_BIT(Vdispend,   9, 0x01,  6) |
+			ENCODE_BIT(Vsyncstart, 9, 0x01,  7);
+	hw->crtc[9]  = ENCODE_BIT(0,          0, 0x1f,  0) |
+			ENCODE_BIT(Vblankstart, 9, 0x01,  5) |
+			ENCODE_BIT(1,          0, 0x01,  6);
 	hw->crtc[10] = Vsyncstart;
-	hw->crtc[11] = BIT(Vsyncend,   0, 0x0f,  0) |
-		       BIT(1,          0, 0x01,  7);
+	hw->crtc[11] = ENCODE_BIT(Vsyncend,   0, 0x0f,  0) |
+		       ENCODE_BIT(1,          0, 0x01,  7);
 	hw->crtc[12] = Vdispend;
 	hw->crtc[15] = Vblankstart;
 	hw->crtc[16] = Vblankend;
@@ -615,10 +613,10 @@ cyber2000fb_decode_crtc(struct par_info *hw, struct cfb_info *cfb,
 	 * 4=LINECOMP:10 5-IVIDEO 6=FIXCNT
 	 */
 	hw->crtc_ofl =
-		BIT(Vtotal,     10, 0x01,  0) |
-		BIT(Vdispend,   10, 0x01,  1) |
-		BIT(Vsyncstart, 10, 0x01,  2) |
-		BIT(Vblankstart,10, 0x01,  3) |
+		ENCODE_BIT(Vtotal, 10, 0x01, 0) |
+		ENCODE_BIT(Vdispend, 10, 0x01, 1) |
+		ENCODE_BIT(Vsyncstart, 10, 0x01, 2) |
+		ENCODE_BIT(Vblankstart, 10, 0x01, 3) |
 		EXT_CRT_VRTOFL_LINECOMP10;
 
 	/* woody: set the interlaced bit... */
@@ -750,11 +748,11 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	var->red.msb_right	= 0;
 	var->green.msb_right	= 0;
 	var->blue.msb_right	= 0;
+	var->transp.offset	= 0;
+	var->transp.length	= 0;
 
 	switch (var->bits_per_pixel) {
 	case 8:	/* PSEUDOCOLOUR, 256 */
-		var->transp.offset	= 0;
-		var->transp.length	= 0;
 		var->red.offset		= 0;
 		var->red.length		= 8;
 		var->green.offset	= 0;
@@ -766,8 +764,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	case 16:/* DIRECTCOLOUR, 64k or 32k */
 		switch (var->green.length) {
 		case 6: /* RGB565, 64k */
-			var->transp.offset	= 0;
-			var->transp.length	= 0;
 			var->red.offset		= 11;
 			var->red.length		= 5;
 			var->green.offset	= 5;
@@ -778,8 +774,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
 		default:
 		case 5: /* RGB555, 32k */
-			var->transp.offset	= 0;
-			var->transp.length	= 0;
 			var->red.offset		= 10;
 			var->red.length		= 5;
 			var->green.offset	= 5;
@@ -802,8 +796,6 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 		break;
 
 	case 24:/* TRUECOLOUR, 16m */
-		var->transp.offset	= 0;
-		var->transp.length	= 0;
 		var->red.offset		= 16;
 		var->red.length		= 8;
 		var->green.offset	= 8;
@@ -830,7 +822,7 @@ cyber2000fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	mem = var->xres_virtual * var->yres_virtual * (var->bits_per_pixel / 8);
 	if (mem > cfb->fb.fix.smem_len)
 		var->yres_virtual = cfb->fb.fix.smem_len * 8 /
-			(var->bits_per_pixel * var->xres_virtual);
+				    (var->bits_per_pixel * var->xres_virtual);
 
 	if (var->yres > var->yres_virtual)
 		var->yres = var->yres_virtual;
@@ -921,7 +913,7 @@ static int cyber2000fb_set_par(struct fb_info *info)
 		hw.fetch <<= 1;
 	hw.fetch += 1;
 
-	cfb->fb.fix.line_length	= var->xres_virtual * var->bits_per_pixel / 8;
+	cfb->fb.fix.line_length = var->xres_virtual * var->bits_per_pixel / 8;
 
 	/*
 	 * Same here - if the size of the video mode exceeds the
@@ -952,7 +944,6 @@ static int cyber2000fb_set_par(struct fb_info *info)
 	return 0;
 }
 
-
 /*
  *    Pan or Wrap the Display
  */
@@ -1002,15 +993,15 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
 	switch (blank) {
 	case FB_BLANK_POWERDOWN:	/* powerdown - both sync lines down */
 		sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_0;
-		break;	
+		break;
 	case FB_BLANK_HSYNC_SUSPEND:	/* hsync off */
 		sync = EXT_SYNC_CTL_VS_NORMAL | EXT_SYNC_CTL_HS_0;
-		break;	
+		break;
 	case FB_BLANK_VSYNC_SUSPEND:	/* vsync off */
 		sync = EXT_SYNC_CTL_VS_0 | EXT_SYNC_CTL_HS_NORMAL;
 		break;
-	case FB_BLANK_NORMAL:	        /* soft blank */
-	default: /* unblank */
+	case FB_BLANK_NORMAL:		/* soft blank */
+	default:			/* unblank */
 		break;
 	}
 
@@ -1018,7 +1009,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
 
 	if (blank <= 1) {
 		/* turn on ramdacs */
-		cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN);
+		cfb->ramdac_powerdown &= ~(RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+					   RAMDAC_RAMPWRDN);
 		cyber2000fb_write_ramdac_ctrl(cfb);
 	}
 
@@ -1043,7 +1035,8 @@ static int cyber2000fb_blank(int blank, struct fb_info *info)
 
 	if (blank >= 2) {
 		/* turn off ramdacs */
-		cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS | RAMDAC_RAMPWRDN;
+		cfb->ramdac_powerdown |= RAMDAC_DACPWRDN | RAMDAC_BYPASS |
+					 RAMDAC_RAMPWRDN;
 		cyber2000fb_write_ramdac_ctrl(cfb);
 	}
 
@@ -1068,7 +1061,7 @@ static struct fb_ops cyber2000fb_ops = {
  * of this driver.  It is here solely at the moment to support the other
  * CyberPro modules external to this driver.
  */
-static struct cfb_info		*int_cfb_info;
+static struct cfb_info *int_cfb_info;
 
 /*
  * Enable access to the extended registers
@@ -1085,6 +1078,7 @@ void cyber2000fb_enable_extregs(struct cfb_info *cfb)
 		cyber2000_grphw(EXT_FUNC_CTL, old, cfb);
 	}
 }
+EXPORT_SYMBOL(cyber2000fb_enable_extregs);
 
 /*
  * Disable access to the extended registers
@@ -1104,11 +1098,13 @@ void cyber2000fb_disable_extregs(struct cfb_info *cfb)
 	else
 		cfb->func_use_count -= 1;
 }
+EXPORT_SYMBOL(cyber2000fb_disable_extregs);
 
 void cyber2000fb_get_fb_var(struct cfb_info *cfb, struct fb_var_screeninfo *var)
 {
 	memcpy(var, &cfb->fb.var, sizeof(struct fb_var_screeninfo));
 }
+EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * Attach a capture/tv driver to the core CyberX0X0 driver.
@@ -1122,13 +1118,15 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
 		info->fb_size	      = int_cfb_info->fb.fix.smem_len;
 		info->enable_extregs  = cyber2000fb_enable_extregs;
 		info->disable_extregs = cyber2000fb_disable_extregs;
-		info->info            = int_cfb_info;
+		info->info	      = int_cfb_info;
 
-		strlcpy(info->dev_name, int_cfb_info->fb.fix.id, sizeof(info->dev_name));
+		strlcpy(info->dev_name, int_cfb_info->fb.fix.id,
+			sizeof(info->dev_name));
 	}
 
 	return int_cfb_info != NULL;
 }
+EXPORT_SYMBOL(cyber2000fb_attach);
 
 /*
  * Detach a capture/tv driver from the core CyberX0X0 driver.
@@ -1136,12 +1134,7 @@ int cyber2000fb_attach(struct cyberpro_info *info, int idx)
 void cyber2000fb_detach(int idx)
 {
 }
-
-EXPORT_SYMBOL(cyber2000fb_attach);
 EXPORT_SYMBOL(cyber2000fb_detach);
-EXPORT_SYMBOL(cyber2000fb_enable_extregs);
-EXPORT_SYMBOL(cyber2000fb_disable_extregs);
-EXPORT_SYMBOL(cyber2000fb_get_fb_var);
 
 /*
  * These parameters give
@@ -1205,7 +1198,7 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
 	int i;
 
 	for (i = 0; i < sizeof(igs_regs); i += 2)
-		cyber2000_grphw(igs_regs[i], igs_regs[i+1], cfb);
+		cyber2000_grphw(igs_regs[i], igs_regs[i + 1], cfb);
 
 	if (cfb->id == ID_CYBERPRO_5000) {
 		unsigned char val;
@@ -1215,8 +1208,8 @@ static void cyberpro_init_hw(struct cfb_info *cfb)
 	}
 }
 
-static struct cfb_info * __devinit
-cyberpro_alloc_fb_info(unsigned int id, char *name)
+static struct cfb_info __devinit *cyberpro_alloc_fb_info(unsigned int id,
+							 char *name)
 {
 	struct cfb_info *cfb;
 
@@ -1228,9 +1221,9 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
 	cfb->id			= id;
 
 	if (id == ID_CYBERPRO_5000)
-		cfb->ref_ps	= 40690; // 24.576 MHz
+		cfb->ref_ps	= 40690; /* 24.576 MHz */
 	else
-		cfb->ref_ps	= 69842; // 14.31818 MHz (69841?)
+		cfb->ref_ps	= 69842; /* 14.31818 MHz (69841?) */
 
 	cfb->divisors[0]	= 1;
 	cfb->divisors[1]	= 2;
@@ -1282,8 +1275,7 @@ cyberpro_alloc_fb_info(unsigned int id, char *name)
 	return cfb;
 }
 
-static void
-cyberpro_free_fb_info(struct cfb_info *cfb)
+static void cyberpro_free_fb_info(struct cfb_info *cfb)
 {
 	if (cfb) {
 		/*
@@ -1300,8 +1292,7 @@ cyberpro_free_fb_info(struct cfb_info *cfb)
  *  video=cyber2000:font:fontname
  */
 #ifndef MODULE
-static int
-cyber2000fb_setup(char *options)
+static int cyber2000fb_setup(char *options)
 {
 	char *opt;
 
@@ -1315,7 +1306,8 @@ cyber2000fb_setup(char *options)
 		if (strncmp(opt, "font:", 5) == 0) {
 			static char default_font_storage[40];
 
-			strlcpy(default_font_storage, opt + 5, sizeof(default_font_storage));
+			strlcpy(default_font_storage, opt + 5,
+				sizeof(default_font_storage));
 			default_font = default_font_storage;
 			continue;
 		}
@@ -1354,10 +1346,18 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
 	 * Determine the size of the memory.
 	 */
 	switch (cfb->mem_ctl2 & MEM_CTL2_SIZE_MASK) {
-	case MEM_CTL2_SIZE_4MB:	smem_size = 0x00400000; break;
-	case MEM_CTL2_SIZE_2MB:	smem_size = 0x00200000; break;
-	case MEM_CTL2_SIZE_1MB: smem_size = 0x00100000; break;
-	default:		smem_size = 0x00100000; break;
+	case MEM_CTL2_SIZE_4MB:
+		smem_size = 0x00400000;
+		break;
+	case MEM_CTL2_SIZE_2MB:
+		smem_size = 0x00200000;
+		break;
+	case MEM_CTL2_SIZE_1MB:
+		smem_size = 0x00100000;
+		break;
+	default:
+		smem_size = 0x00100000;
+		break;
 	}
 
 	cfb->fb.fix.smem_len   = smem_size;
@@ -1366,8 +1366,8 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
 
 	err = -EINVAL;
 	if (!fb_find_mode(&cfb->fb.var, &cfb->fb, NULL, NULL, 0,
-	    		  &cyber2000fb_default_mode, 8)) {
-		printk("%s: no valid mode found\n", cfb->fb.fix.id);
+			  &cyber2000fb_default_mode, 8)) {
+		printk(KERN_ERR "%s: no valid mode found\n", cfb->fb.fix.id);
 		goto failed;
 	}
 
@@ -1377,7 +1377,7 @@ static int __devinit cyberpro_common_probe(struct cfb_info *cfb)
 	if (cfb->fb.var.yres_virtual < cfb->fb.var.yres)
 		cfb->fb.var.yres_virtual = cfb->fb.var.yres;
 
-//	fb_set_var(&cfb->fb.var, -1, &cfb->fb);
+/*	fb_set_var(&cfb->fb.var, -1, &cfb->fb); */
 
 	/*
 	 * Calculate the hsync and vsync frequencies.  Note that
@@ -1425,20 +1425,20 @@ static void cyberpro_common_resume(struct cfb_info *cfb)
 
 #include <asm/arch/hardware.h>
 
-static int __devinit
-cyberpro_vl_probe(void)
+static int __devinit cyberpro_vl_probe(void)
 {
 	struct cfb_info *cfb;
 	int err = -ENOMEM;
 
-	if (!request_mem_region(FB_START,FB_SIZE,"CyberPro2010")) return err;
+	if (!request_mem_region(FB_START, FB_SIZE, "CyberPro2010"))
+		return err;
 
 	cfb = cyberpro_alloc_fb_info(ID_CYBERPRO_2010, "CyberPro2010");
 	if (!cfb)
 		goto failed_release;
 
 	cfb->dev = NULL;
-	cfb->region = ioremap(FB_START,FB_SIZE);
+	cfb->region = ioremap(FB_START, FB_SIZE);
 	if (!cfb->region)
 		goto failed_ioremap;
 
@@ -1475,7 +1475,7 @@ failed:
 failed_ioremap:
 	cyberpro_free_fb_info(cfb);
 failed_release:
-	release_mem_region(FB_START,FB_SIZE);
+	release_mem_region(FB_START, FB_SIZE);
 
 	return err;
 }
@@ -1538,7 +1538,8 @@ static int cyberpro_pci_enable_mmio(struct cfb_info *cfb)
 	 * Allow the CyberPro to accept PCI burst accesses
 	 */
 	if (cfb->id == ID_CYBERPRO_2010) {
-		printk(KERN_INFO "%s: NOT enabling PCI bursts\n", cfb->fb.fix.id);
+		printk(KERN_INFO "%s: NOT enabling PCI bursts\n",
+		       cfb->fb.fix.id);
 	} else {
 		val = cyber2000_grphr(EXT_BUS_CTL, cfb);
 		if (!(val & EXT_BUS_CTL_PCIBURST_WRITE)) {
@@ -1688,9 +1689,10 @@ static int cyberpro_pci_resume(struct pci_dev *dev)
 }
 
 static struct pci_device_id cyberpro_pci_table[] = {
-//	Not yet
-//	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
-//		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+/*	Not yet
+ *	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_1682,
+ *		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_IGA_1682 },
+ */
 	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2000,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0, ID_CYBERPRO_2000 },
 	{ PCI_VENDOR_ID_INTERG, PCI_DEVICE_ID_INTERG_2010,
@@ -1700,7 +1702,7 @@ static struct pci_device_id cyberpro_pci_table[] = {
 	{ 0, }
 };
 
-MODULE_DEVICE_TABLE(pci,cyberpro_pci_table);
+MODULE_DEVICE_TABLE(pci, cyberpro_pci_table);
 
 static struct pci_driver cyberpro_driver = {
 	.name		= "CyberPro",
diff --git a/drivers/video/geode/video_gx.c b/drivers/video/geode/video_gx.c
index 7f3f18d0671..febf09c6349 100644
--- a/drivers/video/geode/video_gx.c
+++ b/drivers/video/geode/video_gx.c
@@ -127,7 +127,7 @@ static void gx_set_dclk_frequency(struct fb_info *info)
 	int timeout = 1000;
 
 	/* Rev. 1 Geode GXs use a 14 MHz reference clock instead of 48 MHz. */
-	if (cpu_data->x86_mask == 1) {
+	if (cpu_data(0).x86_mask == 1) {
 		pll_table = gx_pll_table_14MHz;
 		pll_table_len = ARRAY_SIZE(gx_pll_table_14MHz);
 	} else {
diff --git a/drivers/video/pnx4008/sdum.h b/drivers/video/pnx4008/sdum.h
index e8c5dcdd881..189c3d64138 100644
--- a/drivers/video/pnx4008/sdum.h
+++ b/drivers/video/pnx4008/sdum.h
@@ -77,9 +77,6 @@
 #define CONF_DIRTYDETECTION_OFF	(0x600)
 #define CONF_DIRTYDETECTION_ON	(0x601)
 
-/* Set the corresponding bit. */
-#define BIT(n) (0x1U << (n))
-
 struct dumchannel_uf {
 	int channelnr;
 	u32 *dirty;
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 38bd3737259..a684b1e8737 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -19,7 +20,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 #include <asm/arch/at91_st.h>
 
diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c
index 7150fb945ea..e3a29c30230 100644
--- a/drivers/watchdog/ks8695_wdt.c
+++ b/drivers/watchdog/ks8695_wdt.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/init.h>
@@ -18,7 +19,6 @@
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/arch/regs-timer.h>
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 719b066f73c..635ca454f56 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -39,11 +39,11 @@
 #include <linux/platform_device.h>
 #include <linux/moduleparam.h>
 #include <linux/clk.h>
+#include <linux/bitops.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 
 #include <asm/arch/prcm.h>
 
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 3475f47aaa4..34a2b3b8180 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -25,13 +25,13 @@
 #include <linux/miscdevice.h>
 #include <linux/watchdog.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 
 #ifdef CONFIG_ARCH_PXA
 #include <asm/arch/pxa-regs.h>
 #endif
 
 #include <asm/hardware.h>
-#include <asm/bitops.h>
 #include <asm/uaccess.h>
 
 #define OSCR_FREQ		CLOCK_TICK_RATE
diff --git a/fs/Kconfig b/fs/Kconfig
index e31f3691b15..cc28a69246a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,7 +220,7 @@ config JBD
 
 config JBD_DEBUG
 	bool "JBD (ext3) debugging support"
-	depends on JBD
+	depends on JBD && DEBUG_FS
 	help
 	  If you are using the ext3 journaled file system (or potentially any
 	  other file system/device using JBD), this option allows you to
@@ -229,10 +229,10 @@ config JBD_DEBUG
 	  debugging output will be turned off.
 
 	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /proc/sys/fs/jbd-debug", where N is a number between
-	  1 and 5, the higher the number, the more debugging output is
-	  generated.  To turn debugging off again, do
-	  "echo 0 > /proc/sys/fs/jbd-debug".
+	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+	  number between 1 and 5, the higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
 
 config JBD2
 	tristate
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index e7204d71acc..45f5992a095 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -80,7 +80,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = task_pgrp_nr(current);
 
 	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
 
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index c1489533277..5efff3c0d88 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -214,8 +214,8 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr
 
 	oz_mode = autofs_oz_mode(sbi);
 	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
-				"oz_mode = %d\n", pid_nr(task_pid(current)),
-				process_group(current), sbi->catatonic,
+				"oz_mode = %d\n", task_pid_nr(current),
+				task_pgrp_nr(current), sbi->catatonic,
 				oz_mode));
 
 	/*
@@ -536,7 +536,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
 	void __user *argp = (void __user *)arg;
 
-	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current)));
+	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
 
 	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d85f42fa920..2d4ae40718d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -131,7 +131,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
    filesystem without "magic".) */
 
 static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || process_group(current) == sbi->oz_pgrp;
+	return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
 }
 
 /* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index cd81f083667..7f05d6ccdb1 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -226,7 +226,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
 
 	*uid = current->uid;
 	*gid = current->gid;
-	*pgrp = process_group(current);
+	*pgrp = task_pgrp_nr(current);
 
 	*minproto = AUTOFS_MIN_PROTO_VERSION;
 	*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -323,7 +323,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->pipe = NULL;
 	sbi->catatonic = 1;
 	sbi->exp_timeout = 0;
-	sbi->oz_pgrp = process_group(current);
+	sbi->oz_pgrp = task_pgrp_nr(current);
 	sbi->sb = s;
 	sbi->version = 0;
 	sbi->sub_version = 0;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 45ff3d63b75..2bbcc8151dc 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -582,7 +582,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	oz_mode = autofs4_oz_mode(sbi);
 
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
-		 current->pid, process_group(current), sbi->catatonic, oz_mode);
+		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
 	unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name);
 	if (!unhashed) {
@@ -976,7 +976,7 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
 	void __user *p = (void __user *)arg;
 
 	DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u",
-		cmd,arg,sbi,process_group(current));
+		cmd,arg,sbi,task_pgrp_nr(current));
 
 	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
 	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6e2f3b8dde7..ba8de7ca260 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1383,10 +1383,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = process_session(p);
+	prstatus->pr_pid = task_pid_vnr(p);
+	prstatus->pr_ppid = task_pid_vnr(p->parent);
+	prstatus->pr_pgrp = task_pgrp_vnr(p);
+	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1429,10 +1429,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = process_session(p);
+	psinfo->pr_pid = task_pid_vnr(p);
+	psinfo->pr_ppid = task_pid_vnr(p->parent);
+	psinfo->pr_pgrp = task_pgrp_vnr(p);
+	psinfo->pr_sid = task_session_vnr(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 033861c6b8f..32649f2a165 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1342,10 +1342,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
 	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
 	prstatus->pr_sigpend = p->pending.signal.sig[0];
 	prstatus->pr_sighold = p->blocked.sig[0];
-	prstatus->pr_pid = p->pid;
-	prstatus->pr_ppid = p->parent->pid;
-	prstatus->pr_pgrp = process_group(p);
-	prstatus->pr_sid = process_session(p);
+	prstatus->pr_pid = task_pid_vnr(p);
+	prstatus->pr_ppid = task_pid_vnr(p->parent);
+	prstatus->pr_pgrp = task_pgrp_vnr(p);
+	prstatus->pr_sid = task_session_vnr(p);
 	if (thread_group_leader(p)) {
 		/*
 		 * This is the record for the group leader.  Add in the
@@ -1391,10 +1391,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 			psinfo->pr_psargs[i] = ' ';
 	psinfo->pr_psargs[len] = 0;
 
-	psinfo->pr_pid = p->pid;
-	psinfo->pr_ppid = p->parent->pid;
-	psinfo->pr_pgrp = process_group(p);
-	psinfo->pr_sid = process_session(p);
+	psinfo->pr_pid = task_pid_vnr(p);
+	psinfo->pr_ppid = task_pid_vnr(p->parent);
+	psinfo->pr_pgrp = task_pgrp_vnr(p);
+	psinfo->pr_sid = task_session_vnr(p);
 
 	i = p->state ? ffz(~p->state) + 1 : 0;
 	psinfo->pr_state = i;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index bed6215c079..3d419163c3d 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,19 @@
+Version 1.51
+------------
+Fix memory leak in statfs when mounted to very old servers (e.g.
+Windows 9x).  Add new feature "POSIX open" which allows servers
+which support the current POSIX Extensions to provide better semantics
+(e.g. delete for open files opened with posix open).  Take into
+account umask on posix mkdir not just older style mkdir.  Add
+ability to mount to IPC$ share (which allows CIFS named pipes to be
+opened, read and written as if they were files).  When 1st tree
+connect fails (e.g. due to signing negotiation failure) fix
+leak that causes cifsd not to stop and rmmod to fail to cleanup
+cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
+bigendian architectures. Fix possible memory corruption when
+EAGAIN returned on kern_recvmsg. Return better error if server
+requires packet signing but client has disabled it.
+
 Version 1.50
 ------------
 Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
@@ -6,7 +22,10 @@ done with "serverino" mount option).  Add support for POSIX Unlink
 Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
 mount option to allow disabling the CIFS Unix Extensions for just
 that mount. Fix hang on spinlock in find_writable_file (race when
-reopening file after session crash).
+reopening file after session crash).  Byte range unlock request to
+windows server could unlock more bytes (on server copy of file)
+than intended if start of unlock request is well before start of
+a previous byte range lock that we issued.
 
 Version 1.49
 ------------
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 6ecd9d6ba3f..ff6ba8d823f 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -3,4 +3,4 @@
 #
 obj-$(CONFIG_CIFS) += cifs.o
 
-cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o
+cifs-objs := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o fcntl.o readdir.o ioctl.o sess.o export.o cifsacl.o
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f50a88d58f7..2a01f3ef96a 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -385,10 +385,9 @@ asn1_oid_decode(struct asn1_ctx *ctx,
 	unsigned long *optr;
 
 	size = eoc - ctx->pointer + 1;
-	*oid = kmalloc(size * sizeof (unsigned long), GFP_ATOMIC);
-	if (*oid == NULL) {
+	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+	if (*oid == NULL)
 		return 0;
-	}
 
 	optr = *oid;
 
@@ -581,9 +580,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
 			return 0;
 		} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
 			   || (tag != ASN1_SEQ)) {
-			cFYI(1,
-			     ("Exit 6 cls = %d con = %d tag = %d end = %p (%d)",
-			      cls, con, tag, end, *end));
+			cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
+				cls, con, tag, end, *end));
 		}
 
 		if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 1bf8cf522ad..73c4c419663 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -209,13 +209,16 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
 		i++;
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
 		dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
-		length =
-		    sprintf(buf,
-			    "\n%d) %s Uses: %d Type: %s DevInfo: 0x%x "
-			    "Attributes: 0x%x\nPathComponentMax: %d Status: %d",
-			    i, tcon->treeName,
-			    atomic_read(&tcon->useCount),
-			    tcon->nativeFileSystem,
+		length = sprintf(buf, "\n%d) %s Uses: %d ", i,
+				 tcon->treeName, atomic_read(&tcon->useCount));
+		buf += length;
+		if (tcon->nativeFileSystem) {
+			length = sprintf(buf, "Type: %s ",
+					 tcon->nativeFileSystem);
+			buf += length;
+		}
+		length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x"
+				 "\nPathComponentMax: %d Status: %d",
 			    le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
 			    le32_to_cpu(tcon->fsAttrInfo.Attributes),
 			    le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
@@ -876,11 +879,16 @@ security_flags_write(struct file *file, const char __user *buffer,
 	if (count < 3) {
 		/* single char or single char followed by null */
 		c = flags_string[0];
-		if (c == '0' || c == 'n' || c == 'N')
+		if (c == '0' || c == 'n' || c == 'N') {
 			extended_security = CIFSSEC_DEF; /* default */
-		else if (c == '1' || c == 'y' || c == 'Y')
+			return count;
+		} else if (c == '1' || c == 'y' || c == 'Y') {
 			extended_security = CIFSSEC_MAX;
-		return count;
+			return count;
+		} else if (!isdigit(c)) {
+			cERROR(1, ("invalid flag %c", c));
+			return -EINVAL;
+		}
 	}
 	/* else we have a number */
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
new file mode 100644
index 00000000000..e8e56353f5a
--- /dev/null
+++ b/fs/cifs/cifsacl.c
@@ -0,0 +1,333 @@
+/*
+ *   fs/cifs/cifsacl.c
+ *
+ *   Copyright (C) International Business Machines  Corp., 2007
+ *   Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ *   Contains the routines for mapping CIFS/NTFS ACLs
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Lesser General Public License
+ *   along with this library; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/fs.h>
+#include "cifspdu.h"
+#include "cifsglob.h"
+#include "cifsacl.h"
+#include "cifsproto.h"
+#include "cifs_debug.h"
+
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
+	{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
+	{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
+	{{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
+	{{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"},
+	{{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"}
+};
+
+
+/* security id for everyone */
+static const struct cifs_sid sid_everyone =
+		{1, 1, {0, 0, 0, 0, 0, 0}, {} };
+/* group users */
+static const struct cifs_sid sid_user =
+		{1, 2 , {0, 0, 0, 0, 0, 5}, {} };
+
+
+int match_sid(struct cifs_sid *ctsid)
+{
+	int i, j;
+	int num_subauth, num_sat, num_saw;
+	struct cifs_sid *cwsid;
+
+	if (!ctsid)
+		return (-1);
+
+	for (i = 0; i < NUM_WK_SIDS; ++i) {
+		cwsid = &(wksidarr[i].cifssid);
+
+		/* compare the revision */
+		if (ctsid->revision != cwsid->revision)
+			continue;
+
+		/* compare all of the six auth values */
+		for (j = 0; j < 6; ++j) {
+			if (ctsid->authority[j] != cwsid->authority[j])
+				break;
+		}
+		if (j < 6)
+			continue; /* all of the auth values did not match */
+
+		/* compare all of the subauth values if any */
+		num_sat = ctsid->num_subauth;
+		num_saw = cwsid->num_subauth;
+		num_subauth = num_sat < num_saw ? num_sat : num_saw;
+		if (num_subauth) {
+			for (j = 0; j < num_subauth; ++j) {
+				if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
+					break;
+			}
+			if (j < num_subauth)
+				continue; /* all sub_auth values do not match */
+		}
+
+		cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
+		return (0); /* sids compare/match */
+	}
+
+	cFYI(1, ("No matching sid"));
+	return (-1);
+}
+
+/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
+   the same returns 1, if they do not match returns 0 */
+int compare_sids(struct cifs_sid *ctsid, struct cifs_sid *cwsid)
+{
+	int i;
+	int num_subauth, num_sat, num_saw;
+
+	if ((!ctsid) || (!cwsid))
+		return (0);
+
+	/* compare the revision */
+	if (ctsid->revision != cwsid->revision)
+		return (0);
+
+	/* compare all of the six auth values */
+	for (i = 0; i < 6; ++i) {
+		if (ctsid->authority[i] != cwsid->authority[i])
+			return (0);
+	}
+
+	/* compare all of the subauth values if any */
+	num_sat = ctsid->num_subauth;
+	num_saw = cwsid->num_subauth;
+	num_subauth = num_sat < num_saw ? num_sat : num_saw;
+	if (num_subauth) {
+		for (i = 0; i < num_subauth; ++i) {
+			if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
+				return (0);
+		}
+	}
+
+	return (1); /* sids compare/match */
+}
+
+
+static void parse_ace(struct cifs_ace *pace, char *end_of_acl)
+{
+	int num_subauth;
+
+	/* validate that we do not go past end of acl */
+
+	/* XXX this if statement can be removed
+	if (end_of_acl < (char *)pace + sizeof(struct cifs_ace)) {
+		cERROR(1, ("ACL too small to parse ACE"));
+		return;
+	} */
+
+	num_subauth = pace->num_subauth;
+	if (num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+		int i;
+		cFYI(1, ("ACE revision %d num_subauth %d",
+			pace->revision, pace->num_subauth));
+		for (i = 0; i < num_subauth; ++i) {
+			cFYI(1, ("ACE sub_auth[%d]: 0x%x", i,
+				le32_to_cpu(pace->sub_auth[i])));
+		}
+
+		/* BB add length check to make sure that we do not have huge
+			num auths and therefore go off the end */
+
+		cFYI(1, ("RID %d", le32_to_cpu(pace->sub_auth[num_subauth-1])));
+#endif
+	}
+
+	return;
+}
+
+static void parse_ntace(struct cifs_ntace *pntace, char *end_of_acl)
+{
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)pntace + sizeof(struct cifs_ntace)) {
+		cERROR(1, ("ACL too small to parse NT ACE"));
+		return;
+	}
+
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("NTACE type %d flags 0x%x size %d, access Req 0x%x",
+		pntace->type, pntace->flags, pntace->size,
+		pntace->access_req));
+#endif
+	return;
+}
+
+
+
+static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
+		       struct cifs_sid *pownersid, struct cifs_sid *pgrpsid)
+{
+	int i;
+	int num_aces = 0;
+	int acl_size;
+	char *acl_base;
+	struct cifs_ntace **ppntace;
+	struct cifs_ace **ppace;
+
+	/* BB need to add parm so we can store the SID BB */
+
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size)) {
+		cERROR(1, ("ACL too small to parse DACL"));
+		return;
+	}
+
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("DACL revision %d size %d num aces %d",
+		le16_to_cpu(pdacl->revision), le16_to_cpu(pdacl->size),
+		le32_to_cpu(pdacl->num_aces)));
+#endif
+
+	acl_base = (char *)pdacl;
+	acl_size = sizeof(struct cifs_acl);
+
+	num_aces = le32_to_cpu(pdacl->num_aces);
+	if (num_aces  > 0) {
+		ppntace = kmalloc(num_aces * sizeof(struct cifs_ntace *),
+				GFP_KERNEL);
+		ppace = kmalloc(num_aces * sizeof(struct cifs_ace *),
+				GFP_KERNEL);
+
+/*		cifscred->cecount = pdacl->num_aces;
+		cifscred->ntaces = kmalloc(num_aces *
+			sizeof(struct cifs_ntace *), GFP_KERNEL);
+		cifscred->aces = kmalloc(num_aces *
+			sizeof(struct cifs_ace *), GFP_KERNEL);*/
+
+		for (i = 0; i < num_aces; ++i) {
+			ppntace[i] = (struct cifs_ntace *)
+					(acl_base + acl_size);
+			ppace[i] = (struct cifs_ace *) ((char *)ppntace[i] +
+					sizeof(struct cifs_ntace));
+
+			parse_ntace(ppntace[i], end_of_acl);
+			if (end_of_acl < ((char *)ppace[i] +
+					(le16_to_cpu(ppntace[i]->size) -
+					sizeof(struct cifs_ntace)))) {
+				cERROR(1, ("ACL too small to parse ACE"));
+				break;
+			} else
+				parse_ace(ppace[i], end_of_acl);
+
+/*			memcpy((void *)(&(cifscred->ntaces[i])),
+				(void *)ppntace[i],
+				sizeof(struct cifs_ntace));
+			memcpy((void *)(&(cifscred->aces[i])),
+				(void *)ppace[i],
+				sizeof(struct cifs_ace)); */
+
+			acl_base = (char *)ppntace[i];
+			acl_size = le16_to_cpu(ppntace[i]->size);
+		}
+
+		kfree(ppace);
+		kfree(ppntace);
+	}
+
+	return;
+}
+
+
+static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
+{
+
+	/* BB need to add parm so we can store the SID BB */
+
+	/* validate that we do not go past end of acl */
+	if (end_of_acl < (char *)psid + sizeof(struct cifs_sid)) {
+		cERROR(1, ("ACL too small to parse SID"));
+		return -EINVAL;
+	}
+
+	if (psid->num_subauth) {
+#ifdef CONFIG_CIFS_DEBUG2
+		int i;
+		cFYI(1, ("SID revision %d num_auth %d First subauth 0x%x",
+			psid->revision, psid->num_subauth, psid->sub_auth[0]));
+
+		for (i = 0; i < psid->num_subauth; i++) {
+			cFYI(1, ("SID sub_auth[%d]: 0x%x ", i,
+				le32_to_cpu(psid->sub_auth[i])));
+		}
+
+		/* BB add length check to make sure that we do not have huge
+			num auths and therefore go off the end */
+		cFYI(1, ("RID 0x%x",
+			le32_to_cpu(psid->sub_auth[psid->num_subauth-1])));
+#endif
+	}
+
+	return 0;
+}
+
+
+/* Convert CIFS ACL to POSIX form */
+int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len)
+{
+	int rc;
+	struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
+	struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
+	char *end_of_acl = ((char *)pntsd) + acl_len;
+
+	owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+				le32_to_cpu(pntsd->osidoffset));
+	group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+				le32_to_cpu(pntsd->gsidoffset));
+	dacl_ptr = (struct cifs_acl *)((char *)pntsd +
+				le32_to_cpu(pntsd->dacloffset));
+#ifdef CONFIG_CIFS_DEBUG2
+	cFYI(1, ("revision %d type 0x%x ooffset 0x%x goffset 0x%x "
+		 "sacloffset 0x%x dacloffset 0x%x",
+		 pntsd->revision, pntsd->type, le32_to_cpu(pntsd->osidoffset),
+		 le32_to_cpu(pntsd->gsidoffset),
+		 le32_to_cpu(pntsd->sacloffset),
+		 le32_to_cpu(pntsd->dacloffset)));
+#endif
+	rc = parse_sid(owner_sid_ptr, end_of_acl);
+	if (rc)
+		return rc;
+
+	rc = parse_sid(group_sid_ptr, end_of_acl);
+	if (rc)
+		return rc;
+
+	parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, group_sid_ptr);
+
+/*	cifscred->uid = owner_sid_ptr->rid;
+	cifscred->gid = group_sid_ptr->rid;
+	memcpy((void *)(&(cifscred->osid)), (void *)owner_sid_ptr,
+			sizeof (struct cifs_sid));
+	memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
+			sizeof (struct cifs_sid)); */
+
+
+	return (0);
+}
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 5eff35d6e56..420f8781364 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsacl.h
  *
- *   Copyright (c) International Business Machines  Corp., 2005
+ *   Copyright (c) International Business Machines  Corp., 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -22,17 +22,65 @@
 #ifndef _CIFSACL_H
 #define _CIFSACL_H
 
+
+#define NUM_AUTHS 6 /* number of authority fields */
+#define NUM_SUBAUTHS 5 /* number of sub authority fields */
+#define NUM_WK_SIDS 7 /* number of well known sids */
+#define SIDNAMELENGTH 20 /* long enough for the ones we care about */
+
+#define READ_BIT        0x4
+#define WRITE_BIT       0x2
+#define EXEC_BIT        0x1
+
+#define UBITSHIFT	6
+#define GBITSHIFT	3
+
+struct cifs_ntsd {
+	__le16 revision; /* revision level */
+	__le16 type;
+	__le32 osidoffset;
+	__le32 gsidoffset;
+	__le32 sacloffset;
+	__le32 dacloffset;
+} __attribute__((packed));
+
 struct cifs_sid {
 	__u8 revision; /* revision level */
-	__u8 num_subauths;
+	__u8 num_subauth;
+	__u8 authority[6];
+	__le32 sub_auth[5]; /* sub_auth[num_subauth] */ /* BB FIXME endianness BB */
+} __attribute__((packed));
+
+struct cifs_acl {
+	__le16 revision; /* revision level */
+	__le16 size;
+	__le32 num_aces;
+} __attribute__((packed));
+
+struct cifs_ntace { /* first part of ACE which contains perms */
+	__u8 type;
+	__u8 flags;
+	__le16 size;
+	__le32 access_req;
+} __attribute__((packed));
+
+struct cifs_ace { /* last part of ACE which includes user info */
+	__u8 revision; /* revision level */
+	__u8 num_subauth;
 	__u8 authority[6];
-	__u32 sub_auth[4];
-	/* next sub_auth if any ... */
+	__le32 sub_auth[5];
 } __attribute__((packed));
 
-/* everyone */
-/* extern const struct cifs_sid sid_everyone;*/
-/* group users */
-/* extern const struct cifs_sid sid_user;*/
+struct cifs_wksid {
+	struct cifs_sid cifssid;
+	char sidname[SIDNAMELENGTH];
+} __attribute__((packed));
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+
+extern int match_sid(struct cifs_sid *);
+extern int compare_sids(struct cifs_sid *, struct cifs_sid *);
+
+#endif /*  CONFIG_CIFS_EXPERIMENTAL */
 
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 36272293027..632070b4275 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -345,7 +345,7 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
 	if (user == NULL)
 		goto calc_exit_2;
-	len = cifs_strtoUCS(user, ses->userName, len, nls_cp);
+	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
 	hmac_md5_update((char *)user, 2*len, pctxt);
 
@@ -356,7 +356,8 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
 		if (domain == NULL)
 			goto calc_exit_1;
-		len = cifs_strtoUCS(domain, ses->domainName, len, nls_cp);
+		len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
+					nls_cp);
 		/* the following line was removed since it didn't work well
 		   with lower cased domain name that passed as an option.
 		   Maybe converting the domain name earlier makes sense */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ba8f7868cb2..a6fbea57c4b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -49,10 +49,6 @@
 static struct quotactl_ops cifs_quotactl_ops;
 #endif /* QUOTA */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-extern struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
-
 int cifsFYI = 0;
 int cifsERROR = 1;
 int traceSMB = 0;
@@ -240,9 +236,9 @@ static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd)
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
 		return 0;
-	} else /* file mode might have been restricted at mount time
+	else /* file mode might have been restricted at mount time
 		on the client (above and beyond ACL on servers) for
 		servers which do not support setting and viewing mode bits,
 		so allowing client to check permissions is useful */
@@ -312,15 +308,15 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 					seq_printf(s, ",domain=%s",
 					   cifs_sb->tcon->ses->domainName);
 			}
+			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
+			   !(cifs_sb->tcon->unix_ext))
+				seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
+			if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
+			   !(cifs_sb->tcon->unix_ext))
+				seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
 		}
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
 			seq_printf(s, ",posixpaths");
-		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) ||
-		   !(cifs_sb->tcon->unix_ext))
-			seq_printf(s, ",uid=%d", cifs_sb->mnt_uid);
-		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) ||
-		   !(cifs_sb->tcon->unix_ext))
-			seq_printf(s, ",gid=%d", cifs_sb->mnt_gid);
 		seq_printf(s, ",rsize=%d", cifs_sb->rsize);
 		seq_printf(s, ",wsize=%d", cifs_sb->wsize);
 	}
@@ -346,7 +342,7 @@ int cifs_xquota_set(struct super_block *sb, int quota_type, qid_t qid,
 	if (pTcon) {
 		cFYI(1, ("set type: 0x%x id: %d", quota_type, qid));
 	} else {
-		return -EIO;
+		rc = -EIO;
 	}
 
 	FreeXid(xid);
@@ -716,7 +712,7 @@ static int
 cifs_init_inodecache(void)
 {
 	cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
-					      sizeof (struct cifsInodeInfo),
+					      sizeof(struct cifsInodeInfo),
 					      0, (SLAB_RECLAIM_ACCOUNT|
 						SLAB_MEM_SPREAD),
 					      cifs_init_once);
@@ -816,8 +812,8 @@ static int
 cifs_init_mids(void)
 {
 	cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids",
-				sizeof (struct mid_q_entry), 0,
-				SLAB_HWCACHE_ALIGN, NULL);
+					    sizeof(struct mid_q_entry), 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
 	if (cifs_mid_cachep == NULL)
 		return -ENOMEM;
 
@@ -829,8 +825,8 @@ cifs_init_mids(void)
 	}
 
 	cifs_oplock_cachep = kmem_cache_create("cifs_oplock_structs",
-				sizeof (struct oplock_q_entry), 0,
-				SLAB_HWCACHE_ALIGN, NULL);
+					sizeof(struct oplock_q_entry), 0,
+					SLAB_HWCACHE_ALIGN, NULL);
 	if (cifs_oplock_cachep == NULL) {
 		mempool_destroy(cifs_mid_poolp);
 		kmem_cache_destroy(cifs_mid_cachep);
@@ -882,7 +878,8 @@ static int cifs_oplock_thread(void *dummyarg)
 				the call */
 				/* mutex_lock(&inode->i_mutex);*/
 				if (S_ISREG(inode->i_mode)) {
-					rc = filemap_fdatawrite(inode->i_mapping);
+					rc =
+					   filemap_fdatawrite(inode->i_mapping);
 					if (CIFS_I(inode)->clientCanCacheRead
 									 == 0) {
 						filemap_fdatawait(inode->i_mapping);
@@ -907,8 +904,7 @@ static int cifs_oplock_thread(void *dummyarg)
 					    0 /* len */ , 0 /* offset */, 0,
 					    0, LOCKING_ANDX_OPLOCK_RELEASE,
 					    0 /* wait flag */);
-					cFYI(1, 
-					      ("Oplock release rc = %d ", rc));
+					cFYI(1, ("Oplock release rc = %d", rc));
 				}
 			} else
 				spin_unlock(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a20de77a385..0a3ee5a322b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -1,7 +1,7 @@
 /*
  *   fs/cifs/cifsfs.h
  *
- *   Copyright (c) International Business Machines  Corp., 2002, 2005
+ *   Copyright (c) International Business Machines  Corp., 2002, 2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -99,7 +99,12 @@ extern int 	cifs_setxattr(struct dentry *, const char *, const void *,
 			size_t, int);
 extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
-extern int cifs_ioctl (struct inode *inode, struct file *filep,
+extern int cifs_ioctl(struct inode *inode, struct file *filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.50"
+
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+extern struct export_operations cifs_export_ops;
+#endif /* EXPERIMENTAL */
+
+#define CIFS_VERSION   "1.51"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b98742fc3b5..87f51f23276 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -19,6 +19,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include "cifs_fs_sb.h"
+#include "cifsacl.h"
 /*
  * The sizes of various internal tables and strings
  */
@@ -89,7 +90,8 @@ enum statusEnum {
 };
 
 enum securityEnum {
-	LANMAN = 0,             /* Legacy LANMAN auth */
+	PLAINTXT = 0, 		/* Legacy with Plaintext passwords */
+	LANMAN,			/* Legacy LANMAN auth */
 	NTLM,			/* Legacy NTLM012 auth with NTLM hash */
 	NTLMv2,			/* Legacy NTLM auth with NTLMv2 hash */
 	RawNTLMSSP,		/* NTLMSSP without SPNEGO */
@@ -115,6 +117,17 @@ struct mac_key {
 	} data;
 };
 
+struct cifs_cred {
+	int uid;
+	int gid;
+	int mode;
+	int cecount;
+	struct cifs_sid osid;
+	struct cifs_sid gsid;
+	struct cifs_ntace *ntaces;
+	struct cifs_ace *aces;
+};
+
 /*
  *****************************************************************
  * Except the CIFS PDUs themselves all the
@@ -279,6 +292,7 @@ struct cifsTconInfo {
 	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
 	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
 	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
+	unsigned ipc:1;		/* set if connection to IPC$ eg for RPC/PIPES */
 	unsigned retry:1;
 	unsigned nocase:1;
 	unsigned unix_ext:1; /* if off disable Linux extensions to CIFS protocol
@@ -329,6 +343,7 @@ struct cifsFileInfo {
 	struct list_head llist; /* list of byte range locks we have. */
 	unsigned closePend:1;	/* file is marked to close */
 	unsigned invalidHandle:1;  /* file closed via session abend */
+	unsigned messageMode:1;    /* for pipes: message vs byte mode */
 	atomic_t wrtPending;   /* handle in use - defer close */
 	struct semaphore fh_sem; /* prevents reopen race after dead ses*/
 	char *search_resume_name; /* BB removeme BB */
@@ -464,6 +479,9 @@ struct dir_notify_req {
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MAY_LANMAN	0x00010
 #define   CIFSSEC_MAY_PLNTXT	0x00020
+#else
+#define   CIFSSEC_MAY_LANMAN    0
+#define   CIFSSEC_MAY_PLNTXT    0
 #endif /* weak passwords */
 #define   CIFSSEC_MAY_SEAL	0x00040 /* not supported yet */
 
@@ -477,14 +495,23 @@ require use of the stronger protocol */
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 #define   CIFSSEC_MUST_LANMAN	0x10010
 #define   CIFSSEC_MUST_PLNTXT	0x20020
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x3F03F /* allows weak security but also krb5 */
+#else
 #define   CIFSSEC_MASK          0x37037 /* current flags supported if weak */
+#endif /* UPCALL */
+#else /* do not allow weak pw hash */
+#ifdef CONFIG_CIFS_UPCALL
+#define   CIFSSEC_MASK          0x0F00F /* flags supported if no weak allowed */
 #else
-#define	  CIFSSEC_MASK          0x07007 /* flags supported if no weak config */
+#define	  CIFSSEC_MASK          0x07007 /* flags supported if no weak allowed */
+#endif /* UPCALL */
 #endif /* WEAK_PW_HASH */
 #define   CIFSSEC_MUST_SEAL	0x40040 /* not supported yet */
 
 #define   CIFSSEC_DEF  CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2
 #define   CIFSSEC_MAX  CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2
+#define   CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
 /*
  *****************************************************************
  * All constants go here
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 6a2056e58ce..c41ff74e912 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -215,6 +215,12 @@
 					 /* file_execute, file_read_attributes*/
 					 /* write_dac, and delete.           */
 
+#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
+#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
+				| FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
+#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
+
+
 /*
  * Invalid readdir handle
  */
@@ -360,10 +366,10 @@ struct smb_hdr {
 	__u8 WordCount;
 } __attribute__((packed));
 /* given a pointer to an smb_hdr retrieve the value of byte count */
-#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
-#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) ) )
+#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
+#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
-#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2* smb_var->WordCount) + 2 )
+#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
 
 /*
  * Computer Name Length (since Netbios name was length 16 with last byte 0x20)
@@ -716,6 +722,14 @@ typedef struct smb_com_findclose_req {
 #define REQ_OPENDIRONLY    0x00000008
 #define REQ_EXTENDED_INFO  0x00000010
 
+/* File type */
+#define DISK_TYPE		0x0000
+#define BYTE_PIPE_TYPE		0x0001
+#define MESSAGE_PIPE_TYPE	0x0002
+#define PRINTER_TYPE		0x0003
+#define COMM_DEV_TYPE		0x0004
+#define UNKNOWN_TYPE		0xFFFF
+
 typedef struct smb_com_open_req {	/* also handles create */
 	struct smb_hdr hdr;	/* wct = 24 */
 	__u8 AndXCommand;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 04a69dafedb..1a883663b22 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -50,7 +50,8 @@ extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *,
 			int * /* bytes returned */ , const int long_op);
 extern int SendReceive2(const unsigned int /* xid */ , struct cifsSesInfo *,
 			struct kvec *, int /* nvec to send */,
-			int * /* type of buf returned */ , const int long_op);
+			int * /* type of buf returned */ , const int long_op,
+			const int logError /* whether to log status code*/ );
 extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
 					struct cifsTconInfo *,
 				struct smb_hdr * /* input */ ,
@@ -65,7 +66,7 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			enum securityEnum *secType);
 extern int cifs_inet_pton(int, char *source, void *dst);
-extern int map_smb_to_linux_error(struct smb_hdr *smb);
+extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr);
 extern void header_assemble(struct smb_hdr *, char /* command */ ,
 			    const struct cifsTconInfo *, int /* length of
 			    fixed section (word count) in two byte units */);
@@ -304,12 +305,13 @@ extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn,
 				 const char *pass);
 extern int CalcNTLMv2_partial_mac_key(struct cifsSesInfo *,
 			const struct nls_table *);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char * );
+extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
 extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
 			     const struct nls_table *);
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key);
 #endif /* CIFS_WEAK_PW_HASH */
+extern int parse_sec_desc(struct cifs_ntsd *, int);
 extern int CIFSSMBCopy(int xid,
 			struct cifsTconInfo *source_tcon,
 			const char *fromName,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 8eb102f940d..f0d9a485d09 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -34,10 +34,10 @@
 #include <asm/uaccess.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
+#include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_unicode.h"
 #include "cifs_debug.h"
-#include "cifsacl.h"
 
 #ifdef CONFIG_CIFS_POSIX
 static struct {
@@ -94,9 +94,8 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
 	write_lock(&GlobalSMBSeslock);
 	list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
 		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
-		if (open_file) {
+		if (open_file)
 			open_file->invalidHandle = TRUE;
-		}
 	}
 	write_unlock(&GlobalSMBSeslock);
 	/* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -439,8 +438,13 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 
 	pSMB->hdr.Mid = GetNextMid(server);
 	pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS);
+
 	if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5)
 		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+	else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_KRB5) {
+		cFYI(1, ("Kerberos only mechanism, enable extended security"));
+		pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
+	}
 
 	count = 0;
 	for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -513,7 +517,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 				(int)ts.tv_sec, (int)utc.tv_sec,
 				(int)(utc.tv_sec - ts.tv_sec)));
 			val = (int)(utc.tv_sec - ts.tv_sec);
-			seconds = val < 0 ? -val : val;
+			seconds = abs(val);
 			result = (seconds / MIN_TZ_ADJ) * MIN_TZ_ADJ;
 			remain = seconds % MIN_TZ_ADJ;
 			if (remain >= (MIN_TZ_ADJ / 2))
@@ -574,7 +578,20 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 		server->secType = NTLM;
 	else if (secFlags & CIFSSEC_MAY_NTLMV2)
 		server->secType = NTLMv2;
-	/* else krb5 ... any others ... */
+	else if (secFlags & CIFSSEC_MAY_KRB5)
+		server->secType = Kerberos;
+	else if (secFlags & CIFSSEC_MAY_LANMAN)
+		server->secType = LANMAN;
+/* #ifdef CONFIG_CIFS_EXPERIMENTAL
+	else if (secFlags & CIFSSEC_MAY_PLNTXT)
+		server->secType = ??
+#endif */
+	else {
+		rc = -EOPNOTSUPP;
+		cERROR(1, ("Invalid security type"));
+		goto neg_err_exit;
+	}
+	/* else ... any others ...? */
 
 	/* one byte, so no need to convert this or EncryptionKeyLen from
 	   little endian */
@@ -604,22 +621,26 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
 		(server->capabilities & CAP_EXTENDED_SECURITY)) {
 		count = pSMBr->ByteCount;
-		if (count < 16)
+		if (count < 16) {
 			rc = -EIO;
-		else if (count == 16) {
-			server->secType = RawNTLMSSP;
-			if (server->socketUseCount.counter > 1) {
-				if (memcmp(server->server_GUID,
-					   pSMBr->u.extended_response.
-					   GUID, 16) != 0) {
-					cFYI(1, ("server UID changed"));
-					memcpy(server->server_GUID,
-						pSMBr->u.extended_response.GUID,
-						16);
-				}
-			} else
+			goto neg_err_exit;
+		}
+
+		if (server->socketUseCount.counter > 1) {
+			if (memcmp(server->server_GUID,
+				   pSMBr->u.extended_response.
+				   GUID, 16) != 0) {
+				cFYI(1, ("server UID changed"));
 				memcpy(server->server_GUID,
-				       pSMBr->u.extended_response.GUID, 16);
+					pSMBr->u.extended_response.GUID,
+					16);
+			}
+		} else
+			memcpy(server->server_GUID,
+			       pSMBr->u.extended_response.GUID, 16);
+
+		if (count == 16) {
+			server->secType = RawNTLMSSP;
 		} else {
 			rc = decode_negTokenInit(pSMBr->u.extended_response.
 						 SecurityBlob,
@@ -642,10 +663,12 @@ signing_check:
 		/* MUST_SIGN already includes the MAY_SIGN FLAG
 		   so if this is zero it means that signing is disabled */
 		cFYI(1, ("Signing disabled"));
-		if (server->secMode & SECMODE_SIGN_REQUIRED)
+		if (server->secMode & SECMODE_SIGN_REQUIRED) {
 			cERROR(1, ("Server requires "
-				   "/proc/fs/cifs/PacketSigningEnabled "
-				   "to be on"));
+				   "packet signing to be enabled in "
+				   "/proc/fs/cifs/SecurityFlags."));
+			rc = -EOPNOTSUPP;
+		}
 		server->secMode &=
 			~(SECMODE_SIGN_ENABLED | SECMODE_SIGN_REQUIRED);
 	} else if ((secFlags & CIFSSEC_MUST_SIGN) == CIFSSEC_MUST_SIGN) {
@@ -1052,7 +1075,7 @@ PsxCreat:
 				InformationLevel) - 4;
 	offset = param_offset + params;
 	pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
-	pdata->Level = SMB_QUERY_FILE_UNIX_BASIC;
+	pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
 	pdata->Permissions = cpu_to_le64(mode);
 	pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
 	pdata->OpenFlags =  cpu_to_le32(*pOplock);
@@ -1098,8 +1121,8 @@ PsxCreat:
 	if (cpu_to_le32(FILE_CREATE) == psx_rsp->CreateAction)
 		*pOplock |= CIFS_CREATE_ACTION;
 	/* check to make sure response data is there */
-	if (psx_rsp->ReturnedLevel != SMB_QUERY_FILE_UNIX_BASIC) {
-		pRetData->Type = -1; /* unknown */
+	if (psx_rsp->ReturnedLevel != cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC)) {
+		pRetData->Type = cpu_to_le32(-1); /* unknown */
 #ifdef CONFIG_CIFS_DEBUG2
 		cFYI(1, ("unknown type"));
 #endif
@@ -1107,12 +1130,12 @@ PsxCreat:
 		if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP)
 					+ sizeof(FILE_UNIX_BASIC_INFO)) {
 			cERROR(1, ("Open response data too small"));
-			pRetData->Type = -1;
+			pRetData->Type = cpu_to_le32(-1);
 			goto psx_create_err;
 		}
 		memcpy((char *) pRetData,
 			(char *)psx_rsp + sizeof(OPEN_PSX_RSP),
-			sizeof (FILE_UNIX_BASIC_INFO));
+			sizeof(FILE_UNIX_BASIC_INFO));
 	}
 
 psx_create_err:
@@ -1193,9 +1216,9 @@ OldOpenRetry:
 	}
 	if (*pOplock & REQ_OPLOCK)
 		pSMB->OpenFlags = cpu_to_le16(REQ_OPLOCK);
-	else if (*pOplock & REQ_BATCHOPLOCK) {
+	else if (*pOplock & REQ_BATCHOPLOCK)
 		pSMB->OpenFlags = cpu_to_le16(REQ_BATCHOPLOCK);
-	}
+
 	pSMB->OpenFlags |= cpu_to_le16(REQ_MORE_INFO);
 	/* BB fixme add conversion for access_flags to bits 0 - 2 of mode */
 	/* 0 = read
@@ -1310,9 +1333,8 @@ openRetry:
 	}
 	if (*pOplock & REQ_OPLOCK)
 		pSMB->OpenFlags = cpu_to_le32(REQ_OPLOCK);
-	else if (*pOplock & REQ_BATCHOPLOCK) {
+	else if (*pOplock & REQ_BATCHOPLOCK)
 		pSMB->OpenFlags = cpu_to_le32(REQ_BATCHOPLOCK);
-	}
 	pSMB->DesiredAccess = cpu_to_le32(access_flags);
 	pSMB->AllocationSize = 0;
 	/* set file as system file if special file such
@@ -1424,9 +1446,8 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 
 	iov[0].iov_base = (char *)pSMB;
 	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
-	rc = SendReceive2(xid, tcon->ses, iov,
-			  1 /* num iovecs */,
-			  &resp_buf_type, 0);
+	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
+			 &resp_buf_type, 0 /* not long op */, 1 /* log err */ );
 	cifs_stats_inc(&tcon->num_reads);
 	pSMBr = (READ_RSP *)iov[0].iov_base;
 	if (rc) {
@@ -1446,11 +1467,11 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
 			*nbytes = 0;
 		} else {
 			pReadData = (char *) (&pSMBr->hdr.Protocol) +
-			    le16_to_cpu(pSMBr->DataOffset);
-/*                      if (rc = copy_to_user(buf, pReadData, data_length)) {
+					le16_to_cpu(pSMBr->DataOffset);
+/*			if (rc = copy_to_user(buf, pReadData, data_length)) {
 				cERROR(1,("Faulting on read rc = %d",rc));
 				rc = -EFAULT;
-                        }*/ /* can not use copy_to_user when using page cache*/
+			}*/ /* can not use copy_to_user when using page cache*/
 			if (*buf)
 				memcpy(*buf, pReadData, data_length);
 		}
@@ -1645,7 +1666,7 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
 
 
 	rc = SendReceive2(xid, tcon->ses, iov, n_vec + 1, &resp_buf_type,
-			  long_op);
+			  long_op, 0 /* do not log STATUS code */ );
 	cifs_stats_inc(&tcon->num_writes);
 	if (rc) {
 		cFYI(1, ("Send error Write2 = %d", rc));
@@ -2538,7 +2559,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
 		cFYI(1, ("data starts after end of smb"));
 		return -EINVAL;
 	} else if (data_count + *ppdata > end_of_smb) {
-		cFYI(1,("data %p + count %d (%p) ends after end of smb %p start %p",
+		cFYI(1, ("data %p + count %d (%p) ends after end of smb %p start %p",
 			*ppdata, data_count, (data_count + *ppdata),
 			end_of_smb, pSMBr));
 		return -EINVAL;
@@ -2615,7 +2636,7 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 					reparse_buf->TargetNameOffset +
 					reparse_buf->TargetNameLen) >
 						end_of_smb) {
-					cFYI(1,("reparse buf goes beyond SMB"));
+					cFYI(1, ("reparse buf beyond SMB"));
 					rc = -EIO;
 					goto qreparse_out;
 				}
@@ -3042,25 +3063,12 @@ GetExtAttrOut:
 
 #endif /* CONFIG_POSIX */
 
-
-/* security id for everyone */
-static const struct cifs_sid sid_everyone =
-		{1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
-/* group users */
-static const struct cifs_sid sid_user =
-		{1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
-
-/* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_sid *psec_desc, int acl_len)
-{
-	return 0;
-}
-
+#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 		/* BB fix up return info */ char *acl_inf, const int buflen,
-		  const int acl_type /* ACCESS/DEFAULT not sure implication */)
+		  const int acl_type)
 {
 	int rc = 0;
 	int buf_type = 0;
@@ -3085,12 +3093,13 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 	iov[0].iov_base = (char *)pSMB;
 	iov[0].iov_len = pSMB->hdr.smb_buf_length + 4;
 
-	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 0);
+	rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
+			 0 /* not long op */, 0 /* do not log STATUS codes */ );
 	cifs_stats_inc(&tcon->num_acl_get);
 	if (rc) {
 		cFYI(1, ("Send error in QuerySecDesc = %d", rc));
 	} else {                /* decode response */
-		struct cifs_sid *psec_desc;
+		struct cifs_ntsd *psec_desc;
 		__le32 * parm;
 		int parm_len;
 		int data_len;
@@ -3105,8 +3114,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 			goto qsec_out;
 		pSMBr = (struct smb_com_ntransact_rsp *)iov[0].iov_base;
 
-		cERROR(1, ("smb %p parm %p data %p",
-			  pSMBr, parm, psec_desc));  /* BB removeme BB */
+		cFYI(1, ("smb %p parm %p data %p", pSMBr, parm, psec_desc));
 
 		if (le32_to_cpu(pSMBr->ParameterCount) != 4) {
 			rc = -EIO;      /* bad smb */
@@ -3115,7 +3123,7 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
 
 /* BB check that data area is minimum length and as big as acl_len */
 
-		acl_len = le32_to_cpu(*(__le32 *)parm);
+		acl_len = le32_to_cpu(*parm);
 		/* BB check if (acl_len > bufsize) */
 
 		parse_sec_desc(psec_desc, acl_len);
@@ -3128,6 +3136,7 @@ qsec_out:
 /*	cifs_small_buf_release(pSMB); */ /* Freed earlier now in SendReceive2 */
 	return rc;
 }
+#endif /* CONFIG_CIFS_EXPERIMENTAL */
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
@@ -3363,6 +3372,9 @@ UnixQPathInfoRetry:
 		rc = validate_t2((struct smb_t2_rsp *)pSMBr);
 
 		if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) {
+			cERROR(1, ("Malformed FILE_UNIX_BASIC_INFO response.\n"
+				   "Unix Extensions can be disabled on mount "
+				   "by specifying the nosfu mount option."));
 			rc = -EIO;	/* bad smb */
 		} else {
 			__u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3883,12 +3895,10 @@ getDFSRetry:
 	pSMB->hdr.Mid = GetNextMid(ses->server);
 	pSMB->hdr.Tid = ses->ipc_tid;
 	pSMB->hdr.Uid = ses->Suid;
-	if (ses->capabilities & CAP_STATUS32) {
+	if (ses->capabilities & CAP_STATUS32)
 		pSMB->hdr.Flags2 |= SMBFLG2_ERR_STATUS;
-	}
-	if (ses->capabilities & CAP_DFS) {
+	if (ses->capabilities & CAP_DFS)
 		pSMB->hdr.Flags2 |= SMBFLG2_DFS;
-	}
 
 	if (ses->capabilities & CAP_UNICODE) {
 		pSMB->hdr.Flags2 |= SMBFLG2_UNICODE;
@@ -4060,10 +4070,6 @@ oldQFSInfoRetry:
 		(void **) &pSMBr);
 	if (rc)
 		return rc;
-	rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB,
-		      (void **) &pSMBr);
-	if (rc)
-		return rc;
 
 	params = 2;     /* level */
 	pSMB->TotalDataCount = 0;
@@ -4265,7 +4271,7 @@ QFSAttributeRetry:
 			     *) (((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsAttrInfo, response_data,
-			       sizeof (FILE_SYSTEM_ATTRIBUTE_INFO));
+			       sizeof(FILE_SYSTEM_ATTRIBUTE_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4334,7 +4340,7 @@ QFSDeviceRetry:
 				(((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsDevInfo, response_data,
-			       sizeof (FILE_SYSTEM_DEVICE_INFO));
+			       sizeof(FILE_SYSTEM_DEVICE_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4402,7 +4408,7 @@ QFSUnixRetry:
 			     *) (((char *) &pSMBr->hdr.Protocol) +
 				 data_offset);
 			memcpy(&tcon->fsUnixInfo, response_data,
-			       sizeof (FILE_SYSTEM_UNIX_INFO));
+			       sizeof(FILE_SYSTEM_UNIX_INFO));
 		}
 	}
 	cifs_buf_release(pSMB);
@@ -4612,7 +4618,7 @@ SetEOFRetry:
 		strncpy(pSMB->FileName, fileName, name_len);
 	}
 	params = 6 + name_len;
-	data_count = sizeof (struct file_end_of_file_info);
+	data_count = sizeof(struct file_end_of_file_info);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(4100);
 	pSMB->MaxSetupCount = 0;
@@ -4800,7 +4806,7 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
 
 	data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
 
-	count = sizeof (FILE_BASIC_INFO);
+	count = sizeof(FILE_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find max SMB PDU from sess */
 	pSMB->SetupCount = 1;
@@ -4871,7 +4877,7 @@ SetTimesRetry:
 	}
 
 	params = 6 + name_len;
-	count = sizeof (FILE_BASIC_INFO);
+	count = sizeof(FILE_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find exact max SMB PDU from sess structure BB */
 	pSMB->MaxSetupCount = 0;
@@ -4900,7 +4906,7 @@ SetTimesRetry:
 		pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
 	pSMB->Reserved4 = 0;
 	pSMB->hdr.smb_buf_length += byte_count;
-	memcpy(data_offset, data, sizeof (FILE_BASIC_INFO));
+	memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
 	pSMB->ByteCount = cpu_to_le16(byte_count);
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
 			 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5003,7 +5009,7 @@ setPermsRetry:
 	}
 
 	params = 6 + name_len;
-	count = sizeof (FILE_UNIX_BASIC_INFO);
+	count = sizeof(FILE_UNIX_BASIC_INFO);
 	pSMB->MaxParameterCount = cpu_to_le16(2);
 	pSMB->MaxDataCount = cpu_to_le16(1000);	/* BB find exact max SMB PDU from sess structure BB */
 	pSMB->MaxSetupCount = 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4af3588c1a9..19ee11f7f35 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -124,7 +124,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	struct mid_q_entry *mid_entry;
 
 	spin_lock(&GlobalMid_Lock);
-	if ( kthread_should_stop() ) {
+	if (kthread_should_stop()) {
 		/* the demux thread will exit normally
 		next time through the loop */
 		spin_unlock(&GlobalMid_Lock);
@@ -151,9 +151,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	}
 	list_for_each(tmp, &GlobalTreeConnectionList) {
 		tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
-		if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) {
+		if ((tcon) && (tcon->ses) && (tcon->ses->server == server))
 			tcon->tidStatus = CifsNeedReconnect;
-		}
 	}
 	read_unlock(&GlobalSMBSeslock);
 	/* do not want to be sending data on a socket we are freeing */
@@ -187,7 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	spin_unlock(&GlobalMid_Lock);
 	up(&server->tcpSem);
 
-	while ( (!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
+	while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) {
 		try_to_freeze();
 		if (server->protocolType == IPV6) {
 			rc = ipv6_connect(&server->addr.sockAddr6,
@@ -204,7 +203,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		} else {
 			atomic_inc(&tcpSesReconnectCount);
 			spin_lock(&GlobalMid_Lock);
-			if ( !kthread_should_stop() )
+			if (!kthread_should_stop())
 				server->tcpStatus = CifsGood;
 			server->sequence_number = 0;
 			spin_unlock(&GlobalMid_Lock);
@@ -352,17 +351,15 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
 	current->flags |= PF_MEMALLOC;
 	server->tsk = current;	/* save process info to wake at shutdown */
-	cFYI(1, ("Demultiplex PID: %d", current->pid));
+	cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
 	write_lock(&GlobalSMBSeslock);
 	atomic_inc(&tcpSesAllocCount);
 	length = tcpSesAllocCount.counter;
 	write_unlock(&GlobalSMBSeslock);
 	complete(&cifsd_complete);
-	if (length  > 1) {
-		mempool_resize(cifs_req_poolp,
-			length + cifs_min_rcv,
-			GFP_KERNEL);
-	}
+	if (length  > 1)
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+				GFP_KERNEL);
 
 	set_freezable();
 	while (!kthread_should_stop()) {
@@ -378,7 +375,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			}
 		} else if (isLargeBuf) {
 			/* we are reusing a dirty large buf, clear its start */
-			memset(bigbuf, 0, sizeof (struct smb_hdr));
+			memset(bigbuf, 0, sizeof(struct smb_hdr));
 		}
 
 		if (smallbuf == NULL) {
@@ -391,7 +388,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			}
 			/* beginning of smb buffer is cleared in our buf_get */
 		} else /* if existing small buf clear beginning */
-			memset(smallbuf, 0, sizeof (struct smb_hdr));
+			memset(smallbuf, 0, sizeof(struct smb_hdr));
 
 		isLargeBuf = FALSE;
 		isMultiRsp = FALSE;
@@ -400,11 +397,13 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 		iov.iov_len = 4;
 		smb_msg.msg_control = NULL;
 		smb_msg.msg_controllen = 0;
+		pdu_length = 4; /* enough to get RFC1001 header */
+incomplete_rcv:
 		length =
 		    kernel_recvmsg(csocket, &smb_msg,
-				 &iov, 1, 4, 0 /* BB see socket.h flags */);
+				&iov, 1, pdu_length, 0 /* BB other flags? */);
 
-		if ( kthread_should_stop() ) {
+		if (kthread_should_stop()) {
 			break;
 		} else if (server->tcpStatus == CifsNeedReconnect) {
 			cFYI(1, ("Reconnect after server stopped responding"));
@@ -416,7 +415,10 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			msleep(1); /* minimum sleep to prevent looping
 				allowing socket to clear and app threads to set
 				tcpStatus CifsNeedReconnect if server hung */
-			continue;
+			if (pdu_length < 4)
+				goto incomplete_rcv;
+			else
+				continue;
 		} else if (length <= 0) {
 			if (server->tcpStatus == CifsNew) {
 				cFYI(1, ("tcp session abend after SMBnegprot"));
@@ -437,13 +439,11 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 			wake_up(&server->response_q);
 			continue;
 		} else if (length < 4) {
-			cFYI(1,
-			    ("Frame under four bytes received (%d bytes long)",
+			cFYI(1, ("less than four bytes received (%d bytes)",
 			      length));
-			cifs_reconnect(server);
-			csocket = server->ssocket;
-			wake_up(&server->response_q);
-			continue;
+			pdu_length -= length;
+			msleep(1);
+			goto incomplete_rcv;
 		}
 
 		/* The right amount was read from socket - 4 bytes */
@@ -504,7 +504,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 
 		/* else we have an SMB response */
 		if ((pdu_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) ||
-			    (pdu_length < sizeof (struct smb_hdr) - 1 - 4)) {
+			    (pdu_length < sizeof(struct smb_hdr) - 1 - 4)) {
 			cERROR(1, ("Invalid size SMB length %d pdu_length %d",
 					length, pdu_length+4));
 			cifs_reconnect(server);
@@ -528,7 +528,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 		     total_read += length) {
 			length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
 						pdu_length - total_read, 0);
-			if ( kthread_should_stop() ||
+			if (kthread_should_stop() ||
 			    (length == -EINTR)) {
 				/* then will exit */
 				reconnect = 2;
@@ -546,6 +546,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 					      allowing socket to clear and app
 					      threads to set tcpStatus
 					      CifsNeedReconnect if server hung*/
+				length = 0;
 				continue;
 			} else if (length <= 0) {
 				cERROR(1, ("Received no data, expecting %d",
@@ -631,9 +632,9 @@ multi_t2_fnd:
 			/* Was previous buf put in mpx struct for multi-rsp? */
 			if (!isMultiRsp) {
 				/* smb buffer will be freed by user thread */
-				if (isLargeBuf) {
+				if (isLargeBuf)
 					bigbuf = NULL;
-				} else
+				else
 					smallbuf = NULL;
 			}
 			wake_up_process(task_to_wake);
@@ -676,9 +677,8 @@ multi_t2_fnd:
 		server->ssocket = NULL;
 	}
 	/* buffer usuallly freed in free_mid - need to free it here on exit */
-	if (bigbuf != NULL)
-		cifs_buf_release(bigbuf);
-	if (smallbuf != NULL)
+	cifs_buf_release(bigbuf);
+	if (smallbuf) /* no sense logging a debug message if NULL */
 		cifs_small_buf_release(smallbuf);
 
 	read_lock(&GlobalSMBSeslock);
@@ -702,9 +702,8 @@ multi_t2_fnd:
 		list_for_each(tmp, &GlobalSMBSessionList) {
 			ses = list_entry(tmp, struct cifsSesInfo,
 					 cifsSessionList);
-			if (ses->server == server) {
+			if (ses->server == server)
 				ses->status = CifsExiting;
-			}
 		}
 
 		spin_lock(&GlobalMid_Lock);
@@ -714,9 +713,8 @@ multi_t2_fnd:
 				cFYI(1, ("Clearing Mid 0x%x - waking up ",
 					 mid_entry->mid));
 				task_to_wake = mid_entry->tsk;
-				if (task_to_wake) {
+				if (task_to_wake)
 					wake_up_process(task_to_wake);
-				}
 			}
 		}
 		spin_unlock(&GlobalMid_Lock);
@@ -749,18 +747,15 @@ multi_t2_fnd:
 	list_for_each(tmp, &GlobalSMBSessionList) {
 		ses = list_entry(tmp, struct cifsSesInfo,
 				cifsSessionList);
-		if (ses->server == server) {
+		if (ses->server == server)
 			ses->server = NULL;
-		}
 	}
 	write_unlock(&GlobalSMBSeslock);
 
 	kfree(server);
-	if (length  > 0) {
-		mempool_resize(cifs_req_poolp,
-			length + cifs_min_rcv,
-			GFP_KERNEL);
-	}
+	if (length  > 0)
+		mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
+				GFP_KERNEL);
 
 	return 0;
 }
@@ -1477,7 +1472,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
 	if (psin_server->sin_port) { /* user overrode default port */
 		rc = (*csocket)->ops->connect(*csocket,
 				(struct sockaddr *) psin_server,
-				sizeof (struct sockaddr_in), 0);
+				sizeof(struct sockaddr_in), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1493,7 +1488,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
 
 			rc = (*csocket)->ops->connect(*csocket,
 					(struct sockaddr *) psin_server,
-					sizeof (struct sockaddr_in), 0);
+					sizeof(struct sockaddr_in), 0);
 			if (rc >= 0)
 				connected = 1;
 		}
@@ -1502,7 +1497,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
 		psin_server->sin_port = htons(RFC1001_PORT);
 		rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
 					      psin_server,
-					      sizeof (struct sockaddr_in), 0);
+					      sizeof(struct sockaddr_in), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1610,7 +1605,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
 	if (psin_server->sin6_port) { /* user overrode default port */
 		rc = (*csocket)->ops->connect(*csocket,
 				(struct sockaddr *) psin_server,
-				sizeof (struct sockaddr_in6), 0);
+				sizeof(struct sockaddr_in6), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1626,7 +1621,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
 
 			rc = (*csocket)->ops->connect(*csocket,
 					(struct sockaddr *) psin_server,
-					sizeof (struct sockaddr_in6), 0);
+					sizeof(struct sockaddr_in6), 0);
 			if (rc >= 0)
 				connected = 1;
 		}
@@ -1634,7 +1629,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
 	if (!connected) {
 		psin_server->sin6_port = htons(RFC1001_PORT);
 		rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *)
-				 psin_server, sizeof (struct sockaddr_in6), 0);
+				 psin_server, sizeof(struct sockaddr_in6), 0);
 		if (rc >= 0)
 			connected = 1;
 	}
@@ -1750,7 +1745,16 @@ void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
 			cFYI(1, ("very large write cap"));
 #endif /* CIFS_DEBUG2 */
 		if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
-			cFYI(1, ("setting capabilities failed"));
+			if (vol_info == NULL) {
+				cFYI(1, ("resetting capabilities failed"));
+			} else
+				cERROR(1, ("Negotiating Unix capabilities "
+					   "with the server failed.  Consider "
+					   "mounting with the Unix Extensions\n"
+					   "disabled, if problems are found, "
+					   "by specifying the nounix mount "
+					   "option."));
+
 		}
 	}
 }
@@ -1909,8 +1913,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			return rc;
 		}
 
-		srvTcp = kmalloc(sizeof (struct TCP_Server_Info), GFP_KERNEL);
-		if (srvTcp == NULL) {
+		srvTcp = kzalloc(sizeof(struct TCP_Server_Info), GFP_KERNEL);
+		if (!srvTcp) {
 			rc = -ENOMEM;
 			sock_release(csocket);
 			kfree(volume_info.UNC);
@@ -1919,9 +1923,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			FreeXid(xid);
 			return rc;
 		} else {
-			memset(srvTcp, 0, sizeof (struct TCP_Server_Info));
 			memcpy(&srvTcp->addr.sockAddr, &sin_server,
-				sizeof (struct sockaddr_in));
+				sizeof(struct sockaddr_in));
 			atomic_set(&srvTcp->inFlight, 0);
 			/* BB Add code for ipv6 case too */
 			srvTcp->ssocket = csocket;
@@ -2173,8 +2176,18 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 						if (tsk)
 							kthread_stop(tsk);
 					}
-				} else
+				} else {
 					cFYI(1, ("No session or bad tcon"));
+					if ((pSesInfo->server) &&
+					    (pSesInfo->server->tsk)) {
+						struct task_struct *tsk;
+						force_sig(SIGKILL,
+							pSesInfo->server->tsk);
+						tsk = pSesInfo->server->tsk;
+						if (tsk)
+							kthread_stop(tsk);
+					}
+				}
 				sesInfoFree(pSesInfo);
 				/* pSesInfo = NULL; */
 			}
@@ -2185,8 +2198,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		tcon->ses = pSesInfo;
 
 		/* do not care if following two calls succeed - informational */
-		CIFSSMBQFSDeviceInfo(xid, tcon);
-		CIFSSMBQFSAttributeInfo(xid, tcon);
+		if (!tcon->ipc) {
+			CIFSSMBQFSDeviceInfo(xid, tcon);
+			CIFSSMBQFSAttributeInfo(xid, tcon);
+		}
 
 		/* tell server which Unix caps we support */
 		if (tcon->ses->capabilities & CAP_UNIX)
@@ -2526,8 +2541,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 sesssetup_nomem:	/* do not return an error on nomem for the info strings,
 			   since that could make reconnection harder, and
 			   reconnection might be needed to free memory */
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
@@ -2547,7 +2561,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 	int remaining_words = 0;
 	int bytes_returned = 0;
 	int len;
-	int SecurityBlobLength = sizeof (NEGOTIATE_MESSAGE);
+	int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
 	PNEGOTIATE_MESSAGE SecurityBlob;
 	PCHALLENGE_MESSAGE SecurityBlob2;
 	__u32 negotiate_flags, capabilities;
@@ -2865,15 +2879,14 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
 		rc = -EIO;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
 static int
 CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
-		char *ntlm_session_key, int ntlmv2_flag,
-		const struct nls_table *nls_codepage)
+			char *ntlm_session_key, int ntlmv2_flag,
+			const struct nls_table *nls_codepage)
 {
 	struct smb_hdr *smb_buffer;
 	struct smb_hdr *smb_buffer_response;
@@ -2886,7 +2899,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	int remaining_words = 0;
 	int bytes_returned = 0;
 	int len;
-	int SecurityBlobLength = sizeof (AUTHENTICATE_MESSAGE);
+	int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
 	PAUTHENTICATE_MESSAGE SecurityBlob;
 	__u32 negotiate_flags, capabilities;
 	__u16 count;
@@ -2901,8 +2914,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 		return -ENOMEM;
 	}
 	smb_buffer_response = smb_buffer;
-	pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
-	pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response;
+	pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
+	pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
 
 	/* send SMBsessionSetup here */
 	header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
@@ -2921,7 +2934,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 		smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
 	capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
-	    CAP_EXTENDED_SECURITY;
+			CAP_EXTENDED_SECURITY;
 	if (ses->capabilities & CAP_UNICODE) {
 		smb_buffer->Flags2 |= SMBFLG2_UNICODE;
 		capabilities |= CAP_UNICODE;
@@ -2936,15 +2949,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	}
 	pSMB->req.Capabilities = cpu_to_le32(capabilities);
 
-	bcc_ptr = (char *) &pSMB->req.SecurityBlob;
-	SecurityBlob = (PAUTHENTICATE_MESSAGE) bcc_ptr;
+	bcc_ptr = (char *)&pSMB->req.SecurityBlob;
+	SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
 	strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
 	SecurityBlob->MessageType = NtLmAuthenticate;
 	bcc_ptr += SecurityBlobLength;
-	negotiate_flags =
-	    NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
-	    NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
-	    0x80000000 | NTLMSSP_NEGOTIATE_128;
+	negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
+			NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
+			0x80000000 | NTLMSSP_NEGOTIATE_128;
 	if (sign_CIFS_PDUs)
 		negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
 	if (ntlmv2_flag)
@@ -2979,36 +2991,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 			SecurityBlob->DomainName.Length = 0;
 			SecurityBlob->DomainName.MaximumLength = 0;
 		} else {
-			__u16 len =
-			    cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
+			__u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
 					  nls_codepage);
-			len *= 2;
+			ln *= 2;
 			SecurityBlob->DomainName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->DomainName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->DomainName.Length =
-			    cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->DomainName.Length = cpu_to_le16(ln);
 		}
 		if (user == NULL) {
 			SecurityBlob->UserName.Buffer = 0;
 			SecurityBlob->UserName.Length = 0;
 			SecurityBlob->UserName.MaximumLength = 0;
 		} else {
-			__u16 len =
-			    cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
+			__u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
 					  nls_codepage);
-			len *= 2;
+			ln *= 2;
 			SecurityBlob->UserName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->UserName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->UserName.Length =
-			    cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->UserName.Length = cpu_to_le16(ln);
 		}
 
 		/* SecurityBlob->WorkstationName.Length =
@@ -3052,33 +3060,32 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 			SecurityBlob->DomainName.Length = 0;
 			SecurityBlob->DomainName.MaximumLength = 0;
 		} else {
-			__u16 len;
+			__u16 ln;
 			negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
 			strncpy(bcc_ptr, domain, 63);
-			len = strnlen(domain, 64);
+			ln = strnlen(domain, 64);
 			SecurityBlob->DomainName.MaximumLength =
-			    cpu_to_le16(len);
+			    cpu_to_le16(ln);
 			SecurityBlob->DomainName.Buffer =
 			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->DomainName.Length = cpu_to_le16(len);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->DomainName.Length = cpu_to_le16(ln);
 		}
 		if (user == NULL) {
 			SecurityBlob->UserName.Buffer = 0;
 			SecurityBlob->UserName.Length = 0;
 			SecurityBlob->UserName.MaximumLength = 0;
 		} else {
-			__u16 len;
+			__u16 ln;
 			strncpy(bcc_ptr, user, 63);
-			len = strnlen(user, 64);
-			SecurityBlob->UserName.MaximumLength =
-			    cpu_to_le16(len);
+			ln = strnlen(user, 64);
+			SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
 			SecurityBlob->UserName.Buffer =
-			    cpu_to_le32(SecurityBlobLength);
-			bcc_ptr += len;
-			SecurityBlobLength += len;
-			SecurityBlob->UserName.Length = cpu_to_le16(len);
+						cpu_to_le32(SecurityBlobLength);
+			bcc_ptr += ln;
+			SecurityBlobLength += ln;
+			SecurityBlob->UserName.Length = cpu_to_le16(ln);
 		}
 		/* BB fill in our workstation name if known BB */
 
@@ -3100,12 +3107,11 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 	rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
 			 &bytes_returned, 1);
 	if (rc) {
-/*    rc = map_smb_to_linux_error(smb_buffer_response);  *//* done in SendReceive now */
-	} else if ((smb_buffer_response->WordCount == 3)
-		   || (smb_buffer_response->WordCount == 4)) {
+/*   rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */
+	} else if ((smb_buffer_response->WordCount == 3) ||
+		   (smb_buffer_response->WordCount == 4)) {
 		__u16 action = le16_to_cpu(pSMBr->resp.Action);
-		__u16 blob_len =
-		    le16_to_cpu(pSMBr->resp.SecurityBlobLength);
+		__u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
 		if (action & GUEST_LOGIN)
 			cFYI(1, (" Guest login")); /* BB Should we set anything
 							 in SesInfo struct ? */
@@ -3145,8 +3151,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 					} else {
 						remaining_words = BCC(smb_buffer_response) / 2;
 					}
-					len =
-					    UniStrnlen((wchar_t *) bcc_ptr,remaining_words - 1);
+					len = UniStrnlen((wchar_t *) bcc_ptr,
+							remaining_words - 1);
 /* We look for obvious messed up bcc or strings in response so we do not go off
   the end since (at least) WIN2K and Windows XP have a major bug in not null
   terminating last Unicode string in response  */
@@ -3230,7 +3236,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						<= BCC(smb_buffer_response)) {
 						if (ses->serverOS)
 							kfree(ses->serverOS);
-						ses->serverOS = kzalloc(len + 1,GFP_KERNEL);
+						ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 						strncpy(ses->serverOS,bcc_ptr, len);
 
 						bcc_ptr += len;
@@ -3259,28 +3265,24 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 						bcc_ptr[0] = 0;
 						bcc_ptr++;
 					} else
-						cFYI(1,
-						     ("field of length %d "
+						cFYI(1, ("field of length %d "
 						   "extends beyond end of smb ",
 						      len));
 				}
 			} else {
-				cERROR(1,
-				       (" Security Blob extends beyond end "
+				cERROR(1, ("Security Blob extends beyond end "
 					"of SMB"));
 			}
 		} else {
 			cERROR(1, ("No session structure passed in."));
 		}
 	} else {
-		cERROR(1,
-		       (" Invalid Word count %d: ",
+		cERROR(1, ("Invalid Word count %d: ",
 			smb_buffer_response->WordCount));
 		rc = -EIO;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 
 	return rc;
 }
@@ -3389,6 +3391,18 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 		bcc_ptr = pByteArea(smb_buffer_response);
 		length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2);
 		/* skip service field (NB: this field is always ASCII) */
+		if (length == 3) {
+			if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
+			    (bcc_ptr[2] == 'C')) {
+				cFYI(1, ("IPC connection"));
+				tcon->ipc = 1;
+			}
+		} else if (length == 2) {
+			if ((bcc_ptr[0] == 'A') && (bcc_ptr[1] == ':')) {
+				/* the most common case */
+				cFYI(1, ("disk share connection"));
+			}
+		}
 		bcc_ptr += length + 1;
 		strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
 		if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
@@ -3399,9 +3413,11 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 				kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
 				    kzalloc(length + 2, GFP_KERNEL);
-				cifs_strfromUCS_le(tcon->nativeFileSystem,
-						   (__le16 *) bcc_ptr,
-						   length, nls_codepage);
+				if (tcon->nativeFileSystem)
+					cifs_strfromUCS_le(
+						tcon->nativeFileSystem,
+						(__le16 *) bcc_ptr,
+						length, nls_codepage);
 				bcc_ptr += 2 * length;
 				bcc_ptr[0] = 0;	/* null terminate the string */
 				bcc_ptr[1] = 0;
@@ -3416,8 +3432,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 				kfree(tcon->nativeFileSystem);
 				tcon->nativeFileSystem =
 				    kzalloc(length + 1, GFP_KERNEL);
-				strncpy(tcon->nativeFileSystem, bcc_ptr,
-					length);
+				if (tcon->nativeFileSystem)
+					strncpy(tcon->nativeFileSystem, bcc_ptr,
+						length);
 			}
 			/* else do not bother copying these information fields*/
 		}
@@ -3433,8 +3450,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 		ses->ipc_tid = smb_buffer_response->Tid;
 	}
 
-	if (smb_buffer)
-		cifs_buf_release(smb_buffer);
+	cifs_buf_release(smb_buffer);
 	return rc;
 }
 
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4830acc86d7..793404b1092 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -3,7 +3,7 @@
  *
  *   vfs operations that deal with dentries
  *
- *   Copyright (C) International Business Machines  Corp., 2002,2005
+ *   Copyright (C) International Business Machines  Corp., 2002,2007
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
  *   This library is free software; you can redistribute it and/or modify
@@ -269,7 +269,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 			CIFSSMBClose(xid, pTcon, fileHandle);
 		} else if (newinode) {
 			pCifsFile =
-			   kzalloc(sizeof (struct cifsFileInfo), GFP_KERNEL);
+			   kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 
 			if (pCifsFile == NULL)
 				goto cifs_create_out;
@@ -397,7 +397,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
 				/* BB Do not bother to decode buf since no
 				   local inode yet to put timestamps in,
 				   but we can reuse it safely */
-				int bytes_written;
+				unsigned int bytes_written;
 				struct win_dev *pdev;
 				pdev = (struct win_dev *)buf;
 				if (S_ISCHR(mode)) {
@@ -450,8 +450,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 
 	xid = GetXid();
 
-	cFYI(1,
-	     (" parent inode = 0x%p name is: %s and dentry = 0x%p",
+	cFYI(1, (" parent inode = 0x%p name is: %s and dentry = 0x%p",
 	      parent_dir_inode, direntry->d_name.name, direntry));
 
 	/* check whether path exists */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 893fd0aebff..d614b91caec 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -43,6 +43,7 @@
 #include <linux/exportfs.h>
 #include "cifsglob.h"
 #include "cifs_debug.h"
+#include "cifsfs.h"
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 static struct dentry *cifs_get_parent(struct dentry *dentry)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 894b1f7b299..1e7e4c06d9e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -467,7 +467,7 @@ reopen_error_exit:
 int cifs_close(struct inode *inode, struct file *file)
 {
 	int rc = 0;
-	int xid;
+	int xid, timeout;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
 	struct cifsFileInfo *pSMBFile =
@@ -485,9 +485,9 @@ int cifs_close(struct inode *inode, struct file *file)
 			/* no sense reconnecting to close a file that is
 			   already closed */
 			if (pTcon->tidStatus != CifsNeedReconnect) {
-				int timeout = 2;
+				timeout = 2;
 				while ((atomic_read(&pSMBFile->wrtPending) != 0)
-					 && (timeout < 1000) ) {
+					&& (timeout <= 2048)) {
 					/* Give write a better chance to get to
 					server ahead of the close.  We do not
 					want to add a wait_q here as it would
@@ -522,12 +522,30 @@ int cifs_close(struct inode *inode, struct file *file)
 		list_del(&pSMBFile->flist);
 		list_del(&pSMBFile->tlist);
 		write_unlock(&GlobalSMBSeslock);
+		timeout = 10;
+		/* We waited above to give the SMBWrite a chance to issue
+		   on the wire (so we do not get SMBWrite returning EBADF
+		   if writepages is racing with close.  Note that writepages
+		   does not specify a file handle, so it is possible for a file
+		   to be opened twice, and the application close the "wrong"
+		   file handle - in these cases we delay long enough to allow
+		   the SMBWrite to get on the wire before the SMB Close.
+		   We allow total wait here over 45 seconds, more than
+		   oplock break time, and more than enough to allow any write
+		   to complete on the server, or to time out on the client */
+		while ((atomic_read(&pSMBFile->wrtPending) != 0)
+				&& (timeout <= 50000)) {
+			cERROR(1, ("writes pending, delay free of handle"));
+			msleep(timeout);
+			timeout *= 8;
+		}
 		kfree(pSMBFile->search_resume_name);
 		kfree(file->private_data);
 		file->private_data = NULL;
 	} else
 		rc = -EBADF;
 
+	read_lock(&GlobalSMBSeslock);
 	if (list_empty(&(CIFS_I(inode)->openFileList))) {
 		cFYI(1, ("closing last open instance for inode %p", inode));
 		/* if the file is not open we do not know if we can cache info
@@ -535,6 +553,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		CIFS_I(inode)->clientCanCacheRead = FALSE;
 		CIFS_I(inode)->clientCanCacheAll  = FALSE;
 	}
+	read_unlock(&GlobalSMBSeslock);
 	if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
 		rc = CIFS_I(inode)->write_behind_rc;
 	FreeXid(xid);
@@ -767,7 +786,8 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 			mutex_lock(&fid->lock_mutex);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
-						length >= li->length) {
+						(pfLock->fl_start + length) >=
+						(li->offset + li->length)) {
 					stored_rc = CIFSSMBLock(xid, pTcon,
 							netfid,
 							li->length, li->offset,
@@ -1022,6 +1042,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
 	}
 
 	read_lock(&GlobalSMBSeslock);
+refind_writable:
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
 		if (open_file->closePend)
 			continue;
@@ -1029,24 +1050,49 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
 		    ((open_file->pfile->f_flags & O_RDWR) ||
 		     (open_file->pfile->f_flags & O_WRONLY))) {
 			atomic_inc(&open_file->wrtPending);
+
+			if (!open_file->invalidHandle) {
+				/* found a good writable file */
+				read_unlock(&GlobalSMBSeslock);
+				return open_file;
+			}
+	
 			read_unlock(&GlobalSMBSeslock);
-			if ((open_file->invalidHandle) &&
-			   (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
-				rc = cifs_reopen_file(open_file->pfile, FALSE);
-				/* if it fails, try another handle - might be */
-				/* dangerous to hold up writepages with retry */
-				if (rc) {
-					cFYI(1,
-					      ("failed on reopen file in wp"));
+			/* Had to unlock since following call can block */
+			rc = cifs_reopen_file(open_file->pfile, FALSE);
+			if (!rc) { 
+				if (!open_file->closePend)
+					return open_file;
+				else { /* start over in case this was deleted */
+				       /* since the list could be modified */
 					read_lock(&GlobalSMBSeslock);
-					/* can not use this handle, no write
-					pending on this one after all */
-					atomic_dec
-					     (&open_file->wrtPending);
-					continue;
+					atomic_dec(&open_file->wrtPending);
+					goto refind_writable;
 				}
 			}
-			return open_file;
+
+			/* if it fails, try another handle if possible -
+			(we can not do this if closePending since
+			loop could be modified - in which case we
+			have to start at the beginning of the list
+			again. Note that it would be bad
+			to hold up writepages here (rather than
+			in caller) with continuous retries */
+			cFYI(1, ("wp failed on reopen file"));
+			read_lock(&GlobalSMBSeslock);
+			/* can not use this handle, no write
+			   pending on this one after all */
+			atomic_dec(&open_file->wrtPending);
+			
+			if (open_file->closePend) /* list could have changed */
+				goto refind_writable;
+			/* else we simply continue to the next entry. Thus
+			   we do not loop on reopen errors.  If we
+			   can not reopen the file, for example if we
+			   reconnected to a server with another client
+			   racing to delete or lock the file we would not
+			   make progress if we restarted before the beginning
+			   of the loop here. */
 		}
 	}
 	read_unlock(&GlobalSMBSeslock);
@@ -1709,7 +1755,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	struct page *page;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
-	int bytes_read = 0;
+	unsigned int bytes_read = 0;
 	unsigned int read_size, i;
 	char *smb_read_data = NULL;
 	struct smb_com_read_rsp *pSMBr;
@@ -1803,7 +1849,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 
 			i +=  bytes_read >> PAGE_CACHE_SHIFT;
 			cifs_stats_bytes_read(pTcon, bytes_read);
-			if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
+			if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
 				i++; /* account for partial page */
 
 				/* server copy of file can have smaller size
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 279f3c5e0ce..5e8b388be3b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -115,7 +115,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 		inode->i_mode = le64_to_cpu(findData.Permissions);
 		/* since we set the inode type below we need to mask off
 		   to avoid strange results if bits set above */
-			inode->i_mode &= ~S_IFMT;
+		inode->i_mode &= ~S_IFMT;
 		if (type == UNIX_FILE) {
 			inode->i_mode |= S_IFREG;
 		} else if (type == UNIX_SYMLINK) {
@@ -575,19 +575,33 @@ int cifs_get_inode_info(struct inode **pinode,
 	return rc;
 }
 
+static const struct inode_operations cifs_ipc_inode_ops = {
+	.lookup = cifs_lookup,
+};
+
 /* gets root inode */
 void cifs_read_inode(struct inode *inode)
 {
-	int xid;
+	int xid, rc;
 	struct cifs_sb_info *cifs_sb;
 
 	cifs_sb = CIFS_SB(inode->i_sb);
 	xid = GetXid();
 
 	if (cifs_sb->tcon->unix_ext)
-		cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&inode, "", inode->i_sb, xid);
 	else
-		cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+		rc = cifs_get_inode_info(&inode, "", NULL, inode->i_sb, xid);
+	if (rc && cifs_sb->tcon->ipc) {
+		cFYI(1, ("ipc connection - fake read inode"));
+		inode->i_mode |= S_IFDIR;
+		inode->i_nlink = 2;
+		inode->i_op = &cifs_ipc_inode_ops;
+		inode->i_fop = &simple_dir_operations;
+		inode->i_uid = cifs_sb->mnt_uid;
+		inode->i_gid = cifs_sb->mnt_gid;
+	}
+
 	/* can not call macro FreeXid here since in a void func */
 	_FreeXid(xid);
 }
@@ -919,18 +933,25 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 			goto mkdir_out;
 		}
 
+		mode &= ~current->fs->umask;
 		rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT,
 				mode, NULL /* netfid */, pInfo, &oplock,
 				full_path, cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-		if (rc) {
+		if (rc == -EOPNOTSUPP) {
+			kfree(pInfo);
+			goto mkdir_retry_old;
+		} else if (rc) {
 			cFYI(1, ("posix mkdir returned 0x%x", rc));
 			d_drop(direntry);
 		} else {
 			int obj_type;
-			if (pInfo->Type == -1) /* no return info - go query */
+			if (pInfo->Type == cpu_to_le32(-1)) {
+				/* no return info, go query for it */
+				kfree(pInfo);
 				goto mkdir_get_info;
+			}
 /*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
 	to set uid/gid */
 			inc_nlink(inode);
@@ -940,8 +961,10 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 				direntry->d_op = &cifs_dentry_ops;
 
 			newinode = new_inode(inode->i_sb);
-			if (newinode == NULL)
+			if (newinode == NULL) {
+				kfree(pInfo);
 				goto mkdir_get_info;
+			}
 			/* Is an i_ino of zero legal? */
 			/* Are there sanity checks we can use to ensure that
 			   the server is really filling in that field? */
@@ -972,7 +995,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
 		kfree(pInfo);
 		goto mkdir_out;
 	}
-
+mkdir_retry_old:
 	/* BB add setting the equivalent of mode via CreateX w/ACLs */
 	rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls,
 			  cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1377,8 +1400,17 @@ static int cifs_vmtruncate(struct inode *inode, loff_t offset)
 	}
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
+	/*
+	 * unmap_mapping_range is called twice, first simply for efficiency
+	 * so that truncate_inode_pages does fewer single-page unmaps. However
+	 * after this first call, and before truncate_inode_pages finishes,
+	 * it is possible for private pages to be COWed, which remain after
+	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
+	 * call must be made for correctness.
+	 */
 	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	truncate_inode_pages(mapping, offset);
+	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	goto out_truncate;
 
 do_expand:
@@ -1469,7 +1501,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 			atomic_dec(&open_file->wrtPending);
 			cFYI(1, ("SetFSize for attrs rc = %d", rc));
 			if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
-				int bytes_written;
+				unsigned int bytes_written;
 				rc = CIFSSMBWrite(xid, pTcon,
 						  nfid, 0, attrs->ia_size,
 						  &bytes_written, NULL, NULL,
@@ -1502,7 +1534,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 					cifs_sb->mnt_cifs_flags &
 						CIFS_MOUNT_MAP_SPECIAL_CHR);
 				if (rc == 0) {
-					int bytes_written;
+					unsigned int bytes_written;
 					rc = CIFSSMBWrite(xid, pTcon,
 							netfid, 0,
 							attrs->ia_size,
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 6a85ef7b879..11f265726db 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -237,7 +237,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
 	char *tmp_path = NULL;
 	char *tmpbuffer;
 	unsigned char *referrals = NULL;
-	int num_referrals = 0;
+	unsigned int num_referrals = 0;
 	int len;
 	__u16 fid;
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0bcec0844be..51ec681fe74 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -169,7 +169,6 @@ cifs_buf_get(void)
 void
 cifs_buf_release(void *buf_to_free)
 {
-
 	if (buf_to_free == NULL) {
 		/* cFYI(1, ("Null buffer passed to cifs_buf_release"));*/
 		return;
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 2bfed3f45d0..f06359cb22e 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -114,10 +114,16 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
 	{ERRusempx, -EIO},
 	{ERRusestd, -EIO},
 	{ERR_NOTIFY_ENUM_DIR, -ENOBUFS},
-	{ERRaccountexpired, -EACCES},
+	{ERRnoSuchUser, -EACCES},
+/*	{ERRaccountexpired, -EACCES},
 	{ERRbadclient, -EACCES},
 	{ERRbadLogonTime, -EACCES},
-	{ERRpasswordExpired, -EACCES},
+	{ERRpasswordExpired, -EACCES},*/
+	{ERRaccountexpired, -EKEYEXPIRED},
+	{ERRbadclient, -EACCES},
+	{ERRbadLogonTime, -EACCES},
+	{ERRpasswordExpired, -EKEYEXPIRED},
+
 	{ERRnosupport, -EINVAL},
 	{0, 0}
 };
@@ -270,7 +276,7 @@ static const struct {
 	 from NT_STATUS_NO_SUCH_USER to NT_STATUS_LOGON_FAILURE
 	 during the session setup } */
 	{
-	ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, {
+	ERRDOS, ERRnoaccess, NT_STATUS_NO_SUCH_USER}, { /* could map to 2238 */
 	ERRHRD, ERRgeneral, NT_STATUS_GROUP_EXISTS}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NO_SUCH_GROUP}, {
 	ERRHRD, ERRgeneral, NT_STATUS_MEMBER_IN_GROUP}, {
@@ -285,10 +291,10 @@ static const struct {
 	ERRHRD, ERRgeneral, NT_STATUS_PASSWORD_RESTRICTION}, {
 	ERRDOS, ERRnoaccess, NT_STATUS_LOGON_FAILURE}, {
 	ERRHRD, ERRgeneral, NT_STATUS_ACCOUNT_RESTRICTION}, {
-	ERRSRV, 2241, NT_STATUS_INVALID_LOGON_HOURS}, {
-	ERRSRV, 2240, NT_STATUS_INVALID_WORKSTATION}, {
+	ERRSRV, ERRbadLogonTime, NT_STATUS_INVALID_LOGON_HOURS}, {
+	ERRSRV, ERRbadclient, NT_STATUS_INVALID_WORKSTATION}, {
 	ERRSRV, ERRpasswordExpired, NT_STATUS_PASSWORD_EXPIRED}, {
-	ERRSRV, 2239, NT_STATUS_ACCOUNT_DISABLED}, {
+	ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_DISABLED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NONE_MAPPED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_TOO_MANY_LUIDS_REQUESTED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_LUIDS_EXHAUSTED}, {
@@ -585,7 +591,7 @@ static const struct {
 	ERRDOS, ERRnoaccess, NT_STATUS_TRUST_FAILURE}, {
 	ERRHRD, ERRgeneral, NT_STATUS_MUTANT_LIMIT_EXCEEDED}, {
 	ERRDOS, ERRnetlogonNotStarted, NT_STATUS_NETLOGON_NOT_STARTED}, {
-	ERRSRV, 2239, NT_STATUS_ACCOUNT_EXPIRED}, {
+	ERRSRV, ERRaccountexpired, NT_STATUS_ACCOUNT_EXPIRED}, {
 	ERRHRD, ERRgeneral, NT_STATUS_POSSIBLE_DEADLOCK}, {
 	ERRHRD, ERRgeneral, NT_STATUS_NETWORK_CREDENTIAL_CONFLICT}, {
 	ERRHRD, ERRgeneral, NT_STATUS_REMOTE_SESSION_LIMIT}, {
@@ -754,7 +760,7 @@ ntstatus_to_dos(__u32 ntstatus, __u8 * eclass, __u16 * ecode)
 }
 
 int
-map_smb_to_linux_error(struct smb_hdr *smb)
+map_smb_to_linux_error(struct smb_hdr *smb, int logErr)
 {
 	unsigned int i;
 	int rc = -EIO;	/* if transport error smb error may not be set */
@@ -771,7 +777,9 @@ map_smb_to_linux_error(struct smb_hdr *smb)
 		/* translate the newer STATUS codes to old style SMB errors
 		 * and then to POSIX errors */
 		__u32 err = le32_to_cpu(smb->Status.CifsError);
-		if (cifsFYI & CIFS_RC)
+		if (logErr && (err != (NT_STATUS_MORE_PROCESSING_REQUIRED)))
+			cifs_print_status(err);
+		else if (cifsFYI & CIFS_RC)
 			cifs_print_status(err);
 		ntstatus_to_dos(err, &smberrclass, &smberrcode);
 	} else {
@@ -813,7 +821,7 @@ map_smb_to_linux_error(struct smb_hdr *smb)
 	}
 	/* else ERRHRD class errors or junk  - return EIO */
 
-	cFYI(1, (" !!Mapping smb error code %d to POSIX err %d !!",
+	cFYI(1, ("Mapping smb error code %d to POSIX err %d",
 		 smberrcode, rc));
 
 	/* generic corrective action e.g. reconnect SMB session on
@@ -899,8 +907,11 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time)
 		cERROR(1, ("illegal hours %d", st->Hours));
 	days = sd->Day;
 	month = sd->Month;
-	if ((days > 31) || (month > 12))
+	if ((days > 31) || (month > 12)) {
 		cERROR(1, ("illegal date, month %d day: %d", month, days));
+		if (month > 12)
+			month = 12;
+	}
 	month -= 1;
 	days += total_days_of_prev_months[month];
 	days += 3652; /* account for difference in days between 1980 and 1970 */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 916df943133..3746580e970 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -121,7 +121,7 @@ static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode)
 
 
 static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
-			  char *buf, int *pobject_type, int isNewInode)
+			  char *buf, unsigned int *pobject_type, int isNewInode)
 {
 	loff_t local_size;
 	struct timespec local_mtime;
@@ -294,7 +294,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 }
 
 static void unix_fill_in_inode(struct inode *tmp_inode,
-	FILE_UNIX_INFO *pfindData, int *pobject_type, int isNewInode)
+	FILE_UNIX_INFO *pfindData, unsigned int *pobject_type, int isNewInode)
 {
 	loff_t local_size;
 	struct timespec local_mtime;
@@ -826,7 +826,7 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
 	int rc = 0;
 	struct qstr qstring;
 	struct cifsFileInfo *pCifsF;
-	unsigned obj_type;
+	unsigned int obj_type;
 	ino_t  inum;
 	struct cifs_sb_info *cifs_sb;
 	struct inode *tmp_inode;
@@ -1067,7 +1067,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
 		for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
 			if (current_entry == NULL) {
 				/* evaluate whether this case is an error */
-				cERROR(1,("past end of SMB num to fill %d i %d",
+				cERROR(1, ("past SMB end,  num to fill %d i %d",
 					  num_to_fill, i));
 				break;
 			}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 892be9b4d1f..899dc6078d9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -67,14 +67,59 @@ static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
 		pSMB->req.hdr.Flags2 |= SMBFLG2_DFS;
 		capabilities |= CAP_DFS;
 	}
-	if (ses->capabilities & CAP_UNIX) {
+	if (ses->capabilities & CAP_UNIX)
 		capabilities |= CAP_UNIX;
-	}
 
 	/* BB check whether to init vcnum BB */
 	return capabilities;
 }
 
+static void
+unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp)
+{
+	char *bcc_ptr = *pbcc_area;
+	int bytes_ret = 0;
+
+	/* Copy OS version */
+	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
+				  nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+
+	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
+				  32, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2; /* trailing null */
+
+	*pbcc_area = bcc_ptr;
+}
+
+static void unicode_domain_string(char **pbcc_area, struct cifsSesInfo *ses,
+				   const struct nls_table *nls_cp)
+{
+	char *bcc_ptr = *pbcc_area;
+	int bytes_ret = 0;
+
+	/* copy domain */
+	if (ses->domainName == NULL) {
+		/* Sending null domain better than using a bogus domain name (as
+		we did briefly in 2.6.18) since server will use its default */
+		*bcc_ptr = 0;
+		*(bcc_ptr+1) = 0;
+		bytes_ret = 0;
+	} else
+		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
+					  256, nls_cp);
+	bcc_ptr += 2 * bytes_ret;
+	bcc_ptr += 2;  /* account for null terminator */
+
+	*pbcc_area = bcc_ptr;
+}
+
+
 static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 				   const struct nls_table *nls_cp)
 {
@@ -100,32 +145,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
 	}
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2; /* account for null termination */
-	/* copy domain */
-	if (ses->domainName == NULL) {
-		/* Sending null domain better than using a bogus domain name (as
-		we did briefly in 2.6.18) since server will use its default */
-		*bcc_ptr = 0;
-		*(bcc_ptr+1) = 0;
-		bytes_ret = 0;
-	} else
-		bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName,
-					  256, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2;  /* account for null terminator */
-
-	/* Copy OS version */
-	bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
-				  nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
-				  32, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2; /* trailing null */
 
-	bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
-				  32, nls_cp);
-	bcc_ptr += 2 * bytes_ret;
-	bcc_ptr += 2; /* trailing null */
+	unicode_domain_string(&bcc_ptr, ses, nls_cp);
+	unicode_oslm_strings(&bcc_ptr, nls_cp);
 
 	*pbcc_area = bcc_ptr;
 }
@@ -203,14 +225,11 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
 	if (len >= words_left)
 		return rc;
 
-	if (ses->serverOS)
-		kfree(ses->serverOS);
+	kfree(ses->serverOS);
 	/* UTF-8 string will not grow more than four times as big as UCS-16 */
 	ses->serverOS = kzalloc(4 * len, GFP_KERNEL);
-	if (ses->serverOS != NULL) {
-		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len,
-				   nls_cp);
-	}
+	if (ses->serverOS != NULL)
+		cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp);
 	data += 2 * (len + 1);
 	words_left -= len + 1;
 
@@ -220,8 +239,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
 	if (len >= words_left)
 		return rc;
 
-	if (ses->serverNOS)
-		kfree(ses->serverNOS);
+	kfree(ses->serverNOS);
 	ses->serverNOS = kzalloc(4 * len, GFP_KERNEL); /* BB this is wrong length FIXME BB */
 	if (ses->serverNOS != NULL) {
 		cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len,
@@ -240,8 +258,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft,
 	if (len > words_left)
 		return rc;
 
-	if (ses->serverDomain)
-		kfree(ses->serverDomain);
+	kfree(ses->serverDomain);
 	ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */
 	if (ses->serverDomain != NULL) {
 		cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
@@ -271,8 +288,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
 	if (len >= bleft)
 		return rc;
 
-	if (ses->serverOS)
-		kfree(ses->serverOS);
+	kfree(ses->serverOS);
 
 	ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
 	if (ses->serverOS)
@@ -289,8 +305,7 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
 	if (len >= bleft)
 		return rc;
 
-	if (ses->serverNOS)
-		kfree(ses->serverNOS);
+	kfree(ses->serverNOS);
 
 	ses->serverNOS = kzalloc(len + 1, GFP_KERNEL);
 	if (ses->serverNOS)
@@ -479,7 +494,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 		if (ses->capabilities & CAP_UNICODE) {
 			if (iov[0].iov_len % 2) {
 				*bcc_ptr = 0;
-			}	bcc_ptr++;
+				bcc_ptr++;
+			}
 			unicode_ssetup_strings(&bcc_ptr, ses, nls_cp);
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
@@ -497,7 +513,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 
 	iov[1].iov_base = str_area;
 	iov[1].iov_len = count;
-	rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type, 0);
+	rc = SendReceive2(xid, ses, iov, 2 /* num_iovecs */, &resp_buf_type,
+			  0 /* not long op */, 1 /* log NT STATUS if any */ );
 	/* SMB request buf freed in SendReceive2 */
 
 	cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 2ef0be28882..7f50e8577c1 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -173,9 +173,10 @@
 #define ERRusestd		251	/* temporarily unable to use either raw
 					   or mpx */
 #define ERR_NOTIFY_ENUM_DIR	1024
+#define ERRnoSuchUser		2238	/* user account does not exist */
 #define ERRaccountexpired	2239
-#define ERRbadclient		2240
-#define ERRbadLogonTime		2241
+#define ERRbadclient		2240	/* can not logon from this client */
+#define ERRbadLogonTime		2241	/* logon hours do not allow this */
 #define ERRpasswordExpired	2242
 #define ERRnetlogonNotStarted	2455
 #define ERRnosupport		0xFFFF
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 746bc9405db..7ed32b3cb78 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -55,7 +55,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
 	if (temp == NULL)
 		return temp;
 	else {
-		memset(temp, 0, sizeof (struct mid_q_entry));
+		memset(temp, 0, sizeof(struct mid_q_entry));
 		temp->mid = smb_buffer->Mid;	/* always LE */
 		temp->pid = current->pid;
 		temp->command = smb_buffer->Command;
@@ -158,7 +158,7 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
 	iov.iov_len = len;
 
 	smb_msg.msg_name = sin;
-	smb_msg.msg_namelen = sizeof (struct sockaddr);
+	smb_msg.msg_namelen = sizeof(struct sockaddr);
 	smb_msg.msg_control = NULL;
 	smb_msg.msg_controllen = 0;
 	smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -228,7 +228,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
 		return -ENOTSOCK; /* BB eventually add reconnect code here */
 
 	smb_msg.msg_name = sin;
-	smb_msg.msg_namelen = sizeof (struct sockaddr);
+	smb_msg.msg_namelen = sizeof(struct sockaddr);
 	smb_msg.msg_control = NULL;
 	smb_msg.msg_controllen = 0;
 	smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/
@@ -363,9 +363,8 @@ static int allocate_mid(struct cifsSesInfo *ses, struct smb_hdr *in_buf,
 		} /* else ok - we are setting up session */
 	}
 	*ppmidQ = AllocMidQEntry(in_buf, ses);
-	if (*ppmidQ == NULL) {
+	if (*ppmidQ == NULL)
 		return -ENOMEM;
-	}
 	return 0;
 }
 
@@ -419,7 +418,7 @@ static int wait_for_response(struct cifsSesInfo *ses,
 int
 SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 	     struct kvec *iov, int n_vec, int *pRespBufType /* ret */,
-	     const int long_op)
+	     const int long_op, const int logError)
 {
 	int rc = 0;
 	unsigned int receive_len;
@@ -465,7 +464,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		wake_up(&ses->server->request_q);
 		return rc;
 	}
-
 	rc = cifs_sign_smb2(iov, n_vec, ses->server, &midQ->sequence_number);
 
 	midQ->midState = MID_REQUEST_SUBMITTED;
@@ -568,13 +566,11 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 			}
 
 			/* BB special case reconnect tid and uid here? */
-			/* BB special case Errbadpassword and pwdexpired here */
-			rc = map_smb_to_linux_error(midQ->resp_buf);
+			rc = map_smb_to_linux_error(midQ->resp_buf, logError);
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
 				BCC(midQ->resp_buf) =
 					le16_to_cpu(BCC_LE(midQ->resp_buf));
@@ -749,12 +745,11 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 			*pbytes_returned = out_buf->smb_buf_length;
 
 			/* BB special case reconnect tid and uid here? */
-			rc = map_smb_to_linux_error(out_buf);
+			rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * out_buf->WordCount) + 2 /* bcc */ )
 				BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
 		} else {
@@ -993,12 +988,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 			*pbytes_returned = out_buf->smb_buf_length;
 
 			/* BB special case reconnect tid and uid here? */
-			rc = map_smb_to_linux_error(out_buf);
+			rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
 
 			/* convert ByteCount if necessary */
-			if (receive_len >=
-			    sizeof (struct smb_hdr) -
-			    4 /* do not count RFC1001 header */  +
+			if (receive_len >= sizeof(struct smb_hdr) - 4
+			    /* do not count RFC1001 header */  +
 			    (2 * out_buf->WordCount) + 2 /* bcc */ )
 				BCC(out_buf) = le16_to_cpu(BCC_LE(out_buf));
 		} else {
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index f61e433d281..369e838bebd 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -261,21 +261,26 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 				cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-/*		else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+#ifdef CONFIG_CIFS_EXPERIMENTAL
+		else if(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 			__u16 fid;
 			int oplock = FALSE;
-			rc = CIFSSMBOpen(xid, pTcon, full_path,
-					 FILE_OPEN, GENERIC_READ, 0, &fid,
-					 &oplock, NULL, cifs_sb->local_nls,
-					 cifs_sb->mnt_cifs_flags &
-					 CIFS_MOUNT_MAP_SPECIAL_CHR);
+			if (experimEnabled) 
+				rc = CIFSSMBOpen(xid, pTcon, full_path,
+					FILE_OPEN, GENERIC_READ, 0, &fid,
+					&oplock, NULL, cifs_sb->local_nls,
+					cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
+			/* else rc is EOPNOTSUPP from above */
+
 			if(rc == 0) {
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
 				CIFSSMBClose(xid, pTcon, fid);
 			}
-		} */  /* BB enable after fixing up return data */
+		}
+#endif /* EXPERIMENTAL */
 #else
 		cFYI(1, ("query POSIX ACL not supported yet"));
 #endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index cdb4c07a787..359e531094d 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -51,7 +51,7 @@ static void *alloc_upcall(int opcode, int size)
 
         inp->ih.opcode = opcode;
 	inp->ih.pid = current->pid;
-	inp->ih.pgid = process_group(current);
+	inp->ih.pgid = task_pgrp_nr(current);
 #ifdef CONFIG_CODA_FS_OLD_API
 	memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
 	inp->ih.cred.cr_fsuid = current->fsuid;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 6438941ab1f..4f741546f4b 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -456,7 +456,7 @@ static int check_version(struct dlm_write_request *req)
 		printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
 		       "user (%d.%d.%d) kernel (%d.%d.%d)\n",
 		       current->comm,
-		       current->pid,
+		       task_pid_nr(current),
 		       req->version[0],
 		       req->version[1],
 		       req->version[2],
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index de618929195..34f68f3a069 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -325,15 +325,14 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 	int wake_nests = 0;
 	unsigned long flags;
 	struct task_struct *this_task = current;
-	struct list_head *lsthead = &psw->wake_task_list, *lnk;
+	struct list_head *lsthead = &psw->wake_task_list;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
 
 	spin_lock_irqsave(&psw->lock, flags);
 
 	/* Try to see if the current task is already inside this wakeup call */
-	list_for_each(lnk, lsthead) {
-		tncur = list_entry(lnk, struct wake_task_node, llink);
+	list_for_each_entry(tncur, lsthead, llink) {
 
 		if (tncur->wq == wq ||
 		    (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
diff --git a/fs/exec.c b/fs/exec.c
index 070ddf13cb7..2c942e2d14e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -234,7 +234,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 
 	vma->vm_flags = VM_STACK_FLAGS;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	err = insert_vm_struct(mm, vma);
 	if (err) {
 		up_write(&mm->mmap_sem);
@@ -775,8 +775,8 @@ static int de_thread(struct task_struct *tsk)
 	 * Reparenting needs write_lock on tasklist_lock,
 	 * so it is safe to do it under read_lock.
 	 */
-	if (unlikely(tsk->group_leader == child_reaper(tsk)))
-		tsk->nsproxy->pid_ns->child_reaper = tsk;
+	if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
+		task_active_pid_ns(tsk)->child_reaper = tsk;
 
 	zap_other_threads(tsk);
 	read_unlock(&tasklist_lock);
@@ -841,8 +841,8 @@ static int de_thread(struct task_struct *tsk)
 		 */
 		tsk->start_time = leader->start_time;
 
-		BUG_ON(leader->tgid != tsk->tgid);
-		BUG_ON(tsk->pid == tsk->tgid);
+		BUG_ON(!same_thread_group(leader, tsk));
+		BUG_ON(has_group_leader_pid(tsk));
 		/*
 		 * An exec() starts a new thread group with the
 		 * TGID of the previous thread group. Rehash the
@@ -857,7 +857,7 @@ static int de_thread(struct task_struct *tsk)
 		 */
 		detach_pid(tsk, PIDTYPE_PID);
 		tsk->pid = leader->pid;
-		attach_pid(tsk, PIDTYPE_PID,  find_pid(tsk->pid));
+		attach_pid(tsk, PIDTYPE_PID,  task_pid(leader));
 		transfer_pid(leader, tsk, PIDTYPE_PGID);
 		transfer_pid(leader, tsk, PIDTYPE_SID);
 		list_replace_rcu(&leader->tasks, &tsk->tasks);
@@ -1433,7 +1433,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
 			case 'p':
 				pid_in_pattern = 1;
 				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", current->tgid);
+					      "%d", task_tgid_vnr(current));
 				if (rc > out_end - out_ptr)
 					goto out;
 				out_ptr += rc;
@@ -1513,7 +1513,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
 	if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", current->tgid);
+			      ".%d", task_tgid_vnr(current));
 		if (rc > out_end - out_ptr)
 			goto out;
 		out_ptr += rc;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3dec003b773..9b162cd6c16 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2954,7 +2954,7 @@ int ext3_write_inode(struct inode *inode, int wait)
 		return 0;
 
 	if (ext3_journal_current_handle()) {
-		jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n");
+		jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n");
 		dump_stack();
 		return -EIO;
 	}
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index f58cbb26323..408373819e3 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -741,12 +741,11 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
 		}
 	} else {
 		/* Allocate a buffer where we construct the new block. */
-		s->base = kmalloc(sb->s_blocksize, GFP_KERNEL);
+		s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
 		/* assert(header == s->base) */
 		error = -ENOMEM;
 		if (s->base == NULL)
 			goto cleanup;
-		memset(s->base, 0, sb->s_blocksize);
 		header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
 		header(s->base)->h_blocks = cpu_to_le32(1);
 		header(s->base)->h_refcount = cpu_to_le32(1);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c9db73fc5e3..8685263ccc4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/signal.h>
 #include <linux/rcupdate.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -292,7 +293,7 @@ int f_setown(struct file *filp, unsigned long arg, int force)
 		who = -who;
 	}
 	rcu_read_lock();
-	pid = find_pid(who);
+	pid = find_vpid(who);
 	result = __f_setown(filp, pid, type, force);
 	rcu_read_unlock();
 	return result;
@@ -308,7 +309,7 @@ pid_t f_getown(struct file *filp)
 {
 	pid_t pid;
 	read_lock(&filp->f_owner.lock);
-	pid = pid_nr(filp->f_owner.pid);
+	pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns);
 	if (filp->f_owner.pid_type == PIDTYPE_PGID)
 		pid = -pid;
 	read_unlock(&filp->f_owner.lock);
diff --git a/fs/file_table.c b/fs/file_table.c
index 3176fefc92e..664e3f2309b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -323,12 +323,11 @@ void file_kill(struct file *file)
 
 int fs_may_remount_ro(struct super_block *sb)
 {
-	struct list_head *p;
+	struct file *file;
 
 	/* Check that no files are currently opened for writing. */
 	file_list_lock();
-	list_for_each(p, &sb->s_files) {
-		struct file *file = list_entry(p, struct file, f_u.fu_list);
+	list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
 		struct inode *inode = file->f_path.dentry->d_inode;
 
 		/* File with pending delete? */
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 686734ff973..0fca82021d7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -89,7 +89,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
 			printk(KERN_DEBUG
 			       "%s(%d): dirtied inode %lu (%s) on %s\n",
-			       current->comm, current->pid, inode->i_ino,
+			       current->comm, task_pid_nr(current), inode->i_ino,
 			       name, inode->i_sb->s_id);
 	}
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 10d2c211d18..d6ff77e8e7e 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -25,6 +25,7 @@
 #include <linux/capability.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
+#include <linux/pid_namespace.h>
 
 static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
@@ -93,7 +94,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
 			break;
@@ -101,7 +102,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
 			if (!who)
 				pgrp = task_pgrp(current);
 			else
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -180,7 +181,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			if (!who)
 				p = current;
 			else
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			if (p)
 				ret = get_task_ioprio(p);
 			break;
@@ -188,7 +189,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
 			if (!who)
 				pgrp = task_pgrp(current);
 			else
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a263d82761d..8f1f2aa5fb3 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -466,7 +466,7 @@ void journal_commit_transaction(journal_t *journal)
 	spin_unlock(&journal->j_list_lock);
 
 	if (err)
-		__journal_abort_hard(journal);
+		journal_abort(journal, err);
 
 	journal_write_revoke_records(journal, commit_transaction);
 
@@ -524,7 +524,7 @@ void journal_commit_transaction(journal_t *journal)
 
 			descriptor = journal_get_descriptor_buffer(journal);
 			if (!descriptor) {
-				__journal_abort_hard(journal);
+				journal_abort(journal, -EIO);
 				continue;
 			}
 
@@ -557,7 +557,7 @@ void journal_commit_transaction(journal_t *journal)
 		   and repeat this loop: we'll fall into the
 		   refile-on-abort condition above. */
 		if (err) {
-			__journal_abort_hard(journal);
+			journal_abort(journal, err);
 			continue;
 		}
 
@@ -748,7 +748,7 @@ wait_for_iobuf:
 		err = -EIO;
 
 	if (err)
-		__journal_abort_hard(journal);
+		journal_abort(journal, err);
 
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 5d9fec0b7eb..5d14243499d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -35,6 +35,7 @@
 #include <linux/kthread.h>
 #include <linux/poison.h>
 #include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -654,10 +655,9 @@ static journal_t * journal_init_common (void)
 	journal_t *journal;
 	int err;
 
-	journal = kmalloc(sizeof(*journal), GFP_KERNEL);
+	journal = kzalloc(sizeof(*journal), GFP_KERNEL);
 	if (!journal)
 		goto fail;
-	memset(journal, 0, sizeof(*journal));
 
 	init_waitqueue_head(&journal->j_wait_transaction_locked);
 	init_waitqueue_head(&journal->j_wait_logspace);
@@ -1852,64 +1852,41 @@ void journal_put_journal_head(struct journal_head *jh)
 }
 
 /*
- * /proc tunables
+ * debugfs tunables
  */
-#if defined(CONFIG_JBD_DEBUG)
-int journal_enable_debug;
-EXPORT_SYMBOL(journal_enable_debug);
-#endif
+#ifdef CONFIG_JBD_DEBUG
 
-#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
+u8 journal_enable_debug __read_mostly;
+EXPORT_SYMBOL(journal_enable_debug);
 
-static struct proc_dir_entry *proc_jbd_debug;
+static struct dentry *jbd_debugfs_dir;
+static struct dentry *jbd_debug;
 
-static int read_jbd_debug(char *page, char **start, off_t off,
-			  int count, int *eof, void *data)
+static void __init jbd_create_debugfs_entry(void)
 {
-	int ret;
-
-	ret = sprintf(page + off, "%d\n", journal_enable_debug);
-	*eof = 1;
-	return ret;
+	jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
+	if (jbd_debugfs_dir)
+		jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+					       jbd_debugfs_dir,
+					       &journal_enable_debug);
 }
 
-static int write_jbd_debug(struct file *file, const char __user *buffer,
-			   unsigned long count, void *data)
+static void __exit jbd_remove_debugfs_entry(void)
 {
-	char buf[32];
-
-	if (count > ARRAY_SIZE(buf) - 1)
-		count = ARRAY_SIZE(buf) - 1;
-	if (copy_from_user(buf, buffer, count))
-		return -EFAULT;
-	buf[ARRAY_SIZE(buf) - 1] = '\0';
-	journal_enable_debug = simple_strtoul(buf, NULL, 10);
-	return count;
+	debugfs_remove(jbd_debug);
+	debugfs_remove(jbd_debugfs_dir);
 }
 
-#define JBD_PROC_NAME "sys/fs/jbd-debug"
+#else
 
-static void __init create_jbd_proc_entry(void)
+static inline void jbd_create_debugfs_entry(void)
 {
-	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
-	if (proc_jbd_debug) {
-		/* Why is this so hard? */
-		proc_jbd_debug->read_proc = read_jbd_debug;
-		proc_jbd_debug->write_proc = write_jbd_debug;
-	}
 }
 
-static void __exit remove_jbd_proc_entry(void)
+static inline void jbd_remove_debugfs_entry(void)
 {
-	if (proc_jbd_debug)
-		remove_proc_entry(JBD_PROC_NAME, NULL);
 }
 
-#else
-
-#define create_jbd_proc_entry() do {} while (0)
-#define remove_jbd_proc_entry() do {} while (0)
-
 #endif
 
 struct kmem_cache *jbd_handle_cache;
@@ -1966,7 +1943,7 @@ static int __init journal_init(void)
 	ret = journal_init_caches();
 	if (ret != 0)
 		journal_destroy_caches();
-	create_jbd_proc_entry();
+	jbd_create_debugfs_entry();
 	return ret;
 }
 
@@ -1977,7 +1954,7 @@ static void __exit journal_exit(void)
 	if (n)
 		printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
 #endif
-	remove_jbd_proc_entry();
+	jbd_remove_debugfs_entry();
 	journal_destroy_caches();
 }
 
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 2a5f4b833e3..c5d9694b6a2 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -250,10 +250,10 @@ int journal_recover(journal_t *journal)
 	if (!err)
 		err = do_one_pass(journal, &info, PASS_REPLAY);
 
-	jbd_debug(0, "JBD: recovery, exit status %d, "
+	jbd_debug(1, "JBD: recovery, exit status %d, "
 		  "recovered transactions %u to %u\n",
 		  err, info.start_transaction, info.end_transaction);
-	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+	jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
 		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
 
 	/* Restart the log at the next transaction ID, thus invalidating
@@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
 		int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
 #endif
-		jbd_debug(0,
+		jbd_debug(1,
 			  "JBD: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
 		journal->j_transaction_sequence = ++info.end_transaction;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 9841b1e5af0..08ff6c7028c 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
 
 alloc_transaction:
 	if (!journal->j_running_transaction) {
-		new_transaction = kmalloc(sizeof(*new_transaction),
+		new_transaction = kzalloc(sizeof(*new_transaction),
 						GFP_NOFS|__GFP_NOFAIL);
 		if (!new_transaction) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		memset(new_transaction, 0, sizeof(*new_transaction));
 	}
 
 	jbd_debug(3, "New handle %p going live.\n", handle);
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 2a49f2c51a9..4130adabd76 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -80,28 +80,28 @@
 #define JFFS2_ERROR(fmt, ...)						\
 	do {								\
 		printk(JFFS2_ERR_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_WARNING(fmt, ...)						\
 	do {								\
 		printk(JFFS2_WARN_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_NOTICE(fmt, ...)						\
 	do {								\
 		printk(JFFS2_NOTICE_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
 #define JFFS2_DEBUG(fmt, ...)						\
 	do {								\
 		printk(JFFS2_DBG_MSG_PREFIX				\
-			" (%d) %s: " fmt, current->pid,			\
+			" (%d) %s: " fmt, task_pid_nr(current),		\
 			__FUNCTION__ , ##__VA_ARGS__);			\
 	} while(0)
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 07daa797259..860752998fb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1411,7 +1411,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 		mnt_flags |= MNT_RELATIME;
 
 	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
-		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
+		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
 
 	/* ... and get the mountpoint */
 	retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index af8b235d405..11833f4caea 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -168,7 +168,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	spin_unlock(&inode->i_lock);
 
 	spin_unlock(&clp->cl_lock);
-	kfree(delegation);
+	if (delegation != NULL)
+		nfs_free_delegation(delegation);
 	return status;
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8ec7fbd8240..35334539d94 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -562,6 +562,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	nfs_fattr_init(&fattr);
 	desc->entry = &my_entry;
 
+	nfs_block_sillyrename(dentry);
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
 
@@ -592,6 +593,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 		}
 	}
+	nfs_unblock_sillyrename(dentry);
 	unlock_kernel();
 	if (res > 0)
 		res = 0;
@@ -866,6 +868,7 @@ struct dentry_operations nfs_dentry_operations = {
 static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
 	struct dentry *res;
+	struct dentry *parent;
 	struct inode *inode = NULL;
 	int error;
 	struct nfs_fh fhandle;
@@ -894,26 +897,31 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		goto out_unlock;
 	}
 
+	parent = dentry->d_parent;
+	/* Protect against concurrent sillydeletes */
+	nfs_block_sillyrename(parent);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
 		res = ERR_PTR(error);
-		goto out_unlock;
+		goto out_unblock_sillyrename;
 	}
 	inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
-		goto out_unlock;
+		goto out_unblock_sillyrename;
 
 no_entry:
 	res = d_materialise_unique(dentry, inode);
 	if (res != NULL) {
 		if (IS_ERR(res))
-			goto out_unlock;
+			goto out_unblock_sillyrename;
 		dentry = res;
 	}
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+out_unblock_sillyrename:
+	nfs_unblock_sillyrename(parent);
 out_unlock:
 	unlock_kernel();
 out:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index d29f90d00aa..b3bb89f7d5d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -131,7 +131,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
 {
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
-		filemap_fdatawrite(filp->f_mapping);
+		nfs_wb_all(filp->f_path.dentry->d_inode);
 	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6d2f2a3eccf..db5d96dc610 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -514,7 +514,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 	return ctx;
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
 {
 	struct inode *inode = ctx->path.dentry->d_inode;
 
@@ -522,8 +522,12 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
 		return;
 	list_del(&ctx->list);
 	spin_unlock(&inode->i_lock);
-	if (ctx->state != NULL)
-		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	if (ctx->state != NULL) {
+		if (wait)
+			nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
+		else
+			nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	}
 	if (ctx->cred != NULL)
 		put_rpccred(ctx->cred);
 	dput(ctx->path.dentry);
@@ -531,6 +535,16 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
 	kfree(ctx);
 }
 
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	__put_nfs_open_context(ctx, 0);
+}
+
+static void put_nfs_open_context_sync(struct nfs_open_context *ctx)
+{
+	__put_nfs_open_context(ctx, 1);
+}
+
 /*
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
@@ -577,7 +591,7 @@ static void nfs_file_clear_open_context(struct file *filp)
 		spin_lock(&inode->i_lock);
 		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
 		spin_unlock(&inode->i_lock);
-		put_nfs_open_context(ctx);
+		put_nfs_open_context_sync(ctx);
 	}
 }
 
@@ -1169,6 +1183,9 @@ static void init_once(struct kmem_cache * cachep, void *foo)
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
+	atomic_set(&nfsi->silly_count, 1);
+	INIT_HLIST_HEAD(&nfsi->silly_list);
+	init_waitqueue_head(&nfsi->waitqueue);
 	nfs4_init_once(nfsi);
 }
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d2802b1ca3b..b35069a2aa9 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -178,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -209,6 +209,7 @@ extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
+extern void nfs4_close_sync(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -235,6 +236,7 @@ extern struct svc_version nfs4_callback_version1;
 #else
 
 #define nfs4_close_state(a, b, c) do { } while (0)
+#define nfs4_close_sync(a, b, c) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cb99fd90a9a..f03d9d5f5ba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1305,7 +1305,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct path *path, struct nfs4_state *state)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
@@ -1333,8 +1333,11 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state)
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
+	status = 0;
+	if (wait)
+		status = rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
-	return 0;
+	return status;
 out_free_calldata:
 	kfree(calldata);
 out:
@@ -1365,13 +1368,14 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct
 	}
 	ret = PTR_ERR(filp);
 out_close:
-	nfs4_close_state(path, state, nd->intent.open.flags);
+	nfs4_close_sync(path, state, nd->intent.open.flags);
 	return ret;
 }
 
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct dentry *parent;
 	struct path path = {
 		.mnt = nd->mnt,
 		.dentry = dentry,
@@ -1394,6 +1398,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
+	parent = dentry->d_parent;
+	/* Protect against concurrent sillydeletes */
+	nfs_block_sillyrename(parent);
 	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
@@ -1401,12 +1408,14 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 			d_add(dentry, NULL);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 		}
+		nfs_unblock_sillyrename(parent);
 		return (struct dentry *)state;
 	}
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		path.dentry = res;
 	nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
+	nfs_unblock_sillyrename(parent);
 	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
@@ -1444,7 +1453,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
-	nfs4_close_state(&path, state, openflags);
+	nfs4_close_sync(&path, state, openflags);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1898,7 +1907,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
 		status = nfs4_intent_set_file(nd, &path, state);
 	else
-		nfs4_close_state(&path, state, flags);
+		nfs4_close_sync(&path, state, flags);
 out:
 	return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bfb36261cec..23a9a36556b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -425,7 +425,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+static void __nfs4_close(struct path *path, struct nfs4_state *state, mode_t mode, int wait)
 {
 	struct nfs4_state_owner *owner = state->owner;
 	int call_close = 0;
@@ -466,7 +466,17 @@ void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 		nfs4_put_open_state(state);
 		nfs4_put_state_owner(owner);
 	} else
-		nfs4_do_close(path, state);
+		nfs4_do_close(path, state, wait);
+}
+
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+{
+	__nfs4_close(path, state, mode, 0);
+}
+
+void nfs4_close_sync(struct path *path, struct nfs4_state *state, mode_t mode)
+{
+	__nfs4_close(path, state, mode, 1);
 }
 
 /*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 1aed850d18f..ce558c2e4d5 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -11,9 +11,11 @@
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
-
+#include <linux/sched.h>
+#include <linux/wait.h>
 
 struct nfs_unlinkdata {
+	struct hlist_node list;
 	struct nfs_removeargs args;
 	struct nfs_removeres res;
 	struct inode *dir;
@@ -52,6 +54,20 @@ static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
 	return 0;
 }
 
+static void nfs_free_dname(struct nfs_unlinkdata *data)
+{
+	kfree(data->args.name.name);
+	data->args.name.name = NULL;
+	data->args.name.len = 0;
+}
+
+static void nfs_dec_sillycount(struct inode *dir)
+{
+	struct nfs_inode *nfsi = NFS_I(dir);
+	if (atomic_dec_return(&nfsi->silly_count) == 1)
+		wake_up(&nfsi->waitqueue);
+}
+
 /**
  * nfs_async_unlink_init - Initialize the RPC info
  * task: rpc_task of the sillydelete
@@ -95,6 +111,8 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 static void nfs_async_unlink_release(void *calldata)
 {
 	struct nfs_unlinkdata	*data = calldata;
+
+	nfs_dec_sillycount(data->dir);
 	nfs_free_unlinkdata(data);
 }
 
@@ -104,33 +122,100 @@ static const struct rpc_call_ops nfs_unlink_ops = {
 	.rpc_release = nfs_async_unlink_release,
 };
 
-static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
 {
 	struct rpc_task *task;
+	struct dentry *alias;
+
+	alias = d_lookup(parent, &data->args.name);
+	if (alias != NULL) {
+		int ret = 0;
+		/*
+		 * Hey, we raced with lookup... See if we need to transfer
+		 * the sillyrename information to the aliased dentry.
+		 */
+		nfs_free_dname(data);
+		spin_lock(&alias->d_lock);
+		if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+			alias->d_fsdata = data;
+			alias->d_flags ^= DCACHE_NFSFS_RENAMED;
+			ret = 1;
+		}
+		spin_unlock(&alias->d_lock);
+		nfs_dec_sillycount(dir);
+		dput(alias);
+		return ret;
+	}
+	data->dir = igrab(dir);
+	if (!data->dir) {
+		nfs_dec_sillycount(dir);
+		return 0;
+	}
+	data->args.fh = NFS_FH(dir);
+	nfs_fattr_init(&data->res.dir_attr);
+
+	task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data);
+	if (!IS_ERR(task))
+		rpc_put_task(task);
+	return 1;
+}
+
+static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
 	struct dentry *parent;
 	struct inode *dir;
+	int ret = 0;
 
-	if (nfs_copy_dname(dentry, data) < 0)
-		goto out_free;
 
 	parent = dget_parent(dentry);
 	if (parent == NULL)
 		goto out_free;
-	dir = igrab(parent->d_inode);
+	dir = parent->d_inode;
+	if (nfs_copy_dname(dentry, data) == 0)
+		goto out_dput;
+	/* Non-exclusive lock protects against concurrent lookup() calls */
+	spin_lock(&dir->i_lock);
+	if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
+		/* Deferred delete */
+		hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
+		spin_unlock(&dir->i_lock);
+		ret = 1;
+		goto out_dput;
+	}
+	spin_unlock(&dir->i_lock);
+	ret = nfs_do_call_unlink(parent, dir, data);
+out_dput:
 	dput(parent);
-	if (dir == NULL)
-		goto out_free;
+out_free:
+	return ret;
+}
 
-	data->dir = dir;
-	data->args.fh = NFS_FH(dir);
-	nfs_fattr_init(&data->res.dir_attr);
+void nfs_block_sillyrename(struct dentry *dentry)
+{
+	struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
 
-	task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
-	return 1;
-out_free:
-	return 0;
+	wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
+}
+
+void nfs_unblock_sillyrename(struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(dir);
+	struct nfs_unlinkdata *data;
+
+	atomic_inc(&nfsi->silly_count);
+	spin_lock(&dir->i_lock);
+	while (!hlist_empty(&nfsi->silly_list)) {
+		if (!atomic_inc_not_zero(&nfsi->silly_count))
+			break;
+		data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
+		hlist_del(&data->list);
+		spin_unlock(&dir->i_lock);
+		if (nfs_do_call_unlink(dentry, dir, data) == 0)
+			nfs_free_unlinkdata(data);
+		spin_lock(&dir->i_lock);
+	}
+	spin_unlock(&dir->i_lock);
 }
 
 /**
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0cf9d1cd9bd..89527a487ed 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -174,8 +174,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
 		return;
 	if (count != nfs_page_length(page))
 		return;
-	if (count != PAGE_CACHE_SIZE)
-		zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0);
 	SetPageUptodate(page);
 }
 
@@ -627,7 +625,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				return ERR_PTR(error);
 			}
 			spin_unlock(&inode->i_lock);
-			return new;
+			req = new;
+			goto zero_page;
 		}
 		spin_unlock(&inode->i_lock);
 
@@ -655,13 +654,23 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 	if (offset < req->wb_offset) {
 		req->wb_offset = offset;
 		req->wb_pgbase = offset;
-		req->wb_bytes = rqend - req->wb_offset;
+		req->wb_bytes = max(end, rqend) - req->wb_offset;
+		goto zero_page;
 	}
 
 	if (end > rqend)
 		req->wb_bytes = end - req->wb_offset;
 
 	return req;
+zero_page:
+	/* If this page might potentially be marked as up to date,
+	 * then we need to zero any uninitalised data. */
+	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
+			&& !PageUptodate(req->wb_page))
+		zero_user_page(req->wb_page, req->wb_bytes,
+				PAGE_CACHE_SIZE - req->wb_bytes,
+				KM_USER0);
+	return req;
 }
 
 int nfs_flush_incompatible(struct file *file, struct page *page)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46934c97f8f..d0199189924 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1029,13 +1029,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		if (EX_WGATHER(exp)) {
 			if (atomic_read(&inode->i_writecount) > 1
 			    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-				dprintk("nfsd: write defer %d\n", current->pid);
+				dprintk("nfsd: write defer %d\n", task_pid_nr(current));
 				msleep(10);
-				dprintk("nfsd: write resume %d\n", current->pid);
+				dprintk("nfsd: write resume %d\n", task_pid_nr(current));
 			}
 
 			if (inode->i_state & I_DIRTY) {
-				dprintk("nfsd: write sync %d\n", current->pid);
+				dprintk("nfsd: write sync %d\n", task_pid_nr(current));
 				host_err=nfsd_sync(file);
 			}
 #if 0
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index f14b541fab9..9cc7c0418b7 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1372,7 +1372,7 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
 
 	spin_lock(&o2hb_live_lock);
 	if (reg->hr_task)
-		pid = reg->hr_task->pid;
+		pid = task_pid_nr(reg->hr_task);
 	spin_unlock(&o2hb_live_lock);
 
 	if (!pid)
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 75cd877f6d4..cd046060114 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -192,7 +192,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
  * previous token if args expands to nothing.
  */
 #define __mlog_printk(level, fmt, args...)				\
-	printk(level "(%u,%lu):%s:%d " fmt, current->pid,		\
+	printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current),	\
 	       __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ ,	\
 	       ##args)
 
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index a2c33160bfd..2fde7bf9143 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -259,7 +259,7 @@ static void dlm_print_reco_node_status(struct dlm_ctxt *dlm)
 	struct dlm_lock_resource *res;
 
 	mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive",
 	     dlm->reco.dead_node, dlm->reco.new_master);
 
@@ -420,7 +420,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm)
 	if (dlm_in_recovery(dlm)) {
 		mlog(0, "%s: reco thread %d in recovery: "
 		     "state=%d, master=%u, dead=%u\n",
-		     dlm->name, dlm->dlm_reco_thread_task->pid,
+		     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 		     dlm->reco.state, dlm->reco.new_master,
 		     dlm->reco.dead_node);
 	}
@@ -483,7 +483,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 		return 0;
 	}
 	mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->reco.dead_node);
 	spin_unlock(&dlm->spinlock);
 
@@ -507,7 +507,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 		mlog(0, "another node will master this recovery session.\n");
 	}
 	mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n",
-	     dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master,
+	     dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master,
 	     dlm->node_num, dlm->reco.dead_node);
 
 	/* it is safe to start everything back up here
@@ -520,7 +520,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 
 master_here:
 	mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n",
-	     dlm->dlm_reco_thread_task->pid,
+	     task_pid_nr(dlm->dlm_reco_thread_task),
 	     dlm->name, dlm->reco.dead_node, dlm->node_num);
 
 	status = dlm_remaster_locks(dlm, dlm->reco.dead_node);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 27b59f5f3bd..63c95afb561 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -77,6 +77,7 @@
 #include <linux/cpuset.h>
 #include <linux/rcupdate.h>
 #include <linux/delayacct.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk)
 					    TASK_UNINTERRUPTIBLE |
 					    TASK_STOPPED |
 					    TASK_TRACED)) |
-			(tsk->exit_state & (EXIT_ZOMBIE |
-					    EXIT_DEAD));
+					   tsk->exit_state;
 	const char **p = &task_state_array[0];
 
 	while (state) {
@@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer)
 	struct group_info *group_info;
 	int g;
 	struct fdtable *fdt = NULL;
+	struct pid_namespace *ns;
+	pid_t ppid, tpid;
 
+	ns = current->nsproxy->pid_ns;
 	rcu_read_lock();
+	ppid = pid_alive(p) ?
+		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
+	tpid = pid_alive(p) && p->ptrace ?
+		task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer)
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
-		p->tgid, p->pid,
-		pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
-		pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
+		task_tgid_nr_ns(p, ns),
+		task_pid_nr_ns(p, ns),
+		ppid, tpid,
 		p->uid, p->euid, p->suid, p->fsuid,
 		p->gid, p->egid, p->sgid, p->fsgid);
 
@@ -394,6 +401,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 	unsigned long rsslim = 0;
 	char tcomm[sizeof(task->comm)];
 	unsigned long flags;
+	struct pid_namespace *ns;
+
+	ns = current->nsproxy->pid_ns;
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -416,7 +426,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 		struct signal_struct *sig = task->signal;
 
 		if (sig->tty) {
-			tty_pgrp = pid_nr(sig->tty->pgrp);
+			tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
 			tty_nr = new_encode_dev(tty_devnum(sig->tty));
 		}
 
@@ -446,12 +456,12 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 			maj_flt += sig->maj_flt;
 			utime = cputime_add(utime, sig->utime);
 			stime = cputime_add(stime, sig->stime);
-			gtime += cputime_add(gtime, sig->gtime);
+			gtime = cputime_add(gtime, sig->gtime);
 		}
 
-		sid = signal_session(sig);
-		pgid = process_group(task);
-		ppid = rcu_dereference(task->real_parent)->tgid;
+		sid = task_session_nr_ns(task, ns);
+		pgid = task_pgrp_nr_ns(task, ns);
+		ppid = task_ppid_nr_ns(task, ns);
 
 		unlock_task_sighand(task, &flags);
 	}
@@ -483,7 +493,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole)
 	res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
 %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
-		task->pid,
+		task_pid_nr_ns(task, ns),
 		tcomm,
 		state,
 		ppid,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4fe74d15641..39a3d7c969c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -63,16 +63,19 @@
 #include <linux/mm.h>
 #include <linux/rcupdate.h>
 #include <linux/kallsyms.h>
+#include <linux/resource.h>
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/cgroup.h>
 #include <linux/cpuset.h>
 #include <linux/audit.h>
 #include <linux/poll.h>
 #include <linux/nsproxy.h>
 #include <linux/oom.h>
 #include <linux/elf.h>
+#include <linux/pid_namespace.h>
 #include "internal.h"
 
 /* NOTE:
@@ -301,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 	return sprintf(buffer, "%lu\n", points);
 }
 
+struct limit_names {
+	char *name;
+	char *unit;
+};
+
+static const struct limit_names lnames[RLIM_NLIMITS] = {
+	[RLIMIT_CPU] = {"Max cpu time", "ms"},
+	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
+	[RLIMIT_DATA] = {"Max data size", "bytes"},
+	[RLIMIT_STACK] = {"Max stack size", "bytes"},
+	[RLIMIT_CORE] = {"Max core file size", "bytes"},
+	[RLIMIT_RSS] = {"Max resident set", "bytes"},
+	[RLIMIT_NPROC] = {"Max processes", "processes"},
+	[RLIMIT_NOFILE] = {"Max open files", "files"},
+	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
+	[RLIMIT_AS] = {"Max address space", "bytes"},
+	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
+	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
+	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
+	[RLIMIT_NICE] = {"Max nice priority", NULL},
+	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
+};
+
+/* Display limits for a process */
+static int proc_pid_limits(struct task_struct *task, char *buffer)
+{
+	unsigned int i;
+	int count = 0;
+	unsigned long flags;
+	char *bufptr = buffer;
+
+	struct rlimit rlim[RLIM_NLIMITS];
+
+	rcu_read_lock();
+	if (!lock_task_sighand(task,&flags)) {
+		rcu_read_unlock();
+		return 0;
+	}
+	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
+	unlock_task_sighand(task, &flags);
+	rcu_read_unlock();
+
+	/*
+	 * print the file header
+	 */
+	count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n",
+			"Limit", "Soft Limit", "Hard Limit", "Units");
+
+	for (i = 0; i < RLIM_NLIMITS; i++) {
+		if (rlim[i].rlim_cur == RLIM_INFINITY)
+			count += sprintf(&bufptr[count], "%-25s %-20s ",
+					 lnames[i].name, "unlimited");
+		else
+			count += sprintf(&bufptr[count], "%-25s %-20lu ",
+					 lnames[i].name, rlim[i].rlim_cur);
+
+		if (rlim[i].rlim_max == RLIM_INFINITY)
+			count += sprintf(&bufptr[count], "%-20s ", "unlimited");
+		else
+			count += sprintf(&bufptr[count], "%-20lu ",
+					 rlim[i].rlim_max);
+
+		if (lnames[i].unit)
+			count += sprintf(&bufptr[count], "%-10s\n",
+					 lnames[i].unit);
+		else
+			count += sprintf(&bufptr[count], "\n");
+	}
+
+	return count;
+}
+
 /************************************************************************/
 /*                       Here the fs part begins                        */
 /************************************************************************/
@@ -349,18 +424,21 @@ struct proc_mounts {
 static int mounts_open(struct inode *inode, struct file *file)
 {
 	struct task_struct *task = get_proc_task(inode);
+	struct nsproxy *nsp;
 	struct mnt_namespace *ns = NULL;
 	struct proc_mounts *p;
 	int ret = -EINVAL;
 
 	if (task) {
-		task_lock(task);
-		if (task->nsproxy) {
-			ns = task->nsproxy->mnt_ns;
+		rcu_read_lock();
+		nsp = task_nsproxy(task);
+		if (nsp) {
+			ns = nsp->mnt_ns;
 			if (ns)
 				get_mnt_ns(ns);
 		}
-		task_unlock(task);
+		rcu_read_unlock();
+
 		put_task_struct(task);
 	}
 
@@ -423,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
+		struct nsproxy *nsp;
 		struct mnt_namespace *mnt_ns = NULL;
 		struct task_struct *task = get_proc_task(inode);
 
 		if (task) {
-			task_lock(task);
-			if (task->nsproxy)
-				mnt_ns = task->nsproxy->mnt_ns;
-			if (mnt_ns)
-				get_mnt_ns(mnt_ns);
-			task_unlock(task);
+			rcu_read_lock();
+			nsp = task_nsproxy(task);
+			if (nsp) {
+				mnt_ns = nsp->mnt_ns;
+				if (mnt_ns)
+					get_mnt_ns(mnt_ns);
+			}
+			rcu_read_unlock();
+
 			put_task_struct(task);
 		}
 
@@ -1437,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent,
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *p = get_proc_task(inode);
-	unsigned int fd, tid, ino;
+	unsigned int fd, ino;
 	int retval;
 	struct files_struct * files;
 	struct fdtable *fdt;
@@ -1446,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent,
 	if (!p)
 		goto out_no_task;
 	retval = 0;
-	tid = p->pid;
 
 	fd = filp->f_pos;
 	switch (fd) {
@@ -1681,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp,
 		const struct pid_entry *ents, unsigned int nents)
 {
 	int i;
-	int pid;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	struct task_struct *task = get_proc_task(inode);
@@ -1694,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp,
 		goto out_no_task;
 
 	ret = 0;
-	pid = task->pid;
 	i = filp->f_pos;
 	switch (i) {
 	case 0:
@@ -1928,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
 			      int buflen)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", task_tgid_vnr(current));
 	return vfs_readlink(dentry,buffer,buflen,tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	char tmp[PROC_NUMBUF];
-	sprintf(tmp, "%d", current->tgid);
+	sprintf(tmp, "%d", task_tgid_vnr(current));
 	return ERR_PTR(vfs_follow_link(nd,tmp));
 }
 
@@ -2101,6 +2180,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	REG("environ",    S_IRUSR, environ),
 	INF("auxv",       S_IRUSR, pid_auxv),
 	INF("status",     S_IRUGO, pid_status),
+	INF("limits",	  S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2130,9 +2210,12 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_SCHEDSTATS
 	INF("schedstat",  S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
 	REG("cpuset",     S_IRUGO, cpuset),
 #endif
+#ifdef CONFIG_CGROUPS
+	REG("cgroup",  S_IRUGO, cgroup),
+#endif
 	INF("oom_score",  S_IRUGO, oom_score),
 	REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2193,27 +2276,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = {
  *       that no dcache entries will exist at process exit time it
  *       just makes it very unlikely that any will persist.
  */
-void proc_flush_task(struct task_struct *task)
+static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 {
 	struct dentry *dentry, *leader, *dir;
 	char buf[PROC_NUMBUF];
 	struct qstr name;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
-	dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
+	dentry = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
 		d_drop(dentry);
 		dput(dentry);
 	}
 
-	if (thread_group_leader(task))
+	if (tgid == 0)
 		goto out;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->tgid);
-	leader = d_hash_and_lookup(proc_mnt->mnt_root, &name);
+	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
+	leader = d_hash_and_lookup(mnt->mnt_root, &name);
 	if (!leader)
 		goto out;
 
@@ -2224,7 +2307,7 @@ void proc_flush_task(struct task_struct *task)
 		goto out_put_leader;
 
 	name.name = buf;
-	name.len = snprintf(buf, sizeof(buf), "%d", task->pid);
+	name.len = snprintf(buf, sizeof(buf), "%d", pid);
 	dentry = d_hash_and_lookup(dir, &name);
 	if (dentry) {
 		shrink_dcache_parent(dentry);
@@ -2239,6 +2322,36 @@ out:
 	return;
 }
 
+/*
+ * when flushing dentries from proc one need to flush them from global
+ * proc (proc_mnt) and from all the namespaces' procs this task was seen
+ * in. this call is supposed to make all this job.
+ */
+
+void proc_flush_task(struct task_struct *task)
+{
+	int i, leader;
+	struct pid *pid, *tgid;
+	struct upid *upid;
+
+	leader = thread_group_leader(task);
+	proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0);
+	pid = task_pid(task);
+	if (pid->level == 0)
+		return;
+
+	tgid = task_tgid(task);
+	for (i = 1; i <= pid->level; i++) {
+		upid = &pid->numbers[i];
+		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
+				leader ? 0 : tgid->numbers[i].nr);
+	}
+
+	upid = &pid->numbers[pid->level];
+	if (upid->nr == 1)
+		pid_ns_release_proc(upid->ns);
+}
+
 static struct dentry *proc_pid_instantiate(struct inode *dir,
 					   struct dentry * dentry,
 					   struct task_struct *task, const void *ptr)
@@ -2274,6 +2387,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
 	struct dentry *result = ERR_PTR(-ENOENT);
 	struct task_struct *task;
 	unsigned tgid;
+	struct pid_namespace *ns;
 
 	result = proc_base_lookup(dir, dentry);
 	if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
@@ -2283,8 +2397,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
 	if (tgid == ~0U)
 		goto out;
 
+	ns = dentry->d_sb->s_fs_info;
 	rcu_read_lock();
-	task = find_task_by_pid(tgid);
+	task = find_task_by_pid_ns(tgid, ns);
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
@@ -2301,7 +2416,8 @@ out:
  * Find the first task with tgid >= tgid
  *
  */
-static struct task_struct *next_tgid(unsigned int tgid)
+static struct task_struct *next_tgid(unsigned int tgid,
+		struct pid_namespace *ns)
 {
 	struct task_struct *task;
 	struct pid *pid;
@@ -2309,9 +2425,9 @@ static struct task_struct *next_tgid(unsigned int tgid)
 	rcu_read_lock();
 retry:
 	task = NULL;
-	pid = find_ge_pid(tgid);
+	pid = find_ge_pid(tgid, ns);
 	if (pid) {
-		tgid = pid->nr + 1;
+		tgid = pid_nr_ns(pid, ns) + 1;
 		task = pid_task(pid, PIDTYPE_PID);
 		/* What we to know is if the pid we have find is the
 		 * pid of a thread_group_leader.  Testing for task
@@ -2351,6 +2467,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
 	struct task_struct *task;
 	int tgid;
+	struct pid_namespace *ns;
 
 	if (!reaper)
 		goto out_no_task;
@@ -2361,11 +2478,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			goto out;
 	}
 
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	tgid = filp->f_pos - TGID_OFFSET;
-	for (task = next_tgid(tgid);
+	for (task = next_tgid(tgid, ns);
 	     task;
-	     put_task_struct(task), task = next_tgid(tgid + 1)) {
-		tgid = task->pid;
+	     put_task_struct(task), task = next_tgid(tgid + 1, ns)) {
+		tgid = task_pid_nr_ns(task, ns);
 		filp->f_pos = tgid + TGID_OFFSET;
 		if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
 			put_task_struct(task);
@@ -2388,6 +2506,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	REG("environ",   S_IRUSR, environ),
 	INF("auxv",      S_IRUSR, pid_auxv),
 	INF("status",    S_IRUGO, pid_status),
+	INF("limits",	 S_IRUSR, pid_limits),
 #ifdef CONFIG_SCHED_DEBUG
 	REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
 #endif
@@ -2416,9 +2535,12 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_SCHEDSTATS
 	INF("schedstat", S_IRUGO, pid_schedstat),
 #endif
-#ifdef CONFIG_CPUSETS
+#ifdef CONFIG_PROC_PID_CPUSET
 	REG("cpuset",    S_IRUGO, cpuset),
 #endif
+#ifdef CONFIG_CGROUPS
+	REG("cgroup",  S_IRUGO, cgroup),
+#endif
 	INF("oom_score", S_IRUGO, oom_score),
 	REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
@@ -2486,6 +2608,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
 	struct task_struct *task;
 	struct task_struct *leader = get_proc_task(dir);
 	unsigned tid;
+	struct pid_namespace *ns;
 
 	if (!leader)
 		goto out_no_task;
@@ -2494,14 +2617,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
 	if (tid == ~0U)
 		goto out;
 
+	ns = dentry->d_sb->s_fs_info;
 	rcu_read_lock();
-	task = find_task_by_pid(tid);
+	task = find_task_by_pid_ns(tid, ns);
 	if (task)
 		get_task_struct(task);
 	rcu_read_unlock();
 	if (!task)
 		goto out;
-	if (leader->tgid != task->tgid)
+	if (!same_thread_group(leader, task))
 		goto out_drop_task;
 
 	result = proc_task_instantiate(dir, dentry, task, NULL);
@@ -2526,14 +2650,14 @@ out_no_task:
  * threads past it.
  */
 static struct task_struct *first_tid(struct task_struct *leader,
-					int tid, int nr)
+		int tid, int nr, struct pid_namespace *ns)
 {
 	struct task_struct *pos;
 
 	rcu_read_lock();
 	/* Attempt to start with the pid of a thread */
 	if (tid && (nr > 0)) {
-		pos = find_task_by_pid(tid);
+		pos = find_task_by_pid_ns(tid, ns);
 		if (pos && (pos->group_leader == leader))
 			goto found;
 	}
@@ -2602,6 +2726,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 	ino_t ino;
 	int tid;
 	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
+	struct pid_namespace *ns;
 
 	task = get_proc_task(inode);
 	if (!task)
@@ -2635,12 +2760,13 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 	/* f_version caches the tgid value that the last readdir call couldn't
 	 * return. lseek aka telldir automagically resets f_version to 0.
 	 */
+	ns = filp->f_dentry->d_sb->s_fs_info;
 	tid = (int)filp->f_version;
 	filp->f_version = 0;
-	for (task = first_tid(leader, tid, pos - 2);
+	for (task = first_tid(leader, tid, pos - 2, ns);
 	     task;
 	     task = next_tid(task), pos++) {
-		tid = task->pid;
+		tid = task_pid_nr_ns(task, ns);
 		if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
 			/* returning this tgid failed, save it as the first
 			 * pid for the next readir call */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 99ca00485fc..abe6a3f0436 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -448,7 +448,7 @@ out_mod:
 	return NULL;
 }			
 
-int proc_fill_super(struct super_block *s, void *data, int silent)
+int proc_fill_super(struct super_block *s)
 {
 	struct inode * root_inode;
 
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index d6dc72c78bc..e0d064e9764 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -91,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
 		LOAD_INT(a), LOAD_FRAC(a),
 		LOAD_INT(b), LOAD_FRAC(b),
 		LOAD_INT(c), LOAD_FRAC(c),
-		nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid);
+		nr_running(), nr_threads,
+		task_active_pid_ns(current)->last_pid);
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
diff --git a/fs/proc/root.c b/fs/proc/root.c
index cf3046638b0..ec9cb3b6c93 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -18,32 +18,90 @@
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
 #include <linux/mount.h>
+#include <linux/pid_namespace.h>
 
 #include "internal.h"
 
 struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
 
+static int proc_test_super(struct super_block *sb, void *data)
+{
+	return sb->s_fs_info == data;
+}
+
+static int proc_set_super(struct super_block *sb, void *data)
+{
+	struct pid_namespace *ns;
+
+	ns = (struct pid_namespace *)data;
+	sb->s_fs_info = get_pid_ns(ns);
+	return set_anon_super(sb, NULL);
+}
+
 static int proc_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
+	int err;
+	struct super_block *sb;
+	struct pid_namespace *ns;
+	struct proc_inode *ei;
+
 	if (proc_mnt) {
 		/* Seed the root directory with a pid so it doesn't need
 		 * to be special in base.c.  I would do this earlier but
 		 * the only task alive when /proc is mounted the first time
 		 * is the init_task and it doesn't have any pids.
 		 */
-		struct proc_inode *ei;
 		ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
 		if (!ei->pid)
 			ei->pid = find_get_pid(1);
 	}
-	return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
+
+	if (flags & MS_KERNMOUNT)
+		ns = (struct pid_namespace *)data;
+	else
+		ns = current->nsproxy->pid_ns;
+
+	sb = sget(fs_type, proc_test_super, proc_set_super, ns);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (!sb->s_root) {
+		sb->s_flags = flags;
+		err = proc_fill_super(sb);
+		if (err) {
+			up_write(&sb->s_umount);
+			deactivate_super(sb);
+			return err;
+		}
+
+		ei = PROC_I(sb->s_root->d_inode);
+		if (!ei->pid) {
+			rcu_read_lock();
+			ei->pid = get_pid(find_pid_ns(1, ns));
+			rcu_read_unlock();
+		}
+
+		sb->s_flags |= MS_ACTIVE;
+		ns->proc_mnt = mnt;
+	}
+
+	return simple_set_mnt(mnt, sb);
+}
+
+static void proc_kill_sb(struct super_block *sb)
+{
+	struct pid_namespace *ns;
+
+	ns = (struct pid_namespace *)sb->s_fs_info;
+	kill_anon_super(sb);
+	put_pid_ns(ns);
 }
 
 static struct file_system_type proc_fs_type = {
 	.name		= "proc",
 	.get_sb		= proc_get_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= proc_kill_sb,
 };
 
 void __init proc_root_init(void)
@@ -54,12 +112,13 @@ void __init proc_root_init(void)
 	err = register_filesystem(&proc_fs_type);
 	if (err)
 		return;
-	proc_mnt = kern_mount(&proc_fs_type);
+	proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
 	err = PTR_ERR(proc_mnt);
 	if (IS_ERR(proc_mnt)) {
 		unregister_filesystem(&proc_fs_type);
 		return;
 	}
+
 	proc_misc_init();
 
 	proc_net_init();
@@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = {
 	.parent		= &proc_root,
 };
 
+int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+	struct vfsmount *mnt;
+
+	mnt = kern_mount_data(&proc_fs_type, ns);
+	if (IS_ERR(mnt))
+		return PTR_ERR(mnt);
+
+	return 0;
+}
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+	mntput(ns->proc_mnt);
+}
+
 EXPORT_SYMBOL(proc_symlink);
 EXPORT_SYMBOL(proc_mkdir);
 EXPORT_SYMBOL(create_proc_entry);
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 2a5dd34649b..16b331dd991 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -47,7 +47,9 @@
     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
 
 static inline void get_bit_address(struct super_block *s,
-				   b_blocknr_t block, int *bmap_nr, int *offset)
+				   b_blocknr_t block,
+				   unsigned int *bmap_nr,
+				   unsigned int *offset)
 {
 	/* It is in the bitmap block number equal to the block
 	 * number divided by the number of bits in a block. */
@@ -56,10 +58,10 @@ static inline void get_bit_address(struct super_block *s,
 	*offset = block & ((s->s_blocksize << 3) - 1);
 }
 
-#ifdef CONFIG_REISERFS_CHECK
 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 {
-	int bmap, offset;
+	unsigned int bmap, offset;
+	unsigned int bmap_count = reiserfs_bmap_count(s);
 
 	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
 		reiserfs_warning(s,
@@ -75,25 +77,26 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
 			      &(REISERFS_SB(s)->s_properties)))) {
 		b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
-		if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) {
+		if (block >= bmap1 &&
+		    block <= bmap1 + bmap_count) {
 			reiserfs_warning(s, "vs: 4019: is_reusable: "
 					 "bitmap block %lu(%u) can't be freed or reused",
-					 block, SB_BMAP_NR(s));
+					 block, bmap_count);
 			return 0;
 		}
 	} else {
 		if (offset == 0) {
 			reiserfs_warning(s, "vs: 4020: is_reusable: "
 					 "bitmap block %lu(%u) can't be freed or reused",
-					 block, SB_BMAP_NR(s));
+					 block, bmap_count);
 			return 0;
 		}
 	}
 
-	if (bmap >= SB_BMAP_NR(s)) {
+	if (bmap >= bmap_count) {
 		reiserfs_warning(s,
 				 "vs-4030: is_reusable: there is no so many bitmap blocks: "
-				 "block=%lu, bitmap_nr=%d", block, bmap);
+				 "block=%lu, bitmap_nr=%u", block, bmap);
 		return 0;
 	}
 
@@ -106,12 +109,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
 
 	return 1;
 }
-#endif				/* CONFIG_REISERFS_CHECK */
 
 /* searches in journal structures for a given block number (bmap, off). If block
    is found in reiserfs journal it suggests next free block candidate to test. */
-static inline int is_block_in_journal(struct super_block *s, int bmap, int
-				      off, int *next)
+static inline int is_block_in_journal(struct super_block *s, unsigned int bmap,
+				      int off, int *next)
 {
 	b_blocknr_t tmp;
 
@@ -132,8 +134,8 @@ static inline int is_block_in_journal(struct super_block *s, int bmap, int
 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
  * block; */
 static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
-			     int bmap_n, int *beg, int boundary, int min,
-			     int max, int unfm)
+			     unsigned int bmap_n, int *beg, int boundary,
+			     int min, int max, int unfm)
 {
 	struct super_block *s = th->t_super;
 	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
@@ -143,8 +145,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
 
 	BUG_ON(!th->t_trans_id);
 
-	RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
-	       bmap_n, SB_BMAP_NR(s) - 1);
+	RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of "
+	       "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1);
 	PROC_INFO_INC(s, scan_bitmap.bmap);
 /* this is unclear and lacks comments, explain how journal bitmaps
    work here for the reader.  Convey a sense of the design here. What
@@ -249,12 +251,12 @@ static int bmap_hash_id(struct super_block *s, u32 id)
 	} else {
 		hash_in = (char *)(&id);
 		hash = keyed_hash(hash_in, 4);
-		bm = hash % SB_BMAP_NR(s);
+		bm = hash % reiserfs_bmap_count(s);
 		if (!bm)
 			bm = 1;
 	}
 	/* this can only be true when SB_BMAP_NR = 1 */
-	if (bm >= SB_BMAP_NR(s))
+	if (bm >= reiserfs_bmap_count(s))
 		bm = 0;
 	return bm;
 }
@@ -273,7 +275,7 @@ static inline int block_group_used(struct super_block *s, u32 id)
 	 * to make a better decision. This favors long-term performace gain
 	 * with a better on-disk layout vs. a short term gain of skipping the
 	 * read and potentially having a bad placement. */
-	if (info->first_zero_hint == 0) {
+	if (info->free_count == UINT_MAX) {
 		struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm);
 		brelse(bh);
 	}
@@ -309,16 +311,16 @@ __le32 reiserfs_choose_packing(struct inode * dir)
  * bitmap and place new blocks there. Returns number of allocated blocks. */
 static int scan_bitmap(struct reiserfs_transaction_handle *th,
 		       b_blocknr_t * start, b_blocknr_t finish,
-		       int min, int max, int unfm, unsigned long file_block)
+		       int min, int max, int unfm, sector_t file_block)
 {
 	int nr_allocated = 0;
 	struct super_block *s = th->t_super;
 	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
 	 * - Hans, it is not a block number - Zam. */
 
-	int bm, off;
-	int end_bm, end_off;
-	int off_max = s->s_blocksize << 3;
+	unsigned int bm, off;
+	unsigned int end_bm, end_off;
+	unsigned int off_max = s->s_blocksize << 3;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -328,10 +330,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th,
 
 	get_bit_address(s, *start, &bm, &off);
 	get_bit_address(s, finish, &end_bm, &end_off);
-	if (bm > SB_BMAP_NR(s))
+	if (bm > reiserfs_bmap_count(s))
 		return 0;
-	if (end_bm > SB_BMAP_NR(s))
-		end_bm = SB_BMAP_NR(s);
+	if (end_bm > reiserfs_bmap_count(s))
+		end_bm = reiserfs_bmap_count(s);
 
 	/* When the bitmap is more than 10% free, anyone can allocate.
 	 * When it's less than 10% free, only files that already use the
@@ -385,7 +387,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 	struct reiserfs_super_block *rs;
 	struct buffer_head *sbh, *bmbh;
 	struct reiserfs_bitmap_info *apbi;
-	int nr, offset;
+	unsigned int nr, offset;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -397,10 +399,12 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
 
 	get_bit_address(s, block, &nr, &offset);
 
-	if (nr >= sb_bmap_nr(rs)) {
+	if (nr >= reiserfs_bmap_count(s)) {
 		reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
-				 "block %lu is out of range on %s",
-				 block, reiserfs_bdevname(s));
+				 "block %lu is out of range on %s "
+				 "(nr=%u,max=%u)", block,
+				 reiserfs_bdevname(s), nr,
+				 reiserfs_bmap_count(s));
 		return;
 	}
 
@@ -434,12 +438,19 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 			 int for_unformatted)
 {
 	struct super_block *s = th->t_super;
-
 	BUG_ON(!th->t_trans_id);
 
 	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
-	RFALSE(is_reusable(s, block, 1) == 0,
-	       "vs-4071: can not free such block");
+	if (!is_reusable(s, block, 1))
+		return;
+
+	if (block > sb_block_count(REISERFS_SB(s)->s_rs)) {
+		reiserfs_panic(th->t_super, "bitmap-4072",
+			       "Trying to free block outside file system "
+			       "boundaries (%lu > %lu)",
+			       block, sb_block_count(REISERFS_SB(s)->s_rs));
+		return;
+	}
 	/* mark it before we clear it, just in case */
 	journal_mark_freed(th, s, block);
 	_reiserfs_free_block(th, inode, block, for_unformatted);
@@ -449,11 +460,11 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th,
 static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
 					 struct inode *inode, b_blocknr_t block)
 {
+	BUG_ON(!th->t_trans_id);
 	RFALSE(!th->t_super,
 	       "vs-4060: trying to free block on nonexistent device");
-	RFALSE(is_reusable(th->t_super, block, 1) == 0,
-	       "vs-4070: can not free such block");
-	BUG_ON(!th->t_trans_id);
+	if (!is_reusable(th->t_super, block, 1))
+		return;
 	_reiserfs_free_block(th, inode, block, 1);
 }
 
@@ -1207,27 +1218,22 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb,
 {
 	unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size);
 
-	info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3);
+	/* The first bit must ALWAYS be 1 */
+	BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data));
+
+	info->free_count = 0;
 
 	while (--cur >= (unsigned long *)bh->b_data) {
-		int base = ((char *)cur - bh->b_data) << 3;
+		int i;
 
 		/* 0 and ~0 are special, we can optimize for them */
-		if (*cur == 0) {
-			info->first_zero_hint = base;
+		if (*cur == 0)
 			info->free_count += BITS_PER_LONG;
-		} else if (*cur != ~0L) {       /* A mix, investigate */
-			int b;
-			for (b = BITS_PER_LONG - 1; b >= 0; b--) {
-				if (!reiserfs_test_le_bit(b, cur)) {
-					info->first_zero_hint = base + b;
+		else if (*cur != ~0L)	/* A mix, investigate */
+			for (i = BITS_PER_LONG - 1; i >= 0; i--)
+				if (!reiserfs_test_le_bit(i, cur))
 					info->free_count++;
-				}
-			}
-		}
 	}
-	/* The first bit must ALWAYS be 1 */
-	BUG_ON(info->first_zero_hint == 0);
 }
 
 struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
@@ -1257,7 +1263,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 		BUG_ON(!buffer_uptodate(bh));
 		BUG_ON(atomic_read(&bh->b_count) == 0);
 
-		if (info->first_zero_hint == 0)
+		if (info->free_count == UINT_MAX)
 			reiserfs_cache_bitmap_metadata(sb, bh, info);
 	}
 
@@ -1267,12 +1273,13 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 int reiserfs_init_bitmap_cache(struct super_block *sb)
 {
 	struct reiserfs_bitmap_info *bitmap;
+	unsigned int bmap_nr = reiserfs_bmap_count(sb);
 
-	bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb));
+	bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
 	if (bitmap == NULL)
 		return -ENOMEM;
 
-	memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb));
+	memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr);
 
 	SB_AP_BITMAP(sb) = bitmap;
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0804289d355..a991af96f3f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -199,7 +199,7 @@ static inline void set_block_dev_mapped(struct buffer_head *bh,
 // files which were created in the earlier version can not be longer,
 // than 2 gb
 //
-static int file_capable(struct inode *inode, long block)
+static int file_capable(struct inode *inode, sector_t block)
 {
 	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is new file.
 	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside of 2gb
@@ -242,7 +242,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th,
 // Please improve the english/clarity in the comment above, as it is
 // hard to understand.
 
-static int _get_block_create_0(struct inode *inode, long block,
+static int _get_block_create_0(struct inode *inode, sector_t block,
 			       struct buffer_head *bh_result, int args)
 {
 	INITIALIZE_PATH(path);
@@ -250,7 +250,7 @@ static int _get_block_create_0(struct inode *inode, long block,
 	struct buffer_head *bh;
 	struct item_head *ih, tmp_ih;
 	int fs_gen;
-	int blocknr;
+	b_blocknr_t blocknr;
 	char *p = NULL;
 	int chars;
 	int ret;
@@ -569,7 +569,7 @@ static int convert_tail_for_hole(struct inode *inode,
 }
 
 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
-				  long block,
+				  sector_t block,
 				  struct inode *inode,
 				  b_blocknr_t * allocated_block_nr,
 				  struct treepath *path, int flags)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 4cad9e75ef5..bb05a3e51b9 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -219,11 +219,12 @@ static void allocate_bitmap_nodes(struct super_block *p_s_sb)
 	}
 }
 
-static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
+static int set_bit_in_list_bitmap(struct super_block *p_s_sb,
+				  b_blocknr_t block,
 				  struct reiserfs_list_bitmap *jb)
 {
-	int bmap_nr = block / (p_s_sb->s_blocksize << 3);
-	int bit_nr = block % (p_s_sb->s_blocksize << 3);
+	unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3);
+	unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3);
 
 	if (!jb->bitmaps[bmap_nr]) {
 		jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb);
@@ -239,7 +240,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb,
 	if (jb->bitmaps == NULL)
 		return;
 
-	for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) {
+	for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) {
 		if (jb->bitmaps[i]) {
 			free_bitmap_node(p_s_sb, jb->bitmaps[i]);
 			jb->bitmaps[i] = NULL;
@@ -289,7 +290,7 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
 */
 int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
 				   struct reiserfs_list_bitmap *jb_array,
-				   int bmap_nr)
+				   unsigned int bmap_nr)
 {
 	int i;
 	int failed = 0;
@@ -483,7 +484,7 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct
 **
 */
 int reiserfs_in_journal(struct super_block *p_s_sb,
-			int bmap_nr, int bit_nr, int search_all,
+			unsigned int bmap_nr, int bit_nr, int search_all,
 			b_blocknr_t * next_zero_bit)
 {
 	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
@@ -1013,7 +1014,7 @@ static int flush_commit_list(struct super_block *s,
 			     struct reiserfs_journal_list *jl, int flushall)
 {
 	int i;
-	int bn;
+	b_blocknr_t bn;
 	struct buffer_head *tbh = NULL;
 	unsigned long trans_id = jl->j_trans_id;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
@@ -2307,8 +2308,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
    Right now it is only used from journal code. But later we might use it
    from other places.
    Note: Do not use journal_getblk/sb_getblk functions here! */
-static struct buffer_head *reiserfs_breada(struct block_device *dev, int block,
-					   int bufsize, unsigned int max_block)
+static struct buffer_head *reiserfs_breada(struct block_device *dev,
+					   b_blocknr_t block, int bufsize,
+					   b_blocknr_t max_block)
 {
 	struct buffer_head *bhlist[BUFNR];
 	unsigned int blocks = BUFNR;
@@ -2732,7 +2734,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	journal->j_persistent_trans = 0;
 	if (reiserfs_allocate_list_bitmaps(p_s_sb,
 					   journal->j_list_bitmap,
-					   SB_BMAP_NR(p_s_sb)))
+					   reiserfs_bmap_count(p_s_sb)))
 		goto free_and_return;
 	allocate_bitmap_nodes(p_s_sb);
 
@@ -2740,7 +2742,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
 	SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
 						 REISERFS_OLD_DISK_OFFSET_IN_BYTES
 						 / p_s_sb->s_blocksize +
-						 SB_BMAP_NR(p_s_sb) +
+						 reiserfs_bmap_count(p_s_sb) +
 						 1 :
 						 REISERFS_DISK_OFFSET_IN_BYTES /
 						 p_s_sb->s_blocksize + 2);
diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index bc808a91eea..5e7388b32d0 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -356,13 +356,11 @@ extern struct tree_balance *cur_tb;
 void reiserfs_panic(struct super_block *sb, const char *fmt, ...)
 {
 	do_reiserfs_warning(fmt);
-	printk(KERN_EMERG "REISERFS: panic (device %s): %s\n",
-	       reiserfs_bdevname(sb), error_buf);
-	BUG();
 
-	/* this is not actually called, but makes reiserfs_panic() "noreturn" */
-	panic("REISERFS: panic (device %s): %s\n",
-	      reiserfs_bdevname(sb), error_buf);
+	dump_stack();
+
+	panic(KERN_EMERG "REISERFS: panic (device %s): %s\n",
+	       reiserfs_bdevname(sb), error_buf);
 }
 
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...)
diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 976cc7887a0..f71c3948ede 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c
@@ -61,7 +61,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	}
 
 	/* count used bits in last bitmap block */
-	block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8;
+	block_r = SB_BLOCK_COUNT(s) -
+			(reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8;
 
 	/* count bitmap blocks in new fs */
 	bmap_nr_new = block_count_new / (s->s_blocksize * 8);
@@ -73,7 +74,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 
 	/* save old values */
 	block_count = SB_BLOCK_COUNT(s);
-	bmap_nr = SB_BMAP_NR(s);
+	bmap_nr = reiserfs_bmap_count(s);
 
 	/* resizing of reiserfs bitmaps (journal and real), if needed */
 	if (bmap_nr_new > bmap_nr) {
@@ -119,7 +120,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 			return -ENOMEM;
 		}
 		memset(bitmap, 0,
-		       sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s));
+		       sizeof(struct reiserfs_bitmap_info) * bmap_nr_new);
 		for (i = 0; i < bmap_nr; i++)
 			bitmap[i] = old_bitmap[i];
 
@@ -143,7 +144,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 			mark_buffer_dirty(bh);
 			sync_dirty_buffer(bh);
 			// update bitmap_info stuff
-			bitmap[i].first_zero_hint = 1;
 			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 			brelse(bh);
 		}
@@ -173,8 +173,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	for (i = block_r; i < s->s_blocksize * 8; i++)
 		reiserfs_test_and_clear_le_bit(i, bh->b_data);
 	info->free_count += s->s_blocksize * 8 - block_r;
-	if (!info->first_zero_hint)
-		info->first_zero_hint = block_r;
 
 	journal_mark_dirty(&th, s, bh);
 	brelse(bh);
@@ -196,9 +194,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	brelse(bh);
 
 	info->free_count -= s->s_blocksize * 8 - block_r_new;
-	/* Extreme case where last bitmap is the only valid block in itself. */
-	if (!info->free_count)
-		info->first_zero_hint = 0;
 	/* update super */
 	reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
 	free_blocks = SB_FREE_BLOCKS(s);
@@ -206,7 +201,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 			   free_blocks + (block_count_new - block_count -
 					  (bmap_nr_new - bmap_nr)));
 	PUT_SB_BLOCK_COUNT(s, block_count_new);
-	PUT_SB_BMAP_NR(s, bmap_nr_new);
+	PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new);
 	s->s_dirt = 1;
 
 	journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s));
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 981027d1187..ca41567d789 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -559,7 +559,7 @@ static int is_tree_node(struct buffer_head *bh, int level)
 /* The function is NOT SCHEDULE-SAFE! */
 static void search_by_key_reada(struct super_block *s,
 				struct buffer_head **bh,
-				unsigned long *b, int num)
+				b_blocknr_t *b, int num)
 {
 	int i, j;
 
@@ -611,7 +611,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,	/*
 					   DISK_LEAF_NODE_LEVEL */
     )
 {
-	int n_block_number;
+	b_blocknr_t n_block_number;
 	int expected_level;
 	struct buffer_head *p_s_bh;
 	struct path_element *p_s_last_element;
@@ -619,7 +619,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key,	/*
 	int right_neighbor_of_leaf_node;
 	int fs_gen;
 	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
-	unsigned long reada_blocks[SEARCH_BY_KEY_READA];
+	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
 	int reada_count = 0;
 
 #ifdef CONFIG_REISERFS_CHECK
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b82897ae090..57adfe90d5a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1725,6 +1725,21 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
 		set_sb_umount_state(rs, REISERFS_ERROR_FS);
 		set_sb_fs_state(rs, 0);
 
+		/* Clear out s_bmap_nr if it would wrap. We can handle this
+		 * case, but older revisions can't. This will cause the
+		 * file system to fail mount on those older implementations,
+		 * avoiding corruption. -jeffm */
+		if (bmap_would_wrap(reiserfs_bmap_count(s)) &&
+		    sb_bmap_nr(rs) != 0) {
+			reiserfs_warning(s, "super-2030: This file system "
+					"claims to use %u bitmap blocks in "
+					"its super block, but requires %u. "
+					"Clearing to zero.", sb_bmap_nr(rs),
+					reiserfs_bmap_count(s));
+
+			set_sb_bmap_nr(rs, 0);
+		}
+
 		if (old_format_only(s)) {
 			/* filesystem of format 3.5 either with standard or non-standard
 			   journal */
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index fab4b9b2664..1597f6b649e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -484,7 +484,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 	/* Resize it so we're ok to write there */
 	newattrs.ia_size = buffer_size;
 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-	mutex_lock(&xinode->i_mutex);
+	mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR);
 	err = notify_change(fp->f_path.dentry, &newattrs);
 	if (err)
 		goto out_filp;
@@ -1223,7 +1223,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
 		if (!IS_ERR(dentry)) {
 			if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) {
 				struct inode *inode = dentry->d_parent->d_inode;
-				mutex_lock(&inode->i_mutex);
+				mutex_lock_nested(&inode->i_mutex,
+						  I_MUTEX_XATTR);
 				err = inode->i_op->mkdir(inode, dentry, 0700);
 				mutex_unlock(&inode->i_mutex);
 				if (err) {
diff --git a/fs/select.c b/fs/select.c
index 7dede89658f..47f47925aea 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -177,11 +177,6 @@ get_max:
 	return max;
 }
 
-#define BIT(i)		(1UL << ((i)&(__NFDBITS-1)))
-#define MEM(i,m)	((m)+(unsigned)(i)/__NFDBITS)
-#define ISSET(i,m)	(((i)&*(m)) != 0)
-#define SET(i,m)	(*(m) |= (i))
-
 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR)
 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
 #define POLLEX_SET (POLLPRI)
diff --git a/fs/super.c b/fs/super.c
index 1bfcca2104b..d28fde7e1cf 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -40,10 +40,6 @@
 #include <asm/uaccess.h>
 
 
-void get_filesystem(struct file_system_type *fs);
-void put_filesystem(struct file_system_type *fs);
-struct file_system_type *get_fs_type(const char *name);
-
 LIST_HEAD(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
 
@@ -336,21 +332,21 @@ struct super_block *sget(struct file_system_type *type,
 			void *data)
 {
 	struct super_block *s = NULL;
-	struct list_head *p;
+	struct super_block *old;
 	int err;
 
 retry:
 	spin_lock(&sb_lock);
-	if (test) list_for_each(p, &type->fs_supers) {
-		struct super_block *old;
-		old = list_entry(p, struct super_block, s_instances);
-		if (!test(old, data))
-			continue;
-		if (!grab_super(old))
-			goto retry;
-		if (s)
-			destroy_super(s);
-		return old;
+	if (test) {
+		list_for_each_entry(old, &type->fs_supers, s_instances) {
+			if (!test(old, data))
+				continue;
+			if (!grab_super(old))
+				goto retry;
+			if (s)
+				destroy_super(s);
+			return old;
+		}
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
@@ -948,9 +944,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 	return mnt;
 }
 
-struct vfsmount *kern_mount(struct file_system_type *type)
+struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 {
-	return vfs_kern_mount(type, 0, type->name, NULL);
+	return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
 }
 
-EXPORT_SYMBOL(kern_mount);
+EXPORT_SYMBOL_GPL(kern_mount_data);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 726449d4fd2..3586c7a28d2 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -54,8 +54,8 @@ xfs_fs_decode_fh(
 		struct dentry	*de),
 	void			*context)
 {
-	xfs_fid2_t		ifid;
-	xfs_fid2_t		pfid;
+	xfs_fid_t		ifid;
+	xfs_fid_t		pfid;
 	void			*parent = NULL;
 	int			is64 = 0;
 	__u32			*p = fh;
@@ -144,7 +144,7 @@ xfs_fs_get_dentry(
 	struct dentry		*result;
 	int			error;
 
-	error = xfs_vget(XFS_M(sb), &vp, (fid_t *)data);
+	error = xfs_vget(XFS_M(sb), &vp, data);
 	if (error || vp == NULL)
 		return ERR_PTR(-ESTALE) ;
 
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index e794ca4efc7..2f36071a86f 100644
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ b/fs/xfs/linux-2.6/xfs_export.h
@@ -71,13 +71,13 @@ xfs_fileid_length(int hasparent, int is64)
 
 /*
  * Decode encoded inode information (either for the inode itself
- * or the parent) into an xfs_fid2_t structure.  Advances and
+ * or the parent) into an xfs_fid_t structure.  Advances and
  * returns the new data pointer
  */
 static inline __u32 *
-xfs_fileid_decode_fid2(__u32 *p, xfs_fid2_t *fid, int is64)
+xfs_fileid_decode_fid2(__u32 *p, xfs_fid_t *fid, int is64)
 {
-	fid->fid_len = sizeof(xfs_fid2_t) - sizeof(fid->fid_len);
+	fid->fid_len = sizeof(xfs_fid_t) - sizeof(fid->fid_len);
 	fid->fid_pad = 0;
 	fid->fid_ino = *p++;
 #if XFS_BIG_INUMS
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index ffec630e7db..2b34bad48b0 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -152,11 +152,11 @@ xfs_find_handle(
 		lock_mode = xfs_ilock_map_shared(ip);
 
 		/* fill in fid section of handle from inode */
-		handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) -
-					    sizeof(handle.ha_fid.xfs_fid_len);
-		handle.ha_fid.xfs_fid_pad = 0;
-		handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen;
-		handle.ha_fid.xfs_fid_ino = ip->i_ino;
+		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
+					sizeof(handle.ha_fid.fid_len);
+		handle.ha_fid.fid_pad = 0;
+		handle.ha_fid.fid_gen = ip->i_d.di_gen;
+		handle.ha_fid.fid_ino = ip->i_ino;
 
 		xfs_iunlock_map_shared(ip, lock_mode);
 
@@ -222,10 +222,10 @@ xfs_vget_fsop_handlereq(
 	if (hlen < sizeof(*handlep))
 		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
 	if (hlen > sizeof(handlep->ha_fsid)) {
-		if (handlep->ha_fid.xfs_fid_len !=
-				(hlen - sizeof(handlep->ha_fsid)
-					- sizeof(handlep->ha_fid.xfs_fid_len))
-		    || handlep->ha_fid.xfs_fid_pad)
+		if (handlep->ha_fid.fid_len !=
+		    (hlen - sizeof(handlep->ha_fsid) -
+		            sizeof(handlep->ha_fid.fid_len)) ||
+		    handlep->ha_fid.fid_pad)
 			return XFS_ERROR(EINVAL);
 	}
 
@@ -233,9 +233,9 @@ xfs_vget_fsop_handlereq(
 	 * Crack the handle, obtain the inode # & generation #
 	 */
 	xfid = (struct xfs_fid *)&handlep->ha_fid;
-	if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
-		ino  = xfid->xfs_fid_ino;
-		igen = xfid->xfs_fid_gen;
+	if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) {
+		ino  = xfid->fid_ino;
+		igen = xfid->fid_gen;
 	} else {
 		return XFS_ERROR(EINVAL);
 	}
diff --git a/fs/xfs/xfs_dmops.c b/fs/xfs/xfs_dmops.c
index 6cd5704258a..a1e55fb9d5d 100644
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -41,29 +41,16 @@ int
 xfs_dmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
 	if (args->flags & XFSMNT_DMAPI) {
-		struct xfs_dmops *ops;
-
-		ops = symbol_get(xfs_dmcore_xfs);
-		if (!ops) {
-			request_module("xfs_dmapi");
-			ops = symbol_get(xfs_dmcore_xfs);
-		}
-
-		if (!ops) {
-			cmn_err(CE_WARN, "XFS: no dmapi support available.");
-			return EINVAL;
-		}
-		mp->m_dm_ops = ops;
-	} else {
-		mp->m_dm_ops = &xfs_dmcore_stub;
+		cmn_err(CE_WARN,
+			"XFS: dmapi support not available in this kernel.");
+		return EINVAL;
 	}
 
+	mp->m_dm_ops = &xfs_dmcore_stub;
 	return 0;
 }
 
 void
 xfs_dmops_put(struct xfs_mount *mp)
 {
-	if (mp->m_dm_ops != &xfs_dmcore_stub)
-		symbol_put(xfs_dmcore_xfs);
 }
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index ec3c9c27e0d..aab96627651 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -389,30 +389,13 @@ typedef struct xfs_fsop_attrmulti_handlereq {
  */
 typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
 
-
-#ifndef HAVE_FID
-#define MAXFIDSZ	46
-
-typedef struct fid {
-	__u16		fid_len;		/* length of data in bytes */
-	unsigned char	fid_data[MAXFIDSZ];	/* data (fid_len worth)  */
-} fid_t;
-#endif
-
 typedef struct xfs_fid {
-	__u16	xfs_fid_len;		/* length of remainder	*/
-	__u16	xfs_fid_pad;
-	__u32	xfs_fid_gen;		/* generation number	*/
-	__u64	xfs_fid_ino;		/* 64 bits inode number */
+	__u16	fid_len;		/* length of remainder	*/
+	__u16	fid_pad;
+	__u32	fid_gen;		/* generation number	*/
+	__u64	fid_ino;		/* 64 bits inode number */
 } xfs_fid_t;
 
-typedef struct xfs_fid2 {
-	__u16	fid_len;	/* length of remainder */
-	__u16	fid_pad;	/* padding, must be zero */
-	__u32	fid_gen;	/* generation number */
-	__u64	fid_ino;	/* inode number */
-} xfs_fid2_t;
-
 typedef struct xfs_handle {
 	union {
 		__s64	    align;	/* force alignment of ha_fid	 */
@@ -422,9 +405,9 @@ typedef struct xfs_handle {
 } xfs_handle_t;
 #define ha_fsid ha_u._ha_fsid
 
-#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.xfs_fid_pad	 \
+#define XFS_HSIZE(handle)	(((char *) &(handle).ha_fid.fid_pad	 \
 				 - (char *) &(handle))			  \
-				 + (handle).ha_fid.xfs_fid_len)
+				 + (handle).ha_fid.fid_len)
 
 /*
  * Flags for going down operation
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
index c266a0184b4..2ec1d8a2735 100644
--- a/fs/xfs/xfs_qmops.c
+++ b/fs/xfs/xfs_qmops.c
@@ -135,19 +135,13 @@ int
 xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 {
 	if (args->flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA)) {
-		struct xfs_qmops *ops;
-
-		ops = symbol_get(xfs_qmcore_xfs);
-		if (!ops) {
-			request_module("xfs_quota");
-			ops = symbol_get(xfs_qmcore_xfs);
-		}
-
-		if (!ops) {
-			cmn_err(CE_WARN, "XFS: no quota support available.");
-			return EINVAL;
-		}
-		mp->m_qm_ops = ops;
+#ifdef CONFIG_XFS_QUOTA
+		mp->m_qm_ops = &xfs_qmcore_xfs;
+#else
+		cmn_err(CE_WARN,
+			"XFS: qouta support not available in this kernel.");
+		return EINVAL;
+#endif
 	} else {
 		mp->m_qm_ops = &xfs_qmcore_stub;
 	}
@@ -158,6 +152,4 @@ xfs_qmops_get(struct xfs_mount *mp, struct xfs_mount_args *args)
 void
 xfs_qmops_put(struct xfs_mount *mp)
 {
-	if (mp->m_qm_ops != &xfs_qmcore_stub)
-		symbol_put(xfs_qmcore_xfs);
 }
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index a5a8454f2a6..a1544597bcd 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -1635,9 +1635,8 @@ int
 xfs_vget(
 	xfs_mount_t	*mp,
 	bhv_vnode_t	**vpp,
-	fid_t		*fidp)
+	xfs_fid_t	*xfid)
 {
-	xfs_fid_t	*xfid = (struct xfs_fid *)fidp;
 	xfs_inode_t	*ip;
 	int		error;
 	xfs_ino_t	ino;
@@ -1647,11 +1646,11 @@ xfs_vget(
 	 * Invalid.  Since handles can be created in user space and passed in
 	 * via gethandle(), this is not cause for a panic.
 	 */
-	if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
+	if (xfid->fid_len != sizeof(*xfid) - sizeof(xfid->fid_len))
 		return XFS_ERROR(EINVAL);
 
-	ino  = xfid->xfs_fid_ino;
-	igen = xfid->xfs_fid_gen;
+	ino  = xfid->fid_ino;
+	igen = xfid->fid_gen;
 
 	/*
 	 * NFS can sometimes send requests for ino 0.  Fail them gracefully.
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index bc99e3eb7db..a592fe02a33 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -2,7 +2,7 @@
 #define _XFS_VFSOPS_H 1
 
 struct cred;
-struct fid;
+struct xfs_fid;
 struct inode;
 struct kstatfs;
 struct xfs_mount;
@@ -17,7 +17,7 @@ int xfs_root(struct xfs_mount *mp, bhv_vnode_t **vpp);
 int xfs_statvfs(struct xfs_mount *mp, struct kstatfs *statp,
 		bhv_vnode_t *vp);
 int xfs_sync(struct xfs_mount *mp, int flags);
-int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct fid *fidp);
+int xfs_vget(struct xfs_mount *mp, bhv_vnode_t **vpp, struct xfs_fid *xfid);
 int xfs_parseargs(struct xfs_mount *mp, char *options,
 		struct xfs_mount_args *args, int update);
 int xfs_showargs(struct xfs_mount *mp, struct seq_file *m);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5e3c57ca998..efd5aff9eaf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3466,23 +3466,14 @@ std_return:
 }
 
 
-/*
- * xfs_fid2
- *
- * A fid routine that takes a pointer to a previously allocated
- * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
- */
 int
 xfs_fid2(
 	xfs_inode_t	*ip,
-	fid_t		*fidp)
+	xfs_fid_t	*xfid)
 {
-	xfs_fid2_t	*xfid = (xfs_fid2_t *)fidp;
-
 	vn_trace_entry(ip, __FUNCTION__, (inst_t *)__return_address);
-	ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
 
-	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
+	xfid->fid_len = sizeof(xfs_fid_t) - sizeof(xfid->fid_len);
 	xfid->fid_pad = 0;
 	/*
 	 * use memcpy because the inode is a long long and there's no
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index f36e74f2f0c..b7e461c40cf 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -39,7 +39,7 @@ int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
 		char *target_path, mode_t mode, bhv_vnode_t **vpp,
 		struct cred *credp);
-int xfs_fid2(struct xfs_inode *ip, fid_t	*fidp);
+int xfs_fid2(struct xfs_inode *ip, struct xfs_fid *xfid);
 int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
diff --git a/include/acpi/achware.h b/include/acpi/achware.h
index 9df275cf7bc..4053df94345 100644
--- a/include/acpi/achware.h
+++ b/include/acpi/achware.h
@@ -71,9 +71,9 @@ u32 acpi_hw_get_mode(void);
 struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id);
 
 acpi_status
-acpi_hw_register_read(u8 use_lock, u32 register_id, u32 * return_value);
+acpi_hw_register_read(u32 register_id, u32 * return_value);
 
-acpi_status acpi_hw_register_write(u8 use_lock, u32 register_id, u32 value);
+acpi_status acpi_hw_register_write(u32 register_id, u32 value);
 
 acpi_status
 acpi_hw_low_level_read(u32 width,
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 86aea44ce6d..7b74b60a68a 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -264,7 +264,6 @@ struct acpi_device_wakeup_flags {
 
 struct acpi_device_wakeup_state {
 	u8 enabled:1;
-	u8 active:1;
 };
 
 struct acpi_device_wakeup {
@@ -333,6 +332,7 @@ int acpi_bus_get_power(acpi_handle handle, int *state);
 int acpi_bus_set_power(acpi_handle handle, int state);
 #ifdef CONFIG_ACPI_PROC_EVENT
 int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
+int acpi_bus_generate_proc_event4(const char *class, const char *bid, u8 type, int data);
 int acpi_bus_receive_event(struct acpi_bus_event *event);
 #else
 static inline int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data)
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3d7ab9e0c9f..9512f0456ad 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -314,6 +314,8 @@ acpi_resource_to_address64(struct acpi_resource *resource,
  */
 acpi_status acpi_get_register(u32 register_id, u32 * return_value);
 
+acpi_status acpi_get_register_unlocked(u32 register_id, u32 *return_value);
+
 acpi_status acpi_set_register(u32 register_id, u32 value);
 
 acpi_status
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 99934a999e6..26d79f6db8a 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpu.h>
+#include <linux/cpuidle.h>
 
 #include <asm/acpi.h>
 
@@ -75,7 +76,9 @@ struct acpi_processor_cx {
 };
 
 struct acpi_processor_power {
+	struct cpuidle_device dev;
 	struct acpi_processor_cx *state;
+	struct acpi_processor_cx *bm_state;
 	unsigned long bm_check_timestamp;
 	u32 default_state;
 	u32 bm_activity;
@@ -199,6 +202,7 @@ struct acpi_processor_flags {
 	u8 bm_check:1;
 	u8 has_cst:1;
 	u8 power_setup_done:1;
+	u8 bm_rld_set:1;
 };
 
 struct acpi_processor {
@@ -322,6 +326,7 @@ int acpi_processor_power_exit(struct acpi_processor *pr,
 			      struct acpi_device *device);
 int acpi_processor_suspend(struct acpi_device * device, pm_message_t state);
 int acpi_processor_resume(struct acpi_device * device);
+extern struct cpuidle_driver acpi_idle_driver;
 
 /* in processor_thermal.c */
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 381b4f5b4d5..9e19a704d48 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -1,6 +1,10 @@
 #ifndef _ALPHA_BITOPS_H
 #define _ALPHA_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/compiler.h>
 #include <asm/barrier.h>
 
diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
index 2a5cc0b367a..b7bf68d0407 100644
--- a/include/asm-alpha/ide.h
+++ b/include/asm-alpha/ide.h
@@ -40,7 +40,6 @@ static inline unsigned long ide_default_io_base(int index)
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_PCI
diff --git a/include/asm-alpha/tlbflush.h b/include/asm-alpha/tlbflush.h
index 1ca3ed3bd6d..eefab3fb51a 100644
--- a/include/asm-alpha/tlbflush.h
+++ b/include/asm-alpha/tlbflush.h
@@ -92,17 +92,6 @@ flush_tlb_other(struct mm_struct *mm)
 	if (*mmc) *mmc = 0;
 }
 
-/* Flush a specified range of user mapping page tables from TLB.
-   Although Alpha uses VPTE caches, this can be a nop, as Alpha does
-   not have finegrained tlb flushing, so it will flush VPTE stuff
-   during next flush_tlb_range.  */
-
-static inline void
-flush_tlb_pgtables(struct mm_struct *mm, unsigned long start,
-		   unsigned long end)
-{
-}
-
 #ifndef CONFIG_SMP
 /* Flush everything (kernel mapping may also have changed
    due to vmalloc/vfree).  */
diff --git a/include/asm-arm/arch-ixp4xx/io.h b/include/asm-arm/arch-ixp4xx/io.h
index c72f9d79417..eeeea90cd5a 100644
--- a/include/asm-arm/arch-ixp4xx/io.h
+++ b/include/asm-arm/arch-ixp4xx/io.h
@@ -17,9 +17,6 @@
 
 #define IO_SPACE_LIMIT 0xffff0000
 
-#define	BIT(x)	((1)<<(x))
-
-
 extern int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
 extern int ixp4xx_pci_write(u32 addr, u32 cmd, u32 data);
 
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index 52fe05895de..47a6b086eee 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -19,6 +19,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 
diff --git a/include/asm-arm/ide.h b/include/asm-arm/ide.h
index 4f68c8a5a19..f348fcf3150 100644
--- a/include/asm-arm/ide.h
+++ b/include/asm-arm/ide.h
@@ -18,7 +18,6 @@
 #endif
 
 #if !defined(CONFIG_ARCH_L7200)
-# define IDE_ARCH_OBSOLETE_INIT
 # ifdef CONFIG_ARCH_CLPS7500
 #  define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 # else
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 71be4fded7e..8c6bc1bb9d1 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -463,11 +463,6 @@ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
  */
 extern void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte);
 
-/*
- * ARM processors do not cache TLB tables in RAM.
- */
-#define flush_tlb_pgtables(mm,start,end)	do { } while (0)
-
 #endif
 
 #endif /* CONFIG_MMU */
diff --git a/include/asm-avr32/bitops.h b/include/asm-avr32/bitops.h
index f3faddfd46a..1a50b69b1a1 100644
--- a/include/asm-avr32/bitops.h
+++ b/include/asm-avr32/bitops.h
@@ -8,6 +8,10 @@
 #ifndef __ASM_AVR32_BITOPS_H
 #define __ASM_AVR32_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/byteorder.h>
 #include <asm/system.h>
 
diff --git a/include/asm-avr32/tlbflush.h b/include/asm-avr32/tlbflush.h
index 730e268f81f..5bc7c88a577 100644
--- a/include/asm-avr32/tlbflush.h
+++ b/include/asm-avr32/tlbflush.h
@@ -19,7 +19,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void flush_tlb(void);
 extern void flush_tlb_all(void);
@@ -29,12 +28,6 @@ extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
 extern void __flush_tlb_page(unsigned long asid, unsigned long page);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* Nothing to do */
-}
-
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 #endif /* __ASM_AVR32_TLBFLUSH_H */
diff --git a/include/asm-blackfin/bitops.h b/include/asm-blackfin/bitops.h
index 03ecedc1f2a..b39a175c79c 100644
--- a/include/asm-blackfin/bitops.h
+++ b/include/asm-blackfin/bitops.h
@@ -11,6 +11,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-blackfin/ide.h b/include/asm-blackfin/ide.h
index 41b2db46a16..121e272581d 100644
--- a/include/asm-blackfin/ide.h
+++ b/include/asm-blackfin/ide.h
@@ -20,7 +20,6 @@
 #define MAX_HWIFS	1
 
 /* Legacy ... BLK_DEV_IDECS */
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 
diff --git a/include/asm-blackfin/tlbflush.h b/include/asm-blackfin/tlbflush.h
index 10a07ba1e01..277b400924b 100644
--- a/include/asm-blackfin/tlbflush.h
+++ b/include/asm-blackfin/tlbflush.h
@@ -53,10 +53,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif
diff --git a/include/asm-cris/arch-v32/ide.h b/include/asm-cris/arch-v32/ide.h
index 6590f657500..11296170d05 100644
--- a/include/asm-cris/arch-v32/ide.h
+++ b/include/asm-cris/arch-v32/ide.h
@@ -54,7 +54,7 @@ static inline unsigned long ide_default_io_base(int index)
 #define SUPPORT_VLB_SYNC 0
 
 #define IDE_ARCH_ACK_INTR
-#define ide_ack_intr(hwif)	(hwif)->hw.ack_intr(hwif)
+#define ide_ack_intr(hwif)	((hwif)->ack_intr(hwif))
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index 617151b9b72..e2f49c27ed2 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -14,6 +14,10 @@
 /* Currently this is unsuitable for consumption outside the kernel.  */
 #ifdef __KERNEL__ 
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/arch/bitops.h>
 #include <asm/system.h>
 #include <asm/atomic.h>
diff --git a/include/asm-cris/posix_types.h b/include/asm-cris/posix_types.h
index 7b9ed22ab5d..92000d0c3f9 100644
--- a/include/asm-cris/posix_types.h
+++ b/include/asm-cris/posix_types.h
@@ -52,7 +52,7 @@ typedef struct {
 } __kernel_fsid_t;
 
 #ifdef __KERNEL__
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 #undef	__FD_SET
 #define __FD_SET(fd,fdsetp) set_bit(fd, (void *)(fdsetp))
diff --git a/include/asm-cris/tlbflush.h b/include/asm-cris/tlbflush.h
index 0569612477e..20697e7ef4f 100644
--- a/include/asm-cris/tlbflush.h
+++ b/include/asm-cris/tlbflush.h
@@ -38,13 +38,6 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st
 	flush_tlb_mm(vma->vm_mm);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* CRIS does not keep any page table caches in TLB */
-}
-
-
 static inline void flush_tlb(void)
 {
 	flush_tlb_mm(current->mm);
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index 8dba74b1a25..e29de7131b7 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -21,6 +21,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffz.h>
 
 /*
diff --git a/include/asm-frv/tlbflush.h b/include/asm-frv/tlbflush.h
index 8370f97e41e..7ac5eafc5d9 100644
--- a/include/asm-frv/tlbflush.h
+++ b/include/asm-frv/tlbflush.h
@@ -57,7 +57,6 @@ do {								\
 #define __flush_tlb_global()			flush_tlb_all()
 #define flush_tlb()				flush_tlb_all()
 #define flush_tlb_kernel_range(start, end)	flush_tlb_all()
-#define flush_tlb_pgtables(mm,start,end)	do { } while(0)
 
 #else
 
@@ -66,7 +65,6 @@ do {								\
 #define flush_tlb_mm(mm)			BUG()
 #define flush_tlb_page(vma,addr)		BUG()
 #define flush_tlb_range(mm,start,end)		BUG()
-#define flush_tlb_pgtables(mm,start,end)	BUG()
 #define flush_tlb_kernel_range(start, end)	BUG()
 
 #endif
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index e022a0f59e6..15e6f253dda 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -19,6 +19,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/hweight.h>
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index cd8a9641bd6..4657f3e410f 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
 #ifdef CONFIG_SMP
 #include <asm/spinlock.h>
 #include <asm/cache.h>		/* we use L1_CACHE_BYTES */
@@ -66,8 +63,8 @@ extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
  */
 static inline void set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -87,8 +84,8 @@ static inline void set_bit(int nr, volatile unsigned long *addr)
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -108,8 +105,8 @@ static inline void clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline void change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long flags;
 
 	_atomic_spin_lock_irqsave(p, flags);
@@ -128,8 +125,8 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
@@ -152,8 +149,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
@@ -175,8 +172,8 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old;
 	unsigned long flags;
 
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 46a825cf2ae..697cc2b7e0f 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -3,9 +3,6 @@
 
 #include <asm/types.h>
 
-#define BITOP_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
 /**
  * __set_bit - Set a bit in memory
  * @nr: the bit to set
@@ -17,16 +14,16 @@
  */
 static inline void __set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p  |= mask;
 }
 
 static inline void __clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p &= ~mask;
 }
@@ -42,8 +39,8 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
  */
 static inline void __change_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 
 	*p ^= mask;
 }
@@ -59,8 +56,8 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
  */
 static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old | mask;
@@ -78,8 +75,8 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
  */
 static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old & ~mask;
@@ -90,8 +87,8 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
 static inline int __test_and_change_bit(int nr,
 					    volatile unsigned long *addr)
 {
-	unsigned long mask = BITOP_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
 	unsigned long old = *p;
 
 	*p = old ^ mask;
@@ -105,7 +102,7 @@ static inline int __test_and_change_bit(int nr,
  */
 static inline int test_bit(int nr, const volatile unsigned long *addr)
 {
-	return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
 
 #endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5615440027e..9f584cc5c5f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -12,7 +12,11 @@
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
-	*(.data.init.refok)
+	*(.data.init.refok)						\
+	. = ALIGN(8);							\
+	VMLINUX_SYMBOL(__start___markers) = .;				\
+	*(__markers)							\
+	VMLINUX_SYMBOL(__stop___markers) = .;
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
@@ -20,6 +24,7 @@
 		VMLINUX_SYMBOL(__start_rodata) = .;			\
 		*(.rodata) *(.rodata.*)					\
 		*(__vermagic)		/* Kernel version magic */	\
+		*(__markers_strings)	/* Markers: strings */		\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index e64ad315656..cb18e3b0aa9 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -10,6 +10,11 @@
 #include <asm/system.h>
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 /*
  * Function prototypes to keep gcc -Wall happy
  */
diff --git a/include/asm-h8300/tlbflush.h b/include/asm-h8300/tlbflush.h
index 9a2c5c9fd70..41c148a9208 100644
--- a/include/asm-h8300/tlbflush.h
+++ b/include/asm-h8300/tlbflush.h
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif /* _H8300_TLBFLUSH_H */
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 2144f1a8ed6..a977affaebe 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -9,6 +9,10 @@
  * O(1) scheduler patch
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <asm/intrinsics.h>
diff --git a/include/asm-ia64/cacheflush.h b/include/asm-ia64/cacheflush.h
index 4906916d715..afcfbda76e2 100644
--- a/include/asm-ia64/cacheflush.h
+++ b/include/asm-ia64/cacheflush.h
@@ -7,8 +7,8 @@
  */
 
 #include <linux/page-flags.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 /*
diff --git a/include/asm-ia64/ide.h b/include/asm-ia64/ide.h
index e928675de35..1ccf2380932 100644
--- a/include/asm-ia64/ide.h
+++ b/include/asm-ia64/ide.h
@@ -46,7 +46,6 @@ static inline unsigned long ide_default_io_base(int index)
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_PCI
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index 3a62878e84f..f93308f54b6 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -35,7 +35,7 @@ extern void find_memory (void);
 extern void reserve_memory (void);
 extern void find_initrd (void);
 extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
-extern void efi_memmap_init(unsigned long *, unsigned long *);
+extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
 extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
 
 extern unsigned long vmcore_find_descriptor_size(unsigned long address);
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index 0971ec90807..e6204f14f61 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -150,7 +150,7 @@
 # ifndef __ASSEMBLY__
 
 #include <linux/sched.h>	/* for mm_struct */
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 1703c9d885b..471cc2ee9ac 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -14,8 +14,8 @@
 #include <linux/threads.h>
 #include <linux/kernel.h>
 #include <linux/cpumask.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/io.h>
 #include <asm/param.h>
 #include <asm/processor.h>
diff --git a/include/asm-ia64/spinlock.h b/include/asm-ia64/spinlock.h
index ff857e31738..0229fb95fb3 100644
--- a/include/asm-ia64/spinlock.h
+++ b/include/asm-ia64/spinlock.h
@@ -11,9 +11,9 @@
 
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/bitops.h>
 
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/intrinsics.h>
 #include <asm/system.h>
 
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index e37f9fbf33a..80bcb0a38e8 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -84,19 +84,6 @@ flush_tlb_page (struct vm_area_struct *vma, unsigned long addr)
 }
 
 /*
- * Flush the TLB entries mapping the virtually mapped linear page
- * table corresponding to address range [START-END).
- */
-static inline void
-flush_tlb_pgtables (struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	/*
-	 * Deprecated.  The virtual page table is now flushed via the normal gather/flush
-	 * interface (see tlb.h).
-	 */
-}
-
-/*
  * Flush the local TLB. Invoked from another cpu using an IPI.
  */
 #ifdef CONFIG_SMP
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index 313a02c4a88..6dc9b81bf9f 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -11,6 +11,10 @@
  *    Copyright (C) 2004  Hirokazu Takata <takata at linux-m32r.org>
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/assembler.h>
 #include <asm/system.h>
diff --git a/include/asm-m32r/ide.h b/include/asm-m32r/ide.h
index 4672a49e876..5d2044e529a 100644
--- a/include/asm-m32r/ide.h
+++ b/include/asm-m32r/ide.h
@@ -65,7 +65,6 @@ static __inline__ unsigned long ide_default_io_base(int index)
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h
index 92d7266783f..86505387be0 100644
--- a/include/asm-m32r/pgtable.h
+++ b/include/asm-m32r/pgtable.h
@@ -21,9 +21,9 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/threads.h>
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/addrspace.h>
-#include <asm/bitops.h>
 #include <asm/page.h>
 
 struct mm_struct;
diff --git a/include/asm-m32r/tlbflush.h b/include/asm-m32r/tlbflush.h
index 3d37ac002bc..0ef95307784 100644
--- a/include/asm-m32r/tlbflush.h
+++ b/include/asm-m32r/tlbflush.h
@@ -12,7 +12,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void local_flush_tlb_all(void);
@@ -93,8 +92,6 @@ static __inline__ void __flush_tlb_all(void)
 	);
 }
 
-#define flush_tlb_pgtables(mm, start, end)	do { } while (0)
-
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
 #endif	/* _ASM_M32R_TLBFLUSH_H */
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index da151f70cdc..2976b5d68e9 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -8,6 +8,10 @@
  * for more details.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
diff --git a/include/asm-m68k/ide.h b/include/asm-m68k/ide.h
index f9ffb2cbbae..909c6dfd385 100644
--- a/include/asm-m68k/ide.h
+++ b/include/asm-m68k/ide.h
@@ -137,7 +137,7 @@ ide_get_lock(irq_handler_t handler, void *data)
 #endif /* CONFIG_BLK_DEV_FALCON_IDE */
 
 #define IDE_ARCH_ACK_INTR
-#define ide_ack_intr(hwif)	((hwif)->hw.ack_intr ? (hwif)->hw.ack_intr(hwif) : 1)
+#define ide_ack_intr(hwif)	((hwif)->ack_intr ? (hwif)->ack_intr(hwif) : 1)
 
 #endif /* __KERNEL__ */
 #endif /* _M68K_IDE_H */
diff --git a/include/asm-m68k/tlbflush.h b/include/asm-m68k/tlbflush.h
index 31678831ee4..17707ec315e 100644
--- a/include/asm-m68k/tlbflush.h
+++ b/include/asm-m68k/tlbflush.h
@@ -92,11 +92,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #else
 
 
@@ -219,11 +214,6 @@ static inline void flush_tlb_kernel_page (unsigned long addr)
 	sun3_put_segmap (addr & ~(SUN3_PMEG_SIZE - 1), SUN3_INVALID_PMEG);
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif
 
 #endif /* _M68K_TLBFLUSH_H */
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index b8b2770d687..f8dfb7ba2e2 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -10,6 +10,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm-generic/bitops/ffs.h>
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/sched.h>
diff --git a/include/asm-m68knommu/tlbflush.h b/include/asm-m68knommu/tlbflush.h
index de858db28b0..a470cfb803e 100644
--- a/include/asm-m68knommu/tlbflush.h
+++ b/include/asm-m68knommu/tlbflush.h
@@ -52,10 +52,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
 	BUG();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG();
-}
-
 #endif /* _M68KNOMMU_TLBFLUSH_H */
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 77ed0c79830..ec75ce4cdb8 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -9,6 +9,10 @@
 #ifndef _ASM_BITOPS_H
 #define _ASM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/types.h>
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 483685b1592..e59d4c03966 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -12,12 +12,12 @@
 
 #include <linux/sched.h>
 #include <linux/thread_info.h>
+#include <linux/bitops.h>
 
 #include <asm/mipsregs.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
 #include <asm/hazards.h>
-#include <asm/bitops.h>
 #include <asm/processor.h>
 #include <asm/current.h>
 
diff --git a/include/asm-mips/ip32/crime.h b/include/asm-mips/ip32/crime.h
index a13702fafa8..7c36b0e5b1c 100644
--- a/include/asm-mips/ip32/crime.h
+++ b/include/asm-mips/ip32/crime.h
@@ -17,9 +17,6 @@
  */
 #define CRIME_BASE	0x14000000	/* physical */
 
-#undef BIT
-#define BIT(x)	(1UL << (x))
-
 struct sgi_crime {
 	volatile unsigned long id;
 #define CRIME_ID_MASK			0xff
diff --git a/include/asm-mips/ip32/mace.h b/include/asm-mips/ip32/mace.h
index 990082c81f3..d08d7c67213 100644
--- a/include/asm-mips/ip32/mace.h
+++ b/include/asm-mips/ip32/mace.h
@@ -17,9 +17,6 @@
  */
 #define MACE_BASE	0x1f000000	/* physical */
 
-#undef BIT
-#define BIT(x) (1UL << (x))
-
 /*
  * PCI interface
  */
diff --git a/include/asm-mips/mach-generic/ide.h b/include/asm-mips/mach-generic/ide.h
index a77128362a7..4ec2b930dfb 100644
--- a/include/asm-mips/mach-generic/ide.h
+++ b/include/asm-mips/mach-generic/ide.h
@@ -98,7 +98,6 @@ static __inline__ unsigned long ide_default_io_base(int index)
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-mips/sni.h b/include/asm-mips/sni.h
index 4d43dbb7f8b..af081457f84 100644
--- a/include/asm-mips/sni.h
+++ b/include/asm-mips/sni.h
@@ -141,8 +141,6 @@ extern unsigned int sni_brd_type;
 #define A20R_PT_TIM0_ACK        0xbc050000
 #define A20R_PT_TIM1_ACK        0xbc060000
 
-#define SNI_MIPS_IRQ_CPU_TIMER  (MIPS_CPU_IRQ_BASE+7)
-
 #define SNI_A20R_IRQ_BASE       MIPS_CPU_IRQ_BASE
 #define SNI_A20R_IRQ_TIMER      (SNI_A20R_IRQ_BASE+5)
 
diff --git a/include/asm-mips/time.h b/include/asm-mips/time.h
index cf76f4f7435..bc47af313bc 100644
--- a/include/asm-mips/time.h
+++ b/include/asm-mips/time.h
@@ -21,6 +21,7 @@
 #include <linux/ptrace.h>
 #include <linux/rtc.h>
 #include <linux/spinlock.h>
+#include <linux/clockchips.h>
 #include <linux/clocksource.h>
 
 extern spinlock_t rtc_lock;
@@ -83,4 +84,8 @@ static inline void mips_clockevent_init(void)
 }
 #endif
 
+extern void clocksource_set_clock(struct clocksource *cs, unsigned int clock);
+extern void clockevent_set_clock(struct clock_event_device *cd,
+		unsigned int clock);
+
 #endif /* _ASM_TIME_H */
diff --git a/include/asm-mips/tlbflush.h b/include/asm-mips/tlbflush.h
index 730e841fb08..86b21de12e9 100644
--- a/include/asm-mips/tlbflush.h
+++ b/include/asm-mips/tlbflush.h
@@ -11,7 +11,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -45,10 +44,4 @@ extern void flush_tlb_one(unsigned long vaddr);
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-	unsigned long start, unsigned long end)
-{
-	/* Nothing to do on MIPS.  */
-}
-
 #endif /* __ASM_TLBFLUSH_H */
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 03ae287baf8..f8eebcbad01 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -1,6 +1,10 @@
 #ifndef _PARISC_BITOPS_H
 #define _PARISC_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/types.h>		/* for BITS_PER_LONG/SHIFT_PER_LONG */
 #include <asm/byteorder.h>
diff --git a/include/asm-parisc/ide.h b/include/asm-parisc/ide.h
index b27bf7aeb25..be8760fbc8e 100644
--- a/include/asm-parisc/ide.h
+++ b/include/asm-parisc/ide.h
@@ -17,7 +17,6 @@
 #define MAX_HWIFS	2
 #endif
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define ide_request_irq(irq,hand,flg,dev,id)	request_irq((irq),(hand),(flg),(dev),(id))
diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h
index e88cacd6372..9ab79c8e5a4 100644
--- a/include/asm-parisc/pgtable.h
+++ b/include/asm-parisc/pgtable.h
@@ -11,9 +11,9 @@
  */
 
 #include <linux/mm.h>		/* for vm_area_struct */
+#include <linux/bitops.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
-#include <asm/bitops.h>
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
diff --git a/include/asm-parisc/tlbflush.h b/include/asm-parisc/tlbflush.h
index 270cf309772..b72ec66db69 100644
--- a/include/asm-parisc/tlbflush.h
+++ b/include/asm-parisc/tlbflush.h
@@ -57,10 +57,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 #endif
 }
 
-extern __inline__ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-}
- 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr)
 {
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index e85c3e078ba..733b4af7f4f 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -38,6 +38,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/asm-compat.h>
 #include <asm/synch.h>
diff --git a/include/asm-powerpc/ide.h b/include/asm-powerpc/ide.h
index 1644e44c875..fd7f5a430f0 100644
--- a/include/asm-powerpc/ide.h
+++ b/include/asm-powerpc/ide.h
@@ -69,12 +69,11 @@ static __inline__ unsigned long ide_default_io_base(int index)
 
 #ifdef CONFIG_BLK_DEV_MPC8xx_IDE
 #define IDE_ARCH_ACK_INTR  1
-#define ide_ack_intr(hwif) (hwif->hw.ack_intr ? hwif->hw.ack_intr(hwif) : 1)
+#define ide_ack_intr(hwif) ((hwif)->ack_intr ? (hwif)->ack_intr(hwif) : 1)
 #endif
 
 #endif /* __powerpc64__ */
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #endif /* __KERNEL__ */
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 870967e4720..4a82fdccee9 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -26,9 +26,9 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
 #include <asm/machdep.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 #define IOMMU_PAGE_SHIFT      12
 #define IOMMU_PAGE_SIZE       (ASM_CONST(1) << IOMMU_PAGE_SHIFT)
diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h
index f863ac21409..9102b8bf0ea 100644
--- a/include/asm-powerpc/mmu_context.h
+++ b/include/asm-powerpc/mmu_context.h
@@ -8,7 +8,7 @@
 
 #ifndef CONFIG_PPC64
 #include <asm/atomic.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 /*
  * On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index a022f806bb2..b6b036ccee3 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -8,7 +8,6 @@
  *  - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
@@ -174,15 +173,5 @@ extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
  */
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
 
-/*
- * This is called in munmap when we have freed up some page-table
- * pages.  We don't need to do anything here, there's nothing special
- * about our page-table pages.  -- paulus
- */
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif /*__KERNEL__ */
 #endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/include/asm-ppc/mmu_context.h b/include/asm-ppc/mmu_context.h
index a6441a063e5..b2e25d8997b 100644
--- a/include/asm-ppc/mmu_context.h
+++ b/include/asm-ppc/mmu_context.h
@@ -2,8 +2,9 @@
 #ifndef __PPC_MMU_CONTEXT_H
 #define __PPC_MMU_CONTEXT_H
 
+#include <linux/bitops.h>
+
 #include <asm/atomic.h>
-#include <asm/bitops.h>
 #include <asm/mmu.h>
 #include <asm/cputable.h>
 #include <asm-generic/mm_hooks.h>
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index d756b34d25f..34d9a6357c3 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -15,6 +15,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 
 /*
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 66793f55c8b..6de2632a3e4 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -14,7 +14,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 /*
@@ -152,10 +151,4 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 #endif
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-        /* S/390 does not keep any page table caches in TLB */
-}
-
 #endif /* _S390_TLBFLUSH_H */
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 9d7021723a2..df805f20b26 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -2,6 +2,11 @@
 #define __ASM_SH_BITOPS_H
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/system.h>
 /* For __swab32 */
 #include <asm/byteorder.h>
diff --git a/include/asm-sh/tlbflush.h b/include/asm-sh/tlbflush.h
index 455fb8da441..e0ac97221ae 100644
--- a/include/asm-sh/tlbflush.h
+++ b/include/asm-sh/tlbflush.h
@@ -9,7 +9,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 extern void local_flush_tlb_all(void);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
@@ -47,9 +46,4 @@ extern void flush_tlb_one(unsigned long asid, unsigned long page);
 
 #endif /* CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* Nothing to do */
-}
 #endif /* __ASM_SH_TLBFLUSH_H */
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index 444d5ea92ce..600c59efb4c 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -13,6 +13,11 @@
  */
 
 #ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/system.h>
 /* For __swab32 */
diff --git a/include/asm-sh64/ide.h b/include/asm-sh64/ide.h
index c9d84d5f772..b6e31e8b941 100644
--- a/include/asm-sh64/ide.h
+++ b/include/asm-sh64/ide.h
@@ -19,7 +19,6 @@
 /* Without this, the initialisation of PCI IDE cards end up calling
  * ide_init_hwif_ports, which won't work. */
 #ifdef CONFIG_BLK_DEV_IDEPCI
-#define IDE_ARCH_OBSOLETE_INIT 1
 #define ide_default_io_ctl(base)	(0)
 #endif
 
diff --git a/include/asm-sh64/tlbflush.h b/include/asm-sh64/tlbflush.h
index e45beadc29e..16a164a2375 100644
--- a/include/asm-sh64/tlbflush.h
+++ b/include/asm-sh64/tlbflush.h
@@ -20,10 +20,6 @@ extern void flush_tlb_mm(struct mm_struct *mm);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			    unsigned long end);
 extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
 
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index 00bd0a679d7..cb3cefab6e0 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -14,6 +14,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 extern unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
 extern unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
diff --git a/include/asm-sparc/ide.h b/include/asm-sparc/ide.h
index a6d735a1310..404022765fc 100644
--- a/include/asm-sparc/ide.h
+++ b/include/asm-sparc/ide.h
@@ -18,7 +18,6 @@
 #undef  MAX_HWIFS
 #define MAX_HWIFS	2
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define __ide_insl(data_reg, buffer, wcount) \
diff --git a/include/asm-sparc/tlbflush.h b/include/asm-sparc/tlbflush.h
index a619da5cfaa..b957e29d2ae 100644
--- a/include/asm-sparc/tlbflush.h
+++ b/include/asm-sparc/tlbflush.h
@@ -13,7 +13,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 #ifdef CONFIG_SMP
@@ -42,11 +41,6 @@ BTFIXUPDEF_CALL(void, flush_tlb_mm, struct mm_struct *)
 BTFIXUPDEF_CALL(void, flush_tlb_range, struct vm_area_struct *, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(void, flush_tlb_page, struct vm_area_struct *, unsigned long)
 
-// Thanks to Anton Blanchard, our pagetables became uncached in 2.4. Wee!
-// extern void flush_tlb_pgtables(struct mm_struct *mm,
-//     unsigned long start, unsigned long end);
-#define flush_tlb_pgtables(mm, start, end)	do{ }while(0)
-
 #define flush_tlb_all() BTFIXUP_CALL(flush_tlb_all)()
 #define flush_tlb_mm(mm) BTFIXUP_CALL(flush_tlb_mm)(mm)
 #define flush_tlb_range(vma,start,end) BTFIXUP_CALL(flush_tlb_range)(vma,start,end)
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index dd4bfe993b6..982ce8992b9 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -7,6 +7,10 @@
 #ifndef _SPARC64_BITOPS_H
 #define _SPARC64_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/byteorder.h>
 
diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h
index 55149cf933c..ac7eb210b94 100644
--- a/include/asm-sparc64/ide.h
+++ b/include/asm-sparc64/ide.h
@@ -24,7 +24,6 @@
 # endif
 #endif
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #define __ide_insl(data_reg, buffer, wcount) \
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 42c09949526..1c1c5ea5cea 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -26,7 +26,7 @@
  *	Private routines/data
  */
  
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/atomic.h>
 #include <asm/percpu.h>
 
diff --git a/include/asm-sparc64/tlbflush.h b/include/asm-sparc64/tlbflush.h
index 3487328570e..fbb675dbe0c 100644
--- a/include/asm-sparc64/tlbflush.h
+++ b/include/asm-sparc64/tlbflush.h
@@ -41,11 +41,4 @@ do {	flush_tsb_kernel_range(start,end); \
 
 #endif /* ! CONFIG_SMP */
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end)
-{
-	/* We don't use virtual page tables for TLB miss processing
-	 * any more.  Nowadays we use the TSB.
-	 */
-}
-
 #endif /* _SPARC64_TLBFLUSH_H */
diff --git a/include/asm-um/bitops.h b/include/asm-um/bitops.h
index 46d781953d3..e4d38d437b9 100644
--- a/include/asm-um/bitops.h
+++ b/include/asm-um/bitops.h
@@ -1,6 +1,10 @@
 #ifndef __UM_BITOPS_H
 #define __UM_BITOPS_H
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include "asm/arch/bitops.h"
 
 #endif
diff --git a/include/asm-um/tlbflush.h b/include/asm-um/tlbflush.h
index 9d647c55350..614f2c09117 100644
--- a/include/asm-um/tlbflush.h
+++ b/include/asm-um/tlbflush.h
@@ -17,7 +17,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_kernel_vm() flushes the kernel vm area
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  */
 
 extern void flush_tlb_all(void);
@@ -29,9 +28,4 @@ extern void flush_tlb_kernel_vm(void);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_tlb_one(unsigned long addr);
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-}
-
 #endif
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index 8eafdb1c08b..f82f5b4a56e 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -13,6 +13,9 @@
 #ifndef __V850_BITOPS_H__
 #define __V850_BITOPS_H__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
 
 #include <linux/compiler.h>	/* unlikely  */
 #include <asm/byteorder.h>	/* swab32 */
diff --git a/include/asm-v850/tlbflush.h b/include/asm-v850/tlbflush.h
index 5f2f85f636e..c44aa64449c 100644
--- a/include/asm-v850/tlbflush.h
+++ b/include/asm-v850/tlbflush.h
@@ -61,10 +61,4 @@ static inline void flush_tlb_kernel_page(unsigned long addr)
 	BUG ();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	BUG ();
-}
-
 #endif /* __V850_TLBFLUSH_H__ */
diff --git a/include/asm-x86/acpi_32.h b/include/asm-x86/acpi_32.h
index 125179adf04..723493e6c85 100644
--- a/include/asm-x86/acpi_32.h
+++ b/include/asm-x86/acpi_32.h
@@ -81,11 +81,7 @@ int __acpi_release_global_lock(unsigned int *lock);
         :"=r"(n_hi), "=r"(n_lo)     \
         :"0"(n_hi), "1"(n_lo))
 
-#ifdef CONFIG_X86_IO_APIC
-extern void check_acpi_pci(void);
-#else
-static inline void check_acpi_pci(void) { }
-#endif
+extern void early_quirks(void);
 
 #ifdef CONFIG_ACPI
 extern int acpi_lapic;
diff --git a/include/asm-x86/bitops_32.h b/include/asm-x86/bitops_32.h
index c96641f7502..3268a341cf4 100644
--- a/include/asm-x86/bitops_32.h
+++ b/include/asm-x86/bitops_32.h
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h
index 525edf2ce5c..dacaa5f1feb 100644
--- a/include/asm-x86/bitops_64.h
+++ b/include/asm-x86/bitops_64.h
@@ -5,6 +5,10 @@
  * Copyright 1992, Linus Torvalds.
  */
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/alternative.h>
 
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h
index 53cb96b68a6..66ba7987184 100644
--- a/include/asm-x86/compat.h
+++ b/include/asm-x86/compat.h
@@ -6,6 +6,7 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <asm/user32.h>
 
 #define COMPAT_USER_HZ	100
 
@@ -181,6 +182,11 @@ struct compat_shmid64_ds {
 };
 
 /*
+ * The type of struct elf_prstatus.pr_reg in compatible core dumps.
+ */
+typedef struct user_regs_struct32 compat_elf_gregset_t;
+
+/*
  * A pointer passed in from user mode. This should not
  * be used for syscall parameters, just declare them
  * as pointers because the syscall entry code will have
diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h
index ac991b5ca0f..7d9c938e69f 100644
--- a/include/asm-x86/desc_64.h
+++ b/include/asm-x86/desc_64.h
@@ -20,6 +20,16 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
 #define clear_LDT()  asm volatile("lldt %w0"::"r" (0))
 
+static inline unsigned long __store_tr(void)
+{
+       unsigned long tr;
+
+       asm volatile ("str %w0":"=r" (tr));
+       return tr;
+}
+
+#define store_tr(tr) (tr) = __store_tr()
+
 /*
  * This is the ldt that every process will get unless we need
  * something other than this.
@@ -31,6 +41,16 @@ extern struct desc_ptr cpu_gdt_descr[];
 /* the cpu gdt accessor */
 #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address)
 
+static inline void load_gdt(const struct desc_ptr *ptr)
+{
+	asm volatile("lgdt %w0"::"m" (*ptr));
+}
+
+static inline void store_gdt(struct desc_ptr *ptr)
+{
+       asm("sgdt %w0":"=m" (*ptr));
+}
+
 static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)  
 {
 	struct gate_struct s; 	
@@ -71,6 +91,16 @@ static inline void set_system_gate_ist(int nr, void *func, unsigned ist)
 	_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist);
 }
 
+static inline void load_idt(const struct desc_ptr *ptr)
+{
+	asm volatile("lidt %w0"::"m" (*ptr));
+}
+
+static inline void store_idt(struct desc_ptr *dtr)
+{
+       asm("sidt %w0":"=m" (*dtr));
+}
+
 static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, 
 					 unsigned size) 
 { 
diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h
index d94898831ba..771af336734 100644
--- a/include/asm-x86/geode.h
+++ b/include/asm-x86/geode.h
@@ -38,6 +38,8 @@ extern int geode_get_dev_base(unsigned int dev);
 #define MSR_LBAR_ACPI		0x5140000E
 #define MSR_LBAR_PMS		0x5140000F
 
+#define MSR_DIVIL_SOFT_RESET	0x51400017
+
 #define MSR_PIC_YSEL_LOW	0x51400020
 #define MSR_PIC_YSEL_HIGH	0x51400021
 #define MSR_PIC_ZSEL_LOW	0x51400022
diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h
index d4ab6db050b..4f51519fc19 100644
--- a/include/asm-x86/hpet.h
+++ b/include/asm-x86/hpet.h
@@ -64,6 +64,7 @@
 /* hpet memory map physical address */
 extern unsigned long hpet_address;
 extern unsigned long force_hpet_address;
+extern int hpet_force_user;
 extern int is_hpet_enabled(void);
 extern int hpet_enable(void);
 extern unsigned long hpet_readl(unsigned long a);
diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h
index e7817a3d657..42130adf9c7 100644
--- a/include/asm-x86/ide.h
+++ b/include/asm-x86/ide.h
@@ -62,7 +62,6 @@ static __inline__ unsigned long ide_default_io_base(int index)
 	}
 }
 
-#define IDE_ARCH_OBSOLETE_INIT
 #define ide_default_io_ctl(base)	((base) + 0x206) /* obsolete */
 
 #ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-x86/io_apic_64.h b/include/asm-x86/io_apic_64.h
index d9f2e54324d..e2c13675ee4 100644
--- a/include/asm-x86/io_apic_64.h
+++ b/include/asm-x86/io_apic_64.h
@@ -133,4 +133,6 @@ void enable_NMI_through_LVT0 (void * dummy);
 
 extern spinlock_t i8259A_lock;
 
+extern int timer_over_8254;
+
 #endif
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index a7c75ea408a..6d011bd6067 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -119,7 +119,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
 	 */
 	local_irq_save(flags);
 	for_each_cpu_mask(query_cpu, mask) {
-		__send_IPI_dest_field(x86_cpu_to_apicid[query_cpu],
+		__send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu),
 				      vector, APIC_DEST_PHYSICAL);
 	}
 	local_irq_restore(flags);
diff --git a/include/asm-x86/irq_32.h b/include/asm-x86/irq_32.h
index 36f310632c4..aca9c96e8e6 100644
--- a/include/asm-x86/irq_32.h
+++ b/include/asm-x86/irq_32.h
@@ -45,4 +45,7 @@ unsigned int do_IRQ(struct pt_regs *regs);
 void init_IRQ(void);
 void __init native_init_IRQ(void);
 
+/* Interrupt vector management */
+extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
+
 #endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index a02eb299134..a4944732be0 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -73,8 +73,32 @@
 #define MSR_P6_EVNTSEL0			0x00000186
 #define MSR_P6_EVNTSEL1			0x00000187
 
-/* K7/K8 MSRs. Not complete. See the architecture manual for a more
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
+
+#define MSR_AMD64_IBSFETCHCTL		0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD		0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSOPCTL		0xc0011033
+#define MSR_AMD64_IBSOPRIP		0xc0011034
+#define MSR_AMD64_IBSOPDATA		0xc0011035
+#define MSR_AMD64_IBSOPDATA2		0xc0011036
+#define MSR_AMD64_IBSOPDATA3		0xc0011037
+#define MSR_AMD64_IBSDCLINAD		0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSCTL		0xc001103a
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1			0xc001001a
+#define MSR_K8_TOP_MEM2			0xc001001d
+#define MSR_K8_SYSCFG			0xc0010010
+#define MSR_K8_HWCR			0xc0010015
+#define MSR_K8_ENABLE_C1E		0xc0010055
+#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
+
+/* K7 MSRs */
 #define MSR_K7_EVNTSEL0			0xc0010000
 #define MSR_K7_PERFCTR0			0xc0010004
 #define MSR_K7_EVNTSEL1			0xc0010001
@@ -83,20 +107,10 @@
 #define MSR_K7_PERFCTR2			0xc0010006
 #define MSR_K7_EVNTSEL3			0xc0010003
 #define MSR_K7_PERFCTR3			0xc0010007
-#define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K7_CLK_CTL			0xc001001b
-#define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-
-#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
-#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
-#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
-
 #define MSR_K7_HWCR			0xc0010015
-#define MSR_K8_HWCR			0xc0010015
 #define MSR_K7_FID_VID_CTL		0xc0010041
 #define MSR_K7_FID_VID_STATUS		0xc0010042
-#define MSR_K8_ENABLE_C1E		0xc0010055
 
 /* K6 MSRs */
 #define MSR_K6_EFER			0xc0000080
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index acd4b339c49..ed3e70d8d04 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -17,10 +17,7 @@
 #include <linux/threads.h>
 #include <asm/paravirt.h>
 
-#ifndef _I386_BITOPS_H
-#include <asm/bitops.h>
-#endif
-
+#include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index a79f5355e3b..9b0ff477b39 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -9,7 +9,7 @@
  * the x86-64 page table tree.
  */
 #include <asm/processor.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/threads.h>
 #include <asm/pda.h>
 
diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h
index 83800e7496e..13976b08683 100644
--- a/include/asm-x86/processor_32.h
+++ b/include/asm-x86/processor_32.h
@@ -79,6 +79,7 @@ struct cpuinfo_x86 {
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	__u8 phys_proc_id; 		/* Physical processor id. */
 	__u8 cpu_core_id;  		/* Core id */
+	__u8 cpu_index;			/* index into per_cpu list */
 #endif
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
@@ -103,14 +104,19 @@ extern struct tss_struct doublefault_tss;
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
-extern	int cpu_llc_id[NR_CPUS];
+/*
+ * the following now lives in the per cpu area:
+ * extern	int cpu_llc_id[NR_CPUS];
+ */
+DECLARE_PER_CPU(u8, cpu_llc_id);
 extern char ignore_fpu_irq;
 
 void __init cpu_detect(struct cpuinfo_x86 *c);
diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h
index f422becbddd..e4f19970a82 100644
--- a/include/asm-x86/processor_64.h
+++ b/include/asm-x86/processor_64.h
@@ -74,6 +74,7 @@ struct cpuinfo_x86 {
 	__u8	booted_cores;	/* number of cores as seen by OS */
 	__u8	phys_proc_id;	/* Physical Processor id. */
 	__u8	cpu_core_id;	/* Core id. */
+	__u8	cpu_index;	/* index into per_cpu list */
 #endif
 } ____cacheline_aligned;
 
@@ -88,11 +89,12 @@ struct cpuinfo_x86 {
 #define X86_VENDOR_UNKNOWN 0xff
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
 extern char ignore_irq13;
@@ -390,12 +392,6 @@ static inline void sync_core(void)
 	asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
 } 
 
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(void *x) 
-{ 
-	asm volatile("prefetcht0 (%0)" :: "r" (x));
-} 
-
 #define ARCH_HAS_PREFETCHW 1
 static inline void prefetchw(void *x) 
 { 
diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h
index c44a3a93b5a..dabba55f7ed 100644
--- a/include/asm-x86/proto.h
+++ b/include/asm-x86/proto.h
@@ -83,8 +83,6 @@ extern unsigned tsc_khz;
 extern int reboot_force;
 extern int notsc_setup(char *);
 
-extern int timer_over_8254;
-
 extern int gsi_irq_sharing(int gsi);
 
 extern int force_mwait;
diff --git a/include/asm-x86/ptrace_32.h b/include/asm-x86/ptrace_32.h
index 6002597b9e1..78d063dabe0 100644
--- a/include/asm-x86/ptrace_32.h
+++ b/include/asm-x86/ptrace_32.h
@@ -55,6 +55,8 @@ static inline int v8086_mode(struct pt_regs *regs)
 }
 
 #define instruction_pointer(regs) ((regs)->eip)
+#define frame_pointer(regs) ((regs)->ebp)
+#define stack_pointer(regs) ((regs)->esp)
 #define regs_return_value(regs) ((regs)->eax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/include/asm-x86/ptrace_64.h b/include/asm-x86/ptrace_64.h
index 7f166ccb060..7bfe61e1b70 100644
--- a/include/asm-x86/ptrace_64.h
+++ b/include/asm-x86/ptrace_64.h
@@ -40,6 +40,8 @@ struct pt_regs {
 #define user_mode(regs) (!!((regs)->cs & 3))
 #define user_mode_vm(regs) user_mode(regs)
 #define instruction_pointer(regs) ((regs)->rip)
+#define frame_pointer(regs) ((regs)->rbp)
+#define stack_pointer(regs) ((regs)->rsp)
 #define regs_return_value(regs) ((regs)->rax)
 
 extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index ee46038d126..7056d868452 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -11,7 +11,7 @@
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/mpspec.h>
 #include <asm/apic.h>
 #ifdef CONFIG_X86_IO_APIC
@@ -39,9 +39,11 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 
 #define MAX_APICID 256
-extern u8 x86_cpu_to_apicid[];
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
 
-#define cpu_physical_id(cpu)	x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
 extern void set_cpu_sibling_map(int cpu);
 
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index d30e9b684fd..6f0e0273b64 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -37,6 +37,8 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
+extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
+				  void *info, int wait);
 
 /*
  * cpu_sibling_map and cpu_core_map now live
@@ -47,7 +49,7 @@ extern void smp_send_reschedule(int cpu);
  */
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
-extern u8 cpu_llc_id[NR_CPUS];
+DECLARE_PER_CPU(u8, cpu_llc_id);
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
@@ -84,7 +86,9 @@ static inline int hard_smp_processor_id(void)
  * Some lowlevel functions might want to know about
  * the real APIC ID <-> CPU # mapping.
  */
-extern u8 x86_cpu_to_apicid[NR_CPUS];	/* physical ID */
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);	/* physical ID */
 extern u8 bios_cpu_apicid[];
 
 static inline int cpu_present_to_apicid(int mps_cpu)
@@ -115,8 +119,9 @@ static __inline int logical_smp_processor_id(void)
 }
 
 #ifdef CONFIG_SMP
-#define cpu_physical_id(cpu)		x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)		per_cpu(x86_cpu_to_apicid, cpu)
 #else
+extern unsigned int boot_cpu_id;
 #define cpu_physical_id(cpu)		boot_cpu_id
 #endif /* !CONFIG_SMP */
 #endif
diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h
index db6283eb5e4..ef8468883ba 100644
--- a/include/asm-x86/system_32.h
+++ b/include/asm-x86/system_32.h
@@ -315,5 +315,6 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
+void __show_registers(struct pt_regs *, int all);
 
 #endif
diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h
index a50fa674148..2bd5b95e204 100644
--- a/include/asm-x86/tlbflush_32.h
+++ b/include/asm-x86/tlbflush_32.h
@@ -78,7 +78,6 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *  - flush_tlb_others(cpumask, mm, va) flushes a TLBs on other cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
@@ -166,10 +165,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* i386 does not keep any page table caches in TLB */
-}
-
 #endif /* _I386_TLBFLUSH_H */
diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h
index 888eb4abdd0..7731fd23d57 100644
--- a/include/asm-x86/tlbflush_64.h
+++ b/include/asm-x86/tlbflush_64.h
@@ -31,7 +31,6 @@ static inline void __flush_tlb_all(void)
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
  *
  * x86-64 can only flush individual pages or full VMs. For a range flush
  * we always do the full VM. Might be worth trying if for a small
@@ -98,12 +97,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* x86_64 does not keep any page table caches in a software TLB.
-	   The CPUs do in their hardware TLBs, but they are handled
-	   by the normal TLB flushing algorithms. */
-}
-
 #endif /* _X8664_TLBFLUSH_H */
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index ae1074603c4..9040f5a6127 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -28,8 +28,8 @@
 #define _ASM_I386_TOPOLOGY_H
 
 #ifdef CONFIG_X86_HT
-#define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #endif
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index 848c17f9222..a718dda037e 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -5,7 +5,7 @@
 #ifdef CONFIG_NUMA
 
 #include <asm/mpspec.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 
 extern cpumask_t cpu_online_map;
 
@@ -56,8 +56,8 @@ extern int __node_distance(int, int);
 #endif
 
 #ifdef CONFIG_SMP
-#define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
-#define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).phys_proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).cpu_core_id)
 #define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index 78db04cf6e4..23261e8f2e5 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -15,6 +15,10 @@
 
 #ifdef __KERNEL__
 
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
 #include <asm/processor.h>
 #include <asm/byteorder.h>
 #include <asm/system.h>
diff --git a/include/asm-xtensa/tlbflush.h b/include/asm-xtensa/tlbflush.h
index 7c637b3c352..46d240074f7 100644
--- a/include/asm-xtensa/tlbflush.h
+++ b/include/asm-xtensa/tlbflush.h
@@ -41,17 +41,6 @@ extern void flush_tlb_range(struct vm_area_struct*,unsigned long,unsigned long);
 
 #define flush_tlb_kernel_range(start,end) flush_tlb_all()
 
-
-/* This is calld in munmap when we have freed up some page-table pages.
- * We don't need to do anything here, there's nothing special about our
- * page-table pages.
- */
-
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-                                      unsigned long start, unsigned long end)
-{
-}
-
 /* TLB operations. */
 
 static inline unsigned long itlb_probe(unsigned long addr)
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 7ac8303c847..e3ffd14a3f0 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -47,6 +47,7 @@ header-y += coda_psdev.h
 header-y += coff.h
 header-y += comstats.h
 header-y += const.h
+header-y += cgroupstats.h
 header-y += cycx_cfm.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index bf5e0009de7..8ccedf7a0a5 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -189,32 +189,6 @@ extern int ec_transaction(u8 command,
 extern int acpi_blacklisted(void);
 extern void acpi_bios_year(char *s);
 
-#define	ACPI_CSTATE_LIMIT_DEFINED	/* for driver builds */
-#ifdef	CONFIG_ACPI
-
-/*
- * Set highest legal C-state
- * 0: C0 okay, but not C1
- * 1: C1 okay, but not C2
- * 2: C2 okay, but not C3 etc.
- */
-
-extern unsigned int max_cstate;
-
-static inline unsigned int acpi_get_cstate_limit(void)
-{
-	return max_cstate;
-}
-static inline void acpi_set_cstate_limit(unsigned int new_limit)
-{
-	max_cstate = new_limit;
-	return;
-}
-#else
-static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
-static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
-#endif
-
 #ifdef CONFIG_ACPI_NUMA
 int acpi_get_pxm(acpi_handle handle);
 int acpi_get_node(acpi_handle *handle);
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 64b4641904f..acad1105d94 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/string.h>
+#include <linux/kernel.h>
 
 /*
  * bitmaps provide bit arrays that consume one or more unsigned
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index b9fb8ee3308..69c1edb9fe5 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -2,6 +2,14 @@
 #define _LINUX_BITOPS_H
 #include <asm/types.h>
 
+#ifdef	__KERNEL__
+#define BIT(nr)			(1UL << (nr))
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
+#define BITS_PER_BYTE		8
+#endif
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
new file mode 100644
index 00000000000..87479328d46
--- /dev/null
+++ b/include/linux/cgroup.h
@@ -0,0 +1,327 @@
+#ifndef _LINUX_CGROUP_H
+#define _LINUX_CGROUP_H
+/*
+ *  cgroup interface
+ *
+ *  Copyright (C) 2003 BULL SA
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/cpumask.h>
+#include <linux/nodemask.h>
+#include <linux/rcupdate.h>
+#include <linux/cgroupstats.h>
+
+#ifdef CONFIG_CGROUPS
+
+struct cgroupfs_root;
+struct cgroup_subsys;
+struct inode;
+
+extern int cgroup_init_early(void);
+extern int cgroup_init(void);
+extern void cgroup_init_smp(void);
+extern void cgroup_lock(void);
+extern void cgroup_unlock(void);
+extern void cgroup_fork(struct task_struct *p);
+extern void cgroup_fork_callbacks(struct task_struct *p);
+extern void cgroup_post_fork(struct task_struct *p);
+extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern int cgroupstats_build(struct cgroupstats *stats,
+				struct dentry *dentry);
+
+extern struct file_operations proc_cgroup_operations;
+
+/* Define the enumeration of all cgroup subsystems */
+#define SUBSYS(_x) _x ## _subsys_id,
+enum cgroup_subsys_id {
+#include <linux/cgroup_subsys.h>
+	CGROUP_SUBSYS_COUNT
+};
+#undef SUBSYS
+
+/* Per-subsystem/per-cgroup state maintained by the system. */
+struct cgroup_subsys_state {
+	/* The cgroup that this subsystem is attached to. Useful
+	 * for subsystems that want to know about the cgroup
+	 * hierarchy structure */
+	struct cgroup *cgroup;
+
+	/* State maintained by the cgroup system to allow
+	 * subsystems to be "busy". Should be accessed via css_get()
+	 * and css_put() */
+
+	atomic_t refcnt;
+
+	unsigned long flags;
+};
+
+/* bits in struct cgroup_subsys_state flags field */
+enum {
+	CSS_ROOT, /* This CSS is the root of the subsystem */
+};
+
+/*
+ * Call css_get() to hold a reference on the cgroup;
+ *
+ */
+
+static inline void css_get(struct cgroup_subsys_state *css)
+{
+	/* We don't need to reference count the root state */
+	if (!test_bit(CSS_ROOT, &css->flags))
+		atomic_inc(&css->refcnt);
+}
+/*
+ * css_put() should be called to release a reference taken by
+ * css_get()
+ */
+
+extern void __css_put(struct cgroup_subsys_state *css);
+static inline void css_put(struct cgroup_subsys_state *css)
+{
+	if (!test_bit(CSS_ROOT, &css->flags))
+		__css_put(css);
+}
+
+struct cgroup {
+	unsigned long flags;		/* "unsigned long" so bitops work */
+
+	/* count users of this cgroup. >0 means busy, but doesn't
+	 * necessarily indicate the number of tasks in the
+	 * cgroup */
+	atomic_t count;
+
+	/*
+	 * We link our 'sibling' struct into our parent's 'children'.
+	 * Our children link their 'sibling' into our 'children'.
+	 */
+	struct list_head sibling;	/* my parent's children */
+	struct list_head children;	/* my children */
+
+	struct cgroup *parent;	/* my parent */
+	struct dentry *dentry;	  	/* cgroup fs entry */
+
+	/* Private pointers for each registered subsystem */
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+	struct cgroupfs_root *root;
+	struct cgroup *top_cgroup;
+
+	/*
+	 * List of cg_cgroup_links pointing at css_sets with
+	 * tasks in this cgroup. Protected by css_set_lock
+	 */
+	struct list_head css_sets;
+
+	/*
+	 * Linked list running through all cgroups that can
+	 * potentially be reaped by the release agent. Protected by
+	 * release_list_lock
+	 */
+	struct list_head release_list;
+};
+
+/* A css_set is a structure holding pointers to a set of
+ * cgroup_subsys_state objects. This saves space in the task struct
+ * object and speeds up fork()/exit(), since a single inc/dec and a
+ * list_add()/del() can bump the reference count on the entire
+ * cgroup set for a task.
+ */
+
+struct css_set {
+
+	/* Reference count */
+	struct kref ref;
+
+	/*
+	 * List running through all cgroup groups. Protected by
+	 * css_set_lock
+	 */
+	struct list_head list;
+
+	/*
+	 * List running through all tasks using this cgroup
+	 * group. Protected by css_set_lock
+	 */
+	struct list_head tasks;
+
+	/*
+	 * List of cg_cgroup_link objects on link chains from
+	 * cgroups referenced from this css_set. Protected by
+	 * css_set_lock
+	 */
+	struct list_head cg_links;
+
+	/*
+	 * Set of subsystem states, one for each subsystem. This array
+	 * is immutable after creation apart from the init_css_set
+	 * during subsystem registration (at boot time).
+	 */
+	struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+};
+
+/* struct cftype:
+ *
+ * The files in the cgroup filesystem mostly have a very simple read/write
+ * handling, some common function will take care of it. Nevertheless some cases
+ * (read tasks) are special and therefore I define this structure for every
+ * kind of file.
+ *
+ *
+ * When reading/writing to a file:
+ *	- the cgroup to use in file->f_dentry->d_parent->d_fsdata
+ *	- the 'cftype' of the file is file->f_dentry->d_fsdata
+ */
+
+#define MAX_CFTYPE_NAME 64
+struct cftype {
+	/* By convention, the name should begin with the name of the
+	 * subsystem, followed by a period */
+	char name[MAX_CFTYPE_NAME];
+	int private;
+	int (*open) (struct inode *inode, struct file *file);
+	ssize_t (*read) (struct cgroup *cont, struct cftype *cft,
+			 struct file *file,
+			 char __user *buf, size_t nbytes, loff_t *ppos);
+	/*
+	 * read_uint() is a shortcut for the common case of returning a
+	 * single integer. Use it in place of read()
+	 */
+	u64 (*read_uint) (struct cgroup *cont, struct cftype *cft);
+	ssize_t (*write) (struct cgroup *cont, struct cftype *cft,
+			  struct file *file,
+			  const char __user *buf, size_t nbytes, loff_t *ppos);
+
+	/*
+	 * write_uint() is a shortcut for the common case of accepting
+	 * a single integer (as parsed by simple_strtoull) from
+	 * userspace. Use in place of write(); return 0 or error.
+	 */
+	int (*write_uint) (struct cgroup *cont, struct cftype *cft, u64 val);
+
+	int (*release) (struct inode *inode, struct file *file);
+};
+
+/* Add a new file to the given cgroup directory. Should only be
+ * called by subsystems from within a populate() method */
+int cgroup_add_file(struct cgroup *cont, struct cgroup_subsys *subsys,
+		       const struct cftype *cft);
+
+/* Add a set of new files to the given cgroup directory. Should
+ * only be called by subsystems from within a populate() method */
+int cgroup_add_files(struct cgroup *cont,
+			struct cgroup_subsys *subsys,
+			const struct cftype cft[],
+			int count);
+
+int cgroup_is_removed(const struct cgroup *cont);
+
+int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
+
+int cgroup_task_count(const struct cgroup *cont);
+
+/* Return true if the cgroup is a descendant of the current cgroup */
+int cgroup_is_descendant(const struct cgroup *cont);
+
+/* Control Group subsystem type. See Documentation/cgroups.txt for details */
+
+struct cgroup_subsys {
+	struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
+						  struct cgroup *cont);
+	void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cont);
+	int (*can_attach)(struct cgroup_subsys *ss,
+			  struct cgroup *cont, struct task_struct *tsk);
+	void (*attach)(struct cgroup_subsys *ss, struct cgroup *cont,
+			struct cgroup *old_cont, struct task_struct *tsk);
+	void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
+	void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
+	int (*populate)(struct cgroup_subsys *ss,
+			struct cgroup *cont);
+	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cont);
+	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
+	int subsys_id;
+	int active;
+	int early_init;
+#define MAX_CGROUP_TYPE_NAMELEN 32
+	const char *name;
+
+	/* Protected by RCU */
+	struct cgroupfs_root *root;
+
+	struct list_head sibling;
+
+	void *private;
+};
+
+#define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
+#include <linux/cgroup_subsys.h>
+#undef SUBSYS
+
+static inline struct cgroup_subsys_state *cgroup_subsys_state(
+	struct cgroup *cont, int subsys_id)
+{
+	return cont->subsys[subsys_id];
+}
+
+static inline struct cgroup_subsys_state *task_subsys_state(
+	struct task_struct *task, int subsys_id)
+{
+	return rcu_dereference(task->cgroups->subsys[subsys_id]);
+}
+
+static inline struct cgroup* task_cgroup(struct task_struct *task,
+					       int subsys_id)
+{
+	return task_subsys_state(task, subsys_id)->cgroup;
+}
+
+int cgroup_path(const struct cgroup *cont, char *buf, int buflen);
+
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss);
+
+/* A cgroup_iter should be treated as an opaque object */
+struct cgroup_iter {
+	struct list_head *cg_link;
+	struct list_head *task;
+};
+
+/* To iterate across the tasks in a cgroup:
+ *
+ * 1) call cgroup_iter_start to intialize an iterator
+ *
+ * 2) call cgroup_iter_next() to retrieve member tasks until it
+ *    returns NULL or until you want to end the iteration
+ *
+ * 3) call cgroup_iter_end() to destroy the iterator.
+ */
+void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it);
+struct task_struct *cgroup_iter_next(struct cgroup *cont,
+					struct cgroup_iter *it);
+void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it);
+
+#else /* !CONFIG_CGROUPS */
+
+static inline int cgroup_init_early(void) { return 0; }
+static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_smp(void) {}
+static inline void cgroup_fork(struct task_struct *p) {}
+static inline void cgroup_fork_callbacks(struct task_struct *p) {}
+static inline void cgroup_post_fork(struct task_struct *p) {}
+static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
+
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
+static inline int cgroupstats_build(struct cgroupstats *stats,
+					struct dentry *dentry)
+{
+	return -EINVAL;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
+#endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
new file mode 100644
index 00000000000..0b9bfbde816
--- /dev/null
+++ b/include/linux/cgroup_subsys.h
@@ -0,0 +1,38 @@
+/* Add subsystem definitions of the form SUBSYS(<name>) in this
+ * file. Surround each one by a line of comment markers so that
+ * patches don't collide
+ */
+
+/* */
+
+/* */
+
+#ifdef CONFIG_CPUSETS
+SUBSYS(cpuset)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_CPUACCT
+SUBSYS(cpuacct)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_DEBUG
+SUBSYS(debug)
+#endif
+
+/* */
+
+#ifdef CONFIG_CGROUP_NS
+SUBSYS(ns)
+#endif
+
+/* */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+SUBSYS(cpu_cgroup)
+#endif
+
+/* */
diff --git a/include/linux/cgroupstats.h b/include/linux/cgroupstats.h
new file mode 100644
index 00000000000..4f53abf6855
--- /dev/null
+++ b/include/linux/cgroupstats.h
@@ -0,0 +1,70 @@
+/* cgroupstats.h - exporting per-cgroup statistics
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Balbir Singh <balbir@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#ifndef _LINUX_CGROUPSTATS_H
+#define _LINUX_CGROUPSTATS_H
+
+#include <linux/taskstats.h>
+
+/*
+ * Data shared between user space and kernel space on a per cgroup
+ * basis. This data is shared using taskstats.
+ *
+ * Most of these states are derived by looking at the task->state value
+ * For the nr_io_wait state, a flag in the delay accounting structure
+ * indicates that the task is waiting on IO
+ *
+ * Each member is aligned to a 8 byte boundary.
+ */
+struct cgroupstats {
+	__u64	nr_sleeping;		/* Number of tasks sleeping */
+	__u64	nr_running;		/* Number of tasks running */
+	__u64	nr_stopped;		/* Number of tasks in stopped state */
+	__u64	nr_uninterruptible;	/* Number of tasks in uninterruptible */
+					/* state */
+	__u64	nr_io_wait;		/* Number of tasks waiting on IO */
+};
+
+/*
+ * Commands sent from userspace
+ * Not versioned. New commands should only be inserted at the enum's end
+ * prior to __CGROUPSTATS_CMD_MAX
+ */
+
+enum {
+	CGROUPSTATS_CMD_UNSPEC = __TASKSTATS_CMD_MAX,	/* Reserved */
+	CGROUPSTATS_CMD_GET,		/* user->kernel request/get-response */
+	CGROUPSTATS_CMD_NEW,		/* kernel->user event */
+	__CGROUPSTATS_CMD_MAX,
+};
+
+#define CGROUPSTATS_CMD_MAX (__CGROUPSTATS_CMD_MAX - 1)
+
+enum {
+	CGROUPSTATS_TYPE_UNSPEC = 0,	/* Reserved */
+	CGROUPSTATS_TYPE_CGROUP_STATS,	/* contains name + stats */
+	__CGROUPSTATS_TYPE_MAX,
+};
+
+#define CGROUPSTATS_TYPE_MAX (__CGROUPSTATS_TYPE_MAX - 1)
+
+enum {
+	CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
+	CGROUPSTATS_CMD_ATTR_FD,
+	__CGROUPSTATS_CMD_ATTR_MAX,
+};
+
+#define CGROUPSTATS_CMD_ATTR_MAX (__CGROUPSTATS_CMD_ATTR_MAX - 1)
+
+#endif /* _LINUX_CGROUPSTATS_H */
diff --git a/include/linux/cpu_acct.h b/include/linux/cpu_acct.h
new file mode 100644
index 00000000000..6b5fd8a66c8
--- /dev/null
+++ b/include/linux/cpu_acct.h
@@ -0,0 +1,14 @@
+
+#ifndef _LINUX_CPU_ACCT_H
+#define _LINUX_CPU_ACCT_H
+
+#include <linux/cgroup.h>
+#include <asm/cputime.h>
+
+#ifdef CONFIG_CGROUP_CPUACCT
+extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
+#else
+static void inline cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
+#endif
+
+#endif
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
new file mode 100644
index 00000000000..16a51546db4
--- /dev/null
+++ b/include/linux/cpuidle.h
@@ -0,0 +1,180 @@
+/*
+ * cpuidle.h - a generic framework for CPU idle power management
+ *
+ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Shaohua Li <shaohua.li@intel.com>
+ *          Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#ifndef _LINUX_CPUIDLE_H
+#define _LINUX_CPUIDLE_H
+
+#include <linux/percpu.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+#include <linux/completion.h>
+
+#define CPUIDLE_STATE_MAX	8
+#define CPUIDLE_NAME_LEN	16
+
+struct cpuidle_device;
+
+
+/****************************
+ * CPUIDLE DEVICE INTERFACE *
+ ****************************/
+
+struct cpuidle_state {
+	char		name[CPUIDLE_NAME_LEN];
+	void		*driver_data;
+
+	unsigned int	flags;
+	unsigned int	exit_latency; /* in US */
+	unsigned int	power_usage; /* in mW */
+	unsigned int	target_residency; /* in US */
+
+	unsigned int	usage;
+	unsigned int	time; /* in US */
+
+	int (*enter)	(struct cpuidle_device *dev,
+			 struct cpuidle_state *state);
+};
+
+/* Idle State Flags */
+#define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
+#define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
+#define CPUIDLE_FLAG_SHALLOW	(0x10) /* low latency, minimal savings */
+#define CPUIDLE_FLAG_BALANCED	(0x20) /* medium latency, moderate savings */
+#define CPUIDLE_FLAG_DEEP	(0x40) /* high latency, large savings */
+
+#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
+
+/**
+ * cpuidle_get_statedata - retrieves private driver state data
+ * @state: the state
+ */
+static inline void * cpuidle_get_statedata(struct cpuidle_state *state)
+{
+	return state->driver_data;
+}
+
+/**
+ * cpuidle_set_statedata - stores private driver state data
+ * @state: the state
+ * @data: the private data
+ */
+static inline void
+cpuidle_set_statedata(struct cpuidle_state *state, void *data)
+{
+	state->driver_data = data;
+}
+
+struct cpuidle_state_kobj {
+	struct cpuidle_state *state;
+	struct completion kobj_unregister;
+	struct kobject kobj;
+};
+
+struct cpuidle_device {
+	int			enabled:1;
+	unsigned int		cpu;
+
+	int			last_residency;
+	int			state_count;
+	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
+	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
+	struct cpuidle_state	*last_state;
+
+	struct list_head 	device_list;
+	struct kobject		kobj;
+	struct completion	kobj_unregister;
+	void			*governor_data;
+};
+
+DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+
+/**
+ * cpuidle_get_last_residency - retrieves the last state's residency time
+ * @dev: the target CPU
+ *
+ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set
+ */
+static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
+{
+	return dev->last_residency;
+}
+
+
+/****************************
+ * CPUIDLE DRIVER INTERFACE *
+ ****************************/
+
+struct cpuidle_driver {
+	char			name[CPUIDLE_NAME_LEN];
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_driver(struct cpuidle_driver *drv);
+extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+extern int cpuidle_register_device(struct cpuidle_device *dev);
+extern void cpuidle_unregister_device(struct cpuidle_device *dev);
+
+extern void cpuidle_pause_and_lock(void);
+extern void cpuidle_resume_and_unlock(void);
+extern int cpuidle_enable_device(struct cpuidle_device *dev);
+extern void cpuidle_disable_device(struct cpuidle_device *dev);
+
+#else
+
+static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
+{return 0;}
+static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
+static inline int cpuidle_register_device(struct cpuidle_device *dev)
+{return 0;}
+static inline void cpuidle_unregister_device(struct cpuidle_device *dev) { }
+
+static inline void cpuidle_pause_and_lock(void) { }
+static inline void cpuidle_resume_and_unlock(void) { }
+static inline int cpuidle_enable_device(struct cpuidle_device *dev)
+{return 0;}
+static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
+
+#endif
+
+/******************************
+ * CPUIDLE GOVERNOR INTERFACE *
+ ******************************/
+
+struct cpuidle_governor {
+	char			name[CPUIDLE_NAME_LEN];
+	struct list_head 	governor_list;
+	unsigned int		rating;
+
+	int  (*enable)		(struct cpuidle_device *dev);
+	void (*disable)		(struct cpuidle_device *dev);
+
+	int  (*select)		(struct cpuidle_device *dev);
+	void (*reflect)		(struct cpuidle_device *dev);
+
+	struct module 		*owner;
+};
+
+#ifdef CONFIG_CPU_IDLE
+
+extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
+
+#else
+
+static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
+{return 0;}
+static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
+
+#endif
+
+#endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ea44d2e768a..ecae585ec3d 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
+#include <linux/cgroup.h>
 
 #ifdef CONFIG_CPUSETS
 
@@ -19,9 +20,8 @@ extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
-extern void cpuset_fork(struct task_struct *p);
-extern void cpuset_exit(struct task_struct *p);
 extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+extern cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -76,18 +76,22 @@ static inline int cpuset_do_slab_mem_spread(void)
 
 extern void cpuset_track_online_nodes(void);
 
+extern int current_cpuset_is_being_rebound(void);
+
 #else /* !CONFIG_CPUSETS */
 
 static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
-static inline void cpuset_fork(struct task_struct *p) {}
-static inline void cpuset_exit(struct task_struct *p) {}
 
 static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
 {
 	return cpu_possible_map;
 }
+static inline cpumask_t cpuset_cpus_allowed_locked(struct task_struct *p)
+{
+	return cpu_possible_map;
+}
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
@@ -148,6 +152,11 @@ static inline int cpuset_do_slab_mem_spread(void)
 
 static inline void cpuset_track_online_nodes(void) {}
 
+static inline int current_cpuset_is_being_rebound(void)
+{
+	return 0;
+}
+
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 55d1ca5e60f..ab94bc08355 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -26,6 +26,7 @@
  * Used to set current->delays->flags
  */
 #define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */
+#define DELAYACCT_PF_BLKIO	0x00000002	/* I am waiting on IO */
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 
@@ -39,6 +40,14 @@ extern void __delayacct_blkio_end(void);
 extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{
+	if (p->delays)
+		return (p->delays->flags & DELAYACCT_PF_BLKIO);
+	else
+		return 0;
+}
+
 static inline void delayacct_set_flag(int flag)
 {
 	if (current->delays)
@@ -71,6 +80,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
+	delayacct_set_flag(DELAYACCT_PF_BLKIO);
 	if (current->delays)
 		__delayacct_blkio_start();
 }
@@ -79,6 +89,7 @@ static inline void delayacct_blkio_end(void)
 {
 	if (current->delays)
 		__delayacct_blkio_end();
+	delayacct_clear_flag(DELAYACCT_PF_BLKIO);
 }
 
 static inline int delayacct_add_tsk(struct taskstats *d,
@@ -116,6 +127,8 @@ static inline int delayacct_add_tsk(struct taskstats *d,
 { return 0; }
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 { return 0; }
+static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
+{ return 0; }
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
 #endif
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 499f5373e21..37c66d1254b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -183,11 +183,14 @@ int dm_resume(struct mapped_device *md);
  */
 uint32_t dm_get_event_nr(struct mapped_device *md);
 int dm_wait_event(struct mapped_device *md, int event_nr);
+uint32_t dm_next_uevent_seq(struct mapped_device *md);
+void dm_uevent_add(struct mapped_device *md, struct list_head *elist);
 
 /*
  * Info functions.
  */
 const char *dm_device_name(struct mapped_device *md);
+int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct mapped_device *md);
 int dm_noflush_suspending(struct dm_target *ti);
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index b9348610782..523281c5b7f 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -131,6 +131,7 @@ struct dm_ioctl {
 	char name[DM_NAME_LEN];	/* device name */
 	char uuid[DM_UUID_LEN];	/* unique identifier for
 				 * the block device */
+	char data[7];		/* padding or data */
 };
 
 /*
@@ -285,9 +286,9 @@ typedef char ioctl_struct[308];
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	11
+#define DM_VERSION_MINOR	12
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2006-10-12)"
+#define DM_VERSION_EXTRA	"-ioctl (2007-10-02)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6a4d170ad9a..1657e995f72 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,7 @@ extern int dir_notify_enable;
 #define MS_SLAVE	(1<<19)	/* change to slave */
 #define MS_SHARED	(1<<20)	/* change to shared */
 #define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
 #define MS_ACTIVE	(1<<30)
 #define MS_NOUSER	(1<<31)
 
@@ -1459,7 +1460,8 @@ void unnamed_dev_init(void);
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount(struct file_system_type *);
+extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
+#define kern_mount(type) kern_mount_data(type, NULL)
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
 extern void umount_tree(struct vfsmount *, int, struct list_head *);
@@ -1922,6 +1924,8 @@ extern int vfs_fstat(unsigned int, struct kstat *);
 
 extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long);
 
+extern void get_filesystem(struct file_system_type *fs);
+extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
 extern struct super_block *user_get_super(dev_t);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index edb8024d744..6e35b92b1d2 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -469,8 +469,8 @@ struct hid_device {							/* device report descriptor */
 	/* handler for raw output data, used by hidraw */
 	int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t);
 #ifdef CONFIG_USB_HIDINPUT_POWERBOOK
-	unsigned long pb_pressed_fn[NBITS(KEY_MAX)];
-	unsigned long pb_pressed_numlock[NBITS(KEY_MAX)];
+	unsigned long pb_pressed_fn[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long pb_pressed_numlock[BITS_TO_LONGS(KEY_CNT)];
 #endif
 };
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 19db0a4ae44..2e4b8dd03cf 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -192,22 +192,20 @@ typedef unsigned char	byte;	/* used everywhere */
 struct hwif_s;
 typedef int (ide_ack_intr_t)(struct hwif_s *);
 
-#ifndef NO_DMA
-#define NO_DMA  255
-#endif
-
 /*
  * hwif_chipset_t is used to keep track of the specific hardware
  * chipset used by each IDE interface, if known.
  */
-typedef enum {	ide_unknown,	ide_generic,	ide_pci,
+enum {		ide_unknown,	ide_generic,	ide_pci,
 		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
 		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
 		ide_rz1000,	ide_trm290,
 		ide_cmd646,	ide_cy82c693,	ide_4drives,
 		ide_pmac,	ide_etrax100,	ide_acorn,
 		ide_au1xxx, ide_forced
-} hwif_chipset_t;
+};
+
+typedef u8 hwif_chipset_t;
 
 /*
  * Structure to hold all information about the location of this port
@@ -215,22 +213,16 @@ typedef enum {	ide_unknown,	ide_generic,	ide_pci,
 typedef struct hw_regs_s {
 	unsigned long	io_ports[IDE_NR_PORTS];	/* task file registers */
 	int		irq;			/* our irq number */
-	int		dma;			/* our dma entry */
 	ide_ack_intr_t	*ack_intr;		/* acknowledge interrupt */
 	hwif_chipset_t  chipset;
 	struct device	*dev;
 } hw_regs_t;
 
-/*
- * Register new hardware with ide
- */
-int ide_register_hw(hw_regs_t *, int, struct hwif_s **);
-int ide_register_hw_with_fixup(hw_regs_t *, int, struct hwif_s **,
-			       void (*)(struct hwif_s *));
+struct hwif_s * ide_find_port(unsigned long);
+
+int ide_register_hw(hw_regs_t *, void (*)(struct hwif_s *), int,
+		    struct hwif_s **);
 
-/*
- * Set up hw_regs_t structure before calling ide_register_hw (optional)
- */
 void ide_setup_ports(	hw_regs_t *hw,
 			unsigned long base,
 			int *offsets,
@@ -268,11 +260,7 @@ static inline void ide_std_init_ports(hw_regs_t *hw,
 # define ide_init_default_irq(base)	(0)
 #endif
 
-/*
- * ide_init_hwif_ports() is OBSOLETE and will be removed in 2.7 series.
- * New ports shouldn't define IDE_ARCH_OBSOLETE_INIT in <asm/ide.h>.
- */
-#ifdef IDE_ARCH_OBSOLETE_INIT
+#ifdef CONFIG_IDE_ARCH_OBSOLETE_INIT
 static inline void ide_init_hwif_ports(hw_regs_t *hw,
 				       unsigned long io_addr,
 				       unsigned long ctl_addr,
@@ -302,7 +290,7 @@ static inline void ide_init_hwif_ports(hw_regs_t *hw,
 	if (io_addr || ctl_addr)
 		printk(KERN_WARNING "%s: must not be called\n", __FUNCTION__);
 }
-#endif /* IDE_ARCH_OBSOLETE_INIT */
+#endif /* CONFIG_IDE_ARCH_OBSOLETE_INIT */
 
 /* Currently only m68k, apus and m8xx need it */
 #ifndef IDE_ARCH_ACK_INTR
@@ -363,7 +351,6 @@ typedef union {
  * ATA DATA Register Special.
  * ATA NSECTOR Count Register().
  * ATAPI Byte Count Register.
- * Channel index ordering pairs.
  */
 typedef union {
 	unsigned all			:16;
@@ -378,7 +365,7 @@ typedef union {
 #error "Please fix <asm/byteorder.h>"
 #endif
 	} b;
-} ata_nsector_t, ata_data_t, atapi_bcount_t, ata_index_t;
+} ata_nsector_t, ata_data_t, atapi_bcount_t;
 
 /*
  * ATA-IDE Select Register, aka Device-Head
@@ -657,7 +644,7 @@ typedef struct ide_drive_s {
     ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx))
 #define IDE_CHIPSET_IS_PCI(c)	((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
 
-struct ide_pci_device_s;
+struct ide_port_info;
 
 typedef struct hwif_s {
 	struct hwif_s *next;		/* for linked-list in ide_hwgroup_t */
@@ -672,7 +659,6 @@ typedef struct hwif_s {
 	unsigned long	sata_scr[SATA_NR_PORTS];
 	unsigned long	sata_misc[SATA_NR_PORTS];
 
-	hw_regs_t	hw;		/* Hardware info */
 	ide_drive_t	drives[MAX_DRIVES];	/* drive info */
 
 	u8 major;	/* our major number */
@@ -694,7 +680,9 @@ typedef struct hwif_s {
 	hwif_chipset_t chipset;	/* sub-module for tuning.. */
 
 	struct pci_dev  *pci_dev;	/* for pci chipsets */
-	struct ide_pci_device_s	*cds;	/* chipset device struct */
+	const struct ide_port_info *cds;	/* chipset device struct */
+
+	ide_ack_intr_t *ack_intr;
 
 	void (*rw_disk)(ide_drive_t *, struct request *);
 
@@ -725,6 +713,8 @@ typedef struct hwif_s {
 	u8 (*mdma_filter)(ide_drive_t *);
 	u8 (*udma_filter)(ide_drive_t *);
 
+	void (*fixup)(struct hwif_s *);
+
 	void (*ata_input_data)(ide_drive_t *, void *, u32);
 	void (*ata_output_data)(ide_drive_t *, void *, u32);
 
@@ -841,8 +831,6 @@ typedef struct hwgroup_s {
 
 		/* for pci chipsets */
 	struct pci_dev *pci_dev;
-		/* chipset device struct */
-	struct ide_pci_device_s *cds;
 
 		/* current request */
 	struct request *rq;
@@ -1030,36 +1018,16 @@ extern int ide_end_request (ide_drive_t *drive, int uptodate, int nrsecs);
 int ide_end_dequeued_request(ide_drive_t *drive, struct request *rq,
 			     int uptodate, int nr_sectors);
 
-/*
- * This is used on exit from the driver to designate the next irq handler
- * and also to start the safety timer.
- */
 extern void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry);
 
-/*
- * This is used on exit from the driver to designate the next irq handler
- * and start the safety time safely and atomically from the IRQ handler
- * with respect to the command issue (which it also does)
- */
 extern void ide_execute_command(ide_drive_t *, task_ioreg_t cmd, ide_handler_t *, unsigned int, ide_expiry_t *);
 
 ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8);
 
-/*
- * ide_error() takes action based on the error returned by the controller.
- * The caller should return immediately after invoking this.
- *
- * (drive, msg, status)
- */
 ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat);
 
 ide_startstop_t __ide_abort(ide_drive_t *, struct request *);
 
-/*
- * Abort a running command on the controller triggering the abort
- * from a host side, non error situation
- * (drive, msg)
- */
 extern ide_startstop_t ide_abort(ide_drive_t *, const char *);
 
 extern void ide_fix_driveid(struct hd_driveid *);
@@ -1075,15 +1043,8 @@ extern void ide_fixstring(u8 *, const int, const int);
 
 int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
 
-/*
- * Start a reset operation for an IDE interface.
- * The caller should return immediately after invoking this.
- */
 extern ide_startstop_t ide_do_reset (ide_drive_t *);
 
-/*
- * This function is intended to be used prior to invoking ide_do_drive_cmd().
- */
 extern void ide_init_drive_cmd (struct request *rq);
 
 /*
@@ -1098,13 +1059,6 @@ typedef enum {
 
 extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t);
 
-/*
- * Clean up after success/failure of an explicit drive cmd.
- * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_CMD).
- * stat/err are used only when (HWGROUP(drive)->rq->cmd == IDE_DRIVE_TASK_MASK).
- *
- * (ide_drive_t *drive, u8 stat, u8 err)
- */
 extern void ide_end_drive_cmd(ide_drive_t *, u8, u8);
 
 /*
@@ -1177,10 +1131,6 @@ extern int taskfile_lib_get_identify(ide_drive_t *drive, u8 *);
 
 extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout);
 
-/*
- * ide_stall_queue() can be used by a drive to give excess bandwidth back
- * to the hwgroup by sleeping for timeout jiffies.
- */
 extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
 
 extern int ide_spin_wait_hwgroup(ide_drive_t *);
@@ -1200,8 +1150,8 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o
 #define ide_pci_register_driver(d) pci_register_driver(d)
 #endif
 
-void ide_pci_setup_ports(struct pci_dev *, struct ide_pci_device_s *, int, ata_index_t *);
-extern void ide_setup_pci_noise (struct pci_dev *dev, struct ide_pci_device_s *d);
+void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, u8 *);
+void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
 
 extern void default_hwif_iops(ide_hwif_t *);
 extern void default_hwif_mmiops(ide_hwif_t *);
@@ -1261,6 +1211,14 @@ enum {
 	IDE_HFLAG_SERIALIZE		= (1 << 20),
 	/* use legacy IRQs */
 	IDE_HFLAG_LEGACY_IRQS		= (1 << 21),
+	/* force use of legacy IRQs */
+	IDE_HFLAG_FORCE_LEGACY_IRQS	= (1 << 22),
+	/* limit LBA48 requests to 256 sectors */
+	IDE_HFLAG_RQSIZE_256		= (1 << 23),
+	/* use 32-bit I/O ops */
+	IDE_HFLAG_IO_32BIT		= (1 << 24),
+	/* unmask IRQs */
+	IDE_HFLAG_UNMASK_IRQS		= (1 << 25),
 };
 
 #ifdef CONFIG_BLK_DEV_OFFBOARD
@@ -1269,7 +1227,7 @@ enum {
 # define IDE_HFLAG_OFF_BOARD	0
 #endif
 
-typedef struct ide_pci_device_s {
+struct ide_port_info {
 	char			*name;
 	unsigned int		(*init_chipset)(struct pci_dev *, const char *);
 	void			(*init_iops)(ide_hwif_t *);
@@ -1277,17 +1235,17 @@ typedef struct ide_pci_device_s {
 	void			(*init_dma)(ide_hwif_t *, unsigned long);
 	void			(*fixup)(ide_hwif_t *);
 	ide_pci_enablebit_t	enablebits[2];
+	hwif_chipset_t		chipset;
 	unsigned int		extra;
-	struct ide_pci_device_s	*next;
 	u32			host_flags;
 	u8			pio_mask;
 	u8			swdma_mask;
 	u8			mwdma_mask;
 	u8			udma_mask;
-} ide_pci_device_t;
+};
 
-extern int ide_setup_pci_device(struct pci_dev *, ide_pci_device_t *);
-extern int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, ide_pci_device_t *);
+int ide_setup_pci_device(struct pci_dev *, const struct ide_port_info *);
+int ide_setup_pci_devices(struct pci_dev *, struct pci_dev *, const struct ide_port_info *);
 
 void ide_map_sg(ide_drive_t *, struct request *);
 void ide_init_sg_cmd(ide_drive_t *, struct request *);
@@ -1370,8 +1328,7 @@ void ide_unregister_region(struct gendisk *);
 
 void ide_undecoded_slave(ide_hwif_t *);
 
-int probe_hwif_init_with_fixup(ide_hwif_t *, void (*)(ide_hwif_t *));
-extern int probe_hwif_init(ide_hwif_t *);
+int ide_device_add(u8 idx[4]);
 
 static inline void *ide_get_hwifdata (ide_hwif_t * hwif)
 {
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d4b2f1c76e1..cae35b6b9ae 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -67,9 +67,6 @@
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
-	.pgrp		= 0,						\
-	.tty_old_pgrp   = NULL,						\
-	{ .__session      = 0},						\
 }
 
 extern struct nsproxy init_nsproxy;
@@ -94,15 +91,18 @@ extern struct group_info init_groups;
 
 #define INIT_STRUCT_PID {						\
 	.count 		= ATOMIC_INIT(1),				\
-	.nr		= 0, 						\
-	/* Don't put this struct pid in pid_hash */			\
-	.pid_chain	= { .next = NULL, .pprev = NULL },		\
 	.tasks		= {						\
 		{ .first = &init_task.pids[PIDTYPE_PID].node },		\
 		{ .first = &init_task.pids[PIDTYPE_PGID].node },	\
 		{ .first = &init_task.pids[PIDTYPE_SID].node },		\
 	},								\
 	.rcu		= RCU_HEAD_INIT,				\
+	.level		= 0,						\
+	.numbers	= { {						\
+		.nr		= 0,					\
+		.ns		= &init_pid_ns,				\
+		.pid_chain	= { .next = NULL, .pprev = NULL },	\
+	}, }								\
 }
 
 #define INIT_PID_LINK(type) 					\
diff --git a/include/linux/input.h b/include/linux/input.h
index f30da6fc08e..62268929856 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -98,6 +98,7 @@ struct input_absinfo {
 #define EV_PWR			0x16
 #define EV_FF_STATUS		0x17
 #define EV_MAX			0x1f
+#define EV_CNT			(EV_MAX+1)
 
 /*
  * Synchronization events.
@@ -567,6 +568,7 @@ struct input_absinfo {
 /* We avoid low common keys in module aliases so they don't get huge. */
 #define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x1ff
+#define KEY_CNT			(KEY_MAX+1)
 
 /*
  * Relative axes
@@ -583,6 +585,7 @@ struct input_absinfo {
 #define REL_WHEEL		0x08
 #define REL_MISC		0x09
 #define REL_MAX			0x0f
+#define REL_CNT			(REL_MAX+1)
 
 /*
  * Absolute axes
@@ -615,6 +618,7 @@ struct input_absinfo {
 #define ABS_VOLUME		0x20
 #define ABS_MISC		0x28
 #define ABS_MAX			0x3f
+#define ABS_CNT			(ABS_MAX+1)
 
 /*
  * Switch events
@@ -625,6 +629,7 @@ struct input_absinfo {
 #define SW_HEADPHONE_INSERT	0x02  /* set = inserted */
 #define SW_RADIO		0x03  /* set = radio enabled */
 #define SW_MAX			0x0f
+#define SW_CNT			(SW_MAX+1)
 
 /*
  * Misc events
@@ -636,6 +641,7 @@ struct input_absinfo {
 #define MSC_RAW			0x03
 #define MSC_SCAN		0x04
 #define MSC_MAX			0x07
+#define MSC_CNT			(MSC_MAX+1)
 
 /*
  * LEDs
@@ -653,6 +659,7 @@ struct input_absinfo {
 #define LED_MAIL		0x09
 #define LED_CHARGING		0x0a
 #define LED_MAX			0x0f
+#define LED_CNT			(LED_MAX+1)
 
 /*
  * Autorepeat values
@@ -670,6 +677,7 @@ struct input_absinfo {
 #define SND_BELL		0x01
 #define SND_TONE		0x02
 #define SND_MAX			0x07
+#define SND_CNT			(SND_MAX+1)
 
 /*
  * IDs.
@@ -920,6 +928,7 @@ struct ff_effect {
 #define FF_AUTOCENTER	0x61
 
 #define FF_MAX		0x7f
+#define FF_CNT		(FF_MAX+1)
 
 #ifdef __KERNEL__
 
@@ -932,10 +941,6 @@ struct ff_effect {
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
-#define NBITS(x) (((x)/BITS_PER_LONG)+1)
-#define BIT(x)	(1UL<<((x)%BITS_PER_LONG))
-#define LONG(x) ((x)/BITS_PER_LONG)
-
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -1005,28 +1010,30 @@ struct ff_effect {
  * @going_away: marks devices that are in a middle of unregistering and
  *	causes input_open_device*() fail with -ENODEV.
  * @dev: driver model's view of this device
+ * @cdev: union for struct device pointer
  * @h_list: list of input handles associated with the device. When
  *	accessing the list dev->mutex must be held
  * @node: used to place the device onto input_dev_list
  */
 struct input_dev {
-
+	/* private: */
 	void *private;	/* do not use */
+	/* public: */
 
 	const char *name;
 	const char *phys;
 	const char *uniq;
 	struct input_id id;
 
-	unsigned long evbit[NBITS(EV_MAX)];
-	unsigned long keybit[NBITS(KEY_MAX)];
-	unsigned long relbit[NBITS(REL_MAX)];
-	unsigned long absbit[NBITS(ABS_MAX)];
-	unsigned long mscbit[NBITS(MSC_MAX)];
-	unsigned long ledbit[NBITS(LED_MAX)];
-	unsigned long sndbit[NBITS(SND_MAX)];
-	unsigned long ffbit[NBITS(FF_MAX)];
-	unsigned long swbit[NBITS(SW_MAX)];
+	unsigned long evbit[BITS_TO_LONGS(EV_CNT)];
+	unsigned long keybit[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long relbit[BITS_TO_LONGS(REL_CNT)];
+	unsigned long absbit[BITS_TO_LONGS(ABS_CNT)];
+	unsigned long mscbit[BITS_TO_LONGS(MSC_CNT)];
+	unsigned long ledbit[BITS_TO_LONGS(LED_CNT)];
+	unsigned long sndbit[BITS_TO_LONGS(SND_CNT)];
+	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
+	unsigned long swbit[BITS_TO_LONGS(SW_CNT)];
 
 	unsigned int keycodemax;
 	unsigned int keycodesize;
@@ -1044,10 +1051,10 @@ struct input_dev {
 	int abs[ABS_MAX + 1];
 	int rep[REP_MAX + 1];
 
-	unsigned long key[NBITS(KEY_MAX)];
-	unsigned long led[NBITS(LED_MAX)];
-	unsigned long snd[NBITS(SND_MAX)];
-	unsigned long sw[NBITS(SW_MAX)];
+	unsigned long key[BITS_TO_LONGS(KEY_CNT)];
+	unsigned long led[BITS_TO_LONGS(LED_CNT)];
+	unsigned long snd[BITS_TO_LONGS(SND_CNT)];
+	unsigned long sw[BITS_TO_LONGS(SW_CNT)];
 
 	int absmax[ABS_MAX + 1];
 	int absmin[ABS_MAX + 1];
@@ -1291,7 +1298,7 @@ static inline void input_set_abs_params(struct input_dev *dev, int axis, int min
 	dev->absfuzz[axis] = fuzz;
 	dev->absflat[axis] = flat;
 
-	dev->absbit[LONG(axis)] |= BIT(axis);
+	dev->absbit[BIT_WORD(axis)] |= BIT_MASK(axis);
 }
 
 extern struct class input_class;
@@ -1332,7 +1339,7 @@ struct ff_device {
 
 	void *private;
 
-	unsigned long ffbit[NBITS(FF_MAX)];
+	unsigned long ffbit[BITS_TO_LONGS(FF_CNT)];
 
 	struct mutex mutex;
 
diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index ee111834091..408696ea518 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -89,6 +89,7 @@ struct kern_ipc_perm
 {
 	spinlock_t	lock;
 	int		deleted;
+	int		id;
 	key_t		key;
 	uid_t		uid;
 	gid_t		gid;
@@ -110,6 +111,8 @@ struct ipc_namespace {
 	int		msg_ctlmax;
 	int		msg_ctlmnb;
 	int		msg_ctlmni;
+	atomic_t	msg_bytes;
+	atomic_t	msg_hdrs;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index a3abf51e488..16e7ed855a1 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -58,7 +58,7 @@
  * CONFIG_JBD_DEBUG is on.
  */
 #define JBD_EXPENSIVE_CHECKING
-extern int journal_enable_debug;
+extern u8 journal_enable_debug;
 
 #define jbd_debug(n, f, a...)						\
 	do {								\
@@ -248,17 +248,7 @@ typedef struct journal_superblock_s
 #include <linux/fs.h>
 #include <linux/sched.h>
 
-#define JBD_ASSERTIONS
-#ifdef JBD_ASSERTIONS
-#define J_ASSERT(assert)						\
-do {									\
-	if (!(assert)) {						\
-		printk (KERN_EMERG					\
-			"Assertion failure in %s() at %s:%d: \"%s\"\n",	\
-			__FUNCTION__, __FILE__, __LINE__, # assert);	\
-		BUG();							\
-	}								\
-} while (0)
+#define J_ASSERT(assert)	BUG_ON(!(assert))
 
 #if defined(CONFIG_BUFFER_DEBUG)
 void buffer_assertion_failure(struct buffer_head *bh);
@@ -274,10 +264,6 @@ void buffer_assertion_failure(struct buffer_head *bh);
 #define J_ASSERT_JH(jh, expr)	J_ASSERT(expr)
 #endif
 
-#else
-#define J_ASSERT(assert)	do { } while (0)
-#endif		/* JBD_ASSERTIONS */
-
 #if defined(JBD_PARANOID_IOFAIL)
 #define J_EXPECT(expr, why...)		J_ASSERT(expr)
 #define J_EXPECT_BH(bh, expr, why...)	J_ASSERT_BH(bh, expr)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ad4b82ce84a..2d9c448d8c5 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -187,6 +187,8 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+		unsigned long long *crash_size, unsigned long long *crash_base);
 
 #else /* !CONFIG_KEXEC */
 struct pt_regs;
diff --git a/include/linux/keyboard.h b/include/linux/keyboard.h
index 33b5c2e325b..65c2d70853e 100644
--- a/include/linux/keyboard.h
+++ b/include/linux/keyboard.h
@@ -23,10 +23,21 @@
 #define MAX_NR_OF_USER_KEYMAPS 256 	/* should be at least 7 */
 
 #ifdef __KERNEL__
+struct notifier_block;
 extern const int NR_TYPES;
 extern const int max_vals[];
 extern unsigned short *key_maps[MAX_NR_KEYMAPS];
 extern unsigned short plain_map[NR_KEYS];
+
+struct keyboard_notifier_param {
+	struct vc_data *vc;	/* VC on which the keyboard press was done */
+	int down;		/* Pressure of the key? */
+	int shift;		/* Current shift mask */
+	unsigned int value;	/* keycode, unicode value or keysym */
+};
+
+extern int register_keyboard_notifier(struct notifier_block *nb);
+extern int unregister_keyboard_notifier(struct notifier_block *nb);
 #endif
 
 #define MAX_NR_FUNC	256	/* max nr of strings assigned to keys */
diff --git a/include/linux/list.h b/include/linux/list.h
index b0cf0135fe3..75ce2cb4ff6 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -478,8 +478,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
 		pos = n, n = pos->next)
 
 /**
- * list_for_each_prev_safe - iterate over a list backwards safe against removal
-			of list entry
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
  * @pos:	the &struct list_head to use as a loop cursor.
  * @n:		another &struct list_head to use as temporary storage
  * @head:	the head for your list.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index f6279f68a82..4c4d236ded1 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -276,6 +276,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 				 (lock)->dep_map.key, sub)
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+
+/*
  * Acquire a lock.
  *
  * Values for "read":
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 722d4755060..1fa0c2ce4de 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -37,6 +37,7 @@
 
 #define SMB_SUPER_MAGIC		0x517B
 #define USBDEVICE_SUPER_MAGIC	0x9fa2
+#define CGROUP_SUPER_MAGIC	0x27e0eb
 
 #define FUTEXFS_SUPER_MAGIC	0xBAD1DEA
 #define INOTIFYFS_SUPER_MAGIC	0x2BAD1DEA
diff --git a/include/linux/marker.h b/include/linux/marker.h
new file mode 100644
index 00000000000..5f36cf946bc
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,129 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+
+struct module;
+struct marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: pointer of type struct marker
+ * @private_data: caller site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct marker *mdata,
+	void *private_data, const char *fmt, ...);
+
+struct marker {
+	const char *name;	/* Marker name */
+	const char *format;	/* Marker format string, describing the
+				 * variable argument list.
+				 */
+	char state;		/* Marker state. */
+	marker_probe_func *call;/* Probe handler function pointer */
+	void *private;		/* Private probe data */
+} __attribute__((aligned(8)));
+
+#ifdef CONFIG_MARKERS
+
+/*
+ * Note : the empty asm volatile with read constraint is used here instead of a
+ * "used" attribute to fix a gcc 4.1.x bug.
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ */
+#define __trace_mark(name, call_data, format, args...)			\
+	do {								\
+		static const char __mstrtab_name_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= #name;						\
+		static const char __mstrtab_format_##name[]		\
+		__attribute__((section("__markers_strings")))		\
+		= format;						\
+		static struct marker __mark_##name			\
+		__attribute__((section("__markers"), aligned(8))) =	\
+		{ __mstrtab_name_##name, __mstrtab_format_##name,	\
+		0, __mark_empty_function, NULL };			\
+		__mark_check_format(format, ## args);			\
+		if (unlikely(__mark_##name.state)) {			\
+			preempt_disable();				\
+			(*__mark_##name.call)				\
+				(&__mark_##name, call_data,		\
+				format, ## args);			\
+			preempt_enable();				\
+		}							\
+	} while (0)
+
+extern void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module, int *refcount);
+#else /* !CONFIG_MARKERS */
+#define __trace_mark(name, call_data, format, args...) \
+		__mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module, int *refcount)
+{ }
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker.
+ */
+#define trace_mark(name, format, args...) \
+	__trace_mark(name, NULL, format, ## args)
+
+#define MARK_MAX_FORMAT_LEN	1024
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
+{
+}
+
+extern marker_probe_func __mark_empty_function;
+
+/*
+ * Connect a probe to a marker.
+ * private data pointer must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+				marker_probe_func *probe, void *private);
+
+/*
+ * Returns the private data given to marker_probe_register.
+ */
+extern void *marker_probe_unregister(const char *name);
+/*
+ * Unregister a marker by providing the registered private data.
+ */
+extern void *marker_probe_unregister_private_data(void *private);
+
+extern int marker_arm(const char *name);
+extern int marker_disarm(const char *name);
+extern void *marker_get_private_data(const char *name);
+
+#endif
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 38c04d61ee0..59c4865bc85 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -148,14 +148,6 @@ extern void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 extern void mpol_fix_fork_child_flag(struct task_struct *p);
-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
-
-#ifdef CONFIG_CPUSETS
-#define current_cpuset_is_being_rebound() \
-				(cpuset_being_rebound == current->cpuset)
-#else
-#define current_cpuset_is_being_rebound() 0
-#endif
 
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
@@ -173,8 +165,6 @@ static inline void check_highest_zone(enum zone_type k)
 int do_migrate_pages(struct mm_struct *mm,
 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
 
-extern void *cpuset_being_rebound;	/* Trigger mpol_copy vma rebind */
-
 #else
 
 struct mempolicy {};
@@ -248,8 +238,6 @@ static inline void mpol_fix_fork_child_flag(struct task_struct *p)
 {
 }
 
-#define set_cpuset_being_rebound(x) do {} while (0)
-
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
  		unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol)
 {
diff --git a/include/linux/module.h b/include/linux/module.h
index 642f325e491..2cbc0b87e32 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,6 +15,7 @@
 #include <linux/stringify.h>
 #include <linux/kobject.h>
 #include <linux/moduleparam.h>
+#include <linux/marker.h>
 #include <asm/local.h>
 
 #include <asm/module.h>
@@ -354,6 +355,10 @@ struct module
 	/* The command line arguments (may be mangled).  People like
 	   keeping pointers to this stuff */
 	char *args;
+#ifdef CONFIG_MARKERS
+	struct marker *markers;
+	unsigned int num_markers;
+#endif
 };
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
@@ -457,6 +462,8 @@ int unregister_module_notifier(struct notifier_block * nb);
 
 extern void print_modules(void);
 
+extern void module_update_markers(struct module *probe_module, int *refcount);
+
 #else /* !CONFIG_MODULES... */
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
@@ -556,6 +563,11 @@ static inline void print_modules(void)
 {
 }
 
+static inline void module_update_markers(struct module *probe_module,
+		int *refcount)
+{
+}
+
 #endif /* CONFIG_MODULES */
 
 struct device_driver;
diff --git a/include/linux/msg.h b/include/linux/msg.h
index f1b60740d64..10a3d5a1abf 100644
--- a/include/linux/msg.h
+++ b/include/linux/msg.h
@@ -77,7 +77,6 @@ struct msg_msg {
 /* one msq_queue structure for each present queue on the system */
 struct msg_queue {
 	struct kern_ipc_perm q_perm;
-	int q_id;
 	time_t q_stime;			/* last msgsnd time */
 	time_t q_rtime;			/* last msgrcv time */
 	time_t q_ctime;			/* last change time */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 452c88d971a..6f85db3535e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -407,6 +407,24 @@ static inline void napi_enable(struct napi_struct *n)
 	clear_bit(NAPI_STATE_SCHED, &n->state);
 }
 
+#ifdef CONFIG_SMP
+/**
+ *	napi_synchronize - wait until NAPI is not running
+ *	@n: napi context
+ *
+ * Wait until NAPI is done being scheduled on this context.
+ * Waits till any outstanding processing completes but
+ * does not disable future activations.
+ */
+static inline void napi_synchronize(const struct napi_struct *n)
+{
+	while (test_bit(NAPI_STATE_SCHED, &n->state))
+		msleep(1);
+}
+#else
+# define napi_synchronize(n)	barrier()
+#endif
+
 /*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c5164c257f7..e82a6ebc725 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -160,6 +160,12 @@ struct nfs_inode {
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
 
+	/* Number of in-flight sillydelete RPC calls */
+	atomic_t		silly_count;
+	/* List of deferred sillydelete requests */
+	struct hlist_head	silly_list;
+	wait_queue_head_t	waitqueue;
+
 #ifdef CONFIG_NFS_V4
 	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
@@ -394,6 +400,8 @@ extern void nfs_release_automount_timer(void);
  */
 extern int  nfs_async_unlink(struct inode *dir, struct dentry *dentry);
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
+extern void nfs_block_sillyrename(struct dentry *dentry);
+extern void nfs_unblock_sillyrename(struct dentry *dentry);
 
 /*
  * linux/fs/nfs/write.c
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fad7ff17e46..0c40cc0b4a3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -231,5 +231,22 @@ static inline int notifier_to_errno(int ret)
 #define PM_SUSPEND_PREPARE	0x0003 /* Going to suspend the system */
 #define PM_POST_SUSPEND		0x0004 /* Suspend finished */
 
+/* Console keyboard events.
+ * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and
+ * KBD_KEYSYM. */
+#define KBD_KEYCODE		0x0001 /* Keyboard keycode, called before any other */
+#define KBD_UNBOUND_KEYCODE	0x0002 /* Keyboard keycode which is not bound to any other */
+#define KBD_UNICODE		0x0003 /* Keyboard unicode */
+#define KBD_KEYSYM		0x0004 /* Keyboard keysym */
+#define KBD_POST_KEYSYM		0x0005 /* Called after keyboard keysym interpretation */
+
+extern struct blocking_notifier_head reboot_notifier_list;
+
+/* Virtual Terminal events. */
+#define VT_ALLOCATE		0x0001 /* Console got allocated */
+#define VT_DEALLOCATE		0x0002 /* Console will be deallocated */
+#define VT_WRITE		0x0003 /* A char got output */
+#define VT_UPDATE		0x0004 /* A bigger update occurred */
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 033a648709b..0e66b57631f 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -32,8 +32,39 @@ struct nsproxy {
 };
 extern struct nsproxy init_nsproxy;
 
+/*
+ * the namespaces access rules are:
+ *
+ *  1. only current task is allowed to change tsk->nsproxy pointer or
+ *     any pointer on the nsproxy itself
+ *
+ *  2. when accessing (i.e. reading) current task's namespaces - no
+ *     precautions should be taken - just dereference the pointers
+ *
+ *  3. the access to other task namespaces is performed like this
+ *     rcu_read_lock();
+ *     nsproxy = task_nsproxy(tsk);
+ *     if (nsproxy != NULL) {
+ *             / *
+ *               * work with the namespaces here
+ *               * e.g. get the reference on one of them
+ *               * /
+ *     } / *
+ *         * NULL task_nsproxy() means that this task is
+ *         * almost dead (zombie)
+ *         * /
+ *     rcu_read_unlock();
+ *
+ */
+
+static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
+{
+	return rcu_dereference(tsk->nsproxy);
+}
+
 int copy_namespaces(unsigned long flags, struct task_struct *tsk);
-void get_task_namespaces(struct task_struct *tsk);
+void exit_task_namespaces(struct task_struct *tsk);
+void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
 void free_nsproxy(struct nsproxy *ns);
 int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
 	struct fs_struct *);
@@ -45,14 +76,15 @@ static inline void put_nsproxy(struct nsproxy *ns)
 	}
 }
 
-static inline void exit_task_namespaces(struct task_struct *p)
+static inline void get_nsproxy(struct nsproxy *ns)
 {
-	struct nsproxy *ns = p->nsproxy;
-	if (ns) {
-		task_lock(p);
-		p->nsproxy = NULL;
-		task_unlock(p);
-		put_nsproxy(ns);
-	}
+	atomic_inc(&ns->count);
 }
+
+#ifdef CONFIG_CGROUP_NS
+int ns_cgroup_clone(struct task_struct *tsk);
+#else
+static inline int ns_cgroup_clone(struct task_struct *tsk) { return 0; }
+#endif
+
 #endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 6df80e98591..5c39b9270ff 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -16,8 +16,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/types.h>
+#include <linux/bitops.h>
 
-#include <asm/bitops.h>
 #include <asm/prom.h>
 
 /* flag descriptions */
diff --git a/include/linux/phantom.h b/include/linux/phantom.h
index d3ebbfae690..96f4048a6cc 100644
--- a/include/linux/phantom.h
+++ b/include/linux/phantom.h
@@ -30,7 +30,11 @@ struct phm_regs {
 #define PHN_SET_REG		_IOW (PH_IOC_MAGIC, 1, struct phm_reg *)
 #define PHN_GET_REGS		_IOWR(PH_IOC_MAGIC, 2, struct phm_regs *)
 #define PHN_SET_REGS		_IOW (PH_IOC_MAGIC, 3, struct phm_regs *)
-#define PH_IOC_MAXNR		3
+/* this ioctl tells the driver, that the caller is not OpenHaptics and might
+ * use improved registers update (no more phantom switchoffs when using
+ * libphantom) */
+#define PHN_NOT_OH		_IO  (PH_IOC_MAGIC, 4)
+#define PH_IOC_MAXNR		4
 
 #define PHN_CONTROL		0x6     /* control byte in iaddr space */
 #define PHN_CTL_AMP		0x1     /*   switch after torques change */
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 1e0e4e3423a..e29a900a849 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -40,15 +40,28 @@ enum pid_type
  * processes.
  */
 
-struct pid
-{
-	atomic_t count;
+
+/*
+ * struct upid is used to get the id of the struct pid, as it is
+ * seen in particular namespace. Later the struct pid is found with
+ * find_pid_ns() using the int nr and struct pid_namespace *ns.
+ */
+
+struct upid {
 	/* Try to keep pid_chain in the same cacheline as nr for find_pid */
 	int nr;
+	struct pid_namespace *ns;
 	struct hlist_node pid_chain;
+};
+
+struct pid
+{
+	atomic_t count;
 	/* lists of tasks that use this pid */
 	struct hlist_head tasks[PIDTYPE_MAX];
 	struct rcu_head rcu;
+	int level;
+	struct upid numbers[1];
 };
 
 extern struct pid init_struct_pid;
@@ -83,26 +96,60 @@ extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type));
 extern void FASTCALL(transfer_pid(struct task_struct *old,
 				  struct task_struct *new, enum pid_type));
 
+struct pid_namespace;
+extern struct pid_namespace init_pid_ns;
+
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
  * or rcu_read_lock() held.
+ *
+ * find_pid_ns() finds the pid in the namespace specified
+ * find_pid() find the pid by its global id, i.e. in the init namespace
+ * find_vpid() finr the pid by its virtual id, i.e. in the current namespace
+ *
+ * see also find_task_by_pid() set in include/linux/sched.h
  */
-extern struct pid *FASTCALL(find_pid(int nr));
+extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns));
+extern struct pid *find_vpid(int nr);
+extern struct pid *find_pid(int nr);
 
 /*
  * Lookup a PID in the hash table, and return with it's count elevated.
  */
 extern struct pid *find_get_pid(int nr);
-extern struct pid *find_ge_pid(int nr);
+extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 
-extern struct pid *alloc_pid(void);
+extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void FASTCALL(free_pid(struct pid *pid));
+extern void zap_pid_ns_processes(struct pid_namespace *pid_ns);
+
+/*
+ * the helpers to get the pid's id seen from different namespaces
+ *
+ * pid_nr()    : global id, i.e. the id seen from the init namespace;
+ * pid_vnr()   : virtual id, i.e. the id seen from the namespace this pid
+ *               belongs to. this only makes sence when called in the
+ *               context of the task that belongs to the same namespace;
+ * pid_nr_ns() : id seen from the ns specified.
+ *
+ * see also task_xid_nr() etc in include/linux/sched.h
+ */
 
 static inline pid_t pid_nr(struct pid *pid)
 {
 	pid_t nr = 0;
 	if (pid)
-		nr = pid->nr;
+		nr = pid->numbers[0].nr;
+	return nr;
+}
+
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
+
+static inline pid_t pid_vnr(struct pid *pid)
+{
+	pid_t nr = 0;
+	if (pid)
+		nr = pid->numbers[pid->level].nr;
 	return nr;
 }
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9a17e08ff0..0135c76c76c 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -4,7 +4,6 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/threads.h>
-#include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/kref.h>
 
@@ -20,13 +19,21 @@ struct pid_namespace {
 	struct pidmap pidmap[PIDMAP_ENTRIES];
 	int last_pid;
 	struct task_struct *child_reaper;
+	struct kmem_cache *pid_cachep;
+	int level;
+	struct pid_namespace *parent;
+#ifdef CONFIG_PROC_FS
+	struct vfsmount *proc_mnt;
+#endif
 };
 
 extern struct pid_namespace init_pid_ns;
 
-static inline void get_pid_ns(struct pid_namespace *ns)
+static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
 {
-	kref_get(&ns->kref);
+	if (ns != &init_pid_ns)
+		kref_get(&ns->kref);
+	return ns;
 }
 
 extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
@@ -34,12 +41,19 @@ extern void free_pid_ns(struct kref *kref);
 
 static inline void put_pid_ns(struct pid_namespace *ns)
 {
-	kref_put(&ns->kref, free_pid_ns);
+	if (ns != &init_pid_ns)
+		kref_put(&ns->kref, free_pid_ns);
 }
 
-static inline struct task_struct *child_reaper(struct task_struct *tsk)
+static inline struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
 {
-	return init_pid_ns.child_reaper;
+	return tsk->nsproxy->pid_ns;
+}
+
+static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
+{
+	BUG_ON(tsk != current);
+	return tsk->nsproxy->pid_ns->child_reaper;
 }
 
 #endif /* _LINUX_PID_NS_H */
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index 1adfe668d03..af7c36a5a52 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -34,17 +34,12 @@
 	
 */
 
-/*
- *	These cannot be do{}while(0) macros. See the mental gymnastics in
- *	the loop macro.
- */
- 
 #ifndef ARCH_HAS_PREFETCH
-static inline void prefetch(const void *x) {;}
+#define prefetch(x) __builtin_prefetch(x)
 #endif
 
 #ifndef ARCH_HAS_PREFETCHW
-static inline void prefetchw(const void *x) {;}
+#define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
 #ifndef ARCH_HAS_SPINLOCK_PREFETCH
diff --git a/include/linux/prio_heap.h b/include/linux/prio_heap.h
new file mode 100644
index 00000000000..08094350f26
--- /dev/null
+++ b/include/linux/prio_heap.h
@@ -0,0 +1,58 @@
+#ifndef _LINUX_PRIO_HEAP_H
+#define _LINUX_PRIO_HEAP_H
+
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/gfp.h>
+
+/**
+ * struct ptr_heap - simple static-sized priority heap
+ * @ptrs - pointer to data area
+ * @max - max number of elements that can be stored in @ptrs
+ * @size - current number of valid elements in @ptrs (in the range 0..@size-1
+ * @gt: comparison operator, which should implement "greater than"
+ */
+struct ptr_heap {
+	void **ptrs;
+	int max;
+	int size;
+	int (*gt)(void *, void *);
+};
+
+/**
+ * heap_init - initialize an empty heap with a given memory size
+ * @heap: the heap structure to be initialized
+ * @size: amount of memory to use in bytes
+ * @gfp_mask: mask to pass to kmalloc()
+ * @gt: comparison operator, which should implement "greater than"
+ */
+extern int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+		     int (*gt)(void *, void *));
+
+/**
+ * heap_free - release a heap's storage
+ * @heap: the heap structure whose data should be released
+ */
+void heap_free(struct ptr_heap *heap);
+
+/**
+ * heap_insert - insert a value into the heap and return any overflowed value
+ * @heap: the heap to be operated on
+ * @p: the pointer to be inserted
+ *
+ * Attempts to insert the given value into the priority heap. If the
+ * heap is full prior to the insertion, then the resulting heap will
+ * consist of the smallest @max elements of the original heap and the
+ * new element; the greatest element will be removed from the heap and
+ * returned. Note that the returned element will be the new element
+ * (i.e. no change to the heap) if the new element is greater than all
+ * elements currently in the heap.
+ */
+extern void *heap_insert(struct ptr_heap *heap, void *p);
+
+
+
+#endif /* _LINUX_PRIO_HEAP_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 20741f668f7..1ff46167206 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -125,7 +125,8 @@ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
 extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
 
 extern struct vfsmount *proc_mnt;
-extern int proc_fill_super(struct super_block *,void *,int);
+struct pid_namespace;
+extern int proc_fill_super(struct super_block *);
 extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
 
 /*
@@ -142,6 +143,9 @@ extern const struct file_operations proc_kcore_operations;
 extern const struct file_operations proc_kmsg_operations;
 extern const struct file_operations ppc_htab_operations;
 
+extern int pid_ns_prepare_proc(struct pid_namespace *ns);
+extern void pid_ns_release_proc(struct pid_namespace *ns);
+
 /*
  * proc_tty.c
  */
@@ -207,7 +211,9 @@ extern void proc_net_remove(struct net *net, const char *name);
 #define proc_net_create(net, name, mode, info)	({ (void)(mode), NULL; })
 static inline void proc_net_remove(struct net *net, const char *name) {}
 
-static inline void proc_flush_task(struct task_struct *task) { }
+static inline void proc_flush_task(struct task_struct *task)
+{
+}
 
 static inline struct proc_dir_entry *create_proc_entry(const char *name,
 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
@@ -232,6 +238,15 @@ static inline void proc_tty_unregister_driver(struct tty_driver *driver) {};
 
 extern struct proc_dir_entry proc_root;
 
+static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
+{
+	return 0;
+}
+
+static inline void pid_ns_release_proc(struct pid_namespace *ns)
+{
+}
+
 #endif /* CONFIG_PROC_FS */
 
 #if !defined(CONFIG_PROC_KCORE)
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 8dcf237d338..72bfccd3da2 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -85,7 +85,7 @@ void reiserfs_warning(struct super_block *s, const char *fmt, ...);
 if( !( cond ) ) 								\
   reiserfs_panic( NULL, "reiserfs[%i]: assertion " scond " failed at "	\
 		  __FILE__ ":%i:%s: " format "\n",		\
-		  in_interrupt() ? -1 : current -> pid, __LINE__ , __FUNCTION__ , ##args )
+		  in_interrupt() ? -1 : task_pid_nr(current), __LINE__ , __FUNCTION__ , ##args )
 
 #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args)
 
@@ -283,6 +283,18 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+/* Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16
+ * which overflows on large file systems. */
+static inline u32 reiserfs_bmap_count(struct super_block *sb)
+{
+	return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1;
+}
+
+static inline int bmap_would_wrap(unsigned bmap_nr)
+{
+	return bmap_nr > ((1LL << 16) - 1);
+}
+
 /** this says about version of key of all items (but stat data) the
     object consists of */
 #define get_inode_item_key_version( inode )                                    \
@@ -1734,8 +1746,8 @@ int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *,
 int journal_mark_freed(struct reiserfs_transaction_handle *,
 		       struct super_block *, b_blocknr_t blocknr);
 int journal_transaction_should_end(struct reiserfs_transaction_handle *, int);
-int reiserfs_in_journal(struct super_block *p_s_sb, int bmap_nr, int bit_nr,
-			int searchall, b_blocknr_t * next);
+int reiserfs_in_journal(struct super_block *p_s_sb, unsigned int bmap_nr,
+			int bit_nr, int searchall, b_blocknr_t *next);
 int journal_begin(struct reiserfs_transaction_handle *,
 		  struct super_block *p_s_sb, unsigned long);
 int journal_join_abort(struct reiserfs_transaction_handle *,
@@ -1743,7 +1755,7 @@ int journal_join_abort(struct reiserfs_transaction_handle *,
 void reiserfs_journal_abort(struct super_block *sb, int errno);
 void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...);
 int reiserfs_allocate_list_bitmaps(struct super_block *s,
-				   struct reiserfs_list_bitmap *, int);
+				   struct reiserfs_list_bitmap *, unsigned int);
 
 void add_save_link(struct reiserfs_transaction_handle *th,
 		   struct inode *inode, int truncate);
@@ -2041,7 +2053,7 @@ struct buffer_head *get_FEB(struct tree_balance *);
  * arguments, such as node, search path, transaction_handle, etc. */
 struct __reiserfs_blocknr_hint {
 	struct inode *inode;	/* inode passed to allocator, if we allocate unf. nodes */
-	long block;		/* file offset, in blocks */
+	sector_t block;		/* file offset, in blocks */
 	struct in_core_key key;
 	struct treepath *path;	/* search path, used by allocator to deternine search_start by
 				 * various ways */
@@ -2099,7 +2111,8 @@ static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb,
 static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
 					    *th, struct inode *inode,
 					    b_blocknr_t * new_blocknrs,
-					    struct treepath *path, long block)
+					    struct treepath *path,
+					    sector_t block)
 {
 	reiserfs_blocknr_hint_t hint = {
 		.th = th,
@@ -2116,7 +2129,8 @@ static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle
 static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle
 					     *th, struct inode *inode,
 					     b_blocknr_t * new_blocknrs,
-					     struct treepath *path, long block)
+					     struct treepath *path,
+					     sector_t block)
 {
 	reiserfs_blocknr_hint_t hint = {
 		.th = th,
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index ff9e9234f8b..10fa0c83201 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -265,9 +265,7 @@ enum journal_state_bits {
 typedef __u32(*hashf_t) (const signed char *, int);
 
 struct reiserfs_bitmap_info {
-	// FIXME: Won't work with block sizes > 8K
-	__u16 first_zero_hint;
-	__u16 free_count;
+	__u32 free_count;
 };
 
 struct proc_dir_entry;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 10a83d8d577..13df99fb276 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
+#define CLONE_NEWPID		0x20000000	/* New pid namespace */
 #define CLONE_NEWNET		0x40000000	/* New network namespace */
 
 /*
@@ -428,7 +429,17 @@ struct signal_struct {
 	cputime_t it_prof_incr, it_virt_incr;
 
 	/* job control IDs */
-	pid_t pgrp;
+
+	/*
+	 * pgrp and session fields are deprecated.
+	 * use the task_session_Xnr and task_pgrp_Xnr routines below
+	 */
+
+	union {
+		pid_t pgrp __deprecated;
+		pid_t __pgrp;
+	};
+
 	struct pid *tty_old_pgrp;
 
 	union {
@@ -736,6 +747,8 @@ struct sched_domain {
 #endif
 };
 
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+
 #endif	/* CONFIG_SMP */
 
 /*
@@ -756,8 +769,6 @@ static inline int above_background_load(void)
 }
 
 struct io_context;			/* See blkdev.h */
-struct cpuset;
-
 #define NGROUPS_SMALL		32
 #define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
 struct group_info {
@@ -1125,11 +1136,16 @@ struct task_struct {
 	short il_next;
 #endif
 #ifdef CONFIG_CPUSETS
-	struct cpuset *cpuset;
 	nodemask_t mems_allowed;
 	int cpuset_mems_generation;
 	int cpuset_mem_spread_rotor;
 #endif
+#ifdef CONFIG_CGROUPS
+	/* Control Group info protected by css_set_lock */
+	struct css_set *cgroups;
+	/* cg_list protected by css_set_lock and tsk->alloc_lock */
+	struct list_head cg_list;
+#endif
 #ifdef CONFIG_FUTEX
 	struct robust_list_head __user *robust_list;
 #ifdef CONFIG_COMPAT
@@ -1185,24 +1201,14 @@ static inline int rt_task(struct task_struct *p)
 	return rt_prio(p->prio);
 }
 
-static inline pid_t process_group(struct task_struct *tsk)
+static inline void set_task_session(struct task_struct *tsk, pid_t session)
 {
-	return tsk->signal->pgrp;
+	tsk->signal->__session = session;
 }
 
-static inline pid_t signal_session(struct signal_struct *sig)
+static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
 {
-	return sig->__session;
-}
-
-static inline pid_t process_session(struct task_struct *tsk)
-{
-	return signal_session(tsk->signal);
-}
-
-static inline void set_signal_session(struct signal_struct *sig, pid_t session)
-{
-	sig->__session = session;
+	tsk->signal->__pgrp = pgrp;
 }
 
 static inline struct pid *task_pid(struct task_struct *task)
@@ -1225,6 +1231,88 @@ static inline struct pid *task_session(struct task_struct *task)
 	return task->group_leader->pids[PIDTYPE_SID].pid;
 }
 
+struct pid_namespace;
+
+/*
+ * the helpers to get the task's different pids as they are seen
+ * from various namespaces
+ *
+ * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
+ * task_xid_vnr()    : virtual id, i.e. the id seen from the namespace the task
+ *                     belongs to. this only makes sence when called in the
+ *                     context of the task that belongs to the same namespace;
+ * task_xid_nr_ns()  : id seen from the ns specified;
+ *
+ * set_task_vxid()   : assigns a virtual id to a task;
+ *
+ * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
+ *                     the result depends on the namespace and whether the
+ *                     task in question is the namespace's init. e.g. for the
+ *                     namespace's init this will return 0 when called from
+ *                     the namespace of this init, or appropriate id otherwise.
+ *
+ *
+ * see also pid_nr() etc in include/linux/pid.h
+ */
+
+static inline pid_t task_pid_nr(struct task_struct *tsk)
+{
+	return tsk->pid;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pid_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_pid(tsk));
+}
+
+
+static inline pid_t task_tgid_nr(struct task_struct *tsk)
+{
+	return tsk->tgid;
+}
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_tgid_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_tgid(tsk));
+}
+
+
+static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+{
+	return tsk->signal->__pgrp;
+}
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_pgrp(tsk));
+}
+
+
+static inline pid_t task_session_nr(struct task_struct *tsk)
+{
+	return tsk->signal->__session;
+}
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+
+static inline pid_t task_session_vnr(struct task_struct *tsk)
+{
+	return pid_vnr(task_session(tsk));
+}
+
+
+static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
+		struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
+}
+
 /**
  * pid_alive - check that a task structure is not stale
  * @p: Task structure to be checked.
@@ -1239,16 +1327,22 @@ static inline int pid_alive(struct task_struct *p)
 }
 
 /**
- * is_init - check if a task structure is init
+ * is_global_init - check if a task structure is init
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
  */
-static inline int is_init(struct task_struct *tsk)
+static inline int is_global_init(struct task_struct *tsk)
 {
 	return tsk->pid == 1;
 }
 
+/*
+ * is_container_init:
+ * check whether in the task is init in its own pid namespace.
+ */
+extern int is_container_init(struct task_struct *tsk);
+
 extern struct pid *cad_pid;
 
 extern void free_task(struct task_struct *tsk);
@@ -1420,8 +1514,32 @@ extern struct task_struct init_task;
 
 extern struct   mm_struct init_mm;
 
-#define find_task_by_pid(nr)	find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
+extern struct pid_namespace init_pid_ns;
+
+/*
+ * find a task by one of its numerical ids
+ *
+ * find_task_by_pid_type_ns():
+ *      it is the most generic call - it finds a task by all id,
+ *      type and namespace specified
+ * find_task_by_pid_ns():
+ *      finds a task by its pid in the specified namespace
+ * find_task_by_vpid():
+ *      finds a task by its virtual pid
+ * find_task_by_pid():
+ *      finds a task by its global pid
+ *
+ * see also find_pid() etc in include/linux/pid.h
+ */
+
+extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
+		struct pid_namespace *ns);
+
+extern struct task_struct *find_task_by_pid(pid_t nr);
+extern struct task_struct *find_task_by_vpid(pid_t nr);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr,
+		struct pid_namespace *ns);
+
 extern void __set_special_pids(pid_t session, pid_t pgrp);
 
 /* per-UID process charging. */
@@ -1608,6 +1726,12 @@ static inline int has_group_leader_pid(struct task_struct *p)
 	return p->pid == p->tgid;
 }
 
+static inline
+int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+	return p1->tgid == p2->tgid;
+}
+
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
 	return list_entry(rcu_dereference(p->thread_group.next),
@@ -1625,7 +1749,8 @@ static inline int thread_group_empty(struct task_struct *p)
 /*
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
- * pins the final release of task.io_context.  Also protects ->cpuset.
+ * pins the final release of task.io_context.  Also protects ->cpuset and
+ * ->cgroup.subsys[].
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/include/linux/sem.h b/include/linux/sem.h
index 9aaffb0b1d8..c8eaad9e4b7 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -90,7 +90,6 @@ struct sem {
 /* One sem_array data structure for each set of semaphores in the system. */
 struct sem_array {
 	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
-	int			sem_id;
 	time_t			sem_otime;	/* last semop time */
 	time_t			sem_ctime;	/* last change time */
 	struct sem		*sem_base;	/* ptr to first semaphore in array */
diff --git a/include/linux/shm.h b/include/linux/shm.h
index bea65d9c93e..eeaed921a1d 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -79,7 +79,6 @@ struct shmid_kernel /* private to the kernel */
 {	
 	struct kern_ipc_perm	shm_perm;
 	struct file *		shm_file;
-	int			id;
 	unsigned long		shm_nattch;
 	unsigned long		shm_segsz;
 	time_t			shm_atim;
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9a7252e089b..f4a1395e05f 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -40,6 +40,7 @@ enum tick_nohz_mode {
  * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
  * @idle_entrytime:	Time when the idle call was entered
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
+ * @sleep_length:	Duration of the current idle sleep
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
@@ -52,6 +53,7 @@ struct tick_sched {
 	unsigned long			idle_sleeps;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_sleeptime;
+	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
 	ktime_t				idle_expires;
@@ -100,10 +102,17 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 extern void tick_nohz_stop_sched_tick(void);
 extern void tick_nohz_restart_sched_tick(void);
 extern void tick_nohz_update_jiffies(void);
+extern ktime_t tick_nohz_get_sleep_length(void);
 # else
 static inline void tick_nohz_stop_sched_tick(void) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
 static inline void tick_nohz_update_jiffies(void) { }
+static inline ktime_t tick_nohz_get_sleep_length(void)
+{
+	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
+
+	return len;
+}
 # endif /* !NO_HZ */
 
 #endif
diff --git a/include/linux/types.h b/include/linux/types.h
index 0351bf2fac8..4f0dad21c91 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -3,12 +3,9 @@
 
 #ifdef	__KERNEL__
 
-#define BITS_TO_LONGS(bits) \
-	(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
 
-#define BITS_PER_BYTE 8
 #endif
 
 #include <linux/posix_types.h>
diff --git a/include/linux/uinput.h b/include/linux/uinput.h
index a6c1e8eed22..15ddd4483b0 100644
--- a/include/linux/uinput.h
+++ b/include/linux/uinput.h
@@ -162,10 +162,6 @@ struct uinput_ff_erase {
 #define UI_FF_UPLOAD		1
 #define UI_FF_ERASE		2
 
-#ifndef NBITS
-#define NBITS(x) ((((x)-1)/(sizeof(long)*8))+1)
-#endif	/* NBITS */
-
 #define UINPUT_MAX_NAME_SIZE	80
 struct uinput_user_dev {
 	char name[UINPUT_MAX_NAME_SIZE];
diff --git a/include/linux/vt.h b/include/linux/vt.h
index ba806e8711b..02c1c028877 100644
--- a/include/linux/vt.h
+++ b/include/linux/vt.h
@@ -1,6 +1,18 @@
 #ifndef _LINUX_VT_H
 #define _LINUX_VT_H
 
+#ifdef __KERNEL__
+struct notifier_block;
+
+struct vt_notifier_param {
+	struct vc_data *vc;	/* VC on which the update happened */
+	unsigned int c;		/* Printed char */
+};
+
+extern int register_vt_notifier(struct notifier_block *nb);
+extern int unregister_vt_notifier(struct notifier_block *nb);
+#endif
+
 /*
  * These constants are also useful for user-level apps (e.g., VC
  * resizing).
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index ce6badc98f6..7daafdc2514 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -8,6 +8,7 @@
 #include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <linux/lockdep.h>
 #include <asm/atomic.h>
 
 struct workqueue_struct;
@@ -28,6 +29,9 @@ struct work_struct {
 #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
 	struct list_head entry;
 	work_func_t func;
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 #define WORK_DATA_INIT()	ATOMIC_LONG_INIT(0)
@@ -41,10 +45,23 @@ struct execute_work {
 	struct work_struct work;
 };
 
+#ifdef CONFIG_LOCKDEP
+/*
+ * NB: because we have to copy the lockdep_map, setting _key
+ * here is required, otherwise it could get initialised to the
+ * copy of the lockdep_map!
+ */
+#define __WORK_INIT_LOCKDEP_MAP(n, k) \
+	.lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k),
+#else
+#define __WORK_INIT_LOCKDEP_MAP(n, k)
+#endif
+
 #define __WORK_INITIALIZER(n, f) {				\
 	.data = WORK_DATA_INIT(),				\
 	.entry	= { &(n).entry, &(n).entry },			\
 	.func = (f),						\
+	__WORK_INIT_LOCKDEP_MAP(#n, &(n))			\
 	}
 
 #define __DELAYED_WORK_INITIALIZER(n, f) {			\
@@ -76,12 +93,24 @@ struct execute_work {
  * assignment of the work data initializer allows the compiler
  * to generate better code.
  */
+#ifdef CONFIG_LOCKDEP
+#define INIT_WORK(_work, _func)						\
+	do {								\
+		static struct lock_class_key __key;			\
+									\
+		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
+		lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\
+		INIT_LIST_HEAD(&(_work)->entry);			\
+		PREPARE_WORK((_work), (_func));				\
+	} while (0)
+#else
 #define INIT_WORK(_work, _func)						\
 	do {								\
 		(_work)->data = (atomic_long_t) WORK_DATA_INIT();	\
 		INIT_LIST_HEAD(&(_work)->entry);			\
 		PREPARE_WORK((_work), (_func));				\
 	} while (0)
+#endif
 
 #define INIT_DELAYED_WORK(_work, _func)				\
 	do {							\
@@ -118,9 +147,23 @@ struct execute_work {
 	clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
-extern struct workqueue_struct *__create_workqueue(const char *name,
-						    int singlethread,
-						    int freezeable);
+extern struct workqueue_struct *
+__create_workqueue_key(const char *name, int singlethread,
+		       int freezeable, struct lock_class_key *key);
+
+#ifdef CONFIG_LOCKDEP
+#define __create_workqueue(name, singlethread, freezeable)	\
+({								\
+	static struct lock_class_key __key;			\
+								\
+	__create_workqueue_key((name), (singlethread),		\
+			       (freezeable), &__key);		\
+})
+#else
+#define __create_workqueue(name, singlethread, freezeable)	\
+	__create_workqueue_key((name), (singlethread), (freezeable), NULL)
+#endif
+
 #define create_workqueue(name) __create_workqueue((name), 0, 0)
 #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1)
 #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0)
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 686425a97b0..625346c47ee 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -44,7 +44,7 @@ extern unsigned int p9_debug_level;
 do {  \
 	if ((p9_debug_level & level) == level) \
 		printk(KERN_NOTICE "-- %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
+		format , __FUNCTION__, task_pid_nr(current) , ## arg); \
 } while (0)
 
 #define PRINT_FCALL_ERROR(s, fcall) P9_DPRINTK(P9_DEBUG_ERROR,   \
@@ -59,7 +59,7 @@ do {  \
 #define P9_EPRINTK(level, format, arg...) \
 do { \
 	printk(level "9p: %s (%d): " \
-		format , __FUNCTION__, current->pid , ## arg); \
+		format , __FUNCTION__, task_pid_nr(current), ## arg); \
 } while (0)
 
 
diff --git a/include/net/scm.h b/include/net/scm.h
index 423cb1d5ac2..06df126103c 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -4,6 +4,8 @@
 #include <linux/limits.h>
 #include <linux/net.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 /* Well, we should have at least one descriptor open
  * to accept passed FDs 8)
@@ -54,7 +56,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 	struct task_struct *p = current;
 	scm->creds.uid = p->uid;
 	scm->creds.gid = p->gid;
-	scm->creds.pid = p->tgid;
+	scm->creds.pid = task_tgid_vnr(p);
 	scm->fp = NULL;
 	scm->seq = 0;
 	unix_get_peersec_dgram(sock, scm);
diff --git a/include/video/sstfb.h b/include/video/sstfb.h
index baa163f770a..b52f0738124 100644
--- a/include/video/sstfb.h
+++ b/include/video/sstfb.h
@@ -68,7 +68,6 @@
 #  define print_var(X,Y...)
 #endif
 
-#define BIT(x)		(1ul<<(x))
 #define POW2(x)		(1ul<<(x))
 
 /*
diff --git a/include/video/tdfx.h b/include/video/tdfx.h
index 05b63c2a5ab..7431d9681e5 100644
--- a/include/video/tdfx.h
+++ b/include/video/tdfx.h
@@ -79,8 +79,6 @@
 
 /* register bitfields (not all, only as needed) */
 
-#define BIT(x)	(1UL << (x))
-
 /* COMMAND_2D reg. values */
 #define TDFX_ROP_COPY		0xcc	/* src */
 #define TDFX_ROP_INVERT		0x55	/* NOT dst */
diff --git a/init/Kconfig b/init/Kconfig
index a29a688c47d..541382d539a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -270,9 +270,43 @@ config LOG_BUF_SHIFT
 		     13 =>  8 KB
 		     12 =>  4 KB
 
+config CGROUPS
+	bool "Control Group support"
+	help
+	  This option will let you use process cgroup subsystems
+	  such as Cpusets
+
+	  Say N if unsure.
+
+config CGROUP_DEBUG
+	bool "Example debug cgroup subsystem"
+	depends on CGROUPS
+	help
+	  This option enables a simple cgroup subsystem that
+	  exports useful debugging information about the cgroups
+	  framework
+
+	  Say N if unsure
+
+config CGROUP_NS
+        bool "Namespace cgroup subsystem"
+        depends on CGROUPS
+        help
+          Provides a simple namespace cgroup subsystem to
+          provide hierarchical naming of sets of namespaces,
+          for instance virtual servers and checkpoint/restart
+          jobs.
+
+config CGROUP_CPUACCT
+	bool "Simple CPU accounting cgroup subsystem"
+	depends on CGROUPS
+	help
+	  Provides a simple Resource Controller for monitoring the
+	  total CPU consumed by the tasks in a cgroup
+
 config CPUSETS
 	bool "Cpuset support"
-	depends on SMP
+	depends on SMP && CGROUPS
 	help
 	  This option will let you create and manage CPUSETs which
 	  allow dynamically partitioning a system into sets of CPUs and
@@ -300,6 +334,16 @@ config FAIR_USER_SCHED
 	  This option will choose userid as the basis for grouping
 	  tasks, thus providing equal CPU bandwidth to each user.
 
+config FAIR_CGROUP_SCHED
+	bool "Control groups"
+ 	depends on CGROUPS
+ 	help
+	  This option allows you to create arbitrary task groups
+	  using the "cgroup" pseudo filesystem and control
+	  the cpu bandwidth allocated to each such task group.
+	  Refer to Documentation/cgroups.txt for more information
+	  on "cgroup" pseudo filesystem.
+
 endchoice
 
 config SYSFS_DEPRECATED
@@ -322,6 +366,11 @@ config SYSFS_DEPRECATED
 	  If you are using a distro that was released in 2006 or later,
 	  it should be safe to say N here.
 
+config PROC_PID_CPUSET
+	bool "Include legacy /proc/<pid>/cpuset file"
+	depends on CPUSETS
+	default y
+
 config RELAY
 	bool "Kernel->user space relay support (formerly relayfs)"
 	help
diff --git a/init/main.c b/init/main.c
index 9def935ab13..0dd0e7a1f63 100644
--- a/init/main.c
+++ b/init/main.c
@@ -39,6 +39,7 @@
 #include <linux/writeback.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/efi.h>
 #include <linux/tick.h>
 #include <linux/interrupt.h>
@@ -523,6 +524,7 @@ asmlinkage void __init start_kernel(void)
 	 */
 	unwind_init();
 	lockdep_init();
+	cgroup_init_early();
 
 	local_irq_disable();
 	early_boot_irqs_off();
@@ -640,6 +642,7 @@ asmlinkage void __init start_kernel(void)
 #ifdef CONFIG_PROC_FS
 	proc_root_init();
 #endif
+	cgroup_init();
 	cpuset_init();
 	taskstats_init_early();
 	delayacct_init();
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 20f1fed8fa4..c0b26dc4617 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -29,6 +29,8 @@
 #include <linux/audit.h>
 #include <linux/signal.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/pid.h>
 
 #include <net/sock.h>
 #include "util.h"
@@ -330,7 +332,8 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 			(info->notify_owner &&
 			 info->notify.sigev_notify == SIGEV_SIGNAL) ?
 				info->notify.sigev_signo : 0,
-			pid_nr(info->notify_owner));
+			pid_nr_ns(info->notify_owner,
+				current->nsproxy->pid_ns));
 	spin_unlock(&info->lock);
 	buffer[sizeof(buffer)-1] = '\0';
 	slen = strlen(buffer)+1;
@@ -507,7 +510,7 @@ static void __do_notify(struct mqueue_inode_info *info)
 			sig_i.si_errno = 0;
 			sig_i.si_code = SI_MESGQ;
 			sig_i.si_value = info->notify.sigev_value;
-			sig_i.si_pid = current->tgid;
+			sig_i.si_pid = task_pid_vnr(current);
 			sig_i.si_uid = current->uid;
 
 			kill_pid_info(info->notify.sigev_signo,
diff --git a/ipc/msg.c b/ipc/msg.c
index a03fcb522ff..fdf3db5731c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -34,7 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/current.h>
@@ -66,23 +66,15 @@ struct msg_sender {
 #define SEARCH_NOTEQUAL		3
 #define SEARCH_LESSEQUAL	4
 
-static atomic_t msg_bytes =	ATOMIC_INIT(0);
-static atomic_t msg_hdrs =	ATOMIC_INIT(0);
-
 static struct ipc_ids init_msg_ids;
 
 #define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))
 
-#define msg_lock(ns, id)	((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(ns, id)	((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
-#define msg_checkid(ns, msq, msgid)	\
-	ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
-#define msg_buildid(ns, id, seq) \
-	ipc_buildid(&msg_ids(ns), id, seq)
-
-static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
+#define msg_buildid(id, seq)	ipc_buildid(id, seq)
+
+static void freeque(struct ipc_namespace *, struct msg_queue *);
+static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
@@ -93,7 +85,9 @@ static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
-	ipc_init_ids(ids, ns->msg_ctlmni);
+	atomic_set(&ns->msg_bytes, 0);
+	atomic_set(&ns->msg_hdrs, 0);
+	ipc_init_ids(ids);
 }
 
 int msg_init_ns(struct ipc_namespace *ns)
@@ -110,20 +104,25 @@ int msg_init_ns(struct ipc_namespace *ns)
 
 void msg_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct msg_queue *msq;
+	int next_id;
+	int total, in_use;
+
+	down_write(&msg_ids(ns).rw_mutex);
+
+	in_use = msg_ids(ns).in_use;
 
-	mutex_lock(&msg_ids(ns).mutex);
-	for (i = 0; i <= msg_ids(ns).max_id; i++) {
-		msq = msg_lock(ns, i);
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
 		if (msq == NULL)
 			continue;
-
-		freeque(ns, msq, i);
+		ipc_lock_by_ptr(&msq->q_perm);
+		freeque(ns, msq);
+		total++;
 	}
-	mutex_unlock(&msg_ids(ns).mutex);
 
-	ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
+	up_write(&msg_ids(ns).rw_mutex);
+
 	kfree(ns->ids[IPC_MSG_IDS]);
 	ns->ids[IPC_MSG_IDS] = NULL;
 }
@@ -136,10 +135,55 @@ void __init msg_init(void)
 				IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
+/*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+/*
+ * msg_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
+{
+	ipc_rmid(&msg_ids(ns), &s->q_perm);
+}
+
+/**
+ * newque - Create a new msg queue
+ * @ns: namespace
+ * @params: ptr to the structure that contains the key and msgflg
+ *
+ * Called with msg_ids.rw_mutex held (writer)
+ */
+static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
 	int id, retval;
+	key_t key = params->key;
+	int msgflg = params->flg;
 
 	msq = ipc_rcu_alloc(sizeof(*msq));
 	if (!msq)
@@ -155,14 +199,17 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
 		return retval;
 	}
 
+	/*
+	 * ipc_addid() locks msq
+	 */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-	if (id == -1) {
+	if (id < 0) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
-		return -ENOSPC;
+		return id;
 	}
 
-	msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
+	msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
@@ -171,9 +218,10 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
 	INIT_LIST_HEAD(&msq->q_messages);
 	INIT_LIST_HEAD(&msq->q_receivers);
 	INIT_LIST_HEAD(&msq->q_senders);
+
 	msg_unlock(msq);
 
-	return msq->q_id;
+	return msq->q_perm.id;
 }
 
 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
@@ -224,19 +272,19 @@ static void expunge_all(struct msg_queue *msq, int res)
 
 /*
  * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
- * array, and cleans up all the messages associated with this queue.
+ * removes the message queue from message queue ID IDR, and cleans up all the
+ * messages associated with this queue.
  *
- * msg_ids.mutex and the spinlock for this message queue is hold
- * before freeque() is called. msg_ids.mutex remains locked on exit.
+ * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
  */
-static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 {
 	struct list_head *tmp;
 
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
-	msq = msg_rmid(ns, id);
+	msg_rmid(ns, msq);
 	msg_unlock(msq);
 
 	tmp = msq->q_messages.next;
@@ -244,49 +292,40 @@ static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
 		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
 
 		tmp = tmp->next;
-		atomic_dec(&msg_hdrs);
+		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &msg_bytes);
+	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 	security_msg_queue_free(msq);
 	ipc_rcu_putref(msq);
 }
 
+/*
+ * Called with msg_ids.rw_mutex and ipcp locked.
+ */
+static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
+{
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
+
+	return security_msg_queue_associate(msq, msgflg);
+}
+
 asmlinkage long sys_msgget(key_t key, int msgflg)
 {
-	struct msg_queue *msq;
-	int id, ret = -EPERM;
 	struct ipc_namespace *ns;
+	struct ipc_ops msg_ops;
+	struct ipc_params msg_params;
 
 	ns = current->nsproxy->ipc_ns;
-	
-	mutex_lock(&msg_ids(ns).mutex);
-	if (key == IPC_PRIVATE) 
-		ret = newque(ns, key, msgflg);
-	else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
-		if (!(msgflg & IPC_CREAT))
-			ret = -ENOENT;
-		else
-			ret = newque(ns, key, msgflg);
-	} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
-		ret = -EEXIST;
-	} else {
-		msq = msg_lock(ns, id);
-		BUG_ON(msq == NULL);
-		if (ipcperms(&msq->q_perm, msgflg))
-			ret = -EACCES;
-		else {
-			int qid = msg_buildid(ns, id, msq->q_perm.seq);
-
-			ret = security_msg_queue_associate(msq, msgflg);
-			if (!ret)
-				ret = qid;
-		}
-		msg_unlock(msq);
-	}
-	mutex_unlock(&msg_ids(ns).mutex);
 
-	return ret;
+	msg_ops.getnew = newque;
+	msg_ops.associate = msg_security;
+	msg_ops.more_checks = NULL;
+
+	msg_params.key = key;
+	msg_params.flg = msgflg;
+
+	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 }
 
 static inline unsigned long
@@ -420,23 +459,23 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		mutex_lock(&msg_ids(ns).mutex);
+		down_read(&msg_ids(ns).rw_mutex);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
-			msginfo.msgmap = atomic_read(&msg_hdrs);
-			msginfo.msgtql = atomic_read(&msg_bytes);
+			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
+			msginfo.msgtql = atomic_read(&ns->msg_bytes);
 		} else {
 			msginfo.msgmap = MSGMAP;
 			msginfo.msgpool = MSGPOOL;
 			msginfo.msgtql = MSGTQL;
 		}
-		max_id = msg_ids(ns).max_id;
-		mutex_unlock(&msg_ids(ns).mutex);
+		max_id = ipc_get_maxid(&msg_ids(ns));
+		up_read(&msg_ids(ns).rw_mutex);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
 		return (max_id < 0) ? 0 : max_id;
 	}
-	case MSG_STAT:
+	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
 	case IPC_STAT:
 	{
 		struct msqid64_ds tbuf;
@@ -444,21 +483,16 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 
 		if (!buf)
 			return -EFAULT;
-		if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf, 0, sizeof(tbuf));
-
-		msq = msg_lock(ns, msqid);
-		if (msq == NULL)
-			return -EINVAL;
 
 		if (cmd == MSG_STAT) {
-			success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
+			msq = msg_lock(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
+			success_return = msq->q_perm.id;
 		} else {
-			err = -EIDRM;
-			if (msg_checkid(ns, msq, msqid))
-				goto out_unlock;
+			msq = msg_lock_check(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
 			success_return = 0;
 		}
 		err = -EACCES;
@@ -469,6 +503,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		if (err)
 			goto out_unlock;
 
+		memset(&tbuf, 0, sizeof(tbuf));
+
 		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
 		tbuf.msg_stime  = msq->q_stime;
 		tbuf.msg_rtime  = msq->q_rtime;
@@ -495,15 +531,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		return  -EINVAL;
 	}
 
-	mutex_lock(&msg_ids(ns).mutex);
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	down_write(&msg_ids(ns).rw_mutex);
+	msq = msg_lock_check_down(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_up;
+	}
 
-	err = -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_up;
 	ipcp = &msq->q_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -552,12 +586,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		break;
 	}
 	case IPC_RMID:
-		freeque(ns, msq, msqid);
+		freeque(ns, msq);
 		break;
 	}
 	err = 0;
 out_up:
-	mutex_unlock(&msg_ids(ns).mutex);
+	up_write(&msg_ids(ns).rw_mutex);
 	return err;
 out_unlock_up:
 	msg_unlock(msq);
@@ -611,7 +645,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
 				msr->r_msg = ERR_PTR(-E2BIG);
 			} else {
 				msr->r_msg = NULL;
-				msq->q_lrpid = msr->r_tsk->pid;
+				msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 				msq->q_rtime = get_seconds();
 				wake_up_process(msr->r_tsk);
 				smp_mb();
@@ -646,14 +680,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 	msg->m_type = mtype;
 	msg->m_ts = msgsz;
 
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_free;
-
-	err= -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_free;
+	}
 
 	for (;;) {
 		struct msg_sender s;
@@ -695,7 +726,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();
 
 	if (!pipelined_send(msq, msg)) {
@@ -703,8 +734,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &msg_bytes);
-		atomic_inc(&msg_hdrs);
+		atomic_add(msgsz, &ns->msg_bytes);
+		atomic_inc(&ns->msg_hdrs);
 	}
 
 	err = 0;
@@ -760,13 +791,9 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
 
-	msq = msg_lock(ns, msqid);
-	if (msq == NULL)
-		return -EINVAL;
-
-	msg = ERR_PTR(-EIDRM);
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock;
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq))
+		return PTR_ERR(msq);
 
 	for (;;) {
 		struct msg_receiver msr_d;
@@ -810,10 +837,10 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = task_tgid_vnr(current);
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &msg_bytes);
-			atomic_dec(&msg_hdrs);
+			atomic_sub(msg->m_ts, &ns->msg_bytes);
+			atomic_dec(&ns->msg_hdrs);
 			ss_wakeup(&msq->q_senders, 0);
 			msg_unlock(msq);
 			break;
@@ -926,7 +953,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 	return seq_printf(s,
 			"%10d %10d  %4o  %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
 			msq->q_perm.key,
-			msq->q_id,
+			msq->q_perm.id,
 			msq->q_perm.mode,
 			msq->q_cbytes,
 			msq->q_qnum,
diff --git a/ipc/sem.c b/ipc/sem.c
index b676fef6d20..35952c0bae4 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -80,7 +80,7 @@
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
@@ -88,18 +88,14 @@
 
 #define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))
 
-#define sem_lock(ns, id)	((struct sem_array*)ipc_lock(&sem_ids(ns), id))
 #define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(ns, id)	((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
-#define sem_checkid(ns, sma, semid)	\
-	ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
-#define sem_buildid(ns, id, seq) \
-	ipc_buildid(&sem_ids(ns), id, seq)
+#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
+#define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
 static struct ipc_ids init_sem_ids;
 
-static int newary(struct ipc_namespace *, key_t, int, int);
-static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, struct ipc_params *);
+static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -129,7 +125,7 @@ static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
-	ipc_init_ids(ids, ns->sc_semmni);
+	ipc_init_ids(ids);
 }
 
 int sem_init_ns(struct ipc_namespace *ns)
@@ -146,20 +142,24 @@ int sem_init_ns(struct ipc_namespace *ns)
 
 void sem_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct sem_array *sma;
+	int next_id;
+	int total, in_use;
 
-	mutex_lock(&sem_ids(ns).mutex);
-	for (i = 0; i <= sem_ids(ns).max_id; i++) {
-		sma = sem_lock(ns, i);
+	down_write(&sem_ids(ns).rw_mutex);
+
+	in_use = sem_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
 		if (sma == NULL)
 			continue;
-
-		freeary(ns, sma, i);
+		ipc_lock_by_ptr(&sma->sem_perm);
+		freeary(ns, sma);
+		total++;
 	}
-	mutex_unlock(&sem_ids(ns).mutex);
+	up_write(&sem_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
 	kfree(ns->ids[IPC_SEM_IDS]);
 	ns->ids[IPC_SEM_IDS] = NULL;
 }
@@ -173,6 +173,42 @@ void __init sem_init (void)
 }
 
 /*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+/*
+ * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+{
+	ipc_rmid(&sem_ids(ns), &s->sem_perm);
+}
+
+/*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
  * - queue.status is initialized to -EINTR before blocking.
@@ -206,12 +242,23 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP	1
 
-static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
+/**
+ * newary - Create a new semaphore set
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, semflg and nsems
+ *
+ * Called with sem_ids.rw_mutex held (as a writer)
+ */
+
+static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	int id;
 	int retval;
 	struct sem_array *sma;
 	int size;
+	key_t key = params->key;
+	int nsems = params->u.nsems;
+	int semflg = params->flg;
 
 	if (!nsems)
 		return -EINVAL;
@@ -236,14 +283,14 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-	if(id == -1) {
+	if (id < 0) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
-		return -ENOSPC;
+		return id;
 	}
 	ns->used_sems += nsems;
 
-	sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
+	sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
@@ -252,48 +299,56 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
 
-	return sma->sem_id;
+	return sma->sem_perm.id;
+}
+
+
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
+{
+	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	return security_sem_associate(sma, semflg);
 }
 
-asmlinkage long sys_semget (key_t key, int nsems, int semflg)
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
 {
-	int id, err = -EINVAL;
 	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	if (params->u.nsems > sma->sem_nsems)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_semget(key_t key, int nsems, int semflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops sem_ops;
+	struct ipc_params sem_params;
 
 	ns = current->nsproxy->ipc_ns;
 
 	if (nsems < 0 || nsems > ns->sc_semmsl)
 		return -EINVAL;
-	mutex_lock(&sem_ids(ns).mutex);
-	
-	if (key == IPC_PRIVATE) {
-		err = newary(ns, key, nsems, semflg);
-	} else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
-		if (!(semflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newary(ns, key, nsems, semflg);
-	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
-		err = -EEXIST;
-	} else {
-		sma = sem_lock(ns, id);
-		BUG_ON(sma==NULL);
-		if (nsems > sma->sem_nsems)
-			err = -EINVAL;
-		else if (ipcperms(&sma->sem_perm, semflg))
-			err = -EACCES;
-		else {
-			int semid = sem_buildid(ns, id, sma->sem_perm.seq);
-			err = security_sem_associate(sma, semflg);
-			if (!err)
-				err = semid;
-		}
-		sem_unlock(sma);
-	}
 
-	mutex_unlock(&sem_ids(ns).mutex);
-	return err;
+	sem_ops.getnew = newary;
+	sem_ops.associate = sem_security;
+	sem_ops.more_checks = sem_more_checks;
+
+	sem_params.key = key;
+	sem_params.flg = semflg;
+	sem_params.u.nsems = nsems;
+
+	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
 /* Manage the doubly linked list sma->sem_pending as a FIFO:
@@ -487,15 +542,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
- * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
- * on exit.
+/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
+ * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+ * remains locked on exit.
  */
-static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
 {
 	struct sem_undo *un;
 	struct sem_queue *q;
-	int size;
 
 	/* Invalidate the existing undo structures for this semaphore set.
 	 * (They will be freed without any further action in exit_sem()
@@ -518,12 +572,11 @@ static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
 		q = n;
 	}
 
-	/* Remove the semaphore set from the ID array*/
-	sma = sem_rmid(ns, id);
+	/* Remove the semaphore set from the IDR */
+	sem_rmid(ns, sma);
 	sem_unlock(sma);
 
 	ns->used_sems -= sma->sem_nsems;
-	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
 	security_sem_free(sma);
 	ipc_rcu_putref(sma);
 }
@@ -576,7 +629,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		mutex_lock(&sem_ids(ns).mutex);
+		down_read(&sem_ids(ns).rw_mutex);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -584,8 +637,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 			seminfo.semusz = SEMUSZ;
 			seminfo.semaem = SEMAEM;
 		}
-		max_id = sem_ids(ns).max_id;
-		mutex_unlock(&sem_ids(ns).mutex);
+		max_id = ipc_get_maxid(&sem_ids(ns));
+		up_read(&sem_ids(ns).rw_mutex);
 		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 
 			return -EFAULT;
 		return (max_id < 0) ? 0: max_id;
@@ -595,14 +648,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		struct semid64_ds tbuf;
 		int id;
 
-		if(semid >= sem_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf,0,sizeof(tbuf));
-
 		sma = sem_lock(ns, semid);
-		if(sma == NULL)
-			return -EINVAL;
+		if (IS_ERR(sma))
+			return PTR_ERR(sma);
 
 		err = -EACCES;
 		if (ipcperms (&sma->sem_perm, S_IRUGO))
@@ -612,7 +660,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		if (err)
 			goto out_unlock;
 
-		id = sem_buildid(ns, semid, sma->sem_perm.seq);
+		id = sma->sem_perm.id;
+
+		memset(&tbuf, 0, sizeof(tbuf));
 
 		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
 		tbuf.sem_otime  = sma->sem_otime;
@@ -642,16 +692,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	ushort* sem_io = fast_sem_io;
 	int nsems;
 
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
 	nsems = sma->sem_nsems;
 
-	err=-EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock;
-
 	err = -EACCES;
 	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
 		goto out_unlock;
@@ -795,7 +841,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -863,14 +909,10 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 		if(copy_semid_from_user (&setbuf, arg.buf, version))
 			return -EFAULT;
 	}
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check_down(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
-	if (sem_checkid(ns,sma,semid)) {
-		err=-EIDRM;
-		goto out_unlock;
-	}	
 	ipcp = &sma->sem_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -894,7 +936,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 
 	switch(cmd){
 	case IPC_RMID:
-		freeary(ns, sma, semid);
+		freeary(ns, sma);
 		err = 0;
 		break;
 	case IPC_SET:
@@ -948,45 +990,15 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
-		mutex_lock(&sem_ids(ns).mutex);
+		down_write(&sem_ids(ns).rw_mutex);
 		err = semctl_down(ns,semid,semnum,cmd,version,arg);
-		mutex_unlock(&sem_ids(ns).mutex);
+		up_write(&sem_ids(ns).rw_mutex);
 		return err;
 	default:
 		return -EINVAL;
 	}
 }
 
-static inline void lock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_lock(&undo_list->lock);
-}
-
-/* This code has an interaction with copy_semundo().
- * Consider; two tasks are sharing the undo_list. task1
- * acquires the undo_list lock in lock_semundo().  If task2 now
- * exits before task1 releases the lock (by calling
- * unlock_semundo()), then task1 will never call spin_unlock().
- * This leave the sem_undo_list in a locked state.  If task1 now creats task3
- * and once again shares the sem_undo_list, the sem_undo_list will still be
- * locked, and future SEM_UNDO operations will deadlock.  This case is
- * dealt with in copy_semundo() by having it reinitialize the spin lock when 
- * the refcnt goes from 1 to 2.
- */
-static inline void unlock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_unlock(&undo_list->lock);
-}
-
-
 /* If the task doesn't already have a undo_list, then allocate one
  * here.  We guarantee there is only one thread using this undo list,
  * and current is THE ONE
@@ -1047,22 +1059,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	if (error)
 		return ERR_PTR(error);
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
 
 	/* no undo structure around - allocate one. */
-	sma = sem_lock(ns, semid);
-	un = ERR_PTR(-EINVAL);
-	if(sma==NULL)
-		goto out;
-	un = ERR_PTR(-EIDRM);
-	if (sem_checkid(ns,sma,semid)) {
-		sem_unlock(sma);
-		goto out;
-	}
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return ERR_PTR(PTR_ERR(sma));
+
 	nsems = sma->sem_nsems;
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
@@ -1077,10 +1084,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	new->semadj = (short *) &new[1];
 	new->semid = semid;
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	if (un) {
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1091,7 +1098,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	ipc_rcu_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1102,7 +1109,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	sma->undo = new;
 	sem_unlock(sma);
 	un = new;
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
 out:
 	return un;
 }
@@ -1168,15 +1175,14 @@ retry_undos:
 	} else
 		un = NULL;
 
-	sma = sem_lock(ns, semid);
-	error=-EINVAL;
-	if(sma==NULL)
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma)) {
+		error = PTR_ERR(sma);
 		goto out_free;
-	error = -EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock_free;
+	}
+
 	/*
-	 * semid identifies are not unique - find_undo may have
+	 * semid identifiers are not unique - find_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and retry.
 	 */
@@ -1196,7 +1202,7 @@ retry_undos:
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1211,7 +1217,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = task_tgid_vnr(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1242,7 +1248,7 @@ retry_undos:
 	}
 
 	sma = sem_lock(ns, semid);
-	if(sma==NULL) {
+	if (IS_ERR(sma)) {
 		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
 		goto out_free;
@@ -1279,10 +1285,6 @@ asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsop
 
 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
  * parent and child tasks.
- *
- * See the notes above unlock_semundo() regarding the spin_lock_init()
- * in this code.  Initialize the undo_list->lock here instead of get_undo_list()
- * because of the reasoning in the comment above unlock_semundo.
  */
 
 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
@@ -1342,13 +1344,13 @@ void exit_sem(struct task_struct *tsk)
 		if(semid == -1)
 			continue;
 		sma = sem_lock(ns, semid);
-		if (sma == NULL)
+		if (IS_ERR(sma))
 			continue;
 
 		if (u->semid == -1)
 			goto next_entry;
 
-		BUG_ON(sem_checkid(ns,sma,u->semid));
+		BUG_ON(sem_checkid(sma, u->semid));
 
 		/* remove u from the sma->undo list */
 		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1382,7 +1384,7 @@ found:
 					semaphore->semval = 0;
 				if (semaphore->semval > SEMVMX)
 					semaphore->semval = SEMVMX;
-				semaphore->sempid = current->tgid;
+				semaphore->sempid = task_tgid_vnr(current);
 			}
 		}
 		sma->sem_otime = get_seconds();
@@ -1402,7 +1404,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	return seq_printf(s,
 			  "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
 			  sma->sem_perm.key,
-			  sma->sem_id,
+			  sma->sem_perm.id,
 			  sma->sem_perm.mode,
 			  sma->sem_nsems,
 			  sma->sem_perm.uid,
diff --git a/ipc/shm.c b/ipc/shm.c
index 5fc5cf50cf1..3818fae625c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -35,7 +35,7 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
 
@@ -59,17 +59,11 @@ static struct ipc_ids init_shm_ids;
 
 #define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))
 
-#define shm_lock(ns, id)		\
-	((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
 #define shm_unlock(shp)			\
 	ipc_unlock(&(shp)->shm_perm)
-#define shm_get(ns, id)			\
-	((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
-#define shm_buildid(ns, id, seq)	\
-	ipc_buildid(&shm_ids(ns), id, seq)
+#define shm_buildid(id, seq)	ipc_buildid(id, seq)
 
-static int newseg (struct ipc_namespace *ns, key_t key,
-		int shmflg, size_t size);
+static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
@@ -84,9 +78,13 @@ static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_tot = 0;
-	ipc_init_ids(ids, 1);
+	ipc_init_ids(ids);
 }
 
+/*
+ * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
+ * Only shm_ids.rw_mutex remains locked on exit.
+ */
 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	if (shp->shm_nattch){
@@ -112,20 +110,24 @@ int shm_init_ns(struct ipc_namespace *ns)
 
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct shmid_kernel *shp;
+	int next_id;
+	int total, in_use;
+
+	down_write(&shm_ids(ns).rw_mutex);
 
-	mutex_lock(&shm_ids(ns).mutex);
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
-		shp = shm_lock(ns, i);
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
 		if (shp == NULL)
 			continue;
-
+		ipc_lock_by_ptr(&shp->shm_perm);
 		do_shm_rmid(ns, shp);
+		total++;
 	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
 	kfree(ns->ids[IPC_SHM_IDS]);
 	ns->ids[IPC_SHM_IDS] = NULL;
 }
@@ -138,17 +140,49 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-static inline int shm_checkid(struct ipc_namespace *ns,
-		struct shmid_kernel *s, int id)
+/*
+ * shm_lock_(check_)down routines are called in the paths where the rw_mutex
+ * is held to protect access to the idr tree.
+ */
+static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
+						int id)
 {
-	if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
-		return -EIDRM;
-	return 0;
+	struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check_down(
+						struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+/*
+ * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
-static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
+static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
+	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -166,9 +200,9 @@ static void shm_open(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 
 	shp = shm_lock(sfd->ns, sfd->id);
-	BUG_ON(!shp);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -176,15 +210,16 @@ static void shm_open(struct vm_area_struct *vma)
 /*
  * shm_destroy - free the struct shmid_kernel
  *
+ * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.mutex locked,
+ * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid(ns, shp->id);
+	shm_rmid(ns, shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
 		shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -209,11 +244,11 @@ static void shm_close(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
+	down_write(&shm_ids(ns).rw_mutex);
 	/* remove from the list of attaches of the shm segment */
-	shp = shm_lock(ns, sfd->id);
-	BUG_ON(!shp);
-	shp->shm_lprid = current->tgid;
+	shp = shm_lock_down(ns, sfd->id);
+	BUG_ON(IS_ERR(shp));
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -221,7 +256,7 @@ static void shm_close(struct vm_area_struct *vma)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -337,8 +372,19 @@ static struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
-static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
+/**
+ * newseg - Create a new shared memory segment
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, size and shmflg
+ *
+ * Called with shm_ids.rw_mutex held as a writer.
+ */
+
+static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
+	key_t key = params->key;
+	int shmflg = params->flg;
+	size_t size = params->u.size;
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -387,28 +433,30 @@ static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
 	if (IS_ERR(file))
 		goto no_file;
 
-	error = -ENOSPC;
 	id = shm_addid(ns, shp);
-	if(id == -1) 
+	if (id < 0) {
+		error = id;
 		goto no_id;
+	}
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = task_tgid_vnr(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
-	shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
+	shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
 	shp->shm_file = file;
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
 	 * proc-ps tools use this. Changing this will break them.
 	 */
-	file->f_dentry->d_inode->i_ino = shp->id;
+	file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
 
 	ns->shm_tot += numpages;
+	error = shp->shm_perm.id;
 	shm_unlock(shp);
-	return shp->id;
+	return error;
 
 no_id:
 	fput(file);
@@ -418,42 +466,49 @@ no_file:
 	return error;
 }
 
-asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
 	struct shmid_kernel *shp;
-	int err, id = 0;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	return security_shm_associate(shp, shmflg);
+}
+
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
+{
+	struct shmid_kernel *shp;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	if (shp->shm_segsz < params->u.size)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops shm_ops;
+	struct ipc_params shm_params;
 
 	ns = current->nsproxy->ipc_ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
-	if (key == IPC_PRIVATE) {
-		err = newseg(ns, key, shmflg, size);
-	} else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
-		if (!(shmflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newseg(ns, key, shmflg, size);
-	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
-		err = -EEXIST;
-	} else {
-		shp = shm_lock(ns, id);
-		BUG_ON(shp==NULL);
-		if (shp->shm_segsz < size)
-			err = -EINVAL;
-		else if (ipcperms(&shp->shm_perm, shmflg))
-			err = -EACCES;
-		else {
-			int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
-			err = security_shm_associate(shp, shmflg);
-			if (!err)
-				err = shmid;
-		}
-		shm_unlock(shp);
-	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	shm_ops.getnew = newseg;
+	shm_ops.associate = shm_security;
+	shm_ops.more_checks = shm_more_checks;
 
-	return err;
+	shm_params.key = key;
+	shm_params.flg = shmflg;
+	shm_params.u.size = size;
+
+	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
@@ -547,20 +602,26 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 	}
 }
 
+/*
+ * Called with shm_ids.rw_mutex held as a reader
+ */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 		unsigned long *swp)
 {
-	int i;
+	int next_id;
+	int total, in_use;
 
 	*rss = 0;
 	*swp = 0;
 
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
 		struct shmid_kernel *shp;
 		struct inode *inode;
 
-		shp = shm_get(ns, i);
-		if(!shp)
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
+		if (shp == NULL)
 			continue;
 
 		inode = shp->shm_file->f_path.dentry->d_inode;
@@ -575,6 +636,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 			*swp += info->swapped;
 			spin_unlock(&info->lock);
 		}
+
+		total++;
 	}
 }
 
@@ -610,8 +673,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		shminfo.shmmin = SHMMIN;
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
-		/* reading a integer is always atomic */
-		err= shm_ids(ns).max_id;
+
+		down_read(&shm_ids(ns).rw_mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
+
 		if(err<0)
 			err = 0;
 		goto out;
@@ -625,14 +691,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 			return err;
 
 		memset(&shm_info,0,sizeof(shm_info));
-		mutex_lock(&shm_ids(ns).mutex);
+		down_read(&shm_ids(ns).rw_mutex);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
-		err = shm_ids(ns).max_id;
-		mutex_unlock(&shm_ids(ns).mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -646,20 +712,25 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 	{
 		struct shmid64_ds tbuf;
 		int result;
-		memset(&tbuf, 0, sizeof(tbuf));
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+
+		if (!buf) {
+			err = -EFAULT;
 			goto out;
-		} else if(cmd==SHM_STAT) {
-			err = -EINVAL;
-			if (shmid > shm_ids(ns).max_id)
-				goto out_unlock;
-			result = shm_buildid(ns, shmid, shp->shm_perm.seq);
+		}
+
+		if (cmd == SHM_STAT) {
+			shp = shm_lock(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
+			result = shp->shm_perm.id;
 		} else {
-			err = shm_checkid(ns, shp,shmid);
-			if(err)
-				goto out_unlock;
+			shp = shm_lock_check(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
 			result = 0;
 		}
 		err=-EACCES;
@@ -668,6 +739,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz	= shp->shm_segsz;
 		tbuf.shm_atime	= shp->shm_atim;
@@ -686,14 +758,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+		shp = shm_lock_check(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out;
 		}
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock;
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -742,14 +811,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		 *	Instead we set a destroyed flag, and then blow
 		 *	the name away when the usage hits zero.
 		 */
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err = -EINVAL;
-		if (shp == NULL) 
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp, shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -767,24 +834,27 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out_unlock_up;
 
 		do_shm_rmid(ns, shp);
-		mutex_unlock(&shm_ids(ns).mutex);
+		up_write(&shm_ids(ns).rw_mutex);
 		goto out;
 	}
 
 	case IPC_SET:
 	{
+		if (!buf) {
+			err = -EFAULT;
+			goto out;
+		}
+
 		if (copy_shmid_from_user (&setbuf, buf, version)) {
 			err = -EFAULT;
 			goto out;
 		}
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err=-EINVAL;
-		if(shp==NULL)
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
 			goto out_unlock_up;
@@ -819,7 +889,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 out_unlock_up:
 	shm_unlock(shp);
 out_up:
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 	goto out;
 out_unlock:
 	shm_unlock(shp);
@@ -890,13 +960,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	 * additional creator id...
 	 */
 	ns = current->nsproxy->ipc_ns;
-	shp = shm_lock(ns, shmid);
-	if(shp == NULL)
+	shp = shm_lock_check(ns, shmid);
+	if (IS_ERR(shp)) {
+		err = PTR_ERR(shp);
 		goto out;
-
-	err = shm_checkid(ns, shp,shmid);
-	if (err)
-		goto out_unlock;
+	}
 
 	err = -EACCES;
 	if (ipcperms(&shp->shm_perm, acc_mode))
@@ -925,7 +993,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
-	sfd->id = shp->id;
+	sfd->id = shp->shm_perm.id;
 	sfd->ns = get_ipc_ns(ns);
 	sfd->file = shp->shm_file;
 	sfd->vm_ops = NULL;
@@ -955,16 +1023,16 @@ invalid:
 	fput(file);
 
 out_nattch:
-	mutex_lock(&shm_ids(ns).mutex);
-	shp = shm_lock(ns, shmid);
-	BUG_ON(!shp);
+	down_write(&shm_ids(ns).rw_mutex);
+	shp = shm_lock_down(ns, shmid);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
 	   shp->shm_perm.mode & SHM_DEST)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
 out:
 	return err;
@@ -1094,7 +1162,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
 		format = BIG_STRING;
 	return seq_printf(s, format,
 			  shp->shm_perm.key,
-			  shp->id,
+			  shp->shm_perm.id,
 			  shp->shm_perm.mode,
 			  shp->shm_segsz,
 			  shp->shm_cprid,
diff --git a/ipc/util.c b/ipc/util.c
index 44e5135aee4..1aa0ebf71ba 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -32,6 +32,7 @@
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
+#include <linux/rwsem.h>
 
 #include <asm/unistd.h>
 
@@ -129,23 +130,16 @@ __initcall(ipc_init);
 /**
  *	ipc_init_ids		-	initialise IPC identifiers
  *	@ids: Identifier set
- *	@size: Number of identifiers
  *
- *	Given a size for the ipc identifier range (limited below IPCMNI)
- *	set up the sequence range to use then allocate and initialise the
- *	array itself. 
+ *	Set up the sequence range to use for the ipc identifier range (limited
+ *	below IPCMNI) then initialise the ids idr.
  */
  
-void ipc_init_ids(struct ipc_ids* ids, int size)
+void ipc_init_ids(struct ipc_ids *ids)
 {
-	int i;
+	init_rwsem(&ids->rw_mutex);
 
-	mutex_init(&ids->mutex);
-
-	if(size > IPCMNI)
-		size = IPCMNI;
 	ids->in_use = 0;
-	ids->max_id = -1;
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
@@ -155,17 +149,7 @@ void ipc_init_ids(struct ipc_ids* ids, int size)
 		 	ids->seq_max = seq_limit;
 	}
 
-	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
-				     sizeof(struct ipc_id_ary));
-
-	if(ids->entries == NULL) {
-		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		size = 0;
-		ids->entries = &ids->nullentry;
-	}
-	ids->entries->size = size;
-	for(i=0;i<size;i++)
-		ids->entries->p[i] = NULL;
+	idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -208,99 +192,96 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  *	@ids: Identifier set
  *	@key: The key to find
  *	
- *	Requires ipc_ids.mutex locked.
- *	Returns the identifier if found or -1 if not.
+ *	Requires ipc_ids.rw_mutex locked.
+ *	Returns the LOCKED pointer to the ipc structure if found or NULL
+ *	if not.
+ *	If key is found ipc points to the owning ipc structure
  */
  
-int ipc_findkey(struct ipc_ids* ids, key_t key)
+static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	int id;
-	struct kern_ipc_perm* p;
-	int max_id = ids->max_id;
+	struct kern_ipc_perm *ipc;
+	int next_id;
+	int total;
 
-	/*
-	 * rcu_dereference() is not needed here
-	 * since ipc_ids.mutex is held
-	 */
-	for (id = 0; id <= max_id; id++) {
-		p = ids->entries->p[id];
-		if(p==NULL)
+	for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
+		ipc = idr_find(&ids->ipcs_idr, next_id);
+
+		if (ipc == NULL)
+			continue;
+
+		if (ipc->key != key) {
+			total++;
 			continue;
-		if (key == p->key)
-			return id;
+		}
+
+		ipc_lock_by_ptr(ipc);
+		return ipc;
 	}
-	return -1;
+
+	return NULL;
 }
 
-/*
- * Requires ipc_ids.mutex locked
+/**
+ *	ipc_get_maxid 	-	get the last assigned id
+ *	@ids: IPC identifier set
+ *
+ *	Called with ipc_ids.rw_mutex held.
  */
-static int grow_ary(struct ipc_ids* ids, int newsize)
-{
-	struct ipc_id_ary* new;
-	struct ipc_id_ary* old;
-	int i;
-	int size = ids->entries->size;
-
-	if(newsize > IPCMNI)
-		newsize = IPCMNI;
-	if(newsize <= size)
-		return newsize;
-
-	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-			    sizeof(struct ipc_id_ary));
-	if(new == NULL)
-		return size;
-	new->size = newsize;
-	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
-	for(i=size;i<newsize;i++) {
-		new->p[i] = NULL;
-	}
-	old = ids->entries;
 
-	/*
-	 * Use rcu_assign_pointer() to make sure the memcpyed contents
-	 * of the new array are visible before the new array becomes visible.
-	 */
-	rcu_assign_pointer(ids->entries, new);
+int ipc_get_maxid(struct ipc_ids *ids)
+{
+	struct kern_ipc_perm *ipc;
+	int max_id = -1;
+	int total, id;
+
+	if (ids->in_use == 0)
+		return -1;
 
-	__ipc_fini_ids(ids, old);
-	return newsize;
+	if (ids->in_use == IPCMNI)
+		return IPCMNI - 1;
+
+	/* Look for the last assigned id */
+	total = 0;
+	for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL) {
+			max_id = id;
+			total++;
+		}
+	}
+	return max_id;
 }
 
 /**
  *	ipc_addid 	-	add an IPC identifier
  *	@ids: IPC identifier set
  *	@new: new IPC permission set
- *	@size: new size limit for the id array
+ *	@size: limit for the number of used ids
  *
- *	Add an entry 'new' to the IPC arrays. The permissions object is
+ *	Add an entry 'new' to the IPC ids idr. The permissions object is
  *	initialised and the first free entry is set up and the id assigned
- *	is returned. The list is returned in a locked state on success.
- *	On failure the list is not locked and -1 is returned.
+ *	is returned. The 'new' entry is returned in a locked state on success.
+ *	On failure the entry is not locked and a negative err-code is returned.
  *
- *	Called with ipc_ids.mutex held.
+ *	Called with ipc_ids.rw_mutex held as a writer.
  */
  
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
-	int id;
+	int id, err;
 
-	size = grow_ary(ids,size);
+	if (size > IPCMNI)
+		size = IPCMNI;
+
+	if (ids->in_use >= size)
+		return -ENOSPC;
+
+	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	if (err)
+		return err;
 
-	/*
-	 * rcu_dereference()() is not needed here since
-	 * ipc_ids.mutex is held
-	 */
-	for (id = 0; id < size; id++) {
-		if(ids->entries->p[id] == NULL)
-			goto found;
-	}
-	return -1;
-found:
 	ids->in_use++;
-	if (id > ids->max_id)
-		ids->max_id = id;
 
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
@@ -313,48 +294,153 @@ found:
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries->p[id] = new;
 	return id;
 }
 
 /**
+ *	ipcget_new	-	create a new ipc object
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is IPC_PRIVATE.
+ */
+int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	if (!err)
+		return -ENOMEM;
+
+	down_write(&ids->rw_mutex);
+	err = ops->getnew(ns, params);
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
+
+/**
+ *	ipc_check_perms	-	check security and permissions for an IPC
+ *	@ipcp: ipc permission set
+ *	@ops: the actual security routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *      when the key is not IPC_PRIVATE and that key already exists in the
+ *      ids IDR.
+ *
+ *	On success, the IPC id is returned.
+ *
+ *	It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ */
+static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
+			struct ipc_params *params)
+{
+	int err;
+
+	if (ipcperms(ipcp, params->flg))
+		err = -EACCES;
+	else {
+		err = ops->associate(ipcp, params->flg);
+		if (!err)
+			err = ipcp->id;
+	}
+
+	return err;
+}
+
+/**
+ *	ipcget_public	-	get an ipc object or create a new one
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is not IPC_PRIVATE.
+ *	It adds a new entry if the key is not found and does some permission
+ *      / security checkings if the key is found.
+ *
+ *	On success, the ipc id is returned.
+ */
+int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	struct kern_ipc_perm *ipcp;
+	int flg = params->flg;
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	/*
+	 * Take the lock as a writer since we are potentially going to add
+	 * a new entry + read locks are not "upgradable"
+	 */
+	down_write(&ids->rw_mutex);
+	ipcp = ipc_findkey(ids, params->key);
+	if (ipcp == NULL) {
+		/* key not used */
+		if (!(flg & IPC_CREAT))
+			err = -ENOENT;
+		else if (!err)
+			err = -ENOMEM;
+		else
+			err = ops->getnew(ns, params);
+	} else {
+		/* ipc object has been locked by ipc_findkey() */
+
+		if (flg & IPC_CREAT && flg & IPC_EXCL)
+			err = -EEXIST;
+		else {
+			err = 0;
+			if (ops->more_checks)
+				err = ops->more_checks(ipcp, params);
+			if (!err)
+				/*
+				 * ipc_check_perms returns the IPC id on
+				 * success
+				 */
+				err = ipc_check_perms(ipcp, ops, params);
+		}
+		ipc_unlock(ipcp);
+	}
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
+
+
+/**
  *	ipc_rmid	-	remove an IPC identifier
- *	@ids: identifier set
- *	@id: Identifier to remove
+ *	@ids: IPC identifier set
+ *	@ipcp: ipc perm structure containing the identifier to remove
  *
- *	The identifier must be valid, and in use. The kernel will panic if
- *	fed an invalid identifier. The entry is removed and internal
- *	variables recomputed. The object associated with the identifier
- *	is returned.
- *	ipc_ids.mutex and the spinlock for this ID is hold before this function
- *	is called, and remain locked on the exit.
+ *	ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *	before this function is called, and remain locked on the exit.
  */
  
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
+void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-	struct kern_ipc_perm* p;
-	int lid = id % SEQ_MULTIPLIER;
-	BUG_ON(lid >= ids->entries->size);
+	int lid = ipcid_to_idx(ipcp->id);
+
+	idr_remove(&ids->ipcs_idr, lid);
 
-	/* 
-	 * do not need a rcu_dereference()() here to force ordering
-	 * on Alpha, since the ipc_ids.mutex is held.
-	 */	
-	p = ids->entries->p[lid];
-	ids->entries->p[lid] = NULL;
-	BUG_ON(p==NULL);
 	ids->in_use--;
 
-	if (lid == ids->max_id) {
-		do {
-			lid--;
-			if(lid == -1)
-				break;
-		} while (ids->entries->p[lid] == NULL);
-		ids->max_id = lid;
-	}
-	p->deleted = 1;
-	return p;
+	ipcp->deleted = 1;
+
+	return;
 }
 
 /**
@@ -491,10 +577,12 @@ static void ipc_do_vfree(struct work_struct *work)
  */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-	struct ipc_rcu_grace *grace =
-		container_of(head, struct ipc_rcu_grace, rcu);
-	struct ipc_rcu_sched *sched =
-			container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+	struct ipc_rcu_grace *grace;
+	struct ipc_rcu_sched *sched;
+
+	grace = container_of(head, struct ipc_rcu_grace, rcu);
+	sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
+				data[0]);
 
 	INIT_WORK(&sched->work, ipc_do_vfree);
 	schedule_work(&sched->work);
@@ -583,7 +671,7 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
 }
 
 /**
- *	ipc64_perm_to_ipc_perm	-	convert old ipc permissions to new
+ *	ipc64_perm_to_ipc_perm	-	convert new ipc permissions to old
  *	@in: new style IPC permissions
  *	@out: old style IPC permissions
  *
@@ -602,44 +690,37 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 	out->seq	= in->seq;
 }
 
-/*
- * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.mutex locked.  Since grow_ary() is also called with
- * shm_ids.mutex down(for Shared Memory), there is no need to add read
- * barriers here to gurantee the writes in grow_ary() are seen in order 
- * here (for Alpha).
+/**
+ * ipc_lock - Lock an ipc structure without rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
  *
- * However ipc_get() itself does not necessary require ipc_ids.mutex down. So
- * if in the future ipc_get() is used by other places without ipc_ids.mutex
- * down, then ipc_get() needs read memery barriers as ipc_lock() does.
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is not already
+ * held, i.e. idr tree not protected: it protects the idr tree in read mode
+ * during the idr_find().
  */
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
-{
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->entries->size)
-		return NULL;
-	out = ids->entries->p[lid];
-	return out;
-}
 
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id_ary* entries;
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
+
+	down_read(&ids->rw_mutex);
 
 	rcu_read_lock();
-	entries = rcu_dereference(ids->entries);
-	if(lid >= entries->size) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	out = entries->p[lid];
-	if(out == NULL) {
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
 		rcu_read_unlock();
-		return NULL;
+		up_read(&ids->rw_mutex);
+		return ERR_PTR(-EINVAL);
 	}
+
+	up_read(&ids->rw_mutex);
+
 	spin_lock(&out->lock);
 	
 	/* ipc_rmid() may have already freed the ID while ipc_lock
@@ -648,33 +729,44 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 	if (out->deleted) {
 		spin_unlock(&out->lock);
 		rcu_read_unlock();
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
+
 	return out;
 }
 
-void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
-{
-	rcu_read_lock();
-	spin_lock(&perm->lock);
-}
+/**
+ * ipc_lock_down - Lock an ipc structure with rw_sem held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
+ *
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is already
+ * held, i.e. idr tree protected.
+ */
 
-void ipc_unlock(struct kern_ipc_perm* perm)
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
 {
-	spin_unlock(&perm->lock);
-	rcu_read_unlock();
-}
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
 
-int ipc_buildid(struct ipc_ids* ids, int id, int seq)
-{
-	return SEQ_MULTIPLIER*seq + id;
-}
+	rcu_read_lock();
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
+		rcu_read_unlock();
+		return ERR_PTR(-EINVAL);
+	}
 
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
-{
-	if(uid/SEQ_MULTIPLIER != ipcp->seq)
-		return 1;
-	return 0;
+	spin_lock(&out->lock);
+
+	/*
+	 * No need to verify that the structure is still valid since the
+	 * rw_mutex is held.
+	 */
+	return out;
 }
 
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
@@ -707,27 +799,30 @@ struct ipc_proc_iter {
 	struct ipc_proc_iface *iface;
 };
 
-static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+/*
+ * This routine locks the ipc structure found at least at position pos.
+ */
+struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
+					loff_t *new_pos)
 {
-	struct ipc_proc_iter *iter = s->private;
-	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc = it;
-	loff_t p;
-	struct ipc_ids *ids;
+	struct kern_ipc_perm *ipc;
+	int total, id;
 
-	ids = iter->ns->ids[iface->ids];
+	total = 0;
+	for (id = 0; id < pos && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL)
+			total++;
+	}
 
-	/* If we had an ipc id locked before, unlock it */
-	if (ipc && ipc != SEQ_START_TOKEN)
-		ipc_unlock(ipc);
+	if (total >= ids->in_use)
+		return NULL;
 
-	/*
-	 * p = *pos - 1 (because id 0 starts at position 1)
-	 *          + 1 (because we increment the position by one)
-	 */
-	for (p = *pos; p <= ids->max_id; p++) {
-		if ((ipc = ipc_lock(ids, p)) != NULL) {
-			*pos = p + 1;
+	for ( ; pos < IPCMNI; pos++) {
+		ipc = idr_find(&ids->ipcs_idr, pos);
+		if (ipc != NULL) {
+			*new_pos = pos + 1;
+			ipc_lock_by_ptr(ipc);
 			return ipc;
 		}
 	}
@@ -736,16 +831,27 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
 	return NULL;
 }
 
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+	struct ipc_proc_iter *iter = s->private;
+	struct ipc_proc_iface *iface = iter->iface;
+	struct kern_ipc_perm *ipc = it;
+
+	/* If we had an ipc id locked before, unlock it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
+}
+
 /*
- * File positions: pos 0 -> header, pos n -> ipc id + 1.
- * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START.
+ * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
+ * SeqFile iterator: iterator value locked ipc pointer or SEQ_TOKEN_START.
  */
 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 {
 	struct ipc_proc_iter *iter = s->private;
 	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc;
-	loff_t p;
 	struct ipc_ids *ids;
 
 	ids = iter->ns->ids[iface->ids];
@@ -754,7 +860,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 	 * Take the lock - this will be released by the corresponding
 	 * call to stop().
 	 */
-	mutex_lock(&ids->mutex);
+	down_read(&ids->rw_mutex);
 
 	/* pos < 0 is invalid */
 	if (*pos < 0)
@@ -765,13 +871,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 		return SEQ_START_TOKEN;
 
 	/* Find the (pos-1)th ipc */
-	for (p = *pos - 1; p <= ids->max_id; p++) {
-		if ((ipc = ipc_lock(ids, p)) != NULL) {
-			*pos = p + 1;
-			return ipc;
-		}
-	}
-	return NULL;
+	return sysvipc_find_ipc(ids, *pos - 1, pos);
 }
 
 static void sysvipc_proc_stop(struct seq_file *s, void *it)
@@ -781,13 +881,13 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
 	struct ipc_proc_iface *iface = iter->iface;
 	struct ipc_ids *ids;
 
-	/* If we had a locked segment, release it */
+	/* If we had a locked structure, release it */
 	if (ipc && ipc != SEQ_START_TOKEN)
 		ipc_unlock(ipc);
 
 	ids = iter->ns->ids[iface->ids];
 	/* Release the lock we took in start() */
-	mutex_unlock(&ids->mutex);
+	up_read(&ids->rw_mutex);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
diff --git a/ipc/util.h b/ipc/util.h
index 333e891bcac..9ffea40457c 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -10,6 +10,9 @@
 #ifndef _IPC_UTIL_H
 #define _IPC_UTIL_H
 
+#include <linux/idr.h>
+#include <linux/err.h>
+
 #define USHRT_MAX 0xffff
 #define SEQ_MULTIPLIER	(IPCMNI)
 
@@ -25,24 +28,46 @@ void sem_exit_ns(struct ipc_namespace *ns);
 void msg_exit_ns(struct ipc_namespace *ns);
 void shm_exit_ns(struct ipc_namespace *ns);
 
-struct ipc_id_ary {
-	int size;
-	struct kern_ipc_perm *p[0];
-};
-
 struct ipc_ids {
 	int in_use;
-	int max_id;
 	unsigned short seq;
 	unsigned short seq_max;
-	struct mutex mutex;
-	struct ipc_id_ary nullentry;
-	struct ipc_id_ary* entries;
+	struct rw_semaphore rw_mutex;
+	struct idr ipcs_idr;
+};
+
+/*
+ * Structure that holds the parameters needed by the ipc operations
+ * (see after)
+ */
+struct ipc_params {
+	key_t key;
+	int flg;
+	union {
+		size_t size;	/* for shared memories */
+		int nsems;	/* for semaphores */
+	} u;			/* holds the getnew() specific param */
+};
+
+/*
+ * Structure that holds some ipc operations. This structure is used to unify
+ * the calls to sys_msgget(), sys_semget(), sys_shmget()
+ *      . routine to call to create a new ipc object. Can be one of newque,
+ *        newary, newseg
+ *      . routine to call to check permissions for a new ipc object.
+ *        Can be one of security_msg_associate, security_sem_associate,
+ *        security_shm_associate
+ *      . routine to call for an extra check if needed
+ */
+struct ipc_ops {
+	int (*getnew) (struct ipc_namespace *, struct ipc_params *);
+	int (*associate) (struct kern_ipc_perm *, int);
+	int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *);
 };
 
 struct seq_file;
 
-void ipc_init_ids(struct ipc_ids *ids, int size);
+void ipc_init_ids(struct ipc_ids *);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
 		int ids, int (*show)(struct seq_file *, void *));
@@ -54,14 +79,19 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define IPC_MSG_IDS	1
 #define IPC_SHM_IDS	2
 
-/* must be called with ids->mutex acquired.*/
-int ipc_findkey(struct ipc_ids* ids, key_t key);
-int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
+#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
+
+/* must be called with ids->rw_mutex acquired for writing */
+int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
+
+/* must be called with ids->rw_mutex acquired for reading */
+int ipc_get_maxid(struct ipc_ids *);
 
 /* must be called with both locks acquired. */
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
+void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 
-int ipcperms (struct kern_ipc_perm *ipcp, short flg);
+/* must be called with ipcp locked */
+int ipcperms(struct kern_ipc_perm *ipcp, short flg);
 
 /* for rare, potentially huge allocations.
  * both function can sleep
@@ -79,24 +109,12 @@ void* ipc_rcu_alloc(int size);
 void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
-static inline void __ipc_fini_ids(struct ipc_ids *ids,
-		struct ipc_id_ary *entries)
-{
-	if (entries != &ids->nullentry)
-		ipc_rcu_putref(entries);
-}
-
-static inline void ipc_fini_ids(struct ipc_ids *ids)
-{
-	__ipc_fini_ids(ids, ids->entries);
-}
-
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
-void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
-void ipc_unlock(struct kern_ipc_perm* perm);
-int ipc_buildid(struct ipc_ids* ids, int id, int seq);
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
+/*
+ * ipc_lock_down: called with rw_mutex held
+ * ipc_lock: called without that lock held
+ */
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -111,5 +129,89 @@ int ipc_parse_version (int *cmd);
 extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+
+static inline int ipc_buildid(int id, int seq)
+{
+	return SEQ_MULTIPLIER * seq + id;
+}
+
+/*
+ * Must be called with ipcp locked
+ */
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+{
+	if (uid / SEQ_MULTIPLIER != ipcp->seq)
+		return 1;
+	return 0;
+}
+
+static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+	rcu_read_lock();
+	spin_lock(&perm->lock);
+}
+
+static inline void ipc_unlock(struct kern_ipc_perm *perm)
+{
+	spin_unlock(&perm->lock);
+	rcu_read_unlock();
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock_down(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+/**
+ * ipcget - Common sys_*get() code
+ * @ns : namsepace
+ * @ids : IPC identifier set
+ * @ops : operations to be called on ipc object creation, permission checks
+ *        and further checks
+ * @params : the parameters needed by the previous operations.
+ *
+ * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
+ */
+static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
+			struct ipc_ops *ops, struct ipc_params *params)
+{
+	if (params->key == IPC_PRIVATE)
+		return ipcget_new(ns, ids, ops, params);
+	else
+		return ipcget_public(ns, ids, ops, params);
+}
 
 #endif
diff --git a/kernel/Kconfig.instrumentation b/kernel/Kconfig.instrumentation
new file mode 100644
index 00000000000..f5f2c769d95
--- /dev/null
+++ b/kernel/Kconfig.instrumentation
@@ -0,0 +1,49 @@
+menuconfig INSTRUMENTATION
+	bool "Instrumentation Support"
+	default y
+	---help---
+	  Say Y here to get to see options related to performance measurement,
+	  system-wide debugging, and testing. This option alone does not add any
+	  kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled. If you're trying to debug the kernel itself, go see the
+	  Kernel Hacking menu.
+
+if INSTRUMENTATION
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	depends on ALPHA || ARM || BLACKFIN || X86_32 || IA64 || M32R || MIPS || PARISC || PPC || S390 || SUPERH || SPARC || X86_64
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+config KPROBES
+	bool "Kprobes"
+	depends on KALLSYMS && MODULES
+	depends on X86_32 || IA64 || PPC || S390 || SPARC64 || X86_64 || AVR32
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function.  register_kprobe() establishes
+	  a probepoint and specifies the callback.  Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+
+config MARKERS
+	bool "Activate markers"
+	help
+	  Place an empty function call at each marker site. Can be
+	  dynamically changed for a probe function.
+
+endif # INSTRUMENTATION
diff --git a/kernel/Makefile b/kernel/Makefile
index d63fbb18798..79f017e09fb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,9 +8,10 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
-	    utsname.o sysctl_check.o
+	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o \
+	    utsname.o notifier.o
 
+obj-$(CONFIG_SYSCTL) += sysctl_check.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
@@ -36,7 +37,11 @@ obj-$(CONFIG_PM) += power/
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_CGROUPS) += cgroup.o
+obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
+obj-$(CONFIG_CGROUP_CPUACCT) += cpu_acct.o
+obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -51,6 +56,7 @@ obj-$(CONFIG_RELAY) += relay.o
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_MARKERS) += marker.o
 
 ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/capability.c b/kernel/capability.c
index cbc5fd60c0f..efbd9cdce13 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 
 /*
@@ -61,8 +62,8 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
 	spin_lock(&task_capability_lock);
 	read_lock(&tasklist_lock);
 
-	if (pid && pid != current->pid) {
-		target = find_task_by_pid(pid);
+	if (pid && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
 		if (!target) {
 			ret = -ESRCH;
 			goto out;
@@ -95,7 +96,7 @@ static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
 	int found = 0;
 	struct pid *pgrp;
 
-	pgrp = find_pid(pgrp_nr);
+	pgrp = find_vpid(pgrp_nr);
 	do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
 		target = g;
 		while_each_thread(g, target) {
@@ -129,7 +130,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
      int found = 0;
 
      do_each_thread(g, target) {
-             if (target == current || is_init(target))
+             if (target == current || is_container_init(target->group_leader))
                      continue;
              found = 1;
 	     if (security_capset_check(target, effective, inheritable,
@@ -184,7 +185,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	if (get_user(pid, &header->pid))
 		return -EFAULT;
 
-	if (pid && pid != current->pid && !capable(CAP_SETPCAP))
+	if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
 		return -EPERM;
 
 	if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -195,8 +196,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
 	spin_lock(&task_capability_lock);
 	read_lock(&tasklist_lock);
 
-	if (pid > 0 && pid != current->pid) {
-		target = find_task_by_pid(pid);
+	if (pid > 0 && pid != task_pid_vnr(current)) {
+		target = find_task_by_vpid(pid);
 		if (!target) {
 			ret = -ESRCH;
 			goto out;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
new file mode 100644
index 00000000000..5987dccdb2a
--- /dev/null
+++ b/kernel/cgroup.c
@@ -0,0 +1,2805 @@
+/*
+ *  kernel/cgroup.c
+ *
+ *  Generic process-grouping system.
+ *
+ *  Based originally on the cpuset system, extracted by Paul Menage
+ *  Copyright (C) 2006 Google, Inc
+ *
+ *  Copyright notices from the original cpuset code:
+ *  --------------------------------------------------
+ *  Copyright (C) 2003 BULL SA.
+ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *
+ *  Portions derived from Patrick Mochel's sysfs code.
+ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+ *
+ *  2003-10-10 Written by Simon Derr.
+ *  2003-10-22 Updates by Stephen Hemminger.
+ *  2004 May-July Rework by Paul Jackson.
+ *  ---------------------------------------------------
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/cgroup.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/backing-dev.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sort.h>
+#include <linux/kmod.h>
+#include <linux/delayacct.h>
+#include <linux/cgroupstats.h>
+
+#include <asm/atomic.h>
+
+static DEFINE_MUTEX(cgroup_mutex);
+
+/* Generate an array of cgroup subsystem pointers */
+#define SUBSYS(_x) &_x ## _subsys,
+
+static struct cgroup_subsys *subsys[] = {
+#include <linux/cgroup_subsys.h>
+};
+
+/*
+ * A cgroupfs_root represents the root of a cgroup hierarchy,
+ * and may be associated with a superblock to form an active
+ * hierarchy
+ */
+struct cgroupfs_root {
+	struct super_block *sb;
+
+	/*
+	 * The bitmask of subsystems intended to be attached to this
+	 * hierarchy
+	 */
+	unsigned long subsys_bits;
+
+	/* The bitmask of subsystems currently attached to this hierarchy */
+	unsigned long actual_subsys_bits;
+
+	/* A list running through the attached subsystems */
+	struct list_head subsys_list;
+
+	/* The root cgroup for this hierarchy */
+	struct cgroup top_cgroup;
+
+	/* Tracks how many cgroups are currently defined in hierarchy.*/
+	int number_of_cgroups;
+
+	/* A list running through the mounted hierarchies */
+	struct list_head root_list;
+
+	/* Hierarchy-specific flags */
+	unsigned long flags;
+
+	/* The path to use for release notifications. No locking
+	 * between setting and use - so if userspace updates this
+	 * while child cgroups exist, you could miss a
+	 * notification. We ensure that it's always a valid
+	 * NUL-terminated string */
+	char release_agent_path[PATH_MAX];
+};
+
+
+/*
+ * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
+ * subsystems that are otherwise unattached - it never has more than a
+ * single cgroup, and all tasks are part of that cgroup.
+ */
+static struct cgroupfs_root rootnode;
+
+/* The list of hierarchy roots */
+
+static LIST_HEAD(roots);
+static int root_count;
+
+/* dummytop is a shorthand for the dummy hierarchy's top cgroup */
+#define dummytop (&rootnode.top_cgroup)
+
+/* This flag indicates whether tasks in the fork and exit paths should
+ * take callback_mutex and check for fork/exit handlers to call. This
+ * avoids us having to do extra work in the fork/exit path if none of the
+ * subsystems need to be called.
+ */
+static int need_forkexit_callback;
+
+/* bits in struct cgroup flags field */
+enum {
+	/* Control Group is dead */
+	CGRP_REMOVED,
+	/* Control Group has previously had a child cgroup or a task,
+	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
+	CGRP_RELEASABLE,
+	/* Control Group requires release notifications to userspace */
+	CGRP_NOTIFY_ON_RELEASE,
+};
+
+/* convenient tests for these bits */
+inline int cgroup_is_removed(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_REMOVED, &cgrp->flags);
+}
+
+/* bits in struct cgroupfs_root flags field */
+enum {
+	ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+};
+
+inline int cgroup_is_releasable(const struct cgroup *cgrp)
+{
+	const int bits =
+		(1 << CGRP_RELEASABLE) |
+		(1 << CGRP_NOTIFY_ON_RELEASE);
+	return (cgrp->flags & bits) == bits;
+}
+
+inline int notify_on_release(const struct cgroup *cgrp)
+{
+	return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+}
+
+/*
+ * for_each_subsys() allows you to iterate on each subsystem attached to
+ * an active hierarchy
+ */
+#define for_each_subsys(_root, _ss) \
+list_for_each_entry(_ss, &_root->subsys_list, sibling)
+
+/* for_each_root() allows you to iterate across the active hierarchies */
+#define for_each_root(_root) \
+list_for_each_entry(_root, &roots, root_list)
+
+/* the list of cgroups eligible for automatic release. Protected by
+ * release_list_lock */
+static LIST_HEAD(release_list);
+static DEFINE_SPINLOCK(release_list_lock);
+static void cgroup_release_agent(struct work_struct *work);
+static DECLARE_WORK(release_agent_work, cgroup_release_agent);
+static void check_for_release(struct cgroup *cgrp);
+
+/* Link structure for associating css_set objects with cgroups */
+struct cg_cgroup_link {
+	/*
+	 * List running through cg_cgroup_links associated with a
+	 * cgroup, anchored on cgroup->css_sets
+	 */
+	struct list_head cgrp_link_list;
+	/*
+	 * List running through cg_cgroup_links pointing at a
+	 * single css_set object, anchored on css_set->cg_links
+	 */
+	struct list_head cg_link_list;
+	struct css_set *cg;
+};
+
+/* The default css_set - used by init and its children prior to any
+ * hierarchies being mounted. It contains a pointer to the root state
+ * for each subsystem. Also used to anchor the list of css_sets. Not
+ * reference-counted, to improve performance when child cgroups
+ * haven't been created.
+ */
+
+static struct css_set init_css_set;
+static struct cg_cgroup_link init_css_set_link;
+
+/* css_set_lock protects the list of css_set objects, and the
+ * chain of tasks off each css_set.  Nests outside task->alloc_lock
+ * due to cgroup_iter_start() */
+static DEFINE_RWLOCK(css_set_lock);
+static int css_set_count;
+
+/* We don't maintain the lists running through each css_set to its
+ * task until after the first call to cgroup_iter_start(). This
+ * reduces the fork()/exit() overhead for people who have cgroups
+ * compiled into their kernel but not actually in use */
+static int use_task_css_set_links;
+
+/* When we create or destroy a css_set, the operation simply
+ * takes/releases a reference count on all the cgroups referenced
+ * by subsystems in this css_set. This can end up multiple-counting
+ * some cgroups, but that's OK - the ref-count is just a
+ * busy/not-busy indicator; ensuring that we only count each cgroup
+ * once would require taking a global lock to ensure that no
+ * subsystems moved between hierarchies while we were doing so.
+ *
+ * Possible TODO: decide at boot time based on the number of
+ * registered subsystems and the number of CPUs or NUMA nodes whether
+ * it's better for performance to ref-count every subsystem, or to
+ * take a global lock and only add one ref count to each hierarchy.
+ */
+
+/*
+ * unlink a css_set from the list and free it
+ */
+static void unlink_css_set(struct css_set *cg)
+{
+	write_lock(&css_set_lock);
+	list_del(&cg->list);
+	css_set_count--;
+	while (!list_empty(&cg->cg_links)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(cg->cg_links.next,
+				  struct cg_cgroup_link, cg_link_list);
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+	write_unlock(&css_set_lock);
+}
+
+static void __release_css_set(struct kref *k, int taskexit)
+{
+	int i;
+	struct css_set *cg = container_of(k, struct css_set, ref);
+
+	unlink_css_set(cg);
+
+	rcu_read_lock();
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup *cgrp = cg->subsys[i]->cgroup;
+		if (atomic_dec_and_test(&cgrp->count) &&
+		    notify_on_release(cgrp)) {
+			if (taskexit)
+				set_bit(CGRP_RELEASABLE, &cgrp->flags);
+			check_for_release(cgrp);
+		}
+	}
+	rcu_read_unlock();
+	kfree(cg);
+}
+
+static void release_css_set(struct kref *k)
+{
+	__release_css_set(k, 0);
+}
+
+static void release_css_set_taskexit(struct kref *k)
+{
+	__release_css_set(k, 1);
+}
+
+/*
+ * refcounted get/put for css_set objects
+ */
+static inline void get_css_set(struct css_set *cg)
+{
+	kref_get(&cg->ref);
+}
+
+static inline void put_css_set(struct css_set *cg)
+{
+	kref_put(&cg->ref, release_css_set);
+}
+
+static inline void put_css_set_taskexit(struct css_set *cg)
+{
+	kref_put(&cg->ref, release_css_set_taskexit);
+}
+
+/*
+ * find_existing_css_set() is a helper for
+ * find_css_set(), and checks to see whether an existing
+ * css_set is suitable. This currently walks a linked-list for
+ * simplicity; a later patch will use a hash table for better
+ * performance
+ *
+ * oldcg: the cgroup group that we're using before the cgroup
+ * transition
+ *
+ * cgrp: the cgroup that we're moving into
+ *
+ * template: location in which to build the desired set of subsystem
+ * state objects for the new cgroup group
+ */
+
+static struct css_set *find_existing_css_set(
+	struct css_set *oldcg,
+	struct cgroup *cgrp,
+	struct cgroup_subsys_state *template[])
+{
+	int i;
+	struct cgroupfs_root *root = cgrp->root;
+	struct list_head *l = &init_css_set.list;
+
+	/* Built the set of subsystem state objects that we want to
+	 * see in the new css_set */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		if (root->subsys_bits & (1ull << i)) {
+			/* Subsystem is in this hierarchy. So we want
+			 * the subsystem state from the new
+			 * cgroup */
+			template[i] = cgrp->subsys[i];
+		} else {
+			/* Subsystem is not in this hierarchy, so we
+			 * don't want to change the subsystem state */
+			template[i] = oldcg->subsys[i];
+		}
+	}
+
+	/* Look through existing cgroup groups to find one to reuse */
+	do {
+		struct css_set *cg =
+			list_entry(l, struct css_set, list);
+
+		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+			/* All subsystems matched */
+			return cg;
+		}
+		/* Try the next cgroup group */
+		l = l->next;
+	} while (l != &init_css_set.list);
+
+	/* No existing cgroup group matched */
+	return NULL;
+}
+
+/*
+ * allocate_cg_links() allocates "count" cg_cgroup_link structures
+ * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
+ * success or a negative error
+ */
+
+static int allocate_cg_links(int count, struct list_head *tmp)
+{
+	struct cg_cgroup_link *link;
+	int i;
+	INIT_LIST_HEAD(tmp);
+	for (i = 0; i < count; i++) {
+		link = kmalloc(sizeof(*link), GFP_KERNEL);
+		if (!link) {
+			while (!list_empty(tmp)) {
+				link = list_entry(tmp->next,
+						  struct cg_cgroup_link,
+						  cgrp_link_list);
+				list_del(&link->cgrp_link_list);
+				kfree(link);
+			}
+			return -ENOMEM;
+		}
+		list_add(&link->cgrp_link_list, tmp);
+	}
+	return 0;
+}
+
+static void free_cg_links(struct list_head *tmp)
+{
+	while (!list_empty(tmp)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(tmp->next,
+				  struct cg_cgroup_link,
+				  cgrp_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+}
+
+/*
+ * find_css_set() takes an existing cgroup group and a
+ * cgroup object, and returns a css_set object that's
+ * equivalent to the old group, but with the given cgroup
+ * substituted into the appropriate hierarchy. Must be called with
+ * cgroup_mutex held
+ */
+
+static struct css_set *find_css_set(
+	struct css_set *oldcg, struct cgroup *cgrp)
+{
+	struct css_set *res;
+	struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
+	int i;
+
+	struct list_head tmp_cg_links;
+	struct cg_cgroup_link *link;
+
+	/* First see if we already have a cgroup group that matches
+	 * the desired set */
+	write_lock(&css_set_lock);
+	res = find_existing_css_set(oldcg, cgrp, template);
+	if (res)
+		get_css_set(res);
+	write_unlock(&css_set_lock);
+
+	if (res)
+		return res;
+
+	res = kmalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return NULL;
+
+	/* Allocate all the cg_cgroup_link objects that we'll need */
+	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
+		kfree(res);
+		return NULL;
+	}
+
+	kref_init(&res->ref);
+	INIT_LIST_HEAD(&res->cg_links);
+	INIT_LIST_HEAD(&res->tasks);
+
+	/* Copy the set of subsystem state objects generated in
+	 * find_existing_css_set() */
+	memcpy(res->subsys, template, sizeof(res->subsys));
+
+	write_lock(&css_set_lock);
+	/* Add reference counts and links from the new css_set. */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup *cgrp = res->subsys[i]->cgroup;
+		struct cgroup_subsys *ss = subsys[i];
+		atomic_inc(&cgrp->count);
+		/*
+		 * We want to add a link once per cgroup, so we
+		 * only do it for the first subsystem in each
+		 * hierarchy
+		 */
+		if (ss->root->subsys_list.next == &ss->sibling) {
+			BUG_ON(list_empty(&tmp_cg_links));
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_cgroup_link,
+					  cgrp_link_list);
+			list_del(&link->cgrp_link_list);
+			list_add(&link->cgrp_link_list, &cgrp->css_sets);
+			link->cg = res;
+			list_add(&link->cg_link_list, &res->cg_links);
+		}
+	}
+	if (list_empty(&rootnode.subsys_list)) {
+		link = list_entry(tmp_cg_links.next,
+				  struct cg_cgroup_link,
+				  cgrp_link_list);
+		list_del(&link->cgrp_link_list);
+		list_add(&link->cgrp_link_list, &dummytop->css_sets);
+		link->cg = res;
+		list_add(&link->cg_link_list, &res->cg_links);
+	}
+
+	BUG_ON(!list_empty(&tmp_cg_links));
+
+	/* Link this cgroup group into the list */
+	list_add(&res->list, &init_css_set.list);
+	css_set_count++;
+	INIT_LIST_HEAD(&res->tasks);
+	write_unlock(&css_set_lock);
+
+	return res;
+}
+
+/*
+ * There is one global cgroup mutex. We also require taking
+ * task_lock() when dereferencing a task's cgroup subsys pointers.
+ * See "The task_lock() exception", at the end of this comment.
+ *
+ * A task must hold cgroup_mutex to modify cgroups.
+ *
+ * Any task can increment and decrement the count field without lock.
+ * So in general, code holding cgroup_mutex can't rely on the count
+ * field not changing.  However, if the count goes to zero, then only
+ * attach_task() can increment it again.  Because a count of zero
+ * means that no tasks are currently attached, therefore there is no
+ * way a task attached to that cgroup can fork (the other way to
+ * increment the count).  So code holding cgroup_mutex can safely
+ * assume that if the count is zero, it will stay zero. Similarly, if
+ * a task holds cgroup_mutex on a cgroup with zero count, it
+ * knows that the cgroup won't be removed, as cgroup_rmdir()
+ * needs that mutex.
+ *
+ * The cgroup_common_file_write handler for operations that modify
+ * the cgroup hierarchy holds cgroup_mutex across the entire operation,
+ * single threading all such cgroup modifications across the system.
+ *
+ * The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
+ * (usually) take cgroup_mutex.  These are the two most performance
+ * critical pieces of code here.  The exception occurs on cgroup_exit(),
+ * when a task in a notify_on_release cgroup exits.  Then cgroup_mutex
+ * is taken, and if the cgroup count is zero, a usermode call made
+ * to /sbin/cgroup_release_agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * A cgroup can only be deleted if both its 'count' of using tasks
+ * is zero, and its list of 'children' cgroups is empty.  Since all
+ * tasks in the system use _some_ cgroup, and since there is always at
+ * least one task in the system (init, pid == 1), therefore, top_cgroup
+ * always has either children cgroups and/or using tasks.  So we don't
+ * need a special hack to ensure that top_cgroup cannot be deleted.
+ *
+ *	The task_lock() exception
+ *
+ * The need for this exception arises from the action of
+ * attach_task(), which overwrites one tasks cgroup pointer with
+ * another.  It does so using cgroup_mutexe, however there are
+ * several performance critical places that need to reference
+ * task->cgroup without the expense of grabbing a system global
+ * mutex.  Therefore except as noted below, when dereferencing or, as
+ * in attach_task(), modifying a task'ss cgroup pointer we use
+ * task_lock(), which acts on a spinlock (task->alloc_lock) already in
+ * the task_struct routinely used for such matters.
+ *
+ * P.S.  One more locking exception.  RCU is used to guard the
+ * update of a tasks cgroup pointer by attach_task()
+ */
+
+/**
+ * cgroup_lock - lock out any changes to cgroup structures
+ *
+ */
+
+void cgroup_lock(void)
+{
+	mutex_lock(&cgroup_mutex);
+}
+
+/**
+ * cgroup_unlock - release lock on cgroup changes
+ *
+ * Undo the lock taken in a previous cgroup_lock() call.
+ */
+
+void cgroup_unlock(void)
+{
+	mutex_unlock(&cgroup_mutex);
+}
+
+/*
+ * A couple of forward declarations required, due to cyclic reference loop:
+ * cgroup_mkdir -> cgroup_create -> cgroup_populate_dir ->
+ * cgroup_add_file -> cgroup_create_file -> cgroup_dir_inode_operations
+ * -> cgroup_mkdir.
+ */
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
+static int cgroup_populate_dir(struct cgroup *cgrp);
+static struct inode_operations cgroup_dir_inode_operations;
+static struct file_operations proc_cgroupstats_operations;
+
+static struct backing_dev_info cgroup_backing_dev_info = {
+	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = current->fsuid;
+		inode->i_gid = current->fsgid;
+		inode->i_blocks = 0;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
+	}
+	return inode;
+}
+
+static void cgroup_diput(struct dentry *dentry, struct inode *inode)
+{
+	/* is dentry a directory ? if so, kfree() associated cgroup */
+	if (S_ISDIR(inode->i_mode)) {
+		struct cgroup *cgrp = dentry->d_fsdata;
+		BUG_ON(!(cgroup_is_removed(cgrp)));
+		/* It's possible for external users to be holding css
+		 * reference counts on a cgroup; css_put() needs to
+		 * be able to access the cgroup after decrementing
+		 * the reference count in order to know if it needs to
+		 * queue the cgroup to be handled by the release
+		 * agent */
+		synchronize_rcu();
+		kfree(cgrp);
+	}
+	iput(inode);
+}
+
+static void remove_dir(struct dentry *d)
+{
+	struct dentry *parent = dget(d->d_parent);
+
+	d_delete(d);
+	simple_rmdir(parent->d_inode, d);
+	dput(parent);
+}
+
+static void cgroup_clear_directory(struct dentry *dentry)
+{
+	struct list_head *node;
+
+	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	spin_lock(&dcache_lock);
+	node = dentry->d_subdirs.next;
+	while (node != &dentry->d_subdirs) {
+		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+		list_del_init(node);
+		if (d->d_inode) {
+			/* This should never be called on a cgroup
+			 * directory with child cgroups */
+			BUG_ON(d->d_inode->i_mode & S_IFDIR);
+			d = dget_locked(d);
+			spin_unlock(&dcache_lock);
+			d_delete(d);
+			simple_unlink(dentry->d_inode, d);
+			dput(d);
+			spin_lock(&dcache_lock);
+		}
+		node = dentry->d_subdirs.next;
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/*
+ * NOTE : the dentry must have been dget()'ed
+ */
+static void cgroup_d_remove_dir(struct dentry *dentry)
+{
+	cgroup_clear_directory(dentry);
+
+	spin_lock(&dcache_lock);
+	list_del_init(&dentry->d_u.d_child);
+	spin_unlock(&dcache_lock);
+	remove_dir(dentry);
+}
+
+static int rebind_subsystems(struct cgroupfs_root *root,
+			      unsigned long final_bits)
+{
+	unsigned long added_bits, removed_bits;
+	struct cgroup *cgrp = &root->top_cgroup;
+	int i;
+
+	removed_bits = root->actual_subsys_bits & ~final_bits;
+	added_bits = final_bits & ~root->actual_subsys_bits;
+	/* Check that any added subsystems are currently free */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		unsigned long long bit = 1ull << i;
+		struct cgroup_subsys *ss = subsys[i];
+		if (!(bit & added_bits))
+			continue;
+		if (ss->root != &rootnode) {
+			/* Subsystem isn't free */
+			return -EBUSY;
+		}
+	}
+
+	/* Currently we don't handle adding/removing subsystems when
+	 * any child cgroups exist. This is theoretically supportable
+	 * but involves complex error handling, so it's being left until
+	 * later */
+	if (!list_empty(&cgrp->children))
+		return -EBUSY;
+
+	/* Process each subsystem */
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		unsigned long bit = 1UL << i;
+		if (bit & added_bits) {
+			/* We're binding this subsystem to this hierarchy */
+			BUG_ON(cgrp->subsys[i]);
+			BUG_ON(!dummytop->subsys[i]);
+			BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
+			cgrp->subsys[i] = dummytop->subsys[i];
+			cgrp->subsys[i]->cgroup = cgrp;
+			list_add(&ss->sibling, &root->subsys_list);
+			rcu_assign_pointer(ss->root, root);
+			if (ss->bind)
+				ss->bind(ss, cgrp);
+
+		} else if (bit & removed_bits) {
+			/* We're removing this subsystem */
+			BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
+			BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
+			if (ss->bind)
+				ss->bind(ss, dummytop);
+			dummytop->subsys[i]->cgroup = dummytop;
+			cgrp->subsys[i] = NULL;
+			rcu_assign_pointer(subsys[i]->root, &rootnode);
+			list_del(&ss->sibling);
+		} else if (bit & final_bits) {
+			/* Subsystem state should already exist */
+			BUG_ON(!cgrp->subsys[i]);
+		} else {
+			/* Subsystem state shouldn't exist */
+			BUG_ON(cgrp->subsys[i]);
+		}
+	}
+	root->subsys_bits = root->actual_subsys_bits = final_bits;
+	synchronize_rcu();
+
+	return 0;
+}
+
+static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
+{
+	struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+	for_each_subsys(root, ss)
+		seq_printf(seq, ",%s", ss->name);
+	if (test_bit(ROOT_NOPREFIX, &root->flags))
+		seq_puts(seq, ",noprefix");
+	if (strlen(root->release_agent_path))
+		seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+struct cgroup_sb_opts {
+	unsigned long subsys_bits;
+	unsigned long flags;
+	char *release_agent;
+};
+
+/* Convert a hierarchy specifier into a bitmask of subsystems and
+ * flags. */
+static int parse_cgroupfs_options(char *data,
+				     struct cgroup_sb_opts *opts)
+{
+	char *token, *o = data ?: "all";
+
+	opts->subsys_bits = 0;
+	opts->flags = 0;
+	opts->release_agent = NULL;
+
+	while ((token = strsep(&o, ",")) != NULL) {
+		if (!*token)
+			return -EINVAL;
+		if (!strcmp(token, "all")) {
+			opts->subsys_bits = (1 << CGROUP_SUBSYS_COUNT) - 1;
+		} else if (!strcmp(token, "noprefix")) {
+			set_bit(ROOT_NOPREFIX, &opts->flags);
+		} else if (!strncmp(token, "release_agent=", 14)) {
+			/* Specifying two release agents is forbidden */
+			if (opts->release_agent)
+				return -EINVAL;
+			opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+			if (!opts->release_agent)
+				return -ENOMEM;
+			strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
+			opts->release_agent[PATH_MAX - 1] = 0;
+		} else {
+			struct cgroup_subsys *ss;
+			int i;
+			for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+				ss = subsys[i];
+				if (!strcmp(token, ss->name)) {
+					set_bit(i, &opts->subsys_bits);
+					break;
+				}
+			}
+			if (i == CGROUP_SUBSYS_COUNT)
+				return -ENOENT;
+		}
+	}
+
+	/* We can't have an empty hierarchy */
+	if (!opts->subsys_bits)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cgroup_remount(struct super_block *sb, int *flags, char *data)
+{
+	int ret = 0;
+	struct cgroupfs_root *root = sb->s_fs_info;
+	struct cgroup *cgrp = &root->top_cgroup;
+	struct cgroup_sb_opts opts;
+
+	mutex_lock(&cgrp->dentry->d_inode->i_mutex);
+	mutex_lock(&cgroup_mutex);
+
+	/* See what subsystems are wanted */
+	ret = parse_cgroupfs_options(data, &opts);
+	if (ret)
+		goto out_unlock;
+
+	/* Don't allow flags to change at remount */
+	if (opts.flags != root->flags) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = rebind_subsystems(root, opts.subsys_bits);
+
+	/* (re)populate subsystem files */
+	if (!ret)
+		cgroup_populate_dir(cgrp);
+
+	if (opts.release_agent)
+		strcpy(root->release_agent_path, opts.release_agent);
+ out_unlock:
+	if (opts.release_agent)
+		kfree(opts.release_agent);
+	mutex_unlock(&cgroup_mutex);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+	return ret;
+}
+
+static struct super_operations cgroup_ops = {
+	.statfs = simple_statfs,
+	.drop_inode = generic_delete_inode,
+	.show_options = cgroup_show_options,
+	.remount_fs = cgroup_remount,
+};
+
+static void init_cgroup_root(struct cgroupfs_root *root)
+{
+	struct cgroup *cgrp = &root->top_cgroup;
+	INIT_LIST_HEAD(&root->subsys_list);
+	INIT_LIST_HEAD(&root->root_list);
+	root->number_of_cgroups = 1;
+	cgrp->root = root;
+	cgrp->top_cgroup = cgrp;
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+}
+
+static int cgroup_test_super(struct super_block *sb, void *data)
+{
+	struct cgroupfs_root *new = data;
+	struct cgroupfs_root *root = sb->s_fs_info;
+
+	/* First check subsystems */
+	if (new->subsys_bits != root->subsys_bits)
+	    return 0;
+
+	/* Next check flags */
+	if (new->flags != root->flags)
+		return 0;
+
+	return 1;
+}
+
+static int cgroup_set_super(struct super_block *sb, void *data)
+{
+	int ret;
+	struct cgroupfs_root *root = data;
+
+	ret = set_anon_super(sb, NULL);
+	if (ret)
+		return ret;
+
+	sb->s_fs_info = root;
+	root->sb = sb;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = CGROUP_SUPER_MAGIC;
+	sb->s_op = &cgroup_ops;
+
+	return 0;
+}
+
+static int cgroup_get_rootdir(struct super_block *sb)
+{
+	struct inode *inode =
+		cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
+	struct dentry *dentry;
+
+	if (!inode)
+		return -ENOMEM;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &cgroup_dir_inode_operations;
+	/* directories start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+	dentry = d_alloc_root(inode);
+	if (!dentry) {
+		iput(inode);
+		return -ENOMEM;
+	}
+	sb->s_root = dentry;
+	return 0;
+}
+
+static int cgroup_get_sb(struct file_system_type *fs_type,
+			 int flags, const char *unused_dev_name,
+			 void *data, struct vfsmount *mnt)
+{
+	struct cgroup_sb_opts opts;
+	int ret = 0;
+	struct super_block *sb;
+	struct cgroupfs_root *root;
+	struct list_head tmp_cg_links, *l;
+	INIT_LIST_HEAD(&tmp_cg_links);
+
+	/* First find the desired set of subsystems */
+	ret = parse_cgroupfs_options(data, &opts);
+	if (ret) {
+		if (opts.release_agent)
+			kfree(opts.release_agent);
+		return ret;
+	}
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+
+	init_cgroup_root(root);
+	root->subsys_bits = opts.subsys_bits;
+	root->flags = opts.flags;
+	if (opts.release_agent) {
+		strcpy(root->release_agent_path, opts.release_agent);
+		kfree(opts.release_agent);
+	}
+
+	sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
+
+	if (IS_ERR(sb)) {
+		kfree(root);
+		return PTR_ERR(sb);
+	}
+
+	if (sb->s_fs_info != root) {
+		/* Reusing an existing superblock */
+		BUG_ON(sb->s_root == NULL);
+		kfree(root);
+		root = NULL;
+	} else {
+		/* New superblock */
+		struct cgroup *cgrp = &root->top_cgroup;
+		struct inode *inode;
+
+		BUG_ON(sb->s_root != NULL);
+
+		ret = cgroup_get_rootdir(sb);
+		if (ret)
+			goto drop_new_super;
+		inode = sb->s_root->d_inode;
+
+		mutex_lock(&inode->i_mutex);
+		mutex_lock(&cgroup_mutex);
+
+		/*
+		 * We're accessing css_set_count without locking
+		 * css_set_lock here, but that's OK - it can only be
+		 * increased by someone holding cgroup_lock, and
+		 * that's us. The worst that can happen is that we
+		 * have some link structures left over
+		 */
+		ret = allocate_cg_links(css_set_count, &tmp_cg_links);
+		if (ret) {
+			mutex_unlock(&cgroup_mutex);
+			mutex_unlock(&inode->i_mutex);
+			goto drop_new_super;
+		}
+
+		ret = rebind_subsystems(root, root->subsys_bits);
+		if (ret == -EBUSY) {
+			mutex_unlock(&cgroup_mutex);
+			mutex_unlock(&inode->i_mutex);
+			goto drop_new_super;
+		}
+
+		/* EBUSY should be the only error here */
+		BUG_ON(ret);
+
+		list_add(&root->root_list, &roots);
+		root_count++;
+
+		sb->s_root->d_fsdata = &root->top_cgroup;
+		root->top_cgroup.dentry = sb->s_root;
+
+		/* Link the top cgroup in this hierarchy into all
+		 * the css_set objects */
+		write_lock(&css_set_lock);
+		l = &init_css_set.list;
+		do {
+			struct css_set *cg;
+			struct cg_cgroup_link *link;
+			cg = list_entry(l, struct css_set, list);
+			BUG_ON(list_empty(&tmp_cg_links));
+			link = list_entry(tmp_cg_links.next,
+					  struct cg_cgroup_link,
+					  cgrp_link_list);
+			list_del(&link->cgrp_link_list);
+			link->cg = cg;
+			list_add(&link->cgrp_link_list,
+				 &root->top_cgroup.css_sets);
+			list_add(&link->cg_link_list, &cg->cg_links);
+			l = l->next;
+		} while (l != &init_css_set.list);
+		write_unlock(&css_set_lock);
+
+		free_cg_links(&tmp_cg_links);
+
+		BUG_ON(!list_empty(&cgrp->sibling));
+		BUG_ON(!list_empty(&cgrp->children));
+		BUG_ON(root->number_of_cgroups != 1);
+
+		cgroup_populate_dir(cgrp);
+		mutex_unlock(&inode->i_mutex);
+		mutex_unlock(&cgroup_mutex);
+	}
+
+	return simple_set_mnt(mnt, sb);
+
+ drop_new_super:
+	up_write(&sb->s_umount);
+	deactivate_super(sb);
+	free_cg_links(&tmp_cg_links);
+	return ret;
+}
+
+static void cgroup_kill_sb(struct super_block *sb) {
+	struct cgroupfs_root *root = sb->s_fs_info;
+	struct cgroup *cgrp = &root->top_cgroup;
+	int ret;
+
+	BUG_ON(!root);
+
+	BUG_ON(root->number_of_cgroups != 1);
+	BUG_ON(!list_empty(&cgrp->children));
+	BUG_ON(!list_empty(&cgrp->sibling));
+
+	mutex_lock(&cgroup_mutex);
+
+	/* Rebind all subsystems back to the default hierarchy */
+	ret = rebind_subsystems(root, 0);
+	/* Shouldn't be able to fail ... */
+	BUG_ON(ret);
+
+	/*
+	 * Release all the links from css_sets to this hierarchy's
+	 * root cgroup
+	 */
+	write_lock(&css_set_lock);
+	while (!list_empty(&cgrp->css_sets)) {
+		struct cg_cgroup_link *link;
+		link = list_entry(cgrp->css_sets.next,
+				  struct cg_cgroup_link, cgrp_link_list);
+		list_del(&link->cg_link_list);
+		list_del(&link->cgrp_link_list);
+		kfree(link);
+	}
+	write_unlock(&css_set_lock);
+
+	if (!list_empty(&root->root_list)) {
+		list_del(&root->root_list);
+		root_count--;
+	}
+	mutex_unlock(&cgroup_mutex);
+
+	kfree(root);
+	kill_litter_super(sb);
+}
+
+static struct file_system_type cgroup_fs_type = {
+	.name = "cgroup",
+	.get_sb = cgroup_get_sb,
+	.kill_sb = cgroup_kill_sb,
+};
+
+static inline struct cgroup *__d_cgrp(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+static inline struct cftype *__d_cft(struct dentry *dentry)
+{
+	return dentry->d_fsdata;
+}
+
+/*
+ * Called with cgroup_mutex held.  Writes path of cgroup into buf.
+ * Returns 0 on success, -errno on error.
+ */
+int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
+{
+	char *start;
+
+	if (cgrp == dummytop) {
+		/*
+		 * Inactive subsystems have no dentry for their root
+		 * cgroup
+		 */
+		strcpy(buf, "/");
+		return 0;
+	}
+
+	start = buf + buflen;
+
+	*--start = '\0';
+	for (;;) {
+		int len = cgrp->dentry->d_name.len;
+		if ((start -= len) < buf)
+			return -ENAMETOOLONG;
+		memcpy(start, cgrp->dentry->d_name.name, len);
+		cgrp = cgrp->parent;
+		if (!cgrp)
+			break;
+		if (!cgrp->parent)
+			continue;
+		if (--start < buf)
+			return -ENAMETOOLONG;
+		*start = '/';
+	}
+	memmove(buf, start, buf + buflen - start);
+	return 0;
+}
+
+/*
+ * Return the first subsystem attached to a cgroup's hierarchy, and
+ * its subsystem id.
+ */
+
+static void get_first_subsys(const struct cgroup *cgrp,
+			struct cgroup_subsys_state **css, int *subsys_id)
+{
+	const struct cgroupfs_root *root = cgrp->root;
+	const struct cgroup_subsys *test_ss;
+	BUG_ON(list_empty(&root->subsys_list));
+	test_ss = list_entry(root->subsys_list.next,
+			     struct cgroup_subsys, sibling);
+	if (css) {
+		*css = cgrp->subsys[test_ss->subsys_id];
+		BUG_ON(!*css);
+	}
+	if (subsys_id)
+		*subsys_id = test_ss->subsys_id;
+}
+
+/*
+ * Attach task 'tsk' to cgroup 'cgrp'
+ *
+ * Call holding cgroup_mutex.  May take task_lock of
+ * the task 'pid' during call.
+ */
+static int attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	int retval = 0;
+	struct cgroup_subsys *ss;
+	struct cgroup *oldcgrp;
+	struct css_set *cg = tsk->cgroups;
+	struct css_set *newcg;
+	struct cgroupfs_root *root = cgrp->root;
+	int subsys_id;
+
+	get_first_subsys(cgrp, NULL, &subsys_id);
+
+	/* Nothing to do if the task is already in that cgroup */
+	oldcgrp = task_cgroup(tsk, subsys_id);
+	if (cgrp == oldcgrp)
+		return 0;
+
+	for_each_subsys(root, ss) {
+		if (ss->can_attach) {
+			retval = ss->can_attach(ss, cgrp, tsk);
+			if (retval) {
+				return retval;
+			}
+		}
+	}
+
+	/*
+	 * Locate or allocate a new css_set for this task,
+	 * based on its final set of cgroups
+	 */
+	newcg = find_css_set(cg, cgrp);
+	if (!newcg) {
+		return -ENOMEM;
+	}
+
+	task_lock(tsk);
+	if (tsk->flags & PF_EXITING) {
+		task_unlock(tsk);
+		put_css_set(newcg);
+		return -ESRCH;
+	}
+	rcu_assign_pointer(tsk->cgroups, newcg);
+	task_unlock(tsk);
+
+	/* Update the css_set linked lists if we're using them */
+	write_lock(&css_set_lock);
+	if (!list_empty(&tsk->cg_list)) {
+		list_del(&tsk->cg_list);
+		list_add(&tsk->cg_list, &newcg->tasks);
+	}
+	write_unlock(&css_set_lock);
+
+	for_each_subsys(root, ss) {
+		if (ss->attach) {
+			ss->attach(ss, cgrp, oldcgrp, tsk);
+		}
+	}
+	set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
+	synchronize_rcu();
+	put_css_set(cg);
+	return 0;
+}
+
+/*
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
+ * cgroup_mutex, may take task_lock of task
+ */
+static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+{
+	pid_t pid;
+	struct task_struct *tsk;
+	int ret;
+
+	if (sscanf(pidbuf, "%d", &pid) != 1)
+		return -EIO;
+
+	if (pid) {
+		rcu_read_lock();
+		tsk = find_task_by_pid(pid);
+		if (!tsk || tsk->flags & PF_EXITING) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+		get_task_struct(tsk);
+		rcu_read_unlock();
+
+		if ((current->euid) && (current->euid != tsk->uid)
+		    && (current->euid != tsk->suid)) {
+			put_task_struct(tsk);
+			return -EACCES;
+		}
+	} else {
+		tsk = current;
+		get_task_struct(tsk);
+	}
+
+	ret = attach_task(cgrp, tsk);
+	put_task_struct(tsk);
+	return ret;
+}
+
+/* The various types of files and directories in a cgroup file system */
+
+enum cgroup_filetype {
+	FILE_ROOT,
+	FILE_DIR,
+	FILE_TASKLIST,
+	FILE_NOTIFY_ON_RELEASE,
+	FILE_RELEASABLE,
+	FILE_RELEASE_AGENT,
+};
+
+static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft,
+				 struct file *file,
+				 const char __user *userbuf,
+				 size_t nbytes, loff_t *unused_ppos)
+{
+	char buffer[64];
+	int retval = 0;
+	u64 val;
+	char *end;
+
+	if (!nbytes)
+		return -EINVAL;
+	if (nbytes >= sizeof(buffer))
+		return -E2BIG;
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;     /* nul-terminate */
+
+	/* strip newline if necessary */
+	if (nbytes && (buffer[nbytes-1] == '\n'))
+		buffer[nbytes-1] = 0;
+	val = simple_strtoull(buffer, &end, 0);
+	if (*end)
+		return -EINVAL;
+
+	/* Pass to subsystem */
+	retval = cft->write_uint(cgrp, cft, val);
+	if (!retval)
+		retval = nbytes;
+	return retval;
+}
+
+static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
+					   struct cftype *cft,
+					   struct file *file,
+					   const char __user *userbuf,
+					   size_t nbytes, loff_t *unused_ppos)
+{
+	enum cgroup_filetype type = cft->private;
+	char *buffer;
+	int retval = 0;
+
+	if (nbytes >= PATH_MAX)
+		return -E2BIG;
+
+	/* +1 for nul-terminator */
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (buffer == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buffer, userbuf, nbytes)) {
+		retval = -EFAULT;
+		goto out1;
+	}
+	buffer[nbytes] = 0;	/* nul-terminate */
+
+	mutex_lock(&cgroup_mutex);
+
+	if (cgroup_is_removed(cgrp)) {
+		retval = -ENODEV;
+		goto out2;
+	}
+
+	switch (type) {
+	case FILE_TASKLIST:
+		retval = attach_task_by_pid(cgrp, buffer);
+		break;
+	case FILE_NOTIFY_ON_RELEASE:
+		clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+		if (simple_strtoul(buffer, NULL, 10) != 0)
+			set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		else
+			clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+		break;
+	case FILE_RELEASE_AGENT:
+	{
+		struct cgroupfs_root *root = cgrp->root;
+		/* Strip trailing newline */
+		if (nbytes && (buffer[nbytes-1] == '\n')) {
+			buffer[nbytes-1] = 0;
+		}
+		if (nbytes < sizeof(root->release_agent_path)) {
+			/* We never write anything other than '\0'
+			 * into the last char of release_agent_path,
+			 * so it always remains a NUL-terminated
+			 * string */
+			strncpy(root->release_agent_path, buffer, nbytes);
+			root->release_agent_path[nbytes] = 0;
+		} else {
+			retval = -ENOSPC;
+		}
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out2;
+	}
+
+	if (retval == 0)
+		retval = nbytes;
+out2:
+	mutex_unlock(&cgroup_mutex);
+out1:
+	kfree(buffer);
+	return retval;
+}
+
+static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
+						size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+	if (cft->write)
+		return cft->write(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->write_uint)
+		return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos);
+	return -EINVAL;
+}
+
+static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft,
+				   struct file *file,
+				   char __user *buf, size_t nbytes,
+				   loff_t *ppos)
+{
+	char tmp[64];
+	u64 val = cft->read_uint(cgrp, cft);
+	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
+
+	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
+}
+
+static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
+					  struct cftype *cft,
+					  struct file *file,
+					  char __user *buf,
+					  size_t nbytes, loff_t *ppos)
+{
+	enum cgroup_filetype type = cft->private;
+	char *page;
+	ssize_t retval = 0;
+	char *s;
+
+	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+		return -ENOMEM;
+
+	s = page;
+
+	switch (type) {
+	case FILE_RELEASE_AGENT:
+	{
+		struct cgroupfs_root *root;
+		size_t n;
+		mutex_lock(&cgroup_mutex);
+		root = cgrp->root;
+		n = strnlen(root->release_agent_path,
+			    sizeof(root->release_agent_path));
+		n = min(n, (size_t) PAGE_SIZE);
+		strncpy(s, root->release_agent_path, n);
+		mutex_unlock(&cgroup_mutex);
+		s += n;
+		break;
+	}
+	default:
+		retval = -EINVAL;
+		goto out;
+	}
+	*s++ = '\n';
+
+	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
+out:
+	free_page((unsigned long)page);
+	return retval;
+}
+
+static ssize_t cgroup_file_read(struct file *file, char __user *buf,
+				   size_t nbytes, loff_t *ppos)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+
+	if (!cft)
+		return -ENODEV;
+
+	if (cft->read)
+		return cft->read(cgrp, cft, file, buf, nbytes, ppos);
+	if (cft->read_uint)
+		return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos);
+	return -EINVAL;
+}
+
+static int cgroup_file_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct cftype *cft;
+
+	err = generic_file_open(inode, file);
+	if (err)
+		return err;
+
+	cft = __d_cft(file->f_dentry);
+	if (!cft)
+		return -ENODEV;
+	if (cft->open)
+		err = cft->open(inode, file);
+	else
+		err = 0;
+
+	return err;
+}
+
+static int cgroup_file_release(struct inode *inode, struct file *file)
+{
+	struct cftype *cft = __d_cft(file->f_dentry);
+	if (cft->release)
+		return cft->release(inode, file);
+	return 0;
+}
+
+/*
+ * cgroup_rename - Only allow simple rename of directories in place.
+ */
+static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
+			    struct inode *new_dir, struct dentry *new_dentry)
+{
+	if (!S_ISDIR(old_dentry->d_inode->i_mode))
+		return -ENOTDIR;
+	if (new_dentry->d_inode)
+		return -EEXIST;
+	if (old_dir != new_dir)
+		return -EIO;
+	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
+static struct file_operations cgroup_file_operations = {
+	.read = cgroup_file_read,
+	.write = cgroup_file_write,
+	.llseek = generic_file_llseek,
+	.open = cgroup_file_open,
+	.release = cgroup_file_release,
+};
+
+static struct inode_operations cgroup_dir_inode_operations = {
+	.lookup = simple_lookup,
+	.mkdir = cgroup_mkdir,
+	.rmdir = cgroup_rmdir,
+	.rename = cgroup_rename,
+};
+
+static int cgroup_create_file(struct dentry *dentry, int mode,
+				struct super_block *sb)
+{
+	static struct dentry_operations cgroup_dops = {
+		.d_iput = cgroup_diput,
+	};
+
+	struct inode *inode;
+
+	if (!dentry)
+		return -ENOENT;
+	if (dentry->d_inode)
+		return -EEXIST;
+
+	inode = cgroup_new_inode(mode, sb);
+	if (!inode)
+		return -ENOMEM;
+
+	if (S_ISDIR(mode)) {
+		inode->i_op = &cgroup_dir_inode_operations;
+		inode->i_fop = &simple_dir_operations;
+
+		/* start off with i_nlink == 2 (for "." entry) */
+		inc_nlink(inode);
+
+		/* start with the directory inode held, so that we can
+		 * populate it without racing with another mkdir */
+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+	} else if (S_ISREG(mode)) {
+		inode->i_size = 0;
+		inode->i_fop = &cgroup_file_operations;
+	}
+	dentry->d_op = &cgroup_dops;
+	d_instantiate(dentry, inode);
+	dget(dentry);	/* Extra count - pin the dentry in core */
+	return 0;
+}
+
+/*
+ *	cgroup_create_dir - create a directory for an object.
+ *	cgrp:	the cgroup we create the directory for.
+ *		It must have a valid ->parent field
+ *		And we are going to fill its ->dentry field.
+ *	dentry: dentry of the new cgroup
+ *	mode:	mode to set on new directory.
+ */
+static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
+				int mode)
+{
+	struct dentry *parent;
+	int error = 0;
+
+	parent = cgrp->parent->dentry;
+	error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
+	if (!error) {
+		dentry->d_fsdata = cgrp;
+		inc_nlink(parent->d_inode);
+		cgrp->dentry = dentry;
+		dget(dentry);
+	}
+	dput(dentry);
+
+	return error;
+}
+
+int cgroup_add_file(struct cgroup *cgrp,
+		       struct cgroup_subsys *subsys,
+		       const struct cftype *cft)
+{
+	struct dentry *dir = cgrp->dentry;
+	struct dentry *dentry;
+	int error;
+
+	char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
+	if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
+		strcpy(name, subsys->name);
+		strcat(name, ".");
+	}
+	strcat(name, cft->name);
+	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
+	dentry = lookup_one_len(name, dir, strlen(name));
+	if (!IS_ERR(dentry)) {
+		error = cgroup_create_file(dentry, 0644 | S_IFREG,
+						cgrp->root->sb);
+		if (!error)
+			dentry->d_fsdata = (void *)cft;
+		dput(dentry);
+	} else
+		error = PTR_ERR(dentry);
+	return error;
+}
+
+int cgroup_add_files(struct cgroup *cgrp,
+			struct cgroup_subsys *subsys,
+			const struct cftype cft[],
+			int count)
+{
+	int i, err;
+	for (i = 0; i < count; i++) {
+		err = cgroup_add_file(cgrp, subsys, &cft[i]);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/* Count the number of tasks in a cgroup. */
+
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+	int count = 0;
+	struct list_head *l;
+
+	read_lock(&css_set_lock);
+	l = cgrp->css_sets.next;
+	while (l != &cgrp->css_sets) {
+		struct cg_cgroup_link *link =
+			list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+		count += atomic_read(&link->cg->ref.refcount);
+		l = l->next;
+	}
+	read_unlock(&css_set_lock);
+	return count;
+}
+
+/*
+ * Advance a list_head iterator.  The iterator should be positioned at
+ * the start of a css_set
+ */
+static void cgroup_advance_iter(struct cgroup *cgrp,
+					  struct cgroup_iter *it)
+{
+	struct list_head *l = it->cg_link;
+	struct cg_cgroup_link *link;
+	struct css_set *cg;
+
+	/* Advance to the next non-empty css_set */
+	do {
+		l = l->next;
+		if (l == &cgrp->css_sets) {
+			it->cg_link = NULL;
+			return;
+		}
+		link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
+		cg = link->cg;
+	} while (list_empty(&cg->tasks));
+	it->cg_link = l;
+	it->task = cg->tasks.next;
+}
+
+void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+	/*
+	 * The first time anyone tries to iterate across a cgroup,
+	 * we need to enable the list linking each css_set to its
+	 * tasks, and fix up all existing tasks.
+	 */
+	if (!use_task_css_set_links) {
+		struct task_struct *p, *g;
+		write_lock(&css_set_lock);
+		use_task_css_set_links = 1;
+ 		do_each_thread(g, p) {
+			task_lock(p);
+			if (list_empty(&p->cg_list))
+				list_add(&p->cg_list, &p->cgroups->tasks);
+			task_unlock(p);
+ 		} while_each_thread(g, p);
+		write_unlock(&css_set_lock);
+	}
+	read_lock(&css_set_lock);
+	it->cg_link = &cgrp->css_sets;
+	cgroup_advance_iter(cgrp, it);
+}
+
+struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
+					struct cgroup_iter *it)
+{
+	struct task_struct *res;
+	struct list_head *l = it->task;
+
+	/* If the iterator cg is NULL, we have no tasks */
+	if (!it->cg_link)
+		return NULL;
+	res = list_entry(l, struct task_struct, cg_list);
+	/* Advance iterator to find next entry */
+	l = l->next;
+	if (l == &res->cgroups->tasks) {
+		/* We reached the end of this task list - move on to
+		 * the next cg_cgroup_link */
+		cgroup_advance_iter(cgrp, it);
+	} else {
+		it->task = l;
+	}
+	return res;
+}
+
+void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
+{
+	read_unlock(&css_set_lock);
+}
+
+/*
+ * Stuff for reading the 'tasks' file.
+ *
+ * Reading this file can return large amounts of data if a cgroup has
+ * *lots* of attached tasks. So it may need several calls to read(),
+ * but we cannot guarantee that the information we produce is correct
+ * unless we produce it entirely atomically.
+ *
+ * Upon tasks file open(), a struct ctr_struct is allocated, that
+ * will have a pointer to an array (also allocated here).  The struct
+ * ctr_struct * is stored in file->private_data.  Its resources will
+ * be freed by release() when the file is closed.  The array is used
+ * to sprintf the PIDs and then used by read().
+ */
+struct ctr_struct {
+	char *buf;
+	int bufsz;
+};
+
+/*
+ * Load into 'pidarray' up to 'npids' of the tasks using cgroup
+ * 'cgrp'.  Return actual number of pids loaded.  No need to
+ * task_lock(p) when reading out p->cgroup, since we're in an RCU
+ * read section, so the css_set can't go away, and is
+ * immutable after creation.
+ */
+static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+{
+	int n = 0;
+	struct cgroup_iter it;
+	struct task_struct *tsk;
+	cgroup_iter_start(cgrp, &it);
+	while ((tsk = cgroup_iter_next(cgrp, &it))) {
+		if (unlikely(n == npids))
+			break;
+		pidarray[n++] = task_pid_nr(tsk);
+	}
+	cgroup_iter_end(cgrp, &it);
+	return n;
+}
+
+/**
+ * Build and fill cgroupstats so that taskstats can export it to user
+ * space.
+ *
+ * @stats: cgroupstats to fill information into
+ * @dentry: A dentry entry belonging to the cgroup for which stats have
+ * been requested.
+ */
+int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
+{
+	int ret = -EINVAL;
+	struct cgroup *cgrp;
+	struct cgroup_iter it;
+	struct task_struct *tsk;
+	/*
+	 * Validate dentry by checking the superblock operations
+	 */
+	if (dentry->d_sb->s_op != &cgroup_ops)
+		 goto err;
+
+	ret = 0;
+	cgrp = dentry->d_fsdata;
+	rcu_read_lock();
+
+	cgroup_iter_start(cgrp, &it);
+	while ((tsk = cgroup_iter_next(cgrp, &it))) {
+		switch (tsk->state) {
+		case TASK_RUNNING:
+			stats->nr_running++;
+			break;
+		case TASK_INTERRUPTIBLE:
+			stats->nr_sleeping++;
+			break;
+		case TASK_UNINTERRUPTIBLE:
+			stats->nr_uninterruptible++;
+			break;
+		case TASK_STOPPED:
+			stats->nr_stopped++;
+			break;
+		default:
+			if (delayacct_is_task_waiting_on_io(tsk))
+				stats->nr_io_wait++;
+			break;
+		}
+	}
+	cgroup_iter_end(cgrp, &it);
+
+	rcu_read_unlock();
+err:
+	return ret;
+}
+
+static int cmppid(const void *a, const void *b)
+{
+	return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+ * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
+ * count 'cnt' of how many chars would be written if buf were large enough.
+ */
+static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+{
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < npids; i++)
+		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+	return cnt;
+}
+
+/*
+ * Handle an open on 'tasks' file.  Prepare a buffer listing the
+ * process id's of tasks currently attached to the cgroup being opened.
+ *
+ * Does not require any specific cgroup mutexes, and does not take any.
+ */
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
+	struct ctr_struct *ctr;
+	pid_t *pidarray;
+	int npids;
+	char c;
+
+	if (!(file->f_mode & FMODE_READ))
+		return 0;
+
+	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+	if (!ctr)
+		goto err0;
+
+	/*
+	 * If cgroup gets more users after we read count, we won't have
+	 * enough space - tough.  This race is indistinguishable to the
+	 * caller from the case that the additional cgroup users didn't
+	 * show up until sometime later on.
+	 */
+	npids = cgroup_task_count(cgrp);
+	if (npids) {
+		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+		if (!pidarray)
+			goto err1;
+
+		npids = pid_array_load(pidarray, npids, cgrp);
+		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+
+		/* Call pid_array_to_buf() twice, first just to get bufsz */
+		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+		if (!ctr->buf)
+			goto err2;
+		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+
+		kfree(pidarray);
+	} else {
+		ctr->buf = 0;
+		ctr->bufsz = 0;
+	}
+	file->private_data = ctr;
+	return 0;
+
+err2:
+	kfree(pidarray);
+err1:
+	kfree(ctr);
+err0:
+	return -ENOMEM;
+}
+
+static ssize_t cgroup_tasks_read(struct cgroup *cgrp,
+				    struct cftype *cft,
+				    struct file *file, char __user *buf,
+				    size_t nbytes, loff_t *ppos)
+{
+	struct ctr_struct *ctr = file->private_data;
+
+	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+}
+
+static int cgroup_tasks_release(struct inode *unused_inode,
+					struct file *file)
+{
+	struct ctr_struct *ctr;
+
+	if (file->f_mode & FMODE_READ) {
+		ctr = file->private_data;
+		kfree(ctr->buf);
+		kfree(ctr);
+	}
+	return 0;
+}
+
+static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
+					    struct cftype *cft)
+{
+	return notify_on_release(cgrp);
+}
+
+static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft)
+{
+	return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+/*
+ * for the common functions, 'private' gives the type of file
+ */
+static struct cftype files[] = {
+	{
+		.name = "tasks",
+		.open = cgroup_tasks_open,
+		.read = cgroup_tasks_read,
+		.write = cgroup_common_file_write,
+		.release = cgroup_tasks_release,
+		.private = FILE_TASKLIST,
+	},
+
+	{
+		.name = "notify_on_release",
+		.read_uint = cgroup_read_notify_on_release,
+		.write = cgroup_common_file_write,
+		.private = FILE_NOTIFY_ON_RELEASE,
+	},
+
+	{
+		.name = "releasable",
+		.read_uint = cgroup_read_releasable,
+		.private = FILE_RELEASABLE,
+	}
+};
+
+static struct cftype cft_release_agent = {
+	.name = "release_agent",
+	.read = cgroup_common_file_read,
+	.write = cgroup_common_file_write,
+	.private = FILE_RELEASE_AGENT,
+};
+
+static int cgroup_populate_dir(struct cgroup *cgrp)
+{
+	int err;
+	struct cgroup_subsys *ss;
+
+	/* First clear out any existing files */
+	cgroup_clear_directory(cgrp->dentry);
+
+	err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
+	if (err < 0)
+		return err;
+
+	if (cgrp == cgrp->top_cgroup) {
+		if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
+			return err;
+	}
+
+	for_each_subsys(cgrp->root, ss) {
+		if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static void init_cgroup_css(struct cgroup_subsys_state *css,
+			       struct cgroup_subsys *ss,
+			       struct cgroup *cgrp)
+{
+	css->cgroup = cgrp;
+	atomic_set(&css->refcnt, 0);
+	css->flags = 0;
+	if (cgrp == dummytop)
+		set_bit(CSS_ROOT, &css->flags);
+	BUG_ON(cgrp->subsys[ss->subsys_id]);
+	cgrp->subsys[ss->subsys_id] = css;
+}
+
+/*
+ *	cgroup_create - create a cgroup
+ *	parent:	cgroup that will be parent of the new cgroup.
+ *	name:		name of the new cgroup. Will be strcpy'ed.
+ *	mode:		mode to set on new inode
+ *
+ *	Must be called with the mutex on the parent inode held
+ */
+
+static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
+			     int mode)
+{
+	struct cgroup *cgrp;
+	struct cgroupfs_root *root = parent->root;
+	int err = 0;
+	struct cgroup_subsys *ss;
+	struct super_block *sb = root->sb;
+
+	cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
+	if (!cgrp)
+		return -ENOMEM;
+
+	/* Grab a reference on the superblock so the hierarchy doesn't
+	 * get deleted on unmount if there are child cgroups.  This
+	 * can be done outside cgroup_mutex, since the sb can't
+	 * disappear while someone has an open control file on the
+	 * fs */
+	atomic_inc(&sb->s_active);
+
+	mutex_lock(&cgroup_mutex);
+
+	cgrp->flags = 0;
+	INIT_LIST_HEAD(&cgrp->sibling);
+	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->css_sets);
+	INIT_LIST_HEAD(&cgrp->release_list);
+
+	cgrp->parent = parent;
+	cgrp->root = parent->root;
+	cgrp->top_cgroup = parent->top_cgroup;
+
+	for_each_subsys(root, ss) {
+		struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+		if (IS_ERR(css)) {
+			err = PTR_ERR(css);
+			goto err_destroy;
+		}
+		init_cgroup_css(css, ss, cgrp);
+	}
+
+	list_add(&cgrp->sibling, &cgrp->parent->children);
+	root->number_of_cgroups++;
+
+	err = cgroup_create_dir(cgrp, dentry, mode);
+	if (err < 0)
+		goto err_remove;
+
+	/* The cgroup directory was pre-locked for us */
+	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
+
+	err = cgroup_populate_dir(cgrp);
+	/* If err < 0, we have a half-filled directory - oh well ;) */
+
+	mutex_unlock(&cgroup_mutex);
+	mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
+
+	return 0;
+
+ err_remove:
+
+	list_del(&cgrp->sibling);
+	root->number_of_cgroups--;
+
+ err_destroy:
+
+	for_each_subsys(root, ss) {
+		if (cgrp->subsys[ss->subsys_id])
+			ss->destroy(ss, cgrp);
+	}
+
+	mutex_unlock(&cgroup_mutex);
+
+	/* Release the reference count that we took on the superblock */
+	deactivate_super(sb);
+
+	kfree(cgrp);
+	return err;
+}
+
+static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct cgroup *c_parent = dentry->d_parent->d_fsdata;
+
+	/* the vfs holds inode->i_mutex already */
+	return cgroup_create(c_parent, dentry, mode | S_IFDIR);
+}
+
+static inline int cgroup_has_css_refs(struct cgroup *cgrp)
+{
+	/* Check the reference count on each subsystem. Since we
+	 * already established that there are no tasks in the
+	 * cgroup, if the css refcount is also 0, then there should
+	 * be no outstanding references, so the subsystem is safe to
+	 * destroy. We scan across all subsystems rather than using
+	 * the per-hierarchy linked list of mounted subsystems since
+	 * we can be called via check_for_release() with no
+	 * synchronization other than RCU, and the subsystem linked
+	 * list isn't RCU-safe */
+	int i;
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		struct cgroup_subsys_state *css;
+		/* Skip subsystems not in this hierarchy */
+		if (ss->root != cgrp->root)
+			continue;
+		css = cgrp->subsys[ss->subsys_id];
+		/* When called from check_for_release() it's possible
+		 * that by this point the cgroup has been removed
+		 * and the css deleted. But a false-positive doesn't
+		 * matter, since it can only happen if the cgroup
+		 * has been deleted and hence no longer needs the
+		 * release agent to be called anyway. */
+		if (css && atomic_read(&css->refcnt)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
+{
+	struct cgroup *cgrp = dentry->d_fsdata;
+	struct dentry *d;
+	struct cgroup *parent;
+	struct cgroup_subsys *ss;
+	struct super_block *sb;
+	struct cgroupfs_root *root;
+
+	/* the vfs holds both inode->i_mutex already */
+
+	mutex_lock(&cgroup_mutex);
+	if (atomic_read(&cgrp->count) != 0) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+	if (!list_empty(&cgrp->children)) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+
+	parent = cgrp->parent;
+	root = cgrp->root;
+	sb = root->sb;
+
+	if (cgroup_has_css_refs(cgrp)) {
+		mutex_unlock(&cgroup_mutex);
+		return -EBUSY;
+	}
+
+	for_each_subsys(root, ss) {
+		if (cgrp->subsys[ss->subsys_id])
+			ss->destroy(ss, cgrp);
+	}
+
+	spin_lock(&release_list_lock);
+	set_bit(CGRP_REMOVED, &cgrp->flags);
+	if (!list_empty(&cgrp->release_list))
+		list_del(&cgrp->release_list);
+	spin_unlock(&release_list_lock);
+	/* delete my sibling from parent->children */
+	list_del(&cgrp->sibling);
+	spin_lock(&cgrp->dentry->d_lock);
+	d = dget(cgrp->dentry);
+	cgrp->dentry = NULL;
+	spin_unlock(&d->d_lock);
+
+	cgroup_d_remove_dir(d);
+	dput(d);
+	root->number_of_cgroups--;
+
+	set_bit(CGRP_RELEASABLE, &parent->flags);
+	check_for_release(parent);
+
+	mutex_unlock(&cgroup_mutex);
+	/* Drop the active superblock reference that we took when we
+	 * created the cgroup */
+	deactivate_super(sb);
+	return 0;
+}
+
+static void cgroup_init_subsys(struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+	struct list_head *l;
+	printk(KERN_ERR "Initializing cgroup subsys %s\n", ss->name);
+
+	/* Create the top cgroup state for this subsystem */
+	ss->root = &rootnode;
+	css = ss->create(ss, dummytop);
+	/* We don't handle early failures gracefully */
+	BUG_ON(IS_ERR(css));
+	init_cgroup_css(css, ss, dummytop);
+
+	/* Update all cgroup groups to contain a subsys
+	 * pointer to this state - since the subsystem is
+	 * newly registered, all tasks and hence all cgroup
+	 * groups are in the subsystem's top cgroup. */
+	write_lock(&css_set_lock);
+	l = &init_css_set.list;
+	do {
+		struct css_set *cg =
+			list_entry(l, struct css_set, list);
+		cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
+		l = l->next;
+	} while (l != &init_css_set.list);
+	write_unlock(&css_set_lock);
+
+ 	/* If this subsystem requested that it be notified with fork
+ 	 * events, we should send it one now for every process in the
+ 	 * system */
+	if (ss->fork) {
+		struct task_struct *g, *p;
+
+		read_lock(&tasklist_lock);
+		do_each_thread(g, p) {
+			ss->fork(ss, p);
+		} while_each_thread(g, p);
+		read_unlock(&tasklist_lock);
+	}
+
+	need_forkexit_callback |= ss->fork || ss->exit;
+
+	ss->active = 1;
+}
+
+/**
+ * cgroup_init_early - initialize cgroups at system boot, and
+ * initialize any subsystems that request early init.
+ */
+int __init cgroup_init_early(void)
+{
+	int i;
+	kref_init(&init_css_set.ref);
+	kref_get(&init_css_set.ref);
+	INIT_LIST_HEAD(&init_css_set.list);
+	INIT_LIST_HEAD(&init_css_set.cg_links);
+	INIT_LIST_HEAD(&init_css_set.tasks);
+	css_set_count = 1;
+	init_cgroup_root(&rootnode);
+	list_add(&rootnode.root_list, &roots);
+	root_count = 1;
+	init_task.cgroups = &init_css_set;
+
+	init_css_set_link.cg = &init_css_set;
+	list_add(&init_css_set_link.cgrp_link_list,
+		 &rootnode.top_cgroup.css_sets);
+	list_add(&init_css_set_link.cg_link_list,
+		 &init_css_set.cg_links);
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+
+		BUG_ON(!ss->name);
+		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
+		BUG_ON(!ss->create);
+		BUG_ON(!ss->destroy);
+		if (ss->subsys_id != i) {
+			printk(KERN_ERR "Subsys %s id == %d\n",
+			       ss->name, ss->subsys_id);
+			BUG();
+		}
+
+		if (ss->early_init)
+			cgroup_init_subsys(ss);
+	}
+	return 0;
+}
+
+/**
+ * cgroup_init - register cgroup filesystem and /proc file, and
+ * initialize any subsystems that didn't request early init.
+ */
+int __init cgroup_init(void)
+{
+	int err;
+	int i;
+	struct proc_dir_entry *entry;
+
+	err = bdi_init(&cgroup_backing_dev_info);
+	if (err)
+		return err;
+
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		if (!ss->early_init)
+			cgroup_init_subsys(ss);
+	}
+
+	err = register_filesystem(&cgroup_fs_type);
+	if (err < 0)
+		goto out;
+
+	entry = create_proc_entry("cgroups", 0, NULL);
+	if (entry)
+		entry->proc_fops = &proc_cgroupstats_operations;
+
+out:
+	if (err)
+		bdi_destroy(&cgroup_backing_dev_info);
+
+	return err;
+}
+
+/*
+ * proc_cgroup_show()
+ *  - Print task's cgroup paths into seq_file, one line for each hierarchy
+ *  - Used for /proc/<pid>/cgroup.
+ *  - No need to task_lock(tsk) on this tsk->cgroup reference, as it
+ *    doesn't really matter if tsk->cgroup changes after we read it,
+ *    and we take cgroup_mutex, keeping attach_task() from changing it
+ *    anyway.  No need to check that tsk->cgroup != NULL, thanks to
+ *    the_top_cgroup_hack in cgroup_exit(), which sets an exiting tasks
+ *    cgroup to top_cgroup.
+ */
+
+/* TODO: Use a proper seq_file iterator */
+static int proc_cgroup_show(struct seq_file *m, void *v)
+{
+	struct pid *pid;
+	struct task_struct *tsk;
+	char *buf;
+	int retval;
+	struct cgroupfs_root *root;
+
+	retval = -ENOMEM;
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	retval = -ESRCH;
+	pid = m->private;
+	tsk = get_pid_task(pid, PIDTYPE_PID);
+	if (!tsk)
+		goto out_free;
+
+	retval = 0;
+
+	mutex_lock(&cgroup_mutex);
+
+	for_each_root(root) {
+		struct cgroup_subsys *ss;
+		struct cgroup *cgrp;
+		int subsys_id;
+		int count = 0;
+
+		/* Skip this hierarchy if it has no active subsystems */
+		if (!root->actual_subsys_bits)
+			continue;
+		for_each_subsys(root, ss)
+			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+		seq_putc(m, ':');
+		get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
+		cgrp = task_cgroup(tsk, subsys_id);
+		retval = cgroup_path(cgrp, buf, PAGE_SIZE);
+		if (retval < 0)
+			goto out_unlock;
+		seq_puts(m, buf);
+		seq_putc(m, '\n');
+	}
+
+out_unlock:
+	mutex_unlock(&cgroup_mutex);
+	put_task_struct(tsk);
+out_free:
+	kfree(buf);
+out:
+	return retval;
+}
+
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+	struct pid *pid = PROC_I(inode)->pid;
+	return single_open(file, proc_cgroup_show, pid);
+}
+
+struct file_operations proc_cgroup_operations = {
+	.open		= cgroup_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+/* Display information about each subsystem and each hierarchy */
+static int proc_cgroupstats_show(struct seq_file *m, void *v)
+{
+	int i;
+	struct cgroupfs_root *root;
+
+	seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\n");
+	mutex_lock(&cgroup_mutex);
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+		struct cgroup_subsys *ss = subsys[i];
+		seq_printf(m, "%s\t%lu\t%d\n",
+			   ss->name, ss->root->subsys_bits,
+			   ss->root->number_of_cgroups);
+	}
+	mutex_unlock(&cgroup_mutex);
+	return 0;
+}
+
+static int cgroupstats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, proc_cgroupstats_show, 0);
+}
+
+static struct file_operations proc_cgroupstats_operations = {
+	.open = cgroupstats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * cgroup_fork - attach newly forked task to its parents cgroup.
+ * @tsk: pointer to task_struct of forking parent process.
+ *
+ * Description: A task inherits its parent's cgroup at fork().
+ *
+ * A pointer to the shared css_set was automatically copied in
+ * fork.c by dup_task_struct().  However, we ignore that copy, since
+ * it was not made under the protection of RCU or cgroup_mutex, so
+ * might no longer be a valid cgroup pointer.  attach_task() might
+ * have already changed current->cgroups, allowing the previously
+ * referenced cgroup group to be removed and freed.
+ *
+ * At the point that cgroup_fork() is called, 'current' is the parent
+ * task, and the passed argument 'child' points to the child task.
+ */
+void cgroup_fork(struct task_struct *child)
+{
+	task_lock(current);
+	child->cgroups = current->cgroups;
+	get_css_set(child->cgroups);
+	task_unlock(current);
+	INIT_LIST_HEAD(&child->cg_list);
+}
+
+/**
+ * cgroup_fork_callbacks - called on a new task very soon before
+ * adding it to the tasklist. No need to take any locks since no-one
+ * can be operating on this task
+ */
+void cgroup_fork_callbacks(struct task_struct *child)
+{
+	if (need_forkexit_callback) {
+		int i;
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->fork)
+				ss->fork(ss, child);
+		}
+	}
+}
+
+/**
+ * cgroup_post_fork - called on a new task after adding it to the
+ * task list. Adds the task to the list running through its css_set
+ * if necessary. Has to be after the task is visible on the task list
+ * in case we race with the first call to cgroup_iter_start() - to
+ * guarantee that the new task ends up on its list. */
+void cgroup_post_fork(struct task_struct *child)
+{
+	if (use_task_css_set_links) {
+		write_lock(&css_set_lock);
+		if (list_empty(&child->cg_list))
+			list_add(&child->cg_list, &child->cgroups->tasks);
+		write_unlock(&css_set_lock);
+	}
+}
+/**
+ * cgroup_exit - detach cgroup from exiting task
+ * @tsk: pointer to task_struct of exiting process
+ *
+ * Description: Detach cgroup from @tsk and release it.
+ *
+ * Note that cgroups marked notify_on_release force every task in
+ * them to take the global cgroup_mutex mutex when exiting.
+ * This could impact scaling on very large systems.  Be reluctant to
+ * use notify_on_release cgroups where very high task exit scaling
+ * is required on large systems.
+ *
+ * the_top_cgroup_hack:
+ *
+ *    Set the exiting tasks cgroup to the root cgroup (top_cgroup).
+ *
+ *    We call cgroup_exit() while the task is still competent to
+ *    handle notify_on_release(), then leave the task attached to the
+ *    root cgroup in each hierarchy for the remainder of its exit.
+ *
+ *    To do this properly, we would increment the reference count on
+ *    top_cgroup, and near the very end of the kernel/exit.c do_exit()
+ *    code we would add a second cgroup function call, to drop that
+ *    reference.  This would just create an unnecessary hot spot on
+ *    the top_cgroup reference count, to no avail.
+ *
+ *    Normally, holding a reference to a cgroup without bumping its
+ *    count is unsafe.   The cgroup could go away, or someone could
+ *    attach us to a different cgroup, decrementing the count on
+ *    the first cgroup that we never incremented.  But in this case,
+ *    top_cgroup isn't going away, and either task has PF_EXITING set,
+ *    which wards off any attach_task() attempts, or task is a failed
+ *    fork, never visible to attach_task.
+ *
+ */
+void cgroup_exit(struct task_struct *tsk, int run_callbacks)
+{
+	int i;
+	struct css_set *cg;
+
+	if (run_callbacks && need_forkexit_callback) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys *ss = subsys[i];
+			if (ss->exit)
+				ss->exit(ss, tsk);
+		}
+	}
+
+	/*
+	 * Unlink from the css_set task list if necessary.
+	 * Optimistically check cg_list before taking
+	 * css_set_lock
+	 */
+	if (!list_empty(&tsk->cg_list)) {
+		write_lock(&css_set_lock);
+		if (!list_empty(&tsk->cg_list))
+			list_del(&tsk->cg_list);
+		write_unlock(&css_set_lock);
+	}
+
+	/* Reassign the task to the init_css_set. */
+	task_lock(tsk);
+	cg = tsk->cgroups;
+	tsk->cgroups = &init_css_set;
+	task_unlock(tsk);
+	if (cg)
+		put_css_set_taskexit(cg);
+}
+
+/**
+ * cgroup_clone - duplicate the current cgroup in the hierarchy
+ * that the given subsystem is attached to, and move this task into
+ * the new child
+ */
+int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys)
+{
+	struct dentry *dentry;
+	int ret = 0;
+	char nodename[MAX_CGROUP_TYPE_NAMELEN];
+	struct cgroup *parent, *child;
+	struct inode *inode;
+	struct css_set *cg;
+	struct cgroupfs_root *root;
+	struct cgroup_subsys *ss;
+
+	/* We shouldn't be called by an unregistered subsystem */
+	BUG_ON(!subsys->active);
+
+	/* First figure out what hierarchy and cgroup we're dealing
+	 * with, and pin them so we can drop cgroup_mutex */
+	mutex_lock(&cgroup_mutex);
+ again:
+	root = subsys->root;
+	if (root == &rootnode) {
+		printk(KERN_INFO
+		       "Not cloning cgroup for unused subsystem %s\n",
+		       subsys->name);
+		mutex_unlock(&cgroup_mutex);
+		return 0;
+	}
+	cg = tsk->cgroups;
+	parent = task_cgroup(tsk, subsys->subsys_id);
+
+	snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid);
+
+	/* Pin the hierarchy */
+	atomic_inc(&parent->root->sb->s_active);
+
+	/* Keep the cgroup alive */
+	get_css_set(cg);
+	mutex_unlock(&cgroup_mutex);
+
+	/* Now do the VFS work to create a cgroup */
+	inode = parent->dentry->d_inode;
+
+	/* Hold the parent directory mutex across this operation to
+	 * stop anyone else deleting the new cgroup */
+	mutex_lock(&inode->i_mutex);
+	dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
+	if (IS_ERR(dentry)) {
+		printk(KERN_INFO
+		       "Couldn't allocate dentry for %s: %ld\n", nodename,
+		       PTR_ERR(dentry));
+		ret = PTR_ERR(dentry);
+		goto out_release;
+	}
+
+	/* Create the cgroup directory, which also creates the cgroup */
+	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
+	child = __d_cgrp(dentry);
+	dput(dentry);
+	if (ret) {
+		printk(KERN_INFO
+		       "Failed to create cgroup %s: %d\n", nodename,
+		       ret);
+		goto out_release;
+	}
+
+	if (!child) {
+		printk(KERN_INFO
+		       "Couldn't find new cgroup %s\n", nodename);
+		ret = -ENOMEM;
+		goto out_release;
+	}
+
+	/* The cgroup now exists. Retake cgroup_mutex and check
+	 * that we're still in the same state that we thought we
+	 * were. */
+	mutex_lock(&cgroup_mutex);
+	if ((root != subsys->root) ||
+	    (parent != task_cgroup(tsk, subsys->subsys_id))) {
+		/* Aargh, we raced ... */
+		mutex_unlock(&inode->i_mutex);
+		put_css_set(cg);
+
+		deactivate_super(parent->root->sb);
+		/* The cgroup is still accessible in the VFS, but
+		 * we're not going to try to rmdir() it at this
+		 * point. */
+		printk(KERN_INFO
+		       "Race in cgroup_clone() - leaking cgroup %s\n",
+		       nodename);
+		goto again;
+	}
+
+	/* do any required auto-setup */
+	for_each_subsys(root, ss) {
+		if (ss->post_clone)
+			ss->post_clone(ss, child);
+	}
+
+	/* All seems fine. Finish by moving the task into the new cgroup */
+	ret = attach_task(child, tsk);
+	mutex_unlock(&cgroup_mutex);
+
+ out_release:
+	mutex_unlock(&inode->i_mutex);
+
+	mutex_lock(&cgroup_mutex);
+	put_css_set(cg);
+	mutex_unlock(&cgroup_mutex);
+	deactivate_super(parent->root->sb);
+	return ret;
+}
+
+/*
+ * See if "cgrp" is a descendant of the current task's cgroup in
+ * the appropriate hierarchy
+ *
+ * If we are sending in dummytop, then presumably we are creating
+ * the top cgroup in the subsystem.
+ *
+ * Called only by the ns (nsproxy) cgroup.
+ */
+int cgroup_is_descendant(const struct cgroup *cgrp)
+{
+	int ret;
+	struct cgroup *target;
+	int subsys_id;
+
+	if (cgrp == dummytop)
+		return 1;
+
+	get_first_subsys(cgrp, NULL, &subsys_id);
+	target = task_cgroup(current, subsys_id);
+	while (cgrp != target && cgrp!= cgrp->top_cgroup)
+		cgrp = cgrp->parent;
+	ret = (cgrp == target);
+	return ret;
+}
+
+static void check_for_release(struct cgroup *cgrp)
+{
+	/* All of these checks rely on RCU to keep the cgroup
+	 * structure alive */
+	if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
+	    && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
+		/* Control Group is currently removeable. If it's not
+		 * already queued for a userspace notification, queue
+		 * it now */
+		int need_schedule_work = 0;
+		spin_lock(&release_list_lock);
+		if (!cgroup_is_removed(cgrp) &&
+		    list_empty(&cgrp->release_list)) {
+			list_add(&cgrp->release_list, &release_list);
+			need_schedule_work = 1;
+		}
+		spin_unlock(&release_list_lock);
+		if (need_schedule_work)
+			schedule_work(&release_agent_work);
+	}
+}
+
+void __css_put(struct cgroup_subsys_state *css)
+{
+	struct cgroup *cgrp = css->cgroup;
+	rcu_read_lock();
+	if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
+		set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		check_for_release(cgrp);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Notify userspace when a cgroup is released, by running the
+ * configured release agent with the name of the cgroup (path
+ * relative to the root of cgroup file system) as the argument.
+ *
+ * Most likely, this user command will try to rmdir this cgroup.
+ *
+ * This races with the possibility that some other task will be
+ * attached to this cgroup before it is removed, or that some other
+ * user task will 'mkdir' a child cgroup of this cgroup.  That's ok.
+ * The presumed 'rmdir' will fail quietly if this cgroup is no longer
+ * unused, and this cgroup will be reprieved from its death sentence,
+ * to continue to serve a useful existence.  Next time it's released,
+ * we will get notified again, if it still has 'notify_on_release' set.
+ *
+ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
+ * means only wait until the task is successfully execve()'d.  The
+ * separate release agent task is forked by call_usermodehelper(),
+ * then control in this thread returns here, without waiting for the
+ * release agent task.  We don't bother to wait because the caller of
+ * this routine has no use for the exit status of the release agent
+ * task, so no sense holding our caller up for that.
+ *
+ */
+
+static void cgroup_release_agent(struct work_struct *work)
+{
+	BUG_ON(work != &release_agent_work);
+	mutex_lock(&cgroup_mutex);
+	spin_lock(&release_list_lock);
+	while (!list_empty(&release_list)) {
+		char *argv[3], *envp[3];
+		int i;
+		char *pathbuf;
+		struct cgroup *cgrp = list_entry(release_list.next,
+						    struct cgroup,
+						    release_list);
+		list_del_init(&cgrp->release_list);
+		spin_unlock(&release_list_lock);
+		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!pathbuf) {
+			spin_lock(&release_list_lock);
+			continue;
+		}
+
+		if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
+			kfree(pathbuf);
+			spin_lock(&release_list_lock);
+			continue;
+		}
+
+		i = 0;
+		argv[i++] = cgrp->root->release_agent_path;
+		argv[i++] = (char *)pathbuf;
+		argv[i] = NULL;
+
+		i = 0;
+		/* minimal command environment */
+		envp[i++] = "HOME=/";
+		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+		envp[i] = NULL;
+
+		/* Drop the lock while we invoke the usermode helper,
+		 * since the exec could involve hitting disk and hence
+		 * be a slow process */
+		mutex_unlock(&cgroup_mutex);
+		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+		kfree(pathbuf);
+		mutex_lock(&cgroup_mutex);
+		spin_lock(&release_list_lock);
+	}
+	spin_unlock(&release_list_lock);
+	mutex_unlock(&cgroup_mutex);
+}
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
new file mode 100644
index 00000000000..37301e877cb
--- /dev/null
+++ b/kernel/cgroup_debug.c
@@ -0,0 +1,97 @@
+/*
+ * kernel/ccontainer_debug.c - Example cgroup subsystem that
+ * exposes debug info
+ *
+ * Copyright (C) Google Inc, 2007
+ *
+ * Developed by Paul Menage (menage@google.com)
+ *
+ */
+
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+
+#include <asm/atomic.h>
+
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+						   struct cgroup *cont)
+{
+	struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+	if (!css)
+		return ERR_PTR(-ENOMEM);
+
+	return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	return atomic_read(&cont->count);
+}
+
+static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+	u64 count;
+
+	cgroup_lock();
+	count = cgroup_task_count(cont);
+	cgroup_unlock();
+	return count;
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+	return (u64)(long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+					   struct cftype *cft)
+{
+	u64 count;
+
+	rcu_read_lock();
+	count = atomic_read(&current->cgroups->ref.refcount);
+	rcu_read_unlock();
+	return count;
+}
+
+static struct cftype files[] =  {
+	{
+		.name = "cgroup_refcount",
+		.read_uint = cgroup_refcount_read,
+	},
+	{
+		.name = "taskcount",
+		.read_uint = taskcount_read,
+	},
+
+	{
+		.name = "current_css_set",
+		.read_uint = current_css_set_read,
+	},
+
+	{
+		.name = "current_css_set_refcount",
+		.read_uint = current_css_set_refcount_read,
+	},
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys debug_subsys = {
+	.name = "debug",
+	.create = debug_create,
+	.destroy = debug_destroy,
+	.populate = debug_populate,
+	.subsys_id = debug_subsys_id,
+};
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a21f71af9d8..6b3a0c15144 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -98,7 +98,8 @@ static inline void check_for_tasks(int cpu)
 		     !cputime_eq(p->stime, cputime_zero)))
 			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
 				(state = %ld, flags = %x) \n",
-				 p->comm, p->pid, cpu, p->state, p->flags);
+				 p->comm, task_pid_nr(p), cpu,
+				 p->state, p->flags);
 	}
 	write_unlock_irq(&tasklist_lock);
 }
@@ -264,6 +265,15 @@ out_notify:
 int __cpuinit cpu_up(unsigned int cpu)
 {
 	int err = 0;
+	if (!cpu_isset(cpu, cpu_possible_map)) {
+		printk(KERN_ERR "can't online cpu %d because it is not "
+			"configured as may-hotadd at boot time\n", cpu);
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
+		printk(KERN_ERR "please check additional_cpus= boot "
+				"parameter\n");
+#endif
+		return -EINVAL;
+	}
 
 	mutex_lock(&cpu_add_remove_lock);
 	if (cpu_hotplug_disabled)
diff --git a/kernel/cpu_acct.c b/kernel/cpu_acct.c
new file mode 100644
index 00000000000..731e47e7f16
--- /dev/null
+++ b/kernel/cpu_acct.c
@@ -0,0 +1,186 @@
+/*
+ * kernel/cpu_acct.c - CPU accounting cgroup subsystem
+ *
+ * Copyright (C) Google Inc, 2006
+ *
+ * Developed by Paul Menage (menage@google.com) and Balbir Singh
+ * (balbir@in.ibm.com)
+ *
+ */
+
+/*
+ * Example cgroup subsystem for reporting total CPU usage of tasks in a
+ * cgroup, along with percentage load over a time interval
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/rcupdate.h>
+
+#include <asm/div64.h>
+
+struct cpuacct {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+	/* total time used by this class */
+	cputime64_t time;
+
+	/* time when next load calculation occurs */
+	u64 next_interval_check;
+
+	/* time used in current period */
+	cputime64_t current_interval_time;
+
+	/* time used in last period */
+	cputime64_t last_interval_time;
+};
+
+struct cgroup_subsys cpuacct_subsys;
+
+static inline struct cpuacct *cgroup_ca(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+static inline struct cpuacct *task_ca(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuacct_subsys_id),
+			    struct cpuacct, css);
+}
+
+#define INTERVAL (HZ * 10)
+
+static inline u64 next_interval_boundary(u64 now)
+{
+	/* calculate the next interval boundary beyond the
+	 * current time */
+	do_div(now, INTERVAL);
+	return (now + 1) * INTERVAL;
+}
+
+static struct cgroup_subsys_state *cpuacct_create(
+	struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
+	if (!ca)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ca->lock);
+	ca->next_interval_check = next_interval_boundary(get_jiffies_64());
+	return &ca->css;
+}
+
+static void cpuacct_destroy(struct cgroup_subsys *ss,
+			    struct cgroup *cont)
+{
+	kfree(cgroup_ca(cont));
+}
+
+/* Lazily update the load calculation if necessary. Called with ca locked */
+static void cpuusage_update(struct cpuacct *ca)
+{
+	u64 now = get_jiffies_64();
+
+	/* If we're not due for an update, return */
+	if (ca->next_interval_check > now)
+		return;
+
+	if (ca->next_interval_check <= (now - INTERVAL)) {
+		/* If it's been more than an interval since the last
+		 * check, then catch up - the last interval must have
+		 * been zero load */
+		ca->last_interval_time = 0;
+		ca->next_interval_check = next_interval_boundary(now);
+	} else {
+		/* If a steal takes the last interval time negative,
+		 * then we just ignore it */
+		if ((s64)ca->current_interval_time > 0)
+			ca->last_interval_time = ca->current_interval_time;
+		else
+			ca->last_interval_time = 0;
+		ca->next_interval_check += INTERVAL;
+	}
+	ca->current_interval_time = 0;
+}
+
+static u64 cpuusage_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert 64-bit jiffies to seconds */
+	time *= 1000;
+	do_div(time, HZ);
+	return time;
+}
+
+static u64 load_read(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuacct *ca = cgroup_ca(cont);
+	u64 time;
+
+	/* Find the time used in the previous interval */
+	spin_lock_irq(&ca->lock);
+	cpuusage_update(ca);
+	time = cputime64_to_jiffies64(ca->last_interval_time);
+	spin_unlock_irq(&ca->lock);
+
+	/* Convert time to a percentage, to give the load in the
+	 * previous period */
+	time *= 100;
+	do_div(time, INTERVAL);
+
+	return time;
+}
+
+static struct cftype files[] = {
+	{
+		.name = "usage",
+		.read_uint = cpuusage_read,
+	},
+	{
+		.name = "load",
+		.read_uint = load_read,
+	}
+};
+
+static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+void cpuacct_charge(struct task_struct *task, cputime_t cputime)
+{
+
+	struct cpuacct *ca;
+	unsigned long flags;
+
+	if (!cpuacct_subsys.active)
+		return;
+	rcu_read_lock();
+	ca = task_ca(task);
+	if (ca) {
+		spin_lock_irqsave(&ca->lock, flags);
+		cpuusage_update(ca);
+		ca->time = cputime64_add(ca->time, cputime);
+		ca->current_interval_time =
+			cputime64_add(ca->current_interval_time, cputime);
+		spin_unlock_irqrestore(&ca->lock, flags);
+	}
+	rcu_read_unlock();
+}
+
+struct cgroup_subsys cpuacct_subsys = {
+	.name = "cpuacct",
+	.create = cpuacct_create,
+	.destroy = cpuacct_destroy,
+	.populate = cpuacct_populate,
+	.subsys_id = cpuacct_subsys_id,
+};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 64950fa5d32..50f5dc46368 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,7 +4,8 @@
  *  Processor and Memory placement constraints for sets of tasks.
  *
  *  Copyright (C) 2003 BULL SA.
- *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
+ *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
+ *  Copyright (C) 2006 Google, Inc
  *
  *  Portions derived from Patrick Mochel's sysfs code.
  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
@@ -12,6 +13,7 @@
  *  2003-10-10 Written by Simon Derr.
  *  2003-10-22 Updates by Stephen Hemminger.
  *  2004 May-July Rework by Paul Jackson.
+ *  2006 Rework by Paul Menage to use generic cgroups
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of the Linux
@@ -36,6 +38,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/prio_heap.h>
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
@@ -52,8 +55,7 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 #include <linux/mutex.h>
-
-#define CPUSET_SUPER_MAGIC		0x27e0eb
+#include <linux/kfifo.h>
 
 /*
  * Tracks how many cpusets are currently defined in system.
@@ -62,6 +64,10 @@
  */
 int number_of_cpusets __read_mostly;
 
+/* Retrieve the cpuset from a cgroup */
+struct cgroup_subsys cpuset_subsys;
+struct cpuset;
+
 /* See "Frequency meter" comments, below. */
 
 struct fmeter {
@@ -72,24 +78,13 @@ struct fmeter {
 };
 
 struct cpuset {
+	struct cgroup_subsys_state css;
+
 	unsigned long flags;		/* "unsigned long" so bitops work */
 	cpumask_t cpus_allowed;		/* CPUs allowed to tasks in cpuset */
 	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */
 
-	/*
-	 * Count is atomic so can incr (fork) or decr (exit) without a lock.
-	 */
-	atomic_t count;			/* count tasks using this cpuset */
-
-	/*
-	 * We link our 'sibling' struct into our parents 'children'.
-	 * Our children link their 'sibling' into our 'children'.
-	 */
-	struct list_head sibling;	/* my parents children */
-	struct list_head children;	/* my children */
-
 	struct cpuset *parent;		/* my parent */
-	struct dentry *dentry;		/* cpuset fs entry */
 
 	/*
 	 * Copy of global cpuset_mems_generation as of the most
@@ -98,15 +93,32 @@ struct cpuset {
 	int mems_generation;
 
 	struct fmeter fmeter;		/* memory_pressure filter */
+
+	/* partition number for rebuild_sched_domains() */
+	int pn;
 };
 
+/* Retrieve the cpuset for a cgroup */
+static inline struct cpuset *cgroup_cs(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpuset_subsys_id),
+			    struct cpuset, css);
+}
+
+/* Retrieve the cpuset for a task */
+static inline struct cpuset *task_cs(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, cpuset_subsys_id),
+			    struct cpuset, css);
+}
+
+
 /* bits in struct cpuset flags field */
 typedef enum {
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
 	CS_MEMORY_MIGRATE,
-	CS_REMOVED,
-	CS_NOTIFY_ON_RELEASE,
+	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
@@ -122,14 +134,9 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
-static inline int is_removed(const struct cpuset *cs)
+static inline int is_sched_load_balance(const struct cpuset *cs)
 {
-	return test_bit(CS_REMOVED, &cs->flags);
-}
-
-static inline int notify_on_release(const struct cpuset *cs)
-{
-	return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 }
 
 static inline int is_memory_migrate(const struct cpuset *cs)
@@ -172,14 +179,8 @@ static struct cpuset top_cpuset = {
 	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
 	.cpus_allowed = CPU_MASK_ALL,
 	.mems_allowed = NODE_MASK_ALL,
-	.count = ATOMIC_INIT(0),
-	.sibling = LIST_HEAD_INIT(top_cpuset.sibling),
-	.children = LIST_HEAD_INIT(top_cpuset.children),
 };
 
-static struct vfsmount *cpuset_mount;
-static struct super_block *cpuset_sb;
-
 /*
  * We have two global cpuset mutexes below.  They can nest.
  * It is ok to first take manage_mutex, then nest callback_mutex.  We also
@@ -263,297 +264,33 @@ static struct super_block *cpuset_sb;
  * the routine cpuset_update_task_memory_state().
  */
 
-static DEFINE_MUTEX(manage_mutex);
 static DEFINE_MUTEX(callback_mutex);
 
-/*
- * A couple of forward declarations required, due to cyclic reference loop:
- *  cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
- *  -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
- */
-
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
-
-static struct backing_dev_info cpuset_backing_dev_info = {
-	.ra_pages = 0,		/* No readahead */
-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
-};
-
-static struct inode *cpuset_new_inode(mode_t mode)
-{
-	struct inode *inode = new_inode(cpuset_sb);
-
-	if (inode) {
-		inode->i_mode = mode;
-		inode->i_uid = current->fsuid;
-		inode->i_gid = current->fsgid;
-		inode->i_blocks = 0;
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
-	}
-	return inode;
-}
-
-static void cpuset_diput(struct dentry *dentry, struct inode *inode)
-{
-	/* is dentry a directory ? if so, kfree() associated cpuset */
-	if (S_ISDIR(inode->i_mode)) {
-		struct cpuset *cs = dentry->d_fsdata;
-		BUG_ON(!(is_removed(cs)));
-		kfree(cs);
-	}
-	iput(inode);
-}
-
-static struct dentry_operations cpuset_dops = {
-	.d_iput = cpuset_diput,
-};
-
-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
-{
-	struct dentry *d = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(d))
-		d->d_op = &cpuset_dops;
-	return d;
-}
-
-static void remove_dir(struct dentry *d)
-{
-	struct dentry *parent = dget(d->d_parent);
-
-	d_delete(d);
-	simple_rmdir(parent->d_inode, d);
-	dput(parent);
-}
-
-/*
- * NOTE : the dentry must have been dget()'ed
- */
-static void cpuset_d_remove_dir(struct dentry *dentry)
-{
-	struct list_head *node;
-
-	spin_lock(&dcache_lock);
-	node = dentry->d_subdirs.next;
-	while (node != &dentry->d_subdirs) {
-		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
-		list_del_init(node);
-		if (d->d_inode) {
-			d = dget_locked(d);
-			spin_unlock(&dcache_lock);
-			d_delete(d);
-			simple_unlink(dentry->d_inode, d);
-			dput(d);
-			spin_lock(&dcache_lock);
-		}
-		node = dentry->d_subdirs.next;
-	}
-	list_del_init(&dentry->d_u.d_child);
-	spin_unlock(&dcache_lock);
-	remove_dir(dentry);
-}
-
-static struct super_operations cpuset_ops = {
-	.statfs = simple_statfs,
-	.drop_inode = generic_delete_inode,
-};
-
-static int cpuset_fill_super(struct super_block *sb, void *unused_data,
-							int unused_silent)
-{
-	struct inode *inode;
-	struct dentry *root;
-
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-	sb->s_magic = CPUSET_SUPER_MAGIC;
-	sb->s_op = &cpuset_ops;
-	cpuset_sb = sb;
-
-	inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
-	if (inode) {
-		inode->i_op = &simple_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-		/* directories start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else {
-		return -ENOMEM;
-	}
-
-	root = d_alloc_root(inode);
-	if (!root) {
-		iput(inode);
-		return -ENOMEM;
-	}
-	sb->s_root = root;
-	return 0;
-}
-
+/* This is ugly, but preserves the userspace API for existing cpuset
+ * users. If someone tries to mount the "cpuset" filesystem, we
+ * silently switch it to mount "cgroup" instead */
 static int cpuset_get_sb(struct file_system_type *fs_type,
 			 int flags, const char *unused_dev_name,
 			 void *data, struct vfsmount *mnt)
 {
-	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
+	struct file_system_type *cgroup_fs = get_fs_type("cgroup");
+	int ret = -ENODEV;
+	if (cgroup_fs) {
+		char mountopts[] =
+			"cpuset,noprefix,"
+			"release_agent=/sbin/cpuset_release_agent";
+		ret = cgroup_fs->get_sb(cgroup_fs, flags,
+					   unused_dev_name, mountopts, mnt);
+		put_filesystem(cgroup_fs);
+	}
+	return ret;
 }
 
 static struct file_system_type cpuset_fs_type = {
 	.name = "cpuset",
 	.get_sb = cpuset_get_sb,
-	.kill_sb = kill_litter_super,
 };
 
-/* struct cftype:
- *
- * The files in the cpuset filesystem mostly have a very simple read/write
- * handling, some common function will take care of it. Nevertheless some cases
- * (read tasks) are special and therefore I define this structure for every
- * kind of file.
- *
- *
- * When reading/writing to a file:
- *	- the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
- *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
- */
-
-struct cftype {
-	char *name;
-	int private;
-	int (*open) (struct inode *inode, struct file *file);
-	ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
-							loff_t *ppos);
-	int (*write) (struct file *file, const char __user *buf, size_t nbytes,
-							loff_t *ppos);
-	int (*release) (struct inode *inode, struct file *file);
-};
-
-static inline struct cpuset *__d_cs(struct dentry *dentry)
-{
-	return dentry->d_fsdata;
-}
-
-static inline struct cftype *__d_cft(struct dentry *dentry)
-{
-	return dentry->d_fsdata;
-}
-
-/*
- * Call with manage_mutex held.  Writes path of cpuset into buf.
- * Returns 0 on success, -errno on error.
- */
-
-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
-{
-	char *start;
-
-	start = buf + buflen;
-
-	*--start = '\0';
-	for (;;) {
-		int len = cs->dentry->d_name.len;
-		if ((start -= len) < buf)
-			return -ENAMETOOLONG;
-		memcpy(start, cs->dentry->d_name.name, len);
-		cs = cs->parent;
-		if (!cs)
-			break;
-		if (!cs->parent)
-			continue;
-		if (--start < buf)
-			return -ENAMETOOLONG;
-		*start = '/';
-	}
-	memmove(buf, start, buf + buflen - start);
-	return 0;
-}
-
-/*
- * Notify userspace when a cpuset is released, by running
- * /sbin/cpuset_release_agent with the name of the cpuset (path
- * relative to the root of cpuset file system) as the argument.
- *
- * Most likely, this user command will try to rmdir this cpuset.
- *
- * This races with the possibility that some other task will be
- * attached to this cpuset before it is removed, or that some other
- * user task will 'mkdir' a child cpuset of this cpuset.  That's ok.
- * The presumed 'rmdir' will fail quietly if this cpuset is no longer
- * unused, and this cpuset will be reprieved from its death sentence,
- * to continue to serve a useful existence.  Next time it's released,
- * we will get notified again, if it still has 'notify_on_release' set.
- *
- * The final arg to call_usermodehelper() is 0, which means don't
- * wait.  The separate /sbin/cpuset_release_agent task is forked by
- * call_usermodehelper(), then control in this thread returns here,
- * without waiting for the release agent task.  We don't bother to
- * wait because the caller of this routine has no use for the exit
- * status of the /sbin/cpuset_release_agent task, so no sense holding
- * our caller up for that.
- *
- * When we had only one cpuset mutex, we had to call this
- * without holding it, to avoid deadlock when call_usermodehelper()
- * allocated memory.  With two locks, we could now call this while
- * holding manage_mutex, but we still don't, so as to minimize
- * the time manage_mutex is held.
- */
-
-static void cpuset_release_agent(const char *pathbuf)
-{
-	char *argv[3], *envp[3];
-	int i;
-
-	if (!pathbuf)
-		return;
-
-	i = 0;
-	argv[i++] = "/sbin/cpuset_release_agent";
-	argv[i++] = (char *)pathbuf;
-	argv[i] = NULL;
-
-	i = 0;
-	/* minimal command environment */
-	envp[i++] = "HOME=/";
-	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-	envp[i] = NULL;
-
-	call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
-	kfree(pathbuf);
-}
-
-/*
- * Either cs->count of using tasks transitioned to zero, or the
- * cs->children list of child cpusets just became empty.  If this
- * cs is notify_on_release() and now both the user count is zero and
- * the list of children is empty, prepare cpuset path in a kmalloc'd
- * buffer, to be returned via ppathbuf, so that the caller can invoke
- * cpuset_release_agent() with it later on, once manage_mutex is dropped.
- * Call here with manage_mutex held.
- *
- * This check_for_release() routine is responsible for kmalloc'ing
- * pathbuf.  The above cpuset_release_agent() is responsible for
- * kfree'ing pathbuf.  The caller of these routines is responsible
- * for providing a pathbuf pointer, initialized to NULL, then
- * calling check_for_release() with manage_mutex held and the address
- * of the pathbuf pointer, then dropping manage_mutex, then calling
- * cpuset_release_agent() with pathbuf, as set by check_for_release().
- */
-
-static void check_for_release(struct cpuset *cs, char **ppathbuf)
-{
-	if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
-	    list_empty(&cs->children)) {
-		char *buf;
-
-		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-		if (!buf)
-			return;
-		if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
-			kfree(buf);
-		else
-			*ppathbuf = buf;
-	}
-}
-
 /*
  * Return in *pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
@@ -653,20 +390,19 @@ void cpuset_update_task_memory_state(void)
 	struct task_struct *tsk = current;
 	struct cpuset *cs;
 
-	if (tsk->cpuset == &top_cpuset) {
+	if (task_cs(tsk) == &top_cpuset) {
 		/* Don't need rcu for top_cpuset.  It's never freed. */
 		my_cpusets_mem_gen = top_cpuset.mems_generation;
 	} else {
 		rcu_read_lock();
-		cs = rcu_dereference(tsk->cpuset);
-		my_cpusets_mem_gen = cs->mems_generation;
+		my_cpusets_mem_gen = task_cs(current)->mems_generation;
 		rcu_read_unlock();
 	}
 
 	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
 		mutex_lock(&callback_mutex);
 		task_lock(tsk);
-		cs = tsk->cpuset;	/* Maybe changed when task not locked */
+		cs = task_cs(tsk); /* Maybe changed when task not locked */
 		guarantee_online_mems(cs, &tsk->mems_allowed);
 		tsk->cpuset_mems_generation = cs->mems_generation;
 		if (is_spread_page(cs))
@@ -721,11 +457,12 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 
 static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 {
+	struct cgroup *cont;
 	struct cpuset *c, *par;
 
 	/* Each of our child cpusets must be a subset of us */
-	list_for_each_entry(c, &cur->children, sibling) {
-		if (!is_cpuset_subset(c, trial))
+	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+		if (!is_cpuset_subset(cgroup_cs(cont), trial))
 			return -EBUSY;
 	}
 
@@ -740,7 +477,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 		return -EACCES;
 
 	/* If either I or some sibling (!= me) is exclusive, we can't overlap */
-	list_for_each_entry(c, &par->children, sibling) {
+	list_for_each_entry(cont, &par->css.cgroup->children, sibling) {
+		c = cgroup_cs(cont);
 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
 		    c != cur &&
 		    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
@@ -751,17 +489,265 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 			return -EINVAL;
 	}
 
+	/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
+	if (cgroup_task_count(cur->css.cgroup)) {
+		if (cpus_empty(trial->cpus_allowed) ||
+		    nodes_empty(trial->mems_allowed)) {
+			return -ENOSPC;
+		}
+	}
+
 	return 0;
 }
 
 /*
+ * Helper routine for rebuild_sched_domains().
+ * Do cpusets a, b have overlapping cpus_allowed masks?
+ */
+
+static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
+{
+	return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
+}
+
+/*
+ * rebuild_sched_domains()
+ *
+ * If the flag 'sched_load_balance' of any cpuset with non-empty
+ * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
+ * which has that flag enabled, or if any cpuset with a non-empty
+ * 'cpus' is removed, then call this routine to rebuild the
+ * scheduler's dynamic sched domains.
+ *
+ * This routine builds a partial partition of the systems CPUs
+ * (the set of non-overlappping cpumask_t's in the array 'part'
+ * below), and passes that partial partition to the kernel/sched.c
+ * partition_sched_domains() routine, which will rebuild the
+ * schedulers load balancing domains (sched domains) as specified
+ * by that partial partition.  A 'partial partition' is a set of
+ * non-overlapping subsets whose union is a subset of that set.
+ *
+ * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * for a background explanation of this.
+ *
+ * Does not return errors, on the theory that the callers of this
+ * routine would rather not worry about failures to rebuild sched
+ * domains when operating in the severe memory shortage situations
+ * that could cause allocation failures below.
+ *
+ * Call with cgroup_mutex held.  May take callback_mutex during
+ * call due to the kfifo_alloc() and kmalloc() calls.  May nest
+ * a call to the lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * Must not be called holding callback_mutex, because we must not
+ * call lock_cpu_hotplug() while holding callback_mutex.  Elsewhere
+ * the kernel nests callback_mutex inside lock_cpu_hotplug() calls.
+ * So the reverse nesting would risk an ABBA deadlock.
+ *
+ * The three key local variables below are:
+ *    q  - a kfifo queue of cpuset pointers, used to implement a
+ *	   top-down scan of all cpusets.  This scan loads a pointer
+ *	   to each cpuset marked is_sched_load_balance into the
+ *	   array 'csa'.  For our purposes, rebuilding the schedulers
+ *	   sched domains, we can ignore !is_sched_load_balance cpusets.
+ *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
+ *	   that need to be load balanced, for convenient iterative
+ *	   access by the subsequent code that finds the best partition,
+ *	   i.e the set of domains (subsets) of CPUs such that the
+ *	   cpus_allowed of every cpuset marked is_sched_load_balance
+ *	   is a subset of one of these domains, while there are as
+ *	   many such domains as possible, each as small as possible.
+ * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
+ *	   the kernel/sched.c routine partition_sched_domains() in a
+ *	   convenient format, that can be easily compared to the prior
+ *	   value to determine what partition elements (sched domains)
+ *	   were changed (added or removed.)
+ *
+ * Finding the best partition (set of domains):
+ *	The triple nested loops below over i, j, k scan over the
+ *	load balanced cpusets (using the array of cpuset pointers in
+ *	csa[]) looking for pairs of cpusets that have overlapping
+ *	cpus_allowed, but which don't have the same 'pn' partition
+ *	number and gives them in the same partition number.  It keeps
+ *	looping on the 'restart' label until it can no longer find
+ *	any such pairs.
+ *
+ *	The union of the cpus_allowed masks from the set of
+ *	all cpusets having the same 'pn' value then form the one
+ *	element of the partition (one sched domain) to be passed to
+ *	partition_sched_domains().
+ */
+
+static void rebuild_sched_domains(void)
+{
+	struct kfifo *q;	/* queue of cpusets to be scanned */
+	struct cpuset *cp;	/* scans q */
+	struct cpuset **csa;	/* array of all cpuset ptrs */
+	int csn;		/* how many cpuset ptrs in csa so far */
+	int i, j, k;		/* indices for partition finding loops */
+	cpumask_t *doms;	/* resulting partition; i.e. sched domains */
+	int ndoms;		/* number of sched domains in result */
+	int nslot;		/* next empty doms[] cpumask_t slot */
+
+	q = NULL;
+	csa = NULL;
+	doms = NULL;
+
+	/* Special case for the 99% of systems with one, full, sched domain */
+	if (is_sched_load_balance(&top_cpuset)) {
+		ndoms = 1;
+		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+		if (!doms)
+			goto rebuild;
+		*doms = top_cpuset.cpus_allowed;
+		goto rebuild;
+	}
+
+	q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
+	if (IS_ERR(q))
+		goto done;
+	csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
+	if (!csa)
+		goto done;
+	csn = 0;
+
+	cp = &top_cpuset;
+	__kfifo_put(q, (void *)&cp, sizeof(cp));
+	while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+		struct cgroup *cont;
+		struct cpuset *child;   /* scans child cpusets of cp */
+		if (is_sched_load_balance(cp))
+			csa[csn++] = cp;
+		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+			child = cgroup_cs(cont);
+			__kfifo_put(q, (void *)&child, sizeof(cp));
+		}
+  	}
+
+	for (i = 0; i < csn; i++)
+		csa[i]->pn = i;
+	ndoms = csn;
+
+restart:
+	/* Find the best partition (set of sched domains) */
+	for (i = 0; i < csn; i++) {
+		struct cpuset *a = csa[i];
+		int apn = a->pn;
+
+		for (j = 0; j < csn; j++) {
+			struct cpuset *b = csa[j];
+			int bpn = b->pn;
+
+			if (apn != bpn && cpusets_overlap(a, b)) {
+				for (k = 0; k < csn; k++) {
+					struct cpuset *c = csa[k];
+
+					if (c->pn == bpn)
+						c->pn = apn;
+				}
+				ndoms--;	/* one less element */
+				goto restart;
+			}
+		}
+	}
+
+	/* Convert <csn, csa> to <ndoms, doms> */
+	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
+	if (!doms)
+		goto rebuild;
+
+	for (nslot = 0, i = 0; i < csn; i++) {
+		struct cpuset *a = csa[i];
+		int apn = a->pn;
+
+		if (apn >= 0) {
+			cpumask_t *dp = doms + nslot;
+
+			if (nslot == ndoms) {
+				static int warnings = 10;
+				if (warnings) {
+					printk(KERN_WARNING
+					 "rebuild_sched_domains confused:"
+					  " nslot %d, ndoms %d, csn %d, i %d,"
+					  " apn %d\n",
+					  nslot, ndoms, csn, i, apn);
+					warnings--;
+				}
+				continue;
+			}
+
+			cpus_clear(*dp);
+			for (j = i; j < csn; j++) {
+				struct cpuset *b = csa[j];
+
+				if (apn == b->pn) {
+					cpus_or(*dp, *dp, b->cpus_allowed);
+					b->pn = -1;
+				}
+			}
+			nslot++;
+		}
+	}
+	BUG_ON(nslot != ndoms);
+
+rebuild:
+	/* Have scheduler rebuild sched domains */
+	lock_cpu_hotplug();
+	partition_sched_domains(ndoms, doms);
+	unlock_cpu_hotplug();
+
+done:
+	if (q && !IS_ERR(q))
+		kfifo_free(q);
+	kfree(csa);
+	/* Don't kfree(doms) -- partition_sched_domains() does that. */
+}
+
+static inline int started_after_time(struct task_struct *t1,
+				     struct timespec *time,
+				     struct task_struct *t2)
+{
+	int start_diff = timespec_compare(&t1->start_time, time);
+	if (start_diff > 0) {
+		return 1;
+	} else if (start_diff < 0) {
+		return 0;
+	} else {
+		/*
+		 * Arbitrarily, if two processes started at the same
+		 * time, we'll say that the lower pointer value
+		 * started first. Note that t2 may have exited by now
+		 * so this may not be a valid pointer any longer, but
+		 * that's fine - it still serves to distinguish
+		 * between two tasks started (effectively)
+		 * simultaneously.
+		 */
+		return t1 > t2;
+	}
+}
+
+static inline int started_after(void *p1, void *p2)
+{
+	struct task_struct *t1 = p1;
+	struct task_struct *t2 = p2;
+	return started_after_time(t1, &t2->start_time, t2);
+}
+
+/*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval;
+	int retval, i;
+	int is_load_balanced;
+	struct cgroup_iter it;
+	struct cgroup *cgrp = cs->css.cgroup;
+	struct task_struct *p, *dropped;
+	/* Never dereference latest_task, since it's not refcounted */
+	struct task_struct *latest_task = NULL;
+	struct ptr_heap heap;
+	struct timespec latest_time = { 0, 0 };
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -770,11 +756,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	trialcs = *cs;
 
 	/*
-	 * We allow a cpuset's cpus_allowed to be empty; if it has attached
-	 * tasks, we'll catch it later when we validate the change and return
-	 * -ENOSPC.
+	 * An empty cpus_allowed is ok iff there are no tasks in the cpuset.
+	 * Since cpulist_parse() fails on an empty mask, we special case
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have cpus.
 	 */
-	if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+	buf = strstrip(buf);
+	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
 		retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -782,15 +770,79 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 			return retval;
 	}
 	cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
-	/* cpus_allowed cannot be empty for a cpuset with attached tasks. */
-	if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
-		return -ENOSPC;
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
+
+	/* Nothing to do if the cpus didn't change */
+	if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
+		return 0;
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
+	if (retval)
+		return retval;
+
+	is_load_balanced = is_sched_load_balance(&trialcs);
+
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
+
+ again:
+	/*
+	 * Scan tasks in the cpuset, and update the cpumasks of any
+	 * that need an update. Since we can't call set_cpus_allowed()
+	 * while holding tasklist_lock, gather tasks to be processed
+	 * in a heap structure. If the statically-sized heap fills up,
+	 * overflow tasks that started later, and in future iterations
+	 * only consider tasks that started after the latest task in
+	 * the previous pass. This guarantees forward progress and
+	 * that we don't miss any tasks
+	 */
+	heap.size = 0;
+	cgroup_iter_start(cgrp, &it);
+	while ((p = cgroup_iter_next(cgrp, &it))) {
+		/* Only affect tasks that don't have the right cpus_allowed */
+		if (cpus_equal(p->cpus_allowed, cs->cpus_allowed))
+			continue;
+		/*
+		 * Only process tasks that started after the last task
+		 * we processed
+		 */
+		if (!started_after_time(p, &latest_time, latest_task))
+			continue;
+		dropped = heap_insert(&heap, p);
+		if (dropped == NULL) {
+			get_task_struct(p);
+		} else if (dropped != p) {
+			get_task_struct(p);
+			put_task_struct(dropped);
+		}
+	}
+	cgroup_iter_end(cgrp, &it);
+	if (heap.size) {
+		for (i = 0; i < heap.size; i++) {
+			struct task_struct *p = heap.ptrs[i];
+			if (i == 0) {
+				latest_time = p->start_time;
+				latest_task = p;
+			}
+			set_cpus_allowed(p, cs->cpus_allowed);
+			put_task_struct(p);
+		}
+		/*
+		 * If we had to process any tasks at all, scan again
+		 * in case some of them were in the middle of forking
+		 * children that didn't notice the new cpumask
+		 * restriction.  Not the most efficient way to do it,
+		 * but it avoids having to take callback_mutex in the
+		 * fork path
+		 */
+		goto again;
+	}
+	heap_free(&heap);
+	if (is_load_balanced)
+		rebuild_sched_domains();
+
 	return 0;
 }
 
@@ -839,7 +891,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
 
 	mutex_lock(&callback_mutex);
-	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
+	guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed);
 	mutex_unlock(&callback_mutex);
 }
 
@@ -857,16 +909,19 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
  * their mempolicies to the cpusets new mems_allowed.
  */
 
+static void *cpuset_being_rebound;
+
 static int update_nodemask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
 	nodemask_t oldmem;
-	struct task_struct *g, *p;
+	struct task_struct *p;
 	struct mm_struct **mmarray;
 	int i, n, ntasks;
 	int migrate;
 	int fudge;
 	int retval;
+	struct cgroup_iter it;
 
 	/*
 	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
@@ -878,29 +933,19 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	trialcs = *cs;
 
 	/*
-	 * We allow a cpuset's mems_allowed to be empty; if it has attached
-	 * tasks, we'll catch it later when we validate the change and return
-	 * -ENOSPC.
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+	 * Since nodelist_parse() fails on an empty mask, we special case
+	 * that parsing.  The validate_change() call ensures that cpusets
+	 * with tasks have memory.
 	 */
-	if (!buf[0] || (buf[0] == '\n' && !buf[1])) {
+	buf = strstrip(buf);
+	if (!*buf) {
 		nodes_clear(trialcs.mems_allowed);
 	} else {
 		retval = nodelist_parse(buf, trialcs.mems_allowed);
 		if (retval < 0)
 			goto done;
-		if (!nodes_intersects(trialcs.mems_allowed,
-						node_states[N_HIGH_MEMORY])) {
-			/*
-			 * error if only memoryless nodes specified.
-			 */
-			retval = -ENOSPC;
-			goto done;
-		}
 	}
-	/*
-	 * Exclude memoryless nodes.  We know that trialcs.mems_allowed
-	 * contains at least one node with memory.
-	 */
 	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
 						node_states[N_HIGH_MEMORY]);
 	oldmem = cs->mems_allowed;
@@ -908,11 +953,6 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		retval = 0;		/* Too easy - nothing to do */
 		goto done;
 	}
-	/* mems_allowed cannot be empty for a cpuset with attached tasks. */
-	if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
-		retval = -ENOSPC;
-		goto done;
-	}
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		goto done;
@@ -922,7 +962,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	cs->mems_generation = cpuset_mems_generation++;
 	mutex_unlock(&callback_mutex);
 
-	set_cpuset_being_rebound(cs);		/* causes mpol_copy() rebind */
+	cpuset_being_rebound = cs;		/* causes mpol_copy() rebind */
 
 	fudge = 10;				/* spare mmarray[] slots */
 	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
@@ -936,13 +976,13 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	 * enough mmarray[] w/o using GFP_ATOMIC.
 	 */
 	while (1) {
-		ntasks = atomic_read(&cs->count);	/* guess */
+		ntasks = cgroup_task_count(cs->css.cgroup);  /* guess */
 		ntasks += fudge;
 		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
 		if (!mmarray)
 			goto done;
 		read_lock(&tasklist_lock);		/* block fork */
-		if (atomic_read(&cs->count) <= ntasks)
+		if (cgroup_task_count(cs->css.cgroup) <= ntasks)
 			break;				/* got enough */
 		read_unlock(&tasklist_lock);		/* try again */
 		kfree(mmarray);
@@ -951,21 +991,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	n = 0;
 
 	/* Load up mmarray[] with mm reference for each task in cpuset. */
-	do_each_thread(g, p) {
+	cgroup_iter_start(cs->css.cgroup, &it);
+	while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
 		struct mm_struct *mm;
 
 		if (n >= ntasks) {
 			printk(KERN_WARNING
 				"Cpuset mempolicy rebind incomplete.\n");
-			continue;
+			break;
 		}
-		if (p->cpuset != cs)
-			continue;
 		mm = get_task_mm(p);
 		if (!mm)
 			continue;
 		mmarray[n++] = mm;
-	} while_each_thread(g, p);
+	}
+	cgroup_iter_end(cs->css.cgroup, &it);
 	read_unlock(&tasklist_lock);
 
 	/*
@@ -993,12 +1033,17 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 
 	/* We're done rebinding vma's to this cpusets new mems_allowed. */
 	kfree(mmarray);
-	set_cpuset_being_rebound(NULL);
+	cpuset_being_rebound = NULL;
 	retval = 0;
 done:
 	return retval;
 }
 
+int current_cpuset_is_being_rebound(void)
+{
+	return task_cs(current) == cpuset_being_rebound;
+}
+
 /*
  * Call with manage_mutex held.
  */
@@ -1015,6 +1060,7 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
  * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
+ *				CS_SCHED_LOAD_BALANCE,
  *				CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
  *				CS_SPREAD_PAGE, CS_SPREAD_SLAB)
  * cs:	the cpuset to update
@@ -1028,6 +1074,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	int turning_on;
 	struct cpuset trialcs;
 	int err;
+	int cpus_nonempty, balance_flag_changed;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1040,10 +1087,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
+
+	cpus_nonempty = !cpus_empty(trialcs.cpus_allowed);
+	balance_flag_changed = (is_sched_load_balance(cs) !=
+		 			is_sched_load_balance(&trialcs));
+
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
+	if (cpus_nonempty && balance_flag_changed)
+		rebuild_sched_domains();
+
 	return 0;
 }
 
@@ -1145,85 +1200,34 @@ static int fmeter_getrate(struct fmeter *fmp)
 	return val;
 }
 
-/*
- * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly
- * writing the path of the old cpuset in 'ppathbuf' if it needs to be
- * notified on release.
- *
- * Call holding manage_mutex.  May take callback_mutex and task_lock of
- * the task 'pid' during call.
- */
-
-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
+static int cpuset_can_attach(struct cgroup_subsys *ss,
+			     struct cgroup *cont, struct task_struct *tsk)
 {
-	pid_t pid;
-	struct task_struct *tsk;
-	struct cpuset *oldcs;
-	cpumask_t cpus;
-	nodemask_t from, to;
-	struct mm_struct *mm;
-	int retval;
+	struct cpuset *cs = cgroup_cs(cont);
 
-	if (sscanf(pidbuf, "%d", &pid) != 1)
-		return -EIO;
 	if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
 		return -ENOSPC;
 
-	if (pid) {
-		read_lock(&tasklist_lock);
-
-		tsk = find_task_by_pid(pid);
-		if (!tsk || tsk->flags & PF_EXITING) {
-			read_unlock(&tasklist_lock);
-			return -ESRCH;
-		}
-
-		get_task_struct(tsk);
-		read_unlock(&tasklist_lock);
-
-		if ((current->euid) && (current->euid != tsk->uid)
-		    && (current->euid != tsk->suid)) {
-			put_task_struct(tsk);
-			return -EACCES;
-		}
-	} else {
-		tsk = current;
-		get_task_struct(tsk);
-	}
+	return security_task_setscheduler(tsk, 0, NULL);
+}
 
-	retval = security_task_setscheduler(tsk, 0, NULL);
-	if (retval) {
-		put_task_struct(tsk);
-		return retval;
-	}
+static void cpuset_attach(struct cgroup_subsys *ss,
+			  struct cgroup *cont, struct cgroup *oldcont,
+			  struct task_struct *tsk)
+{
+	cpumask_t cpus;
+	nodemask_t from, to;
+	struct mm_struct *mm;
+	struct cpuset *cs = cgroup_cs(cont);
+	struct cpuset *oldcs = cgroup_cs(oldcont);
 
 	mutex_lock(&callback_mutex);
-
-	task_lock(tsk);
-	oldcs = tsk->cpuset;
-	/*
-	 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
-	 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
-	 * then fail this attach_task(), to avoid breaking top_cpuset.count.
-	 */
-	if (tsk->flags & PF_EXITING) {
-		task_unlock(tsk);
-		mutex_unlock(&callback_mutex);
-		put_task_struct(tsk);
-		return -ESRCH;
-	}
-	atomic_inc(&cs->count);
-	rcu_assign_pointer(tsk->cpuset, cs);
-	task_unlock(tsk);
-
 	guarantee_online_cpus(cs, &cpus);
 	set_cpus_allowed(tsk, cpus);
+	mutex_unlock(&callback_mutex);
 
 	from = oldcs->mems_allowed;
 	to = cs->mems_allowed;
-
-	mutex_unlock(&callback_mutex);
-
 	mm = get_task_mm(tsk);
 	if (mm) {
 		mpol_rebind_mm(mm, &to);
@@ -1232,44 +1236,36 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 		mmput(mm);
 	}
 
-	put_task_struct(tsk);
-	synchronize_rcu();
-	if (atomic_dec_and_test(&oldcs->count))
-		check_for_release(oldcs, ppathbuf);
-	return 0;
 }
 
 /* The various types of files and directories in a cpuset file system */
 
 typedef enum {
-	FILE_ROOT,
-	FILE_DIR,
 	FILE_MEMORY_MIGRATE,
 	FILE_CPULIST,
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
-	FILE_NOTIFY_ON_RELEASE,
+	FILE_SCHED_LOAD_BALANCE,
 	FILE_MEMORY_PRESSURE_ENABLED,
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
 	FILE_SPREAD_SLAB,
-	FILE_TASKLIST,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct file *file,
+static ssize_t cpuset_common_file_write(struct cgroup *cont,
+					struct cftype *cft,
+					struct file *file,
 					const char __user *userbuf,
 					size_t nbytes, loff_t *unused_ppos)
 {
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-	struct cftype *cft = __d_cft(file->f_path.dentry);
+	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
 	char *buffer;
-	char *pathbuf = NULL;
 	int retval = 0;
 
 	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100 + 6 * max(NR_CPUS, MAX_NUMNODES))
+	if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
 		return -E2BIG;
 
 	/* +1 for nul-terminator */
@@ -1282,9 +1278,9 @@ static ssize_t cpuset_common_file_write(struct file *file,
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	mutex_lock(&manage_mutex);
+	cgroup_lock();
 
-	if (is_removed(cs)) {
+	if (cgroup_is_removed(cont)) {
 		retval = -ENODEV;
 		goto out2;
 	}
@@ -1302,8 +1298,8 @@ static ssize_t cpuset_common_file_write(struct file *file,
 	case FILE_MEM_EXCLUSIVE:
 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
 		break;
-	case FILE_NOTIFY_ON_RELEASE:
-		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
+	case FILE_SCHED_LOAD_BALANCE:
+		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
 		break;
 	case FILE_MEMORY_MIGRATE:
 		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
@@ -1322,9 +1318,6 @@ static ssize_t cpuset_common_file_write(struct file *file,
 		retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
 		cs->mems_generation = cpuset_mems_generation++;
 		break;
-	case FILE_TASKLIST:
-		retval = attach_task(cs, buffer, &pathbuf);
-		break;
 	default:
 		retval = -EINVAL;
 		goto out2;
@@ -1333,30 +1326,12 @@ static ssize_t cpuset_common_file_write(struct file *file,
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	mutex_unlock(&manage_mutex);
-	cpuset_release_agent(pathbuf);
+	cgroup_unlock();
 out1:
 	kfree(buffer);
 	return retval;
 }
 
-static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
-						size_t nbytes, loff_t *ppos)
-{
-	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
-
-	/* special function ? */
-	if (cft->write)
-		retval = cft->write(file, buf, nbytes, ppos);
-	else
-		retval = cpuset_common_file_write(file, buf, nbytes, ppos);
-
-	return retval;
-}
-
 /*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map.  If read in smaller
@@ -1391,11 +1366,13 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
 
-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
-				size_t nbytes, loff_t *ppos)
+static ssize_t cpuset_common_file_read(struct cgroup *cont,
+				       struct cftype *cft,
+				       struct file *file,
+				       char __user *buf,
+				       size_t nbytes, loff_t *ppos)
 {
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+	struct cpuset *cs = cgroup_cs(cont);
 	cpuset_filetype_t type = cft->private;
 	char *page;
 	ssize_t retval = 0;
@@ -1419,8 +1396,8 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	case FILE_MEM_EXCLUSIVE:
 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
 		break;
-	case FILE_NOTIFY_ON_RELEASE:
-		*s++ = notify_on_release(cs) ? '1' : '0';
+	case FILE_SCHED_LOAD_BALANCE:
+		*s++ = is_sched_load_balance(cs) ? '1' : '0';
 		break;
 	case FILE_MEMORY_MIGRATE:
 		*s++ = is_memory_migrate(cs) ? '1' : '0';
@@ -1449,390 +1426,150 @@ out:
 	return retval;
 }
 
-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
-								loff_t *ppos)
-{
-	ssize_t retval = 0;
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
-
-	/* special function ? */
-	if (cft->read)
-		retval = cft->read(file, buf, nbytes, ppos);
-	else
-		retval = cpuset_common_file_read(file, buf, nbytes, ppos);
-
-	return retval;
-}
-
-static int cpuset_file_open(struct inode *inode, struct file *file)
-{
-	int err;
-	struct cftype *cft;
-
-	err = generic_file_open(inode, file);
-	if (err)
-		return err;
-
-	cft = __d_cft(file->f_path.dentry);
-	if (!cft)
-		return -ENODEV;
-	if (cft->open)
-		err = cft->open(inode, file);
-	else
-		err = 0;
-
-	return err;
-}
-
-static int cpuset_file_release(struct inode *inode, struct file *file)
-{
-	struct cftype *cft = __d_cft(file->f_path.dentry);
-	if (cft->release)
-		return cft->release(inode, file);
-	return 0;
-}
-
-/*
- * cpuset_rename - Only allow simple rename of directories in place.
- */
-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
-                  struct inode *new_dir, struct dentry *new_dentry)
-{
-	if (!S_ISDIR(old_dentry->d_inode->i_mode))
-		return -ENOTDIR;
-	if (new_dentry->d_inode)
-		return -EEXIST;
-	if (old_dir != new_dir)
-		return -EIO;
-	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
-}
-
-static const struct file_operations cpuset_file_operations = {
-	.read = cpuset_file_read,
-	.write = cpuset_file_write,
-	.llseek = generic_file_llseek,
-	.open = cpuset_file_open,
-	.release = cpuset_file_release,
-};
-
-static const struct inode_operations cpuset_dir_inode_operations = {
-	.lookup = simple_lookup,
-	.mkdir = cpuset_mkdir,
-	.rmdir = cpuset_rmdir,
-	.rename = cpuset_rename,
-};
-
-static int cpuset_create_file(struct dentry *dentry, int mode)
-{
-	struct inode *inode;
-
-	if (!dentry)
-		return -ENOENT;
-	if (dentry->d_inode)
-		return -EEXIST;
-
-	inode = cpuset_new_inode(mode);
-	if (!inode)
-		return -ENOMEM;
-
-	if (S_ISDIR(mode)) {
-		inode->i_op = &cpuset_dir_inode_operations;
-		inode->i_fop = &simple_dir_operations;
-
-		/* start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else if (S_ISREG(mode)) {
-		inode->i_size = 0;
-		inode->i_fop = &cpuset_file_operations;
-	}
-
-	d_instantiate(dentry, inode);
-	dget(dentry);	/* Extra count - pin the dentry in core */
-	return 0;
-}
-
-/*
- *	cpuset_create_dir - create a directory for an object.
- *	cs:	the cpuset we create the directory for.
- *		It must have a valid ->parent field
- *		And we are going to fill its ->dentry field.
- *	name:	The name to give to the cpuset directory. Will be copied.
- *	mode:	mode to set on new directory.
- */
-
-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
-{
-	struct dentry *dentry = NULL;
-	struct dentry *parent;
-	int error = 0;
-
-	parent = cs->parent->dentry;
-	dentry = cpuset_get_dentry(parent, name);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	error = cpuset_create_file(dentry, S_IFDIR | mode);
-	if (!error) {
-		dentry->d_fsdata = cs;
-		inc_nlink(parent->d_inode);
-		cs->dentry = dentry;
-	}
-	dput(dentry);
-
-	return error;
-}
-
-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
-{
-	struct dentry *dentry;
-	int error;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	dentry = cpuset_get_dentry(dir, cft->name);
-	if (!IS_ERR(dentry)) {
-		error = cpuset_create_file(dentry, 0644 | S_IFREG);
-		if (!error)
-			dentry->d_fsdata = (void *)cft;
-		dput(dentry);
-	} else
-		error = PTR_ERR(dentry);
-	mutex_unlock(&dir->d_inode->i_mutex);
-	return error;
-}
-
-/*
- * Stuff for reading the 'tasks' file.
- *
- * Reading this file can return large amounts of data if a cpuset has
- * *lots* of attached tasks. So it may need several calls to read(),
- * but we cannot guarantee that the information we produce is correct
- * unless we produce it entirely atomically.
- *
- * Upon tasks file open(), a struct ctr_struct is allocated, that
- * will have a pointer to an array (also allocated here).  The struct
- * ctr_struct * is stored in file->private_data.  Its resources will
- * be freed by release() when the file is closed.  The array is used
- * to sprintf the PIDs and then used by read().
- */
-
-/* cpusets_tasks_read array */
-
-struct ctr_struct {
-	char *buf;
-	int bufsz;
-};
-
-/*
- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
- * Return actual number of pids loaded.  No need to task_lock(p)
- * when reading out p->cpuset, as we don't really care if it changes
- * on the next cycle, and we are not going to try to dereference it.
- */
-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
-{
-	int n = 0;
-	struct task_struct *g, *p;
-
-	read_lock(&tasklist_lock);
-
-	do_each_thread(g, p) {
-		if (p->cpuset == cs) {
-			if (unlikely(n == npids))
-				goto array_full;
-			pidarray[n++] = p->pid;
-		}
-	} while_each_thread(g, p);
-
-array_full:
-	read_unlock(&tasklist_lock);
-	return n;
-}
-
-static int cmppid(const void *a, const void *b)
-{
-	return *(pid_t *)a - *(pid_t *)b;
-}
-
-/*
- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
- * count 'cnt' of how many chars would be written if buf were large enough.
- */
-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
-{
-	int cnt = 0;
-	int i;
-
-	for (i = 0; i < npids; i++)
-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
-	return cnt;
-}
-
-/*
- * Handle an open on 'tasks' file.  Prepare a buffer listing the
- * process id's of tasks currently attached to the cpuset being opened.
- *
- * Does not require any specific cpuset mutexes, and does not take any.
- */
-static int cpuset_tasks_open(struct inode *unused, struct file *file)
-{
-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
-	struct ctr_struct *ctr;
-	pid_t *pidarray;
-	int npids;
-	char c;
-
-	if (!(file->f_mode & FMODE_READ))
-		return 0;
-
-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
-	if (!ctr)
-		goto err0;
-
-	/*
-	 * If cpuset gets more users after we read count, we won't have
-	 * enough space - tough.  This race is indistinguishable to the
-	 * caller from the case that the additional cpuset users didn't
-	 * show up until sometime later on.
-	 */
-	npids = atomic_read(&cs->count);
-	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-	if (!pidarray)
-		goto err1;
-
-	npids = pid_array_load(pidarray, npids, cs);
-	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-	/* Call pid_array_to_buf() twice, first just to get bufsz */
-	ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
-	ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
-	if (!ctr->buf)
-		goto err2;
-	ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
-
-	kfree(pidarray);
-	file->private_data = ctr;
-	return 0;
-
-err2:
-	kfree(pidarray);
-err1:
-	kfree(ctr);
-err0:
-	return -ENOMEM;
-}
-
-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
-						size_t nbytes, loff_t *ppos)
-{
-	struct ctr_struct *ctr = file->private_data;
 
-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
-}
 
-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
-{
-	struct ctr_struct *ctr;
 
-	if (file->f_mode & FMODE_READ) {
-		ctr = file->private_data;
-		kfree(ctr->buf);
-		kfree(ctr);
-	}
-	return 0;
-}
 
 /*
  * for the common functions, 'private' gives the type of file
  */
 
-static struct cftype cft_tasks = {
-	.name = "tasks",
-	.open = cpuset_tasks_open,
-	.read = cpuset_tasks_read,
-	.release = cpuset_tasks_release,
-	.private = FILE_TASKLIST,
-};
-
 static struct cftype cft_cpus = {
 	.name = "cpus",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_CPULIST,
 };
 
 static struct cftype cft_mems = {
 	.name = "mems",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMLIST,
 };
 
 static struct cftype cft_cpu_exclusive = {
 	.name = "cpu_exclusive",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_CPU_EXCLUSIVE,
 };
 
 static struct cftype cft_mem_exclusive = {
 	.name = "mem_exclusive",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEM_EXCLUSIVE,
 };
 
-static struct cftype cft_notify_on_release = {
-	.name = "notify_on_release",
-	.private = FILE_NOTIFY_ON_RELEASE,
+static struct cftype cft_sched_load_balance = {
+	.name = "sched_load_balance",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
+	.private = FILE_SCHED_LOAD_BALANCE,
 };
 
 static struct cftype cft_memory_migrate = {
 	.name = "memory_migrate",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_MIGRATE,
 };
 
 static struct cftype cft_memory_pressure_enabled = {
 	.name = "memory_pressure_enabled",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_PRESSURE_ENABLED,
 };
 
 static struct cftype cft_memory_pressure = {
 	.name = "memory_pressure",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_MEMORY_PRESSURE,
 };
 
 static struct cftype cft_spread_page = {
 	.name = "memory_spread_page",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_SPREAD_PAGE,
 };
 
 static struct cftype cft_spread_slab = {
 	.name = "memory_spread_slab",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
 	.private = FILE_SPREAD_SLAB,
 };
 
-static int cpuset_populate_dir(struct dentry *cs_dentry)
+static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	int err;
 
-	if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
 		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
-		return err;
-	if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
+	if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
 		return err;
+	/* memory_pressure_enabled is in root cpuset only */
+	if (err == 0 && !cont->parent)
+		err = cgroup_add_file(cont, ss,
+					 &cft_memory_pressure_enabled);
 	return 0;
 }
 
 /*
+ * post_clone() is called at the end of cgroup_clone().
+ * 'cgroup' was just created automatically as a result of
+ * a cgroup_clone(), and the current task is about to
+ * be moved into 'cgroup'.
+ *
+ * Currently we refuse to set up the cgroup - thereby
+ * refusing the task to be entered, and as a result refusing
+ * the sys_unshare() or clone() which initiated it - if any
+ * sibling cpusets have exclusive cpus or mem.
+ *
+ * If this becomes a problem for some users who wish to
+ * allow that scenario, then cpuset_post_clone() could be
+ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
+ * (and likewise for mems) to the new cgroup.
+ */
+static void cpuset_post_clone(struct cgroup_subsys *ss,
+			      struct cgroup *cgroup)
+{
+	struct cgroup *parent, *child;
+	struct cpuset *cs, *parent_cs;
+
+	parent = cgroup->parent;
+	list_for_each_entry(child, &parent->children, sibling) {
+		cs = cgroup_cs(child);
+		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
+			return;
+	}
+	cs = cgroup_cs(cgroup);
+	parent_cs = cgroup_cs(parent);
+
+	cs->mems_allowed = parent_cs->mems_allowed;
+	cs->cpus_allowed = parent_cs->cpus_allowed;
+	return;
+}
+
+/*
  *	cpuset_create - create a cpuset
  *	parent:	cpuset that will be parent of the new cpuset.
  *	name:		name of the new cpuset. Will be strcpy'ed.
@@ -1841,106 +1578,77 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
  *	Must be called with the mutex on the parent inode held
  */
 
-static long cpuset_create(struct cpuset *parent, const char *name, int mode)
+static struct cgroup_subsys_state *cpuset_create(
+	struct cgroup_subsys *ss,
+	struct cgroup *cont)
 {
 	struct cpuset *cs;
-	int err;
+	struct cpuset *parent;
 
+	if (!cont->parent) {
+		/* This is early initialization for the top cgroup */
+		top_cpuset.mems_generation = cpuset_mems_generation++;
+		return &top_cpuset.css;
+	}
+	parent = cgroup_cs(cont->parent);
 	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
 	if (!cs)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	mutex_lock(&manage_mutex);
 	cpuset_update_task_memory_state();
 	cs->flags = 0;
-	if (notify_on_release(parent))
-		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
 	if (is_spread_page(parent))
 		set_bit(CS_SPREAD_PAGE, &cs->flags);
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
+	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	cs->cpus_allowed = CPU_MASK_NONE;
 	cs->mems_allowed = NODE_MASK_NONE;
-	atomic_set(&cs->count, 0);
-	INIT_LIST_HEAD(&cs->sibling);
-	INIT_LIST_HEAD(&cs->children);
 	cs->mems_generation = cpuset_mems_generation++;
 	fmeter_init(&cs->fmeter);
 
 	cs->parent = parent;
-
-	mutex_lock(&callback_mutex);
-	list_add(&cs->sibling, &cs->parent->children);
 	number_of_cpusets++;
-	mutex_unlock(&callback_mutex);
-
-	err = cpuset_create_dir(cs, name, mode);
-	if (err < 0)
-		goto err;
-
-	/*
-	 * Release manage_mutex before cpuset_populate_dir() because it
-	 * will down() this new directory's i_mutex and if we race with
-	 * another mkdir, we might deadlock.
-	 */
-	mutex_unlock(&manage_mutex);
-
-	err = cpuset_populate_dir(cs->dentry);
-	/* If err < 0, we have a half-filled directory - oh well ;) */
-	return 0;
-err:
-	list_del(&cs->sibling);
-	mutex_unlock(&manage_mutex);
-	kfree(cs);
-	return err;
+	return &cs->css ;
 }
 
-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-	struct cpuset *c_parent = dentry->d_parent->d_fsdata;
-
-	/* the vfs holds inode->i_mutex already */
-	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
-}
+/*
+ * Locking note on the strange update_flag() call below:
+ *
+ * If the cpuset being removed has its flag 'sched_load_balance'
+ * enabled, then simulate turning sched_load_balance off, which
+ * will call rebuild_sched_domains().  The lock_cpu_hotplug()
+ * call in rebuild_sched_domains() must not be made while holding
+ * callback_mutex.  Elsewhere the kernel nests callback_mutex inside
+ * lock_cpu_hotplug() calls.  So the reverse nesting would risk an
+ * ABBA deadlock.
+ */
 
-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
+static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
 {
-	struct cpuset *cs = dentry->d_fsdata;
-	struct dentry *d;
-	struct cpuset *parent;
-	char *pathbuf = NULL;
+	struct cpuset *cs = cgroup_cs(cont);
 
-	/* the vfs holds both inode->i_mutex already */
-
-	mutex_lock(&manage_mutex);
 	cpuset_update_task_memory_state();
-	if (atomic_read(&cs->count) > 0) {
-		mutex_unlock(&manage_mutex);
-		return -EBUSY;
-	}
-	if (!list_empty(&cs->children)) {
-		mutex_unlock(&manage_mutex);
-		return -EBUSY;
-	}
-	parent = cs->parent;
-	mutex_lock(&callback_mutex);
-	set_bit(CS_REMOVED, &cs->flags);
-	list_del(&cs->sibling);	/* delete my sibling from parent->children */
-	spin_lock(&cs->dentry->d_lock);
-	d = dget(cs->dentry);
-	cs->dentry = NULL;
-	spin_unlock(&d->d_lock);
-	cpuset_d_remove_dir(d);
-	dput(d);
+
+	if (is_sched_load_balance(cs))
+		update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+
 	number_of_cpusets--;
-	mutex_unlock(&callback_mutex);
-	if (list_empty(&parent->children))
-		check_for_release(parent, &pathbuf);
-	mutex_unlock(&manage_mutex);
-	cpuset_release_agent(pathbuf);
-	return 0;
+	kfree(cs);
 }
 
+struct cgroup_subsys cpuset_subsys = {
+	.name = "cpuset",
+	.create = cpuset_create,
+	.destroy  = cpuset_destroy,
+	.can_attach = cpuset_can_attach,
+	.attach = cpuset_attach,
+	.populate = cpuset_populate,
+	.post_clone = cpuset_post_clone,
+	.subsys_id = cpuset_subsys_id,
+	.early_init = 1,
+};
+
 /*
  * cpuset_init_early - just enough so that the calls to
  * cpuset_update_task_memory_state() in early init code
@@ -1949,13 +1657,11 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 int __init cpuset_init_early(void)
 {
-	struct task_struct *tsk = current;
-
-	tsk->cpuset = &top_cpuset;
-	tsk->cpuset->mems_generation = cpuset_mems_generation++;
+	top_cpuset.mems_generation = cpuset_mems_generation++;
 	return 0;
 }
 
+
 /**
  * cpuset_init - initialize cpusets at system boot
  *
@@ -1964,39 +1670,21 @@ int __init cpuset_init_early(void)
 
 int __init cpuset_init(void)
 {
-	struct dentry *root;
-	int err;
+	int err = 0;
 
 	top_cpuset.cpus_allowed = CPU_MASK_ALL;
 	top_cpuset.mems_allowed = NODE_MASK_ALL;
 
 	fmeter_init(&top_cpuset.fmeter);
 	top_cpuset.mems_generation = cpuset_mems_generation++;
-
-	init_task.cpuset = &top_cpuset;
+	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
 
 	err = register_filesystem(&cpuset_fs_type);
 	if (err < 0)
-		goto out;
-	cpuset_mount = kern_mount(&cpuset_fs_type);
-	if (IS_ERR(cpuset_mount)) {
-		printk(KERN_ERR "cpuset: could not mount!\n");
-		err = PTR_ERR(cpuset_mount);
-		cpuset_mount = NULL;
-		goto out;
-	}
-	root = cpuset_mount->mnt_sb->s_root;
-	root->d_fsdata = &top_cpuset;
-	inc_nlink(root->d_inode);
-	top_cpuset.dentry = root;
-	root->d_inode->i_op = &cpuset_dir_inode_operations;
+		return err;
+
 	number_of_cpusets = 1;
-	err = cpuset_populate_dir(root);
-	/* memory_pressure_enabled is in root cpuset only */
-	if (err == 0)
-		err = cpuset_add_file(root, &cft_memory_pressure_enabled);
-out:
-	return err;
+	return 0;
 }
 
 /*
@@ -2022,10 +1710,12 @@ out:
 
 static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 {
+	struct cgroup *cont;
 	struct cpuset *c;
 
 	/* Each of our child cpusets mems must be online */
-	list_for_each_entry(c, &cur->children, sibling) {
+	list_for_each_entry(cont, &cur->css.cgroup->children, sibling) {
+		c = cgroup_cs(cont);
 		guarantee_online_cpus_mems_in_subtree(c);
 		if (!cpus_empty(c->cpus_allowed))
 			guarantee_online_cpus(c, &c->cpus_allowed);
@@ -2053,7 +1743,7 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 
 static void common_cpu_mem_hotplug_unplug(void)
 {
-	mutex_lock(&manage_mutex);
+	cgroup_lock();
 	mutex_lock(&callback_mutex);
 
 	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
@@ -2061,7 +1751,7 @@ static void common_cpu_mem_hotplug_unplug(void)
 	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	mutex_unlock(&callback_mutex);
-	mutex_unlock(&manage_mutex);
+	cgroup_unlock();
 }
 
 /*
@@ -2074,8 +1764,8 @@ static void common_cpu_mem_hotplug_unplug(void)
  * cpu_online_map on each CPU hotplug (cpuhp) event.
  */
 
-static int cpuset_handle_cpuhp(struct notifier_block *nb,
-				unsigned long phase, void *cpu)
+static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
+				unsigned long phase, void *unused_cpu)
 {
 	if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
 		return NOTIFY_DONE;
@@ -2113,109 +1803,7 @@ void __init cpuset_init_smp(void)
 }
 
 /**
- * cpuset_fork - attach newly forked task to its parents cpuset.
- * @tsk: pointer to task_struct of forking parent process.
- *
- * Description: A task inherits its parent's cpuset at fork().
- *
- * A pointer to the shared cpuset was automatically copied in fork.c
- * by dup_task_struct().  However, we ignore that copy, since it was
- * not made under the protection of task_lock(), so might no longer be
- * a valid cpuset pointer.  attach_task() might have already changed
- * current->cpuset, allowing the previously referenced cpuset to
- * be removed and freed.  Instead, we task_lock(current) and copy
- * its present value of current->cpuset for our freshly forked child.
- *
- * At the point that cpuset_fork() is called, 'current' is the parent
- * task, and the passed argument 'child' points to the child task.
- **/
-
-void cpuset_fork(struct task_struct *child)
-{
-	task_lock(current);
-	child->cpuset = current->cpuset;
-	atomic_inc(&child->cpuset->count);
-	task_unlock(current);
-}
-
-/**
- * cpuset_exit - detach cpuset from exiting task
- * @tsk: pointer to task_struct of exiting process
- *
- * Description: Detach cpuset from @tsk and release it.
- *
- * Note that cpusets marked notify_on_release force every task in
- * them to take the global manage_mutex mutex when exiting.
- * This could impact scaling on very large systems.  Be reluctant to
- * use notify_on_release cpusets where very high task exit scaling
- * is required on large systems.
- *
- * Don't even think about derefencing 'cs' after the cpuset use count
- * goes to zero, except inside a critical section guarded by manage_mutex
- * or callback_mutex.   Otherwise a zero cpuset use count is a license to
- * any other task to nuke the cpuset immediately, via cpuset_rmdir().
- *
- * This routine has to take manage_mutex, not callback_mutex, because
- * it is holding that mutex while calling check_for_release(),
- * which calls kmalloc(), so can't be called holding callback_mutex().
- *
- * the_top_cpuset_hack:
- *
- *    Set the exiting tasks cpuset to the root cpuset (top_cpuset).
- *
- *    Don't leave a task unable to allocate memory, as that is an
- *    accident waiting to happen should someone add a callout in
- *    do_exit() after the cpuset_exit() call that might allocate.
- *    If a task tries to allocate memory with an invalid cpuset,
- *    it will oops in cpuset_update_task_memory_state().
- *
- *    We call cpuset_exit() while the task is still competent to
- *    handle notify_on_release(), then leave the task attached to
- *    the root cpuset (top_cpuset) for the remainder of its exit.
- *
- *    To do this properly, we would increment the reference count on
- *    top_cpuset, and near the very end of the kernel/exit.c do_exit()
- *    code we would add a second cpuset function call, to drop that
- *    reference.  This would just create an unnecessary hot spot on
- *    the top_cpuset reference count, to no avail.
- *
- *    Normally, holding a reference to a cpuset without bumping its
- *    count is unsafe.   The cpuset could go away, or someone could
- *    attach us to a different cpuset, decrementing the count on
- *    the first cpuset that we never incremented.  But in this case,
- *    top_cpuset isn't going away, and either task has PF_EXITING set,
- *    which wards off any attach_task() attempts, or task is a failed
- *    fork, never visible to attach_task.
- *
- *    Another way to do this would be to set the cpuset pointer
- *    to NULL here, and check in cpuset_update_task_memory_state()
- *    for a NULL pointer.  This hack avoids that NULL check, for no
- *    cost (other than this way too long comment ;).
- **/
 
-void cpuset_exit(struct task_struct *tsk)
-{
-	struct cpuset *cs;
-
-	task_lock(current);
-	cs = tsk->cpuset;
-	tsk->cpuset = &top_cpuset;	/* the_top_cpuset_hack - see above */
-	task_unlock(current);
-
-	if (notify_on_release(cs)) {
-		char *pathbuf = NULL;
-
-		mutex_lock(&manage_mutex);
-		if (atomic_dec_and_test(&cs->count))
-			check_for_release(cs, &pathbuf);
-		mutex_unlock(&manage_mutex);
-		cpuset_release_agent(pathbuf);
-	} else {
-		atomic_dec(&cs->count);
-	}
-}
-
-/**
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  *
@@ -2230,10 +1818,23 @@ cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
 	cpumask_t mask;
 
 	mutex_lock(&callback_mutex);
+	mask = cpuset_cpus_allowed_locked(tsk);
+	mutex_unlock(&callback_mutex);
+
+	return mask;
+}
+
+/**
+ * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+ * Must be  called with callback_mutex held.
+ **/
+cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
+{
+	cpumask_t mask;
+
 	task_lock(tsk);
-	guarantee_online_cpus(tsk->cpuset, &mask);
+	guarantee_online_cpus(task_cs(tsk), &mask);
 	task_unlock(tsk);
-	mutex_unlock(&callback_mutex);
 
 	return mask;
 }
@@ -2259,7 +1860,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 
 	mutex_lock(&callback_mutex);
 	task_lock(tsk);
-	guarantee_online_mems(tsk->cpuset, &mask);
+	guarantee_online_mems(task_cs(tsk), &mask);
 	task_unlock(tsk);
 	mutex_unlock(&callback_mutex);
 
@@ -2390,7 +1991,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 	mutex_lock(&callback_mutex);
 
 	task_lock(current);
-	cs = nearest_exclusive_ancestor(current->cpuset);
+	cs = nearest_exclusive_ancestor(task_cs(current));
 	task_unlock(current);
 
 	allowed = node_isset(node, cs->mems_allowed);
@@ -2550,14 +2151,12 @@ int cpuset_memory_pressure_enabled __read_mostly;
 
 void __cpuset_memory_pressure_bump(void)
 {
-	struct cpuset *cs;
-
 	task_lock(current);
-	cs = current->cpuset;
-	fmeter_markevent(&cs->fmeter);
+	fmeter_markevent(&task_cs(current)->fmeter);
 	task_unlock(current);
 }
 
+#ifdef CONFIG_PROC_PID_CPUSET
 /*
  * proc_cpuset_show()
  *  - Print tasks cpuset path into seq_file.
@@ -2569,11 +2168,12 @@ void __cpuset_memory_pressure_bump(void)
  *    the_top_cpuset_hack in cpuset_exit(), which sets an exiting tasks
  *    cpuset to top_cpuset.
  */
-static int proc_cpuset_show(struct seq_file *m, void *v)
+static int proc_cpuset_show(struct seq_file *m, void *unused_v)
 {
 	struct pid *pid;
 	struct task_struct *tsk;
 	char *buf;
+	struct cgroup_subsys_state *css;
 	int retval;
 
 	retval = -ENOMEM;
@@ -2588,15 +2188,15 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		goto out_free;
 
 	retval = -EINVAL;
-	mutex_lock(&manage_mutex);
-
-	retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
+	cgroup_lock();
+	css = task_subsys_state(tsk, cpuset_subsys_id);
+	retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
 	if (retval < 0)
 		goto out_unlock;
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out_unlock:
-	mutex_unlock(&manage_mutex);
+	cgroup_unlock();
 	put_task_struct(tsk);
 out_free:
 	kfree(buf);
@@ -2616,6 +2216,7 @@ const struct file_operations proc_cpuset_operations = {
 	.llseek		= seq_lseek,
 	.release	= single_release,
 };
+#endif /* CONFIG_PROC_PID_CPUSET */
 
 /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
 char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
diff --git a/kernel/die_notifier.c b/kernel/die_notifier.c
deleted file mode 100644
index 0d98827887a..00000000000
--- a/kernel/die_notifier.c
+++ /dev/null
@@ -1,38 +0,0 @@
-
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/vmalloc.h>
-#include <linux/kdebug.h>
-
-
-static ATOMIC_NOTIFIER_HEAD(die_chain);
-
-int notify_die(enum die_val val, const char *str,
-	       struct pt_regs *regs, long err, int trap, int sig)
-{
-	struct die_args args = {
-		.regs		= regs,
-		.str		= str,
-		.err		= err,
-		.trapnr		= trap,
-		.signr		= sig,
-
-	};
-
-	return atomic_notifier_call_chain(&die_chain, val, &args);
-}
-
-int register_die_notifier(struct notifier_block *nb)
-{
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_die_notifier);
-
-int unregister_die_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&die_chain, nb);
-}
-EXPORT_SYMBOL_GPL(unregister_die_notifier);
-
-
diff --git a/kernel/exit.c b/kernel/exit.c
index 2c704c86edb..f1aec27f1df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -31,7 +31,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/freezer.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/posix-timers.h>
@@ -148,6 +148,7 @@ void release_task(struct task_struct * p)
 	int zap_leader;
 repeat:
 	atomic_dec(&p->user->processes);
+	proc_flush_task(p);
 	write_lock_irq(&tasklist_lock);
 	ptrace_unlink(p);
 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
@@ -175,7 +176,6 @@ repeat:
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_task(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
@@ -221,7 +221,7 @@ static int will_become_orphaned_pgrp(struct pid *pgrp, struct task_struct *ignor
 	do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->exit_state
-				|| is_init(p->real_parent))
+				|| is_global_init(p->real_parent))
 			continue;
 		if (task_pgrp(p->real_parent) != pgrp &&
 		    task_session(p->real_parent) == task_session(p)) {
@@ -299,14 +299,14 @@ void __set_special_pids(pid_t session, pid_t pgrp)
 {
 	struct task_struct *curr = current->group_leader;
 
-	if (process_session(curr) != session) {
+	if (task_session_nr(curr) != session) {
 		detach_pid(curr, PIDTYPE_SID);
-		set_signal_session(curr->signal, session);
+		set_task_session(curr, session);
 		attach_pid(curr, PIDTYPE_SID, find_pid(session));
 	}
-	if (process_group(curr) != pgrp) {
+	if (task_pgrp_nr(curr) != pgrp) {
 		detach_pid(curr, PIDTYPE_PGID);
-		curr->signal->pgrp = pgrp;
+		set_task_pgrp(curr, pgrp);
 		attach_pid(curr, PIDTYPE_PGID, find_pid(pgrp));
 	}
 }
@@ -400,11 +400,12 @@ void daemonize(const char *name, ...)
 	current->fs = fs;
 	atomic_inc(&fs->count);
 
-	exit_task_namespaces(current);
-	current->nsproxy = init_task.nsproxy;
-	get_task_namespaces(current);
+	if (current->nsproxy != init_task.nsproxy) {
+		get_nsproxy(init_task.nsproxy);
+		switch_task_namespaces(current, init_task.nsproxy);
+	}
 
- 	exit_files(current);
+	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
 
@@ -492,7 +493,7 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
 }
 EXPORT_SYMBOL(reset_files_struct);
 
-static inline void __exit_files(struct task_struct *tsk)
+static void __exit_files(struct task_struct *tsk)
 {
 	struct files_struct * files = tsk->files;
 
@@ -509,7 +510,7 @@ void exit_files(struct task_struct *tsk)
 	__exit_files(tsk);
 }
 
-static inline void __put_fs_struct(struct fs_struct *fs)
+static void __put_fs_struct(struct fs_struct *fs)
 {
 	/* No need to hold fs->lock if we are killing it */
 	if (atomic_dec_and_test(&fs->count)) {
@@ -530,7 +531,7 @@ void put_fs_struct(struct fs_struct *fs)
 	__put_fs_struct(fs);
 }
 
-static inline void __exit_fs(struct task_struct *tsk)
+static void __exit_fs(struct task_struct *tsk)
 {
 	struct fs_struct * fs = tsk->fs;
 
@@ -665,19 +666,22 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
  * the child reaper process (ie "init") in our pid
  * space.
  */
-static void
-forget_original_parent(struct task_struct *father, struct list_head *to_release)
+static void forget_original_parent(struct task_struct *father)
 {
-	struct task_struct *p, *reaper = father;
-	struct list_head *_p, *_n;
+	struct task_struct *p, *n, *reaper = father;
+	struct list_head ptrace_dead;
+
+	INIT_LIST_HEAD(&ptrace_dead);
+
+	write_lock_irq(&tasklist_lock);
 
 	do {
 		reaper = next_thread(reaper);
 		if (reaper == father) {
-			reaper = child_reaper(father);
+			reaper = task_child_reaper(father);
 			break;
 		}
-	} while (reaper->exit_state);
+	} while (reaper->flags & PF_EXITING);
 
 	/*
 	 * There are only two places where our children can be:
@@ -687,9 +691,8 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
 	 *
 	 * Search them and reparent children.
 	 */
-	list_for_each_safe(_p, _n, &father->children) {
+	list_for_each_entry_safe(p, n, &father->children, sibling) {
 		int ptrace;
-		p = list_entry(_p, struct task_struct, sibling);
 
 		ptrace = p->ptrace;
 
@@ -715,13 +718,23 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
 		 * while it was being traced by us, to be able to see it in wait4.
 		 */
 		if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
-			list_add(&p->ptrace_list, to_release);
+			list_add(&p->ptrace_list, &ptrace_dead);
 	}
-	list_for_each_safe(_p, _n, &father->ptrace_children) {
-		p = list_entry(_p, struct task_struct, ptrace_list);
+
+	list_for_each_entry_safe(p, n, &father->ptrace_children, ptrace_list) {
 		p->real_parent = reaper;
 		reparent_thread(p, father, 1);
 	}
+
+	write_unlock_irq(&tasklist_lock);
+	BUG_ON(!list_empty(&father->children));
+	BUG_ON(!list_empty(&father->ptrace_children));
+
+	list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_list) {
+		list_del_init(&p->ptrace_list);
+		release_task(p);
+	}
+
 }
 
 /*
@@ -732,7 +745,6 @@ static void exit_notify(struct task_struct *tsk)
 {
 	int state;
 	struct task_struct *t;
-	struct list_head ptrace_dead, *_p, *_n;
 	struct pid *pgrp;
 
 	if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
@@ -753,8 +765,6 @@ static void exit_notify(struct task_struct *tsk)
 		spin_unlock_irq(&tsk->sighand->siglock);
 	}
 
-	write_lock_irq(&tasklist_lock);
-
 	/*
 	 * This does two things:
 	 *
@@ -763,12 +773,10 @@ static void exit_notify(struct task_struct *tsk)
 	 *	as a result of our exiting, and if they have any stopped
 	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 	 */
+	forget_original_parent(tsk);
+	exit_task_namespaces(tsk);
 
-	INIT_LIST_HEAD(&ptrace_dead);
-	forget_original_parent(tsk, &ptrace_dead);
-	BUG_ON(!list_empty(&tsk->children));
-	BUG_ON(!list_empty(&tsk->ptrace_children));
-
+	write_lock_irq(&tasklist_lock);
 	/*
 	 * Check to see if any process groups have become orphaned
 	 * as a result of our exiting, and if they have any stopped
@@ -792,7 +800,7 @@ static void exit_notify(struct task_struct *tsk)
 	/* Let father know we died
 	 *
 	 * Thread signals are configurable, but you aren't going to use
-	 * that to send signals to arbitary processes. 
+	 * that to send signals to arbitary processes.
 	 * That stops right now.
 	 *
 	 * If the parent exec id doesn't match the exec id we saved
@@ -833,12 +841,6 @@ static void exit_notify(struct task_struct *tsk)
 
 	write_unlock_irq(&tasklist_lock);
 
-	list_for_each_safe(_p, _n, &ptrace_dead) {
-		list_del_init(_p);
-		t = list_entry(_p, struct task_struct, ptrace_list);
-		release_task(t);
-	}
-
 	/* If the process is dead, release it - nobody will wait for it */
 	if (state == EXIT_DEAD)
 		release_task(tsk);
@@ -874,10 +876,35 @@ static inline void check_stack_usage(void) {}
 
 static inline void exit_child_reaper(struct task_struct *tsk)
 {
-	if (likely(tsk->group_leader != child_reaper(tsk)))
+	if (likely(tsk->group_leader != task_child_reaper(tsk)))
 		return;
 
-	panic("Attempted to kill init!");
+	if (tsk->nsproxy->pid_ns == &init_pid_ns)
+		panic("Attempted to kill init!");
+
+	/*
+	 * @tsk is the last thread in the 'cgroup-init' and is exiting.
+	 * Terminate all remaining processes in the namespace and reap them
+	 * before exiting @tsk.
+	 *
+	 * Note that @tsk (last thread of cgroup-init) may not necessarily
+	 * be the child-reaper (i.e main thread of cgroup-init) of the
+	 * namespace i.e the child_reaper may have already exited.
+	 *
+	 * Even after a child_reaper exits, we let it inherit orphaned children,
+	 * because, pid_ns->child_reaper remains valid as long as there is
+	 * at least one living sub-thread in the cgroup init.
+
+	 * This living sub-thread of the cgroup-init will be notified when
+	 * a child inherited by the 'child-reaper' exits (do_notify_parent()
+	 * uses __group_send_sig_info()). Further, when reaping child processes,
+	 * do_wait() iterates over children of all living sub threads.
+
+	 * i.e even though 'child_reaper' thread is listed as the parent of the
+	 * orphaned children, any living sub-thread in the cgroup-init can
+	 * perform the role of the child_reaper.
+	 */
+	zap_pid_ns_processes(tsk->nsproxy->pid_ns);
 }
 
 fastcall NORET_TYPE void do_exit(long code)
@@ -932,7 +959,7 @@ fastcall NORET_TYPE void do_exit(long code)
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
-				current->comm, current->pid,
+				current->comm, task_pid_nr(current),
 				preempt_count());
 
 	acct_update_integrals(tsk);
@@ -972,7 +999,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	__exit_fs(tsk);
 	check_stack_usage();
 	exit_thread();
-	cpuset_exit(tsk);
+	cgroup_exit(tsk, 1);
 	exit_keys(tsk);
 
 	if (group_dead && tsk->signal->leader)
@@ -983,7 +1010,6 @@ fastcall NORET_TYPE void do_exit(long code)
 		module_put(tsk->binfmt->module);
 
 	proc_exit_connector(tsk);
-	exit_task_namespaces(tsk);
 	exit_notify(tsk);
 #ifdef CONFIG_NUMA
 	mpol_free(tsk->mempolicy);
@@ -1086,15 +1112,17 @@ asmlinkage void sys_exit_group(int error_code)
 static int eligible_child(pid_t pid, int options, struct task_struct *p)
 {
 	int err;
+	struct pid_namespace *ns;
 
+	ns = current->nsproxy->pid_ns;
 	if (pid > 0) {
-		if (p->pid != pid)
+		if (task_pid_nr_ns(p, ns) != pid)
 			return 0;
 	} else if (!pid) {
-		if (process_group(p) != process_group(current))
+		if (task_pgrp_nr_ns(p, ns) != task_pgrp_vnr(current))
 			return 0;
 	} else if (pid != -1) {
-		if (process_group(p) != -pid)
+		if (task_pgrp_nr_ns(p, ns) != -pid)
 			return 0;
 	}
 
@@ -1164,9 +1192,12 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 {
 	unsigned long state;
 	int retval, status, traced;
+	struct pid_namespace *ns;
+
+	ns = current->nsproxy->pid_ns;
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = task_pid_nr_ns(p, ns);
 		uid_t uid = p->uid;
 		int exit_code = p->exit_code;
 		int why, status;
@@ -1285,11 +1316,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
 			retval = put_user(status, &infop->si_status);
 	}
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = task_pid_nr_ns(p, ns);
 
 	if (traced) {
 		write_lock_irq(&tasklist_lock);
@@ -1326,6 +1357,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
 			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code;
+	struct pid_namespace *ns;
 
 	if (!p->exit_code)
 		return 0;
@@ -1344,11 +1376,12 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
 	 * keep holding onto the tasklist_lock while we call getrusage and
 	 * possibly take page faults for user memory.
 	 */
+	ns = current->nsproxy->pid_ns;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
 
 	if (unlikely(noreap)) {
-		pid_t pid = p->pid;
+		pid_t pid = task_pid_nr_ns(p, ns);
 		uid_t uid = p->uid;
 		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
 
@@ -1419,11 +1452,11 @@ bail_ref:
 	if (!retval && infop)
 		retval = put_user(exit_code, &infop->si_status);
 	if (!retval && infop)
-		retval = put_user(p->pid, &infop->si_pid);
+		retval = put_user(task_pid_nr_ns(p, ns), &infop->si_pid);
 	if (!retval && infop)
 		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
-		retval = p->pid;
+		retval = task_pid_nr_ns(p, ns);
 	put_task_struct(p);
 
 	BUG_ON(!retval);
@@ -1443,6 +1476,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 	int retval;
 	pid_t pid;
 	uid_t uid;
+	struct pid_namespace *ns;
 
 	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
 		return 0;
@@ -1457,7 +1491,8 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 		p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
 	spin_unlock_irq(&p->sighand->siglock);
 
-	pid = p->pid;
+	ns = current->nsproxy->pid_ns;
+	pid = task_pid_nr_ns(p, ns);
 	uid = p->uid;
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
@@ -1468,7 +1503,7 @@ static int wait_task_continued(struct task_struct *p, int noreap,
 		if (!retval && stat_addr)
 			retval = put_user(0xffff, stat_addr);
 		if (!retval)
-			retval = p->pid;
+			retval = task_pid_nr_ns(p, ns);
 	} else {
 		retval = wait_noreap_copyout(p, pid, uid,
 					     CLD_CONTINUED, SIGCONT,
@@ -1517,12 +1552,9 @@ repeat:
 	tsk = current;
 	do {
 		struct task_struct *p;
-		struct list_head *_p;
 		int ret;
 
-		list_for_each(_p,&tsk->children) {
-			p = list_entry(_p, struct task_struct, sibling);
-
+		list_for_each_entry(p, &tsk->children, sibling) {
 			ret = eligible_child(pid, options, p);
 			if (!ret)
 				continue;
@@ -1604,9 +1636,8 @@ check_continued:
 			}
 		}
 		if (!flag) {
-			list_for_each(_p, &tsk->ptrace_children) {
-				p = list_entry(_p, struct task_struct,
-						ptrace_list);
+			list_for_each_entry(p, &tsk->ptrace_children,
+					    ptrace_list) {
 				if (!eligible_child(pid, options, p))
 					continue;
 				flag = 1;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2ce28f165e3..ddafdfac945 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -29,7 +29,7 @@
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
-#include <linux/cpuset.h>
+#include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
@@ -50,6 +50,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
+#include <linux/proc_fs.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -116,7 +117,7 @@ EXPORT_SYMBOL(free_task);
 
 void __put_task_struct(struct task_struct *tsk)
 {
-	WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)));
+	WARN_ON(!tsk->exit_state);
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
@@ -205,7 +206,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 }
 
 #ifdef CONFIG_MMU
-static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	struct vm_area_struct *mpnt, *tmp, **pprev;
 	struct rb_node **rb_link, *rb_parent;
@@ -583,7 +584,7 @@ fail_nomem:
 	return retval;
 }
 
-static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
+static struct fs_struct *__copy_fs_struct(struct fs_struct *old)
 {
 	struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
 	/* We don't need to lock fs - think why ;-) */
@@ -615,7 +616,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 
 EXPORT_SYMBOL_GPL(copy_fs_struct);
 
-static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 {
 	if (clone_flags & CLONE_FS) {
 		atomic_inc(&current->fs->count);
@@ -818,7 +819,7 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
-static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct sighand_struct *sig;
 
@@ -841,7 +842,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
-static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
+static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	int ret;
@@ -923,7 +924,7 @@ void __cleanup_signal(struct signal_struct *sig)
 	kmem_cache_free(signal_cachep, sig);
 }
 
-static inline void cleanup_signal(struct task_struct *tsk)
+static void cleanup_signal(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 
@@ -933,7 +934,7 @@ static inline void cleanup_signal(struct task_struct *tsk)
 		__cleanup_signal(sig);
 }
 
-static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
+static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
 
@@ -949,10 +950,10 @@ asmlinkage long sys_set_tid_address(int __user *tidptr)
 {
 	current->clear_child_tid = tidptr;
 
-	return current->pid;
+	return task_pid_vnr(current);
 }
 
-static inline void rt_mutex_init_task(struct task_struct *p)
+static void rt_mutex_init_task(struct task_struct *p)
 {
 	spin_lock_init(&p->pi_lock);
 #ifdef CONFIG_RT_MUTEXES
@@ -973,12 +974,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_start,
 					struct pt_regs *regs,
 					unsigned long stack_size,
-					int __user *parent_tidptr,
 					int __user *child_tidptr,
 					struct pid *pid)
 {
 	int retval;
-	struct task_struct *p = NULL;
+	struct task_struct *p;
+	int cgroup_callbacks_done = 0;
 
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
@@ -1042,12 +1043,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->did_exec = 0;
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
 	copy_flags(clone_flags, p);
-	p->pid = pid_nr(pid);
-	retval = -EFAULT;
-	if (clone_flags & CLONE_PARENT_SETTID)
-		if (put_user(p->pid, parent_tidptr))
-			goto bad_fork_cleanup_delays_binfmt;
-
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
 	p->vfork_done = NULL;
@@ -1087,13 +1082,13 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	cpuset_fork(p);
+	cgroup_fork(p);
 #ifdef CONFIG_NUMA
  	p->mempolicy = mpol_copy(p->mempolicy);
  	if (IS_ERR(p->mempolicy)) {
  		retval = PTR_ERR(p->mempolicy);
  		p->mempolicy = NULL;
- 		goto bad_fork_cleanup_cpuset;
+ 		goto bad_fork_cleanup_cgroup;
  	}
 	mpol_fix_fork_child_flag(p);
 #endif
@@ -1126,10 +1121,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
 
-	p->tgid = p->pid;
-	if (clone_flags & CLONE_THREAD)
-		p->tgid = current->tgid;
-
 	if ((retval = security_task_alloc(p)))
 		goto bad_fork_cleanup_policy;
 	if ((retval = audit_alloc(p)))
@@ -1155,6 +1146,24 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if (retval)
 		goto bad_fork_cleanup_namespaces;
 
+	if (pid != &init_struct_pid) {
+		retval = -ENOMEM;
+		pid = alloc_pid(task_active_pid_ns(p));
+		if (!pid)
+			goto bad_fork_cleanup_namespaces;
+
+		if (clone_flags & CLONE_NEWPID) {
+			retval = pid_ns_prepare_proc(task_active_pid_ns(p));
+			if (retval < 0)
+				goto bad_fork_free_pid;
+		}
+	}
+
+	p->pid = pid_nr(pid);
+	p->tgid = p->pid;
+	if (clone_flags & CLONE_THREAD)
+		p->tgid = current->tgid;
+
 	p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 	/*
 	 * Clear TID on mm_release()?
@@ -1204,6 +1213,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
+	/* Now that the task is set up, run cgroup callbacks if
+	 * necessary. We need to run them before the task is visible
+	 * on the tasklist. */
+	cgroup_fork_callbacks(p);
+	cgroup_callbacks_done = 1;
+
 	/* Need tasklist lock for parent etc handling! */
 	write_lock_irq(&tasklist_lock);
 
@@ -1246,7 +1261,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		spin_unlock(&current->sighand->siglock);
 		write_unlock_irq(&tasklist_lock);
 		retval = -ERESTARTNOINTR;
-		goto bad_fork_cleanup_namespaces;
+		goto bad_fork_free_pid;
 	}
 
 	if (clone_flags & CLONE_THREAD) {
@@ -1275,11 +1290,22 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			__ptrace_link(p, current->parent);
 
 		if (thread_group_leader(p)) {
-			p->signal->tty = current->signal->tty;
-			p->signal->pgrp = process_group(current);
-			set_signal_session(p->signal, process_session(current));
-			attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
-			attach_pid(p, PIDTYPE_SID, task_session(current));
+			if (clone_flags & CLONE_NEWPID) {
+				p->nsproxy->pid_ns->child_reaper = p;
+				p->signal->tty = NULL;
+				set_task_pgrp(p, p->pid);
+				set_task_session(p, p->pid);
+				attach_pid(p, PIDTYPE_PGID, pid);
+				attach_pid(p, PIDTYPE_SID, pid);
+			} else {
+				p->signal->tty = current->signal->tty;
+				set_task_pgrp(p, task_pgrp_nr(current));
+				set_task_session(p, task_session_nr(current));
+				attach_pid(p, PIDTYPE_PGID,
+						task_pgrp(current));
+				attach_pid(p, PIDTYPE_SID,
+						task_session(current));
+			}
 
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
@@ -1292,8 +1318,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
+	cgroup_post_fork(p);
 	return p;
 
+bad_fork_free_pid:
+	if (pid != &init_struct_pid)
+		free_pid(pid);
 bad_fork_cleanup_namespaces:
 	exit_task_namespaces(p);
 bad_fork_cleanup_keys:
@@ -1318,10 +1348,9 @@ bad_fork_cleanup_security:
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
 	mpol_free(p->mempolicy);
-bad_fork_cleanup_cpuset:
+bad_fork_cleanup_cgroup:
 #endif
-	cpuset_exit(p);
-bad_fork_cleanup_delays_binfmt:
+	cgroup_exit(p, cgroup_callbacks_done);
 	delayacct_tsk_free(p);
 	if (p->binfmt)
 		module_put(p->binfmt->module);
@@ -1348,7 +1377,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	struct task_struct *task;
 	struct pt_regs regs;
 
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL,
+	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
 				&init_struct_pid);
 	if (!IS_ERR(task))
 		init_idle(task, cpu);
@@ -1356,7 +1385,7 @@ struct task_struct * __cpuinit fork_idle(int cpu)
 	return task;
 }
 
-static inline int fork_traceflag (unsigned clone_flags)
+static int fork_traceflag(unsigned clone_flags)
 {
 	if (clone_flags & CLONE_UNTRACED)
 		return 0;
@@ -1387,19 +1416,16 @@ long do_fork(unsigned long clone_flags,
 {
 	struct task_struct *p;
 	int trace = 0;
-	struct pid *pid = alloc_pid();
 	long nr;
 
-	if (!pid)
-		return -EAGAIN;
-	nr = pid->nr;
 	if (unlikely(current->ptrace)) {
 		trace = fork_traceflag (clone_flags);
 		if (trace)
 			clone_flags |= CLONE_PTRACE;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);
+	p = copy_process(clone_flags, stack_start, regs, stack_size,
+			child_tidptr, NULL);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
 	 * might get invalid after that point, if the thread exits quickly.
@@ -1407,6 +1433,17 @@ long do_fork(unsigned long clone_flags,
 	if (!IS_ERR(p)) {
 		struct completion vfork;
 
+		/*
+		 * this is enough to call pid_nr_ns here, but this if
+		 * improves optimisation of regular fork()
+		 */
+		nr = (clone_flags & CLONE_NEWPID) ?
+			task_pid_nr_ns(p, current->nsproxy->pid_ns) :
+				task_pid_vnr(p);
+
+		if (clone_flags & CLONE_PARENT_SETTID)
+			put_user(nr, parent_tidptr);
+
 		if (clone_flags & CLONE_VFORK) {
 			p->vfork_done = &vfork;
 			init_completion(&vfork);
@@ -1440,7 +1477,6 @@ long do_fork(unsigned long clone_flags,
 			}
 		}
 	} else {
-		free_pid(pid);
 		nr = PTR_ERR(p);
 	}
 	return nr;
@@ -1485,7 +1521,7 @@ void __init proc_caches_init(void)
  * Check constraints on flags passed to the unshare system call and
  * force unsharing of additional process context as appropriate.
  */
-static inline void check_unshare_flags(unsigned long *flags_ptr)
+static void check_unshare_flags(unsigned long *flags_ptr)
 {
 	/*
 	 * If unsharing a thread from a thread group, must also
@@ -1617,7 +1653,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
 	struct files_struct *fd, *new_fd = NULL;
 	struct sem_undo_list *new_ulist = NULL;
-	struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+	struct nsproxy *new_nsproxy = NULL;
 
 	check_unshare_flags(&unshare_flags);
 
@@ -1647,14 +1683,13 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 
 	if (new_fs ||  new_mm || new_fd || new_ulist || new_nsproxy) {
 
-		task_lock(current);
-
 		if (new_nsproxy) {
-			old_nsproxy = current->nsproxy;
-			current->nsproxy = new_nsproxy;
-			new_nsproxy = old_nsproxy;
+			switch_task_namespaces(current, new_nsproxy);
+			new_nsproxy = NULL;
 		}
 
+		task_lock(current);
+
 		if (new_fs) {
 			fs = current->fs;
 			current->fs = new_fs;
diff --git a/kernel/futex.c b/kernel/futex.c
index e45a65e4168..32710451dc2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -53,6 +53,9 @@
 #include <linux/signal.h>
 #include <linux/module.h>
 #include <linux/magic.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
+
 #include <asm/futex.h>
 
 #include "rtmutex_common.h"
@@ -443,8 +446,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 	struct task_struct *p;
 
 	rcu_read_lock();
-	p = find_task_by_pid(pid);
-
+	p = find_task_by_vpid(pid);
 	if (!p || ((current->euid != p->euid) && (current->euid != p->uid)))
 		p = ERR_PTR(-ESRCH);
 	else
@@ -653,7 +655,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!(uval & FUTEX_OWNER_DIED)) {
 		int ret = 0;
 
-		newval = FUTEX_WAITERS | new_owner->pid;
+		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
 		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 
@@ -1106,7 +1108,7 @@ static void unqueue_me_pi(struct futex_q *q)
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 				struct task_struct *curr)
 {
-	u32 newtid = curr->pid | FUTEX_WAITERS;
+	u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
 	u32 uval, curval, newval;
 	int ret;
@@ -1368,7 +1370,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = current->pid;
+	newval = task_pid_vnr(current);
 
 	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
 
@@ -1379,7 +1381,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 * Detect deadlocks. In case of REQUEUE_PI this is a valid
 	 * situation and we return success to user space.
 	 */
-	if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
+	if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
 		ret = -EDEADLK;
 		goto out_unlock_release_sem;
 	}
@@ -1408,7 +1410,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | current->pid;
+		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
 		ownerdied = 0;
 		lock_taken = 1;
 	}
@@ -1587,7 +1589,7 @@ retry:
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != current->pid)
+	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
 		return -EPERM;
 	/*
 	 * First take all the futex related locks:
@@ -1608,7 +1610,7 @@ retry_unlocked:
 	 * anyone else up:
 	 */
 	if (!(uval & FUTEX_OWNER_DIED))
-		uval = cmpxchg_futex_value_locked(uaddr, current->pid, 0);
+		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
 
 
 	if (unlikely(uval == -EFAULT))
@@ -1617,7 +1619,7 @@ retry_unlocked:
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == current->pid))
+	if (unlikely(uval == task_pid_vnr(current)))
 		goto out_unlock;
 
 	/*
@@ -1854,7 +1856,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr,
 
 		ret = -ESRCH;
 		rcu_read_lock();
-		p = find_task_by_pid(pid);
+		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
@@ -1887,7 +1889,7 @@ retry:
 	if (get_user(uval, uaddr))
 		return -1;
 
-	if ((uval & FUTEX_TID_MASK) == curr->pid) {
+	if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) {
 		/*
 		 * Ok, this dying thread is truly holding a futex
 		 * of interest. Set the OWNER_DIED bit atomically
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 2c2e2954b71..00b572666cc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -8,6 +8,7 @@
 
 #include <linux/linkage.h>
 #include <linux/compat.h>
+#include <linux/nsproxy.h>
 #include <linux/futex.h>
 
 #include <asm/uaccess.h>
@@ -124,7 +125,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 
 		ret = -ESRCH;
 		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		p = find_task_by_vpid(pid);
 		if (!p)
 			goto err_unlock;
 		ret = -EPERM;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index e9f1b4ea504..aa74a1ef2da 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -51,7 +51,7 @@ struct resource crashk_res = {
 
 int kexec_should_crash(struct task_struct *p)
 {
-	if (in_interrupt() || !p->pid || is_init(p) || panic_on_oops)
+	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
 		return 1;
 	return 0;
 }
@@ -1146,6 +1146,172 @@ static int __init crash_notes_memory_init(void)
 }
 module_init(crash_notes_memory_init)
 
+
+/*
+ * parsing the "crashkernel" commandline
+ *
+ * this code is intended to be called from architecture specific code
+ */
+
+
+/*
+ * This function parses command lines in the format
+ *
+ *   crashkernel=ramsize-range:size[,...][@offset]
+ *
+ * The function returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_mem(char 			*cmdline,
+					unsigned long long	system_ram,
+					unsigned long long	*crash_size,
+					unsigned long long	*crash_base)
+{
+	char *cur = cmdline, *tmp;
+
+	/* for each entry of the comma-separated list */
+	do {
+		unsigned long long start, end = ULLONG_MAX, size;
+
+		/* get the start of the range */
+		start = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("crashkernel: Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (*cur != '-') {
+			pr_warning("crashkernel: '-' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		/* if no ':' is here, than we read the end */
+		if (*cur != ':') {
+			end = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("crashkernel: Memory "
+						"value expected\n");
+				return -EINVAL;
+			}
+			cur = tmp;
+			if (end <= start) {
+				pr_warning("crashkernel: end <= start\n");
+				return -EINVAL;
+			}
+		}
+
+		if (*cur != ':') {
+			pr_warning("crashkernel: ':' expected\n");
+			return -EINVAL;
+		}
+		cur++;
+
+		size = memparse(cur, &tmp);
+		if (cur == tmp) {
+			pr_warning("Memory value expected\n");
+			return -EINVAL;
+		}
+		cur = tmp;
+		if (size >= system_ram) {
+			pr_warning("crashkernel: invalid size\n");
+			return -EINVAL;
+		}
+
+		/* match ? */
+		if (system_ram >= start && system_ram <= end) {
+			*crash_size = size;
+			break;
+		}
+	} while (*cur++ == ',');
+
+	if (*crash_size > 0) {
+		while (*cur != ' ' && *cur != '@')
+			cur++;
+		if (*cur == '@') {
+			cur++;
+			*crash_base = memparse(cur, &tmp);
+			if (cur == tmp) {
+				pr_warning("Memory value expected "
+						"after '@'\n");
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * That function parses "simple" (old) crashkernel command lines like
+ *
+ * 	crashkernel=size[@offset]
+ *
+ * It returns 0 on success and -EINVAL on failure.
+ */
+static int __init parse_crashkernel_simple(char 		*cmdline,
+					   unsigned long long 	*crash_size,
+					   unsigned long long 	*crash_base)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warning("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	if (*cur == '@')
+		*crash_base = memparse(cur+1, &cur);
+
+	return 0;
+}
+
+/*
+ * That function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
+int __init parse_crashkernel(char 		 *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	char 	*p = cmdline, *ck_cmdline = NULL;
+	char	*first_colon, *first_space;
+
+	BUG_ON(!crash_size || !crash_base);
+	*crash_size = 0;
+	*crash_base = 0;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, "crashkernel=");
+	while (p) {
+		ck_cmdline = p;
+		p = strstr(p+1, "crashkernel=");
+	}
+
+	if (!ck_cmdline)
+		return -EINVAL;
+
+	ck_cmdline += 12; /* strlen("crashkernel=") */
+
+	/*
+	 * if the commandline contains a ':', then that's the extended
+	 * syntax -- if not, it must be the classic syntax
+	 */
+	first_colon = strchr(ck_cmdline, ':');
+	first_space = strchr(ck_cmdline, ' ');
+	if (first_colon && (!first_space || first_colon < first_space))
+		return parse_crashkernel_mem(ck_cmdline, system_ram,
+				crash_size, crash_base);
+	else
+		return parse_crashkernel_simple(ck_cmdline, crash_size,
+				crash_base);
+
+	return 0;
+}
+
+
+
 void crash_save_vmcoreinfo(void)
 {
 	u32 *buf;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index a6f1ee9c92d..55fe0c7cd95 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -511,11 +511,11 @@ static void lockdep_print_held_locks(struct task_struct *curr)
 	int i, depth = curr->lockdep_depth;
 
 	if (!depth) {
-		printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
+		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
 		return;
 	}
 	printk("%d lock%s held by %s/%d:\n",
-		depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
+		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
 
 	for (i = 0; i < depth; i++) {
 		printk(" #%d: ", i);
@@ -904,7 +904,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth)
 	print_kernel_version();
 	printk(  "-------------------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(check_source);
 	printk("\nbut task is already holding lock:\n");
 	print_lock(check_target);
@@ -1085,7 +1085,7 @@ print_bad_irq_dependency(struct task_struct *curr,
 	print_kernel_version();
 	printk(  "------------------------------------------------------\n");
 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
-		curr->comm, curr->pid,
+		curr->comm, task_pid_nr(curr),
 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
 		curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
 		curr->hardirqs_enabled,
@@ -1237,7 +1237,7 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
 	print_kernel_version();
 	printk(  "---------------------------------------------\n");
 	printk("%s/%d is trying to acquire lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(next);
 	printk("\nbut task is already holding lock:\n");
 	print_lock(prev);
@@ -1521,7 +1521,7 @@ cache_hit:
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-	       	struct held_lock *hlock, int chain_head, u64 chain_key)
+		struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 	/*
 	 * Trylock needs to maintain the stack of held locks, but it
@@ -1641,7 +1641,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 		usage_str[prev_bit], usage_str[new_bit]);
 
 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
-		curr->comm, curr->pid,
+		curr->comm, task_pid_nr(curr),
 		trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
 		trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
 		trace_hardirqs_enabled(curr),
@@ -1694,7 +1694,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
 	print_kernel_version();
 	printk(  "---------------------------------------------------------\n");
 	printk("%s/%d just changed the state of lock:\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lock(this);
 	if (forwards)
 		printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
@@ -2487,7 +2487,7 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
 	printk(  "[ BUG: bad unlock balance detected! ]\n");
 	printk(  "-------------------------------------\n");
 	printk("%s/%d is trying to release lock (",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
 	printk(") at:\n");
 	print_ip_sym(ip);
@@ -2737,7 +2737,7 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
 	printk(  "[ BUG: bad contention detected! ]\n");
 	printk(  "---------------------------------\n");
 	printk("%s/%d is trying to contend lock (",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	print_lockdep_cache(lock);
 	printk(") at:\n");
 	print_ip_sym(ip);
@@ -3072,7 +3072,7 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
 	printk(  "[ BUG: held lock freed! ]\n");
 	printk(  "-------------------------\n");
 	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
-		curr->comm, curr->pid, mem_from, mem_to-1);
+		curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
 	print_lock(hlock);
 	lockdep_print_held_locks(curr);
 
@@ -3125,7 +3125,7 @@ static void print_held_locks_bug(struct task_struct *curr)
 	printk(  "[ BUG: lock held at task exit time! ]\n");
 	printk(  "-------------------------------------\n");
 	printk("%s/%d is exiting with locks still held!\n",
-		curr->comm, curr->pid);
+		curr->comm, task_pid_nr(curr));
 	lockdep_print_held_locks(curr);
 
 	printk("\nstack backtrace:\n");
diff --git a/kernel/marker.c b/kernel/marker.c
new file mode 100644
index 00000000000..ccb48d9a365
--- /dev/null
+++ b/kernel/marker.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+
+extern struct marker __start___markers[];
+extern struct marker __stop___markers[];
+
+/*
+ * module_mutex nests inside markers_mutex. Markers mutex protects the builtin
+ * and module markers, the hash table and deferred_sync.
+ */
+static DEFINE_MUTEX(markers_mutex);
+
+/*
+ * Marker deferred synchronization.
+ * Upon marker probe_unregister, we delay call to synchronize_sched() to
+ * accelerate mass unregistration (only when there is no more reference to a
+ * given module do we call synchronize_sched()). However, we need to make sure
+ * every critical region has ended before we re-arm a marker that has been
+ * unregistered and then registered back with a different probe data.
+ */
+static int deferred_sync;
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+
+struct marker_entry {
+	struct hlist_node hlist;
+	char *format;
+	marker_probe_func *probe;
+	void *private;
+	int refcount;	/* Number of times armed. 0 if disarmed. */
+	char name[0];	/* Contains name'\0'format'\0' */
+};
+
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: pointer of type const struct marker
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the  execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modifies the execution flow of preemptible code.
+ */
+void __mark_empty_function(const struct marker *mdata, void *private,
+	const char *fmt, ...)
+{
+}
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	u32 hash = jhash(name, strlen(name), 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name))
+			return e;
+	}
+	return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static int add_marker(const char *name, const char *format,
+	marker_probe_func *probe, void *private)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	size_t format_len = 0;
+	u32 hash = jhash(name, name_len-1, 0);
+
+	if (format)
+		format_len = strlen(format) + 1;
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			printk(KERN_NOTICE
+				"Marker %s busy, probe %p already installed\n",
+				name, e->probe);
+			return -EBUSY;	/* Already there */
+		}
+	}
+	/*
+	 * Using kmalloc here to allocate a variable length element. Could
+	 * cause some memory fragmentation if overused.
+	 */
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], name, name_len);
+	if (format) {
+		e->format = &e->name[name_len];
+		memcpy(e->format, format, format_len);
+		trace_mark(core_marker_format, "name %s format %s",
+				e->name, e->format);
+	} else
+		e->format = NULL;
+	e->probe = probe;
+	e->private = private;
+	e->refcount = 0;
+	hlist_add_head(&e->hlist, head);
+	return 0;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with mutex_lock
+ * held.
+ */
+static void *remove_marker(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	int found = 0;
+	size_t len = strlen(name) + 1;
+	void *private = NULL;
+	u32 hash = jhash(name, len-1, 0);
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		private = e->private;
+		hlist_del(&e->hlist);
+		kfree(e);
+	}
+	return private;
+}
+
+/*
+ * Set the mark_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry **entry, const char *format)
+{
+	struct marker_entry *e;
+	size_t name_len = strlen((*entry)->name) + 1;
+	size_t format_len = strlen(format) + 1;
+
+	e = kmalloc(sizeof(struct marker_entry) + name_len + format_len,
+			GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+	memcpy(&e->name[0], (*entry)->name, name_len);
+	e->format = &e->name[name_len];
+	memcpy(e->format, format, format_len);
+	e->probe = (*entry)->probe;
+	e->private = (*entry)->private;
+	e->refcount = (*entry)->refcount;
+	hlist_add_before(&e->hlist, &(*entry)->hlist);
+	hlist_del(&(*entry)->hlist);
+	kfree(*entry);
+	*entry = e;
+	trace_mark(core_marker_format, "name %s format %s",
+			e->name, e->format);
+	return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry **entry, struct marker *elem)
+{
+	int ret;
+	WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+	if ((*entry)->format) {
+		if (strcmp((*entry)->format, elem->format) != 0) {
+			printk(KERN_NOTICE
+				"Format mismatch for probe %s "
+				"(%s), marker (%s)\n",
+				(*entry)->name,
+				(*entry)->format,
+				elem->format);
+			return -EPERM;
+		}
+	} else {
+		ret = marker_set_format(entry, elem->format);
+		if (ret)
+			return ret;
+	}
+	elem->call = (*entry)->probe;
+	elem->private = (*entry)->private;
+	elem->state = 1;
+	return 0;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only after a synchronize_sched() issued after setting elem->call to the
+ * empty function insures that the original callback is not used anymore. This
+ * insured by preemption disabling around the call site.
+ */
+static void disable_marker(struct marker *elem)
+{
+	elem->state = 0;
+	elem->call = __mark_empty_function;
+	/*
+	 * Leave the private data and id there, because removal is racy and
+	 * should be done only after a synchronize_sched(). These are never used
+	 * until the next initialization anyway.
+	 */
+}
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ * @probe_module: module address of the probe being updated
+ * @refcount: number of references left to the given probe_module (out)
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ * Must be called with markers_mutex held.
+ */
+void marker_update_probe_range(struct marker *begin,
+	struct marker *end, struct module *probe_module,
+	int *refcount)
+{
+	struct marker *iter;
+	struct marker_entry *mark_entry;
+
+	for (iter = begin; iter < end; iter++) {
+		mark_entry = get_marker(iter->name);
+		if (mark_entry && mark_entry->refcount) {
+			set_marker(&mark_entry, iter);
+			/*
+			 * ignore error, continue
+			 */
+			if (probe_module)
+				if (probe_module ==
+			__module_text_address((unsigned long)mark_entry->probe))
+					(*refcount)++;
+		} else {
+			disable_marker(iter);
+		}
+	}
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ * Issues a synchronize_sched() when no reference to the module passed
+ * as parameter is found in the probes so the probe module can be
+ * safely unloaded from now on.
+ */
+static void marker_update_probes(struct module *probe_module)
+{
+	int refcount = 0;
+
+	mutex_lock(&markers_mutex);
+	/* Core kernel markers */
+	marker_update_probe_range(__start___markers,
+			__stop___markers, probe_module, &refcount);
+	/* Markers in modules. */
+	module_update_markers(probe_module, &refcount);
+	if (probe_module && refcount == 0) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	mutex_unlock(&markers_mutex);
+}
+
+/**
+ * marker_probe_register -  Connect a probe to a marker
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @private: probe private data
+ *
+ * private data must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_probe_register(const char *name, const char *format,
+			marker_probe_func *probe, void *private)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (entry && entry->refcount) {
+		ret = -EBUSY;
+		goto end;
+	}
+	if (deferred_sync) {
+		synchronize_sched();
+		deferred_sync = 0;
+	}
+	ret = add_marker(name, format, probe, private);
+	if (ret)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister -  Disconnect a probe from a marker
+ * @name: marker name
+ *
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister(const char *name)
+{
+	struct module *probe_module;
+	struct marker_entry *entry;
+	void *private;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		private = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	private = remove_marker(name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+/**
+ * marker_probe_unregister_private_data -  Disconnect a probe from a marker
+ * @private: probe private data
+ *
+ * Unregister a marker by providing the registered private data.
+ * Returns the private data given to marker_probe_register, or an ERR_PTR().
+ */
+void *marker_probe_unregister_private_data(void *private)
+{
+	struct module *probe_module;
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *entry;
+	int found = 0;
+	unsigned int i;
+	int need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+		head = &marker_table[i];
+		hlist_for_each_entry(entry, node, head, hlist) {
+			if (entry->private == private) {
+				found = 1;
+				goto iter_end;
+			}
+		}
+	}
+iter_end:
+	if (!found) {
+		private = ERR_PTR(-ENOENT);
+		goto end;
+	}
+	entry->refcount = 0;
+	/* In what module is the probe handler ? */
+	probe_module = __module_text_address((unsigned long)entry->probe);
+	private = remove_marker(entry->name);
+	deferred_sync = 1;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(probe_module);
+	return private;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
+
+/**
+ * marker_arm - Arm a marker
+ * @name: marker name
+ *
+ * Activate a marker. It keeps a reference count of the number of
+ * arming/disarming done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_arm(const char *name)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only need to update probes when refcount passes from 0 to 1.
+	 */
+	if (entry->refcount++)
+		goto end;
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_arm);
+
+/**
+ * marker_disarm - Disarm a marker
+ * @name: marker name
+ *
+ * Disarm a marker. It keeps a reference count of the number of arming/disarming
+ * done.
+ * Returns 0 if ok, error value on error.
+ */
+int marker_disarm(const char *name)
+{
+	struct marker_entry *entry;
+	int ret = 0, need_update = 0;
+
+	mutex_lock(&markers_mutex);
+	entry = get_marker(name);
+	if (!entry) {
+		ret = -ENOENT;
+		goto end;
+	}
+	/*
+	 * Only permit decrement refcount if higher than 0.
+	 * Do probe update only on 1 -> 0 transition.
+	 */
+	if (entry->refcount) {
+		if (--entry->refcount)
+			goto end;
+	} else {
+		ret = -EPERM;
+		goto end;
+	}
+	need_update = 1;
+end:
+	mutex_unlock(&markers_mutex);
+	if (need_update)
+		marker_update_probes(NULL);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(marker_disarm);
+
+/**
+ * marker_get_private_data - Get a marker's probe private data
+ * @name: marker name
+ *
+ * Returns the private data pointer, or an ERR_PTR.
+ * The private data pointer should _only_ be dereferenced if the caller is the
+ * owner of the data, or its content could vanish. This is mostly used to
+ * confirm that a caller is the owner of a registered probe.
+ */
+void *marker_get_private_data(const char *name)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+	struct marker_entry *e;
+	size_t name_len = strlen(name) + 1;
+	u32 hash = jhash(name, name_len-1, 0);
+	int found = 0;
+
+	head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+	hlist_for_each_entry(e, node, head, hlist) {
+		if (!strcmp(name, e->name)) {
+			found = 1;
+			return e->private;
+		}
+	}
+	return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_private_data);
diff --git a/kernel/module.c b/kernel/module.c
index 7734595bd32..3202c995007 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1673,6 +1673,8 @@ static struct module *load_module(void __user *umod,
 	unsigned int unusedcrcindex;
 	unsigned int unusedgplindex;
 	unsigned int unusedgplcrcindex;
+	unsigned int markersindex;
+	unsigned int markersstringsindex;
 	struct module *mod;
 	long err = 0;
 	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1939,6 +1941,9 @@ static struct module *load_module(void __user *umod,
 		add_taint_module(mod, TAINT_FORCED_MODULE);
 	}
 #endif
+	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
+ 	markersstringsindex = find_sec(hdr, sechdrs, secstrings,
+					"__markers_strings");
 
 	/* Now do relocations. */
 	for (i = 1; i < hdr->e_shnum; i++) {
@@ -1961,6 +1966,11 @@ static struct module *load_module(void __user *umod,
 		if (err < 0)
 			goto cleanup;
 	}
+#ifdef CONFIG_MARKERS
+	mod->markers = (void *)sechdrs[markersindex].sh_addr;
+	mod->num_markers =
+		sechdrs[markersindex].sh_size / sizeof(*mod->markers);
+#endif
 
         /* Find duplicate symbols */
 	err = verify_export_symbols(mod);
@@ -1979,6 +1989,11 @@ static struct module *load_module(void __user *umod,
 
 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
 
+#ifdef CONFIG_MARKERS
+	if (!mod->taints)
+		marker_update_probe_range(mod->markers,
+			mod->markers + mod->num_markers, NULL, NULL);
+#endif
 	err = module_finalize(hdr, sechdrs, mod);
 	if (err < 0)
 		goto cleanup;
@@ -2570,3 +2585,18 @@ EXPORT_SYMBOL(module_remove_driver);
 void struct_module(struct module *mod) { return; }
 EXPORT_SYMBOL(struct_module);
 #endif
+
+#ifdef CONFIG_MARKERS
+void module_update_markers(struct module *probe_module, int *refcount)
+{
+	struct module *mod;
+
+	mutex_lock(&module_mutex);
+	list_for_each_entry(mod, &modules, list)
+		if (!mod->taints)
+			marker_update_probe_range(mod->markers,
+				mod->markers + mod->num_markers,
+				probe_module, refcount);
+	mutex_unlock(&module_mutex);
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
new file mode 100644
index 00000000000..4253f472f06
--- /dev/null
+++ b/kernel/notifier.c
@@ -0,0 +1,539 @@
+#include <linux/kdebug.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/vmalloc.h>
+
+/*
+ *	Notifier list for kernel code which wants to be called
+ *	at shutdown. This is used to stop any idling DMA operations
+ *	and the like.
+ */
+BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
+
+/*
+ *	Notifier chain core routines.  The exported routines below
+ *	are layered on top of these, with appropriate locking added.
+ */
+
+static int notifier_chain_register(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if (n->priority > (*nl)->priority)
+			break;
+		nl = &((*nl)->next);
+	}
+	n->next = *nl;
+	rcu_assign_pointer(*nl, n);
+	return 0;
+}
+
+static int notifier_chain_unregister(struct notifier_block **nl,
+		struct notifier_block *n)
+{
+	while ((*nl) != NULL) {
+		if ((*nl) == n) {
+			rcu_assign_pointer(*nl, n->next);
+			return 0;
+		}
+		nl = &((*nl)->next);
+	}
+	return -ENOENT;
+}
+
+/**
+ * notifier_call_chain - Informs the registered notifiers about an event.
+ *	@nl:		Pointer to head of the blocking notifier chain
+ *	@val:		Value passed unmodified to notifier function
+ *	@v:		Pointer passed unmodified to notifier function
+ *	@nr_to_call:	Number of notifier functions to be called. Don't care
+ *			value of this parameter is -1.
+ *	@nr_calls:	Records the number of notifications sent. Don't care
+ *			value of this field is NULL.
+ *	@returns:	notifier_call_chain returns the value returned by the
+ *			last notifier function called.
+ */
+static int __kprobes notifier_call_chain(struct notifier_block **nl,
+					unsigned long val, void *v,
+					int nr_to_call,	int *nr_calls)
+{
+	int ret = NOTIFY_DONE;
+	struct notifier_block *nb, *next_nb;
+
+	nb = rcu_dereference(*nl);
+
+	while (nb && nr_to_call) {
+		next_nb = rcu_dereference(nb->next);
+		ret = nb->notifier_call(nb, val, v);
+
+		if (nr_calls)
+			(*nr_calls)++;
+
+		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
+			break;
+		nb = next_nb;
+		nr_to_call--;
+	}
+	return ret;
+}
+
+/*
+ *	Atomic notifier chain routines.  Registration and unregistration
+ *	use a spinlock, and call_chain is synchronized by RCU (no locks).
+ */
+
+/**
+ *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an atomic notifier chain.
+ *
+ *	Currently always returns zero.
+ */
+int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_register(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
+
+/**
+ *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an atomic notifier chain.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+		struct notifier_block *n)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&nh->lock, flags);
+	ret = notifier_chain_unregister(&nh->head, n);
+	spin_unlock_irqrestore(&nh->lock, flags);
+	synchronize_rcu();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
+
+/**
+ *	__atomic_notifier_call_chain - Call functions in an atomic notifier chain
+ *	@nh: Pointer to head of the atomic notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See the comment for notifier_call_chain.
+ *	@nr_calls: See the comment for notifier_call_chain.
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an atomic context, so they must not block.
+ *	This routine uses RCU to synchronize with changes to the chain.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+					unsigned long val, void *v,
+					int nr_to_call, int *nr_calls)
+{
+	int ret;
+
+	rcu_read_lock();
+	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
+
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
+
+/*
+ *	Blocking notifier chain routines.  All access to the chain is
+ *	synchronized by an rwsem.
+ */
+
+/**
+ *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a blocking notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_register(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
+
+/**
+ *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a blocking notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call down_write().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	down_write(&nh->rwsem);
+	ret = notifier_chain_unregister(&nh->head, n);
+	up_write(&nh->rwsem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
+
+/**
+ *	__blocking_notifier_call_chain - Call functions in a blocking notifier chain
+ *	@nh: Pointer to head of the blocking notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain.
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+				   unsigned long val, void *v,
+				   int nr_to_call, int *nr_calls)
+{
+	int ret = NOTIFY_DONE;
+
+	/*
+	 * We check the head outside the lock, but if this access is
+	 * racy then it does not matter what the result of the test
+	 * is, we re-check the list after having taken the lock anyway:
+	 */
+	if (rcu_dereference(nh->head)) {
+		down_read(&nh->rwsem);
+		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
+					nr_calls);
+		up_read(&nh->rwsem);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
+
+int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
+
+/*
+ *	Raw notifier chain routines.  There is no protection;
+ *	the caller must provide it.  Use at your own risk!
+ */
+
+/**
+ *	raw_notifier_chain_register - Add notifier to a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Currently always returns zero.
+ */
+int raw_notifier_chain_register(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_register(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
+
+/**
+ *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from a raw notifier chain.
+ *	All locking must be provided by the caller.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+		struct notifier_block *n)
+{
+	return notifier_chain_unregister(&nh->head, n);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
+
+/**
+ *	__raw_notifier_call_chain - Call functions in a raw notifier chain
+ *	@nh: Pointer to head of the raw notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in an undefined context.
+ *	All locking must be provided by the caller.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __raw_notifier_call_chain(struct raw_notifier_head *nh,
+			      unsigned long val, void *v,
+			      int nr_to_call, int *nr_calls)
+{
+	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+}
+EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
+
+int raw_notifier_call_chain(struct raw_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
+
+/*
+ *	SRCU notifier chain routines.    Registration and unregistration
+ *	use a mutex, and call_chain is synchronized by SRCU (no locks).
+ */
+
+/**
+ *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: New entry in notifier chain
+ *
+ *	Adds a notifier to an SRCU notifier chain.
+ *	Must be called in process context.
+ *
+ *	Currently always returns zero.
+ */
+int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_register(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_register(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
+
+/**
+ *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@n: Entry to remove from notifier chain
+ *
+ *	Removes a notifier from an SRCU notifier chain.
+ *	Must be called from process context.
+ *
+ *	Returns zero on success or %-ENOENT on failure.
+ */
+int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+		struct notifier_block *n)
+{
+	int ret;
+
+	/*
+	 * This code gets used during boot-up, when task switching is
+	 * not yet working and interrupts must remain disabled.  At
+	 * such times we must not call mutex_lock().
+	 */
+	if (unlikely(system_state == SYSTEM_BOOTING))
+		return notifier_chain_unregister(&nh->head, n);
+
+	mutex_lock(&nh->mutex);
+	ret = notifier_chain_unregister(&nh->head, n);
+	mutex_unlock(&nh->mutex);
+	synchronize_srcu(&nh->srcu);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
+
+/**
+ *	__srcu_notifier_call_chain - Call functions in an SRCU notifier chain
+ *	@nh: Pointer to head of the SRCU notifier chain
+ *	@val: Value passed unmodified to notifier function
+ *	@v: Pointer passed unmodified to notifier function
+ *	@nr_to_call: See comment for notifier_call_chain.
+ *	@nr_calls: See comment for notifier_call_chain
+ *
+ *	Calls each function in a notifier chain in turn.  The functions
+ *	run in a process context, so they are allowed to block.
+ *
+ *	If the return value of the notifier can be and'ed
+ *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
+ *	will return immediately, with the return value of
+ *	the notifier function which halted execution.
+ *	Otherwise the return value is the return value
+ *	of the last notifier function called.
+ */
+int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+			       unsigned long val, void *v,
+			       int nr_to_call, int *nr_calls)
+{
+	int ret;
+	int idx;
+
+	idx = srcu_read_lock(&nh->srcu);
+	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
+	srcu_read_unlock(&nh->srcu, idx);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
+
+int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+		unsigned long val, void *v)
+{
+	return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
+}
+EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
+
+/**
+ *	srcu_init_notifier_head - Initialize an SRCU notifier head
+ *	@nh: Pointer to head of the srcu notifier chain
+ *
+ *	Unlike other sorts of notifier heads, SRCU notifier heads require
+ *	dynamic initialization.  Be sure to call this routine before
+ *	calling any of the other SRCU notifier routines for this head.
+ *
+ *	If an SRCU notifier head is deallocated, it must first be cleaned
+ *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
+ *	per-cpu data (used by the SRCU mechanism) will leak.
+ */
+void srcu_init_notifier_head(struct srcu_notifier_head *nh)
+{
+	mutex_init(&nh->mutex);
+	if (init_srcu_struct(&nh->srcu) < 0)
+		BUG();
+	nh->head = NULL;
+}
+EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
+
+/**
+ *	register_reboot_notifier - Register function to be called at reboot time
+ *	@nb: Info about notifier function to be called
+ *
+ *	Registers a function with the list of functions
+ *	to be called at reboot time.
+ *
+ *	Currently always returns zero, as blocking_notifier_chain_register()
+ *	always returns zero.
+ */
+int register_reboot_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(register_reboot_notifier);
+
+/**
+ *	unregister_reboot_notifier - Unregister previously registered reboot notifier
+ *	@nb: Hook to be unregistered
+ *
+ *	Unregisters a previously registered reboot
+ *	notifier function.
+ *
+ *	Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_reboot_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
+}
+EXPORT_SYMBOL(unregister_reboot_notifier);
+
+static ATOMIC_NOTIFIER_HEAD(die_chain);
+
+int notify_die(enum die_val val, const char *str,
+	       struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs	= regs,
+		.str	= str,
+		.err	= err,
+		.trapnr	= trap,
+		.signr	= sig,
+
+	};
+	return atomic_notifier_call_chain(&die_chain, val, &args);
+}
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	vmalloc_sync_all();
+	return atomic_notifier_chain_register(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_die_notifier);
+
+int unregister_die_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&die_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_die_notifier);
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
new file mode 100644
index 00000000000..aead4d69f62
--- /dev/null
+++ b/kernel/ns_cgroup.c
@@ -0,0 +1,100 @@
+/*
+ * ns_cgroup.c - namespace cgroup subsystem
+ *
+ * Copyright 2006, 2007 IBM Corp
+ */
+
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+
+struct ns_cgroup {
+	struct cgroup_subsys_state css;
+	spinlock_t lock;
+};
+
+struct cgroup_subsys ns_subsys;
+
+static inline struct ns_cgroup *cgroup_to_ns(
+		struct cgroup *cgroup)
+{
+	return container_of(cgroup_subsys_state(cgroup, ns_subsys_id),
+			    struct ns_cgroup, css);
+}
+
+int ns_cgroup_clone(struct task_struct *task)
+{
+	return cgroup_clone(task, &ns_subsys);
+}
+
+/*
+ * Rules:
+ *   1. you can only enter a cgroup which is a child of your current
+ *     cgroup
+ *   2. you can only place another process into a cgroup if
+ *     a. you have CAP_SYS_ADMIN
+ *     b. your cgroup is an ancestor of task's destination cgroup
+ *       (hence either you are in the same cgroup as task, or in an
+ *        ancestor cgroup thereof)
+ */
+static int ns_can_attach(struct cgroup_subsys *ss,
+		struct cgroup *new_cgroup, struct task_struct *task)
+{
+	struct cgroup *orig;
+
+	if (current != task) {
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (!cgroup_is_descendant(new_cgroup))
+			return -EPERM;
+	}
+
+	if (atomic_read(&new_cgroup->count) != 0)
+		return -EPERM;
+
+	orig = task_cgroup(task, ns_subsys_id);
+	if (orig && orig != new_cgroup->parent)
+		return -EPERM;
+
+	return 0;
+}
+
+/*
+ * Rules: you can only create a cgroup if
+ *     1. you are capable(CAP_SYS_ADMIN)
+ *     2. the target cgroup is a descendant of your own cgroup
+ */
+static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
+						struct cgroup *cgroup)
+{
+	struct ns_cgroup *ns_cgroup;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (!cgroup_is_descendant(cgroup))
+		return ERR_PTR(-EPERM);
+
+	ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
+	if (!ns_cgroup)
+		return ERR_PTR(-ENOMEM);
+	spin_lock_init(&ns_cgroup->lock);
+	return &ns_cgroup->css;
+}
+
+static void ns_destroy(struct cgroup_subsys *ss,
+			struct cgroup *cgroup)
+{
+	struct ns_cgroup *ns_cgroup;
+
+	ns_cgroup = cgroup_to_ns(cgroup);
+	kfree(ns_cgroup);
+}
+
+struct cgroup_subsys ns_subsys = {
+	.name = "ns",
+	.can_attach = ns_can_attach,
+	.create = ns_create,
+	.destroy  = ns_destroy,
+	.subsys_id = ns_subsys_id,
+};
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 049e7c0ac56..79f871bc0ef 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -26,19 +26,6 @@ static struct kmem_cache *nsproxy_cachep;
 
 struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
 
-static inline void get_nsproxy(struct nsproxy *ns)
-{
-	atomic_inc(&ns->count);
-}
-
-void get_task_namespaces(struct task_struct *tsk)
-{
-	struct nsproxy *ns = tsk->nsproxy;
-	if (ns) {
-		get_nsproxy(ns);
-	}
-}
-
 /*
  * creates a copy of "orig" with refcount 1.
  */
@@ -87,7 +74,7 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_ipc;
 	}
 
-	new_nsp->pid_ns = copy_pid_ns(flags, tsk->nsproxy->pid_ns);
+	new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk));
 	if (IS_ERR(new_nsp->pid_ns)) {
 		err = PTR_ERR(new_nsp->pid_ns);
 		goto out_pid;
@@ -142,7 +129,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+				CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -156,7 +144,14 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 		goto out;
 	}
 
+	err = ns_cgroup_clone(tsk);
+	if (err) {
+		put_nsproxy(new_ns);
+		goto out;
+	}
+
 	tsk->nsproxy = new_ns;
+
 out:
 	put_nsproxy(old_ns);
 	return err;
@@ -196,11 +191,46 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 
 	*new_nsp = create_new_namespaces(unshare_flags, current,
 				new_fs ? new_fs : current->fs);
-	if (IS_ERR(*new_nsp))
+	if (IS_ERR(*new_nsp)) {
 		err = PTR_ERR(*new_nsp);
+		goto out;
+	}
+
+	err = ns_cgroup_clone(current);
+	if (err)
+		put_nsproxy(*new_nsp);
+
+out:
 	return err;
 }
 
+void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
+{
+	struct nsproxy *ns;
+
+	might_sleep();
+
+	ns = p->nsproxy;
+
+	rcu_assign_pointer(p->nsproxy, new);
+
+	if (ns && atomic_dec_and_test(&ns->count)) {
+		/*
+		 * wait for others to get what they want from this nsproxy.
+		 *
+		 * cannot release this nsproxy via the call_rcu() since
+		 * put_mnt_ns() will want to sleep
+		 */
+		synchronize_rcu();
+		free_nsproxy(ns);
+	}
+}
+
+void exit_task_namespaces(struct task_struct *p)
+{
+	switch_task_namespaces(p, NULL);
+}
+
 static int __init nsproxy_cache_init(void)
 {
 	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
diff --git a/kernel/pid.c b/kernel/pid.c
index c6e3f9ffff8..d1db36b9467 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -18,6 +18,12 @@
  * allocation scenario when all but one out of 1 million PIDs possible are
  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
+ *
+ * Pid namespaces:
+ *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
+ *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
+ *     Many thanks to Oleg Nesterov for comments and help
+ *
  */
 
 #include <linux/mm.h>
@@ -28,12 +34,14 @@
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
+#include <linux/syscalls.h>
 
-#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
+#define pid_hashfn(nr, ns)	\
+	hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
 static struct hlist_head *pid_hash;
 static int pidhash_shift;
-static struct kmem_cache *pid_cachep;
 struct pid init_struct_pid = INIT_STRUCT_PID;
+static struct kmem_cache *pid_ns_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
 
@@ -68,8 +76,25 @@ struct pid_namespace init_pid_ns = {
 		[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
 	},
 	.last_pid = 0,
-	.child_reaper = &init_task
+	.level = 0,
+	.child_reaper = &init_task,
 };
+EXPORT_SYMBOL_GPL(init_pid_ns);
+
+int is_container_init(struct task_struct *tsk)
+{
+	int ret = 0;
+	struct pid *pid;
+
+	rcu_read_lock();
+	pid = task_pid(tsk);
+	if (pid != NULL && pid->numbers[pid->level].nr == 1)
+		ret = 1;
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(is_container_init);
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -176,11 +201,17 @@ static int next_pidmap(struct pid_namespace *pid_ns, int last)
 
 fastcall void put_pid(struct pid *pid)
 {
+	struct pid_namespace *ns;
+
 	if (!pid)
 		return;
+
+	ns = pid->numbers[pid->level].ns;
 	if ((atomic_read(&pid->count) == 1) ||
-	     atomic_dec_and_test(&pid->count))
-		kmem_cache_free(pid_cachep, pid);
+	     atomic_dec_and_test(&pid->count)) {
+		kmem_cache_free(ns->pid_cachep, pid);
+		put_pid_ns(ns);
+	}
 }
 EXPORT_SYMBOL_GPL(put_pid);
 
@@ -193,60 +224,94 @@ static void delayed_put_pid(struct rcu_head *rhp)
 fastcall void free_pid(struct pid *pid)
 {
 	/* We can be called with write_lock_irq(&tasklist_lock) held */
+	int i;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pidmap_lock, flags);
-	hlist_del_rcu(&pid->pid_chain);
+	for (i = 0; i <= pid->level; i++)
+		hlist_del_rcu(&pid->numbers[i].pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	free_pidmap(&init_pid_ns, pid->nr);
+	for (i = 0; i <= pid->level; i++)
+		free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
-struct pid *alloc_pid(void)
+struct pid *alloc_pid(struct pid_namespace *ns)
 {
 	struct pid *pid;
 	enum pid_type type;
-	int nr = -1;
+	int i, nr;
+	struct pid_namespace *tmp;
+	struct upid *upid;
 
-	pid = kmem_cache_alloc(pid_cachep, GFP_KERNEL);
+	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
 	if (!pid)
 		goto out;
 
-	nr = alloc_pidmap(current->nsproxy->pid_ns);
-	if (nr < 0)
-		goto out_free;
+	tmp = ns;
+	for (i = ns->level; i >= 0; i--) {
+		nr = alloc_pidmap(tmp);
+		if (nr < 0)
+			goto out_free;
+
+		pid->numbers[i].nr = nr;
+		pid->numbers[i].ns = tmp;
+		tmp = tmp->parent;
+	}
 
+	get_pid_ns(ns);
+	pid->level = ns->level;
 	atomic_set(&pid->count, 1);
-	pid->nr = nr;
 	for (type = 0; type < PIDTYPE_MAX; ++type)
 		INIT_HLIST_HEAD(&pid->tasks[type]);
 
 	spin_lock_irq(&pidmap_lock);
-	hlist_add_head_rcu(&pid->pid_chain, &pid_hash[pid_hashfn(pid->nr)]);
+	for (i = ns->level; i >= 0; i--) {
+		upid = &pid->numbers[i];
+		hlist_add_head_rcu(&upid->pid_chain,
+				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
+	}
 	spin_unlock_irq(&pidmap_lock);
 
 out:
 	return pid;
 
 out_free:
-	kmem_cache_free(pid_cachep, pid);
+	for (i++; i <= ns->level; i++)
+		free_pidmap(pid->numbers[i].ns, pid->numbers[i].nr);
+
+	kmem_cache_free(ns->pid_cachep, pid);
 	pid = NULL;
 	goto out;
 }
 
-struct pid * fastcall find_pid(int nr)
+struct pid * fastcall find_pid_ns(int nr, struct pid_namespace *ns)
 {
 	struct hlist_node *elem;
-	struct pid *pid;
+	struct upid *pnr;
+
+	hlist_for_each_entry_rcu(pnr, elem,
+			&pid_hash[pid_hashfn(nr, ns)], pid_chain)
+		if (pnr->nr == nr && pnr->ns == ns)
+			return container_of(pnr, struct pid,
+					numbers[ns->level]);
 
-	hlist_for_each_entry_rcu(pid, elem,
-			&pid_hash[pid_hashfn(nr)], pid_chain) {
-		if (pid->nr == nr)
-			return pid;
-	}
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(find_pid_ns);
+
+struct pid *find_vpid(int nr)
+{
+	return find_pid_ns(nr, current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL_GPL(find_vpid);
+
+struct pid *find_pid(int nr)
+{
+	return find_pid_ns(nr, &init_pid_ns);
+}
 EXPORT_SYMBOL_GPL(find_pid);
 
 /*
@@ -307,12 +372,32 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
-struct task_struct *find_task_by_pid_type(int type, int nr)
+struct task_struct *find_task_by_pid_type_ns(int type, int nr,
+		struct pid_namespace *ns)
 {
-	return pid_task(find_pid(nr), type);
+	return pid_task(find_pid_ns(nr, ns), type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid_type);
+EXPORT_SYMBOL(find_task_by_pid_type_ns);
+
+struct task_struct *find_task_by_pid(pid_t nr)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, &init_pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_pid);
+
+struct task_struct *find_task_by_vpid(pid_t vnr)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, vnr,
+			current->nsproxy->pid_ns);
+}
+EXPORT_SYMBOL(find_task_by_vpid);
+
+struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
+{
+	return find_task_by_pid_type_ns(PIDTYPE_PID, nr, ns);
+}
+EXPORT_SYMBOL(find_task_by_pid_ns);
 
 struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 {
@@ -339,45 +424,239 @@ struct pid *find_get_pid(pid_t nr)
 	struct pid *pid;
 
 	rcu_read_lock();
-	pid = get_pid(find_pid(nr));
+	pid = get_pid(find_vpid(nr));
 	rcu_read_unlock();
 
 	return pid;
 }
 
+pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
+{
+	struct upid *upid;
+	pid_t nr = 0;
+
+	if (pid && ns->level <= pid->level) {
+		upid = &pid->numbers[ns->level];
+		if (upid->ns == ns)
+			nr = upid->nr;
+	}
+	return nr;
+}
+
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pid(tsk), ns);
+}
+EXPORT_SYMBOL(task_pid_nr_ns);
+
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_tgid(tsk), ns);
+}
+EXPORT_SYMBOL(task_tgid_nr_ns);
+
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_pgrp(tsk), ns);
+}
+EXPORT_SYMBOL(task_pgrp_nr_ns);
+
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
+{
+	return pid_nr_ns(task_session(tsk), ns);
+}
+EXPORT_SYMBOL(task_session_nr_ns);
+
 /*
  * Used by proc to find the first pid that is greater then or equal to nr.
  *
  * If there is a pid at nr this function is exactly the same as find_pid.
  */
-struct pid *find_ge_pid(int nr)
+struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
 	struct pid *pid;
 
 	do {
-		pid = find_pid(nr);
+		pid = find_pid_ns(nr, ns);
 		if (pid)
 			break;
-		nr = next_pidmap(current->nsproxy->pid_ns, nr);
+		nr = next_pidmap(ns, nr);
 	} while (nr > 0);
 
 	return pid;
 }
 EXPORT_SYMBOL_GPL(find_get_pid);
 
+struct pid_cache {
+	int nr_ids;
+	char name[16];
+	struct kmem_cache *cachep;
+	struct list_head list;
+};
+
+static LIST_HEAD(pid_caches_lh);
+static DEFINE_MUTEX(pid_caches_mutex);
+
+/*
+ * creates the kmem cache to allocate pids from.
+ * @nr_ids: the number of numerical ids this pid will have to carry
+ */
+
+static struct kmem_cache *create_pid_cachep(int nr_ids)
+{
+	struct pid_cache *pcache;
+	struct kmem_cache *cachep;
+
+	mutex_lock(&pid_caches_mutex);
+	list_for_each_entry (pcache, &pid_caches_lh, list)
+		if (pcache->nr_ids == nr_ids)
+			goto out;
+
+	pcache = kmalloc(sizeof(struct pid_cache), GFP_KERNEL);
+	if (pcache == NULL)
+		goto err_alloc;
+
+	snprintf(pcache->name, sizeof(pcache->name), "pid_%d", nr_ids);
+	cachep = kmem_cache_create(pcache->name,
+			sizeof(struct pid) + (nr_ids - 1) * sizeof(struct upid),
+			0, SLAB_HWCACHE_ALIGN, NULL);
+	if (cachep == NULL)
+		goto err_cachep;
+
+	pcache->nr_ids = nr_ids;
+	pcache->cachep = cachep;
+	list_add(&pcache->list, &pid_caches_lh);
+out:
+	mutex_unlock(&pid_caches_mutex);
+	return pcache->cachep;
+
+err_cachep:
+	kfree(pcache);
+err_alloc:
+	mutex_unlock(&pid_caches_mutex);
+	return NULL;
+}
+
+static struct pid_namespace *create_pid_namespace(int level)
+{
+	struct pid_namespace *ns;
+	int i;
+
+	ns = kmem_cache_alloc(pid_ns_cachep, GFP_KERNEL);
+	if (ns == NULL)
+		goto out;
+
+	ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!ns->pidmap[0].page)
+		goto out_free;
+
+	ns->pid_cachep = create_pid_cachep(level + 1);
+	if (ns->pid_cachep == NULL)
+		goto out_free_map;
+
+	kref_init(&ns->kref);
+	ns->last_pid = 0;
+	ns->child_reaper = NULL;
+	ns->level = level;
+
+	set_bit(0, ns->pidmap[0].page);
+	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+
+	for (i = 1; i < PIDMAP_ENTRIES; i++) {
+		ns->pidmap[i].page = 0;
+		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+	}
+
+	return ns;
+
+out_free_map:
+	kfree(ns->pidmap[0].page);
+out_free:
+	kmem_cache_free(pid_ns_cachep, ns);
+out:
+	return ERR_PTR(-ENOMEM);
+}
+
+static void destroy_pid_namespace(struct pid_namespace *ns)
+{
+	int i;
+
+	for (i = 0; i < PIDMAP_ENTRIES; i++)
+		kfree(ns->pidmap[i].page);
+	kmem_cache_free(pid_ns_cachep, ns);
+}
+
 struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
 {
+	struct pid_namespace *new_ns;
+
 	BUG_ON(!old_ns);
-	get_pid_ns(old_ns);
-	return old_ns;
+	new_ns = get_pid_ns(old_ns);
+	if (!(flags & CLONE_NEWPID))
+		goto out;
+
+	new_ns = ERR_PTR(-EINVAL);
+	if (flags & CLONE_THREAD)
+		goto out_put;
+
+	new_ns = create_pid_namespace(old_ns->level + 1);
+	if (!IS_ERR(new_ns))
+		new_ns->parent = get_pid_ns(old_ns);
+
+out_put:
+	put_pid_ns(old_ns);
+out:
+	return new_ns;
 }
 
 void free_pid_ns(struct kref *kref)
 {
-	struct pid_namespace *ns;
+	struct pid_namespace *ns, *parent;
 
 	ns = container_of(kref, struct pid_namespace, kref);
-	kfree(ns);
+
+	parent = ns->parent;
+	destroy_pid_namespace(ns);
+
+	if (parent != NULL)
+		put_pid_ns(parent);
+}
+
+void zap_pid_ns_processes(struct pid_namespace *pid_ns)
+{
+	int nr;
+	int rc;
+
+	/*
+	 * The last thread in the cgroup-init thread group is terminating.
+	 * Find remaining pid_ts in the namespace, signal and wait for them
+	 * to exit.
+	 *
+	 * Note:  This signals each threads in the namespace - even those that
+	 * 	  belong to the same thread group, To avoid this, we would have
+	 * 	  to walk the entire tasklist looking a processes in this
+	 * 	  namespace, but that could be unnecessarily expensive if the
+	 * 	  pid namespace has just a few processes. Or we need to
+	 * 	  maintain a tasklist for each pid namespace.
+	 *
+	 */
+	read_lock(&tasklist_lock);
+	nr = next_pidmap(pid_ns, 1);
+	while (nr > 0) {
+		kill_proc_info(SIGKILL, SEND_SIG_PRIV, nr);
+		nr = next_pidmap(pid_ns, nr);
+	}
+	read_unlock(&tasklist_lock);
+
+	do {
+		clear_thread_flag(TIF_SIGPENDING);
+		rc = sys_wait4(-1, NULL, __WALL, NULL);
+	} while (rc != -ECHILD);
+
+
+	/* Child reaper for the pid namespace is going away */
+	pid_ns->child_reaper = NULL;
+	return;
 }
 
 /*
@@ -412,5 +691,9 @@ void __init pidmap_init(void)
 	set_bit(0, init_pid_ns.pidmap[0].page);
 	atomic_dec(&init_pid_ns.pidmap[0].nr_free);
 
-	pid_cachep = KMEM_CACHE(pid, SLAB_PANIC);
+	init_pid_ns.pid_cachep = create_pid_cachep(1);
+	if (init_pid_ns.pid_cachep == NULL)
+		panic("Can't create pid_1 cachep\n");
+
+	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
 }
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b53c8fcd9d8..68c96376e84 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -21,8 +21,8 @@ static int check_clock(const clockid_t which_clock)
 
 	read_lock(&tasklist_lock);
 	p = find_task_by_pid(pid);
-	if (!p || (CPUCLOCK_PERTHREAD(which_clock) ?
-		   p->tgid != current->tgid : p->tgid != pid)) {
+	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
+		   same_thread_group(p, current) : thread_group_leader(p))) {
 		error = -EINVAL;
 	}
 	read_unlock(&tasklist_lock);
@@ -308,13 +308,13 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
 		p = find_task_by_pid(pid);
 		if (p) {
 			if (CPUCLOCK_PERTHREAD(which_clock)) {
-				if (p->tgid == current->tgid) {
+				if (same_thread_group(p, current)) {
 					error = cpu_clock_sample(which_clock,
 								 p, &rtn);
 				}
 			} else {
 				read_lock(&tasklist_lock);
-				if (p->tgid == pid && p->signal) {
+				if (thread_group_leader(p) && p->signal) {
 					error =
 					    cpu_clock_sample_group(which_clock,
 							           p, &rtn);
@@ -355,7 +355,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 			p = current;
 		} else {
 			p = find_task_by_pid(pid);
-			if (p && p->tgid != current->tgid)
+			if (p && !same_thread_group(p, current))
 				p = NULL;
 		}
 	} else {
@@ -363,7 +363,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
 			p = current->group_leader;
 		} else {
 			p = find_task_by_pid(pid);
-			if (p && p->tgid != pid)
+			if (p && !thread_group_leader(p))
 				p = NULL;
 		}
 	}
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d11f579d189..35b4bbfc78f 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -404,7 +404,7 @@ static struct task_struct * good_sigevent(sigevent_t * event)
 
 	if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
 		(!(rtn = find_task_by_pid(event->sigev_notify_thread_id)) ||
-		 rtn->tgid != current->tgid ||
+		 !same_thread_group(rtn, current) ||
 		 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
 		return NULL;
 
@@ -608,7 +608,7 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 		spin_lock(&timr->it_lock);
 
 		if ((timr->it_id != timer_id) || !(timr->it_process) ||
-				timr->it_process->tgid != current->tgid) {
+				!same_thread_group(timr->it_process, current)) {
 			spin_unlock(&timr->it_lock);
 			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a73ebd3b9d4..7c76f2ffaea 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
+#include <linux/pid_namespace.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -168,7 +169,7 @@ int ptrace_attach(struct task_struct *task)
 	retval = -EPERM;
 	if (task->pid <= 1)
 		goto out;
-	if (task->tgid == current->tgid)
+	if (same_thread_group(task, current))
 		goto out;
 
 repeat:
@@ -443,7 +444,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
 		return ERR_PTR(-EPERM);
 
 	read_lock(&tasklist_lock);
-	child = find_task_by_pid(pid);
+	child = find_task_by_vpid(pid);
 	if (child)
 		get_task_struct(child);
 
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 6b0703db152..56d73cb8826 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -87,7 +87,7 @@ static int rt_trace_on = 1;
 static void printk_task(struct task_struct *p)
 {
 	if (p)
-		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
+		printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
 	else
 		printk("<none>");
 }
@@ -152,22 +152,25 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
 	printk(  "[ BUG: circular locking deadlock detected! ]\n");
 	printk(  "--------------------------------------------\n");
 	printk("%s/%d is deadlocking current task %s/%d\n\n",
-	       task->comm, task->pid, current->comm, current->pid);
+	       task->comm, task_pid_nr(task),
+	       current->comm, task_pid_nr(current));
 
 	printk("\n1) %s/%d is trying to acquire this lock:\n",
-	       current->comm, current->pid);
+	       current->comm, task_pid_nr(current));
 	printk_lock(waiter->lock, 1);
 
-	printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
+	printk("\n2) %s/%d is blocked on this lock:\n",
+		task->comm, task_pid_nr(task));
 	printk_lock(waiter->deadlock_lock, 1);
 
 	debug_show_held_locks(current);
 	debug_show_held_locks(task);
 
-	printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
+	printk("\n%s/%d's [blocked] stackdump:\n\n",
+		task->comm, task_pid_nr(task));
 	show_stack(task, NULL);
 	printk("\n%s/%d's [current] stackdump:\n\n",
-	       current->comm, current->pid);
+		current->comm, task_pid_nr(current));
 	dump_stack();
 	debug_show_all_locks();
 
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 8cd9bd2cdb3..0deef71ff8d 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -185,7 +185,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 			prev_max = max_lock_depth;
 			printk(KERN_WARNING "Maximum lock depth %d reached "
 			       "task: %s (%d)\n", max_lock_depth,
-			       top_task->comm, top_task->pid);
+			       top_task->comm, task_pid_nr(top_task));
 		}
 		put_task_struct(task);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index ed90be46fb3..afe76ec2e7f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -44,6 +44,7 @@
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
+#include <linux/pid_namespace.h>
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/timer.h>
@@ -51,6 +52,7 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
+#include <linux/cpu_acct.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
 #include <linux/sysctl.h>
@@ -153,10 +155,15 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+#include <linux/cgroup.h>
+
 struct cfs_rq;
 
 /* task group related information */
 struct task_group {
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+	struct cgroup_subsys_state css;
+#endif
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
@@ -197,6 +204,9 @@ static inline struct task_group *task_group(struct task_struct *p)
 
 #ifdef CONFIG_FAIR_USER_SCHED
 	tg = p->user->tg;
+#elif defined(CONFIG_FAIR_CGROUP_SCHED)
+	tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id),
+				struct task_group, css);
 #else
 	tg  = &init_task_group;
 #endif
@@ -1875,7 +1885,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
 	preempt_enable();
 #endif
 	if (current->set_child_tid)
-		put_user(current->pid, current->set_child_tid);
+		put_user(task_pid_vnr(current), current->set_child_tid);
 }
 
 /*
@@ -3307,9 +3317,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 {
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	cputime64_t tmp;
+	struct rq *rq = this_rq();
 
 	p->utime = cputime_add(p->utime, cputime);
 
+	if (p != rq->idle)
+		cpuacct_charge(p, cputime);
+
 	/* Add user time to cpustat. */
 	tmp = cputime_to_cputime64(cputime);
 	if (TASK_NICE(p) > 0)
@@ -3374,9 +3388,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
 	else if (softirq_count())
 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
-	else if (p != rq->idle)
+	else if (p != rq->idle) {
 		cpustat->system = cputime64_add(cpustat->system, tmp);
-	else if (atomic_read(&rq->nr_iowait) > 0)
+		cpuacct_charge(p, cputime);
+	} else if (atomic_read(&rq->nr_iowait) > 0)
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
@@ -3412,8 +3427,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 		else
 			cpustat->idle = cputime64_add(cpustat->idle, tmp);
-	} else
+	} else {
 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
+		cpuacct_charge(p, -tmp);
+	}
 }
 
 /*
@@ -3493,7 +3510,7 @@ EXPORT_SYMBOL(sub_preempt_count);
 static noinline void __schedule_bug(struct task_struct *prev)
 {
 	printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d\n",
-		prev->comm, preempt_count(), prev->pid);
+		prev->comm, preempt_count(), task_pid_nr(prev));
 	debug_show_held_locks(prev);
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
@@ -4159,7 +4176,7 @@ struct task_struct *idle_task(int cpu)
  */
 static struct task_struct *find_process_by_pid(pid_t pid)
 {
-	return pid ? find_task_by_pid(pid) : current;
+	return pid ? find_task_by_vpid(pid) : current;
 }
 
 /* Actually do priority change: must hold rq lock. */
@@ -4462,8 +4479,21 @@ long sched_setaffinity(pid_t pid, cpumask_t new_mask)
 
 	cpus_allowed = cpuset_cpus_allowed(p);
 	cpus_and(new_mask, new_mask, cpus_allowed);
+ again:
 	retval = set_cpus_allowed(p, new_mask);
 
+	if (!retval) {
+		cpus_allowed = cpuset_cpus_allowed(p);
+		if (!cpus_subset(new_mask, cpus_allowed)) {
+			/*
+			 * We must have raced with a concurrent cpuset
+			 * update. Just reset the cpus_allowed to the
+			 * cpuset's cpus_allowed
+			 */
+			new_mask = cpus_allowed;
+			goto again;
+		}
+	}
 out_unlock:
 	put_task_struct(p);
 	mutex_unlock(&sched_hotcpu_mutex);
@@ -4843,7 +4873,8 @@ static void show_task(struct task_struct *p)
 		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free, p->pid, p->parent->pid);
+	printk(KERN_CONT "%5lu %5d %6d\n", free,
+		task_pid_nr(p), task_pid_nr(p->parent));
 
 	if (state != TASK_RUNNING)
 		show_stack(p, NULL);
@@ -5137,8 +5168,16 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 
 		/* No more Mr. Nice Guy. */
 		if (dest_cpu == NR_CPUS) {
+			cpumask_t cpus_allowed = cpuset_cpus_allowed_locked(p);
+			/*
+			 * Try to stay on the same cpuset, where the
+			 * current cpuset may be a subset of all cpus.
+			 * The cpuset_cpus_allowed_locked() variant of
+			 * cpuset_cpus_allowed() will not block.  It must be
+			 * called within calls to cpuset_lock/cpuset_unlock.
+			 */
 			rq = task_rq_lock(p, &flags);
-			cpus_setall(p->cpus_allowed);
+			p->cpus_allowed = cpus_allowed;
 			dest_cpu = any_online_cpu(p->cpus_allowed);
 			task_rq_unlock(rq, &flags);
 
@@ -5150,7 +5189,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 			if (p->mm && printk_ratelimit())
 				printk(KERN_INFO "process %d (%s) no "
 				       "longer affine to cpu%d\n",
-				       p->pid, p->comm, dead_cpu);
+			       task_pid_nr(p), p->comm, dead_cpu);
 		}
 	} while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
 }
@@ -5257,7 +5296,7 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
 	struct rq *rq = cpu_rq(dead_cpu);
 
 	/* Must be exiting, otherwise would be on tasklist. */
-	BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
+	BUG_ON(!p->exit_state);
 
 	/* Cannot have done final schedule yet: would have vanished. */
 	BUG_ON(p->state == TASK_DEAD);
@@ -5504,6 +5543,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
+		cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
 		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
@@ -5517,6 +5557,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq->idle->sched_class = &idle_sched_class;
 		migrate_dead_tasks(cpu);
 		spin_unlock_irq(&rq->lock);
+		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
 
@@ -6367,26 +6408,31 @@ error:
 	return -ENOMEM;
 #endif
 }
+
+static cpumask_t *doms_cur;	/* current sched domains */
+static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
+
+/*
+ * Special case: If a kmalloc of a doms_cur partition (array of
+ * cpumask_t) fails, then fallback to a single sched domain,
+ * as determined by the single cpumask_t fallback_doms.
+ */
+static cpumask_t fallback_doms;
+
 /*
  * Set up scheduler domains and groups.  Callers must hold the hotplug lock.
+ * For now this just excludes isolated cpus, but could be used to
+ * exclude other special cases in the future.
  */
 static int arch_init_sched_domains(const cpumask_t *cpu_map)
 {
-	cpumask_t cpu_default_map;
-	int err;
-
-	/*
-	 * Setup mask for cpus without special case scheduling requirements.
-	 * For now this just excludes isolated cpus, but could be used to
-	 * exclude other special cases in the future.
-	 */
-	cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map);
-
-	err = build_sched_domains(&cpu_default_map);
-
+	ndoms_cur = 1;
+	doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
+	if (!doms_cur)
+		doms_cur = &fallback_doms;
+	cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
 	register_sched_domain_sysctl();
-
-	return err;
+	return build_sched_domains(doms_cur);
 }
 
 static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
@@ -6410,6 +6456,68 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	arch_destroy_sched_domains(cpu_map);
 }
 
+/*
+ * Partition sched domains as specified by the 'ndoms_new'
+ * cpumasks in the array doms_new[] of cpumasks.  This compares
+ * doms_new[] to the current sched domain partitioning, doms_cur[].
+ * It destroys each deleted domain and builds each new domain.
+ *
+ * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'.
+ * The masks don't intersect (don't overlap.)  We should setup one
+ * sched domain for each mask.  CPUs not in any of the cpumasks will
+ * not be load balanced.  If the same cpumask appears both in the
+ * current 'doms_cur' domains and in the new 'doms_new', we can leave
+ * it as it is.
+ *
+ * The passed in 'doms_new' should be kmalloc'd.  This routine takes
+ * ownership of it and will kfree it when done with it.  If the caller
+ * failed the kmalloc call, then it can pass in doms_new == NULL,
+ * and partition_sched_domains() will fallback to the single partition
+ * 'fallback_doms'.
+ *
+ * Call with hotplug lock held
+ */
+void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
+{
+	int i, j;
+
+	if (doms_new == NULL) {
+		ndoms_new = 1;
+		doms_new = &fallback_doms;
+		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+	}
+
+	/* Destroy deleted domains */
+	for (i = 0; i < ndoms_cur; i++) {
+		for (j = 0; j < ndoms_new; j++) {
+			if (cpus_equal(doms_cur[i], doms_new[j]))
+				goto match1;
+		}
+		/* no match - a current sched domain not in new doms_new[] */
+		detach_destroy_domains(doms_cur + i);
+match1:
+		;
+	}
+
+	/* Build new domains */
+	for (i = 0; i < ndoms_new; i++) {
+		for (j = 0; j < ndoms_cur; j++) {
+			if (cpus_equal(doms_new[i], doms_cur[j]))
+				goto match2;
+		}
+		/* no match - add a new doms_new */
+		build_sched_domains(doms_new + i);
+match2:
+		;
+	}
+
+	/* Remember the new sched domains */
+	if (doms_cur != &fallback_doms)
+		kfree(doms_cur);
+	doms_cur = doms_new;
+	ndoms_cur = ndoms_new;
+}
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {
@@ -6991,3 +7099,116 @@ unsigned long sched_group_shares(struct task_group *tg)
 }
 
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_FAIR_CGROUP_SCHED
+
+/* return corresponding task_group object of a cgroup */
+static inline struct task_group *cgroup_tg(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, cpu_cgroup_subsys_id),
+					 struct task_group, css);
+}
+
+static struct cgroup_subsys_state *
+cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct task_group *tg;
+
+	if (!cont->parent) {
+		/* This is early initialization for the top cgroup */
+		init_task_group.css.cgroup = cont;
+		return &init_task_group.css;
+	}
+
+	/* we support only 1-level deep hierarchical scheduler atm */
+	if (cont->parent->parent)
+		return ERR_PTR(-EINVAL);
+
+	tg = sched_create_group();
+	if (IS_ERR(tg))
+		return ERR_PTR(-ENOMEM);
+
+	/* Bind the cgroup to task_group object we just created */
+	tg->css.cgroup = cont;
+
+	return &tg->css;
+}
+
+static void cpu_cgroup_destroy(struct cgroup_subsys *ss,
+					struct cgroup *cont)
+{
+	struct task_group *tg = cgroup_tg(cont);
+
+	sched_destroy_group(tg);
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_subsys *ss,
+			     struct cgroup *cont, struct task_struct *tsk)
+{
+	/* We don't support RT-tasks being in separate groups */
+	if (tsk->sched_class != &fair_sched_class)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void
+cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+			struct cgroup *old_cont, struct task_struct *tsk)
+{
+	sched_move_task(tsk);
+}
+
+static ssize_t cpu_shares_write(struct cgroup *cont, struct cftype *cftype,
+				struct file *file, const char __user *userbuf,
+				size_t nbytes, loff_t *ppos)
+{
+	unsigned long shareval;
+	struct task_group *tg = cgroup_tg(cont);
+	char buffer[2*sizeof(unsigned long) + 1];
+	int rc;
+
+	if (nbytes > 2*sizeof(unsigned long))	/* safety check */
+		return -E2BIG;
+
+	if (copy_from_user(buffer, userbuf, nbytes))
+		return -EFAULT;
+
+	buffer[nbytes] = 0;	/* nul-terminate */
+	shareval = simple_strtoul(buffer, NULL, 10);
+
+	rc = sched_group_set_shares(tg, shareval);
+
+	return (rc < 0 ? rc : nbytes);
+}
+
+static u64 cpu_shares_read_uint(struct cgroup *cont, struct cftype *cft)
+{
+	struct task_group *tg = cgroup_tg(cont);
+
+	return (u64) tg->shares;
+}
+
+static struct cftype cpu_shares = {
+	.name = "shares",
+	.read_uint = cpu_shares_read_uint,
+	.write = cpu_shares_write,
+};
+
+static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_file(cont, ss, &cpu_shares);
+}
+
+struct cgroup_subsys cpu_cgroup_subsys = {
+	.name 	    	= "cpu",
+	.create	    	= cpu_cgroup_create,
+	.destroy    	= cpu_cgroup_destroy,
+	.can_attach 	= cpu_cgroup_can_attach,
+	.attach     	= cpu_cgroup_attach,
+	.populate   	= cpu_cgroup_populate,
+	.subsys_id  	= cpu_cgroup_subsys_id,
+	.early_init	= 1,
+};
+
+#endif	/* CONFIG_FAIR_CGROUP_SCHED */
diff --git a/kernel/signal.c b/kernel/signal.c
index e4f059cd986..12006308c7e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -256,7 +256,7 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 
 int unhandled_signal(struct task_struct *tsk, int sig)
 {
-	if (is_init(tsk))
+	if (is_global_init(tsk))
 		return 1;
 	if (tsk->ptrace & PT_PTRACED)
 		return 0;
@@ -536,7 +536,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 			return error;
 		error = -EPERM;
 		if (((sig != SIGCONT) ||
-			(process_session(current) != process_session(t)))
+			(task_session_nr(current) != task_session_nr(t)))
 		    && (current->euid ^ t->suid) && (current->euid ^ t->uid)
 		    && (current->uid ^ t->suid) && (current->uid ^ t->uid)
 		    && !capable(CAP_KILL))
@@ -694,7 +694,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 			q->info.si_signo = sig;
 			q->info.si_errno = 0;
 			q->info.si_code = SI_USER;
-			q->info.si_pid = current->pid;
+			q->info.si_pid = task_pid_vnr(current);
 			q->info.si_uid = current->uid;
 			break;
 		case (unsigned long) SEND_SIG_PRIV:
@@ -730,7 +730,7 @@ int print_fatal_signals;
 static void print_fatal_signal(struct pt_regs *regs, int signr)
 {
 	printk("%s/%d: potentially unexpected fatal signal %d.\n",
-		current->comm, current->pid, signr);
+		current->comm, task_pid_nr(current), signr);
 
 #ifdef __i386__
 	printk("code at %08lx: ", regs->eip);
@@ -1089,7 +1089,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
 	int error;
 	rcu_read_lock();
-	error = kill_pid_info(sig, info, find_pid(pid));
+	error = kill_pid_info(sig, info, find_vpid(pid));
 	rcu_read_unlock();
 	return error;
 }
@@ -1150,7 +1150,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
 
 		read_lock(&tasklist_lock);
 		for_each_process(p) {
-			if (p->pid > 1 && p->tgid != current->tgid) {
+			if (p->pid > 1 && !same_thread_group(p, current)) {
 				int err = group_send_sig_info(sig, info, p);
 				++count;
 				if (err != -EPERM)
@@ -1160,9 +1160,9 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
 		read_unlock(&tasklist_lock);
 		ret = count ? retval : -ESRCH;
 	} else if (pid < 0) {
-		ret = kill_pgrp_info(sig, info, find_pid(-pid));
+		ret = kill_pgrp_info(sig, info, find_vpid(-pid));
 	} else {
-		ret = kill_pid_info(sig, info, find_pid(pid));
+		ret = kill_pid_info(sig, info, find_vpid(pid));
 	}
 	rcu_read_unlock();
 	return ret;
@@ -1266,7 +1266,12 @@ EXPORT_SYMBOL(kill_pid);
 int
 kill_proc(pid_t pid, int sig, int priv)
 {
-	return kill_proc_info(sig, __si_special(priv), pid);
+	int ret;
+
+	rcu_read_lock();
+	ret = kill_pid_info(sig, __si_special(priv), find_pid(pid));
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
@@ -1443,7 +1448,22 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 
 	info.si_signo = sig;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	/*
+	 * we are under tasklist_lock here so our parent is tied to
+	 * us and cannot exit and release its namespace.
+	 *
+	 * the only it can is to switch its nsproxy with sys_unshare,
+	 * bu uncharing pid namespaces is not allowed, so we'll always
+	 * see relevant namespace
+	 *
+	 * write_lock() currently calls preempt_disable() which is the
+	 * same as rcu_read_lock(), but according to Oleg, this is not
+	 * correct to rely on this
+	 */
+	rcu_read_lock();
+	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	rcu_read_unlock();
+
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1508,7 +1528,13 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 
 	info.si_signo = SIGCHLD;
 	info.si_errno = 0;
-	info.si_pid = tsk->pid;
+	/*
+	 * see comment in do_notify_parent() abot the following 3 lines
+	 */
+	rcu_read_lock();
+	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	rcu_read_unlock();
+
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
@@ -1634,7 +1660,7 @@ void ptrace_notify(int exit_code)
 	memset(&info, 0, sizeof info);
 	info.si_signo = SIGTRAP;
 	info.si_code = exit_code;
-	info.si_pid = current->pid;
+	info.si_pid = task_pid_vnr(current);
 	info.si_uid = current->uid;
 
 	/* Let the debugger run.  */
@@ -1804,7 +1830,7 @@ relock:
 				info->si_signo = signr;
 				info->si_errno = 0;
 				info->si_code = SI_USER;
-				info->si_pid = current->parent->pid;
+				info->si_pid = task_pid_vnr(current->parent);
 				info->si_uid = current->parent->uid;
 			}
 
@@ -1835,11 +1861,9 @@ relock:
 			continue;
 
 		/*
-		 * Init of a pid space gets no signals it doesn't want from
-		 * within that pid space. It can of course get signals from
-		 * its parent pid space.
+		 * Global init gets no signals it doesn't want.
 		 */
-		if (current == child_reaper(current))
+		if (is_global_init(current))
 			continue;
 
 		if (sig_kernel_stop(signr)) {
@@ -2193,7 +2217,7 @@ sys_kill(int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_USER;
-	info.si_pid = current->tgid;
+	info.si_pid = task_tgid_vnr(current);
 	info.si_uid = current->uid;
 
 	return kill_something_info(sig, &info, pid);
@@ -2209,12 +2233,12 @@ static int do_tkill(int tgid, int pid, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	info.si_code = SI_TKILL;
-	info.si_pid = current->tgid;
+	info.si_pid = task_tgid_vnr(current);
 	info.si_uid = current->uid;
 
 	read_lock(&tasklist_lock);
-	p = find_task_by_pid(pid);
-	if (p && (tgid <= 0 || p->tgid == tgid)) {
+	p = find_task_by_vpid(pid);
+	if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
 		error = check_kill_permission(sig, &info, p);
 		/*
 		 * The null signal is a permissions and process existence
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index edeeef3a6a3..11df812263c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -113,7 +113,7 @@ void softlockup_tick(void)
 	spin_lock(&print_lock);
 	printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
 			this_cpu, now - touch_timestamp,
-				current->comm, current->pid);
+			current->comm, task_pid_nr(current));
 	if (regs)
 		show_regs(regs);
 	else
diff --git a/kernel/sys.c b/kernel/sys.c
index bc8879c822a..304b5410d74 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -106,537 +106,6 @@ EXPORT_SYMBOL(cad_pid);
 
 void (*pm_power_off_prepare)(void);
 
-/*
- *	Notifier list for kernel code which wants to be called
- *	at shutdown. This is used to stop any idling DMA operations
- *	and the like. 
- */
-
-static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);
-
-/*
- *	Notifier chain core routines.  The exported routines below
- *	are layered on top of these, with appropriate locking added.
- */
-
-static int notifier_chain_register(struct notifier_block **nl,
-		struct notifier_block *n)
-{
-	while ((*nl) != NULL) {
-		if (n->priority > (*nl)->priority)
-			break;
-		nl = &((*nl)->next);
-	}
-	n->next = *nl;
-	rcu_assign_pointer(*nl, n);
-	return 0;
-}
-
-static int notifier_chain_unregister(struct notifier_block **nl,
-		struct notifier_block *n)
-{
-	while ((*nl) != NULL) {
-		if ((*nl) == n) {
-			rcu_assign_pointer(*nl, n->next);
-			return 0;
-		}
-		nl = &((*nl)->next);
-	}
-	return -ENOENT;
-}
-
-/**
- * notifier_call_chain - Informs the registered notifiers about an event.
- *	@nl:		Pointer to head of the blocking notifier chain
- *	@val:		Value passed unmodified to notifier function
- *	@v:		Pointer passed unmodified to notifier function
- *	@nr_to_call:	Number of notifier functions to be called. Don't care
- *		     	value of this parameter is -1.
- *	@nr_calls:	Records the number of notifications sent. Don't care
- *		   	value of this field is NULL.
- * 	@returns:	notifier_call_chain returns the value returned by the
- *			last notifier function called.
- */
-
-static int __kprobes notifier_call_chain(struct notifier_block **nl,
-					unsigned long val, void *v,
-					int nr_to_call,	int *nr_calls)
-{
-	int ret = NOTIFY_DONE;
-	struct notifier_block *nb, *next_nb;
-
-	nb = rcu_dereference(*nl);
-
-	while (nb && nr_to_call) {
-		next_nb = rcu_dereference(nb->next);
-		ret = nb->notifier_call(nb, val, v);
-
-		if (nr_calls)
-			(*nr_calls)++;
-
-		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
-			break;
-		nb = next_nb;
-		nr_to_call--;
-	}
-	return ret;
-}
-
-/*
- *	Atomic notifier chain routines.  Registration and unregistration
- *	use a spinlock, and call_chain is synchronized by RCU (no locks).
- */
-
-/**
- *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to an atomic notifier chain.
- *
- *	Currently always returns zero.
- */
-
-int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
-		struct notifier_block *n)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&nh->lock, flags);
-	ret = notifier_chain_register(&nh->head, n);
-	spin_unlock_irqrestore(&nh->lock, flags);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
-
-/**
- *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from an atomic notifier chain.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
-		struct notifier_block *n)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&nh->lock, flags);
-	ret = notifier_chain_unregister(&nh->head, n);
-	spin_unlock_irqrestore(&nh->lock, flags);
-	synchronize_rcu();
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
-
-/**
- *	__atomic_notifier_call_chain - Call functions in an atomic notifier chain
- *	@nh: Pointer to head of the atomic notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See the comment for notifier_call_chain.
- *	@nr_calls: See the comment for notifier_call_chain.
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in an atomic context, so they must not block.
- *	This routine uses RCU to synchronize with changes to the chain.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
- 
-int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-					unsigned long val, void *v,
-					int nr_to_call, int *nr_calls)
-{
-	int ret;
-
-	rcu_read_lock();
-	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-	rcu_read_unlock();
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
-
-int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
-/*
- *	Blocking notifier chain routines.  All access to the chain is
- *	synchronized by an rwsem.
- */
-
-/**
- *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to a blocking notifier chain.
- *	Must be called in process context.
- *
- *	Currently always returns zero.
- */
- 
-int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call down_write().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_register(&nh->head, n);
-
-	down_write(&nh->rwsem);
-	ret = notifier_chain_register(&nh->head, n);
-	up_write(&nh->rwsem);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);
-
-/**
- *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from a blocking notifier chain.
- *	Must be called from process context.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call down_write().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_unregister(&nh->head, n);
-
-	down_write(&nh->rwsem);
-	ret = notifier_chain_unregister(&nh->head, n);
-	up_write(&nh->rwsem);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);
-
-/**
- *	__blocking_notifier_call_chain - Call functions in a blocking notifier chain
- *	@nh: Pointer to head of the blocking notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain.
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in a process context, so they are allowed to block.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
- 
-int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-				   unsigned long val, void *v,
-				   int nr_to_call, int *nr_calls)
-{
-	int ret = NOTIFY_DONE;
-
-	/*
-	 * We check the head outside the lock, but if this access is
-	 * racy then it does not matter what the result of the test
-	 * is, we re-check the list after having taken the lock anyway:
-	 */
-	if (rcu_dereference(nh->head)) {
-		down_read(&nh->rwsem);
-		ret = notifier_call_chain(&nh->head, val, v, nr_to_call,
-					nr_calls);
-		up_read(&nh->rwsem);
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__blocking_notifier_call_chain);
-
-int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __blocking_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
-
-/*
- *	Raw notifier chain routines.  There is no protection;
- *	the caller must provide it.  Use at your own risk!
- */
-
-/**
- *	raw_notifier_chain_register - Add notifier to a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to a raw notifier chain.
- *	All locking must be provided by the caller.
- *
- *	Currently always returns zero.
- */
-
-int raw_notifier_chain_register(struct raw_notifier_head *nh,
-		struct notifier_block *n)
-{
-	return notifier_chain_register(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_register);
-
-/**
- *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from a raw notifier chain.
- *	All locking must be provided by the caller.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
-		struct notifier_block *n)
-{
-	return notifier_chain_unregister(&nh->head, n);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);
-
-/**
- *	__raw_notifier_call_chain - Call functions in a raw notifier chain
- *	@nh: Pointer to head of the raw notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in an undefined context.
- *	All locking must be provided by the caller.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
-
-int __raw_notifier_call_chain(struct raw_notifier_head *nh,
-			      unsigned long val, void *v,
-			      int nr_to_call, int *nr_calls)
-{
-	return notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-}
-
-EXPORT_SYMBOL_GPL(__raw_notifier_call_chain);
-
-int raw_notifier_call_chain(struct raw_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __raw_notifier_call_chain(nh, val, v, -1, NULL);
-}
-
-EXPORT_SYMBOL_GPL(raw_notifier_call_chain);
-
-/*
- *	SRCU notifier chain routines.    Registration and unregistration
- *	use a mutex, and call_chain is synchronized by SRCU (no locks).
- */
-
-/**
- *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@n: New entry in notifier chain
- *
- *	Adds a notifier to an SRCU notifier chain.
- *	Must be called in process context.
- *
- *	Currently always returns zero.
- */
-
-int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call mutex_lock().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_register(&nh->head, n);
-
-	mutex_lock(&nh->mutex);
-	ret = notifier_chain_register(&nh->head, n);
-	mutex_unlock(&nh->mutex);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);
-
-/**
- *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@n: Entry to remove from notifier chain
- *
- *	Removes a notifier from an SRCU notifier chain.
- *	Must be called from process context.
- *
- *	Returns zero on success or %-ENOENT on failure.
- */
-int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
-		struct notifier_block *n)
-{
-	int ret;
-
-	/*
-	 * This code gets used during boot-up, when task switching is
-	 * not yet working and interrupts must remain disabled.  At
-	 * such times we must not call mutex_lock().
-	 */
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return notifier_chain_unregister(&nh->head, n);
-
-	mutex_lock(&nh->mutex);
-	ret = notifier_chain_unregister(&nh->head, n);
-	mutex_unlock(&nh->mutex);
-	synchronize_srcu(&nh->srcu);
-	return ret;
-}
-
-EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);
-
-/**
- *	__srcu_notifier_call_chain - Call functions in an SRCU notifier chain
- *	@nh: Pointer to head of the SRCU notifier chain
- *	@val: Value passed unmodified to notifier function
- *	@v: Pointer passed unmodified to notifier function
- *	@nr_to_call: See comment for notifier_call_chain.
- *	@nr_calls: See comment for notifier_call_chain
- *
- *	Calls each function in a notifier chain in turn.  The functions
- *	run in a process context, so they are allowed to block.
- *
- *	If the return value of the notifier can be and'ed
- *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain()
- *	will return immediately, with the return value of
- *	the notifier function which halted execution.
- *	Otherwise the return value is the return value
- *	of the last notifier function called.
- */
-
-int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-			       unsigned long val, void *v,
-			       int nr_to_call, int *nr_calls)
-{
-	int ret;
-	int idx;
-
-	idx = srcu_read_lock(&nh->srcu);
-	ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
-	srcu_read_unlock(&nh->srcu, idx);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__srcu_notifier_call_chain);
-
-int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
-		unsigned long val, void *v)
-{
-	return __srcu_notifier_call_chain(nh, val, v, -1, NULL);
-}
-EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);
-
-/**
- *	srcu_init_notifier_head - Initialize an SRCU notifier head
- *	@nh: Pointer to head of the srcu notifier chain
- *
- *	Unlike other sorts of notifier heads, SRCU notifier heads require
- *	dynamic initialization.  Be sure to call this routine before
- *	calling any of the other SRCU notifier routines for this head.
- *
- *	If an SRCU notifier head is deallocated, it must first be cleaned
- *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
- *	per-cpu data (used by the SRCU mechanism) will leak.
- */
-
-void srcu_init_notifier_head(struct srcu_notifier_head *nh)
-{
-	mutex_init(&nh->mutex);
-	if (init_srcu_struct(&nh->srcu) < 0)
-		BUG();
-	nh->head = NULL;
-}
-
-EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
-
-/**
- *	register_reboot_notifier - Register function to be called at reboot time
- *	@nb: Info about notifier function to be called
- *
- *	Registers a function with the list of functions
- *	to be called at reboot time.
- *
- *	Currently always returns zero, as blocking_notifier_chain_register()
- *	always returns zero.
- */
- 
-int register_reboot_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(register_reboot_notifier);
-
-/**
- *	unregister_reboot_notifier - Unregister previously registered reboot notifier
- *	@nb: Hook to be unregistered
- *
- *	Unregisters a previously registered reboot
- *	notifier function.
- *
- *	Returns zero on success, or %-ENOENT on failure.
- */
- 
-int unregister_reboot_notifier(struct notifier_block * nb)
-{
-	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
-}
-
-EXPORT_SYMBOL(unregister_reboot_notifier);
-
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
 	int no_nice;
@@ -683,7 +152,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 	switch (which) {
 		case PRIO_PROCESS:
 			if (who)
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			else
 				p = current;
 			if (p)
@@ -691,7 +160,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 			break;
 		case PRIO_PGRP:
 			if (who)
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			else
 				pgrp = task_pgrp(current);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -740,7 +209,7 @@ asmlinkage long sys_getpriority(int which, int who)
 	switch (which) {
 		case PRIO_PROCESS:
 			if (who)
-				p = find_task_by_pid(who);
+				p = find_task_by_vpid(who);
 			else
 				p = current;
 			if (p) {
@@ -751,7 +220,7 @@ asmlinkage long sys_getpriority(int which, int who)
 			break;
 		case PRIO_PGRP:
 			if (who)
-				pgrp = find_pid(who);
+				pgrp = find_vpid(who);
 			else
 				pgrp = task_pgrp(current);
 			do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
@@ -1448,9 +917,10 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 	struct task_struct *p;
 	struct task_struct *group_leader = current->group_leader;
 	int err = -EINVAL;
+	struct pid_namespace *ns;
 
 	if (!pid)
-		pid = group_leader->pid;
+		pid = task_pid_vnr(group_leader);
 	if (!pgid)
 		pgid = pid;
 	if (pgid < 0)
@@ -1459,10 +929,12 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 	/* From this point forward we keep holding onto the tasklist lock
 	 * so that our parent does not change from under us. -DaveM
 	 */
+	ns = current->nsproxy->pid_ns;
+
 	write_lock_irq(&tasklist_lock);
 
 	err = -ESRCH;
-	p = find_task_by_pid(pid);
+	p = find_task_by_pid_ns(pid, ns);
 	if (!p)
 		goto out;
 
@@ -1488,9 +960,9 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 		goto out;
 
 	if (pgid != pid) {
-		struct task_struct *g =
-			find_task_by_pid_type(PIDTYPE_PGID, pgid);
+		struct task_struct *g;
 
+		g = find_task_by_pid_type_ns(PIDTYPE_PGID, pgid, ns);
 		if (!g || task_session(g) != task_session(group_leader))
 			goto out;
 	}
@@ -1499,10 +971,13 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 	if (err)
 		goto out;
 
-	if (process_group(p) != pgid) {
+	if (task_pgrp_nr_ns(p, ns) != pgid) {
+		struct pid *pid;
+
 		detach_pid(p, PIDTYPE_PGID);
-		p->signal->pgrp = pgid;
-		attach_pid(p, PIDTYPE_PGID, find_pid(pgid));
+		pid = find_vpid(pgid);
+		attach_pid(p, PIDTYPE_PGID, pid);
+		set_task_pgrp(p, pid_nr(pid));
 	}
 
 	err = 0;
@@ -1515,19 +990,21 @@ out:
 asmlinkage long sys_getpgid(pid_t pid)
 {
 	if (!pid)
-		return process_group(current);
+		return task_pgrp_vnr(current);
 	else {
 		int retval;
 		struct task_struct *p;
+		struct pid_namespace *ns;
 
-		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		ns = current->nsproxy->pid_ns;
 
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid_ns(pid, ns);
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getpgid(p);
 			if (!retval)
-				retval = process_group(p);
+				retval = task_pgrp_nr_ns(p, ns);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1539,7 +1016,7 @@ asmlinkage long sys_getpgid(pid_t pid)
 asmlinkage long sys_getpgrp(void)
 {
 	/* SMP - assuming writes are word atomic this is fine */
-	return process_group(current);
+	return task_pgrp_vnr(current);
 }
 
 #endif
@@ -1547,19 +1024,21 @@ asmlinkage long sys_getpgrp(void)
 asmlinkage long sys_getsid(pid_t pid)
 {
 	if (!pid)
-		return process_session(current);
+		return task_session_vnr(current);
 	else {
 		int retval;
 		struct task_struct *p;
+		struct pid_namespace *ns;
 
-		read_lock(&tasklist_lock);
-		p = find_task_by_pid(pid);
+		ns = current->nsproxy->pid_ns;
 
+		read_lock(&tasklist_lock);
+		p = find_task_by_pid_ns(pid, ns);
 		retval = -ESRCH;
 		if (p) {
 			retval = security_task_getsid(p);
 			if (!retval)
-				retval = process_session(p);
+				retval = task_session_nr_ns(p, ns);
 		}
 		read_unlock(&tasklist_lock);
 		return retval;
@@ -1586,7 +1065,8 @@ asmlinkage long sys_setsid(void)
 	 * session id and so the check will always fail and make it so
 	 * init cannot successfully call setsid.
 	 */
-	if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
+	if (session > 1 && find_task_by_pid_type_ns(PIDTYPE_PGID,
+				session, &init_pid_ns))
 		goto out;
 
 	group_leader->signal->leader = 1;
@@ -1596,7 +1076,7 @@ asmlinkage long sys_setsid(void)
 	group_leader->signal->tty = NULL;
 	spin_unlock(&group_leader->sighand->siglock);
 
-	err = process_group(group_leader);
+	err = task_pgrp_vnr(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
 	return err;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 067554bda8b..3b4efbe2644 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1888,7 +1888,7 @@ int proc_dointvec_bset(struct ctl_table *table, int write, struct file *filp,
 		return -EPERM;
 	}
 
-	op = is_init(current) ? OP_SET : OP_AND;
+	op = is_global_init(current) ? OP_SET : OP_AND;
 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
 				do_proc_dointvec_bset_conv,&op);
 }
@@ -2278,7 +2278,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 	pid_t tmp;
 	int r;
 
-	tmp = pid_nr(cad_pid);
+	tmp = pid_nr_ns(cad_pid, current->nsproxy->pid_ns);
 
 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
 			       lenp, ppos, NULL, NULL);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 7d4d7f9c1bb..9f360f68aad 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,10 @@
 #include <linux/delayacct.h>
 #include <linux/cpumask.h>
 #include <linux/percpu.h>
+#include <linux/cgroupstats.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/file.h>
 #include <net/genetlink.h>
 #include <asm/atomic.h>
 
@@ -49,6 +53,11 @@ __read_mostly = {
 	[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
 	[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
+static struct nla_policy
+cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
+	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+};
+
 struct listener {
 	struct list_head list;
 	pid_t pid;
@@ -372,6 +381,51 @@ err:
 	return NULL;
 }
 
+static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	int rc = 0;
+	struct sk_buff *rep_skb;
+	struct cgroupstats *stats;
+	struct nlattr *na;
+	size_t size;
+	u32 fd;
+	struct file *file;
+	int fput_needed;
+
+	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
+	if (!na)
+		return -EINVAL;
+
+	fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]);
+	file = fget_light(fd, &fput_needed);
+	if (file) {
+		size = nla_total_size(sizeof(struct cgroupstats));
+
+		rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb,
+					size);
+		if (rc < 0)
+			goto err;
+
+		na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS,
+					sizeof(struct cgroupstats));
+		stats = nla_data(na);
+		memset(stats, 0, sizeof(*stats));
+
+		rc = cgroupstats_build(stats, file->f_dentry);
+		if (rc < 0)
+			goto err;
+
+		fput_light(file, fput_needed);
+		return send_reply(rep_skb, info->snd_pid);
+	}
+
+err:
+	if (file)
+		fput_light(file, fput_needed);
+	nlmsg_free(rep_skb);
+	return rc;
+}
+
 static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
 	int rc = 0;
@@ -522,6 +576,12 @@ static struct genl_ops taskstats_ops = {
 	.policy		= taskstats_cmd_get_policy,
 };
 
+static struct genl_ops cgroupstats_ops = {
+	.cmd		= CGROUPSTATS_CMD_GET,
+	.doit		= cgroupstats_user_cmd,
+	.policy		= cgroupstats_cmd_get_policy,
+};
+
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
@@ -546,8 +606,15 @@ static int __init taskstats_init(void)
 	if (rc < 0)
 		goto err;
 
+	rc = genl_register_ops(&family, &cgroupstats_ops);
+	if (rc < 0)
+		goto err_cgroup_ops;
+
 	family_registered = 1;
+	printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
 	return 0;
+err_cgroup_ops:
+	genl_unregister_ops(&family, &taskstats_ops);
 err:
 	genl_unregister_family(&family);
 	return rc;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 51b6a6a6158..c8a9d13874d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -207,15 +207,12 @@ static inline void clocksource_resume_watchdog(void) { }
  */
 void clocksource_resume(void)
 {
-	struct list_head *tmp;
+	struct clocksource *cs;
 	unsigned long flags;
 
 	spin_lock_irqsave(&clocksource_lock, flags);
 
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *cs;
-
-		cs = list_entry(tmp, struct clocksource, list);
+	list_for_each_entry(cs, &clocksource_list, list) {
 		if (cs->resume)
 			cs->resume();
 	}
@@ -369,7 +366,6 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 					  const char *buf, size_t count)
 {
 	struct clocksource *ovr = NULL;
-	struct list_head *tmp;
 	size_t ret = count;
 	int len;
 
@@ -389,12 +385,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 
 	len = strlen(override_name);
 	if (len) {
+		struct clocksource *cs;
+
 		ovr = clocksource_override;
 		/* try to select it: */
-		list_for_each(tmp, &clocksource_list) {
-			struct clocksource *cs;
-
-			cs = list_entry(tmp, struct clocksource, list);
+		list_for_each_entry(cs, &clocksource_list, list) {
 			if (strlen(cs->name) == len &&
 			    !strcmp(cs->name, override_name))
 				ovr = cs;
@@ -422,14 +417,11 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 static ssize_t
 sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
 {
-	struct list_head *tmp;
+	struct clocksource *src;
 	char *curr = buf;
 
 	spin_lock_irq(&clocksource_lock);
-	list_for_each(tmp, &clocksource_list) {
-		struct clocksource *src;
-
-		src = list_entry(tmp, struct clocksource, list);
+	list_for_each_entry(src, &clocksource_list, list) {
 		curr += sprintf(curr, "%s ", src->name);
 	}
 	spin_unlock_irq(&clocksource_lock);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index ce89ffb474d..10a1347597f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -153,6 +153,7 @@ void tick_nohz_stop_sched_tick(void)
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now, delta;
+	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	int cpu;
 
 	local_irq_save(flags);
@@ -302,11 +303,26 @@ void tick_nohz_stop_sched_tick(void)
 out:
 	ts->next_jiffies = next_jiffies;
 	ts->last_jiffies = last_jiffies;
+	ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
 	local_irq_restore(flags);
 }
 
 /**
+ * tick_nohz_get_sleep_length - return the length of the current sleep
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_sleep_length(void)
+{
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
+
+	return ts->sleep_length;
+}
+
+EXPORT_SYMBOL_GPL(tick_nohz_get_sleep_length);
+
+/**
  * nohz_restart_sched_tick - restart the idle tick from the idle task
  *
  * Restart the idle tick when the CPU is woken up from idle
diff --git a/kernel/timer.c b/kernel/timer.c
index 8521d10fbb2..fb4e67d5dd6 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
+#include <linux/pid_namespace.h>
 #include <linux/notifier.h>
 #include <linux/thread_info.h>
 #include <linux/time.h>
@@ -956,7 +957,7 @@ asmlinkage unsigned long sys_alarm(unsigned int seconds)
  */
 asmlinkage long sys_getpid(void)
 {
-	return current->tgid;
+	return task_tgid_vnr(current);
 }
 
 /*
@@ -970,7 +971,7 @@ asmlinkage long sys_getppid(void)
 	int pid;
 
 	rcu_read_lock();
-	pid = rcu_dereference(current->real_parent)->tgid;
+	pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
 	rcu_read_unlock();
 
 	return pid;
@@ -1102,7 +1103,7 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 /* Thread ID - the internal kernel "pid" */
 asmlinkage long sys_gettid(void)
 {
-	return current->pid;
+	return task_pid_vnr(current);
 }
 
 /**
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e080d1d744c..52d5e7c9a8e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -32,6 +32,7 @@
 #include <linux/freezer.h>
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
+#include <linux/lockdep.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -61,6 +62,9 @@ struct workqueue_struct {
 	const char *name;
 	int singlethread;
 	int freezeable;		/* Freeze threads during suspend */
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
 };
 
 /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
@@ -250,6 +254,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 		struct work_struct *work = list_entry(cwq->worklist.next,
 						struct work_struct, entry);
 		work_func_t f = work->func;
+#ifdef CONFIG_LOCKDEP
+		/*
+		 * It is permissible to free the struct work_struct
+		 * from inside the function that is called from it,
+		 * this we need to take into account for lockdep too.
+		 * To avoid bogus "held lock freed" warnings as well
+		 * as problems when looking into work->lockdep_map,
+		 * make a copy and use that here.
+		 */
+		struct lockdep_map lockdep_map = work->lockdep_map;
+#endif
 
 		cwq->current_work = work;
 		list_del_init(cwq->worklist.next);
@@ -257,13 +272,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
 
 		BUG_ON(get_wq_data(work) != cwq);
 		work_clear_pending(work);
+		lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+		lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
 		f(work);
+		lock_release(&lockdep_map, 1, _THIS_IP_);
+		lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
 
 		if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
 			printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
 					"%s/0x%08x/%d\n",
 					current->comm, preempt_count(),
-				       	current->pid);
+				       	task_pid_nr(current));
 			printk(KERN_ERR "    last function: ");
 			print_symbol("%s\n", (unsigned long)f);
 			debug_show_held_locks(current);
@@ -376,6 +395,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 	int cpu;
 
 	might_sleep();
+	lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&wq->lockdep_map, 1, _THIS_IP_);
 	for_each_cpu_mask(cpu, *cpu_map)
 		flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
 }
@@ -446,6 +467,9 @@ static void wait_on_work(struct work_struct *work)
 
 	might_sleep();
 
+	lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&work->lockdep_map, 1, _THIS_IP_);
+
 	cwq = get_wq_data(work);
 	if (!cwq)
 		return;
@@ -695,8 +719,10 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	}
 }
 
-struct workqueue_struct *__create_workqueue(const char *name,
-					    int singlethread, int freezeable)
+struct workqueue_struct *__create_workqueue_key(const char *name,
+						int singlethread,
+						int freezeable,
+						struct lock_class_key *key)
 {
 	struct workqueue_struct *wq;
 	struct cpu_workqueue_struct *cwq;
@@ -713,6 +739,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	}
 
 	wq->name = name;
+	lockdep_init_map(&wq->lockdep_map, name, key, 0);
 	wq->singlethread = singlethread;
 	wq->freezeable = freezeable;
 	INIT_LIST_HEAD(&wq->list);
@@ -741,7 +768,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
 	}
 	return wq;
 }
-EXPORT_SYMBOL_GPL(__create_workqueue);
+EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
@@ -752,6 +779,9 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 	if (cwq->thread == NULL)
 		return;
 
+	lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+
 	flush_cpu_workqueue(cwq);
 	/*
 	 * If the caller is CPU_DEAD and cwq->worklist was not empty,
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7d16e643330..c567f219191 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -498,3 +498,5 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	select FRAME_POINTER
 	help
 	  Provide stacktrace filter for fault-injection capabilities
+
+source "samples/Kconfig"
diff --git a/lib/Makefile b/lib/Makefile
index c5f215d509d..3a0983b7741 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -6,7 +6,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
 	 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
-	 proportions.o
+	 proportions.o prio_heap.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/hweight.c b/lib/hweight.c
index 360556a7803..389424ecb12 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <asm/types.h>
-#include <asm/bitops.h>
 
 /**
  * hweightN - returns the hamming weight of a N-bit word
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 9659eabffc3..393a0e915c2 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -124,12 +124,13 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	mutex_lock(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
+		unsigned long flags;
 
-		spin_lock(&fbc->lock);
+		spin_lock_irqsave(&fbc->lock, flags);
 		pcount = per_cpu_ptr(fbc->counters, cpu);
 		fbc->count += *pcount;
 		*pcount = 0;
-		spin_unlock(&fbc->lock);
+		spin_unlock_irqrestore(&fbc->lock, flags);
 	}
 	mutex_unlock(&percpu_counters_lock);
 	return NOTIFY_OK;
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
new file mode 100644
index 00000000000..471944a54e2
--- /dev/null
+++ b/lib/prio_heap.c
@@ -0,0 +1,70 @@
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/slab.h>
+#include <linux/prio_heap.h>
+
+int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+	      int (*gt)(void *, void *))
+{
+	heap->ptrs = kmalloc(size, gfp_mask);
+	if (!heap->ptrs)
+		return -ENOMEM;
+	heap->size = 0;
+	heap->max = size / sizeof(void *);
+	heap->gt = gt;
+	return 0;
+}
+
+void heap_free(struct ptr_heap *heap)
+{
+	kfree(heap->ptrs);
+}
+
+void *heap_insert(struct ptr_heap *heap, void *p)
+{
+	void *res;
+	void **ptrs = heap->ptrs;
+	int pos;
+
+	if (heap->size < heap->max) {
+		/* Heap insertion */
+		int pos = heap->size++;
+		while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
+			ptrs[pos] = ptrs[(pos-1)/2];
+			pos = (pos-1)/2;
+		}
+		ptrs[pos] = p;
+		return NULL;
+	}
+
+	/* The heap is full, so something will have to be dropped */
+
+	/* If the new pointer is greater than the current max, drop it */
+	if (heap->gt(p, ptrs[0]))
+		return p;
+
+	/* Replace the current max and heapify */
+	res = ptrs[0];
+	ptrs[0] = p;
+	pos = 0;
+
+	while (1) {
+		int left = 2 * pos + 1;
+		int right = 2 * pos + 2;
+		int largest = pos;
+		if (left < heap->size && heap->gt(ptrs[left], p))
+			largest = left;
+		if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
+			largest = right;
+		if (largest == pos)
+			break;
+		/* Push p down the heap one level and bump one up */
+		ptrs[pos] = ptrs[largest];
+		ptrs[largest] = p;
+		pos = largest;
+	}
+	return res;
+}
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 479fd462eaa..9c4b0256490 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -60,12 +60,12 @@ static void spin_bug(spinlock_t *lock, const char *msg)
 		owner = lock->owner;
 	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
 		msg, raw_smp_processor_id(),
-		current->comm, current->pid);
+		current->comm, task_pid_nr(current));
 	printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
 			".owner_cpu: %d\n",
 		lock, lock->magic,
 		owner ? owner->comm : "<none>",
-		owner ? owner->pid : -1,
+		owner ? task_pid_nr(owner) : -1,
 		lock->owner_cpu);
 	dump_stack();
 }
@@ -116,7 +116,7 @@ static void __spin_lock_debug(spinlock_t *lock)
 			printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
 					"%s/%d, %p\n",
 				raw_smp_processor_id(), current->comm,
-				current->pid, lock);
+				task_pid_nr(current), lock);
 			dump_stack();
 #ifdef CONFIG_SMP
 			trigger_all_cpu_backtrace();
@@ -161,7 +161,7 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
 
 	printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
 		msg, raw_smp_processor_id(), current->comm,
-		current->pid, lock);
+		task_pid_nr(current), lock);
 	dump_stack();
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 920366399ee..5209e47b7fe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -841,7 +841,7 @@ static void shrink_readahead_size_eio(struct file *filp,
 /**
  * do_generic_mapping_read - generic file read routine
  * @mapping:	address_space to be read
- * @_ra:	file's readahead state
+ * @ra:		file's readahead state
  * @filp:	the file to read
  * @ppos:	current file position
  * @desc:	read_descriptor
diff --git a/mm/memory.c b/mm/memory.c
index bd16dcaeefb..142683df875 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -259,9 +259,6 @@ void free_pgd_range(struct mmu_gather **tlb,
 			continue;
 		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
-
-	if (!(*tlb)->fullmm)
-		flush_tlb_pgtables((*tlb)->mm, start, end);
 }
 
 void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 568152ae6ca..c1592a94582 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -78,6 +78,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/compat.h>
@@ -940,7 +941,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
@@ -1388,7 +1389,6 @@ EXPORT_SYMBOL(alloc_pages_current);
  * keeps mempolicies cpuset relative after its cpuset moves.  See
  * further kernel/cpuset.c update_nodemask().
  */
-void *cpuset_being_rebound;
 
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
@@ -2019,4 +2019,3 @@ out:
 		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
-
diff --git a/mm/migrate.c b/mm/migrate.c
index 06d0877a66e..4d6ee03db94 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -19,6 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
+#include <linux/nsproxy.h>
 #include <linux/pagevec.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
@@ -924,7 +925,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 
 	/* Find the mm_struct */
 	read_lock(&tasklist_lock);
-	task = pid ? find_task_by_pid(pid) : current;
+	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
 		read_unlock(&tasklist_lock);
 		return -ESRCH;
diff --git a/mm/mmap.c b/mm/mmap.c
index 4275e81e25b..7a30c498823 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1048,8 +1048,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 
 	/* The open routine did something to the protections already? */
 	if (pgprot_val(vma->vm_page_prot) !=
-	    pgprot_val(protection_map[vm_flags &
-		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+	    pgprot_val(vm_get_page_prot(vm_flags)))
 		return 0;
 
 	/* Specialty mapping? */
@@ -1130,8 +1129,7 @@ munmap_back:
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_flags = vm_flags;
-	vma->vm_page_prot = protection_map[vm_flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
 
 	if (file) {
@@ -2002,8 +2000,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
 	vma->vm_flags = flags;
-	vma->vm_page_prot = protection_map[flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(flags);
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 out:
 	mm->total_vm += len >> PAGE_SHIFT;
@@ -2209,7 +2206,7 @@ int install_special_mapping(struct mm_struct *mm,
 	vma->vm_end = addr + len;
 
 	vma->vm_flags = vm_flags | mm->def_flags;
-	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
 	vma->vm_ops = &special_mapping_vmops;
 	vma->vm_private_data = pages;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1d4d69790e5..55227845abb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -192,11 +192,9 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = protection_map[newflags &
-		(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	vma->vm_page_prot = vm_get_page_prot(newflags);
 	if (vma_wants_writenotify(vma)) {
-		vma->vm_page_prot = protection_map[newflags &
-			(VM_READ|VM_WRITE|VM_EXEC)];
+		vma->vm_page_prot = vm_get_page_prot(newflags);
 		dirty_accountable = 1;
 	}
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a64decb5b13..824cade0782 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -212,7 +212,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
 		if (!p->mm)
 			continue;
 		/* skip the init task */
-		if (is_init(p))
+		if (is_global_init(p))
 			continue;
 
 		/*
@@ -265,7 +265,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
  */
 static void __oom_kill_task(struct task_struct *p, int verbose)
 {
-	if (is_init(p)) {
+	if (is_global_init(p)) {
 		WARN_ON(1);
 		printk(KERN_WARNING "tried to kill init!\n");
 		return;
@@ -278,7 +278,8 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 	}
 
 	if (verbose)
-		printk(KERN_ERR "Killed process %d (%s)\n", p->pid, p->comm);
+		printk(KERN_ERR "Killed process %d (%s)\n",
+				task_pid_nr(p), p->comm);
 
 	/*
 	 * We give our sacrificial lamb high priority and access to
@@ -326,7 +327,7 @@ static int oom_kill_task(struct task_struct *p)
 	 * to memory reserves though, otherwise we might deplete all memory.
 	 */
 	do_each_thread(g, q) {
-		if (q->mm == mm && q->tgid != p->tgid)
+		if (q->mm == mm && !same_thread_group(q, p))
 			force_sig(SIGKILL, q);
 	} while_each_thread(g, q);
 
@@ -337,7 +338,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			    unsigned long points, const char *message)
 {
 	struct task_struct *c;
-	struct list_head *tsk;
 
 	if (printk_ratelimit()) {
 		printk(KERN_WARNING "%s invoked oom-killer: "
@@ -357,11 +357,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	}
 
 	printk(KERN_ERR "%s: kill process %d (%s) score %li or a child\n",
-					message, p->pid, p->comm, points);
+					message, task_pid_nr(p), p->comm, points);
 
 	/* Try to kill a child first */
-	list_for_each(tsk, &p->children) {
-		c = list_entry(tsk, struct task_struct, sibling);
+	list_for_each_entry(c, &p->children, sibling) {
 		if (c->mm == p->mm)
 			continue;
 		if (!oom_kill_task(c))
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index ff5784b440d..66c736953cf 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -656,11 +656,13 @@ static inline int hidp_setup_input(struct hidp_session *session, struct hidp_con
 	}
 
 	if (req->subclass & 0x80) {
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_REL);
-		input->keybit[LONG(BTN_MOUSE)] = BIT(BTN_LEFT) | BIT(BTN_RIGHT) | BIT(BTN_MIDDLE);
-		input->relbit[0] = BIT(REL_X) | BIT(REL_Y);
-		input->keybit[LONG(BTN_MOUSE)] |= BIT(BTN_SIDE) | BIT(BTN_EXTRA);
-		input->relbit[0] |= BIT(REL_WHEEL);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REL);
+		input->keybit[BIT_WORD(BTN_MOUSE)] = BIT_MASK(BTN_LEFT) |
+			BIT_MASK(BTN_RIGHT) | BIT_MASK(BTN_MIDDLE);
+		input->relbit[0] = BIT_MASK(REL_X) | BIT_MASK(REL_Y);
+		input->keybit[BIT_WORD(BTN_MOUSE)] |= BIT_MASK(BTN_SIDE) |
+			BIT_MASK(BTN_EXTRA);
+		input->relbit[0] |= BIT_MASK(REL_WHEEL);
 	}
 
 	input->dev.parent = hidp_get_device(session);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1f0068eae50..e0a06942c02 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -447,7 +447,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 	rcu_read_unlock_bh();
 
-	sk_filter_delayed_uncharge(sk, old_fp);
+	if (old_fp)
+		sk_filter_delayed_uncharge(sk, old_fp);
 	return 0;
 }
 
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 590a767b029..daadbcc4e8d 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -15,7 +15,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8cae60c5338..7ac703171ff 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -161,7 +161,7 @@
 #endif
 #include <asm/byteorder.h>
 #include <linux/rcupdate.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/uaccess.h>
@@ -3514,7 +3514,7 @@ static int pktgen_thread_worker(void *arg)
 
 	init_waitqueue_head(&t->queue);
 
-	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
+	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1072d16696c..4a2640d3826 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -744,10 +744,10 @@ static struct net *get_net_ns_by_pid(pid_t pid)
 	rcu_read_lock();
 	tsk = find_task_by_pid(pid);
 	if (tsk) {
-		task_lock(tsk);
-		if (tsk->nsproxy)
-			net = get_net(tsk->nsproxy->net_ns);
-		task_unlock(tsk);
+		struct nsproxy *nsproxy;
+		nsproxy = task_nsproxy(tsk);
+		if (nsproxy)
+			net = get_net(nsproxy->net_ns);
 	}
 	rcu_read_unlock();
 	return net;
diff --git a/net/core/scm.c b/net/core/scm.c
index 530bee8d9ed..100ba6d9d47 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -24,6 +24,8 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
+#include <linux/pid.h>
+#include <linux/nsproxy.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -42,7 +44,7 @@
 
 static __inline__ int scm_check_creds(struct ucred *creds)
 {
-	if ((creds->pid == current->tgid || capable(CAP_SYS_ADMIN)) &&
+	if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
 	    ((creds->uid == current->uid || creds->uid == current->euid ||
 	      creds->uid == current->suid) || capable(CAP_SETUID)) &&
 	    ((creds->gid == current->gid || creds->gid == current->egid ||
diff --git a/net/core/sock.c b/net/core/sock.c
index d292b4113d6..febbcbcf802 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -232,7 +232,7 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 			warned++;
 			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
 			       "tries to set negative timeout\n",
-				current->comm, current->pid);
+				current->comm, task_pid_nr(current));
 		return 0;
 	}
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 81a8285d6d6..8d8c2915e06 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -54,7 +54,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 3cef12835c4..8fb6ca23700 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,7 +93,7 @@ int inet_csk_get_port(struct inet_hashinfo *hashinfo,
 		int remaining, rover, low, high;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 		rover = net_random() % remaining + low;
 
 		do {
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fac6398e436..16eecc7046a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -286,7 +286,7 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct inet_timewait_sock *tw = NULL;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 1960747f354..c99f2a33fb9 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -794,7 +794,7 @@ static int sync_thread(void *startup)
 
 	add_wait_queue(&sync_wait, &wait);
 
-	set_sync_pid(state, current->pid);
+	set_sync_pid(state, task_pid_nr(current));
 	complete(tinfo->startup);
 
 	/*
@@ -877,7 +877,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 	if (!tinfo)
 		return -ENOMEM;
 
-	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
 	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
 		  sizeof(struct ip_vs_sync_conn));
 
@@ -917,7 +917,7 @@ int stop_sync_thread(int state)
 	    (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
 		return -ESRCH;
 
-	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
+	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, task_pid_nr(current));
 	IP_VS_INFO("stopping sync thread %d ...\n",
 		   (state == IP_VS_STATE_MASTER) ?
 		   sync_master_pid : sync_backup_pid);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c78acc1a7f1..ffddd2b4535 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -122,7 +122,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
 	ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
 
 	if (write && ret == 0) {
-		if (range[1] <= range[0])
+		if (range[1] < range[0])
 			ret = -EINVAL;
 		else
 			set_local_port_range(range);
@@ -150,7 +150,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name,
 
 	ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen);
 	if (ret == 0 && newval && newlen) {
-		if (range[1] <= range[0])
+		if (range[1] < range[0])
 			ret = -EINVAL;
 		else
 			set_local_port_range(range);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4f322003835..2e6ad6dbba6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1334,7 +1334,7 @@ do_prequeue:
 		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
 			if (net_ratelimit())
 				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			peek_seq = tp->copied_seq;
 		}
 		continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cb9fc58efb2..35d2b0e9e10 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -147,13 +147,14 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 	write_lock_bh(&udp_hash_lock);
 
 	if (!snum) {
-		int i, low, high;
+		int i, low, high, remaining;
 		unsigned rover, best, best_size_so_far;
 
 		inet_get_local_port_range(&low, &high);
+		remaining = (high - low) + 1;
 
 		best_size_so_far = UINT_MAX;
-		best = rover = net_random() % (high - low) + low;
+		best = rover = net_random() % remaining + low;
 
 		/* 1st pass: look for empty (or shortest) hash chain */
 		for (i = 0; i < UDP_HTABLE_SIZE; i++) {
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 1c2c2765543..d6f1026f194 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -261,7 +261,7 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		struct inet_timewait_sock *tw = NULL;
 
 		inet_get_local_port_range(&low, &high);
-		remaining = high - low;
+		remaining = (high - low) + 1;
 
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 49eacba824d..46cf962f7f8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -762,7 +762,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 			if (net_ratelimit())
 				printk(KERN_DEBUG "LLC(%s:%d): Application "
 						  "bug, race in MSG_PEEK.\n",
-				       current->comm, current->pid);
+				       current->comm, task_pid_nr(current));
 			peek_seq = llc->copied_seq;
 		}
 		continue;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d34a9deca67..4b4ed2a5803 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -37,8 +37,6 @@
 
 struct ieee80211_local;
 
-#define BIT(x) (1 << (x))
-
 #define IEEE80211_ALIGN32_PAD(a) ((4 - ((a) & 3)) & 3)
 
 /* Maximum number of broadcast/multicast frames to buffer when some of the
diff --git a/net/mac80211/ieee80211_sta.c b/net/mac80211/ieee80211_sta.c
index db81aef6177..f7ffeec3913 100644
--- a/net/mac80211/ieee80211_sta.c
+++ b/net/mac80211/ieee80211_sta.c
@@ -108,14 +108,11 @@ struct ieee802_11_elems {
 	u8 wmm_param_len;
 };
 
-enum ParseRes { ParseOK = 0, ParseUnknown = 1, ParseFailed = -1 };
-
-static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
-					    struct ieee802_11_elems *elems)
+static void ieee802_11_parse_elems(u8 *start, size_t len,
+				   struct ieee802_11_elems *elems)
 {
 	size_t left = len;
 	u8 *pos = start;
-	int unknown = 0;
 
 	memset(elems, 0, sizeof(*elems));
 
@@ -126,15 +123,8 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
 		elen = *pos++;
 		left -= 2;
 
-		if (elen > left) {
-#if 0
-			if (net_ratelimit())
-				printk(KERN_DEBUG "IEEE 802.11 element parse "
-				       "failed (id=%d elen=%d left=%d)\n",
-				       id, elen, left);
-#endif
-			return ParseFailed;
-		}
+		if (elen > left)
+			return;
 
 		switch (id) {
 		case WLAN_EID_SSID:
@@ -201,28 +191,15 @@ static enum ParseRes ieee802_11_parse_elems(u8 *start, size_t len,
 			elems->ext_supp_rates_len = elen;
 			break;
 		default:
-#if 0
-			printk(KERN_DEBUG "IEEE 802.11 element parse ignored "
-				      "unknown element (id=%d elen=%d)\n",
-				      id, elen);
-#endif
-			unknown++;
 			break;
 		}
 
 		left -= elen;
 		pos += elen;
 	}
-
-	/* Do not trigger error if left == 1 as Apple Airport base stations
-	 * send AssocResps that are one spurious byte too long. */
-
-	return unknown ? ParseUnknown : ParseOK;
 }
 
 
-
-
 static int ecw2cw(int ecw)
 {
 	int cw = 1;
@@ -931,12 +908,7 @@ static void ieee80211_auth_challenge(struct net_device *dev,
 
 	printk(KERN_DEBUG "%s: replying to auth challenge\n", dev->name);
 	pos = mgmt->u.auth.variable;
-	if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-	    == ParseFailed) {
-		printk(KERN_DEBUG "%s: failed to parse Auth(challenge)\n",
-		       dev->name);
-		return;
-	}
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 	if (!elems.challenge) {
 		printk(KERN_DEBUG "%s: no challenge IE in shared key auth "
 		       "frame\n", dev->name);
@@ -1230,12 +1202,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct net_device *dev,
 	aid &= ~(BIT(15) | BIT(14));
 
 	pos = mgmt->u.assoc_resp.variable;
-	if (ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems)
-	    == ParseFailed) {
-		printk(KERN_DEBUG "%s: failed to parse AssocResp\n",
-		       dev->name);
-		return;
-	}
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
 
 	if (!elems.supp_rates) {
 		printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
@@ -1459,7 +1426,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
 	struct ieee802_11_elems elems;
 	size_t baselen;
-	int channel, invalid = 0, clen;
+	int channel, clen;
 	struct ieee80211_sta_bss *bss;
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -1505,9 +1472,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev,
 #endif /* CONFIG_MAC80211_IBSS_DEBUG */
 	}
 
-	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-				   &elems) == ParseFailed)
-		invalid = 1;
+	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
 	if (sdata->type == IEEE80211_IF_TYPE_IBSS && elems.supp_rates &&
 	    memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 &&
@@ -1724,9 +1689,7 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev,
 	if (baselen > len)
 		return;
 
-	if (ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen,
-				   &elems) == ParseFailed)
-		return;
+	ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
 
 	if (elems.erp_info && elems.erp_info_len >= 1)
 		ieee80211_handle_erp_ie(dev, elems.erp_info[0]);
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index af79423bc8e..9ec50139b9a 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -2,13 +2,13 @@
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
  */
 #include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
 #include <net/netfilter/nf_conntrack.h>
 
 #include <asm/div64.h>
-#include <asm/bitops.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e11000a8e95..d0936506b73 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1623,11 +1623,6 @@ static struct vm_operations_struct packet_mmap_ops = {
 	.close =packet_mm_close,
 };
 
-static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
-{
-	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
-}
-
 static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
 {
 	int i;
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index eaabf087c59..d1e9d68f8ba 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -146,18 +146,18 @@ static void rfkill_disconnect(struct input_handle *handle)
 static const struct input_device_id rfkill_ids[] = {
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_WLAN)] = BIT(KEY_WLAN) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_WLAN)] = BIT_MASK(KEY_WLAN) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_BLUETOOTH)] = BIT(KEY_BLUETOOTH) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_BLUETOOTH)] = BIT_MASK(KEY_BLUETOOTH) },
 	},
 	{
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT,
-		.evbit = { BIT(EV_KEY) },
-		.keybit = { [LONG(KEY_UWB)] = BIT(KEY_UWB) },
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { [BIT_WORD(KEY_UWB)] = BIT_MASK(KEY_UWB) },
 	},
 	{ }
 };
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 92435a882fa..9c15c4888d1 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -2,9 +2,7 @@
 # Traffic control configuration.
 # 
 
-menu "QoS and/or fair queueing"
-
-config NET_SCHED
+menuconfig NET_SCHED
 	bool "QoS and/or fair queueing"
 	select NET_SCH_FIFO
 	---help---
@@ -41,9 +39,6 @@ config NET_SCHED
 	  The available schedulers are listed in the following questions; you
 	  can say Y to as many as you like. If unsure, say N now.
 
-config NET_SCH_FIFO
-	bool
-
 if NET_SCHED
 
 comment "Queueing/Scheduling"
@@ -500,4 +495,5 @@ config NET_CLS_IND
 
 endif # NET_SCHED
 
-endmenu
+config NET_SCH_FIFO
+	bool
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e01d57692c9..fa1a6f45dc4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -556,6 +556,7 @@ void dev_deactivate(struct net_device *dev)
 {
 	struct Qdisc *qdisc;
 	struct sk_buff *skb;
+	int running;
 
 	spin_lock_bh(&dev->queue_lock);
 	qdisc = dev->qdisc;
@@ -571,12 +572,31 @@ void dev_deactivate(struct net_device *dev)
 
 	dev_watchdog_down(dev);
 
-	/* Wait for outstanding dev_queue_xmit calls. */
+	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
-		yield();
+	do {
+		while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+			yield();
+
+		/*
+		 * Double-check inside queue lock to ensure that all effects
+		 * of the queue run are visible when we return.
+		 */
+		spin_lock_bh(&dev->queue_lock);
+		running = test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+		spin_unlock_bh(&dev->queue_lock);
+
+		/*
+		 * The running flag should never be set at this point because
+		 * we've already set dev->qdisc to noop_qdisc *inside* the same
+		 * pair of spin locks.  That is, if any qdisc_run starts after
+		 * our initial test it should see the noop_qdisc and then
+		 * clear the RUNNING bit before dropping the queue lock.  So
+		 * if it is set here then we've found a bug.
+		 */
+	} while (WARN_ON_ONCE(running));
 }
 
 void dev_init_scheduler(struct net_device *dev)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3c773c53e12..c98873f39ae 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -847,7 +847,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	task->tk_start = jiffies;
 
 	dprintk("RPC:       new task initialized, procpid %u\n",
-				current->pid);
+				task_pid_nr(current));
 }
 
 static struct rpc_task *
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6996cba5aa9..9163ec526c2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -483,7 +483,7 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= current->tgid;
+	sk->sk_peercred.pid	= task_tgid_vnr(current);
 	sk->sk_peercred.uid	= current->euid;
 	sk->sk_peercred.gid	= current->egid;
 	err = 0;
@@ -1133,7 +1133,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= current->tgid;
+	newsk->sk_peercred.pid	= task_tgid_vnr(current);
 	newsk->sk_peercred.uid	= current->euid;
 	newsk->sk_peercred.gid	= current->egid;
 	newu = unix_sk(newsk);
@@ -1194,7 +1194,7 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	sock_hold(skb);
 	unix_peer(ska)=skb;
 	unix_peer(skb)=ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
+	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
 	ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
 	ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
 
diff --git a/samples/Kconfig b/samples/Kconfig
new file mode 100644
index 00000000000..57bb2236952
--- /dev/null
+++ b/samples/Kconfig
@@ -0,0 +1,16 @@
+# samples/Kconfig
+
+menuconfig SAMPLES
+	bool "Sample kernel code"
+	help
+	  You can build and test sample kernel code here.
+
+if SAMPLES
+
+config SAMPLE_MARKERS
+	tristate "Build markers examples -- loadable modules only"
+	depends on MARKERS && m
+	help
+	  This build markers example modules.
+
+endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
new file mode 100644
index 00000000000..5a4f0b6bcbe
--- /dev/null
+++ b/samples/Makefile
@@ -0,0 +1,3 @@
+# Makefile for Linux samples code
+
+obj-$(CONFIG_SAMPLES)	+= markers/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
new file mode 100644
index 00000000000..6d7231265f0
--- /dev/null
+++ b/samples/markers/Makefile
@@ -0,0 +1,4 @@
+# builds the kprobes example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
new file mode 100644
index 00000000000..e787c6d16dd
--- /dev/null
+++ b/samples/markers/marker-example.c
@@ -0,0 +1,54 @@
+/* marker-example.c
+ *
+ * Executes a marker when /proc/marker-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+	int i;
+
+	trace_mark(subsystem_event, "%d %s", 123, "example string");
+	for (i = 0; i < 10; i++)
+		trace_mark(subsystem_eventb, MARK_NOARGS);
+	return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+	.open = my_open,
+};
+
+static int example_init(void)
+{
+	printk(KERN_ALERT "example init\n");
+	pentry_example = create_proc_entry("marker-example", 0444, NULL);
+	if (pentry_example)
+		pentry_example->proc_fops = &mark_ops;
+	else
+		return -EPERM;
+	return 0;
+}
+
+static void example_exit(void)
+{
+	printk(KERN_ALERT "example exit\n");
+	remove_proc_entry("marker-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
new file mode 100644
index 00000000000..238b2e384fc
--- /dev/null
+++ b/samples/markers/probe-example.c
@@ -0,0 +1,98 @@
+/* probe-example.c
+ *
+ * Connects two functions to marker call sites.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+	const char *name;
+	const char *format;
+	marker_probe_func *probe_func;
+};
+
+void probe_subsystem_event(const struct marker *mdata, void *private,
+	const char *format, ...)
+{
+	va_list ap;
+	/* Declare args */
+	unsigned int value;
+	const char *mystr;
+
+	/* Assign args */
+	va_start(ap, format);
+	value = va_arg(ap, typeof(value));
+	mystr = va_arg(ap, typeof(mystr));
+
+	/* Call printk */
+	printk(KERN_DEBUG "Value %u, string %s\n", value, mystr);
+
+	/* or count, check rights, serialize data in a buffer */
+
+	va_end(ap);
+}
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+void probe_subsystem_eventb(const struct marker *mdata, void *private,
+	const char *format, ...)
+{
+	/* Increment counter */
+	atomic_inc(&eventb_count);
+}
+
+static struct probe_data probe_array[] =
+{
+	{	.name = "subsystem_event",
+		.format = "%d %s",
+		.probe_func = probe_subsystem_event },
+	{	.name = "subsystem_eventb",
+		.format = MARK_NOARGS,
+		.probe_func = probe_subsystem_eventb },
+};
+
+static int __init probe_init(void)
+{
+	int result;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+		result = marker_probe_register(probe_array[i].name,
+				probe_array[i].format,
+				probe_array[i].probe_func, &probe_array[i]);
+		if (result)
+			printk(KERN_INFO "Unable to register probe %s\n",
+				probe_array[i].name);
+		result = marker_arm(probe_array[i].name);
+		if (result)
+			printk(KERN_INFO "Unable to arm probe %s\n",
+				probe_array[i].name);
+	}
+	return 0;
+}
+
+static void __exit probe_fini(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(probe_array); i++)
+		marker_probe_unregister(probe_array[i].name);
+	printk(KERN_INFO "Number of event b : %u\n",
+			atomic_read(&eventb_count));
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index de7bb284c61..b96ea8d6a5e 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -56,6 +56,17 @@ endef
 # gcc support functions
 # See documentation in Documentation/kbuild/makefiles.txt
 
+# cc-cross-prefix
+# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
+# Return first prefix where a prefix$(CC) is found in PATH.
+# If no $(CC) found in PATH with listed prefixes return nothing
+cc-cross-prefix =  \
+	$(word 1, $(foreach c,$(1),                                   \
+		$(shell set -e;                                       \
+		if (which $(strip $(c))$(CC)) > /dev/null 2>&1 ; then \
+			echo $(c);                                    \
+		fi)))
+
 # output directory for tests below
 TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
 
diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index e5c6ac7bde9..0e4bd5459df 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -66,12 +66,15 @@ FILELINE * entity_system;
 #define FUNCTION      "-function"
 #define NOFUNCTION    "-nofunction"
 
+char *srctree;
+
 void usage (void)
 {
 	fprintf(stderr, "Usage: docproc {doc|depend} file\n");
 	fprintf(stderr, "Input is read from file.tmpl. Output is sent to stdout\n");
 	fprintf(stderr, "doc: frontend when generating kernel documentation\n");
 	fprintf(stderr, "depend: generate list of files referenced within file\n");
+	fprintf(stderr, "Environment variable SRCTREE: absolute path to kernel source tree.\n");
 }
 
 /*
@@ -90,7 +93,7 @@ void exec_kernel_doc(char **svec)
 			exit(1);
 		case  0:
 			memset(real_filename, 0, sizeof(real_filename));
-			strncat(real_filename, getenv("SRCTREE"), PATH_MAX);
+			strncat(real_filename, srctree, PATH_MAX);
 			strncat(real_filename, KERNELDOCPATH KERNELDOC,
 					PATH_MAX - strlen(real_filename));
 			execvp(real_filename, svec);
@@ -171,7 +174,7 @@ void find_export_symbols(char * filename)
 	if (filename_exist(filename) == NULL) {
 		char real_filename[PATH_MAX + 1];
 		memset(real_filename, 0, sizeof(real_filename));
-		strncat(real_filename, getenv("SRCTREE"), PATH_MAX);
+		strncat(real_filename, srctree, PATH_MAX);
 		strncat(real_filename, filename,
 				PATH_MAX - strlen(real_filename));
 		sym = add_new_file(filename);
@@ -338,6 +341,10 @@ void parse_file(FILE *infile)
 int main(int argc, char *argv[])
 {
 	FILE * infile;
+
+	srctree = getenv("SRCTREE");
+	if (!srctree)
+		srctree = getcwd(NULL, 0);
 	if (argc != 3) {
 		usage();
 		exit(1);
diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl
index b458e2acb4a..28e480c8100 100755
--- a/scripts/checkstack.pl
+++ b/scripts/checkstack.pl
@@ -15,7 +15,7 @@
 #	AVR32 port by Haavard Skinnemoen <hskinnemoen@atmel.com>
 #
 #	Usage:
-#	objdump -d vmlinux | stackcheck.pl [arch]
+#	objdump -d vmlinux | scripts/checkstack.pl [arch]
 #
 #	TODO :	Port to all architectures (one regex per arch)
 
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index bb08069b04a..83c5e76414c 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -84,7 +84,7 @@ help:
 # lxdialog stuff
 check-lxdialog  := $(srctree)/$(src)/lxdialog/check-lxdialog.sh
 
-# Use reursively expanded variables so we do not call gcc unless
+# Use recursively expanded variables so we do not call gcc unless
 # we really need to do so. (Do not call gcc as part of make mrproper)
 HOST_EXTRACFLAGS = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags)
 HOST_LOADLIBES   = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC))
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 2ef9a193fca..93ac52adb49 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -268,6 +268,9 @@ static struct symbol *sym_add_exported(const char *name, struct module *mod,
 			     "was in %s%s\n", mod->name, name,
 			     s->module->name,
 			     is_vmlinux(s->module->name) ?"":".ko");
+		} else {
+			/* In case Modules.symvers was out of date */
+			s->module = mod;
 		}
 	}
 	s->preloaded = 0;
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 6edb29f2b4a..0f657b5f3bc 100644
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -83,6 +83,7 @@ Maintainer: $name
 Standards-Version: 3.6.1
 
 Package: $packagename
+Provides: kernel-image-$version, linux-image-$version
 Architecture: any
 Description: User Mode Linux kernel, version $version
  User-mode Linux is a port of the Linux kernel to its own system call
@@ -104,6 +105,7 @@ Maintainer: $name
 Standards-Version: 3.6.1
 
 Package: $packagename
+Provides: kernel-image-$version, linux-image-$version
 Architecture: any
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
diff --git a/security/commoncap.c b/security/commoncap.c
index 48ca5b09276..43f902750a1 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -23,6 +23,7 @@
 #include <linux/xattr.h>
 #include <linux/hugetlb.h>
 #include <linux/mount.h>
+#include <linux/sched.h>
 
 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES
 /*
@@ -334,7 +335,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 	/* For init, we want to retain the capabilities set
 	 * in the init_task struct. Thus we skip the usual
 	 * capability rules */
-	if (!is_init(current)) {
+	if (!is_global_init(current)) {
 		current->cap_permitted = new_permitted;
 		current->cap_effective = bprm->cap_effective ?
 				new_permitted : 0;
diff --git a/sound/ppc/beep.c b/sound/ppc/beep.c
index a1aa89f2faf..566b5ab9d4e 100644
--- a/sound/ppc/beep.c
+++ b/sound/ppc/beep.c
@@ -236,8 +236,8 @@ int __init snd_pmac_attach_beep(struct snd_pmac *chip)
 	input_dev->id.product = 0x0001;
 	input_dev->id.version = 0x0100;
 
-	input_dev->evbit[0] = BIT(EV_SND);
-	input_dev->sndbit[0] = BIT(SND_BELL) | BIT(SND_TONE);
+	input_dev->evbit[0] = BIT_MASK(EV_SND);
+	input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
 	input_dev->event = snd_pmac_beep_event;
 	input_dev->dev.parent = &chip->pdev->dev;
 	input_set_drvdata(input_dev, chip);
diff --git a/sound/usb/caiaq/caiaq-input.c b/sound/usb/caiaq/caiaq-input.c
index a1de0c60895..cd536ca20e5 100644
--- a/sound/usb/caiaq/caiaq-input.c
+++ b/sound/usb/caiaq/caiaq-input.c
@@ -200,8 +200,9 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
 
         switch (dev->chip.usb_id) {
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_RIGKONTROL2):
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		input->absbit[0] = BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_Z);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		input->absbit[0] = BIT_MASK(ABS_X) | BIT_MASK(ABS_Y) |
+			BIT_MASK(ABS_Z);
 		input->keycode = keycode_rk2;
 		input->keycodesize = sizeof(char);
 		input->keycodemax = ARRAY_SIZE(keycode_rk2);
@@ -228,8 +229,8 @@ int snd_usb_caiaq_input_init(struct snd_usb_caiaqdev *dev)
 		snd_usb_caiaq_set_auto_msg(dev, 1, 10, 0);
 		break;
 	case USB_ID(USB_VID_NATIVEINSTRUMENTS, USB_PID_AK1):
-		input->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS);
-		input->absbit[0] = BIT(ABS_X);
+		input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+		input->absbit[0] = BIT_MASK(ABS_X);
 		input->keycode = keycode_ak1;
 		input->keycodesize = sizeof(char);
 		input->keycodemax = ARRAY_SIZE(keycode_ak1);