19 files changed, 575 insertions, 80 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 40ac7759c3b..33f55917f23 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -14,6 +14,7 @@ Following translations are available on the WWW:
 	- this file.
 ABI/
 	- info on kernel <-> userspace ABI and relative interface stability.
+
 BUG-HUNTING
 	- brute force method of doing binary search of patches to find bug.
 Changes
@@ -66,6 +67,8 @@ VGA-softcursor.txt
 	- how to change your VGA cursor from a blinking underscore.
 accounting/
 	- documentation on accounting and taskstats.
+acpi/
+	- info on ACPI-specific hooks in the kernel.
 aoe/
 	- description of AoE (ATA over Ethernet) along with config examples.
 applying-patches.txt
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING
index 6c816751b86..65022a87bf1 100644
--- a/Documentation/BUG-HUNTING
+++ b/Documentation/BUG-HUNTING
@@ -214,6 +214,23 @@ And recompile the kernel with CONFIG_DEBUG_INFO enabled:
   gdb vmlinux
   (gdb) p vt_ioctl
   (gdb) l *(0x<address of vt_ioctl> + 0xda8)
+or, as one command
+  (gdb) l *(vt_ioctl + 0xda8)
+
+If you have a call trace, such as :-
+>Call Trace:
+> [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5
+> [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e
+> [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
+> ...
+this shows the problem in the :jbd: module. You can load that module in gdb
+and list the relevant code.
+  gdb fs/jbd/jbd.ko
+  (gdb) p log_wait_commit
+  (gdb) l *(0x<address> + 0xa3)
+or
+  (gdb) l *(log_wait_commit + 0xa3)
+
 
 Another very useful option of the Kernel Hacking section in menuconfig is
 Debug memory allocations. This will help you see whether data has been
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 77436d73501..059aaf20951 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -165,6 +165,7 @@ X!Ilib/string.c
 !Emm/vmalloc.c
 !Imm/page_alloc.c
 !Emm/mempool.c
+!Emm/dmapool.c
 !Emm/page-writeback.c
 !Emm/truncate.c
      </sect1>
@@ -371,7 +372,6 @@ X!Iinclude/linux/device.h
 !Edrivers/base/class.c
 !Edrivers/base/firmware_class.c
 !Edrivers/base/transport_class.c
-!Edrivers/base/dmapool.c
 <!-- Cannot be included, because
      attribute_container_add_class_device_adapter
  and attribute_container_classdev_to_container
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 01825ee7db6..2e9d6b41f03 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -717,7 +717,7 @@ used, and when it gets full, throws out the least used one.
     <para>
 For our first example, we assume that all operations are in user
 context (ie. from system calls), so we can sleep.  This means we can
-use a semaphore to protect the cache and all the objects within
+use a mutex to protect the cache and all the objects within
 it.  Here's the code:
     </para>
 
@@ -725,7 +725,7 @@ it.  Here's the code:
 #include &lt;linux/list.h&gt;
 #include &lt;linux/slab.h&gt;
 #include &lt;linux/string.h&gt;
-#include &lt;asm/semaphore.h&gt;
+#include &lt;linux/mutex.h&gt;
 #include &lt;asm/errno.h&gt;
 
 struct object
@@ -737,7 +737,7 @@ struct object
 };
 
 /* Protects the cache, cache_num, and the objects within it */
-static DECLARE_MUTEX(cache_lock);
+static DEFINE_MUTEX(cache_lock);
 static LIST_HEAD(cache);
 static unsigned int cache_num = 0;
 #define MAX_CACHE_SIZE 10
@@ -789,17 +789,17 @@ int cache_add(int id, const char *name)
         obj-&gt;id = id;
         obj-&gt;popularity = 0;
 
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         __cache_add(obj);
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
         return 0;
 }
 
 void cache_delete(int id)
 {
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         __cache_delete(__cache_find(id));
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
 }
 
 int cache_find(int id, char *name)
@@ -807,13 +807,13 @@ int cache_find(int id, char *name)
         struct object *obj;
         int ret = -ENOENT;
 
-        down(&amp;cache_lock);
+        mutex_lock(&amp;cache_lock);
         obj = __cache_find(id);
         if (obj) {
                 ret = 0;
                 strcpy(name, obj-&gt;name);
         }
-        up(&amp;cache_lock);
+        mutex_unlock(&amp;cache_lock);
         return ret;
 }
 </programlisting>
@@ -853,7 +853,7 @@ The change is shown below, in standard patch format: the
          int popularity;
  };
 
--static DECLARE_MUTEX(cache_lock);
+-static DEFINE_MUTEX(cache_lock);
 +static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED;
  static LIST_HEAD(cache);
  static unsigned int cache_num = 0;
@@ -870,22 +870,22 @@ The change is shown below, in standard patch format: the
          obj-&gt;id = id;
          obj-&gt;popularity = 0;
 
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          __cache_add(obj);
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
          return 0;
  }
 
  void cache_delete(int id)
  {
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        unsigned long flags;
 +
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          __cache_delete(__cache_find(id));
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
  }
 
@@ -895,14 +895,14 @@ The change is shown below, in standard patch format: the
          int ret = -ENOENT;
 +        unsigned long flags;
 
--        down(&amp;cache_lock);
+-        mutex_lock(&amp;cache_lock);
 +        spin_lock_irqsave(&amp;cache_lock, flags);
          obj = __cache_find(id);
          if (obj) {
                  ret = 0;
                  strcpy(name, obj-&gt;name);
          }
--        up(&amp;cache_lock);
+-        mutex_unlock(&amp;cache_lock);
 +        spin_unlock_irqrestore(&amp;cache_lock, flags);
          return ret;
  }
diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt
new file mode 100644
index 00000000000..f6efb1ea559
--- /dev/null
+++ b/Documentation/acpi/method-tracing.txt
@@ -0,0 +1,26 @@
+/sys/module/acpi/parameters/:
+
+trace_method_name
+	The AML method name that the user wants to trace
+
+trace_debug_layer
+	The temporary debug_layer used when tracing the method.
+	Using 0xffffffff by default if it is 0.
+
+trace_debug_level
+	The temporary debug_level used when tracing the method.
+	Using 0x00ffffff by default if it is 0.
+
+trace_state
+	The status of the tracing feature.
+
+	"enabled" means this feature is enabled
+	and the AML method is traced every time it's executed.
+
+	"1" means this feature is enabled and the AML method
+	will only be traced during the next execution.
+
+	"disabled" means this feature is disabled.
+	Users can enable/disable this debug tracing feature by
+	"echo string > /sys/module/acpi/parameters/trace_state".
+	"string" should be one of "enable", "disable" and "1".
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt
index 63883a89212..74832837025 100644
--- a/Documentation/fb/deferred_io.txt
+++ b/Documentation/fb/deferred_io.txt
@@ -7,10 +7,10 @@ IO. The following example may be a useful explanation of how one such setup
 works:
 
 - userspace app like Xfbdev mmaps framebuffer
-- deferred IO and driver sets up nopage and page_mkwrite handlers
+- deferred IO and driver sets up fault and page_mkwrite handlers
 - userspace app tries to write to mmaped vaddress
-- we get pagefault and reach nopage handler
-- nopage handler finds and returns physical page
+- we get pagefault and reach fault handler
+- fault handler finds and returns physical page
 - we get page_mkwrite where we add this page to a list
 - schedule a workqueue task to be run after a delay
 - app continues writing to that page with no additional cost. this is
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index a7d9d179131..68ce1300a36 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -208,13 +208,6 @@ Who:	Randy Dunlap <randy.dunlap@oracle.com>
 
 ---------------------------
 
-What:  drivers depending on OSS_OBSOLETE
-When:  options in 2.6.23, code in 2.6.25
-Why:   obsolete OSS drivers
-Who:   Adrian Bunk <bunk@stusta.de>
-
----------------------------
-
 What: libata spindown skipping and warning
 When: Dec 2008
 Why:  Some halt(8) implementations synchronize caches for and spin
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index e2799b5fafe..5681e2fa149 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1029,6 +1029,14 @@ nr_inodes
 Denotes the  number  of  inodes the system has allocated. This number will
 grow and shrink dynamically.
 
+nr_open
+-------
+
+Denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
 nr_free_inodes
 --------------
 
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9ad4e6fc56f..8ea41b6e6a8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -147,8 +147,10 @@ and is between 256 and 4096 characters. It is defined in the file
 			default: 0
 
 	acpi_sleep=	[HW,ACPI] Sleep options
-			Format: { s3_bios, s3_mode }
-			See Documentation/power/video.txt
+			Format: { s3_bios, s3_mode, s3_beep }
+			See Documentation/power/video.txt for s3_bios and s3_mode.
+			s3_beep is for debugging; it makes the PC's speaker beep
+			as soon as the kernel's real-mode entry point is called.
 
 	acpi_sci=	[HW,ACPI] ACPI System Control Interrupt trigger mode
 			Format: { level | edge | high | low }
@@ -780,6 +782,9 @@ and is between 256 and 4096 characters. It is defined in the file
 			loop use the MONITOR/MWAIT idle loop anyways. Performance should be the same
 			as idle=poll.
 
+	ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
+			Claim all unknown PCI IDE storage controllers.
+
 	ignore_loglevel	[KNL]
 			Ignore loglevel setting - this will print /all/
 			kernel messages to the console. Useful for debugging.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 53a63890aea..30c101761d0 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -96,7 +96,9 @@ or in registers (e.g., for x86_64 or for an i386 fastcall function).
 The jprobe will work in either case, so long as the handler's
 prototype matches that of the probed function.
 
-1.3 How Does a Return Probe Work?
+1.3 Return Probes
+
+1.3.1 How Does a Return Probe Work?
 
 When you call register_kretprobe(), Kprobes establishes a kprobe at
 the entry to the function.  When the probed function is called and this
@@ -107,9 +109,9 @@ At boot time, Kprobes registers a kprobe at the trampoline.
 
 When the probed function executes its return instruction, control
 passes to the trampoline and that probe is hit.  Kprobes' trampoline
-handler calls the user-specified handler associated with the kretprobe,
-then sets the saved instruction pointer to the saved return address,
-and that's where execution resumes upon return from the trap.
+handler calls the user-specified return handler associated with the
+kretprobe, then sets the saved instruction pointer to the saved return
+address, and that's where execution resumes upon return from the trap.
 
 While the probed function is executing, its return address is
 stored in an object of type kretprobe_instance.  Before calling
@@ -131,6 +133,30 @@ zero when the return probe is registered, and is incremented every
 time the probed function is entered but there is no kretprobe_instance
 object available for establishing the return probe.
 
+1.3.2 Kretprobe entry-handler
+
+Kretprobes also provides an optional user-specified handler which runs
+on function entry. This handler is specified by setting the entry_handler
+field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the
+function entry is hit, the user-defined entry_handler, if any, is invoked.
+If the entry_handler returns 0 (success) then a corresponding return handler
+is guaranteed to be called upon function return. If the entry_handler
+returns a non-zero error then Kprobes leaves the return address as is, and
+the kretprobe has no further effect for that particular function instance.
+
+Multiple entry and return handler invocations are matched using the unique
+kretprobe_instance object associated with them. Additionally, a user
+may also specify per return-instance private data to be part of each
+kretprobe_instance object. This is especially useful when sharing private
+data between corresponding user entry and return handlers. The size of each
+private data object can be specified at kretprobe registration time by
+setting the data_size field of the kretprobe struct. This data can be
+accessed through the data field of each kretprobe_instance object.
+
+In case probed function is entered but there is no kretprobe_instance
+object available, then in addition to incrementing the nmissed count,
+the user entry_handler invocation is also skipped.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
@@ -274,6 +300,8 @@ of interest:
 - ret_addr: the return address
 - rp: points to the corresponding kretprobe object
 - task: points to the corresponding task struct
+- data: points to per return-instance private data; see "Kretprobe
+	entry-handler" for details.
 
 The regs_return_value(regs) macro provides a simple abstraction to
 extract the return value from the appropriate register as defined by
@@ -556,23 +584,52 @@ report failed calls to sys_open().
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
+#include <linux/ktime.h>
+
+/* per-instance private data */
+struct my_data {
+	ktime_t entry_stamp;
+};
 
 static const char *probed_func = "sys_open";
 
-/* Return-probe handler: If the probed function fails, log the return value. */
-static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+/* Timestamp function entry. */
+static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
+{
+	struct my_data *data;
+
+	if(!current->mm)
+		return 1; /* skip kernel threads */
+
+	data = (struct my_data *)ri->data;
+	data->entry_stamp = ktime_get();
+	return 0;
+}
+
+/* If the probed function failed, log the return value and duration.
+ * Duration may turn out to be zero consistently, depending upon the
+ * granularity of time accounting on the platform. */
+static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	int retval = regs_return_value(regs);
+	struct my_data *data = (struct my_data *)ri->data;
+	s64 delta;
+	ktime_t now;
+
 	if (retval < 0) {
-		printk("%s returns %d\n", probed_func, retval);
+		now = ktime_get();
+		delta = ktime_to_ns(ktime_sub(now, data->entry_stamp));
+		printk("%s: return val = %d (duration = %lld ns)\n",
+		       probed_func, retval, delta);
 	}
 	return 0;
 }
 
 static struct kretprobe my_kretprobe = {
-	.handler = ret_handler,
-	/* Probe up to 20 instances concurrently. */
-	.maxactive = 20
+	.handler = return_handler,
+	.entry_handler = entry_handler,
+	.data_size = sizeof(struct my_data),
+	.maxactive = 20, /* probe up to 20 instances concurrently */
 };
 
 static int __init kretprobe_init(void)
@@ -584,7 +641,7 @@ static int __init kretprobe_init(void)
 		printk("register_kretprobe failed, returned %d\n", ret);
 		return -1;
 	}
-	printk("Planted return probe at %p\n", my_kretprobe.kp.addr);
+	printk("Kretprobe active on %s\n", my_kretprobe.kp.symbol_name);
 	return 0;
 }
 
@@ -594,7 +651,7 @@ static void __exit kretprobe_exit(void)
 	printk("kretprobe unregistered\n");
 	/* nmissed > 0 suggests that maxactive was set too low. */
 	printk("Missed probing %d instances of %s\n",
-		my_kretprobe.nmissed, probed_func);
+	       my_kretprobe.nmissed, probed_func);
 }
 
 module_init(kretprobe_init)
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
index f38b59d00c6..130b6e87aa7 100644
--- a/Documentation/kref.txt
+++ b/Documentation/kref.txt
@@ -141,10 +141,10 @@ The last rule (rule 3) is the nastiest one to handle.  Say, for
 instance, you have a list of items that are each kref-ed, and you wish
 to get the first one.  You can't just pull the first item off the list
 and kref_get() it.  That violates rule 3 because you are not already
-holding a valid pointer.  You must add locks or semaphores.  For
-instance:
+holding a valid pointer.  You must add a mutex (or some other lock).
+For instance:
 
-static DECLARE_MUTEX(sem);
+static DEFINE_MUTEX(mutex);
 static LIST_HEAD(q);
 struct my_data
 {
@@ -155,12 +155,12 @@ struct my_data
 static struct my_data *get_entry()
 {
 	struct my_data *entry = NULL;
-	down(&sem);
+	mutex_lock(&mutex);
 	if (!list_empty(&q)) {
 		entry = container_of(q.next, struct my_q_entry, link);
 		kref_get(&entry->refcount);
 	}
-	up(&sem);
+	mutex_unlock(&mutex);
 	return entry;
 }
 
@@ -174,9 +174,9 @@ static void release_entry(struct kref *ref)
 
 static void put_entry(struct my_data *entry)
 {
-	down(&sem);
+	mutex_lock(&mutex);
 	kref_put(&entry->refcount, release_entry);
-	up(&sem);
+	mutex_unlock(&mutex);
 }
 
 The kref_put() return value is useful if you do not want to hold the
@@ -191,13 +191,13 @@ static void release_entry(struct kref *ref)
 
 static void put_entry(struct my_data *entry)
 {
-	down(&sem);
+	mutex_lock(&mutex);
 	if (kref_put(&entry->refcount, release_entry)) {
 		list_del(&entry->link);
-		up(&sem);
+		mutex_unlock(&mutex);
 		kfree(entry);
 	} else
-		up(&sem);
+		mutex_unlock(&mutex);
 }
 
 This is really more useful if you have to call other routines as part
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 5818628207b..396cdd982c2 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -416,6 +416,16 @@ also have
      sectors in total that could need to be processed.  The two
      numbers are separated by a '/'  thus effectively showing one
      value, a fraction of the process that is complete.
+     A 'select' on this attribute will return when resync completes,
+     when it reaches the current sync_max (below) and possibly at
+     other times.
+
+   sync_max
+     This is a number of sectors at which point a resync/recovery
+     process will pause.  When a resync is active, the value can
+     only ever be increased, never decreased.  The value of 'max'
+     effectively disables the limit.
+
 
    sync_speed
      This shows the current actual speed, in K/sec, of the current
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index aea7e920966..9d60ab717a7 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -386,6 +386,11 @@ before suspending; then remount them after resuming.
 There is a work-around for this problem.  For more information, see
 Documentation/usb/persist.txt.
 
+Q: Can I suspend-to-disk using a swap partition under LVM?
+
+A: No. You can suspend successfully, but you'll not be able to
+resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+
 Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
 compiled with the similar configuration files. Anyway I found that
 suspend to disk (and resume) is much slower on 2.6.16 compared to
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index e20b19c1b60..8deffcd68cb 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -182,8 +182,8 @@ driver returns ENOIOCTLCMD.  Some common examples:
 	since the frequency is stored in the irq_freq member of the rtc_device
 	structure.  Your driver needs to initialize the irq_freq member during
 	init.  Make sure you check the requested frequency is in range of your
-	hardware in the irq_set_freq function.  If you cannot actually change
-	the frequency, just return -ENOTTY.
+	hardware in the irq_set_freq function.  If it isn't, return -EINVAL.  If
+	you cannot actually change the frequency, do not define irq_set_freq.
 
 If all else fails, check out the rtc-test.c driver!
 
@@ -268,8 +268,8 @@ int main(int argc, char **argv)
 		/* This read will block */
 		retval = read(fd, &data, sizeof(unsigned long));
 		if (retval == -1) {
-		        perror("read");
-		        exit(errno);
+			perror("read");
+			exit(errno);
 		}
 		fprintf(stderr, " %d",i);
 		fflush(stderr);
@@ -326,11 +326,11 @@ test_READ:
 		rtc_tm.tm_sec %= 60;
 		rtc_tm.tm_min++;
 	}
-	if  (rtc_tm.tm_min == 60) {
+	if (rtc_tm.tm_min == 60) {
 		rtc_tm.tm_min = 0;
 		rtc_tm.tm_hour++;
 	}
-	if  (rtc_tm.tm_hour == 24)
+	if (rtc_tm.tm_hour == 24)
 		rtc_tm.tm_hour = 0;
 
 	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
@@ -407,8 +407,8 @@ test_PIE:
 					"\n...Periodic IRQ rate is fixed\n");
 				goto done;
 			}
-		        perror("RTC_IRQP_SET ioctl");
-		        exit(errno);
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
 		}
 
 		fprintf(stderr, "\n%ldHz:\t", tmp);
@@ -417,27 +417,27 @@ test_PIE:
 		/* Enable periodic interrupts */
 		retval = ioctl(fd, RTC_PIE_ON, 0);
 		if (retval == -1) {
-		        perror("RTC_PIE_ON ioctl");
-		        exit(errno);
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
 		}
 
 		for (i=1; i<21; i++) {
-		        /* This blocks */
-		        retval = read(fd, &data, sizeof(unsigned long));
-		        if (retval == -1) {
-				       perror("read");
-				       exit(errno);
-		        }
-		        fprintf(stderr, " %d",i);
-		        fflush(stderr);
-		        irqcount++;
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
 		}
 
 		/* Disable periodic interrupts */
 		retval = ioctl(fd, RTC_PIE_OFF, 0);
 		if (retval == -1) {
-		        perror("RTC_PIE_OFF ioctl");
-		        exit(errno);
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
 		}
 	}
 
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index aa986a35e99..f99254327ae 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -23,6 +23,7 @@ Currently, these files are in /proc/sys/fs:
 - inode-max
 - inode-nr
 - inode-state
+- nr_open
 - overflowuid
 - overflowgid
 - suid_dumpable
@@ -91,6 +92,15 @@ usage of file handles and you don't need to increase the maximum.
 
 ==============================================================
 
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
 inode-max, inode-nr & inode-state:
 
 As with file handles, the kernel allocates the inode structures
diff --git a/Documentation/thinkpad-acpi.txt b/Documentation/thinkpad-acpi.txt
index 10c041ca13c..6c2477754a2 100644
--- a/Documentation/thinkpad-acpi.txt
+++ b/Documentation/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
 		     ThinkPad ACPI Extras Driver
 
-                            Version 0.17
-                         October 04th, 2007
+                            Version 0.19
+                         January 06th, 2008
 
                Borislav Deianov <borislav@users.sf.net>
              Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -215,6 +215,11 @@ The following commands can be written to the /proc/acpi/ibm/hotkey file:
 	... any other 8-hex-digit mask ...
 	echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
 
+The procfs interface does not support NVRAM polling control.  So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
 sysfs notes:
 
 	hotkey_bios_enabled:
@@ -231,17 +236,26 @@ sysfs notes:
 		to this value.
 
 	hotkey_enable:
-		Enables/disables the hot keys feature, and reports
-		current status of the hot keys feature.
+		Enables/disables the hot keys feature in the ACPI
+		firmware, and reports current status of the hot keys
+		feature.  Has no effect on the NVRAM hot key polling
+		functionality.
 
 		0: disables the hot keys feature / feature disabled
 		1: enables the hot keys feature / feature enabled
 
 	hotkey_mask:
-		bit mask to enable driver-handling and ACPI event
-		generation for each hot key (see above).  Returns the
-		current status of the hot keys mask, and allows one to
-		modify it.
+		bit mask to enable driver-handling (and depending on
+		the firmware, ACPI event generation) for each hot key
+		(see above).  Returns the current status of the hot keys
+		mask, and allows one to modify it.
+
+		Note: when NVRAM polling is active, the firmware mask
+		will be different from the value returned by
+		hotkey_mask.  The driver will retain enabled bits for
+		hotkeys that are under NVRAM polling even if the
+		firmware refuses them, and will not set these bits on
+		the firmware hot key mask.
 
 	hotkey_all_mask:
 		bit mask that should enable event reporting for all
@@ -257,12 +271,48 @@ sysfs notes:
 		handled by the firmware anyway.  Echo it to
 		hotkey_mask above, to use.
 
+	hotkey_source_mask:
+		bit mask that selects which hot keys will the driver
+		poll the NVRAM for.  This is auto-detected by the driver
+		based on the capabilities reported by the ACPI firmware,
+		but it can be overridden at runtime.
+
+		Hot keys whose bits are set in both hotkey_source_mask
+		and also on hotkey_mask are polled for in NVRAM.  Only a
+		few hot keys are available through CMOS NVRAM polling.
+
+		Warning: when in NVRAM mode, the volume up/down/mute
+		keys are synthesized according to changes in the mixer,
+		so you have to use volume up or volume down to unmute,
+		as per the ThinkPad volume mixer user interface.  When
+		in ACPI event mode, volume up/down/mute are reported as
+		separate events, but this behaviour may be corrected in
+		future releases of this driver, in which case the
+		ThinkPad volume mixer user interface semanthics will be
+		enforced.
+
+	hotkey_poll_freq:
+		frequency in Hz for hot key polling. It must be between
+		0 and 25 Hz.  Polling is only carried out when strictly
+		needed.
+
+		Setting hotkey_poll_freq to zero disables polling, and
+		will cause hot key presses that require NVRAM polling
+		to never be reported.
+
+		Setting hotkey_poll_freq too low will cause repeated
+		pressings of the same hot key to be misreported as a
+		single key press, or to not even be detected at all.
+		The recommended polling frequency is 10Hz.
+
 	hotkey_radio_sw:
 		if the ThinkPad has a hardware radio switch, this
 		attribute will read 0 if the switch is in the "radios
 		disabled" postition, and 1 if the switch is in the
 		"radios enabled" position.
 
+		This attribute has poll()/select() support.
+
 	hotkey_report_mode:
 		Returns the state of the procfs ACPI event report mode
 		filter for hot keys.  If it is set to 1 (the default),
@@ -277,6 +327,25 @@ sysfs notes:
 		May return -EPERM (write access locked out by module
 		parameter) or -EACCES (read-only).
 
+	wakeup_reason:
+		Set to 1 if the system is waking up because the user
+		requested a bay ejection.  Set to 2 if the system is
+		waking up because the user requested the system to
+		undock.  Set to zero for normal wake-ups or wake-ups
+		due to unknown reasons.
+
+		This attribute has poll()/select() support.
+
+	wakeup_hotunplug_complete:
+		Set to 1 if the system was waken up because of an
+		undock or bay ejection request, and that request
+		was sucessfully completed.  At this point, it might
+		be useful to send the system back to sleep, at the
+		user's choice.  Refer to HKEY events 0x4003 and
+		0x3003, below.
+
+		This attribute has poll()/select() support.
+
 input layer notes:
 
 A Hot key is mapped to a single input layer EV_KEY event, possibly
@@ -427,6 +496,23 @@ Non hot-key ACPI HKEY event map:
 The above events are not propagated by the driver, except for legacy
 compatibility purposes when hotkey_report_mode is set to 1.
 
+0x2304		System is waking up from suspend to undock
+0x2305		System is waking up from suspend to eject bay
+0x2404		System is waking up from hibernation to undock
+0x2405		System is waking up from hibernation to eject bay
+
+The above events are never propagated by the driver.
+
+0x3003		Bay ejection (see 0x2x05) complete, can sleep again
+0x4003		Undocked (see 0x2x04), can sleep again
+0x5009		Tablet swivel: switched to tablet mode
+0x500A		Tablet swivel: switched to normal mode
+0x500B		Tablet pen insterted into its storage bay
+0x500C		Tablet pen removed from its storage bay
+0x5010		Brightness level changed (newer Lenovo BIOSes)
+
+The above events are propagated by the driver.
+
 Compatibility notes:
 
 ibm-acpi and thinkpad-acpi 0.15 (mainline kernels before 2.6.23) never
@@ -1263,3 +1349,17 @@ Sysfs interface changelog:
 		and the hwmon class for libsensors4 (lm-sensors 3)
 		compatibility.  Moved all hwmon attributes to this
 		new platform device.
+
+0x020100:	Marker for thinkpad-acpi with hot key NVRAM polling
+		support.  If you must, use it to know you should not
+		start an userspace NVRAM poller (allows to detect when
+		NVRAM is compiled out by the user because it is
+		unneeded/undesired in the first place).
+0x020101:	Marker for thinkpad-acpi with hot key NVRAM polling
+		and proper hotkey_mask semanthics (version 8 of the
+		NVRAM polling patch).  Some development snapshots of
+		0.18 had an earlier version that did strange things
+		to hotkey_mask.
+
+0x020200:	Add poll()/select() support to the following attributes:
+		hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
new file mode 100644
index 00000000000..6223eace3c0
--- /dev/null
+++ b/Documentation/unaligned-memory-access.txt
@@ -0,0 +1,226 @@
+UNALIGNED MEMORY ACCESSES
+=========================
+
+Linux runs on a wide variety of architectures which have varying behaviour
+when it comes to memory access. This document presents some details about
+unaligned accesses, why you need to write code that doesn't cause them,
+and how to write such code!
+
+
+The definition of an unaligned access
+=====================================
+
+Unaligned memory accesses occur when you try to read N bytes of data starting
+from an address that is not evenly divisible by N (i.e. addr % N != 0).
+For example, reading 4 bytes of data from address 0x10004 is fine, but
+reading 4 bytes of data from address 0x10005 would be an unaligned memory
+access.
+
+The above may seem a little vague, as memory access can happen in different
+ways. The context here is at the machine code level: certain instructions read
+or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
+assembly). As will become clear, it is relatively easy to spot C statements
+which will compile to multiple-byte memory access instructions, namely when
+dealing with types such as u16, u32 and u64.
+
+
+Natural alignment
+=================
+
+The rule mentioned above forms what we refer to as natural alignment:
+When accessing N bytes of memory, the base memory address must be evenly
+divisible by N, i.e. addr % N == 0.
+
+When writing code, assume the target architecture has natural alignment
+requirements.
+
+In reality, only a few architectures require natural alignment on all sizes
+of memory access. However, we must consider ALL supported architectures;
+writing code that satisfies natural alignment requirements is the easiest way
+to achieve full portability.
+
+
+Why unaligned access is bad
+===========================
+
+The effects of performing an unaligned memory access vary from architecture
+to architecture. It would be easy to write a whole document on the differences
+here; a summary of the common scenarios is presented below:
+
+ - Some architectures are able to perform unaligned memory accesses
+   transparently, but there is usually a significant performance cost.
+ - Some architectures raise processor exceptions when unaligned accesses
+   happen. The exception handler is able to correct the unaligned access,
+   at significant cost to performance.
+ - Some architectures raise processor exceptions when unaligned accesses
+   happen, but the exceptions do not contain enough information for the
+   unaligned access to be corrected.
+ - Some architectures are not capable of unaligned memory access, but will
+   silently perform a different memory access to the one that was requested,
+   resulting a a subtle code bug that is hard to detect!
+
+It should be obvious from the above that if your code causes unaligned
+memory accesses to happen, your code will not work correctly on certain
+platforms and will cause performance problems on others.
+
+
+Code that does not cause unaligned access
+=========================================
+
+At first, the concepts above may seem a little hard to relate to actual
+coding practice. After all, you don't have a great deal of control over
+memory addresses of certain variables, etc.
+
+Fortunately things are not too complex, as in most cases, the compiler
+ensures that things will work for you. For example, take the following
+structure:
+
+	struct foo {
+		u16 field1;
+		u32 field2;
+		u8 field3;
+	};
+
+Let us assume that an instance of the above structure resides in memory
+starting at address 0x10000. With a basic level of understanding, it would
+not be unreasonable to expect that accessing field2 would cause an unaligned
+access. You'd be expecting field2 to be located at offset 2 bytes into the
+structure, i.e. address 0x10002, but that address is not evenly divisible
+by 4 (remember, we're reading a 4 byte value here).
+
+Fortunately, the compiler understands the alignment constraints, so in the
+above case it would insert 2 bytes of padding in between field1 and field2.
+Therefore, for standard structure types you can always rely on the compiler
+to pad structures so that accesses to fields are suitably aligned (assuming
+you do not cast the field to a type of different length).
+
+Similarly, you can also rely on the compiler to align variables and function
+parameters to a naturally aligned scheme, based on the size of the type of
+the variable.
+
+At this point, it should be clear that accessing a single byte (u8 or char)
+will never cause an unaligned access, because all memory addresses are evenly
+divisible by one.
+
+On a related topic, with the above considerations in mind you may observe
+that you could reorder the fields in the structure in order to place fields
+where padding would otherwise be inserted, and hence reduce the overall
+resident memory size of structure instances. The optimal layout of the
+above example is:
+
+	struct foo {
+		u32 field2;
+		u16 field1;
+		u8 field3;
+	};
+
+For a natural alignment scheme, the compiler would only have to add a single
+byte of padding at the end of the structure. This padding is added in order
+to satisfy alignment constraints for arrays of these structures.
+
+Another point worth mentioning is the use of __attribute__((packed)) on a
+structure type. This GCC-specific attribute tells the compiler never to
+insert any padding within structures, useful when you want to use a C struct
+to represent some data that comes in a fixed arrangement 'off the wire'.
+
+You might be inclined to believe that usage of this attribute can easily
+lead to unaligned accesses when accessing fields that do not satisfy
+architectural alignment requirements. However, again, the compiler is aware
+of the alignment constraints and will generate extra instructions to perform
+the memory access in a way that does not cause unaligned access. Of course,
+the extra instructions obviously cause a loss in performance compared to the
+non-packed case, so the packed attribute should only be used when avoiding
+structure padding is of importance.
+
+
+Code that causes unaligned access
+=================================
+
+With the above in mind, let's move onto a real life example of a function
+that can cause an unaligned memory access. The following function adapted
+from include/linux/etherdevice.h is an optimized routine to compare two
+ethernet MAC addresses for equality.
+
+unsigned int compare_ether_addr(const u8 *addr1, const u8 *addr2)
+{
+	const u16 *a = (const u16 *) addr1;
+	const u16 *b = (const u16 *) addr2;
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+}
+
+In the above function, the reference to a[0] causes 2 bytes (16 bits) to
+be read from memory starting at address addr1. Think about what would happen
+if addr1 was an odd address such as 0x10003. (Hint: it'd be an unaligned
+access.)
+
+Despite the potential unaligned access problems with the above function, it
+is included in the kernel anyway but is understood to only work on
+16-bit-aligned addresses. It is up to the caller to ensure this alignment or
+not use this function at all. This alignment-unsafe function is still useful
+as it is a decent optimization for the cases when you can ensure alignment,
+which is true almost all of the time in ethernet networking context.
+
+
+Here is another example of some code that could cause unaligned accesses:
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		*((u32 *) data) = cpu_to_le32(value);
+		[...]
+	}
+
+This code will cause unaligned accesses every time the data parameter points
+to an address that is not evenly divisible by 4.
+
+In summary, the 2 main scenarios where you may run into unaligned access
+problems involve:
+ 1. Casting variables to types of different lengths
+ 2. Pointer arithmetic followed by access to at least 2 bytes of data
+
+
+Avoiding unaligned accesses
+===========================
+
+The easiest way to avoid unaligned access is to use the get_unaligned() and
+put_unaligned() macros provided by the <asm/unaligned.h> header file.
+
+Going back to an earlier example of code that potentially causes unaligned
+access:
+
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		*((u32 *) data) = cpu_to_le32(value);
+		[...]
+	}
+
+To avoid the unaligned memory access, you would rewrite it as follows:
+
+	void myfunc(u8 *data, u32 value)
+	{
+		[...]
+		value = cpu_to_le32(value);
+		put_unaligned(value, (u32 *) data);
+		[...]
+	}
+
+The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
+memory and you wish to avoid unaligned access, its usage is as follows:
+
+	u32 value = get_unaligned((u32 *) data);
+
+These macros work work for memory accesses of any length (not just 32 bits as
+in the examples above). Be aware that when compared to standard access of
+aligned memory, using these macros to access unaligned memory can be costly in
+terms of performance.
+
+If use of such macros is not convenient, another option is to use memcpy(),
+where the source or destination (or both) are of type u8* or unsigned char*.
+Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+--
+Author: Daniel Drake <dsd@gentoo.org>
+With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
+Johannes Berg, Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock,
+Uli Kunitz, Vadim Lobanov
+
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX
index 752613c4cea..7b0ceaaad7a 100644
--- a/Documentation/w1/masters/00-INDEX
+++ b/Documentation/w1/masters/00-INDEX
@@ -4,3 +4,5 @@ ds2482
 	- The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
 ds2490
 	- The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+w1-gpio
+	- GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio
new file mode 100644
index 00000000000..af5d3b4aa85
--- /dev/null
+++ b/Documentation/w1/masters/w1-gpio
@@ -0,0 +1,33 @@
+Kernel driver w1-gpio
+=====================
+
+Author: Ville Syrjala <syrjala@sci.fi>
+
+
+Description
+-----------
+
+GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
+wire and the GPIO pin can be specified using platform data.
+
+
+Example (mach-at91)
+-------------------
+
+#include <linux/w1-gpio.h>
+
+static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
+	.pin		= AT91_PIN_PB20,
+	.is_open_drain	= 1,
+};
+
+static struct platform_device foo_w1_device = {
+	.name			= "w1-gpio",
+	.id			= -1,
+	.dev.platform_data	= &foo_w1_gpio_pdata,
+};
+
+...
+	at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
+	at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
+	platform_device_register(&foo_w1_device);