From f5c1e2ea71603bc2962041fef9dd902cb8626a1d Mon Sep 17 00:00:00 2001
From: Alan Tyson <atyson@hp.com>
Date: Sat, 10 Mar 2007 06:05:14 +0000
Subject: [CIFS] reset mode when client notices that ATTR_READONLY is no longer
 set

Signed-off-by: Alan Tyso <atyson@hp.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES   | 3 +++
 fs/cifs/inode.c   | 6 ++++++
 fs/cifs/readdir.c | 4 ++++
 3 files changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 6247628bdae..1cbe5615993 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,6 +4,9 @@ Fix mtime bouncing around from local idea of last write times to remote time.
 Fix hang (in i_size_read) when simultaneous size update of same remote file
 on smp system corrupts sequence number. Do not reread unnecessarily partial page
 (which we are about to overwrite anyway) when writing out file opened rw.
+When DOS attribute of file on non-Unix server's file changes on the server side
+from read-only back to read-write, reflect this change in default file mode
+(we had been leaving a file's mode read-only until the inode were reloaded)
 
 Version 1.47
 ------------
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 86b9dbbd844..e75a844accd 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -494,6 +494,12 @@ int cifs_get_inode_info(struct inode **pinode,
 			   mode e.g. 555 */
 			if (cifsInfo->cifsAttrs & ATTR_READONLY)
 				inode->i_mode &= ~(S_IWUGO);
+			else if ((inode->i_mode & S_IWUGO) == 0)
+				/* the ATTR_READONLY flag may have been	*/
+				/* changed on server -- set any w bits	*/
+				/* allowed by mnt_file_mode		*/
+				inode->i_mode |= (S_IWUGO &
+						  cifs_sb->mnt_file_mode);
 		/* BB add code here -
 		   validate if device or weird share or device type? */
 		}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 44cfb528797..2a374d5215a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -219,6 +219,10 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
 		tmp_inode->i_mode |= S_IFREG;
 		if (attr & ATTR_READONLY)
 			tmp_inode->i_mode &= ~(S_IWUGO);
+		else if ((tmp_inode->i_mode & S_IWUGO) == 0)
+			/* the ATTR_READONLY flag may have been changed on   */
+		   	/* server -- set any w bits allowed by mnt_file_mode */
+			tmp_inode->i_mode |= (S_IWUGO & cifs_sb->mnt_file_mode);
 	} /* could add code here - to validate if device or weird share type? */
 
 	/* can not fill in nlink here as in qpathinfo version and Unx search */
-- 
cgit v1.2.3-70-g09d2


From c24f72cc7ca829bbad0532ddf315ace3ae1c359e Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Sat, 3 Feb 2007 03:14:30 -0800
Subject: ocfs2: Proper cleanup in case of error in
 ocfs2_register_hb_callbacks()

If ocfs2_register_hb_callbacks() succeeds on its first callback but fails
its second, it doesn't release the first on the way out. Fix that.

While we're at it, o2hb_unregister_callback() never returns anything but
0, so let's make it void.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c |  6 ++----
 fs/ocfs2/cluster/heartbeat.h |  2 +-
 fs/ocfs2/cluster/tcp.c       | 13 ++-----------
 fs/ocfs2/heartbeat.c         | 15 +++++----------
 4 files changed, 10 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 5a9779bb923..0f2cfecd42c 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1682,7 +1682,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(o2hb_register_callback);
 
-int o2hb_unregister_callback(struct o2hb_callback_func *hc)
+void o2hb_unregister_callback(struct o2hb_callback_func *hc)
 {
 	BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
 
@@ -1690,15 +1690,13 @@ int o2hb_unregister_callback(struct o2hb_callback_func *hc)
 	     __builtin_return_address(0), hc);
 
 	if (list_empty(&hc->hc_item))
-		return 0;
+		return;
 
 	down_write(&o2hb_callback_sem);
 
 	list_del_init(&hc->hc_item);
 
 	up_write(&o2hb_callback_sem);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(o2hb_unregister_callback);
 
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h
index cac6223206a..cc6d40b3977 100644
--- a/fs/ocfs2/cluster/heartbeat.h
+++ b/fs/ocfs2/cluster/heartbeat.h
@@ -70,7 +70,7 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
 			 void *data,
 			 int priority);
 int o2hb_register_callback(struct o2hb_callback_func *hc);
-int o2hb_unregister_callback(struct o2hb_callback_func *hc);
+void o2hb_unregister_callback(struct o2hb_callback_func *hc);
 void o2hb_fill_node_map(unsigned long *map,
 			unsigned bytes);
 void o2hb_init(void);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 1718215fc01..69caf3e12fe 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1638,17 +1638,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 
 void o2net_unregister_hb_callbacks(void)
 {
-	int ret;
-
-	ret = o2hb_unregister_callback(&o2net_hb_up);
-	if (ret < 0)
-		mlog(ML_ERROR, "Status return %d unregistering heartbeat up "
-		     "callback!\n", ret);
-
-	ret = o2hb_unregister_callback(&o2net_hb_down);
-	if (ret < 0)
-		mlog(ML_ERROR, "Status return %d unregistering heartbeat down "
-		     "callback!\n", ret);
+	o2hb_unregister_callback(&o2net_hb_up);
+	o2hb_unregister_callback(&o2net_hb_down);
 }
 
 int o2net_register_hb_callbacks(void)
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c
index 8fc52d6d0ce..b25ef63781b 100644
--- a/fs/ocfs2/heartbeat.c
+++ b/fs/ocfs2/heartbeat.c
@@ -164,8 +164,10 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
 	}
 
 	status = o2hb_register_callback(&osb->osb_hb_up);
-	if (status < 0)
+	if (status < 0) {
 		mlog_errno(status);
+		o2hb_unregister_callback(&osb->osb_hb_down);
+	}
 
 bail:
 	return status;
@@ -173,18 +175,11 @@ bail:
 
 void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
 {
-	int status;
-
 	if (ocfs2_mount_local(osb))
 		return;
 
-	status = o2hb_unregister_callback(&osb->osb_hb_down);
-	if (status < 0)
-		mlog_errno(status);
-
-	status = o2hb_unregister_callback(&osb->osb_hb_up);
-	if (status < 0)
-		mlog_errno(status);
+	o2hb_unregister_callback(&osb->osb_hb_down);
+	o2hb_unregister_callback(&osb->osb_hb_up);
 }
 
 void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
-- 
cgit v1.2.3-70-g09d2


From e6c352dbc0f4dc7e3f82feafb9e6207c5814a189 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Sat, 3 Feb 2007 03:04:20 -0800
Subject: ocfs2: Concurrent access of o2hb_region->hr_task was not locked

This means that a build-up and a teardown could race which would result in a
double-kthread_stop().

Protect the setting and clearing of hr_task with o2hb_live_lock, as it's not
a common thing and not performance critical.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 0f2cfecd42c..eba282da500 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1234,6 +1234,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 				     const char *page,
 				     size_t count)
 {
+	struct task_struct *hb_task;
 	long fd;
 	int sectsize;
 	char *p = (char *)page;
@@ -1319,20 +1320,28 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 	 */
 	atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1);
 
-	reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
-				   reg->hr_item.ci_name);
-	if (IS_ERR(reg->hr_task)) {
-		ret = PTR_ERR(reg->hr_task);
+	hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
+			      reg->hr_item.ci_name);
+	if (IS_ERR(hb_task)) {
+		ret = PTR_ERR(hb_task);
 		mlog_errno(ret);
-		reg->hr_task = NULL;
 		goto out;
 	}
 
+	spin_lock(&o2hb_live_lock);
+	reg->hr_task = hb_task;
+	spin_unlock(&o2hb_live_lock);
+
 	ret = wait_event_interruptible(o2hb_steady_queue,
 				atomic_read(&reg->hr_steady_iterations) == 0);
 	if (ret) {
-		kthread_stop(reg->hr_task);
+		spin_lock(&o2hb_live_lock);
+		hb_task = reg->hr_task;
 		reg->hr_task = NULL;
+		spin_unlock(&o2hb_live_lock);
+
+		if (hb_task)
+			kthread_stop(hb_task);
 		goto out;
 	}
 
@@ -1354,10 +1363,17 @@ out:
 static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
                                       char *page)
 {
-	if (!reg->hr_task)
+	pid_t pid = 0;
+
+	spin_lock(&o2hb_live_lock);
+	if (reg->hr_task)
+		pid = reg->hr_task->pid;
+	spin_unlock(&o2hb_live_lock);
+
+	if (!pid)
 		return 0;
 
-	return sprintf(page, "%u\n", reg->hr_task->pid);
+	return sprintf(page, "%u\n", pid);
 }
 
 struct o2hb_region_attribute {
@@ -1495,13 +1511,17 @@ out:
 static void o2hb_heartbeat_group_drop_item(struct config_group *group,
 					   struct config_item *item)
 {
+	struct task_struct *hb_task;
 	struct o2hb_region *reg = to_o2hb_region(item);
 
 	/* stop the thread when the user removes the region dir */
-	if (reg->hr_task) {
-		kthread_stop(reg->hr_task);
-		reg->hr_task = NULL;
-	}
+	spin_lock(&o2hb_live_lock);
+	hb_task = reg->hr_task;
+	reg->hr_task = NULL;
+	spin_unlock(&o2hb_live_lock);
+
+	if (hb_task)
+		kthread_stop(hb_task);
 
 	config_item_put(item);
 }
-- 
cgit v1.2.3-70-g09d2


From 03f981cf2ec95dd8bc43d2ecccaec4e83c8375e2 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 4 Jan 2007 14:54:41 -0800
Subject: ocfs2: add some missing address space callbacks

Under load, OCFS2 would crash in invalidate_inode_pages2_range() because
invalidate_complete_page2() was unable to invalidate a page.  It would
appear that JBD is holding on to the page.  ext3 has a specific
->releasepage() handler to cover this case.

Steal ext3's ->releasepage(), ->invalidatepage(), and ->migratepage(), as
they appear completely appropriate for OCFS2.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/aops.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 93628b02ef5..875c1144381 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -614,6 +614,27 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	ocfs2_rw_unlock(inode, 0);
 }
 
+/*
+ * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen
+ * from ext3.  PageChecked() bits have been removed as OCFS2 does not
+ * do journalled data.
+ */
+static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
+{
+	journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
+
+	journal_invalidatepage(journal, page, offset);
+}
+
+static int ocfs2_releasepage(struct page *page, gfp_t wait)
+{
+	journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
+
+	if (!page_has_buffers(page))
+		return 0;
+	return journal_try_to_free_buffers(journal, page, wait);
+}
+
 static ssize_t ocfs2_direct_IO(int rw,
 			       struct kiocb *iocb,
 			       const struct iovec *iov,
@@ -661,5 +682,8 @@ const struct address_space_operations ocfs2_aops = {
 	.commit_write	= ocfs2_commit_write,
 	.bmap		= ocfs2_bmap,
 	.sync_page	= block_sync_page,
-	.direct_IO	= ocfs2_direct_IO
+	.direct_IO	= ocfs2_direct_IO,
+	.invalidatepage	= ocfs2_invalidatepage,
+	.releasepage	= ocfs2_releasepage,
+	.migratepage	= buffer_migrate_page,
 };
-- 
cgit v1.2.3-70-g09d2


From afdf04ea098139e86147f63aad9c383cad3b6f37 Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Mon, 5 Mar 2007 15:49:49 -0800
Subject: configfs: add missing mutex_unlock()

d_alloc() failure in configfs_register_subsystem() would fail to unlock
the mutex taken above.  Reorganize the exit path to ensure the unlock
happens.

Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 34750d5e4ff..5e6e37e58f3 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1141,25 +1141,22 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
 
 	err = -ENOMEM;
 	dentry = d_alloc(configfs_sb->s_root, &name);
-	if (!dentry)
-		goto out_release;
-
-	d_add(dentry, NULL);
+	if (dentry) {
+		d_add(dentry, NULL);
 
-	err = configfs_attach_group(sd->s_element, &group->cg_item,
-				    dentry);
-	if (!err)
-		dentry = NULL;
-	else
-		d_delete(dentry);
+		err = configfs_attach_group(sd->s_element, &group->cg_item,
+					    dentry);
+		if (err) {
+			d_delete(dentry);
+			dput(dentry);
+		}
+	}
 
 	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
 
-	if (dentry) {
-	    dput(dentry);
-out_release:
-	    unlink_group(group);
-	    configfs_release_fs();
+	if (err) {
+		unlink_group(group);
+		configfs_release_fs();
 	}
 
 	return err;
-- 
cgit v1.2.3-70-g09d2


From 3fca0894a4b5e52c278421b04435b88e32b423ad Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 12 Mar 2007 13:24:34 -0700
Subject: ocfs2_dlm: Missing get/put lockres in dlm_run_purge_lockres

In some circumstances, this was causing us to reference freed memory.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmthread.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 8ffa0916eb8..6421a8fae1d 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -265,8 +265,10 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 		/* This may drop and reacquire the dlm spinlock if it
 		 * has to do migration. */
 		mlog(0, "calling dlm_purge_lockres!\n");
+		dlm_lockres_get(lockres);
 		if (dlm_purge_lockres(dlm, lockres))
 			BUG();
+		dlm_lockres_put(lockres);
 		mlog(0, "DONE calling dlm_purge_lockres!\n");
 
 		/* Avoid adding any scheduling latencies */
-- 
cgit v1.2.3-70-g09d2


From b36c3f84988eebf38acaccc756e05f6b70e333ab Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Mon, 12 Mar 2007 13:25:44 -0700
Subject: ocfs2_dlm: Add missing locks in dlm_empty_lockres

__dlm_lockres_unused() expects the caller to take the lockres spinlock.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmmaster.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 77e4e6169a0..9229e04362f 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2730,14 +2730,17 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	int ret;
 	int lock_dropped = 0;
 
+	spin_lock(&res->spinlock);
 	if (res->owner != dlm->node_num) {
 		if (!__dlm_lockres_unused(res)) {
 			mlog(ML_ERROR, "%s:%.*s: this node is not master, "
 			     "trying to free this but locks remain\n",
 			     dlm->name, res->lockname.len, res->lockname.name);
 		}
+		spin_unlock(&res->spinlock);
 		goto leave;
 	}
+	spin_unlock(&res->spinlock);
 
 	/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
 	spin_unlock(&dlm->spinlock);
-- 
cgit v1.2.3-70-g09d2


From 04ff97086b1a3237bbd1fe6390fa80fe75207e23 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 12 Mar 2007 16:17:58 +0000
Subject: [PATCH] sanitize security_getprocattr() API

have it return the buffer it had allocated

Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c           | 21 ++++++---------------
 include/linux/security.h |  8 ++++----
 security/dummy.c         |  2 +-
 security/selinux/hooks.c |  8 ++++++--
 4 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01f7769da8e..989af5e55d1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1558,29 +1558,20 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
 	struct inode * inode = file->f_path.dentry->d_inode;
-	unsigned long page;
+	char *p = NULL;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
 
-	length = -ESRCH;
 	if (!task)
-		goto out_no_task;
-
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
-	length = -ENOMEM;
-	if (!(page = __get_free_page(GFP_KERNEL)))
-		goto out;
+		return -ESRCH;
 
 	length = security_getprocattr(task,
 				      (char*)file->f_path.dentry->d_name.name,
-				      (void*)page, count);
-	if (length >= 0)
-		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
-	free_page(page);
-out:
+				      &p);
 	put_task_struct(task);
-out_no_task:
+	if (length > 0)
+		length = simple_read_from_buffer(buf, count, ppos, p, length);
+	kfree(p);
 	return length;
 }
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 7f88d97575f..47e82c120f9 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1324,7 +1324,7 @@ struct security_operations {
 
 	void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
 
- 	int (*getprocattr)(struct task_struct *p, char *name, void *value, size_t size);
+ 	int (*getprocattr)(struct task_struct *p, char *name, char **value);
  	int (*setprocattr)(struct task_struct *p, char *name, void *value, size_t size);
 	int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
 	void (*release_secctx)(char *secdata, u32 seclen);
@@ -2092,9 +2092,9 @@ static inline void security_d_instantiate (struct dentry *dentry, struct inode *
 	security_ops->d_instantiate (dentry, inode);
 }
 
-static inline int security_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
 {
-	return security_ops->getprocattr(p, name, value, size);
+	return security_ops->getprocattr(p, name, value);
 }
 
 static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
@@ -2749,7 +2749,7 @@ static inline int security_sem_semop (struct sem_array * sma,
 static inline void security_d_instantiate (struct dentry *dentry, struct inode *inode)
 { }
 
-static inline int security_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
 {
 	return -EINVAL;
 }
diff --git a/security/dummy.c b/security/dummy.c
index 558795b237d..8ffd76405b5 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -907,7 +907,7 @@ static void dummy_d_instantiate (struct dentry *dentry, struct inode *inode)
 	return;
 }
 
-static int dummy_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static int dummy_getprocattr(struct task_struct *p, char *name, char **value)
 {
 	return -EINVAL;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 19a385e9968..d41e24d6ae4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4468,11 +4468,12 @@ static void selinux_d_instantiate (struct dentry *dentry, struct inode *inode)
 }
 
 static int selinux_getprocattr(struct task_struct *p,
-			       char *name, void *value, size_t size)
+			       char *name, char **value)
 {
 	struct task_security_struct *tsec;
 	u32 sid;
 	int error;
+	unsigned len;
 
 	if (current != p) {
 		error = task_has_perm(current, p, PROCESS__GETATTR);
@@ -4500,7 +4501,10 @@ static int selinux_getprocattr(struct task_struct *p,
 	if (!sid)
 		return 0;
 
-	return selinux_getsecurity(sid, value, size);
+	error = security_sid_to_context(sid, value, &len);
+	if (error)
+		return error;
+	return len;
 }
 
 static int selinux_setprocattr(struct task_struct *p,
-- 
cgit v1.2.3-70-g09d2


From a033f35a2206e28af8109c62b403d940ba89d2b9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Wed, 14 Mar 2007 09:16:24 +0000
Subject: [PATCH] include of asm/pgtable.h in nfsfh is bogus

not needed and actually breaks build on frv, while we are at it

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfsfh.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c2660cbfcd9..8d995bcef80 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -17,7 +17,6 @@
 #include <linux/stat.h>
 #include <linux/dcache.h>
 #include <linux/mount.h>
-#include <asm/pgtable.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
-- 
cgit v1.2.3-70-g09d2


From 82c05a13c9e0d782941f975eabf6c5a7928cc4d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Wed, 14 Mar 2007 09:19:40 +0000
Subject: [PATCH] cifs endianness annotations

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/cifs/cifspdu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0efdf35aab2..3af76249dc8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -220,7 +220,7 @@
  */
 #define CIFS_NO_HANDLE        0xFFFF
 
-#define NO_CHANGE_64          0xFFFFFFFFFFFFFFFFULL
+#define NO_CHANGE_64          cpu_to_le64(0xFFFFFFFFFFFFFFFFULL)
 #define NO_CHANGE_32          0xFFFFFFFFUL
 
 /* IPC$ in ASCII */
-- 
cgit v1.2.3-70-g09d2


From d9a9cdfb078d755e648d53ec25b7370f84ee5729 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 15 Mar 2007 15:50:34 -0400
Subject: [PATCH] sysfs and driver core: add callback helper, used by SCSI and
 S390

This patch (as868) adds a helper routine for device drivers that need
to set up a callback to perform some action in a different process's
context.  This is intended for use by attribute methods that want to
unregister themselves or their parent device.  Attribute method calls
are mutually exclusive with unregistration, so such actions cannot be
taken directly.

Two attribute methods are converted to use the new helper routine: one
for SCSI device deletion and one for System/390 ccwgroup devices.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/core.c         | 29 ++++++++++++++++++++++++
 drivers/s390/cio/ccwgroup.c | 18 ++++++++++++---
 drivers/scsi/scsi_sysfs.c   | 14 +++++++++++-
 fs/sysfs/file.c             | 54 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h      |  2 ++
 include/linux/sysfs.h       |  9 ++++++++
 6 files changed, 122 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f191afe62b4..ad0f4a2f25c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -407,6 +407,35 @@ void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
 }
 EXPORT_SYMBOL_GPL(device_remove_bin_file);
 
+/**
+ * device_schedule_callback - helper to schedule a callback for a device
+ * @dev: device.
+ * @func: callback function to invoke later.
+ *
+ * Attribute methods must not unregister themselves or their parent device
+ * (which would amount to the same thing).  Attempts to do so will deadlock,
+ * since unregistration is mutually exclusive with driver callbacks.
+ *
+ * Instead methods can call this routine, which will attempt to allocate
+ * and schedule a workqueue request to call back @func with @dev as its
+ * argument in the workqueue's process context.  @dev will be pinned until
+ * @func returns.
+ *
+ * Returns 0 if the request was submitted, -ENOMEM if storage could not
+ * be allocated.
+ *
+ * NOTE: This routine won't work if CONFIG_SYSFS isn't set!  It uses an
+ * underlying sysfs routine (since it is intended for use by attribute
+ * methods), and if sysfs isn't available you'll get nothing but -ENOSYS.
+ */
+int device_schedule_callback(struct device *dev,
+		void (*func)(struct device *))
+{
+	return sysfs_schedule_callback(&dev->kobj,
+			(void (*)(void *)) func, dev);
+}
+EXPORT_SYMBOL_GPL(device_schedule_callback);
+
 static void klist_children_get(struct klist_node *n)
 {
 	struct device *dev = container_of(n, struct device, knode_parent);
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index d48e3ca4752..5aeb68e732b 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -71,19 +71,31 @@ __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev)
  * Provide an 'ungroup' attribute so the user can remove group devices no
  * longer needed or accidentially created. Saves memory :)
  */
+static void ccwgroup_ungroup_callback(struct device *dev)
+{
+	struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+
+	__ccwgroup_remove_symlinks(gdev);
+	device_unregister(dev);
+}
+
 static ssize_t
 ccwgroup_ungroup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct ccwgroup_device *gdev;
+	int rc;
 
 	gdev = to_ccwgroupdev(dev);
 
 	if (gdev->state != CCWGROUP_OFFLINE)
 		return -EINVAL;
 
-	__ccwgroup_remove_symlinks(gdev);
-	device_unregister(dev);
-
+	/* Note that we cannot unregister the device from one of its
+	 * attribute methods, so we have to use this roundabout approach.
+	 */
+	rc = device_schedule_callback(dev, ccwgroup_ungroup_callback);
+	if (rc)
+		count = rc;
 	return count;
 }
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index c275dcac3f1..939de0de18b 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -452,10 +452,22 @@ store_rescan_field (struct device *dev, struct device_attribute *attr, const cha
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+static void sdev_store_delete_callback(struct device *dev)
+{
+	scsi_remove_device(to_scsi_device(dev));
+}
+
 static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf,
 				 size_t count)
 {
-	scsi_remove_device(to_scsi_device(dev));
+	int rc;
+
+	/* An attribute cannot be unregistered by one of its own methods,
+	 * so we have to use this roundabout approach.
+	 */
+	rc = device_schedule_callback(dev, sdev_store_delete_callback);
+	if (rc)
+		count = rc;
 	return count;
 };
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 8d4d839a9d8..1bafdf6e171 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -629,6 +629,60 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 
+struct sysfs_schedule_callback_struct {
+	struct kobject 		*kobj;
+	void			(*func)(void *);
+	void			*data;
+	struct work_struct	work;
+};
+
+static void sysfs_schedule_callback_work(struct work_struct *work)
+{
+	struct sysfs_schedule_callback_struct *ss = container_of(work,
+			struct sysfs_schedule_callback_struct, work);
+
+	(ss->func)(ss->data);
+	kobject_put(ss->kobj);
+	kfree(ss);
+}
+
+/**
+ * sysfs_schedule_callback - helper to schedule a callback for a kobject
+ * @kobj: object we're acting for.
+ * @func: callback function to invoke later.
+ * @data: argument to pass to @func.
+ *
+ * sysfs attribute methods must not unregister themselves or their parent
+ * kobject (which would amount to the same thing).  Attempts to do so will
+ * deadlock, since unregistration is mutually exclusive with driver
+ * callbacks.
+ *
+ * Instead methods can call this routine, which will attempt to allocate
+ * and schedule a workqueue request to call back @func with @data as its
+ * argument in the workqueue's process context.  @kobj will be pinned
+ * until @func returns.
+ *
+ * Returns 0 if the request was submitted, -ENOMEM if storage could not
+ * be allocated.
+ */
+int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
+		void *data)
+{
+	struct sysfs_schedule_callback_struct *ss;
+
+	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
+	if (!ss)
+		return -ENOMEM;
+	kobject_get(kobj);
+	ss->kobj = kobj;
+	ss->func = func;
+	ss->data = data;
+	INIT_WORK(&ss->work, sysfs_schedule_callback_work);
+	schedule_work(&ss->work);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
+
 
 EXPORT_SYMBOL_GPL(sysfs_create_file);
 EXPORT_SYMBOL_GPL(sysfs_remove_file);
diff --git a/include/linux/device.h b/include/linux/device.h
index 39a3199a826..caad9bba965 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -353,6 +353,8 @@ extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
 				   struct bin_attribute *attr);
+extern int device_schedule_callback(struct device *dev,
+		void (*func)(struct device *));
 
 /* device resource management */
 typedef void (*dr_release_t)(struct device *dev, void *res);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 523405e1e1f..0544edda716 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -78,6 +78,9 @@ struct sysfs_ops {
 
 #ifdef CONFIG_SYSFS
 
+extern int sysfs_schedule_callback(struct kobject *kobj,
+		void (*func)(void *), void *data);
+
 extern int __must_check
 sysfs_create_dir(struct kobject *, struct dentry *);
 
@@ -132,6 +135,12 @@ extern int __must_check sysfs_init(void);
 
 #else /* CONFIG_SYSFS */
 
+static inline int sysfs_schedule_callback(struct kobject *kobj,
+		void (*func)(void *), void *data)
+{
+	return -ENOSYS;
+}
+
 static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow)
 {
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From e7b0d26a86943370c04d6833c6edba2a72a6e240 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 15 Mar 2007 15:51:28 -0400
Subject: [PATCH] sysfs: reinstate exclusion between method calls and attribute
 unregistration

This patch (as869) reinstates the mutual exclusion between sysfs
attribute method calls and attribute unregistration.  The
previously-reported deadlocks have been fixed, and this exclusion is
by far the simplest way to avoid races during driver unbinding.

The check for orphaned read-buffers has been moved down slightly, so
that the remainder of a partially-read buffer will still be available
to userspace even after the attribute has been unregistered.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/sysfs/file.c  | 10 +++++-----
 fs/sysfs/inode.c | 10 +++++++---
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1bafdf6e171..fc4633378dc 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -168,12 +168,12 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	ssize_t retval = 0;
 
 	down(&buffer->sem);
-	if (buffer->orphaned) {
-		retval = -ENODEV;
-		goto out;
-	}
 	if (buffer->needs_read_fill) {
-		if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
+		if (buffer->orphaned)
+			retval = -ENODEV;
+		else
+			retval = fill_read_buffer(file->f_path.dentry,buffer);
+		if (retval)
 			goto out;
 	}
 	pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index ccb7d722c55..4de5c6b8991 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -222,13 +222,17 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
 
 static inline void orphan_all_buffers(struct inode *node)
 {
-	struct sysfs_buffer_collection *set = node->i_private;
+	struct sysfs_buffer_collection *set;
 	struct sysfs_buffer *buf;
 
 	mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
-	if (node->i_private) {
-		list_for_each_entry(buf, &set->associates, associates)
+	set = node->i_private;
+	if (set) {
+		list_for_each_entry(buf, &set->associates, associates) {
+			down(&buf->sem);
 			buf->orphaned = 1;
+			up(&buf->sem);
+		}
 	}
 	mutex_unlock(&node->i_mutex);
 }
-- 
cgit v1.2.3-70-g09d2


From 38e2aff670b681b6cc267aca307633cbcb48864b Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 16 Mar 2007 05:12:53 +0000
Subject: [CIFS] Do not negotiate new POSIX_PATH_OPERATIONS_CAP yet

Samba server now expects that clients which send the new
POSIX_PATH_OPERATIONS_CAP send all opens with this new
SMB - and expects that clients that could send the new
posix open/create but don't as indicating that they really
want Windows semantics on that handle (which allows Samba
to support clients which want to support both types of
behaviors on different handles on the same mount)

We will put this capability back in the SetFSInfo
negotiation with servers like Samba when the
new POSIXCreate (create/open/mkdir) code is finished.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifspdu.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0efdf35aab2..e894545f99d 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1887,7 +1887,13 @@ typedef struct {
 						      calls including posix open
 						      and posix unlink */ 
 #ifdef CONFIG_CIFS_POSIX
-#define CIFS_UNIX_CAP_MASK              0x0000003b
+/* Can not set pathnames cap yet until we send new posix create SMB since
+   otherwise server can treat such handles opened with older ntcreatex
+   (by a new client which knows how to send posix path ops)
+   as non-posix handles (can affect write behavior with byte range locks.
+   We can add back in POSIX_PATH_OPS cap when Posix Create/Mkdir finished */
+/* #define CIFS_UNIX_CAP_MASK              0x0000003b */
+#define CIFS_UNIX_CAP_MASK              0x0000001b 
 #else 
 #define CIFS_UNIX_CAP_MASK              0x00000013
 #endif /* CONFIG_CIFS_POSIX */
-- 
cgit v1.2.3-70-g09d2


From 2189850f42beff23af32d847bd043cd1d1811a80 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 16 Mar 2007 13:38:07 -0800
Subject: [PATCH] ufs2: more correct work with time

This patch corrects work with time in UFS2 case.

1) According to UFS2 disk layout modification/access and so on "time"
   should be hold in two variables one 64bit for seconds and another 32bit for
   nanoseconds,

   at now for some unknown reason we suppose that "inode time" holds in
   three variables 32bit for seconds, 32bit for milliseconds and 32bit for
   nanoseconds.

2) We set amount of nanoseconds in "VFS inode" to 0 during read, instead of
   getting values from "on disk inode"(this should close
   http://bugzilla.kernel.org/show_bug.cgi?id=7991).

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Cc: Bjoern Jacke <bjoern@j3e.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/ialloc.c        |  5 ++---
 fs/ufs/inode.c         | 24 ++++++++++++------------
 include/linux/ufs_fs.h |  8 ++++----
 3 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index b868878009b..c28a8b6f2fe 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -343,9 +343,8 @@ cg_found:
 		lock_buffer(bh);
 		ufs2_inode = (struct ufs2_inode *)bh->b_data;
 		ufs2_inode += ufs_inotofsbo(inode->i_ino);
-		ufs2_inode->ui_birthtime.tv_sec =
-			cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec);
-		ufs2_inode->ui_birthtime.tv_usec = 0;
+		ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec);
+		ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec);
 		mark_buffer_dirty(bh);
 		unlock_buffer(bh);
 		if (sb->s_flags & MS_SYNCHRONOUS)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index fb34ad03e22..366618dd698 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -668,12 +668,12 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid);
 
 	inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
-	inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_atime.tv_sec);
-	inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_ctime.tv_sec);
-	inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_mtime.tv_sec);
-	inode->i_mtime.tv_nsec = 0;
-	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
+	inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
+	inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime);
+	inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime);
+	inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec);
+	inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec);
+	inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec);
 	inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
 	inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
 	ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
@@ -803,12 +803,12 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
 	ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid);
 
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
-	ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
-	ufs_inode->ui_atime.tv_usec = 0;
-	ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
-	ufs_inode->ui_ctime.tv_usec = 0;
-	ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
-	ufs_inode->ui_mtime.tv_usec = 0;
+	ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
+	ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec);
+	ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec);
+	ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec);
+	ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec);
+	ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec);
 
 	ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks);
 	ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index dc2e9fe6941..daeba22b765 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -649,10 +649,10 @@ struct ufs2_inode {
 	__fs32     ui_blksize;     /*  12: Inode blocksize. */
 	__fs64     ui_size;        /*  16: File byte count. */
 	__fs64     ui_blocks;      /*  24: Bytes actually held. */
-	struct ufs_timeval   ui_atime;       /*  32: Last access time. */
-	struct ufs_timeval   ui_mtime;       /*  40: Last modified time. */
-	struct ufs_timeval   ui_ctime;       /*  48: Last inode change time. */
-	struct ufs_timeval   ui_birthtime;   /*  56: Inode creation time. */
+	__fs64   ui_atime;       /*  32: Last access time. */
+	__fs64   ui_mtime;       /*  40: Last modified time. */
+	__fs64   ui_ctime;       /*  48: Last inode change time. */
+	__fs64   ui_birthtime;   /*  56: Inode creation time. */
 	__fs32     ui_mtimensec;   /*  64: Last modified time. */
 	__fs32     ui_atimensec;   /*  68: Last access time. */
 	__fs32     ui_ctimensec;   /*  72: Last inode change time. */
-- 
cgit v1.2.3-70-g09d2


From 5431bf97ce69065ed07de1ff12543d0800817b83 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 16 Mar 2007 13:38:08 -0800
Subject: [PATCH] ufs: prepare write + change blocks on the fly

This fixes "change blocks numbers on the fly" in case when "prepare
write page" is in the call chain, in this case some buffers may be not
uptodate and not mapped, we should care to map them and load from disk.

This patch was tested with:
 - ufs regressions simple tests
 - fsx-linux
 - ltp(20060306)
 - untar and build kernel

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/balloc.c | 81 +++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 53 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index bcc44084e00..b8fa34af87c 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -244,62 +244,87 @@ failed:
  * We can come here from ufs_writepage or ufs_prepare_write,
  * locked_page is argument of these functions, so we already lock it.
  */
-static void ufs_change_blocknr(struct inode *inode, unsigned int beg,
-			       unsigned int count, unsigned int oldb,
-			       unsigned int newb, struct page *locked_page)
+static void ufs_change_blocknr(struct inode *inode, sector_t beg,
+			       unsigned int count, sector_t oldb,
+			       sector_t newb, struct page *locked_page)
 {
-	const unsigned mask = (1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1;
+	const unsigned blks_per_page =
+		1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	const unsigned mask = blks_per_page - 1;
 	struct address_space * const mapping = inode->i_mapping;
-	pgoff_t index, cur_index;
-	unsigned end, pos, j;
+	pgoff_t index, cur_index, last_index;
+	unsigned pos, j, lblock;
+	sector_t end, i;
 	struct page *page;
 	struct buffer_head *head, *bh;
 
-	UFSD("ENTER, ino %lu, count %u, oldb %u, newb %u\n",
-	      inode->i_ino, count, oldb, newb);
+	UFSD("ENTER, ino %lu, count %u, oldb %llu, newb %llu\n",
+	      inode->i_ino, count,
+	     (unsigned long long)oldb, (unsigned long long)newb);
 
 	BUG_ON(!locked_page);
 	BUG_ON(!PageLocked(locked_page));
 
 	cur_index = locked_page->index;
-
-	for (end = count + beg; beg < end; beg = (beg | mask) + 1) {
-		index = beg >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	end = count + beg;
+	last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	for (i = beg; i < end; i = (i | mask) + 1) {
+		index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
 		if (likely(cur_index != index)) {
 			page = ufs_get_locked_page(mapping, index);
-			if (!page || IS_ERR(page)) /* it was truncated or EIO */
+			if (!page)/* it was truncated */
+				continue;
+			if (IS_ERR(page)) {/* or EIO */
+				ufs_error(inode->i_sb, __FUNCTION__,
+					  "read of page %llu failed\n",
+					  (unsigned long long)index);
 				continue;
+			}
 		} else
 			page = locked_page;
 
 		head = page_buffers(page);
 		bh = head;
-		pos = beg & mask;
+		pos = i & mask;
 		for (j = 0; j < pos; ++j)
 			bh = bh->b_this_page;
-		j = 0;
+
+
+		if (unlikely(index == last_index))
+			lblock = end & mask;
+		else
+			lblock = blks_per_page;
+
 		do {
-			if (buffer_mapped(bh)) {
-				pos = bh->b_blocknr - oldb;
-				if (pos < count) {
-					UFSD(" change from %llu to %llu\n",
-					     (unsigned long long)pos + oldb,
-					     (unsigned long long)pos + newb);
-					bh->b_blocknr = newb + pos;
-					unmap_underlying_metadata(bh->b_bdev,
-								  bh->b_blocknr);
-					mark_buffer_dirty(bh);
-					++j;
+			if (j >= lblock)
+				break;
+			pos = (i - beg) + j;
+
+			if (!buffer_mapped(bh))
+					map_bh(bh, inode->i_sb, oldb + pos);
+			if (!buffer_uptodate(bh)) {
+				ll_rw_block(READ, 1, &bh);
+				wait_on_buffer(bh);
+				if (!buffer_uptodate(bh)) {
+					ufs_error(inode->i_sb, __FUNCTION__,
+						  "read of block failed\n");
+					break;
 				}
 			}
 
+			UFSD(" change from %llu to %llu, pos %u\n",
+			     (unsigned long long)pos + oldb,
+			     (unsigned long long)pos + newb, pos);
+
+			bh->b_blocknr = newb + pos;
+			unmap_underlying_metadata(bh->b_bdev,
+						  bh->b_blocknr);
+			mark_buffer_dirty(bh);
+			++j;
 			bh = bh->b_this_page;
 		} while (bh != head);
 
-		if (j)
-			set_page_dirty(page);
-
 		if (likely(cur_index != index))
 			ufs_put_locked_page(page);
  	}
-- 
cgit v1.2.3-70-g09d2


From 4b25a37e2093146c1f9aa436b832b7d4ef880ca4 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 16 Mar 2007 13:38:09 -0800
Subject: [PATCH] ufs: zeroize the rest of block in truncate

This patch fix behaviour in such test scenario:

  lseek(fd, BIG_OFFSET)
  write(fd, buf, sizeof(buf))
  truncate(BIG_OFFSET)
  truncate(BIG_OFFSET + sizeof(buf))
  read(fd, buf...)

Because of if file big enough(BIG_OFFSET) we start allocate space by block,
ordinary block size > page size, so we should zeroize the rest of block in
truncate(except last framgnet, about which VFS should care), to not get
garbage, when we extend file.

Also patch corrects conversion from pointer to block to physical block number,
this helps in case of not common used UFS types.

And add to debug output inode number.

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/balloc.c   |  5 +++--
 fs/ufs/inode.c    | 12 ++++++------
 fs/ufs/truncate.c | 36 ++++++++++++++++++++++++++----------
 3 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b8fa34af87c..841ac25fd95 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -482,8 +482,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
 	if (result) {
 		ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
 				locked_page != NULL);
-		ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp,
-				   result, locked_page);
+		ufs_change_blocknr(inode, fragment - oldcount, oldcount,
+				   uspi->s_sbbase + tmp,
+				   uspi->s_sbbase + result, locked_page);
 		ufs_cpu_to_data_ptr(sb, p, result);
 		*err = 0;
 		UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 366618dd698..013d7afe7cd 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -212,7 +212,7 @@ repeat:
 			brelse (result);
 			goto repeat;
 		} else {
-			*phys = tmp + blockoff;
+			*phys = uspi->s_sbbase + tmp + blockoff;
 			return NULL;
 		}
 	}
@@ -282,9 +282,9 @@ repeat:
 	}
 
 	if (!phys) {
-		result = sb_getblk(sb, tmp + blockoff);
+		result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
 	} else {
-		*phys = tmp + blockoff;
+		*phys = uspi->s_sbbase + tmp + blockoff;
 		result = NULL;
 		*err = 0;
 		*new = 1;
@@ -368,7 +368,7 @@ repeat:
 			brelse (result);
 			goto repeat;
 		} else {
-			*phys = tmp + blockoff;
+			*phys = uspi->s_sbbase + tmp + blockoff;
 			goto out;
 		}
 	}
@@ -389,9 +389,9 @@ repeat:
 
 
 	if (!phys) {
-		result = sb_getblk(sb, tmp + blockoff);
+		result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
 	} else {
-		*phys = tmp + blockoff;
+		*phys = uspi->s_sbbase + tmp + blockoff;
 		*new = 1;
 	}
 
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 749581fa772..52fb2b59f06 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -74,7 +74,7 @@ static int ufs_trunc_direct(struct inode *inode)
 	unsigned i, tmp;
 	int retry;
 	
-	UFSD("ENTER\n");
+	UFSD("ENTER: ino %lu\n", inode->i_ino);
 
 	sb = inode->i_sb;
 	uspi = UFS_SB(sb)->s_uspi;
@@ -96,8 +96,8 @@ static int ufs_trunc_direct(struct inode *inode)
 		block2 = ufs_fragstoblks (frag3);
 	}
 
-	UFSD("frag1 %llu, frag2 %llu, block1 %llu, block2 %llu, frag3 %llu,"
-	     " frag4 %llu\n",
+	UFSD("ino %lu, frag1 %llu, frag2 %llu, block1 %llu, block2 %llu,"
+	     " frag3 %llu, frag4 %llu\n", inode->i_ino,
 	     (unsigned long long)frag1, (unsigned long long)frag2,
 	     (unsigned long long)block1, (unsigned long long)block2,
 	     (unsigned long long)frag3, (unsigned long long)frag4);
@@ -163,7 +163,7 @@ next1:
 	mark_inode_dirty(inode);
  next3:
 
-	UFSD("EXIT\n");
+	UFSD("EXIT: ino %lu\n", inode->i_ino);
 	return retry;
 }
 
@@ -248,7 +248,7 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
 	}
 	ubh_brelse (ind_ubh);
 	
-	UFSD("EXIT\n");
+	UFSD("EXIT: ino %lu\n", inode->i_ino);
 	
 	return retry;
 }
@@ -262,7 +262,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
 	void *dind;
 	int retry = 0;
 	
-	UFSD("ENTER\n");
+	UFSD("ENTER: ino %lu\n", inode->i_ino);
 	
 	sb = inode->i_sb;
 	uspi = UFS_SB(sb)->s_uspi;
@@ -312,7 +312,7 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
 	}
 	ubh_brelse (dind_bh);
 	
-	UFSD("EXIT\n");
+	UFSD("EXIT: ino %lu\n", inode->i_ino);
 	
 	return retry;
 }
@@ -327,7 +327,7 @@ static int ufs_trunc_tindirect(struct inode *inode)
 	void *tind, *p;
 	int retry;
 	
-	UFSD("ENTER\n");
+	UFSD("ENTER: ino %lu\n", inode->i_ino);
 
 	retry = 0;
 	
@@ -372,19 +372,21 @@ static int ufs_trunc_tindirect(struct inode *inode)
 	}
 	ubh_brelse (tind_bh);
 	
-	UFSD("EXIT\n");
+	UFSD("EXIT: ino %lu\n", inode->i_ino);
 	return retry;
 }
 
 static int ufs_alloc_lastblock(struct inode *inode)
 {
 	int err = 0;
+	struct super_block *sb = inode->i_sb;
 	struct address_space *mapping = inode->i_mapping;
-	struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
+	struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
 	unsigned i, end;
 	sector_t lastfrag;
 	struct page *lastpage;
 	struct buffer_head *bh;
+	u64 phys64;
 
 	lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
 
@@ -424,6 +426,20 @@ static int ufs_alloc_lastblock(struct inode *inode)
 	       set_page_dirty(lastpage);
        }
 
+       if (lastfrag >= UFS_IND_FRAGMENT) {
+	       end = uspi->s_fpb - ufs_fragnum(lastfrag) - 1;
+	       phys64 = bh->b_blocknr + 1;
+	       for (i = 0; i < end; ++i) {
+		       bh = sb_getblk(sb, i + phys64);
+		       lock_buffer(bh);
+		       memset(bh->b_data, 0, sb->s_blocksize);
+		       set_buffer_uptodate(bh);
+		       mark_buffer_dirty(bh);
+		       unlock_buffer(bh);
+		       sync_dirty_buffer(bh);
+		       brelse(bh);
+	       }
+       }
 out_unlock:
        ufs_put_locked_page(lastpage);
 out:
-- 
cgit v1.2.3-70-g09d2


From 0465fc0a1c42e18438d391f3a7e661493a9ad68e Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 16 Mar 2007 13:38:09 -0800
Subject: [PATCH] ufs2: tindirect truncate fix

During modification of code to support UFS2 writing, the case with
"three indirect" blocks in truncate path was missed, this patch fixes
this situation.

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/truncate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 52fb2b59f06..79c54c85fb5 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -348,7 +348,7 @@ static int ufs_trunc_tindirect(struct inode *inode)
 	}
 
 	for (i = tindirect_block ; i < uspi->s_apb ; i++) {
-		tind = ubh_get_addr32 (tind_bh, i);
+		tind = ubh_get_data_ptr(uspi, tind_bh, i);
 		retry |= ufs_trunc_dindirect(inode, UFS_NDADDR + 
 			uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind);
 		ubh_mark_buffer_dirty(tind_bh);
-- 
cgit v1.2.3-70-g09d2


From b228b8e5bf96b740a70871c1a248bb65c267f5f2 Mon Sep 17 00:00:00 2001
From: Michael Halcrow <mhalcrow@us.ibm.com>
Date: Fri, 16 Mar 2007 13:38:22 -0800
Subject: [PATCH] eCryptfs: fix possible NULL ptr deref in ecryptfs_d_release()

ecryptfs_d_release() first dereferences a pointer (via
ecryptfs_dentry_to_lower()) and then afterwards checks to see if the
pointer it just dereferenced is NULL (via ecryptfs_dentry_to_private()).

This patch moves all of the work done on the dereferenced pointer inside a
block governed by the condition that the pointer is non-NULL.

Signed-off-by: Michael Halcrow <mhalcrow@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ecryptfs/dentry.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 329efcd3d8c..cb20b964419 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -78,18 +78,13 @@ struct kmem_cache *ecryptfs_dentry_info_cache;
  */
 static void ecryptfs_d_release(struct dentry *dentry)
 {
-	struct dentry *lower_dentry;
-
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	if (ecryptfs_dentry_to_private(dentry))
+	if (ecryptfs_dentry_to_private(dentry)) {
+		if (ecryptfs_dentry_to_lower(dentry)) {
+			mntput(ecryptfs_dentry_to_lower_mnt(dentry));
+			dput(ecryptfs_dentry_to_lower(dentry));
+		}
 		kmem_cache_free(ecryptfs_dentry_info_cache,
 				ecryptfs_dentry_to_private(dentry));
-	if (lower_dentry) {
-		struct vfsmount *lower_mnt =
-			ecryptfs_dentry_to_lower_mnt(dentry);
-
-		mntput(lower_mnt);
-		dput(lower_dentry);
 	}
 	return;
 }
-- 
cgit v1.2.3-70-g09d2


From 1174cf730179d8f029b9e93cb9a4d5bfb08d1202 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@sw.ru>
Date: Fri, 16 Mar 2007 13:38:24 -0800
Subject: [PATCH] smbfs: double free memory corruption

smbfs allocates rq_trans2buffer to handle server's multi transaction2 response
messages.  As struct smb_request may be reused, rq_trans2buffer is freed
before each new request.  However if last servers's response is not multi but
single trans2 message then new rq_trans2buffer is not allocated but last
smb_rput still tries to free it again.

To prevent this issue rq_trans2buffer pointer should be set to NULL after
kfree.

Signed-off-by: Vasily Averin <vvs@sw.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/smbfs/request.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index 42261dbdf60..723f7c66766 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -181,6 +181,7 @@ static int smb_setup_request(struct smb_request *req)
 	req->rq_errno = 0;
 	req->rq_fragment = 0;
 	kfree(req->rq_trans2buffer);
+	req->rq_trans2buffer = NULL;
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From b74a2f0913694556a027795d2954d30523fac4c5 Mon Sep 17 00:00:00 2001
From: suzuki <suzuki@in.ibm.com>
Date: Fri, 16 Mar 2007 13:38:25 -0800
Subject: [PATCH] fix rescan_partitions to return errors properly

The only error code which comes from the partition checkers is -1, when
they finds an EIO.  As per the discussion, ENOMEM values were ignored,
as they might scare the users.

So, with the current code, we end up returning -1 and not EIO for the
ioctl() calls.  Which doesn't give any clue to the user of what went
wrong.

Signed-off-by: Suzuki K P <suzuki@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/check.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e46d237b10f..8a7d0035ad7 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -541,7 +541,7 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
 	if (IS_ERR(state))	/* I/O error reading the partition table */
-		return PTR_ERR(state);
+		return -EIO;
 	for (p = 1; p < state->limit; p++) {
 		sector_t size = state->parts[p].size;
 		sector_t from = state->parts[p].from;
-- 
cgit v1.2.3-70-g09d2


From 89a09141df6ac1c3821fbe44ca8384eb37692965 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 16 Mar 2007 13:38:26 -0800
Subject: [PATCH] nfs: fix congestion control

The current NFS client congestion logic is severly broken, it marks the
backing device congested during each nfs_writepages() call but doesn't
mirror this in nfs_writepage() which makes for deadlocks.  Also it
implements its own waitqueue.

Replace this by a more regular congestion implementation that puts a cap on
the number of active writeback pages and uses the bdi congestion waitqueue.

Also always use an interruptible wait since it makes sense to be able to
SIGKILL the process even for mounts without 'intr'.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/super.c              |   4 +-
 fs/nfs/sysctl.c             |   8 +++
 fs/nfs/write.c              | 116 ++++++++++++++++++++++++++++----------------
 include/linux/backing-dev.h |   1 +
 include/linux/nfs_fs.h      |   1 +
 include/linux/nfs_fs_sb.h   |   1 +
 mm/backing-dev.c            |  16 ++++++
 7 files changed, 103 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index bb516a2cfba..f1eae44b9a1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -151,10 +151,10 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_0;
 
-#ifdef CONFIG_NFS_V4
 	ret = nfs_register_sysctl();
 	if (ret < 0)
 		goto error_1;
+#ifdef CONFIG_NFS_V4
 	ret = register_filesystem(&nfs4_fs_type);
 	if (ret < 0)
 		goto error_2;
@@ -165,9 +165,9 @@ int __init register_nfs_fs(void)
 #ifdef CONFIG_NFS_V4
 error_2:
 	nfs_unregister_sysctl();
+#endif
 error_1:
 	unregister_filesystem(&nfs_fs_type);
-#endif
 error_0:
 	return ret;
 }
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index fcdcafbb329..b62481dabae 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -50,6 +50,14 @@ static ctl_table nfs_cb_sysctls[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nfs_congestion_kb",
+		.data		= &nfs_congestion_kb,
+		.maxlen		= sizeof(nfs_congestion_kb),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index febdade9167..2867e6b7096 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/writeback.h>
+#include <linux/swap.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
@@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct page *,
 					    unsigned int, unsigned int);
 static void nfs_mark_request_dirty(struct nfs_page *req);
-static int nfs_wait_on_write_congestion(struct address_space *, int);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
@@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
-static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-
 struct nfs_write_data *nfs_commit_alloc(void)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
@@ -210,6 +208,40 @@ static int wb_priority(struct writeback_control *wbc)
 	return 0;
 }
 
+/*
+ * NFS congestion control
+ */
+
+int nfs_congestion_kb;
+
+#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
+#define NFS_CONGESTION_OFF_THRESH	\
+	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+
+static void nfs_set_page_writeback(struct page *page)
+{
+	if (!test_set_page_writeback(page)) {
+		struct inode *inode = page->mapping->host;
+		struct nfs_server *nfss = NFS_SERVER(inode);
+
+		if (atomic_inc_return(&nfss->writeback) >
+				NFS_CONGESTION_ON_THRESH)
+			set_bdi_congested(&nfss->backing_dev_info, WRITE);
+	}
+}
+
+static void nfs_end_page_writeback(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct nfs_server *nfss = NFS_SERVER(inode);
+
+	end_page_writeback(page);
+	if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) {
+		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
+		congestion_end(WRITE);
+	}
+}
+
 /*
  * Find an associated nfs write request, and prepare to flush it out
  * Returns 1 if there was no write request, or if the request was
@@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page)
 	spin_unlock(req_lock);
 	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
 		nfs_mark_request_dirty(req);
-		set_page_writeback(page);
+		nfs_set_page_writeback(page);
 	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
@@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	return err; 
 }
 
-/*
- * Note: causes nfs_update_request() to block on the assumption
- * 	 that the writeback is generated due to memory pressure.
- */
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct inode *inode = mapping->host;
 	int err;
 
@@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
-	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
-		if (wbc->nonblocking)
-			return 0;
-		nfs_wait_on_write_congestion(mapping, 0);
-	}
 	err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
 	if (err < 0)
 		goto out;
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	err = 0;
 out:
-	clear_bit(BDI_write_congested, &bdi->state);
-	wake_up_all(&nfs_write_congestion);
-	congestion_end(WRITE);
 	return err;
 }
 
@@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 }
 
 /*
- * Insert a write request into an inode
+ * Remove a write request from an inode
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
@@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un
 }
 #endif
 
-static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
+static int nfs_wait_on_write_congestion(struct address_space *mapping)
 {
+	struct inode *inode = mapping->host;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	DEFINE_WAIT(wait);
 	int ret = 0;
 
 	might_sleep();
@@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 	if (!bdi_write_congested(bdi))
 		return 0;
 
-	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+	nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT);
 
-	if (intr) {
-		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
+	do {
+		struct rpc_clnt *clnt = NFS_CLIENT(inode);
 		sigset_t oldset;
 
 		rpc_clnt_sigmask(clnt, &oldset);
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
-		if (bdi_write_congested(bdi)) {
-			if (signalled())
-				ret = -ERESTARTSYS;
-			else
-				schedule();
-		}
+		ret = congestion_wait_interruptible(WRITE, HZ/10);
 		rpc_clnt_sigunmask(clnt, &oldset);
-	} else {
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
-		if (bdi_write_congested(bdi))
-			schedule();
-	}
-	finish_wait(&nfs_write_congestion, &wait);
+		if (ret == -ERESTARTSYS)
+			break;
+		ret = 0;
+	} while (bdi_write_congested(bdi));
+
 	return ret;
 }
 
-
 /*
  * Try to update any existing write request, or create one if there is none.
  * In order to match, the request's credentials must match those of
@@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		struct page *page, unsigned int offset, unsigned int bytes)
 {
-	struct inode *inode = page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	unsigned long		rqend, end;
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR))
+	if (nfs_wait_on_write_congestion(mapping))
 		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
@@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	end_page_writeback(req->wb_page);
+	nfs_end_page_writeback(req->wb_page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (!PageError(req->wb_page)) {
@@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		if (task->tk_status < 0) {
 			nfs_set_pageerror(page);
 			req->wb_context->error = task->tk_status;
-			end_page_writeback(page);
+			nfs_end_page_writeback(page);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
-		end_page_writeback(page);
+		nfs_end_page_writeback(page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
@@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void)
 	if (nfs_commit_mempool == NULL)
 		return -ENOMEM;
 
+	/*
+	 * NFS congestion size, scale with available memory.
+	 *
+	 *  64MB:    8192k
+	 * 128MB:   11585k
+	 * 256MB:   16384k
+	 * 512MB:   23170k
+	 *   1GB:   32768k
+	 *   2GB:   46340k
+	 *   4GB:   65536k
+	 *   8GB:   92681k
+	 *  16GB:  131072k
+	 *
+	 * This allows larger machines to have larger/more transfers.
+	 * Limit the default to 256M
+	 */
+	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+	if (nfs_congestion_kb > 256*1024)
+		nfs_congestion_kb = 256*1024;
+
 	return 0;
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 7011d625559..f2542c24b32 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -93,6 +93,7 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
 void set_bdi_congested(struct backing_dev_info *bdi, int rw);
 long congestion_wait(int rw, long timeout);
+long congestion_wait_interruptible(int rw, long timeout);
 void congestion_end(int rw);
 
 #define bdi_cap_writeback_dirty(bdi) \
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 47aaa2c6673..e9ae0c6e2c6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -415,6 +415,7 @@ extern void nfs_complete_unlink(struct dentry *);
 /*
  * linux/fs/nfs/write.c
  */
+extern int  nfs_congestion_kb;
 extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 95796e6924f..c95d5e64254 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -82,6 +82,7 @@ struct nfs_server {
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
+	atomic_t		writeback;	/* number of writeback pages */
 	int			flags;		/* various flags */
 	unsigned int		caps;		/* server capabilities */
 	unsigned int		rsize;		/* read size */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9d..e5de3781d3f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+long congestion_wait_interruptible(int rw, long timeout)
+{
+	long ret;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		ret = io_schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+	return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
 /**
  * congestion_end - wake up sleepers on a congested backing_dev_info
  * @rw: READ or WRITE
-- 
cgit v1.2.3-70-g09d2


From 634707388baa440d9c9d082cfc4c950500c8952b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@fys.uio.no>
Date: Fri, 16 Mar 2007 13:38:28 -0800
Subject: [PATCH] nfs: nfs_getattr() can't call nfs_sync_mapping_range() for
 non-regular files

Looks like we need a check in nfs_getattr() for a regular file. It makes
no sense to call nfs_sync_mapping_range() on anything else. I think that
should fix your problem: it will stop the NFS client from interfering
with dirty pages on that inode's mapping.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Acked-by: Olof Johansson <olof@lixom.net>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index af53c02f473..93d046c85f5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -429,7 +429,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int err;
 
 	/* Flush out writes to the server in order to update c/mtime */
-	nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
+	if (S_ISREG(inode->i_mode))
+		nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
 
 	/*
 	 * We may force a getattr if the user cares about atime.
-- 
cgit v1.2.3-70-g09d2


From e5d480ff17f9220cd6198b5c91e8c75329f5594f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 16 Mar 2007 13:38:32 -0800
Subject: [PATCH] change misleading EFI partition support description

Remove the misleading "Presently only useful on the IA-64 platform" text
from the EFI partition Kconfig.

EFI partitions are also used by Apple on their Intel-based machines and
thus you need EFI partition support if you (for example) want to attach
such a machine in target disk mode.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/partitions/Kconfig | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index 74552c60b67..6e8bb66fe61 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -235,5 +235,4 @@ config EFI_PARTITION
 	select CRC32
 	help
 	  Say Y here if you would like to use hard disks under Linux which
-	  were partitioned using EFI GPT.  Presently only useful on the
-	  IA-64 platform.
+	  were partitioned using EFI GPT.
-- 
cgit v1.2.3-70-g09d2


From d1cabd63262707ad5d6bb730f25b7a2852734595 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@SteelEye.com>
Date: Fri, 16 Mar 2007 13:38:35 -0800
Subject: [PATCH] fix process crash caused by randomisation and 64k pages

This bug was seen on ppc64, but it could have occurred on any
architecture with a page size of 64k or above.  The problem is that in
fs/binfmt_elf.c:randomize_stack_top() randomizes the stack to within
0x7ff pages.  On 4k page machines, this is 8MB; on 64k page boxes, this
is 128MB.

The problem is that the new binary layout (selected in
arch_pick_mmap_layout) places the mapping segment 128MB or the stack
rlimit away from the top of the process memory, whichever is larger.  If
you chose an rlimit of less than 128MB (most defaults are in the 8Mb
range) then you can end up having your entire stack randomized away.

The fix is to make randomize_stack_top() only steal at most 8MB, which this
patch does.  However, I have to point out that even with this, your stack
rlimit might not be exactly what you get if it's > 128MB, because you're
still losing the random offset of up to 8MB.

The true fix should be to leave an explicit gap for the randomization plus
a buffer when determining mmap_base, but that would involve fixing all the
architectures.

Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 51db1182b27..a2fceba7ef8 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -507,7 +507,7 @@ out:
 #define INTERPRETER_ELF 2
 
 #ifndef STACK_RND_MASK
-#define STACK_RND_MASK 0x7ff		/* with 4K pages 8MB of VA */
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
 #endif
 
 static unsigned long randomize_stack_top(unsigned long stack_top)
-- 
cgit v1.2.3-70-g09d2


From 066fcb06d3e27c258bc229bb688ced2b16daa6c2 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 23 Mar 2007 00:45:08 +0000
Subject: [CIFS] Allow reset of file to ATTR_NORMAL when archive bit not set

When a file had a dos attribute of 0x1 (readonly - but dos attribute
of archive was not set) - doing chmod 0777 or equivalent would
try to set a dos attribute of 0 (which some servers ignore)
rather than ATTR_NORMAL (0x20) which most servers accept.
Does not affect servers which support the CIFS Unix Extensions.

Acked-by: Prasad Potluri <pvp@us.ibm.com>
Acked-by: Shirish Pargaonkar <shirishp@us.ibm.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES |  5 ++++-
 fs/cifs/inode.c | 15 ++++++++++++---
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1cbe5615993..5d1f4873d70 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -6,7 +6,10 @@ on smp system corrupts sequence number. Do not reread unnecessarily partial page
 (which we are about to overwrite anyway) when writing out file opened rw.
 When DOS attribute of file on non-Unix server's file changes on the server side
 from read-only back to read-write, reflect this change in default file mode
-(we had been leaving a file's mode read-only until the inode were reloaded)
+(we had been leaving a file's mode read-only until the inode were reloaded).
+Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
+when archive dos attribute not set and we are changing mode back to writeable
+on server which does not support the Unix Extensions).
 
 Version 1.47
 ------------
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e75a844accd..f414526e476 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1196,6 +1196,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	struct cifsFileInfo *open_file = NULL;
 	FILE_BASIC_INFO time_buf;
 	int set_time = FALSE;
+	int set_dosattr = FALSE;
 	__u64 mode = 0xFFFFFFFFFFFFFFFFULL;
 	__u64 uid = 0xFFFFFFFFFFFFFFFFULL;
 	__u64 gid = 0xFFFFFFFFFFFFFFFFULL;
@@ -1332,15 +1333,23 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	else if (attrs->ia_valid & ATTR_MODE) {
 		rc = 0;
 		if ((mode & S_IWUGO) == 0) /* not writeable */ {
-			if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0)
+			if ((cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
+				set_dosattr = TRUE;
 				time_buf.Attributes =
 					cpu_to_le32(cifsInode->cifsAttrs |
 						    ATTR_READONLY);
+			}
 		} else if ((mode & S_IWUGO) == S_IWUGO) {
-			if (cifsInode->cifsAttrs & ATTR_READONLY)
+			if (cifsInode->cifsAttrs & ATTR_READONLY) {
+				set_dosattr = TRUE;
 				time_buf.Attributes =
 					cpu_to_le32(cifsInode->cifsAttrs &
 						    (~ATTR_READONLY));
+				/* Windows ignores set to zero */
+				if(time_buf.Attributes == 0)
+					time_buf.Attributes |= 
+						cpu_to_le32(ATTR_NORMAL);
+			}
 		}
 		/* BB to be implemented -
 		   via Windows security descriptors or streams */
@@ -1378,7 +1387,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 	} else
 		time_buf.ChangeTime = 0;
 
-	if (set_time || time_buf.Attributes) {
+	if (set_time || set_dosattr) {
 		time_buf.CreationTime = 0;	/* do not change */
 		/* In the future we should experiment - try setting timestamps
 		   via Handle (SetFileInfo) instead of by path */
-- 
cgit v1.2.3-70-g09d2


From b43376927ab0f7b64c4fb304568ecfaea10446e2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 22 Mar 2007 00:11:27 -0800
Subject: [PATCH] Make XFS workqueues nonfreezable

Since freezable workqueues are broken in 2.6.21-rc
(cf. http://marc.theaimsgroup.com/?l=linux-kernel&m=116855740612755,
http://marc.theaimsgroup.com/?l=linux-kernel&m=117261312523921&w=2)
it's better to change the only user of them, which is XFS, to use "normal"
nonfreezable workqueues.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: David Chinner <dgc@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/xfs/linux-2.6/xfs_buf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e2bea6a661f..69e9e80735d 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1829,11 +1829,11 @@ xfs_buf_init(void)
 	if (!xfs_buf_zone)
 		goto out_free_trace_buf;
 
-	xfslogd_workqueue = create_freezeable_workqueue("xfslogd");
+	xfslogd_workqueue = create_workqueue("xfslogd");
 	if (!xfslogd_workqueue)
 		goto out_free_buf_zone;
 
-	xfsdatad_workqueue = create_freezeable_workqueue("xfsdatad");
+	xfsdatad_workqueue = create_workqueue("xfsdatad");
 	if (!xfsdatad_workqueue)
 		goto out_destroy_xfslogd_workqueue;
 
-- 
cgit v1.2.3-70-g09d2


From aa289b47231c95abe53a75223906fdfb79ae368e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 23 Mar 2007 00:10:00 -0700
Subject: [PATCH] FDPIC: fix the /proc/pid/stat representation of executable
 boundaries

Fix the /proc/pid/stat representation of executable boundaries.  It should
show the bounds of the executable, but instead shows the bounds of the
loader.

Before the patch is applied, the bug can be seen by examining, say, inetd:

	# ps | grep inetd
	  610         root          0   S   /usr/sbin/inetd -i
	# cat /proc/610/maps
	c0bb0000-c0bba788 r-xs 00000000 00:0b 14582157  /lib/ld-uClibc-0.9.28.so
	c3180000-c31dede4 r-xs 00000000 00:0b 14582179  /lib/libuClibc-0.9.28.so
	c328c000-c328ea00 rw-p 00008000 00:0b 14582157  /lib/ld-uClibc-0.9.28.so
	c3290000-c329b6c0 rw-p 00000000 00:00 0
	c32a0000-c32c0000 rwxp 00000000 00:00 0
	c32d4000-c32d8000 rw-p 00000000 00:00 0
	c3394000-c3398000 rw-p 00000000 00:00 0
	c3458000-c345f464 r-xs 00000000 00:0b 16384612  /usr/sbin/inetd
	c3470000-c34748f8 rw-p 00004000 00:0b 16384612  /usr/sbin/inetd
	c34cc000-c34d0000 rw-p 00000000 00:00 0
	c34d4000-c34d8000 rw-p 00000000 00:00 0
	c34d8000-c34dc000 rw-p 00000000 00:00 0
	# cat /proc/610/stat
	610 (inetd) S 1 610 610 0 -1 256 0 0 0 0 0 8 0 0 19 0 1 0 94392000718
	950272 0 4294967295 3233480704 3233523592 3274440352 3274439976
 	3273467584 0 0 4096 90115 3221712796 0 0 17 0 0 0 0

The code boundaries are 3233480704 to 3233523592, which are:

	(gdb) p/x 3233480704
	$1 = 0xc0bb0000
	(gdb) p/x 3233523592
	$2 = 0xc0bba788

Which corresponds to this line in the maps file:

	c0bb0000-c0bba788 r-xs 00000000 00:0b 14582157  /lib/ld-uClibc-0.9.28.so

Which is wrong.  After the patch is applied, the maps file is pretty much
identical (there's some minor shuffling of the location of some of the
anonymous VMAs), but the stat file is now:

	# cat /proc/610/stat
	610 (inetd) S 1 610 610 0 -1 256 0 0 0 0 0 7 0 0 18 0 1 0 94392000722
	950272 0 4294967295 3276111872 3276141668 3274440352 3274439976
	3273467584 0 0 4096 90115 3221712796 0 0 17 0 0 0 0

The code boundaries are then 3276111872 to 3276141668, which are:

	(gdb) p/x 3276111872
	$1 = 0xc3458000
	(gdb) p/x 3276141668
	$2 = 0xc345f464

And these correspond to this line in the maps file instead:

	c3458000-c345f464 r-xs 00000000 00:0b 16384612  /usr/sbin/inetd

Which is now correct.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf_fdpic.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 5810aa1339f..47d6d49d1fb 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -179,6 +179,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	int executable_stack;
 	int retval, i;
 
+	kdebug("____ LOAD %d ____", current->pid);
+
 	memset(&exec_params, 0, sizeof(exec_params));
 	memset(&interp_params, 0, sizeof(interp_params));
 
@@ -941,8 +943,11 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 
 		if (mm) {
 			if (phdr->p_flags & PF_X) {
-				mm->start_code = seg->addr;
-				mm->end_code = seg->addr + phdr->p_memsz;
+				if (!mm->start_code) {
+					mm->start_code = seg->addr;
+					mm->end_code = seg->addr +
+						phdr->p_memsz;
+				}
 			} else if (!mm->start_data) {
 				mm->start_data = seg->addr;
 #ifndef CONFIG_MMU
@@ -1123,8 +1128,10 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 		if (mm) {
 			if (phdr->p_flags & PF_X) {
-				mm->start_code = maddr;
-				mm->end_code = maddr + phdr->p_memsz;
+				if (!mm->start_code) {
+					mm->start_code = maddr;
+					mm->end_code = maddr + phdr->p_memsz;
+				}
 			} else if (!mm->start_data) {
 				mm->start_data = maddr;
 				mm->end_data = maddr + phdr->p_memsz;
-- 
cgit v1.2.3-70-g09d2


From 105fd108a66ceff2b0fb710582b97d61ee4c9d40 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 23 Mar 2007 00:10:02 -0700
Subject: [PATCH] "ext[34]: EA block reference count racing fix" performance
 fix

A little mistake in 8a2bfdcbfa441d8b0e5cb9c9a7f45f77f80da465 is making all
transactions synchronous, which reduces ext3 performance to comical levels.

Cc: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/xattr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 12f7dda1232..f58cbb26323 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -495,7 +495,8 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode,
 		BHDR(bh)->h_refcount = cpu_to_le32(
 				le32_to_cpu(BHDR(bh)->h_refcount) - 1);
 		error = ext3_journal_dirty_metadata(handle, bh);
-		handle->h_sync = 1;
+		if (IS_SYNC(inode))
+			handle->h_sync = 1;
 		DQUOT_FREE_BLOCK(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
-- 
cgit v1.2.3-70-g09d2


From d32b687e2e16e8174e88f186c3bae39a13e57b3d Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 5 Mar 2007 02:49:48 +0100
Subject: 9p: make struct v9fs_cached_file_operations static

This patch makes te needlessly global struct v9fs_cached_file_operations
static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs_vfs.h | 1 -
 fs/9p/vfs_file.c | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 8ada4c5c5d7..6a82d39dc49 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,7 +40,6 @@
 extern struct file_system_type v9fs_fs_type;
 extern const struct address_space_operations v9fs_addr_operations;
 extern const struct file_operations v9fs_file_operations;
-extern const struct file_operations v9fs_cached_file_operations;
 extern const struct file_operations v9fs_dir_operations;
 extern struct dentry_operations v9fs_dentry_operations;
 extern struct dentry_operations v9fs_cached_dentry_operations;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 653dfa5b253..c7b67725384 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -42,6 +42,8 @@
 #include "v9fs_vfs.h"
 #include "fid.h"
 
+static const struct file_operations v9fs_cached_file_operations;
+
 /**
  * v9fs_file_open - open a file (or directory)
  * @inode: inode to be opened
@@ -245,7 +247,7 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	return total;
 }
 
-const struct file_operations v9fs_cached_file_operations = {
+static const struct file_operations v9fs_cached_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = do_sync_read,
 	.aio_read = generic_file_aio_read,
-- 
cgit v1.2.3-70-g09d2


From 78062cb2e54ffe0df811dce5e68b54da9b8c9025 Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 22 Mar 2007 17:01:07 -0700
Subject: ocfs2_dlm: Fix lockres ref counting bug

During umount, the umount thread migrates the lockres' and the dlm_thread
frees the empty lockres'. Due to a race, the reference counting on the
lockres goes awry leading to extra puts.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmdomain.c |  6 ++++--
 fs/ocfs2/dlm/dlmthread.c | 10 ++--------
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6087c4749fe..dbbac184c26 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -138,8 +138,10 @@ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
 
 void __dlm_unhash_lockres(struct dlm_lock_resource *lockres)
 {
-	hlist_del_init(&lockres->hash_node);
-	dlm_lockres_put(lockres);
+	if (!hlist_unhashed(&lockres->hash_node)) {
+		hlist_del_init(&lockres->hash_node);
+		dlm_lockres_put(lockres);
+	}
 }
 
 void __dlm_insert_lockres(struct dlm_ctxt *dlm,
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 6421a8fae1d..2b264c6ba03 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -256,20 +256,14 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
 			break;
 		}
 
-		mlog(0, "removing lockres %.*s:%p from purgelist\n",
-		     lockres->lockname.len, lockres->lockname.name, lockres);
-		list_del_init(&lockres->purge);
-		dlm_lockres_put(lockres);
-		dlm->purge_count--;
+		dlm_lockres_get(lockres);
 
 		/* This may drop and reacquire the dlm spinlock if it
 		 * has to do migration. */
-		mlog(0, "calling dlm_purge_lockres!\n");
-		dlm_lockres_get(lockres);
 		if (dlm_purge_lockres(dlm, lockres))
 			BUG();
+
 		dlm_lockres_put(lockres);
-		mlog(0, "DONE calling dlm_purge_lockres!\n");
 
 		/* Avoid adding any scheduling latencies */
 		cond_resched_lock(&dlm->spinlock);
-- 
cgit v1.2.3-70-g09d2


From 2f5bf1f2d061dea5146aa283685ce2b00cea2f3d Mon Sep 17 00:00:00 2001
From: Sunil Mushran <sunil.mushran@oracle.com>
Date: Thu, 22 Mar 2007 17:08:32 -0700
Subject: ocfs2_dlm: Check for migrateable lockres in dlm_empty_lockres()

In dlm_migrate_lockres(), we check upfront whether the lockres is a
candidate for migration. This patch encapsulates that code in a separate
function so that dlm_empty_lockres() can also use it during umount. This
patch addresses the umount process spinning problem.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmdomain.c |  2 +
 fs/ocfs2/dlm/dlmmaster.c | 99 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index dbbac184c26..c558442a0b4 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -657,6 +657,8 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
 		dlm_kick_thread(dlm, NULL);
 
 		while (dlm_migrate_all_locks(dlm)) {
+			/* Give dlm_thread time to purge the lockres' */
+			msleep(500);
 			mlog(0, "%s: more migration to do\n", dlm->name);
 		}
 		dlm_mark_domain_leaving(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9229e04362f..6edffca99d9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2424,6 +2424,57 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
 	dlm_lockres_put(res);
 }
 
+/* Checks whether the lockres can be migrated. Returns 0 if yes, < 0
+ * if not. If 0, numlocks is set to the number of locks in the lockres.
+ */
+static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
+				      struct dlm_lock_resource *res,
+				      int *numlocks)
+{
+	int ret;
+	int i;
+	int count = 0;
+	struct list_head *queue, *iter;
+	struct dlm_lock *lock;
+
+	assert_spin_locked(&res->spinlock);
+
+	ret = -EINVAL;
+	if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
+		mlog(0, "cannot migrate lockres with unknown owner!\n");
+		goto leave;
+	}
+
+	if (res->owner != dlm->node_num) {
+		mlog(0, "cannot migrate lockres this node doesn't own!\n");
+		goto leave;
+	}
+
+	ret = 0;
+	queue = &res->granted;
+	for (i = 0; i < 3; i++) {
+		list_for_each(iter, queue) {
+			lock = list_entry(iter, struct dlm_lock, list);
+			++count;
+			if (lock->ml.node == dlm->node_num) {
+				mlog(0, "found a lock owned by this node still "
+				     "on the %s queue!  will not migrate this "
+				     "lockres\n", (i == 0 ? "granted" :
+						   (i == 1 ? "converting" :
+						    "blocked")));
+				ret = -ENOTEMPTY;
+				goto leave;
+			}
+		}
+		queue++;
+	}
+
+	*numlocks = count;
+	mlog(0, "migrateable lockres having %d locks\n", *numlocks);
+
+leave:
+	return ret;
+}
 
 /*
  * DLM_MIGRATE_LOCKRES
@@ -2437,14 +2488,12 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	struct dlm_master_list_entry *mle = NULL;
 	struct dlm_master_list_entry *oldmle = NULL;
  	struct dlm_migratable_lockres *mres = NULL;
-	int ret = -EINVAL;
+	int ret = 0;
 	const char *name;
 	unsigned int namelen;
 	int mle_added = 0;
-	struct list_head *queue, *iter;
-	int i;
-	struct dlm_lock *lock;
-	int empty = 1, wake = 0;
+	int numlocks;
+	int wake = 0;
 
 	if (!dlm_grab(dlm))
 		return -EINVAL;
@@ -2458,42 +2507,16 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	 * ensure this lockres is a proper candidate for migration
 	 */
 	spin_lock(&res->spinlock);
-	if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
-		mlog(0, "cannot migrate lockres with unknown owner!\n");
-		spin_unlock(&res->spinlock);
-		goto leave;
-	}
-	if (res->owner != dlm->node_num) {
-		mlog(0, "cannot migrate lockres this node doesn't own!\n");
+	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+	if (ret < 0) {
 		spin_unlock(&res->spinlock);
 		goto leave;
 	}
-	mlog(0, "checking queues...\n");
-	queue = &res->granted;
-	for (i=0; i<3; i++) {
-		list_for_each(iter, queue) {
-			lock = list_entry (iter, struct dlm_lock, list);
-			empty = 0;
-			if (lock->ml.node == dlm->node_num) {
-				mlog(0, "found a lock owned by this node "
-				     "still on the %s queue!  will not "
-				     "migrate this lockres\n",
-				     i==0 ? "granted" :
-				     (i==1 ? "converting" : "blocked"));
-				spin_unlock(&res->spinlock);
-				ret = -ENOTEMPTY;
-				goto leave;
-			}
-		}
-		queue++;
-	}
-	mlog(0, "all locks on this lockres are nonlocal.  continuing\n");
 	spin_unlock(&res->spinlock);
 
 	/* no work to do */
-	if (empty) {
+	if (numlocks == 0) {
 		mlog(0, "no locks were found on this lockres! done!\n");
-		ret = 0;
 		goto leave;
 	}
 
@@ -2729,6 +2752,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
 	int ret;
 	int lock_dropped = 0;
+	int numlocks;
 
 	spin_lock(&res->spinlock);
 	if (res->owner != dlm->node_num) {
@@ -2740,6 +2764,13 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 		spin_unlock(&res->spinlock);
 		goto leave;
 	}
+
+	/* No need to migrate a lockres having no locks */
+	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+	if (ret >= 0 && numlocks == 0) {
+		spin_unlock(&res->spinlock);
+		goto leave;
+	}
 	spin_unlock(&res->spinlock);
 
 	/* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
-- 
cgit v1.2.3-70-g09d2


From 485ddb4b9741bafb70b22e5c1f9b4f37dc3e85bd Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 27 Mar 2007 08:55:08 +0200
Subject: 1/2 splice: dont steal

Stealing pages with splice is problematic because we cannot just insert
an uptodate page into the pagecache and hope the filesystem can take care
of it later.

We also cannot just ClearPageUptodate, then hope prepare_write does not
write anything into the page, because I don't think prepare_write gives
that guarantee.

Remove support for SPLICE_F_MOVE for now. If we really want to bring it
back, we might be able to do so with a the new filesystem buffered write
aops APIs I'm working on. If we really don't want to bring it back, then
we should decide that sooner rather than later, and remove the flag and
all the stealing infrastructure before anybody starts using it.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 101 +++++++++++++++++++++++-------------------------------------
 1 file changed, 38 insertions(+), 63 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 2fca6ebf4cc..badc78ff124 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -576,76 +576,51 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	if (this_len + offset > PAGE_CACHE_SIZE)
 		this_len = PAGE_CACHE_SIZE - offset;
 
-	/*
-	 * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full
-	 * page.
-	 */
-	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
+find_page:
+	page = find_lock_page(mapping, index);
+	if (!page) {
+		ret = -ENOMEM;
+		page = page_cache_alloc_cold(mapping);
+		if (unlikely(!page))
+			goto out_ret;
+
 		/*
-		 * If steal succeeds, buf->page is now pruned from the
-		 * pagecache and we can reuse it. The page will also be
-		 * locked on successful return.
+		 * This will also lock the page
 		 */
-		if (buf->ops->steal(pipe, buf))
-			goto find_page;
-
-		page = buf->page;
-		if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) {
-			unlock_page(page);
-			goto find_page;
-		}
-
-		page_cache_get(page);
-
-		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-			lru_cache_add(page);
-	} else {
-find_page:
-		page = find_lock_page(mapping, index);
-		if (!page) {
-			ret = -ENOMEM;
-			page = page_cache_alloc_cold(mapping);
-			if (unlikely(!page))
-				goto out_ret;
+		ret = add_to_page_cache_lru(page, mapping, index,
+					    GFP_KERNEL);
+		if (unlikely(ret))
+			goto out;
+	}
 
-			/*
-			 * This will also lock the page
-			 */
-			ret = add_to_page_cache_lru(page, mapping, index,
-						    GFP_KERNEL);
+	/*
+	 * We get here with the page locked. If the page is also
+	 * uptodate, we don't need to do more. If it isn't, we
+	 * may need to bring it in if we are not going to overwrite
+	 * the full page.
+	 */
+	if (!PageUptodate(page)) {
+		if (this_len < PAGE_CACHE_SIZE) {
+			ret = mapping->a_ops->readpage(file, page);
 			if (unlikely(ret))
 				goto out;
-		}
 
-		/*
-		 * We get here with the page locked. If the page is also
-		 * uptodate, we don't need to do more. If it isn't, we
-		 * may need to bring it in if we are not going to overwrite
-		 * the full page.
-		 */
-		if (!PageUptodate(page)) {
-			if (this_len < PAGE_CACHE_SIZE) {
-				ret = mapping->a_ops->readpage(file, page);
-				if (unlikely(ret))
-					goto out;
-
-				lock_page(page);
-
-				if (!PageUptodate(page)) {
-					/*
-					 * Page got invalidated, repeat.
-					 */
-					if (!page->mapping) {
-						unlock_page(page);
-						page_cache_release(page);
-						goto find_page;
-					}
-					ret = -EIO;
-					goto out;
+			lock_page(page);
+
+			if (!PageUptodate(page)) {
+				/*
+				 * Page got invalidated, repeat.
+				 */
+				if (!page->mapping) {
+					unlock_page(page);
+					page_cache_release(page);
+					goto find_page;
 				}
-			} else
-				SetPageUptodate(page);
-		}
+				ret = -EIO;
+				goto out;
+			}
+		} else
+			SetPageUptodate(page);
 	}
 
 	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-- 
cgit v1.2.3-70-g09d2


From 08c72591636829d40bd695d43ec6d2a8191b668b Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Tue, 27 Mar 2007 08:55:39 +0200
Subject: 2/2 splice: dont readpage

Splice does not need to readpage to bring the page uptodate before writing
to it, because prepare_write will take care of that for us.

Splice is also wrong to SetPageUptodate before the page is actually uptodate.
This results in the old uninitialised memory leak. This gets fixed as a
matter of course when removing the readpage logic.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 30 ------------------------------
 1 file changed, 30 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index badc78ff124..ae50208e3e6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -593,36 +593,6 @@ find_page:
 			goto out;
 	}
 
-	/*
-	 * We get here with the page locked. If the page is also
-	 * uptodate, we don't need to do more. If it isn't, we
-	 * may need to bring it in if we are not going to overwrite
-	 * the full page.
-	 */
-	if (!PageUptodate(page)) {
-		if (this_len < PAGE_CACHE_SIZE) {
-			ret = mapping->a_ops->readpage(file, page);
-			if (unlikely(ret))
-				goto out;
-
-			lock_page(page);
-
-			if (!PageUptodate(page)) {
-				/*
-				 * Page got invalidated, repeat.
-				 */
-				if (!page->mapping) {
-					unlock_page(page);
-					page_cache_release(page);
-					goto find_page;
-				}
-				ret = -EIO;
-				goto out;
-			}
-		} else
-			SetPageUptodate(page);
-	}
-
 	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
 	if (unlikely(ret)) {
 		loff_t isize = i_size_read(mapping->host);
-- 
cgit v1.2.3-70-g09d2


From 40bee44eaef91b6030037c8bb47f909181fb1edc Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 21 Mar 2007 13:11:02 +0100
Subject: Export __splice_from_pipe()

Ocfs2 wants to implement it's own splice write actor so that it can better
manage cluster / page locks. This lets us re-use the rest of splice write
while only providing our own code where it's actually important.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c               | 7 ++++---
 include/linux/pipe_fs_i.h | 4 ++++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index ae50208e3e6..07f6556add0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -651,9 +651,9 @@ out_ret:
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
-				  struct file *out, loff_t *ppos, size_t len,
-				  unsigned int flags, splice_actor *actor)
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+			   struct file *out, loff_t *ppos, size_t len,
+			   unsigned int flags, splice_actor *actor)
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
@@ -747,6 +747,7 @@ static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 
 	return ret;
 }
+EXPORT_SYMBOL(__splice_from_pipe);
 
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 			 loff_t *ppos, size_t len, unsigned int flags,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 2e19478e9e8..8bcbc54e1b4 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -99,4 +99,8 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
 				loff_t *, size_t, unsigned int,
 				splice_actor *);
 
+extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *,
+				  loff_t *, size_t, unsigned int,
+				  splice_actor *);
+
 #endif
-- 
cgit v1.2.3-70-g09d2


From 598b9a56373f0596ed48f7af730706aaee998d5f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 26 Mar 2007 21:32:08 -0800
Subject: [PATCH] knfsd: allow nfsd READDIR to return 64bit cookies

->readdir passes lofft_t offsets (used as nfs cookies) to
nfs3svc_encode_entry{,_plus}, but when they pass it on to encode_entry it
becomes an 'off_t', which isn't good.

So filesystems that returned 64bit offsets would lose.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfs3xdr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 6f677988c71..7e4bb0af24d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -859,8 +859,8 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 #define NFS3_ENTRY_BAGGAGE	(2 + 1 + 2 + 1)
 #define NFS3_ENTRYPLUS_BAGGAGE	(1 + 21 + 1 + (NFS3_FHSIZE >> 2))
 static int
-encode_entry(struct readdir_cd *ccd, const char *name,
-	     int namlen, off_t offset, ino_t ino, unsigned int d_type, int plus)
+encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
+	     loff_t offset, ino_t ino, unsigned int d_type, int plus)
 {
 	struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
 		       					common);
@@ -880,7 +880,7 @@ encode_entry(struct readdir_cd *ccd, const char *name,
 			*cd->offset1 = htonl(offset64 & 0xffffffff);
 			cd->offset1 = NULL;
 		} else {
-			xdr_encode_hyper(cd->offset, (u64) offset);
+			xdr_encode_hyper(cd->offset, offset64);
 		}
 	}
 
-- 
cgit v1.2.3-70-g09d2


From 54c044094947826105317dadd01deca083627ea1 Mon Sep 17 00:00:00 2001
From: Bruce Fields <bfields@citi.umich.edu>
Date: Mon, 26 Mar 2007 21:32:09 -0800
Subject: [PATCH] knfsd: nfsd4: fix inheritance flags on v4 ace derived from
 posix default ace

A regression introduced in the last set of acl patches removed the
INHERIT_ONLY flag from aces derived from the posix acl.  Fix.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfs4acl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 832673b1458..673a53c014a 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -228,7 +228,7 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
 	struct posix_acl_summary pas;
 	unsigned short deny;
 	int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ?
-					NFS4_INHERITANCE_FLAGS : 0);
+		NFS4_INHERITANCE_FLAGS | NFS4_ACE_INHERIT_ONLY_ACE : 0);
 
 	BUG_ON(pacl->a_count < 3);
 	summarize_posix_acl(pacl, &pas);
-- 
cgit v1.2.3-70-g09d2


From 21315edd4877b593d5bf17a601a48fc836b8ba58 Mon Sep 17 00:00:00 2001
From: Bruce Fields <bfields@citi.umich.edu>
Date: Mon, 26 Mar 2007 21:32:09 -0800
Subject: [PATCH] knfsd: nfsd4: demote "clientid in use" printk to a dprintk

The reused clientid here is a more of a problem for the client than the
server, and the client can report the problem itself if it's serious.

Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfs4state.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9e406799920..0b03c1ed21c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -750,9 +750,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		status = nfserr_clid_inuse;
 		if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
 				|| conf->cl_addr != sin->sin_addr.s_addr) {
-			printk("NFSD: setclientid: string in use by client"
-			"(clientid %08x/%08x)\n",
-			conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
+			dprintk("NFSD: setclientid: string in use by client"
+				"at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr));
 			goto out;
 		}
 	}
-- 
cgit v1.2.3-70-g09d2


From 79f6523a16b2010969418f8df25fe61498dec66b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Mon, 26 Mar 2007 21:32:10 -0800
Subject: [PATCH] knfsd: nfsd4: remove superfluous cancel_delayed_work() call

This cancel_delayed_work call is called from a function that is only called
from a piece of code that immediate follows a cancel and destruction of the
workqueue, so it's clearly a mistake.

Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfsd/nfs4state.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0b03c1ed21c..af360705e55 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3260,7 +3260,6 @@ __nfs4_state_shutdown(void)
 		unhash_delegation(dp);
 	}
 
-	cancel_delayed_work(&laundromat_work);
 	nfsd4_shutdown_recdir();
 	nfs4_init = 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 5c46010af210712c8a390c7fe50ff05448983061 Mon Sep 17 00:00:00 2001
From: Mika Kukkonen <mikukkon@iki.fi>
Date: Mon, 26 Mar 2007 21:32:33 -0800
Subject: [PATCH] Fix kernel build with EMBEDDED & PROC_FS & !PROC_SYSCTL

Without attached patch against current -git I get following with
!PROC_SYSCTL (with EMBEDDED and PROC_FS set):

    CC      init/version.o
    LD      init/built-in.o
    LD      vmlinux
  fs/built-in.o: In function `do_proc_sys_lookup':
  proc_sysctl.c:(.text+0x26583): undefined reference to `sysctl_head_next'
  fs/built-in.o: In function `proc_sys_revalidate':
  proc_sysctl.c:(.text+0x265bb): undefined reference to `sysctl_head_finish'
  fs/built-in.o: In function `proc_sys_readdir':
  proc_sysctl.c:(.text+0x26720): undefined reference to `sysctl_head_next'
  proc_sysctl.c:(.text+0x267d8): undefined reference to `sysctl_head_finish'
  proc_sysctl.c:(.text+0x268e7): undefined reference to `sysctl_head_next'
  proc_sysctl.c:(.text+0x26910): undefined reference to `sysctl_head_finish'
  fs/built-in.o: In function `proc_sys_write':
  proc_sysctl.c:(.text+0x2695d): undefined reference to `sysctl_perm'
  proc_sysctl.c:(.text+0x2699c): undefined reference to `sysctl_head_finish'
  fs/built-in.o: In function `proc_sys_read':
  proc_sysctl.c:(.text+0x269e9): undefined reference to `sysctl_perm'
  proc_sysctl.c:(.text+0x26a25): undefined reference to `sysctl_head_finish'
  fs/built-in.o: In function `proc_sys_permission':
  proc_sysctl.c:(.text+0x26ad1): undefined reference to `sysctl_perm'
  proc_sysctl.c:(.text+0x26adb): undefined reference to `sysctl_head_finish'
  fs/built-in.o: In function `proc_sys_lookup':
  proc_sysctl.c:(.text+0x26b39): undefined reference to `sysctl_head_finish'
  make: *** [vmlinux] Virhe 1

All those functions are in fs/proc/proc_sysctl.c, which has no CONFIG_
#define's in it, so the patch makes the compilation of that file to depend
on CONFIG_PROC_SYSCTL (the simplest choice).

Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index a6b3a8f878f..bce38e3f06c 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,8 +8,9 @@ proc-y			:= nommu.o task_nommu.o
 proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
 
 proc-y       += inode.o root.o base.o generic.o array.o \
-		proc_tty.o proc_misc.o proc_sysctl.o
+		proc_tty.o proc_misc.o
 
+proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
 proc-$(CONFIG_PROC_KCORE)	+= kcore.o
 proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
 proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
-- 
cgit v1.2.3-70-g09d2


From ed4bb1063171b2f44a40b0a9c400dedb0590dce6 Mon Sep 17 00:00:00 2001
From: Jean Tourrilhes <jt@hpl.hp.com>
Date: Fri, 23 Mar 2007 00:26:49 +0000
Subject: [PATCH] wext: Add missing ioctls to 64<->32 conversion

 	Johannes Berg and Michael Buesch noticed that the WPA ioctls
were missing from the 64<->32 bit conversion. This means that when
using a 32 bits userspace on a 64 bit kernel, those ioctls fail.

Signed-off-by: Jean Tourrilhes <jt@hpl.hp.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 fs/compat_ioctl.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c81c958b3e1..8b1c5d8bf4e 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2553,11 +2553,15 @@ HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
 HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
 /* wireless */
 HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl)
@@ -2565,6 +2569,11 @@ HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl)
+HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl)
 HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
 HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
 HANDLE_IOCTL(RTC_IRQP_READ32, rtc_ioctl)
-- 
cgit v1.2.3-70-g09d2


From 28defbea64622f69d65a6079bf800cedb9915a5f Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Tue, 27 Mar 2007 15:44:01 -0700
Subject: [PATCH] aio: remove bare user-triggerable error printk

The user can generate console output if they cause do_mmap() to fail
during sys_io_setup().  This was seen in a regression test that does
exactly that by spinning calling mmap() until it gets -ENOMEM before
calling io_setup().

We don't need this printk at all, just remove it.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 0b4ee0a5c83..e4598d6d49d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -136,7 +136,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 				  0);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
-		printk("mmap err: %ld\n", -info->mmap_base);
 		info->mmap_size = 0;
 		aio_free_ring(ctx);
 		return -EAGAIN;
-- 
cgit v1.2.3-70-g09d2


From d9993c37ef87c758d4a6e63972395b1cf8a4cb7b Mon Sep 17 00:00:00 2001
From: Dmitriy Monakhov <dmonakhov@sw.ru>
Date: Thu, 29 Mar 2007 14:24:09 +0200
Subject: [PATCH] splice: partial write fix

Currently if partial write has happened while ->commit_write() then page
wasn't marked as accessed and rebalanced.

Signed-off-by: Monakhov Dmitriy <dmonakhov@openvz.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index 07f6556add0..5428b0ff3b6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -627,18 +627,25 @@ find_page:
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-	if (!ret) {
+	if (ret) {
+		if (ret == AOP_TRUNCATED_PAGE) {
+			page_cache_release(page);
+			goto find_page;
+		}
+		if (ret < 0)
+			goto out;
 		/*
-		 * Return the number of bytes written and mark page as
-		 * accessed, we are now done!
+		 * Partial write has happened, so 'ret' already initialized by
+		 * number of bytes written, Where is nothing we have to do here.
 		 */
+	} else
 		ret = this_len;
-		mark_page_accessed(page);
-		balance_dirty_pages_ratelimited(mapping);
-	} else if (ret == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto find_page;
-	}
+	/*
+	 * Return the number of bytes written and mark page as
+	 * accessed, we are now done!
+	 */
+	mark_page_accessed(page);
+	balance_dirty_pages_ratelimited(mapping);
 out:
 	page_cache_release(page);
 	unlock_page(page);
-- 
cgit v1.2.3-70-g09d2


From 622e696938c6a9c5357d2ec4a07ed2f27d56925a Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Thu, 29 Mar 2007 01:20:32 -0700
Subject: [PATCH] uml: fix compilation problems

Fix a few miscellaneous compilation problems -
	an assignment with mismatched types in ldt.c
	a missing include in mconsole.h which needs a definition of uml_pt_regs
	I missed removing an include of user_util.h in hostfs

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/um/include/mconsole.h | 2 ++
 arch/um/sys-i386/ldt.c     | 3 ++-
 fs/hostfs/hostfs_kern.c    | 1 -
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h
index 2666815b6af..b282839c162 100644
--- a/arch/um/include/mconsole.h
+++ b/arch/um/include/mconsole.h
@@ -12,6 +12,8 @@
 #define u32 uint32_t
 #endif
 
+#include "sysdep/ptrace.h"
+
 #define MCONSOLE_MAGIC (0xcafebabe)
 #define MCONSOLE_MAX_DATA (512)
 #define MCONSOLE_VERSION 2
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index 4a8b4202ef9..a939a7ef022 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -394,7 +394,8 @@ static short * host_ldt_entries = NULL;
 static void ldt_get_host_info(void)
 {
 	long ret;
-	struct ldt_entry * ldt, *tmp;
+	struct ldt_entry * ldt;
+	short *tmp;
 	int i, size, k, order;
 
 	spin_lock(&host_ldt_lock);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 9baf69773ed..a2667db9f6b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -20,7 +20,6 @@
 #include "hostfs.h"
 #include "kern_util.h"
 #include "kern.h"
-#include "user_util.h"
 #include "init.h"
 
 struct hostfs_inode_info {
-- 
cgit v1.2.3-70-g09d2


From 75e8defbe4236a358b1396bc6d9a1231e5eca225 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Thu, 29 Mar 2007 01:20:33 -0700
Subject: [PATCH] uml: hostfs variable renaming

* rename name to host_root_path
* rename data to req_root.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hostfs/hostfs_kern.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a2667db9f6b..fd301a91012 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -938,7 +938,7 @@ static const struct address_space_operations hostfs_link_aops = {
 static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 {
 	struct inode *root_inode;
-	char *name, *data = d;
+	char *host_root_path, *req_root = d;
 	int err;
 
 	sb->s_blocksize = 1024;
@@ -947,16 +947,16 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	sb->s_op = &hostfs_sbops;
 
 	/* NULL is printed as <NULL> by sprintf: avoid that. */
-	if (data == NULL)
-		data = "";
+	if (req_root == NULL)
+		req_root = "";
 
 	err = -ENOMEM;
-	name = kmalloc(strlen(root_ino) + 1
-			+ strlen(data) + 1, GFP_KERNEL);
-	if(name == NULL)
+	host_root_path = kmalloc(strlen(root_ino) + 1
+				 + strlen(req_root) + 1, GFP_KERNEL);
+	if(host_root_path == NULL)
 		goto out;
 
-	sprintf(name, "%s/%s", root_ino, data);
+	sprintf(host_root_path, "%s/%s", root_ino, req_root);
 
 	root_inode = iget(sb, 0);
 	if(root_inode == NULL)
@@ -966,10 +966,10 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	if(err)
 		goto out_put;
 
-	HOSTFS_I(root_inode)->host_filename = name;
-	/* Avoid that in the error path, iput(root_inode) frees again name through
-	 * hostfs_destroy_inode! */
-	name = NULL;
+	HOSTFS_I(root_inode)->host_filename = host_root_path;
+	/* Avoid that in the error path, iput(root_inode) frees again
+	 * host_root_path through hostfs_destroy_inode! */
+	host_root_path = NULL;
 
 	err = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
@@ -989,7 +989,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
  out_put:
         iput(root_inode);
  out_free:
-	kfree(name);
+	kfree(host_root_path);
  out:
 	return(err);
 }
-- 
cgit v1.2.3-70-g09d2


From 05565b65a5309e3e5c86db1975b57f75661bee8f Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 1 Apr 2007 23:49:35 -0700
Subject: [PATCH] proc: fix linkage with CONFIG_SYSCTL=y, CONFIG_PROC_SYSCTL=n

We're using #ifdef CONFIG_SYSCTL, but we should be using CONFIG_PROC_SYSCTL,
so we get

 fs/built-in.o: In function `proc_root_init':
 /usr/src/linux/fs/proc/root.c:83: undefined reference to `proc_sys_init'

Fix that up and remove an ifdef-in-C.

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Helge Hafting <helgehaf@aitel.hist.no>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/internal.h | 4 ++++
 fs/proc/root.c     | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c932aa65e19..f771889183c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,7 +11,11 @@
 
 #include <linux/proc_fs.h>
 
+#ifdef CONFIG_PROC_SYSCTL
 extern int proc_sys_init(void);
+#else
+static inline void proc_sys_init(void) { }
+#endif
 
 struct vmalloc_info {
 	unsigned long	used;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5834a744c2a..41f17037f73 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -79,9 +79,7 @@ void __init proc_root_init(void)
 	proc_device_tree_init();
 #endif
 	proc_bus = proc_mkdir("bus", NULL);
-#ifdef CONFIG_SYSCTL
 	proc_sys_init();
-#endif
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
-- 
cgit v1.2.3-70-g09d2


From 03221702608c60b470fc86a23bdf4bc30e5bd59f Mon Sep 17 00:00:00 2001
From: Brian Pomerantz <bapper@piratehaven.org>
Date: Sun, 1 Apr 2007 23:49:41 -0700
Subject: [PATCH] fix page leak during core dump

When the dump cannot occur most likely because of a full file system and
the page to be written is the zero page, the call to page_cache_release()
is missed.

Signed-off-by: Brian Pomerantz <bapper@mvista.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: David Howells <dhowells@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c       | 5 ++++-
 fs/binfmt_elf_fdpic.c | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index a2fceba7ef8..9cc4f0a8aaa 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1704,7 +1704,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 				DUMP_SEEK(PAGE_SIZE);
 			} else {
 				if (page == ZERO_PAGE(addr)) {
-					DUMP_SEEK(PAGE_SIZE);
+					if (!dump_seek(file, PAGE_SIZE)) {
+						page_cache_release(page);
+						goto end_coredump;
+					}
 				} else {
 					void *kaddr;
 					flush_cache_page(vma, addr,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 47d6d49d1fb..f3ddca4a387 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1480,8 +1480,8 @@ static int elf_fdpic_dump_segments(struct file *file, struct mm_struct *mm,
 				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
 			else if (page == ZERO_PAGE(addr)) {
-				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 				page_cache_release(page);
+				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
 			else {
 				void *kaddr;
-- 
cgit v1.2.3-70-g09d2


From 1aa9b4b9bc10a0cf6e6109c2997d759a76e840e5 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 1 Apr 2007 23:49:43 -0700
Subject: [PATCH] revert "retries in ext3_prepare_write() violate ordering
 requirements"

Revert e92a4d595b464c4aae64be39ca61a9ffe9c8b278.

Dmitry points out

"When we block_prepare_write() failed while ext3_prepare_write() we jump to
 "failure" label and call ext3_prepare_failure() witch search last mapped bh
 and invoke commit_write untill it.  This is wrong!!  because some bh from
 begining to the last mapped bh may be not uptodate.  As a result we commit to
 disk not uptodate page content witch contains garbage from previous usage."

and

"Unexpected file size increasing."

   Call trace the same as it was in first issue but result is different.
   For example we have file with i_size is zero.  we want write two blocks ,
   but fs has only one free block.

   ->ext3_prepare_write(...from == 0, to == 2048)
     retry:
     ->block_prepare_write() == -ENOSPC# we failed but allocated one block here.
     ->ext3_prepare_failure()
       ->commit_write( from == 0, to == 1024) # after this i_size becomes 1024 :)
     if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
        goto retry;

   Finally when all retries will be spended ext3_prepare_failure return
   -ENOSPC, but i_size was increased and later block trimm procedures can't
   help here.

We don't appear to have the horsepower to fix these issues, so let's put
things back the way they were for now.

Cc: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: Andrey Savochkin <saw@sw.ru>
Cc: <linux-ext4@vger.kernel.org>
Cc: Dmitriy Monakhov <dmonakhov@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext3/inode.c | 85 +++++++--------------------------------------------------
 1 file changed, 10 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8a824f4ce5c..a5b150f7e8a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1148,102 +1148,37 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext3_prepare_failure(struct file *file, struct page *page,
-				unsigned from, unsigned to)
-{
-	struct address_space *mapping;
-	struct buffer_head *bh, *head, *next;
-	unsigned block_start, block_end;
-	unsigned blocksize;
-	int ret;
-	handle_t *handle = ext3_journal_current_handle();
-
-	mapping = page->mapping;
-	if (ext3_should_writeback_data(mapping->host)) {
-		/* optimization: no constraints about data */
-skip:
-		return ext3_journal_stop(handle);
-	}
-
-	head = page_buffers(page);
-	blocksize = head->b_size;
-	for (	bh = head, block_start = 0;
-		bh != head || !block_start;
-	    	block_start = block_end, bh = next)
-	{
-		next = bh->b_this_page;
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to) {
-			block_start = to;
-			break;
-		}
-		if (!buffer_mapped(bh))
-		/* prepare_write failed on this bh */
-			break;
-		if (ext3_should_journal_data(mapping->host)) {
-			ret = do_journal_get_write_access(handle, bh);
-			if (ret) {
-				ext3_journal_stop(handle);
-				return ret;
-			}
-		}
-	/*
-	 * block_start here becomes the first block where the current iteration
-	 * of prepare_write failed.
-	 */
-	}
-	if (block_start <= from)
-		goto skip;
-
-	/* commit allocated and zeroed buffers */
-	return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext3_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, ret2;
-	int needed_blocks = ext3_writepage_trans_blocks(inode);
+	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext3_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
 	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext3_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
-		goto failure;
+		goto prepare_write_failed;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
-		if (ret)
-			/* fatal error, just put the handle and return */
-			journal_stop(handle);
 	}
-	return ret;
-
-failure:
-	ret2 = ext3_prepare_failure(file, page, from, to);
-	if (ret2 < 0)
-		return ret2;
+prepare_write_failed:
+	if (ret)
+		ext3_journal_stop(handle);
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-	/* retry number exceeded, or other error like -EDQUOT */
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7479d2b90b103f84d956a7177b3f99cbd472b345 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 1 Apr 2007 23:49:44 -0700
Subject: [PATCH] revert "retries in ext4_prepare_write() violate ordering
 requirements"

Revert b46be05004abb419e303e66e143eed9f8a6e9f3f.  Same reasoning as for ext3.

Cc: Kirill Korotaev <dev@openvz.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Ken Chen <kenneth.w.chen@intel.com>
Cc: Andrey Savochkin <saw@sw.ru>
Cc: <linux-ext4@vger.kernel.org>
Cc: Dmitriy Monakhov <dmonakhov@openvz.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ext4/inode.c | 85 +++++++--------------------------------------------------
 1 file changed, 10 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fbff4b9e122..810b6d6474b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1147,102 +1147,37 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
-/*
- * The idea of this helper function is following:
- * if prepare_write has allocated some blocks, but not all of them, the
- * transaction must include the content of the newly allocated blocks.
- * This content is expected to be set to zeroes by block_prepare_write().
- * 2006/10/14  SAW
- */
-static int ext4_prepare_failure(struct file *file, struct page *page,
-				unsigned from, unsigned to)
-{
-	struct address_space *mapping;
-	struct buffer_head *bh, *head, *next;
-	unsigned block_start, block_end;
-	unsigned blocksize;
-	int ret;
-	handle_t *handle = ext4_journal_current_handle();
-
-	mapping = page->mapping;
-	if (ext4_should_writeback_data(mapping->host)) {
-		/* optimization: no constraints about data */
-skip:
-		return ext4_journal_stop(handle);
-	}
-
-	head = page_buffers(page);
-	blocksize = head->b_size;
-	for (	bh = head, block_start = 0;
-		bh != head || !block_start;
-	    	block_start = block_end, bh = next)
-	{
-		next = bh->b_this_page;
-		block_end = block_start + blocksize;
-		if (block_end <= from)
-			continue;
-		if (block_start >= to) {
-			block_start = to;
-			break;
-		}
-		if (!buffer_mapped(bh))
-		/* prepare_write failed on this bh */
-			break;
-		if (ext4_should_journal_data(mapping->host)) {
-			ret = do_journal_get_write_access(handle, bh);
-			if (ret) {
-				ext4_journal_stop(handle);
-				return ret;
-			}
-		}
-	/*
-	 * block_start here becomes the first block where the current iteration
-	 * of prepare_write failed.
-	 */
-	}
-	if (block_start <= from)
-		goto skip;
-
-	/* commit allocated and zeroed buffers */
-	return mapping->a_ops->commit_write(file, page, from, block_start);
-}
-
 static int ext4_prepare_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	int ret, ret2;
-	int needed_blocks = ext4_writepage_trans_blocks(inode);
+	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
 
 retry:
 	handle = ext4_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
 		ret = nobh_prepare_write(page, from, to, ext4_get_block);
 	else
 		ret = block_prepare_write(page, from, to, ext4_get_block);
 	if (ret)
-		goto failure;
+		goto prepare_write_failed;
 
 	if (ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
-		if (ret)
-			/* fatal error, just put the handle and return */
-			ext4_journal_stop(handle);
 	}
-	return ret;
-
-failure:
-	ret2 = ext4_prepare_failure(file, page, from, to);
-	if (ret2 < 0)
-		return ret2;
+prepare_write_failed:
+	if (ret)
+		ext4_journal_stop(handle);
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
-	/* retry number exceeded, or other error like -EDQUOT */
+out:
 	return ret;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 2363cc0264c42636e9e7622f78dde5c2f66beb8e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 4 Apr 2007 19:08:22 -0700
Subject: [PATCH] remove protection of LANANA-reserved majors

Revert all this.  It can cause device-mapper to receive a different major from
earlier kernels and it turns out that the Amanda backup program (via GNU tar,
apparently) checks major numbers on files when performing incremental backups.

Which is a bit broken of Amanda (or tar), but this feature isn't important
enough to justify the churn.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/genhd.c          |  2 --
 drivers/base/core.c    | 14 --------------
 fs/char_dev.c          |  2 --
 include/linux/kdev_t.h |  2 --
 4 files changed, 20 deletions(-)

(limited to 'fs')

diff --git a/block/genhd.c b/block/genhd.c
index 050a1f0f3a8..441432a142f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -62,8 +62,6 @@ int register_blkdev(unsigned int major, const char *name)
 	/* temporary */
 	if (major == 0) {
 		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
-			if (is_lanana_major(index))
-				continue;
 			if (major_names[index] == NULL)
 				break;
 		}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index ad0f4a2f25c..d7fcf823a42 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -27,20 +27,6 @@
 int (*platform_notify)(struct device * dev) = NULL;
 int (*platform_notify_remove)(struct device * dev) = NULL;
 
-/*
- * Detect the LANANA-assigned LOCAL/EXPERIMENTAL majors
- */
-bool is_lanana_major(unsigned int major)
-{
-	if (major >= 60 && major <= 63)
-		return 1;
-	if (major >= 120 && major <= 127)
-		return 1;
-	if (major >= 240 && major <= 254)
-		return 1;
-	return 0;
-}
-
 /*
  * sysfs bindings for devices.
  */
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 78ced721554..164a45cdaf5 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -109,8 +109,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 	/* temporary */
 	if (major == 0) {
 		for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
-			if (is_lanana_major(i))
-				continue;
 			if (chrdevs[i] == NULL)
 				break;
 		}
diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index 4c2c3737e41..2dacab8becc 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -87,8 +87,6 @@ static inline unsigned sysv_minor(u32 dev)
 	return dev & 0x3ffff;
 }
 
-bool is_lanana_major(unsigned int major);
-
 #else /* __KERNEL__ */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From a5bfffac645a7b2d8119f8bbae34df5c94832799 Mon Sep 17 00:00:00 2001
From: Timo Savola <tsavola@movial.fi>
Date: Sun, 8 Apr 2007 16:04:00 -0700
Subject: [PATCH] fuse: validate rootmode mount option

If rootmode isn't valid, we hit the BUG() in fuse_init_inode.  Now
EINVAL is returned.

Signed-off-by: Timo Savola <tsavola@movial.fi>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dir.c    | 5 +++--
 fs/fuse/fuse_i.h | 5 +++++
 fs/fuse/inode.c  | 2 ++
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 406bf61ed51..8890eba1db5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -195,7 +195,7 @@ static struct dentry_operations fuse_dentry_operations = {
 	.d_revalidate	= fuse_dentry_revalidate,
 };
 
-static int valid_mode(int m)
+int fuse_valid_type(int m)
 {
 	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
 		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
@@ -248,7 +248,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	fuse_put_request(fc, req);
 	/* Zero nodeid is same as -ENOENT, but with valid timeout */
 	if (!err && outarg.nodeid &&
-	    (invalid_nodeid(outarg.nodeid) || !valid_mode(outarg.attr.mode)))
+	    (invalid_nodeid(outarg.nodeid) ||
+	     !fuse_valid_type(outarg.attr.mode)))
 		err = -EIO;
 	if (!err && outarg.nodeid) {
 		inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b98b20de740..68ae87cbafa 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -552,3 +552,8 @@ int fuse_ctl_add_conn(struct fuse_conn *fc);
  * Remove connection from control filesystem
  */
 void fuse_ctl_remove_conn(struct fuse_conn *fc);
+
+/**
+ * Is file type valid?
+ */
+int fuse_valid_type(int m);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5ab8e50e780..608db81219a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -330,6 +330,8 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 		case OPT_ROOTMODE:
 			if (match_octal(&args[0], &value))
 				return 0;
+			if (!fuse_valid_type(value))
+				return 0;
 			d->rootmode = value;
 			d->rootmode_present = 1;
 			break;
-- 
cgit v1.2.3-70-g09d2


From 6d205f120547043de663315698dcf5f0eaa31b5c Mon Sep 17 00:00:00 2001
From: Vladimir Saveliev <vs@namesys.com>
Date: Wed, 11 Apr 2007 23:28:44 -0700
Subject: [PATCH] reiserfs: fix key decrementing

This patch fixes a bug in function decrementing a key of stat data item.

Offset of reiserfs keys are compared as signed values.  To set key offset
to maximal possible value maximal signed value has to be used.

This bug is responsible for severe reiserfs filesystem corruption which
shows itself as warning vs-13060.  reiserfsck fixes this corruption by
filesystem tree rebuilding.

Signed-off-by: Vladimir Saveliev <vs@namesys.com>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/item_ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index b9b423b22a8..9475557ab49 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -23,7 +23,7 @@ static void sd_decrement_key(struct cpu_key *key)
 {
 	key->on_disk_key.k_objectid--;
 	set_cpu_key_k_type(key, TYPE_ANY);
-	set_cpu_key_k_offset(key, (loff_t) (-1));
+	set_cpu_key_k_offset(key, (loff_t)(~0ULL >> 1));
 }
 
 static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
-- 
cgit v1.2.3-70-g09d2


From c3724b129b5a1a1789a2dc5348685a236ae02479 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 11 Apr 2007 23:28:46 -0700
Subject: [PATCH] autofs4: fix race in unhashed dentry code

Commit f50b6f8691cae2e0064c499dd3ef3f31142987f0 introduced a race in
autofs4 between autofs_lookup_unhashed() and autofs_dentry_release().

autofs_dentry_release() ends up clearing the ->dentry and ->inode members
of autofs_info before removing it from the rehash list.  The list is
protected by the rehash lock in both functions, but since
autofs_dentry_release() starts tearing the autofs_info struct down before
removing it from the list, autofs_lookup_unhashed() can get a autofs_info
with a NULL dentry.

This patch moves the clearing of ->dentry and ->inode after the removal
from the rehash list.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs4/root.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index b4631046867..d0e9b3a3905 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -470,9 +470,6 @@ void autofs4_dentry_release(struct dentry *de)
 	if (inf) {
 		struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
 
-		inf->dentry = NULL;
-		inf->inode = NULL;
-
 		if (sbi) {
 			spin_lock(&sbi->rehash_lock);
 			if (!list_empty(&inf->rehash))
@@ -480,6 +477,9 @@ void autofs4_dentry_release(struct dentry *de)
 			spin_unlock(&sbi->rehash_lock);
 		}
 
+		inf->dentry = NULL;
+		inf->inode = NULL;
+
 		autofs4_free_ino(inf);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From e1552e199857109d4b25b9163eff4646726eee3d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Apr 2007 19:07:28 -0400
Subject: NFS: Fix an Oops in nfs_setattr()

It looks like nfs_setattr() and nfs_rename() also need to test whether the
target is a regular file before calling nfs_wb_all()...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/dir.c   | 3 ++-
 fs/nfs/inode.c | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 92d8ec859e2..cd3469720cb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1684,7 +1684,8 @@ go_ahead:
 	 * ... prune child dentries and writebacks if needed.
 	 */
 	if (atomic_read(&old_dentry->d_count) > 1) {
-		nfs_wb_all(old_inode);
+		if (S_ISREG(old_inode->i_mode))
+			nfs_wb_all(old_inode);
 		shrink_dcache_parent(old_dentry);
 	}
 	nfs_inode_return_delegation(old_inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 93d046c85f5..44aa9b72657 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -341,8 +341,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	lock_kernel();
 	nfs_begin_data_update(inode);
 	/* Write all dirty data */
-	filemap_write_and_wait(inode->i_mapping);
-	nfs_wb_all(inode);
+	if (S_ISREG(inode->i_mode)) {
+		filemap_write_and_wait(inode->i_mapping);
+		nfs_wb_all(inode);
+	}
 	/*
 	 * Return any delegations if we're going to change ACLs
 	 */
-- 
cgit v1.2.3-70-g09d2


From 60fa3f769f7651a60125a0f44e3ffe3246d7cf39 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Apr 2007 19:11:52 -0400
Subject: NFS: Fix two bugs in the O_DIRECT write code

Do not flag an error if the COMMIT call fails and we decide to resend the
writes. Let the resend flag the error if it fails.

If a write has failed, then nfs_direct_write_result should not attempt to
send a commit. It should just exit asap and return the error to the user.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/direct.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b1c98ea39b7..2877744cb60 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -432,10 +432,10 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
 		return;
 	if (unlikely(task->tk_status < 0)) {
-		dreq->error = task->tk_status;
+		dprintk("NFS: %5u commit failed with error %d.\n",
+				task->tk_pid, task->tk_status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	}
-	if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
 		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
@@ -531,9 +531,12 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 
 	spin_lock(&dreq->lock);
 
+	if (unlikely(dreq->error != 0))
+		goto out_unlock;
 	if (unlikely(status < 0)) {
+		/* An error has occured, so we should not commit */
+		dreq->flags = 0;
 		dreq->error = status;
-		goto out_unlock;
 	}
 
 	dreq->count += data->res.count;
-- 
cgit v1.2.3-70-g09d2


From 5a6d41b32a17ca902ef50fdfa170d7f23264bad5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Apr 2007 19:10:12 -0400
Subject: NFS: Ensure PG_writeback is cleared when writeback fails

If the writebacks are cancelled via nfs_cancel_dirty_list, or due to the
memory allocation failing in nfs_flush_one/nfs_flush_multi, then we must
ensure that the PG_writeback flag is cleared.

Also ensure that we actually own the PG_writeback flag whenever we
schedule a new writeback by making nfs_set_page_writeback() return the
value of test_set_page_writeback().
The PG_writeback page flag ends up replacing the functionality of the
PG_FLUSHING nfs_page flag, so we rip that out too.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c           | 22 +++++++++++++++-------
 include/linux/nfs_page.h |  1 -
 2 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2867e6b7096..e5d7cac569a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -218,9 +218,11 @@ int nfs_congestion_kb;
 #define NFS_CONGESTION_OFF_THRESH	\
 	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
 
-static void nfs_set_page_writeback(struct page *page)
+static int nfs_set_page_writeback(struct page *page)
 {
-	if (!test_set_page_writeback(page)) {
+	int ret = test_set_page_writeback(page);
+
+	if (!ret) {
 		struct inode *inode = page->mapping->host;
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
@@ -228,6 +230,7 @@ static void nfs_set_page_writeback(struct page *page)
 				NFS_CONGESTION_ON_THRESH)
 			set_bdi_congested(&nfss->backing_dev_info, WRITE);
 	}
+	return ret;
 }
 
 static void nfs_end_page_writeback(struct page *page)
@@ -277,10 +280,8 @@ static int nfs_page_mark_flush(struct page *page)
 		spin_lock(req_lock);
 	}
 	spin_unlock(req_lock);
-	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
+	if (nfs_set_page_writeback(page) == 0)
 		nfs_mark_request_dirty(req);
-		nfs_set_page_writeback(page);
-	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
 	return ret;
@@ -424,7 +425,6 @@ nfs_mark_request_dirty(struct nfs_page *req)
 static void
 nfs_redirty_request(struct nfs_page *req)
 {
-	clear_bit(PG_FLUSHING, &req->wb_flags);
 	__set_page_dirty_nobuffers(req->wb_page);
 }
 
@@ -434,7 +434,11 @@ nfs_redirty_request(struct nfs_page *req)
 static inline int
 nfs_dirty_request(struct nfs_page *req)
 {
-	return test_bit(PG_FLUSHING, &req->wb_flags) == 0;
+	struct page *page = req->wb_page;
+
+	if (page == NULL)
+		return 0;
+	return !PageWriteback(req->wb_page);
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -500,6 +504,7 @@ static void nfs_cancel_dirty_list(struct list_head *head)
 	while(!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_inode_remove_request(req);
 		nfs_clear_page_writeback(req);
 	}
@@ -890,6 +895,7 @@ out_bad:
 		list_del(&data->pages);
 		nfs_writedata_release(data);
 	}
+	nfs_end_page_writeback(req->wb_page);
 	nfs_redirty_request(req);
 	nfs_clear_page_writeback(req);
 	return -ENOMEM;
@@ -935,6 +941,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
@@ -970,6 +977,7 @@ out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 2e555d49c9b..d111be63914 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -31,7 +31,6 @@
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
 #define PG_NEED_FLUSH		3
-#define PG_FLUSHING		4
 
 struct nfs_inode;
 struct nfs_page {
-- 
cgit v1.2.3-70-g09d2


From eb4cac10d9f7b006da842e2d37414d13e1333781 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 15 Apr 2007 16:21:49 -0400
Subject: NFS: Fix a list corruption problem

We must remove the request from whatever list it is currently on before we
can add it to the dirty list.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e5d7cac569a..ad2e91b4904 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -280,8 +280,10 @@ static int nfs_page_mark_flush(struct page *page)
 		spin_lock(req_lock);
 	}
 	spin_unlock(req_lock);
-	if (nfs_set_page_writeback(page) == 0)
+	if (nfs_set_page_writeback(page) == 0) {
+		nfs_list_remove_request(req);
 		nfs_mark_request_dirty(req);
+	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
 	return ret;
-- 
cgit v1.2.3-70-g09d2


From c4bbafda70a0fc95c6595bffd6825ef264050d01 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@lxorguk.ukuu.org.uk>
Date: Mon, 16 Apr 2007 22:53:13 -0700
Subject: exec.c: fix coredump to pipe problem and obscure "security hole"

The patch checks for "|" in the pattern not the output and doesn't nail a
pid on to a piped name (as it is a program name not a file)

Also fixes a very very obscure security corner case.  If you happen to have
decided on a core pattern that starts with the program name then the user
can run a program called "|myevilhack" as it stands.  I doubt anyone does
this.

Signed-off-by: Alan Cox <alan@redhat.com>
Confirmed-by: Christopher S. Aker <caker@theshore.net>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 7e36c6f6f53..3155e915307 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1244,13 +1244,17 @@ EXPORT_SYMBOL(set_binfmt);
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static void format_corename(char *corename, const char *pattern, long signr)
+static int format_corename(char *corename, const char *pattern, long signr)
 {
 	const char *pat_ptr = pattern;
 	char *out_ptr = corename;
 	char *const out_end = corename + CORENAME_MAX_SIZE;
 	int rc;
 	int pid_in_pattern = 0;
+	int ispipe = 0;
+
+	if (*pattern == '|')
+		ispipe = 1;
 
 	/* Repeat as long as we have more pattern to process and more output
 	   space */
@@ -1341,8 +1345,8 @@ static void format_corename(char *corename, const char *pattern, long signr)
 	 *
 	 * If core_pattern does not include a %p (as is the default)
 	 * and core_uses_pid is set, then .%pid will be appended to
-	 * the filename */
-	if (!pid_in_pattern
+	 * the filename. Do not do this for piped commands. */
+	if (!ispipe && !pid_in_pattern
             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 		rc = snprintf(out_ptr, out_end - out_ptr,
 			      ".%d", current->tgid);
@@ -1350,8 +1354,9 @@ static void format_corename(char *corename, const char *pattern, long signr)
 			goto out;
 		out_ptr += rc;
 	}
-      out:
+out:
 	*out_ptr = 0;
+	return ispipe;
 }
 
 static void zap_process(struct task_struct *start)
@@ -1502,16 +1507,15 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	 * uses lock_kernel()
 	 */
  	lock_kernel();
-	format_corename(corename, core_pattern, signr);
+	ispipe = format_corename(corename, core_pattern, signr);
 	unlock_kernel();
- 	if (corename[0] == '|') {
+ 	if (ispipe) {
 		/* SIGPIPE can happen, but it's just never processed */
  		if(call_usermodehelper_pipe(corename+1, NULL, NULL, &file)) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
 			       corename);
  			goto fail_unlock;
  		}
-		ispipe = 1;
  	} else
  		file = filp_open(corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
-- 
cgit v1.2.3-70-g09d2


From 07a0cfec30848319cc86f21cce0d2efeca593e1a Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Mon, 16 Apr 2007 22:53:24 -0700
Subject: ufs proper handling of zero link case

This patch should fix or partly fix this bug:
http://bugzilla.kernel.org/show_bug.cgi?id=8276

The problem is:

- if we see "zero link case" during reading inode operation, we call
  ufs_error(which remount fs readonly), but not "mark" inode as bad (1)

- in readonly case we do not fill some data structures, which are used in
  read and write case (2)

- VFS call ufs_delete_inode if link count is zero (3)

so (1)->(3)->(2) cause oops, this patch should fix such scenario

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Cc: Jim Paris <jim@jtan.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/inode.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 013d7afe7cd..f18b79122fa 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -601,7 +601,7 @@ static void ufs_set_inode_ops(struct inode *inode)
 				   ufs_get_inode_dev(inode->i_sb, UFS_I(inode)));
 }
 
-static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
+static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -613,8 +613,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs_inode->ui_mode);
 	inode->i_nlink = fs16_to_cpu(sb, ufs_inode->ui_nlink);
-	if (inode->i_nlink == 0)
+	if (inode->i_nlink == 0) {
 		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
+		return -1;
+	}
 	
 	/*
 	 * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -643,9 +645,10 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
 			ufsi->i_u1.i_symlink[i] = ufs_inode->ui_u2.ui_symlink[i];
 	}
+	return 0;
 }
 
-static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
+static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 {
 	struct ufs_inode_info *ufsi = UFS_I(inode);
 	struct super_block *sb = inode->i_sb;
@@ -658,8 +661,10 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	 */
 	inode->i_mode = mode = fs16_to_cpu(sb, ufs2_inode->ui_mode);
 	inode->i_nlink = fs16_to_cpu(sb, ufs2_inode->ui_nlink);
-	if (inode->i_nlink == 0)
+	if (inode->i_nlink == 0) {
 		ufs_error (sb, "ufs_read_inode", "inode %lu has zero nlink\n", inode->i_ino);
+		return -1;
+	}
 
         /*
          * Linux now has 32-bit uid and gid, so we can support EFT.
@@ -690,6 +695,7 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 		for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
 			ufsi->i_u1.i_symlink[i] = ufs2_inode->ui_u2.ui_symlink[i];
 	}
+	return 0;
 }
 
 void ufs_read_inode(struct inode * inode)
@@ -698,6 +704,7 @@ void ufs_read_inode(struct inode * inode)
 	struct super_block * sb;
 	struct ufs_sb_private_info * uspi;
 	struct buffer_head * bh;
+	int err;
 
 	UFSD("ENTER, ino %lu\n", inode->i_ino);
 
@@ -720,14 +727,17 @@ void ufs_read_inode(struct inode * inode)
 	if ((UFS_SB(sb)->s_flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
 		struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
 
-		ufs2_read_inode(inode,
-				ufs2_inode + ufs_inotofsbo(inode->i_ino));
+		err = ufs2_read_inode(inode,
+				      ufs2_inode + ufs_inotofsbo(inode->i_ino));
 	} else {
 		struct ufs_inode *ufs_inode = (struct ufs_inode *)bh->b_data;
 
-		ufs1_read_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
+		err = ufs1_read_inode(inode,
+				      ufs_inode + ufs_inotofsbo(inode->i_ino));
 	}
 
+	if (err)
+		goto bad_inode;
 	inode->i_version++;
 	ufsi->i_lastfrag =
 		(inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -888,6 +898,8 @@ void ufs_delete_inode (struct inode * inode)
 	loff_t old_i_size;
 
 	truncate_inode_pages(&inode->i_data, 0);
+	if (is_bad_inode(inode))
+		goto no_delete;
 	/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
 	lock_kernel();
 	mark_inode_dirty(inode);
@@ -898,4 +910,7 @@ void ufs_delete_inode (struct inode * inode)
 		ufs_warning(inode->i_sb, __FUNCTION__, "ufs_truncate failed\n");
 	ufs_free_inode (inode);
 	unlock_kernel();
+	return;
+no_delete:
+	clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
-- 
cgit v1.2.3-70-g09d2


From 8e821cad12e80cd1a8a3fbadf91f62f17f32549e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 20 Apr 2007 16:12:34 -0400
Subject: NFS: clean up the unstable write code

Get rid of the inlined #ifdefs.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c           | 117 ++++++++++++++++++++++++++++-------------------
 include/linux/nfs_page.h |  30 ------------
 2 files changed, 71 insertions(+), 76 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ad2e91b4904..3ed4feb8c85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -460,6 +460,43 @@ nfs_mark_request_commit(struct nfs_page *req)
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
+
+static inline
+int nfs_write_need_commit(struct nfs_write_data *data)
+{
+	return data->verf.committed != NFS_FILE_SYNC;
+}
+
+static inline
+int nfs_reschedule_unstable_write(struct nfs_page *req)
+{
+	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+		nfs_mark_request_commit(req);
+		return 1;
+	}
+	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
+		nfs_redirty_request(req);
+		return 1;
+	}
+	return 0;
+}
+#else
+static inline void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+}
+
+static inline
+int nfs_write_need_commit(struct nfs_write_data *data)
+{
+	return 0;
+}
+
+static inline
+int nfs_reschedule_unstable_write(struct nfs_page *req)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -746,26 +783,12 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	nfs_end_page_writeback(req->wb_page);
 
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-	if (!PageError(req->wb_page)) {
-		if (NFS_NEED_RESCHED(req)) {
-			nfs_redirty_request(req);
-			goto out;
-		} else if (NFS_NEED_COMMIT(req)) {
-			nfs_mark_request_commit(req);
-			goto out;
-		}
-	}
-	nfs_inode_remove_request(req);
-
-out:
-	nfs_clear_commit(req);
-	nfs_clear_reschedule(req);
-#else
-	nfs_inode_remove_request(req);
-#endif
+	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+		nfs_end_page_writeback(req->wb_page);
+		nfs_inode_remove_request(req);
+	} else
+		nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_writeback(req);
 }
 
@@ -1008,22 +1031,28 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 		nfs_set_pageerror(page);
 		req->wb_context->error = task->tk_status;
 		dprintk(", error = %d\n", task->tk_status);
-	} else {
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (data->verf.committed < NFS_FILE_SYNC) {
-			if (!NFS_NEED_COMMIT(req)) {
-				nfs_defer_commit(req);
-				memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-				dprintk(" defer commit\n");
-			} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
-				nfs_defer_reschedule(req);
-				dprintk(" server reboot detected\n");
-			}
-		} else
-#endif
-			dprintk(" OK\n");
+		goto out;
 	}
 
+	if (nfs_write_need_commit(data)) {
+		spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+		spin_lock(req_lock);
+		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
+			/* Do nothing we need to resend the writes */
+		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+			dprintk(" defer commit\n");
+		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
+			set_bit(PG_NEED_RESCHED, &req->wb_flags);
+			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
+			dprintk(" server reboot detected\n");
+		}
+		spin_unlock(req_lock);
+	} else
+		dprintk(" OK\n");
+
+out:
 	if (atomic_dec_and_test(&req->wb_complete))
 		nfs_writepage_release(req);
 }
@@ -1064,25 +1093,21 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		if (task->tk_status < 0) {
 			nfs_set_pageerror(page);
 			req->wb_context->error = task->tk_status;
-			nfs_end_page_writeback(page);
-			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
-			goto next;
+			goto remove_request;
 		}
-		nfs_end_page_writeback(page);
 
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
-			nfs_inode_remove_request(req);
-			dprintk(" OK\n");
+		if (nfs_write_need_commit(data)) {
+			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+			nfs_mark_request_commit(req);
+			nfs_end_page_writeback(page);
+			dprintk(" marked for commit\n");
 			goto next;
 		}
-		memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-		nfs_mark_request_commit(req);
-		dprintk(" marked for commit\n");
-#else
+		dprintk(" OK\n");
+remove_request:
+		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
-#endif
 	next:
 		nfs_clear_page_writeback(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index d111be63914..16b0266b14f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -49,8 +49,6 @@ struct nfs_page {
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
-#define NFS_NEED_COMMIT(req)	(test_bit(PG_NEED_COMMIT,&(req)->wb_flags))
-#define NFS_NEED_RESCHED(req)	(test_bit(PG_NEED_RESCHED,&(req)->wb_flags))
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
 					    struct inode *inode,
@@ -121,34 +119,6 @@ nfs_list_remove_request(struct nfs_page *req)
 	req->wb_list_head = NULL;
 }
 
-static inline int
-nfs_defer_commit(struct nfs_page *req)
-{
-	return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags);
-}
-
-static inline void
-nfs_clear_commit(struct nfs_page *req)
-{
-	smp_mb__before_clear_bit();
-	clear_bit(PG_NEED_COMMIT, &req->wb_flags);
-	smp_mb__after_clear_bit();
-}
-
-static inline int
-nfs_defer_reschedule(struct nfs_page *req)
-{
-	return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags);
-}
-
-static inline void
-nfs_clear_reschedule(struct nfs_page *req)
-{
-	smp_mb__before_clear_bit();
-	clear_bit(PG_NEED_RESCHED, &req->wb_flags);
-	smp_mb__after_clear_bit();
-}
-
 static inline struct nfs_page *
 nfs_list_entry(struct list_head *head)
 {
-- 
cgit v1.2.3-70-g09d2


From 6d677e3504cd173b2ff7fc393ee4241b3c0f92a6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 20 Apr 2007 16:12:40 -0400
Subject: NFS: Don't clear PG_writeback until after we've processed unstable
 writes

Ensure that we don't release the PG_writeback lock until after the page has
either been redirtied, or queued on the nfs_inode 'commit' list.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3ed4feb8c85..8e9424651bc 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -920,8 +920,8 @@ out_bad:
 		list_del(&data->pages);
 		nfs_writedata_release(data);
 	}
-	nfs_end_page_writeback(req->wb_page);
 	nfs_redirty_request(req);
+	nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_writeback(req);
 	return -ENOMEM;
 }
@@ -966,8 +966,8 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_clear_page_writeback(req);
 	}
 	return -ENOMEM;
@@ -1002,8 +1002,8 @@ out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
-		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_clear_page_writeback(req);
 	}
 	return error;
-- 
cgit v1.2.3-70-g09d2


From 612c9384fd0486686699f7d49b774f0c7a79c511 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 20 Apr 2007 16:12:45 -0400
Subject: NFS: Fix the 'desynchronized value of nfs_i.ncommit' error

Redirtying a request that is already marked for commit will screw up the
accounting for NR_UNSTABLE_NFS as well as nfs_i.ncommit.
Ensure that all requests on the commit queue are labelled with the
PG_NEED_COMMIT flag, and avoid moving them onto the dirty list inside
nfs_page_mark_flush().

Also inline nfs_mark_request_dirty() into nfs_page_mark_flush() for
atomicity reasons. Avoid dropping the spinlock until we're done marking the
request in the radix tree and have added it to the ->dirty list.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c | 47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8e9424651bc..ce5b4a9f2d8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -38,7 +38,6 @@
 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct page *,
 					    unsigned int, unsigned int);
-static void nfs_mark_request_dirty(struct nfs_page *req);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
@@ -255,7 +254,8 @@ static void nfs_end_page_writeback(struct page *page)
 static int nfs_page_mark_flush(struct page *page)
 {
 	struct nfs_page *req;
-	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+	spinlock_t *req_lock = &nfsi->req_lock;
 	int ret;
 
 	spin_lock(req_lock);
@@ -279,11 +279,23 @@ static int nfs_page_mark_flush(struct page *page)
 			return ret;
 		spin_lock(req_lock);
 	}
-	spin_unlock(req_lock);
+	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+		/* This request is marked for commit */
+		spin_unlock(req_lock);
+		nfs_unlock_request(req);
+		return 1;
+	}
 	if (nfs_set_page_writeback(page) == 0) {
 		nfs_list_remove_request(req);
-		nfs_mark_request_dirty(req);
-	}
+		/* add the request to the inode's dirty list. */
+		radix_tree_tag_set(&nfsi->nfs_page_tree,
+				req->wb_index, NFS_PAGE_TAG_DIRTY);
+		nfs_list_add_request(req, &nfsi->dirty);
+		nfsi->ndirty++;
+		spin_unlock(req_lock);
+		__mark_inode_dirty(page->mapping->host, I_DIRTY_PAGES);
+	} else
+		spin_unlock(req_lock);
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
 	return ret;
@@ -406,24 +418,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
-/*
- * Add a request to the inode's dirty list.
- */
-static void
-nfs_mark_request_dirty(struct nfs_page *req)
-{
-	struct inode *inode = req->wb_context->dentry->d_inode;
-	struct nfs_inode *nfsi = NFS_I(inode);
-
-	spin_lock(&nfsi->req_lock);
-	radix_tree_tag_set(&nfsi->nfs_page_tree,
-			req->wb_index, NFS_PAGE_TAG_DIRTY);
-	nfs_list_add_request(req, &nfsi->dirty);
-	nfsi->ndirty++;
-	spin_unlock(&nfsi->req_lock);
-	__mark_inode_dirty(inode, I_DIRTY_PAGES);
-}
-
 static void
 nfs_redirty_request(struct nfs_page *req)
 {
@@ -438,7 +432,7 @@ nfs_dirty_request(struct nfs_page *req)
 {
 	struct page *page = req->wb_page;
 
-	if (page == NULL)
+	if (page == NULL || test_bit(PG_NEED_COMMIT, &req->wb_flags))
 		return 0;
 	return !PageWriteback(req->wb_page);
 }
@@ -456,6 +450,7 @@ nfs_mark_request_commit(struct nfs_page *req)
 	spin_lock(&nfsi->req_lock);
 	nfs_list_add_request(req, &nfsi->commit);
 	nfsi->ncommit++;
+	set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
 	spin_unlock(&nfsi->req_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -470,7 +465,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
 static inline
 int nfs_reschedule_unstable_write(struct nfs_page *req)
 {
-	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 		nfs_mark_request_commit(req);
 		return 1;
 	}
@@ -557,6 +552,7 @@ static void nfs_cancel_commit_list(struct list_head *head)
 		req = nfs_list_entry(head->next);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 		nfs_list_remove_request(req);
+		clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
 		nfs_inode_remove_request(req);
 		nfs_unlock_request(req);
 	}
@@ -1295,6 +1291,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
+		clear_bit(PG_NEED_COMMIT, &(req)->wb_flags);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 
 		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
-- 
cgit v1.2.3-70-g09d2


From 2b82f190c81bf1524447c021df4e9ce8ef379bd5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 20 Apr 2007 16:12:50 -0400
Subject: NFS: Fix race in nfs_set_page_dirty

Protect nfs_set_page_dirty() against races with nfs_inode_add_request.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce5b4a9f2d8..79755894174 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -388,6 +388,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	}
 	SetPagePrivate(req->wb_page);
 	set_page_private(req->wb_page, (unsigned long)req);
+	if (PageDirty(req->wb_page))
+		set_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);
 	return 0;
@@ -407,6 +409,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
+	if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags))
+		__set_page_dirty_nobuffers(req->wb_page);
 	nfsi->npages--;
 	if (!nfsi->npages) {
 		spin_unlock(&nfsi->req_lock);
@@ -1527,15 +1531,22 @@ int nfs_wb_page(struct inode *inode, struct page* page)
 
 int nfs_set_page_dirty(struct page *page)
 {
+	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
 	struct nfs_page *req;
+	int ret;
 
-	req = nfs_page_find_request(page);
+	spin_lock(req_lock);
+	req = nfs_page_find_request_locked(page);
 	if (req != NULL) {
 		/* Mark any existing write requests for flushing */
-		set_bit(PG_NEED_FLUSH, &req->wb_flags);
+		ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
+		spin_unlock(req_lock);
 		nfs_release_request(req);
+		return ret;
 	}
-	return __set_page_dirty_nobuffers(page);
+	ret = __set_page_dirty_nobuffers(page);
+	spin_unlock(req_lock);
+	return ret;
 }
 
 
-- 
cgit v1.2.3-70-g09d2


From c959df9f01cfb2f43b4d1f58631ee1e9c50541b6 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Mon, 23 Apr 2007 14:41:11 -0700
Subject: v9fs: don't use primary fid when removing file

v9fs_insert uses v9fs_fid_lookup (which also locks the fid) to get the
primary fid associated with the dentry and destroys the v9fs_fid struct
after removing the file.  If another process called v9fs_fid_lookup on the
same dentry, it may wait undefinitely for the fid's lock (as the struct is
freed).

This patch changes v9fs_remove to use a cloned fid, so the primary fid is
not locked and freed.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Cc: Eric Van Hensbergen <ericvh@hera.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/9p/vfs_inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 124a085d1f2..b01b0a45793 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -415,7 +415,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	file_inode = file->d_inode;
 	sb = file_inode->i_sb;
 	v9ses = v9fs_inode2v9ses(file_inode);
-	v9fid = v9fs_fid_lookup(file);
+	v9fid = v9fs_fid_clone(file);
 	if(IS_ERR(v9fid))
 		return PTR_ERR(v9fid);
 
-- 
cgit v1.2.3-70-g09d2


From 9b7f375505f5611efb562065b57814b28a81abc3 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Mon, 23 Apr 2007 14:41:17 -0700
Subject: reiserfs: fix xattr root locking/refcount bug

The listxattr() and getxattr() operations are only protected by a read
lock.  As a result, if either of these operations run in parallel, a race
condition exists where the xattr_root will end up being cached twice, which
results in the leaking of a reference and a BUG() on umount.

This patch refactors get_xa_root(), __get_xa_root(), and create_xa_root(),
into one get_xa_root() function that takes the appropriate locking around
the entire critical section.

Reported, diagnosed and tested by Andrea Righi <a.righi@cineca.it>

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Andrea Righi <a.righi@cineca.it>
Cc: "Vladimir V. Saveliev" <vs@namesys.com>
Cc: Edward Shishkin <edward@namesys.com>
Cc: Alex Zarochentsev <zam@namesys.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/xattr.c | 92 ++++++++++++++---------------------------------------
 1 file changed, 24 insertions(+), 68 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index f01389fd162..c8178b7b921 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -54,82 +54,48 @@
 static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char
 								*prefix);
 
-static struct dentry *create_xa_root(struct super_block *sb)
+/* Returns the dentry referring to the root of the extended attribute
+ * directory tree. If it has already been retrieved, it is used. If it
+ * hasn't been created and the flags indicate creation is allowed, we
+ * attempt to create it. On error, we return a pointer-encoded error.
+ */
+static struct dentry *get_xa_root(struct super_block *sb, int flags)
 {
 	struct dentry *privroot = dget(REISERFS_SB(sb)->priv_root);
 	struct dentry *xaroot;
 
 	/* This needs to be created at mount-time */
 	if (!privroot)
-		return ERR_PTR(-EOPNOTSUPP);
+		return ERR_PTR(-ENODATA);
 
-	xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
-	if (IS_ERR(xaroot)) {
+	mutex_lock(&privroot->d_inode->i_mutex);
+	if (REISERFS_SB(sb)->xattr_root) {
+		xaroot = dget(REISERFS_SB(sb)->xattr_root);
 		goto out;
-	} else if (!xaroot->d_inode) {
-		int err;
-		mutex_lock(&privroot->d_inode->i_mutex);
-		err =
-		    privroot->d_inode->i_op->mkdir(privroot->d_inode, xaroot,
-						   0700);
-		mutex_unlock(&privroot->d_inode->i_mutex);
-
-		if (err) {
-			dput(xaroot);
-			dput(privroot);
-			return ERR_PTR(err);
-		}
-		REISERFS_SB(sb)->xattr_root = dget(xaroot);
 	}
 
-      out:
-	dput(privroot);
-	return xaroot;
-}
-
-/* This will return a dentry, or error, refering to the xa root directory.
- * If the xa root doesn't exist yet, the dentry will be returned without
- * an associated inode. This dentry can be used with ->mkdir to create
- * the xa directory. */
-static struct dentry *__get_xa_root(struct super_block *s)
-{
-	struct dentry *privroot = dget(REISERFS_SB(s)->priv_root);
-	struct dentry *xaroot = NULL;
-
-	if (IS_ERR(privroot) || !privroot)
-		return privroot;
-
 	xaroot = lookup_one_len(XAROOT_NAME, privroot, strlen(XAROOT_NAME));
 	if (IS_ERR(xaroot)) {
 		goto out;
 	} else if (!xaroot->d_inode) {
-		dput(xaroot);
-		xaroot = NULL;
-		goto out;
+		int err = -ENODATA;
+		if (flags == 0 || flags & XATTR_CREATE)
+			err = privroot->d_inode->i_op->mkdir(privroot->d_inode,
+			                                     xaroot, 0700);
+		if (err) {
+			dput(xaroot);
+			xaroot = ERR_PTR(err);
+			goto out;
+		}
 	}
-
-	REISERFS_SB(s)->xattr_root = dget(xaroot);
+	REISERFS_SB(sb)->xattr_root = dget(xaroot);
 
       out:
+	mutex_unlock(&privroot->d_inode->i_mutex);
 	dput(privroot);
 	return xaroot;
 }
 
-/* Returns the dentry (or NULL) referring to the root of the extended
- * attribute directory tree. If it has already been retrieved, it is used.
- * Otherwise, we attempt to retrieve it from disk. It may also return
- * a pointer-encoded error.
- */
-static inline struct dentry *get_xa_root(struct super_block *s)
-{
-	struct dentry *dentry = dget(REISERFS_SB(s)->xattr_root);
-
-	if (!dentry)
-		dentry = __get_xa_root(s);
-
-	return dentry;
-}
-
 /* Opens the directory corresponding to the inode's extended attribute store.
  * If flags allow, the tree to the directory may be created. If creation is
  * prohibited, -ENODATA is returned. */
@@ -138,21 +104,11 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
 	struct dentry *xaroot, *xadir;
 	char namebuf[17];
 
-	xaroot = get_xa_root(inode->i_sb);
-	if (IS_ERR(xaroot)) {
+	xaroot = get_xa_root(inode->i_sb, flags);
+	if (IS_ERR(xaroot))
 		return xaroot;
-	} else if (!xaroot) {
-		if (flags == 0 || flags & XATTR_CREATE) {
-			xaroot = create_xa_root(inode->i_sb);
-			if (IS_ERR(xaroot))
-				return xaroot;
-		}
-		if (!xaroot)
-			return ERR_PTR(-ENODATA);
-	}
 
 	/* ok, we have xaroot open */
-
 	snprintf(namebuf, sizeof(namebuf), "%X.%X",
 		 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
 		 inode->i_generation);
@@ -821,7 +777,7 @@ int reiserfs_delete_xattrs(struct inode *inode)
 
 	/* Leftovers besides . and .. -- that's not good. */
 	if (dir->d_inode->i_nlink <= 2) {
-		root = get_xa_root(inode->i_sb);
+		root = get_xa_root(inode->i_sb, XATTR_REPLACE);
 		reiserfs_write_lock_xattrs(inode->i_sb);
 		err = vfs_rmdir(root->d_inode, dir);
 		reiserfs_write_unlock_xattrs(inode->i_sb);
-- 
cgit v1.2.3-70-g09d2