25 files changed, 297 insertions, 131 deletions
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 6d84ca2beea..bed6215c079 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,10 @@ Version 1.50
 Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
 done with "serverino" mount option).  Add support for POSIX Unlink
 (helps with certain sharing violation cases when server such as
-Samba supports newer POSIX CIFS Protocol Extensions).
+Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
+mount option to allow disabling the CIFS Unix Extensions for just
+that mount. Fix hang on spinlock in find_writable_file (race when
+reopening file after session crash).
 
 Version 1.49
 ------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 85f1eb14083..b806b11b556 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -444,6 +444,13 @@ A partial list of the supported mount options follows:
  noposixpaths   If CIFS Unix extensions are supported, do not request
 		posix path name support (this may cause servers to
 		reject creatingfile with certain reserved characters).
+ nounix         Disable the CIFS Unix Extensions for this mount (tree
+		connection). This is rarely needed, but it may be useful
+		in order to turn off multiple settings all at once (ie
+		posix acls, posix locks, posix paths, symlink support
+		and retrieving uids/gids/mode from the server) or to
+		work around a bug in server which implement the Unix
+		Extensions.
  nobrl          Do not send byte range lock requests to the server.
 		This is necessary for certain applications that break
 		with cifs style mandatory byte range locks (and most
@@ -451,6 +458,12 @@ A partial list of the supported mount options follows:
 		byte range locks).
  remount        remount the share (often used to change from ro to rw mounts
 	        or vice versa)
+ servern        Specify the server 's netbios name (RFC1001 name) to use
+		when attempting to setup a session to the server.  This is
+		This is needed for mounting to some older servers (such
+		as OS/2 or Windows 98 and Windows ME) since they do not
+		support a default server name.  A server name can be up
+		to 15 characters long and is usually uppercased.
  sfu            When the CIFS Unix Extensions are not negotiated, attempt to
 		create device files and fifos in a format compatible with
 		Services for Unix (SFU).  In addition retrieve bits 10-12
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index d7bd51575fd..29d4b271525 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -82,8 +82,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
 
 v) mount check for unmatched uids
 
-w) Add mount option for Linux extension disable per mount, and partial
-disable per mount (uid off, symlink/fifo/mknod on but what about posix acls?)
+w) Add support for new vfs entry points for setlease and fallocate 
 
 x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
 processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e13592afca9..894b1f7b299 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1904,6 +1904,25 @@ static int cifs_readpage(struct file *file, struct page *page)
 	return rc;
 }
 
+static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
+{
+	struct cifsFileInfo *open_file;
+
+	read_lock(&GlobalSMBSeslock);
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (open_file->closePend)
+			continue;
+		if (open_file->pfile &&
+		    ((open_file->pfile->f_flags & O_RDWR) ||
+		     (open_file->pfile->f_flags & O_WRONLY))) {
+			read_unlock(&GlobalSMBSeslock);
+			return 1;
+		}
+	}
+	read_unlock(&GlobalSMBSeslock);
+	return 0;
+}
+
 /* We do not want to update the file size from server for inodes
    open for write - to avoid races with writepage extending
    the file - in the future we could consider allowing
@@ -1912,19 +1931,13 @@ static int cifs_readpage(struct file *file, struct page *page)
    page caching in the current Linux kernel design */
 int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
 {
-	struct cifsFileInfo *open_file = NULL;
-
-	if (cifsInode)
-		open_file =  find_writable_file(cifsInode);
+	if (!cifsInode)
+		return 1;
 
-	if (open_file) {
+	if (is_inode_writable(cifsInode)) {
+		/* This inode is open for write at least once */
 		struct cifs_sb_info *cifs_sb;
 
-		/* there is not actually a write pending so let
-		this handle go free and allow it to
-		be closable if needed */
-		atomic_dec(&open_file->wrtPending);
-
 		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
 		if ( cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO ) {
 			/* since no page cache to corrupt on directio
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2ea027dda21..892be9b4d1f 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -372,6 +372,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
 
 	/* 2000 big enough to fit max user, domain, NOS name etc. */
 	str_area = kmalloc(2000, GFP_KERNEL);
+	if (str_area == NULL) {
+		cifs_small_buf_release(smb_buf);
+		return -ENOMEM;
+	}
 	bcc_ptr = str_area;
 
 	ses->flags &= ~CIFS_SES_LANMAN;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 52bb2638f7a..6874785bb65 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -974,6 +974,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->get_block = get_block;
 	dio->end_io = end_io;
 	dio->map_bh.b_private = NULL;
+	dio->map_bh.b_state = 0;
 	dio->final_block_in_bio = -1;
 	dio->next_block_for_io = -1;
 
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index b455919c199..2082daf083d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1670,9 +1670,10 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
    with a deadlk here, we'd have to generate something like grant_lock with
    the deadlk error.) */
 
-/* returns the highest requested mode of all blocked conversions */
+/* Returns the highest requested mode of all blocked conversions; sets
+   cw if there's a blocked conversion to DLM_LOCK_CW. */
 
-static int grant_pending_convert(struct dlm_rsb *r, int high)
+static int grant_pending_convert(struct dlm_rsb *r, int high, int *cw)
 {
 	struct dlm_lkb *lkb, *s;
 	int hi, demoted, quit, grant_restart, demote_restart;
@@ -1709,6 +1710,9 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
 		}
 
 		hi = max_t(int, lkb->lkb_rqmode, hi);
+
+		if (cw && lkb->lkb_rqmode == DLM_LOCK_CW)
+			*cw = 1;
 	}
 
 	if (grant_restart)
@@ -1721,29 +1725,52 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
 	return max_t(int, high, hi);
 }
 
-static int grant_pending_wait(struct dlm_rsb *r, int high)
+static int grant_pending_wait(struct dlm_rsb *r, int high, int *cw)
 {
 	struct dlm_lkb *lkb, *s;
 
 	list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
 		if (can_be_granted(r, lkb, 0, NULL))
 			grant_lock_pending(r, lkb);
-                else
+                else {
 			high = max_t(int, lkb->lkb_rqmode, high);
+			if (lkb->lkb_rqmode == DLM_LOCK_CW)
+				*cw = 1;
+		}
 	}
 
 	return high;
 }
 
+/* cw of 1 means there's a lock with a rqmode of DLM_LOCK_CW that's blocked
+   on either the convert or waiting queue.
+   high is the largest rqmode of all locks blocked on the convert or
+   waiting queue. */
+
+static int lock_requires_bast(struct dlm_lkb *gr, int high, int cw)
+{
+	if (gr->lkb_grmode == DLM_LOCK_PR && cw) {
+		if (gr->lkb_highbast < DLM_LOCK_EX)
+			return 1;
+		return 0;
+	}
+
+	if (gr->lkb_highbast < high &&
+	    !__dlm_compat_matrix[gr->lkb_grmode+1][high+1])
+		return 1;
+	return 0;
+}
+
 static void grant_pending_locks(struct dlm_rsb *r)
 {
 	struct dlm_lkb *lkb, *s;
 	int high = DLM_LOCK_IV;
+	int cw = 0;
 
 	DLM_ASSERT(is_master(r), dlm_dump_rsb(r););
 
-	high = grant_pending_convert(r, high);
-	high = grant_pending_wait(r, high);
+	high = grant_pending_convert(r, high, &cw);
+	high = grant_pending_wait(r, high, &cw);
 
 	if (high == DLM_LOCK_IV)
 		return;
@@ -1751,27 +1778,41 @@ static void grant_pending_locks(struct dlm_rsb *r)
 	/*
 	 * If there are locks left on the wait/convert queue then send blocking
 	 * ASTs to granted locks based on the largest requested mode (high)
-	 * found above. FIXME: highbast < high comparison not valid for PR/CW.
+	 * found above.
 	 */
 
 	list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
-		if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) &&
-		    !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
-			queue_bast(r, lkb, high);
+		if (lkb->lkb_bastaddr && lock_requires_bast(lkb, high, cw)) {
+			if (cw && high == DLM_LOCK_PR)
+				queue_bast(r, lkb, DLM_LOCK_CW);
+			else
+				queue_bast(r, lkb, high);
 			lkb->lkb_highbast = high;
 		}
 	}
 }
 
+static int modes_require_bast(struct dlm_lkb *gr, struct dlm_lkb *rq)
+{
+	if ((gr->lkb_grmode == DLM_LOCK_PR && rq->lkb_rqmode == DLM_LOCK_CW) ||
+	    (gr->lkb_grmode == DLM_LOCK_CW && rq->lkb_rqmode == DLM_LOCK_PR)) {
+		if (gr->lkb_highbast < DLM_LOCK_EX)
+			return 1;
+		return 0;
+	}
+
+	if (gr->lkb_highbast < rq->lkb_rqmode && !modes_compat(gr, rq))
+		return 1;
+	return 0;
+}
+
 static void send_bast_queue(struct dlm_rsb *r, struct list_head *head,
 			    struct dlm_lkb *lkb)
 {
 	struct dlm_lkb *gr;
 
 	list_for_each_entry(gr, head, lkb_statequeue) {
-		if (gr->lkb_bastaddr &&
-		    gr->lkb_highbast < lkb->lkb_rqmode &&
-		    !modes_compat(gr, lkb)) {
+		if (gr->lkb_bastaddr && modes_require_bast(gr, lkb)) {
 			queue_bast(r, gr, lkb->lkb_rqmode);
 			gr->lkb_highbast = lkb->lkb_rqmode;
 		}
@@ -2235,7 +2276,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 	   before we try again to grant this one. */
 
 	if (is_demoted(lkb)) {
-		grant_pending_convert(r, DLM_LOCK_IV);
+		grant_pending_convert(r, DLM_LOCK_IV, NULL);
 		if (_can_be_granted(r, lkb, 1)) {
 			grant_lock(r, lkb);
 			queue_cast(r, lkb, 0);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index dd362739d29..9e9d2e82f40 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -313,6 +313,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 		in6_addr->sin6_port = cpu_to_be16(port);
 		*addr_len = sizeof(struct sockaddr_in6);
 	}
+	memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len);
 }
 
 /* Close a remote connection and tidy up */
@@ -332,8 +333,19 @@ static void close_connection(struct connection *con, bool and_other)
 		__free_page(con->rx_page);
 		con->rx_page = NULL;
 	}
-	con->retries = 0;
-	mutex_unlock(&con->sock_mutex);
+
+	/* If we are an 'othercon' then NULL the pointer to us
+	   from the parent and tidy ourself up */
+	if (test_bit(CF_IS_OTHERCON, &con->flags)) {
+		struct connection *parent = __nodeid2con(con->nodeid, 0);
+		parent->othercon = NULL;
+		kmem_cache_free(con_cache, con);
+	}
+	else {
+		/* Parent connections get reused */
+		con->retries = 0;
+		mutex_unlock(&con->sock_mutex);
+	}
 }
 
 /* We only send shutdown messages to nodes that are not part of the cluster */
@@ -631,7 +643,7 @@ out_resched:
 
 out_close:
 	mutex_unlock(&con->sock_mutex);
-	if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
+	if (ret != -EAGAIN) {
 		close_connection(con, false);
 		/* Reconnect when there is something to send */
 	}
@@ -1122,8 +1134,6 @@ static int tcp_listen_for_all(void)
 
 	log_print("Using TCP for communications");
 
-	set_bit(CF_IS_OTHERCON, &con->flags);
-
 	sock = tcp_create_listen_sock(con, dlm_local_addr[0]);
 	if (sock) {
 		add_sock(sock, con);
@@ -1407,7 +1417,7 @@ void dlm_lowcomms_stop(void)
 	for (i = 0; i <= max_nodeid; i++) {
 		con = __nodeid2con(i, 0);
 		if (con) {
-			con->flags |= 0xFF;
+			con->flags |= 0x0F;
 			if (con->sock)
 				con->sock->sk->sk_user_data = NULL;
 		}
@@ -1423,8 +1433,6 @@ void dlm_lowcomms_stop(void)
 		con = __nodeid2con(i, 0);
 		if (con) {
 			close_connection(con, true);
-			if (con->othercon)
-				kmem_cache_free(con_cache, con->othercon);
 			kmem_cache_free(con_cache, con);
 		}
 	}
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 073599dced2..d09977528f6 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -56,8 +56,10 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
 		return -ENOMEM;
 
 	w = dlm_node_weight(ls->ls_name, nodeid);
-	if (w < 0)
+	if (w < 0) {
+		kfree(memb);
 		return w;
+	}
 
 	memb->nodeid = nodeid;
 	memb->weight = w;
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index e3a1527cbdb..188b91c027e 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -386,8 +386,7 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	dlm_recover_process_copy(ls, rc_in);
 }
 
-static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
-			     struct dlm_rcom *rc_in)
+static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	struct rcom_config *rf;
@@ -395,7 +394,7 @@ static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
-	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
+	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_NOFS, &mb);
 	if (!mh)
 		return -ENOBUFS;
 	memset(mb, 0, mb_len);
@@ -465,7 +464,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 		log_print("lockspace %x from %d type %x not found",
 			  hd->h_lockspace, nodeid, rc->rc_type);
 		if (rc->rc_type == DLM_RCOM_STATUS)
-			send_ls_not_ready(ls, nodeid, rc);
+			send_ls_not_ready(nodeid, rc);
 		return;
 	}
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0a50942b437..131954b3fb9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -353,6 +353,10 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
 		goto out;
 	}
+	if (special_file(lower_inode->i_mode)) {
+		ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
+		goto out;
+	}
 	if (!nd) {
 		ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
 				"as we *think* we are about to unlink\n");
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e557a676692..a98497264fe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -813,6 +813,15 @@ out:
 	return rc;
 }
 
+static void do_sysfs_unregistration(void)
+{
+	sysfs_remove_file(&ecryptfs_subsys.kobj,
+			  &sysfs_attr_version.attr);
+	sysfs_remove_file(&ecryptfs_subsys.kobj,
+			  &sysfs_attr_version_str.attr);
+	subsystem_unregister(&ecryptfs_subsys);
+}
+
 static int __init ecryptfs_init(void)
 {
 	int rc;
@@ -851,6 +860,9 @@ static int __init ecryptfs_init(void)
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
 				"initialize the eCryptfs netlink socket\n");
+		do_sysfs_unregistration();
+		unregister_filesystem(&ecryptfs_fs_type);
+		ecryptfs_free_kmem_caches();
 	}
 out:
 	return rc;
@@ -858,11 +870,7 @@ out:
 
 static void __exit ecryptfs_exit(void)
 {
-	sysfs_remove_file(&ecryptfs_subsys.kobj,
-			  &sysfs_attr_version.attr);
-	sysfs_remove_file(&ecryptfs_subsys.kobj,
-			  &sysfs_attr_version_str.attr);
-	subsystem_unregister(&ecryptfs_subsys);
+	do_sysfs_unregistration();
 	ecryptfs_release_messaging(ecryptfs_transport);
 	unregister_filesystem(&ecryptfs_fs_type);
 	ecryptfs_free_kmem_caches();
diff --git a/fs/exec.c b/fs/exec.c
index 7bdea7937ee..ce62f7b65f1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1084,9 +1084,12 @@ int flush_old_exec(struct linux_binprm * bprm)
 	 */
 	current->mm->task_size = TASK_SIZE;
 
-	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
-	    file_permission(bprm->file, MAY_READ) ||
-	    (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
+		suid_keys(current);
+		set_dumpable(current->mm, suid_dumpable);
+		current->pdeath_signal = 0;
+	} else if (file_permission(bprm->file, MAY_READ) ||
+			(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
 		suid_keys(current);
 		set_dumpable(current->mm, suid_dumpable);
 	}
@@ -1177,8 +1180,10 @@ void compute_creds(struct linux_binprm *bprm)
 {
 	int unsafe;
 
-	if (bprm->e_uid != current->uid)
+	if (bprm->e_uid != current->uid) {
 		suid_keys(current);
+		current->pdeath_signal = 0;
+	}
 	exec_keys(current);
 
 	task_lock(current);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index aff70f0698f..3b395c41b2f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -486,8 +486,8 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 		gfs2_pin(sdp, bd->bd_bh);
 		tr->tr_num_databuf_new++;
 	}
-	sdp->sd_log_num_databuf++;
 	gfs2_log_lock(sdp);
+	sdp->sd_log_num_databuf++;
 	list_add(&le->le_list, &sdp->sd_log_le_databuf);
 	gfs2_log_unlock(sdp);
 }
@@ -523,7 +523,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	struct buffer_head *bh = NULL,*bh1 = NULL;
 	struct gfs2_log_descriptor *ld;
 	unsigned int limit;
-	unsigned int total_dbuf = sdp->sd_log_num_databuf;
+	unsigned int total_dbuf;
 	unsigned int total_jdata = sdp->sd_log_num_jdata;
 	unsigned int num, n;
 	__be64 *ptr = NULL;
@@ -535,6 +535,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 	 * into the log along with a header
 	 */
 	gfs2_log_lock(sdp);
+	total_dbuf = sdp->sd_log_num_databuf;
 	bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
 				       bd_le.le_list);
 	while(total_dbuf) {
@@ -653,6 +654,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 				break;
 		}
 		bh = NULL;
+		BUG_ON(total_dbuf < num);
 		total_dbuf -= num;
 		total_jdata -= num;
 	}
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 6f006a804db..4864659555d 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,19 +82,20 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
 	char *options, *o, *v;
 	int error = 0;
 
-	/*  If someone preloaded options, use those instead  */
-	spin_lock(&gfs2_sys_margs_lock);
-	if (!remount && gfs2_sys_margs) {
-		data = gfs2_sys_margs;
-		gfs2_sys_margs = NULL;
-	}
-	spin_unlock(&gfs2_sys_margs_lock);
+	if (!remount) {
+		/*  If someone preloaded options, use those instead  */
+		spin_lock(&gfs2_sys_margs_lock);
+		if (gfs2_sys_margs) {
+			data = gfs2_sys_margs;
+			gfs2_sys_margs = NULL;
+		}
+		spin_unlock(&gfs2_sys_margs_lock);
 
-	/*  Set some defaults  */
-	memset(args, 0, sizeof(struct gfs2_args));
-	args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-	args->ar_quota = GFS2_QUOTA_DEFAULT;
-	args->ar_data = GFS2_DATA_DEFAULT;
+		/*  Set some defaults  */
+		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+		args->ar_quota = GFS2_QUOTA_DEFAULT;
+		args->ar_data = GFS2_DATA_DEFAULT;
+	}
 
 	/* Split the options into tokens with the "," character and
 	   process them */
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index ce90032c010..42a5f58f6fc 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -416,7 +416,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 
 	error = gfs2_trans_begin(sdp, rblocks, 0);
 	if (error)
-		goto out;
+		goto out_trans_fail;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
@@ -434,6 +434,7 @@ prepare_write:
 out:
 	if (error) {
 		gfs2_trans_end(sdp);
+out_trans_fail:
 		if (alloc_required) {
 			gfs2_inplace_release(ip);
 out_qunlock:
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 77342113011..94d76ace0b9 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -177,8 +177,8 @@ static const u32 fsflags_to_gfs2[32] = {
 	[5] = GFS2_DIF_APPENDONLY,
 	[7] = GFS2_DIF_NOATIME,
 	[12] = GFS2_DIF_EXHASH,
-	[14] = GFS2_DIF_JDATA,
-	[20] = GFS2_DIF_DIRECTIO,
+	[14] = GFS2_DIF_INHERIT_JDATA,
+	[20] = GFS2_DIF_INHERIT_DIRECTIO,
 };
 
 static const u32 gfs2_to_fsflags[32] = {
@@ -187,8 +187,6 @@ static const u32 gfs2_to_fsflags[32] = {
 	[gfs2fl_AppendOnly] = FS_APPEND_FL,
 	[gfs2fl_NoAtime] = FS_NOATIME_FL,
 	[gfs2fl_ExHash] = FS_INDEX_FL,
-	[gfs2fl_Jdata] = FS_JOURNAL_DATA_FL,
-	[gfs2fl_Directio] = FS_DIRECTIO_FL,
 	[gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
 	[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
 };
@@ -207,6 +205,12 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
 		return error;
 
 	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
+	if (!S_ISDIR(inode->i_mode)) {
+		if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+			fsflags |= FS_JOURNAL_DATA_FL;
+		if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
+			fsflags |= FS_DIRECTIO_FL;
+	}
 	if (put_user(fsflags, ptr))
 		error = -EFAULT;
 
@@ -270,13 +274,6 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
 	if ((new_flags ^ flags) == 0)
 		goto out;
 
-	if (S_ISDIR(inode->i_mode)) {
-		if ((new_flags ^ flags) & GFS2_DIF_JDATA)
-			new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA);
-		if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO)
-			new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO);
-	}
-
 	error = -EINVAL;
 	if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
 		goto out;
@@ -315,11 +312,19 @@ out:
 
 static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
 {
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	u32 fsflags, gfsflags;
 	if (get_user(fsflags, ptr))
 		return -EFAULT;
 	gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
-	return do_gfs2_set_flags(filp, gfsflags, ~0);
+	if (!S_ISDIR(inode->i_mode)) {
+		if (gfsflags & GFS2_DIF_INHERIT_JDATA)
+			gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
+		if (gfsflags & GFS2_DIF_INHERIT_DIRECTIO)
+			gfsflags ^= (GFS2_DIF_DIRECTIO | GFS2_DIF_INHERIT_DIRECTIO);
+		return do_gfs2_set_flags(filp, gfsflags, ~0);
+	}
+	return do_gfs2_set_flags(filp, gfsflags, ~GFS2_DIF_JDATA);
 }
 
 static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e4e04062515..ce48c4594ec 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -863,16 +863,19 @@ static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
 	u64 no_addr;
 
 	for(;;) {
+		if (goal >= rgd->rd_data)
+			break;
 		goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
 				    GFS2_BLKST_UNLINKED);
-		if (goal == 0)
-			return 0;
+		if (goal == BFITNOENT)
+			break;
 		no_addr = goal + rgd->rd_data0;
-		if (no_addr <= *last_unlinked)
+		goal++;
+		if (no_addr < *last_unlinked)
 			continue;
 		*last_unlinked = no_addr;
 		inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
-					no_addr, -1);
+					  no_addr, -1);
 		if (!IS_ERR(inode))
 			return inode;
 	}
@@ -1313,7 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
 				    bi->bi_len, blk, new_state);
 	}
 
-	return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
+	return (blk == BFITNOENT) ? blk : (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
@@ -1393,6 +1396,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
 		goal = rgd->rd_last_alloc_data;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
+	BUG_ON(blk == BFITNOENT);
 	rgd->rd_last_alloc_data = blk;
 
 	block = rgd->rd_data0 + blk;
@@ -1437,6 +1441,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
 		goal = rgd->rd_last_alloc_meta;
 
 	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
+	BUG_ON(blk == BFITNOENT);
 	rgd->rd_last_alloc_meta = blk;
 
 	block = rgd->rd_data0 + blk;
@@ -1478,6 +1483,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 
 	blk = rgblk_search(rgd, rgd->rd_last_alloc_meta,
 			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE);
+	BUG_ON(blk == BFITNOENT);
 
 	rgd->rd_last_alloc_meta = blk;
 
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f5e11f4fa95..4f517665c9a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3731,7 +3731,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 {
 	int status;
 	struct buffer_head *last_eb_bh = NULL;
-	struct buffer_head *bh = NULL;
 	struct ocfs2_insert_type insert = {0, };
 	struct ocfs2_extent_rec rec;
 
@@ -3783,9 +3782,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 		ocfs2_extent_map_insert_rec(inode, &rec);
 
 bail:
-	if (bh)
-		brelse(bh);
-
 	if (last_eb_bh)
 		brelse(last_eb_bh);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index f0bdfd944c4..685c18065c8 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -854,17 +854,25 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
 	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 	ssize_t ret;
 
-
-	mutex_lock(&sc->sc_send_lock);
-	ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
-					 virt_to_page(kmalloced_virt),
-					 (long)kmalloced_virt & ~PAGE_MASK,
-					 size, MSG_DONTWAIT);
-	mutex_unlock(&sc->sc_send_lock);
-	if (ret != size) {
+	while (1) {
+		mutex_lock(&sc->sc_send_lock);
+		ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
+						 virt_to_page(kmalloced_virt),
+						 (long)kmalloced_virt & ~PAGE_MASK,
+						 size, MSG_DONTWAIT);
+		mutex_unlock(&sc->sc_send_lock);
+		if (ret == size)
+			break;
+		if (ret == (ssize_t)-EAGAIN) {
+			mlog(0, "sendpage of size %zu to " SC_NODEF_FMT
+			     " returned EAGAIN\n", size, SC_NODEF_ARGS(sc));
+			cond_resched();
+			continue;
+		}
 		mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT 
 		     " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
 		o2net_ensure_shutdown(nn, sc, 0);
+		break;
 	}
 }
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c4034f693e7..4ffa715be09 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -187,6 +187,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	handle_t *handle;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
 
 	mlog_entry_void();
 
@@ -197,11 +198,27 @@ int ocfs2_update_inode_atime(struct inode *inode,
 		goto out;
 	}
 
+	ret = ocfs2_journal_access(handle, inode, bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/*
+	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
+	 * have i_mutex to guard against concurrent changes to other
+	 * inode fields.
+	 */
 	inode->i_atime = CURRENT_TIME;
-	ret = ocfs2_mark_inode_dirty(handle, inode, bh);
+	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
+	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+
+	ret = ocfs2_journal_dirty(handle, bh);
 	if (ret < 0)
 		mlog_errno(ret);
 
+out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	mlog_exit(ret);
@@ -1011,6 +1028,11 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (size_change && attr->ia_size != i_size_read(inode)) {
+		if (attr->ia_size > sb->s_maxbytes) {
+			status = -EFBIG;
+			goto bail_unlock;
+		}
+
 		if (i_size_read(inode) > attr->ia_size)
 			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
 		else
@@ -1516,7 +1538,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *di_bh = NULL;
 	handle_t *handle;
-	unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
+	unsigned long long max_off = inode->i_sb->s_maxbytes;
 
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
@@ -1942,7 +1964,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
 		}
 
 		dst = kmap_atomic(page, KM_USER0);
-		memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
+		memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
 		kunmap_atomic(dst, KM_USER0);
 		flush_dcache_page(page);
 		ocfs2_put_write_source(user_page);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d430fdab16e..701e6d04ed5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1080,6 +1080,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
 						    // this is the 1st dirent bh
 	nlink_t old_dir_nlink = old_dir->i_nlink;
+	struct ocfs2_dinode *old_di;
 
 	/* At some point it might be nice to break this function up a
 	 * bit. */
@@ -1354,7 +1355,20 @@ static int ocfs2_rename(struct inode *old_dir,
 
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
-	ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh);
+
+	status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status >= 0) {
+		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
+
+		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
+		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+
+		status = ocfs2_journal_dirty(handle, old_inode_bh);
+		if (status < 0)
+			mlog_errno(status);
+	} else
+		mlog_errno(status);
 
 	/* now that the name has been added to new_dir, remove the old name */
 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 5cc90a40b3c..58307853fb4 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -494,16 +494,16 @@ static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
 /*
  * Find the 1st page index which covers the given clusters.
  */
-static inline unsigned long ocfs2_align_clusters_to_page_index(struct super_block *sb,
+static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
 							u32 clusters)
 {
 	unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
-	unsigned long index = clusters;
+        pgoff_t index = clusters;
 
 	if (PAGE_CACHE_SHIFT > cbits) {
-		index = clusters >> (PAGE_CACHE_SHIFT - cbits);
+		index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
 	} else if (PAGE_CACHE_SHIFT < cbits) {
-		index = clusters << (cbits - PAGE_CACHE_SHIFT);
+		index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
 	}
 
 	return index;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 200c7d4790d..f2fc9a795de 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -316,39 +316,51 @@ static void ocfs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
 }
 
-/* From xfs_super.c:xfs_max_file_offset
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.
- */
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
+						unsigned int cbits)
 {
-	unsigned int pagefactor = 1;
-	unsigned int bitshift = BITS_PER_LONG - 1;
-
-	/* Figure out maximum filesize, on Linux this can depend on
-	 * the filesystem blocksize (on 32 bit platforms).
-	 * __block_prepare_write does this in an [unsigned] long...
-	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
-	 * So, for page sized blocks (4K on 32 bit platforms),
-	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-	 * but for smaller blocksizes it is less (bbits = log2 bsize).
-	 * Note1: get_block_t takes a long (implicit cast from above)
-	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-	 * can optionally convert the [unsigned] long from above into
-	 * an [unsigned] long long.
+	unsigned int bytes = 1 << cbits;
+	unsigned int trim = bytes;
+	unsigned int bitshift = 32;
+
+	/*
+	 * i_size and all block offsets in ocfs2 are always 64 bits
+	 * wide. i_clusters is 32 bits, in cluster-sized units. So on
+	 * 64 bit platforms, cluster size will be the limiting factor.
 	 */
 
 #if BITS_PER_LONG == 32
 # if defined(CONFIG_LBD)
 	BUILD_BUG_ON(sizeof(sector_t) != 8);
-	pagefactor = PAGE_CACHE_SIZE;
-	bitshift = BITS_PER_LONG;
+	/*
+	 * We might be limited by page cache size.
+	 */
+	if (bytes > PAGE_CACHE_SIZE) {
+		bytes = PAGE_CACHE_SIZE;
+		trim = 1;
+		/*
+		 * Shift by 31 here so that we don't get larger than
+		 * MAX_LFS_FILESIZE
+		 */
+		bitshift = 31;
+	}
 # else
-	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+	/*
+	 * We are limited by the size of sector_t. Use block size, as
+	 * that's what we expose to the VFS.
+	 */
+	bytes = 1 << bbits;
+	trim = 1;
+	bitshift = 31;
 # endif
 #endif
 
-	return (((unsigned long long)pagefactor) << bitshift) - 1;
+	/*
+	 * Trim by a whole cluster when we can actually approach the
+	 * on-disk limits. Otherwise we can overflow i_clusters when
+	 * an extent start is at the max offset.
+	 */
+	return (((unsigned long long)bytes) << bitshift) - trim;
 }
 
 static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
@@ -1259,8 +1271,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 				  int sector_size)
 {
 	int status = 0;
-	int i;
-	struct ocfs2_dinode *di = NULL;
+	int i, cbits, bbits;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 	struct inode *inode = NULL;
 	struct buffer_head *bitmap_bh = NULL;
 	struct ocfs2_journal *journal;
@@ -1279,9 +1291,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	sb->s_fs_info = osb;
 	sb->s_op = &ocfs2_sops;
 	sb->s_export_op = &ocfs2_export_ops;
+	sb->s_time_gran = 1;
 	sb->s_flags |= MS_NOATIME;
 	/* this is needed to support O_LARGEFILE */
-	sb->s_maxbytes = ocfs2_max_file_offset(sb->s_blocksize_bits);
+	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
 
 	osb->sb = sb;
 	/* Save off for ocfs2_rw_direct */
@@ -1341,8 +1356,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
-	di = (struct ocfs2_dinode *)bh->b_data;
-
 	osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
 	if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
 		mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 3b9cb3d0b00..783f5270f2a 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,6 +45,4 @@ void __ocfs2_abort(struct super_block *sb,
 
 #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
-
 #endif /* OCFS2_SUPER_H */