summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 3
-rw-r--r-- fs/afs/mntpt.c 2
-rw-r--r-- fs/compat_ioctl.c 22
-rw-r--r-- fs/exec.c 3
-rw-r--r-- fs/ext3/namei.c 73
-rw-r--r-- fs/ext3/super.c 11
-rw-r--r-- fs/ext4/namei.c 73
-rw-r--r-- fs/ext4/super.c 11
-rw-r--r-- fs/jffs2/fs.c 2
-rw-r--r-- fs/locks.c 2
-rw-r--r-- fs/nfs/file.c 2
-rw-r--r-- fs/nfs/namespace.c 2
-rw-r--r-- fs/nfs/nfs4proc.c 4
-rw-r--r-- fs/nfs/super.c 22
-rw-r--r-- fs/nfs/write.c 44
-rw-r--r-- fs/nfsd/nfsfh.c 20
-rw-r--r-- fs/nfsd/vfs.c 3
-rw-r--r-- fs/ocfs2/alloc.c 1
-rw-r--r-- fs/ocfs2/aops.c 37
-rw-r--r-- fs/ocfs2/file.c 5
-rw-r--r-- fs/ocfs2/localalloc.c 4
-rw-r--r-- fs/ocfs2/localalloc.h 2
-rw-r--r-- fs/ocfs2/suballoc.c 29
-rw-r--r-- fs/ocfs2/suballoc.h 11
-rw-r--r-- fs/ocfs2/super.c 69
-rw-r--r-- fs/ocfs2/vote.c 4
-rw-r--r-- fs/proc/inode.c 3
-rw-r--r-- fs/reiserfs/super.c 13
-rw-r--r-- fs/select.c 2
-rw-r--r-- fs/signalfd.c 190
-rw-r--r-- fs/xfs/linux-2.6/kmem.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 9
-rw-r--r-- fs/xfs/linux-2.6/xfs_globals.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 4
-rw-r--r-- fs/xfs/quota/xfs_qm.c 3
-rw-r--r-- fs/xfs/support/debug.h 10
-rw-r--r-- fs/xfs/xfs_buf_item.h 5
-rw-r--r-- fs/xfs/xfs_da_btree.c 1
-rw-r--r-- fs/xfs/xfs_filestream.c 10
-rw-r--r-- fs/xfs/xfs_log.c 12
-rw-r--r-- fs/xfs/xfs_log_recover.c 63
-rw-r--r-- fs/xfs/xfs_mru_cache.c 72
-rw-r--r-- fs/xfs/xfs_mru_cache.h 6
-rw-r--r-- fs/xfs/xfs_trans_buf.c 1
-rw-r--r-- fs/xfs/xfs_vnodeops.c 20
45 files changed, 518 insertions, 371 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 58a0650293e..f9eed6d7906 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -441,9 +441,6 @@ config OCFS2_FS
Note: Features which OCFS2 does not support yet:
- extended attributes
- - shared writeable mmap
- - loopback is supported, but data written will not
- be cluster coherent.
- quotas
- cluster aware flock
- Directory change notification (F_NOTIFY)
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index a3684dcc76e..6f8c96fb29e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -235,8 +235,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
switch (err) {
case 0:
- mntput(nd->mnt);
dput(nd->dentry);
+ mntput(nd->mnt);
nd->mnt = newmnt;
nd->dentry = dget(newmnt->mnt_root);
schedule_delayed_work(&afs_mntpt_expiry_timer,
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a6c9078af12..5a5b7116cef 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -2311,8 +2311,10 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
struct iwreq __user *iwr_u;
struct iw_point __user *iwp;
struct compat_iw_point __user *iwp_u;
- compat_caddr_t pointer;
+ compat_caddr_t pointer_u;
+ void __user *pointer;
__u16 length, flags;
+ int ret;
iwr_u = compat_ptr(arg);
iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data;
@@ -2330,17 +2332,29 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar
sizeof(iwr->ifr_ifrn.ifrn_name)))
return -EFAULT;
- if (__get_user(pointer, &iwp_u->pointer) ||
+ if (__get_user(pointer_u, &iwp_u->pointer) ||
__get_user(length, &iwp_u->length) ||
__get_user(flags, &iwp_u->flags))
return -EFAULT;
- if (__put_user(compat_ptr(pointer), &iwp->pointer) ||
+ if (__put_user(compat_ptr(pointer_u), &iwp->pointer) ||
__put_user(length, &iwp->length) ||
__put_user(flags, &iwp->flags))
return -EFAULT;
- return sys_ioctl(fd, cmd, (unsigned long) iwr);
+ ret = sys_ioctl(fd, cmd, (unsigned long) iwr);
+
+ if (__get_user(pointer, &iwp->pointer) ||
+ __get_user(length, &iwp->length) ||
+ __get_user(flags, &iwp->flags))
+ return -EFAULT;
+
+ if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) ||
+ __put_user(length, &iwp_u->length) ||
+ __put_user(flags, &iwp_u->flags))
+ return -EFAULT;
+
+ return ret;
}
/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
diff --git a/fs/exec.c b/fs/exec.c
index c21a8cc0627..073b0b8c6d0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -50,7 +50,6 @@
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/audit.h>
-#include <linux/signalfd.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk)
* and we can just re-use it all.
*/
if (atomic_read(&oldsighand->count) <= 1) {
- signalfd_detach(tsk);
exit_itimers(sig);
return 0;
}
@@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk)
sig->flags = 0;
no_thread_group:
- signalfd_detach(tsk);
exit_itimers(sig);
if (leader)
release_task(leader);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 1586807b817..c1fa1908dba 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT3_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
ext3fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT3_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext3_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext3_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
@@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext3_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 22cfdd61c06..9537316a071 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2578,8 +2578,11 @@ static int ext3_release_dquot(struct dquot *dquot)
handle = ext3_journal_start(dquot_to_inode(dquot),
EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ /* Release dquot anyway to avoid endless cycle in dqput() */
+ dquot_release(dquot);
return PTR_ERR(handle);
+ }
ret = dquot_release(dquot);
err = ext3_journal_stop(handle);
if (!ret)
@@ -2712,6 +2715,12 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
+ if (!handle) {
+ printk(KERN_WARNING "EXT3-fs: Quota write (off=%Lu, len=%Lu)"
+ " cancelled because transaction is not started.\n",
+ (unsigned long long)off, (unsigned long long)len);
+ return -EIO;
+ }
mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index da224974af7..5fdb862e71c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -140,7 +140,8 @@ struct dx_frame
struct dx_map_entry
{
u32 hash;
- u32 offs;
+ u16 offs;
+ u16 size;
};
#ifdef CONFIG_EXT4_INDEX
@@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir,
entries = (struct dx_entry *) (((char *)&root->info) +
root->info.info_length);
- assert(dx_get_limit(entries) == dx_root_limit(dir,
- root->info.info_length));
+
+ if (dx_get_limit(entries) != dx_root_limit(dir,
+ root->info.info_length)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != root limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+
dxtrace (printk("Look up %x", hash));
while (1)
{
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
+ if (!count || count > dx_get_limit(entries)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: no count or count > limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
+
p = entries + 1;
q = entries + count - 1;
while (p <= q)
@@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
- assert (dx_get_limit(entries) == dx_node_limit (dir));
+ if (dx_get_limit(entries) != dx_node_limit (dir)) {
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "dx entry: limit != node limit");
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail2;
+ }
frame++;
+ frame->bh = NULL;
}
fail2:
while (frame >= frame_in) {
@@ -432,6 +455,10 @@ fail2:
frame--;
}
fail:
+ if (*err == ERR_BAD_DX_DIR)
+ ext4_warning(dir->i_sb, __FUNCTION__,
+ "Corrupt dir inode %ld, running e2fsck is "
+ "recommended.", dir->i_ino);
return NULL;
}
@@ -671,6 +698,10 @@ errout:
* Directory block splitting, compacting
*/
+/*
+ * Create map of hash values, offsets, and sizes, stored at end of block.
+ * Returns number of entries mapped.
+ */
static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
struct dx_hash_info *hinfo, struct dx_map_entry *map_tail)
{
@@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
ext4fs_dirhash(de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
- map_tail->offs = (u32) ((char *) de - base);
+ map_tail->offs = (u16) ((char *) de - base);
+ map_tail->size = le16_to_cpu(de->rec_len);
count++;
cond_resched();
}
@@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
return count;
}
+/* Sort map by hash value */
static void dx_sort_map (struct dx_map_entry *map, unsigned count)
{
struct dx_map_entry *p, *q, *top = map + count - 1;
@@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb,
}
#ifdef CONFIG_EXT4_INDEX
+/*
+ * Move count entries from end of map between two memory locations.
+ * Returns pointer to last entry moved.
+ */
static struct ext4_dir_entry_2 *
dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
{
@@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
return (struct ext4_dir_entry_2 *) (to - rec_len);
}
+/*
+ * Compact each dir entry in the range to the minimal rec_len.
+ * Returns pointer to last entry in range.
+ */
static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
{
struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
@@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
return prev;
}
+/*
+ * Split a full leaf block to make room for a new dir entry.
+ * Allocate a new block, and move entries so that they are approx. equally full.
+ * Returns pointer to de in block into which the new entry will be inserted.
+ */
static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct buffer_head **bh,struct dx_frame *frame,
struct dx_hash_info *hinfo, int *error)
@@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
u32 hash2;
struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2;
- unsigned split;
+ unsigned split, move, size, i;
struct ext4_dir_entry_2 *de = NULL, *de2;
int err = 0;
@@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
count = dx_make_map ((struct ext4_dir_entry_2 *) data1,
blocksize, hinfo, map);
map -= count;
- split = count/2; // need to adjust to actual middle
dx_sort_map (map, count);
+ /* Split the existing block in the middle, size-wise */
+ size = 0;
+ move = 0;
+ for (i = count-1; i >= 0; i--) {
+ /* is more than half of this entry in 2nd half of the block? */
+ if (size + map[i].size/2 > blocksize/2)
+ break;
+ size += map[i].size;
+ move++;
+ }
+ /* map index at which we will split */
+ split = count - move;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n",
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4550b83ab1c..3c1397fa83d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2698,8 +2698,11 @@ static int ext4_release_dquot(struct dquot *dquot)
handle = ext4_journal_start(dquot_to_inode(dquot),
EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ /* Release dquot anyway to avoid endless cycle in dqput() */
+ dquot_release(dquot);
return PTR_ERR(handle);
+ }
ret = dquot_release(dquot);
err = ext4_journal_stop(handle);
if (!ret)
@@ -2832,6 +2835,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
+ if (!handle) {
+ printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)"
+ " cancelled because transaction is not started.\n",
+ (unsigned long long)off, (unsigned long long)len);
+ return -EIO;
+ }
mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 1d3b7a9fc82..8bc727b7169 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -627,7 +627,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
struct inode *inode = OFNI_EDONI_2SFFJ(f);
struct page *pg;
- pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+ pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
(void *)jffs2_do_readpage_unlock, inode);
if (IS_ERR(pg))
return (void *)pg;
diff --git a/fs/locks.c b/fs/locks.c
index 50857d2d340..c795eaaf6c4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -782,7 +782,7 @@ find_conflict:
if (request->fl_flags & FL_ACCESS)
goto out;
locks_copy_lock(new_fl, request);
- locks_insert_lock(&inode->i_flock, new_fl);
+ locks_insert_lock(before, new_fl);
new_fl = NULL;
error = 0;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index c87dc713b5d..579cf8a7d4a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -316,7 +316,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
if (offset != 0)
return;
/* Cancel any unstarted writes on this page */
- nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
+ nfs_wb_page_cancel(page->mapping->host, page);
}
static int nfs_release_page(struct page *page, gfp_t gfp)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index aea76d0e5fb..acfc56f9edc 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct work_struct *work)
void nfs_release_automount_timer(void)
{
if (list_empty(&nfs_automount_list))
- cancel_delayed_work_sync(&nfs_automount_task);
+ cancel_delayed_work(&nfs_automount_task);
}
/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 62b3ae28031..4b90e17555a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -646,7 +646,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
rcu_read_lock();
delegation = rcu_dereference(NFS_I(state->inode)->delegation);
if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
- delegation_type = delegation->flags;
+ delegation_type = delegation->type;
rcu_read_unlock();
opendata->o_arg.u.delegation_type = delegation_type;
status = nfs4_open_recover(opendata, state);
@@ -1434,7 +1434,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
}
res = d_add_unique(dentry, igrab(state->inode));
if (res != NULL)
- dentry = res;
+ path.dentry = res;
nfs4_intent_set_file(nd, &path, state);
return res;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 46139003ea0..b878528b64c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void)
unregister_shrinker(&acl_shrinker);
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
- nfs_unregister_sysctl();
#endif
+ nfs_unregister_sysctl();
unregister_filesystem(&nfs_fs_type);
}
@@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char *raw,
kfree(string);
switch (token) {
- case Opt_udp:
+ case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = IPPROTO_UDP;
mnt->timeo = 7;
mnt->retrans = 5;
break;
- case Opt_tcp:
+ case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = IPPROTO_TCP;
mnt->timeo = 600;
@@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char *raw,
kfree(string);
switch (token) {
- case Opt_udp:
+ case Opt_xprt_udp:
mnt->mount_server.protocol = IPPROTO_UDP;
break;
- case Opt_tcp:
+ case Opt_xprt_tcp:
mnt->mount_server.protocol = IPPROTO_TCP;
break;
default:
@@ -1153,20 +1153,20 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options,
c = strchr(dev_name, ':');
if (c == NULL)
return -EINVAL;
- len = c - dev_name - 1;
+ len = c - dev_name;
if (len > sizeof(data->hostname))
- return -EINVAL;
+ return -ENAMETOOLONG;
strncpy(data->hostname, dev_name, len);
args.nfs_server.hostname = data->hostname;
c++;
if (strlen(c) > NFS_MAXPATHLEN)
- return -EINVAL;
+ return -ENAMETOOLONG;
args.nfs_server.export_path = c;
status = nfs_try_mount(&args, mntfh);
if (status)
- return -EINVAL;
+ return status;
/*
* Translate to nfs_mount_data, which nfs_fill_super
@@ -1677,7 +1677,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
/* while calculating len, pretend ':' is '\0' */
len = c - dev_name;
if (len > NFS4_MAXNAMLEN)
- return -EINVAL;
+ return -ENAMETOOLONG;
*hostname = kzalloc(len, GFP_KERNEL);
if (*hostname == NULL)
return -ENOMEM;
@@ -1686,7 +1686,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
c++; /* step over the ':' */
len = strlen(c);
if (len > NFS4_MAXPATHLEN)
- return -EINVAL;
+ return -ENAMETOOLONG;
*mntpath = kzalloc(len + 1, GFP_KERNEL);
if (*mntpath == NULL)
return -ENOMEM;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ef97e0c0f5b..0d7a77cc394 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1396,6 +1396,50 @@ out:
return ret;
}
+int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+{
+ struct nfs_page *req;
+ loff_t range_start = page_offset(page);
+ loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+ struct writeback_control wbc = {
+ .bdi = page->mapping->backing_dev_info,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .range_start = range_start,
+ .range_end = range_end,
+ };
+ int ret = 0;
+
+ BUG_ON(!PageLocked(page));
+ for (;;) {
+ req = nfs_page_find_request(page);
+ if (req == NULL)
+ goto out;
+ if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+ nfs_release_request(req);
+ break;
+ }
+ if (nfs_lock_request_dontget(req)) {
+ nfs_inode_remove_request(req);
+ /*
+ * In case nfs_inode_remove_request has marked the
+ * page as being dirty
+ */
+ cancel_dirty_page(page, PAGE_CACHE_SIZE);
+ nfs_unlock_request(req);
+ break;
+ }
+ ret = nfs_wait_on_request(req);
+ if (ret < 0)
+ goto out;
+ }
+ if (!PagePrivate(page))
+ return 0;
+ ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
+out:
+ return ret;
+}
+
int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
{
loff_t range_start = page_offset(page);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0eb464a39aa..7011d62acfc 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -566,13 +566,23 @@ enum fsid_source fsid_source(struct svc_fh *fhp)
case FSID_DEV:
case FSID_ENCODE_DEV:
case FSID_MAJOR_MINOR:
- return FSIDSOURCE_DEV;
+ if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags
+ & FS_REQUIRES_DEV)
+ return FSIDSOURCE_DEV;
+ break;
case FSID_NUM:
- return FSIDSOURCE_FSID;
- default:
if (fhp->fh_export->ex_flags & NFSEXP_FSID)
return FSIDSOURCE_FSID;
- else
- return FSIDSOURCE_UUID;
+ break;
+ default:
+ break;
}
+ /* either a UUID type filehandle, or the filehandle doesn't
+ * match the export.
+ */
+ if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+ return FSIDSOURCE_FSID;
+ if (fhp->fh_export->ex_uuid)
+ return FSIDSOURCE_UUID;
+ return FSIDSOURCE_DEV;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a0c2b253818..7867151ebb8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -115,7 +115,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
if (IS_ERR(exp2)) {
- err = PTR_ERR(exp2);
+ if (PTR_ERR(exp2) != -ENOENT)
+ err = PTR_ERR(exp2);
dput(mounts);
mntput(mnt);
goto out;
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 4f517665c9a..778a850b463 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5602,6 +5602,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
clusters_to_del;
spin_unlock(&OCFS2_I(inode)->ip_lock);
le32_add_cpu(&fe->i_clusters, -clusters_to_del);
+ inode->i_blocks = ocfs2_inode_sector_count(inode);
status = ocfs2_trim_tree(inode, path, handle, tc,
clusters_to_del, &delete_blk);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 460d440310f..f37f25c931f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -855,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
struct ocfs2_super *osb, loff_t pos,
unsigned len, struct buffer_head *di_bh)
{
+ u32 cend;
struct ocfs2_write_ctxt *wc;
wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
@@ -862,7 +863,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
return -ENOMEM;
wc->w_cpos = pos >> osb->s_clustersize_bits;
- wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
+ cend = (pos + len - 1) >> osb->s_clustersize_bits;
+ wc->w_clen = cend - wc->w_cpos + 1;
get_bh(di_bh);
wc->w_di_bh = di_bh;
@@ -928,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode,
loff_t user_pos, unsigned user_len)
{
int i;
- unsigned from, to;
+ unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+ to = user_pos + user_len;
struct page *tmppage;
- ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
-
- if (wc->w_large_pages) {
- from = wc->w_target_from;
- to = wc->w_target_to;
- } else {
- from = 0;
- to = PAGE_CACHE_SIZE;
- }
+ ocfs2_zero_new_buffers(wc->w_target_page, from, to);
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
@@ -989,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
map_from = cluster_start;
map_to = cluster_end;
}
-
- wc->w_target_from = map_from;
- wc->w_target_to = map_to;
} else {
/*
* If we haven't allocated the new page yet, we
@@ -1209,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
loff_t pos, unsigned len)
{
int ret, i;
+ loff_t cluster_off;
+ unsigned int local_len = len;
struct ocfs2_write_cluster_desc *desc;
+ struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb);
for (i = 0; i < wc->w_clen; i++) {
desc = &wc->w_desc[i];
+ /*
+ * We have to make sure that the total write passed in
+ * doesn't extend past a single cluster.
+ */
+ local_len = len;
+ cluster_off = pos & (osb->s_clustersize - 1);
+ if ((cluster_off + local_len) > osb->s_clustersize)
+ local_len = osb->s_clustersize - cluster_off;
+
ret = ocfs2_write_cluster(mapping, desc->c_phys,
desc->c_unwritten, data_ac, meta_ac,
- wc, desc->c_cpos, pos, len);
+ wc, desc->c_cpos, pos, local_len);
if (ret) {
mlog_errno(ret);
goto out;
}
+
+ len -= local_len;
+ pos += local_len;
}
ret = 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4ffa715be09..f3bc3658e7a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -314,7 +314,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
}
i_size_write(inode, new_i_size);
- inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
di = (struct ocfs2_dinode *) fe_bh->b_data;
@@ -492,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
goto leave;
}
- status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
- &bit_off, &num_bits);
+ status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+ clusters_to_add, &bit_off, &num_bits);
if (status < 0) {
if (status != -ENOSPC)
mlog_errno(status);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 545f7892cdf..de984d27257 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -524,13 +524,12 @@ bail:
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
handle_t *handle,
struct ocfs2_alloc_context *ac,
- u32 min_bits,
+ u32 bits_wanted,
u32 *bit_off,
u32 *num_bits)
{
int status, start;
struct inode *local_alloc_inode;
- u32 bits_wanted;
void *bitmap;
struct ocfs2_dinode *alloc;
struct ocfs2_local_alloc *la;
@@ -538,7 +537,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
mlog_entry_void();
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
- bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
local_alloc_inode = ac->ac_inode;
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
la = OCFS2_LOCAL_ALLOC(alloc);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 385a10152f9..3f76631e110 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
handle_t *handle,
struct ocfs2_alloc_context *ac,
- u32 min_bits,
+ u32 bits_wanted,
u32 *bit_off,
u32 *num_bits);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d9c5c9fcb30..8f09f5235e3 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
* contig. allocation, set to '1' to indicate we can deal with extents
* of any size.
*/
-int ocfs2_claim_clusters(struct ocfs2_super *osb,
- handle_t *handle,
- struct ocfs2_alloc_context *ac,
- u32 min_clusters,
- u32 *cluster_start,
- u32 *num_clusters)
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 max_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
{
int status;
- unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+ unsigned int bits_wanted = max_clusters;
u64 bg_blkno = 0;
u16 bg_bit_off;
mlog_entry_void();
- BUG_ON(!ac);
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
@@ -1557,6 +1557,19 @@ bail:
return status;
}
+int ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters)
+{
+ unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
+
+ return __ocfs2_claim_clusters(osb, handle, ac, min_clusters,
+ bits_wanted, cluster_start, num_clusters);
+}
+
static inline int ocfs2_block_group_clear_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index f212dc01a84..cafe9370309 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
u32 min_clusters,
u32 *cluster_start,
u32 *num_clusters);
+/*
+ * Use this variant of ocfs2_claim_clusters to specify a maximum
+ * number of clusters smaller than the allocation reserved.
+ */
+int __ocfs2_claim_clusters(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 min_clusters,
+ u32 max_clusters,
+ u32 *cluster_start,
+ u32 *num_clusters);
int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f2fc9a795de..c034b5129c1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -81,8 +81,15 @@ static struct dentry *ocfs2_debugfs_root = NULL;
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+struct mount_options
+{
+ unsigned long mount_opt;
+ unsigned int atime_quantum;
+ signed short slot;
+};
+
static int ocfs2_parse_options(struct super_block *sb, char *options,
- unsigned long *mount_opt, s16 *slot,
+ struct mount_options *mopt,
int is_remount);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
@@ -367,24 +374,23 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
{
int incompat_features;
int ret = 0;
- unsigned long parsed_options;
- s16 slot;
+ struct mount_options parsed_options;
struct ocfs2_super *osb = OCFS2_SB(sb);
- if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
+ if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
ret = -EINVAL;
goto out;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) !=
- (parsed_options & OCFS2_MOUNT_HB_LOCAL)) {
+ (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
goto out;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
- (parsed_options & OCFS2_MOUNT_DATA_WRITEBACK)) {
+ (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change data mode on remount\n");
goto out;
@@ -435,7 +441,9 @@ unlock_osb:
/* Only save off the new mount options in case of a successful
* remount. */
- osb->s_mount_opt = parsed_options;
+ osb->s_mount_opt = parsed_options.mount_opt;
+ osb->s_atime_quantum = parsed_options.atime_quantum;
+ osb->preferred_slot = parsed_options.slot;
}
out:
return ret;
@@ -547,8 +555,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dentry *root;
int status, sector_size;
- unsigned long parsed_opt;
- s16 slot;
+ struct mount_options parsed_options;
struct inode *inode = NULL;
struct ocfs2_super *osb = NULL;
struct buffer_head *bh = NULL;
@@ -556,14 +563,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
mlog_entry("%p, %p, %i", sb, data, silent);
- if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
+ if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
status = -EINVAL;
goto read_super_error;
}
/* for now we only have one cluster/node, make sure we see it
* in the heartbeat universe */
- if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) {
+ if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) {
if (!o2hb_check_local_node_heartbeating()) {
status = -EINVAL;
goto read_super_error;
@@ -585,8 +592,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
brelse(bh);
bh = NULL;
- osb->s_mount_opt = parsed_opt;
- osb->preferred_slot = slot;
+ osb->s_mount_opt = parsed_options.mount_opt;
+ osb->s_atime_quantum = parsed_options.atime_quantum;
+ osb->preferred_slot = parsed_options.slot;
sb->s_magic = OCFS2_SUPER_MAGIC;
@@ -728,8 +736,7 @@ static struct file_system_type ocfs2_fs_type = {
static int ocfs2_parse_options(struct super_block *sb,
char *options,
- unsigned long *mount_opt,
- s16 *slot,
+ struct mount_options *mopt,
int is_remount)
{
int status;
@@ -738,8 +745,9 @@ static int ocfs2_parse_options(struct super_block *sb,
mlog_entry("remount: %d, options: \"%s\"\n", is_remount,
options ? options : "(none)");
- *mount_opt = 0;
- *slot = OCFS2_INVALID_SLOT;
+ mopt->mount_opt = 0;
+ mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+ mopt->slot = OCFS2_INVALID_SLOT;
if (!options) {
status = 1;
@@ -749,7 +757,6 @@ static int ocfs2_parse_options(struct super_block *sb,
while ((p = strsep(&options, ",")) != NULL) {
int token, option;
substring_t args[MAX_OPT_ARGS];
- struct ocfs2_super * osb = OCFS2_SB(sb);
if (!*p)
continue;
@@ -757,10 +764,10 @@ static int ocfs2_parse_options(struct super_block *sb,
token = match_token(p, tokens, args);
switch (token) {
case Opt_hb_local:
- *mount_opt |= OCFS2_MOUNT_HB_LOCAL;
+ mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
break;
case Opt_hb_none:
- *mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
+ mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL;
break;
case Opt_barrier:
if (match_int(&args[0], &option)) {
@@ -768,27 +775,27 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option)
- *mount_opt |= OCFS2_MOUNT_BARRIER;
+ mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
else
- *mount_opt &= ~OCFS2_MOUNT_BARRIER;
+ mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
break;
case Opt_intr:
- *mount_opt &= ~OCFS2_MOUNT_NOINTR;
+ mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
break;
case Opt_nointr:
- *mount_opt |= OCFS2_MOUNT_NOINTR;
+ mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
break;
case Opt_err_panic:
- *mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
+ mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
break;
case Opt_err_ro:
- *mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
+ mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
break;
case Opt_data_ordered:
- *mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
+ mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
break;
case Opt_data_writeback:
- *mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
+ mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
break;
case Opt_atime_quantum:
if (match_int(&args[0], &option)) {
@@ -796,9 +803,7 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option >= 0)
- osb->s_atime_quantum = option;
- else
- osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+ mopt->atime_quantum = option;
break;
case Opt_slot:
option = 0;
@@ -807,7 +812,7 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option)
- *slot = (s16)option;
+ mopt->slot = (s16)option;
break;
default:
mlog(ML_ERROR,
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 66a13ee63d4..c05358538f2 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -66,7 +66,7 @@ struct ocfs2_vote_msg
{
struct ocfs2_msg_hdr v_hdr;
__be32 v_reserved1;
-};
+} __attribute__ ((packed));
/* Responses are given these values to maintain backwards
* compatibility with older ocfs2 versions */
@@ -78,7 +78,7 @@ struct ocfs2_response_msg
{
struct ocfs2_msg_hdr r_hdr;
__be32 r_response;
-};
+} __attribute__ ((packed));
struct ocfs2_vote_work {
struct list_head w_list;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a5b0dfd89a1..0e4d37c93ee 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/completion.h>
+#include <linux/poll.h>
#include <linux/file.h>
#include <linux/limits.h>
#include <linux/init.h>
@@ -232,7 +233,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)
{
struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
- unsigned int rv = 0;
+ unsigned int rv = DEFAULT_POLLMASK;
unsigned int (*poll)(struct file *, struct poll_table_struct *);
spin_lock(&pde->pde_unload_lock);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 5b68dd3f191..a005451930b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1915,8 +1915,11 @@ static int reiserfs_release_dquot(struct dquot *dquot)
ret =
journal_begin(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
- if (ret)
+ if (ret) {
+ /* Release dquot anyway to avoid endless cycle in dqput() */
+ dquot_release(dquot);
goto out;
+ }
ret = dquot_release(dquot);
err =
journal_end(&th, dquot->dq_sb,
@@ -2067,6 +2070,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
size_t towrite = len;
struct buffer_head tmp_bh, *bh;
+ if (!current->journal_info) {
+ printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)"
+ " cancelled because transaction is not started.\n",
+ (unsigned long long)off, (unsigned long long)len);
+ return -EIO;
+ }
mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
@@ -2098,7 +2107,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
data += tocopy;
blk++;
}
- out:
+out:
if (len == towrite)
return err;
if (inode->i_size < off + len - towrite)
diff --git a/fs/select.c b/fs/select.c
index a974082b082..46dca31c607 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,8 +26,6 @@
#include <asm/uaccess.h>
-#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
-
struct poll_table_page {
struct poll_table_page * next;
struct poll_table_entry * entry;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index a8e293d3003..aefb0be0794 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -11,8 +11,10 @@
* Now using anonymous inode source.
* Thanks to Oleg Nesterov for useful code review and suggestions.
* More comments and suggestions from Arnd Bergmann.
- * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
+ * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
* Retrieve multiple signals with one read() call
+ * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org>
+ * Attach to the sighand only during read() and poll().
*/
#include <linux/file.h>
@@ -27,102 +29,12 @@
#include <linux/signalfd.h>
struct signalfd_ctx {
- struct list_head lnk;
- wait_queue_head_t wqh;
sigset_t sigmask;
- struct task_struct *tsk;
};
-struct signalfd_lockctx {
- struct task_struct *tsk;
- unsigned long flags;
-};
-
-/*
- * Tries to acquire the sighand lock. We do not increment the sighand
- * use count, and we do not even pin the task struct, so we need to
- * do it inside an RCU read lock, and we must be prepared for the
- * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
- * being detached. We return 0 if the sighand has been detached, or
- * 1 if we were able to pin the sighand lock.
- */
-static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
-{
- struct sighand_struct *sighand = NULL;
-
- rcu_read_lock();
- lk->tsk = rcu_dereference(ctx->tsk);
- if (likely(lk->tsk != NULL))
- sighand = lock_task_sighand(lk->tsk, &lk->flags);
- rcu_read_unlock();
-
- if (!sighand)
- return 0;
-
- if (!ctx->tsk) {
- unlock_task_sighand(lk->tsk, &lk->flags);
- return 0;
- }
-
- if (lk->tsk->tgid == current->tgid)
- lk->tsk = current;
-
- return 1;
-}
-
-static void signalfd_unlock(struct signalfd_lockctx *lk)
-{
- unlock_task_sighand(lk->tsk, &lk->flags);
-}
-
-/*
- * This must be called with the sighand lock held.
- */
-void signalfd_deliver(struct task_struct *tsk, int sig)
-{
- struct sighand_struct *sighand = tsk->sighand;
- struct signalfd_ctx *ctx, *tmp;
-
- BUG_ON(!sig);
- list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
- /*
- * We use a negative signal value as a way to broadcast that the
- * sighand has been orphaned, so that we can notify all the
- * listeners about this. Remember the ctx->sigmask is inverted,
- * so if the user is interested in a signal, that corresponding
- * bit will be zero.
- */
- if (sig < 0) {
- if (ctx->tsk == tsk) {
- ctx->tsk = NULL;
- list_del_init(&ctx->lnk);
- wake_up(&ctx->wqh);
- }
- } else {
- if (!sigismember(&ctx->sigmask, sig))
- wake_up(&ctx->wqh);
- }
- }
-}
-
-static void signalfd_cleanup(struct signalfd_ctx *ctx)
-{
- struct signalfd_lockctx lk;
-
- /*
- * This is tricky. If the sighand is gone, we do not need to remove
- * context from the list, the list itself won't be there anymore.
- */
- if (signalfd_lock(ctx, &lk)) {
- list_del(&ctx->lnk);
- signalfd_unlock(&lk);
- }
- kfree(ctx);
-}
-
static int signalfd_release(struct inode *inode, struct file *file)
{
- signalfd_cleanup(file->private_data);
+ kfree(file->private_data);
return 0;
}
@@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait)
{
struct signalfd_ctx *ctx = file->private_data;
unsigned int events = 0;
- struct signalfd_lockctx lk;
- poll_wait(file, &ctx->wqh, wait);
+ poll_wait(file, &current->sighand->signalfd_wqh, wait);
- /*
- * Let the caller get a POLLIN in this case, ala socket recv() when
- * the peer disconnects.
- */
- if (signalfd_lock(ctx, &lk)) {
- if ((lk.tsk == current &&
- next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
- next_signal(&lk.tsk->signal->shared_pending,
- &ctx->sigmask) > 0)
- events |= POLLIN;
- signalfd_unlock(&lk);
- } else
+ spin_lock_irq(&current->sighand->siglock);
+ if (next_signal(&current->pending, &ctx->sigmask) ||
+ next_signal(&current->signal->shared_pending,
+ &ctx->sigmask))
events |= POLLIN;
+ spin_unlock_irq(&current->sighand->siglock);
return events;
}
@@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
int nonblock)
{
ssize_t ret;
- struct signalfd_lockctx lk;
DECLARE_WAITQUEUE(wait, current);
- if (!signalfd_lock(ctx, &lk))
- return 0;
-
- ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
+ spin_lock_irq(&current->sighand->siglock);
+ ret = dequeue_signal(current, &ctx->sigmask, info);
switch (ret) {
case 0:
if (!nonblock)
break;
ret = -EAGAIN;
default:
- signalfd_unlock(&lk);
+ spin_unlock_irq(&current->sighand->siglock);
return ret;
}
- add_wait_queue(&ctx->wqh, &wait);
+ add_wait_queue(&current->sighand->signalfd_wqh, &wait);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
- signalfd_unlock(&lk);
+ ret = dequeue_signal(current, &ctx->sigmask, info);
if (ret != 0)
break;
if (signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
+ spin_unlock_irq(&current->sighand->siglock);
schedule();
- ret = signalfd_lock(ctx, &lk);
- if (unlikely(!ret)) {
- /*
- * Let the caller read zero byte, ala socket
- * recv() when the peer disconnect. This test
- * must be done before doing a dequeue_signal(),
- * because if the sighand has been orphaned,
- * the dequeue_signal() call is going to crash
- * because ->sighand will be long gone.
- */
- break;
- }
+ spin_lock_irq(&current->sighand->siglock);
}
+ spin_unlock_irq(&current->sighand->siglock);
- remove_wait_queue(&ctx->wqh, &wait);
+ remove_wait_queue(&current->sighand->signalfd_wqh, &wait);
__set_current_state(TASK_RUNNING);
return ret;
}
/*
- * Returns either the size of a "struct signalfd_siginfo", or zero if the
- * sighand we are attached to, has been orphaned. The "count" parameter
- * must be at least the size of a "struct signalfd_siginfo".
+ * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative
+ * error code. The "count" parameter must be at least the size of a
+ * "struct signalfd_siginfo".
*/
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
@@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
return -EINVAL;
siginfo = (struct signalfd_siginfo __user *) buf;
-
do {
ret = signalfd_dequeue(ctx, &info, nonblock);
if (unlikely(ret <= 0))
@@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
nonblock = 1;
} while (--count);
- return total ? total : ret;
+ return total ? total: ret;
}
static const struct file_operations signalfd_fops = {
@@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = {
.read = signalfd_read,
};
-/*
- * Create a file descriptor that is associated with our signal
- * state. We can pass it around to others if we want to, but
- * it will always be _our_ signal state.
- */
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
int error;
sigset_t sigmask;
struct signalfd_ctx *ctx;
- struct sighand_struct *sighand;
struct file *file;
struct inode *inode;
- struct signalfd_lockctx lk;
if (sizemask != sizeof(sigset_t) ||
copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
if (!ctx)
return -ENOMEM;
- init_waitqueue_head(&ctx->wqh);
ctx->sigmask = sigmask;
- ctx->tsk = current->group_leader;
-
- sighand = current->sighand;
- /*
- * Add this fd to the list of signal listeners.
- */
- spin_lock_irq(&sighand->siglock);
- list_add_tail(&ctx->lnk, &sighand->signalfd_list);
- spin_unlock_irq(&sighand->siglock);
/*
* When we call this, the initialization must be complete, since
@@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
fput(file);
return -EINVAL;
}
- /*
- * We need to be prepared of the fact that the sighand this fd
- * is attached to, has been detched. In that case signalfd_lock()
- * will return 0, and we'll just skip setting the new mask.
- */
- if (signalfd_lock(ctx, &lk)) {
- ctx->sigmask = sigmask;
- signalfd_unlock(&lk);
- }
- wake_up(&ctx->wqh);
+ spin_lock_irq(&current->sighand->siglock);
+ ctx->sigmask = sigmask;
+ spin_unlock_irq(&current->sighand->siglock);
+
+ wake_up(&current->sighand->signalfd_wqh);
fput(file);
}
return ufd;
err_fdalloc:
- signalfd_cleanup(ctx);
+ kfree(ctx);
return error;
}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index b4acc7f3c37..e6ea293f303 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
static inline int
kmem_shake_allow(gfp_t gfp_mask)
{
- return (gfp_mask & __GFP_WAIT);
+ return (gfp_mask & __GFP_WAIT) != 0;
}
#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index fd4105d662e..5f152f60d74 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
ip->i_d.di_size = isize;
ip->i_update_core = 1;
ip->i_update_size = 1;
+ mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -652,7 +653,7 @@ xfs_probe_cluster(
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- size_t pg_offset, len = 0;
+ size_t pg_offset, pg_len = 0;
if (tindex == tlast) {
pg_offset =
@@ -665,16 +666,16 @@ xfs_probe_cluster(
pg_offset = PAGE_CACHE_SIZE;
if (page->index == tindex && !TestSetPageLocked(page)) {
- len = xfs_probe_page(page, pg_offset, mapped);
+ pg_len = xfs_probe_page(page, pg_offset, mapped);
unlock_page(page);
}
- if (!len) {
+ if (!pg_len) {
done = 1;
break;
}
- total += len;
+ total += pg_len;
tindex++;
}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index bb72c3d4141..81565dea9af 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -46,7 +46,7 @@ xfs_param_t xfs_params = {
.inherit_nosym = { 0, 0, 1 },
.rotorstep = { 1, 1, 255 },
.inherit_nodfrg = { 0, 1, 1 },
- .fstrm_timer = { 1, 50, 3600*100},
+ .fstrm_timer = { 1, 30*100, 3600*100},
};
/*
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 4528f9a3f30..491d1f4f202 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
if (vp) {
vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
- if (sync)
+ if (sync) {
+ filemap_fdatawait(inode->i_mapping);
flags |= FLUSH_SYNC;
+ }
error = bhv_vop_iflush(vp, flags);
if (error == EAGAIN)
error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d274b23ade..6ff0f4de163 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -120,7 +120,8 @@ xfs_Gqm_init(void)
* Initialize the dquot hash tables.
*/
udqhash = kmem_zalloc_greedy(&hsize,
- XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH,
+ XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
+ XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
KM_SLEEP | KM_MAYFAIL | KM_LARGE);
gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
hsize /= sizeof(xfs_dqhash_t);
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index a27a7c8c052..855da040864 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -34,10 +34,10 @@ extern void cmn_err(int, char *, ...)
extern void assfail(char *expr, char *f, int l);
#define ASSERT_ALWAYS(expr) \
- (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+ (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
#ifndef DEBUG
-# define ASSERT(expr) ((void)0)
+#define ASSERT(expr) ((void)0)
#ifndef STATIC
# define STATIC static noinline
@@ -49,8 +49,10 @@ extern void assfail(char *expr, char *f, int l);
#else /* DEBUG */
-# define ASSERT(expr) ASSERT_ALWAYS(expr)
-# include <linux/random.h>
+#include <linux/random.h>
+
+#define ASSERT(expr) \
+ (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
#ifndef STATIC
# define STATIC noinline
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e13614306..fa25b7dcc6c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
#define XFS_BLI_UDQUOT_BUF 0x4
#define XFS_BLI_PDQUOT_BUF 0x8
#define XFS_BLI_GDQUOT_BUF 0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define XFS_BLI_INODE_NEW_BUF 0x20
#define XFS_BLI_CHUNK 128
#define XFS_BLI_SHIFT 7
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index aea37df4aa6..26d09e2e1a7 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1975,7 +1975,6 @@ xfs_da_do_buf(
error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
if (unlikely(error == EFSCORRUPTED)) {
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
- int i;
cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
(long long)bno);
cmn_err(CE_ALERT, "dir: inode %lld\n",
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index ce2278611bb..36d8f6aa11a 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -350,9 +350,10 @@ _xfs_filestream_update_ag(
/* xfs_fstrm_free_func(): callback for freeing cached stream items. */
void
xfs_fstrm_free_func(
- xfs_ino_t ino,
- fstrm_item_t *item)
+ unsigned long ino,
+ void *data)
{
+ fstrm_item_t *item = (fstrm_item_t *)data;
xfs_inode_t *ip = item->ip;
int ref;
@@ -438,7 +439,7 @@ xfs_filestream_mount(
grp_count = 10;
err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count,
- (xfs_mru_cache_free_func_t)xfs_fstrm_free_func);
+ xfs_fstrm_free_func);
return err;
}
@@ -467,8 +468,7 @@ void
xfs_filestream_flush(
xfs_mount_t *mp)
{
- /* point in time flush, so keep the reaper running */
- xfs_mru_cache_flush(mp->m_filestream, 1);
+ xfs_mru_cache_flush(mp->m_filestream);
}
/*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9d4c4fbeb3e..9bfb69e1e88 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2185,13 +2185,13 @@ xlog_state_do_callback(
}
cb = iclog->ic_callback;
- while (cb != 0) {
+ while (cb) {
iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_callback = NULL;
LOG_UNLOCK(log, s);
/* perform callbacks in the order given */
- for (; cb != 0; cb = cb_next) {
+ for (; cb; cb = cb_next) {
cb_next = cb->cb_next;
cb->cb_func(cb->cb_arg, aborted);
}
@@ -2202,7 +2202,7 @@ xlog_state_do_callback(
loopdidcallbacks++;
funcdidcallbacks++;
- ASSERT(iclog->ic_callback == 0);
+ ASSERT(iclog->ic_callback == NULL);
if (!(iclog->ic_state & XLOG_STATE_IOERROR))
iclog->ic_state = XLOG_STATE_DIRTY;
@@ -3242,10 +3242,10 @@ xlog_ticket_put(xlog_t *log,
#else
/* When we debug, it is easier if tickets are cycled */
ticket->t_next = NULL;
- if (log->l_tail != 0) {
+ if (log->l_tail) {
log->l_tail->t_next = ticket;
} else {
- ASSERT(log->l_freelist == 0);
+ ASSERT(log->l_freelist == NULL);
log->l_freelist = ticket;
}
log->l_tail = ticket;
@@ -3463,7 +3463,7 @@ xlog_verify_iclog(xlog_t *log,
s = LOG_LOCK(log);
icptr = log->l_iclog;
for (i=0; i < log->l_iclog_bufs; i++) {
- if (icptr == 0)
+ if (icptr == NULL)
xlog_panic("xlog_verify_iclog: invalid ptr");
icptr = icptr->ic_next;
}
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fddbb091a86..7174991f4be 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1366,7 +1366,7 @@ xlog_recover_add_to_cont_trans(
int old_len;
item = trans->r_itemq;
- if (item == 0) {
+ if (item == NULL) {
/* finish copying rest of trans header */
xlog_recover_add_item(&trans->r_itemq);
ptr = (xfs_caddr_t) &trans->r_theader +
@@ -1412,7 +1412,7 @@ xlog_recover_add_to_trans(
if (!len)
return 0;
item = trans->r_itemq;
- if (item == 0) {
+ if (item == NULL) {
ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
if (len == sizeof(xfs_trans_header_t))
xlog_recover_add_item(&trans->r_itemq);
@@ -1467,12 +1467,12 @@ xlog_recover_unlink_tid(
xlog_recover_t *tp;
int found = 0;
- ASSERT(trans != 0);
+ ASSERT(trans != NULL);
if (trans == *q) {
*q = (*q)->r_next;
} else {
tp = *q;
- while (tp != 0) {
+ while (tp) {
if (tp->r_next == trans) {
found = 1;
break;
@@ -1495,7 +1495,7 @@ xlog_recover_insert_item_backq(
xlog_recover_item_t **q,
xlog_recover_item_t *item)
{
- if (*q == 0) {
+ if (*q == NULL) {
item->ri_prev = item->ri_next = item;
*q = item;
} else {
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
/*ARGSUSED*/
STATIC void
xlog_recover_do_reg_buffer(
+ xfs_mount_t *mp,
xlog_recover_item_t *item,
xfs_buf_t *bp,
xfs_buf_log_format_t *buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
unsigned int *data_map = NULL;
unsigned int map_size = 0;
int error;
+ int stale_buf = 1;
+
+ /*
+ * Scan through the on-disk inode buffer and attempt to
+ * determine if it has been written to since it was logged.
+ *
+ * - If any of the magic numbers are incorrect then the buffer is stale
+ * - If any of the modes are non-zero then the buffer is not stale
+ * - If all of the modes are zero and at least one of the generation
+ * counts is non-zero then the buffer is stale
+ *
+ * If the end result is a stale buffer then the log buffer is replayed
+ * otherwise it is skipped.
+ *
+ * This heuristic is not perfect. It can be improved by scanning the
+ * entire inode chunk for evidence that any of the inode clusters have
+ * been updated. To fix this problem completely we will need a major
+ * architectural change to the logging system.
+ */
+ if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+ xfs_dinode_t *dip;
+ int inodes_per_buf;
+ int mode_count = 0;
+ int gen_count = 0;
+
+ stale_buf = 0;
+ inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+ for (i = 0; i < inodes_per_buf; i++) {
+ dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+ i * mp->m_sb.sb_inodesize);
+ if (be16_to_cpu(dip->di_core.di_magic) !=
+ XFS_DINODE_MAGIC) {
+ stale_buf = 1;
+ break;
+ }
+ if (dip->di_core.di_mode)
+ mode_count++;
+ if (dip->di_core.di_gen)
+ gen_count++;
+ }
+
+ if (!mode_count && gen_count)
+ stale_buf = 1;
+ }
switch (buf_f->blf_type) {
case XFS_LI_BUF:
@@ -1899,7 +1944,7 @@ xlog_recover_do_reg_buffer(
break;
nbits = xfs_contig_bits(data_map, map_size, bit);
ASSERT(nbits > 0);
- ASSERT(item->ri_buf[i].i_addr != 0);
+ ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
ASSERT(XFS_BUF_COUNT(bp) >=
((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
}
- if (!error)
+ if (!error && stale_buf)
memcpy(xfs_buf_offset(bp,
(uint)bit << XFS_BLI_SHIFT), /* dest */
item->ri_buf[i].i_addr, /* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
if (log->l_quotaoffs_flag & type)
return;
- xlog_recover_do_reg_buffer(item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
/*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
} else {
- xlog_recover_do_reg_buffer(item, bp, buf_f);
+ xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
}
if (error)
return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 7deb9e3cbbd..e0b358c1c53 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
*/
if (!_xfs_mru_cache_migrate(mru, now)) {
mru->time_zero = now;
- if (!mru->next_reap)
- mru->next_reap = mru->grp_count * mru->grp_time;
+ if (!mru->queued) {
+ mru->queued = 1;
+ queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+ mru->grp_count * mru->grp_time);
+ }
} else {
grp = (now - mru->time_zero) / mru->grp_time;
grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
struct work_struct *work)
{
xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work);
- unsigned long now;
+ unsigned long now, next;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
return;
mutex_spinlock(&mru->lock);
- now = jiffies;
- if (mru->reap_all ||
- (mru->next_reap && time_after(now, mru->next_reap))) {
- if (mru->reap_all)
- now += mru->grp_count * mru->grp_time * 2;
- mru->next_reap = _xfs_mru_cache_migrate(mru, now);
- _xfs_mru_cache_clear_reap_list(mru);
+ next = _xfs_mru_cache_migrate(mru, jiffies);
+ _xfs_mru_cache_clear_reap_list(mru);
+
+ mru->queued = next;
+ if ((mru->queued > 0)) {
+ now = jiffies;
+ if (next <= now)
+ next = 0;
+ else
+ next -= now;
+ queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
}
- /*
- * the process that triggered the reap_all is responsible
- * for restating the periodic reap if it is required.
- */
- if (!mru->reap_all)
- queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
- mru->reap_all = 0;
mutex_spinunlock(&mru->lock, 0);
}
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
/* An extra list is needed to avoid reaping up to a grp_time early. */
mru->grp_count = grp_count + 1;
- mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+ mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
if (!mru->lists) {
err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
mru->grp_time = grp_time;
mru->free_func = free_func;
- /* start up the reaper event */
- mru->next_reap = 0;
- mru->reap_all = 0;
- queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
*mrup = mru;
exit:
@@ -394,35 +389,25 @@ exit:
* Call xfs_mru_cache_flush() to flush out all cached entries, calling their
* free functions as they're deleted. When this function returns, the caller is
* guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
*/
void
xfs_mru_cache_flush(
- xfs_mru_cache_t *mru,
- int restart)
+ xfs_mru_cache_t *mru)
{
if (!mru || !mru->lists)
return;
- cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
mutex_spinlock(&mru->lock);
- mru->reap_all = 1;
- mutex_spinunlock(&mru->lock, 0);
+ if (mru->queued) {
+ mutex_spinunlock(&mru->lock, 0);
+ cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+ mutex_spinlock(&mru->lock);
+ }
- queue_work(xfs_mru_reap_wq, &mru->work.work);
- flush_workqueue(xfs_mru_reap_wq);
+ _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+ _xfs_mru_cache_clear_reap_list(mru);
- mutex_spinlock(&mru->lock);
- WARN_ON_ONCE(mru->reap_all != 0);
- mru->reap_all = 0;
- if (restart)
- queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
mutex_spinunlock(&mru->lock, 0);
}
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
if (!mru || !mru->lists)
return;
- /* we don't want the reaper to restart here */
- xfs_mru_cache_flush(mru, 0);
+ xfs_mru_cache_flush(mru);
kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
kmem_free(mru, sizeof(*mru));
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index 624fd10ee8e..dd58ea1bbeb 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
unsigned int grp_time; /* Time period spanned by grps. */
unsigned int lru_grp; /* Group containing time zero. */
unsigned long time_zero; /* Time first element was added. */
- unsigned long next_reap; /* Time that the reaper should
- next do something. */
- unsigned int reap_all; /* if set, reap all lists */
xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
struct delayed_work work; /* Workqueue data for reaping. */
+ unsigned int queued; /* work has been queued */
} xfs_mru_cache_t;
int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
unsigned int grp_count,
xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022..95fff6872a2 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+ bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
}
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 1a5ad8cd97b..60345922990 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return XFS_ERROR(EIO);
+ if (flag & FSYNC_DATA)
+ filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
/*
* We always need to make sure that the required inode state
* is safe on disk. The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
sync_lsn = log->l_last_sync_lsn;
GRANT_UNLOCK(log, s);
- if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
- return 0;
+ if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+ if (flags & FLUSH_SYNC)
+ log_flags |= XFS_LOG_SYNC;
+ error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+ if (error)
+ return error;
+ }
- if (flags & FLUSH_SYNC)
- log_flags |= XFS_LOG_SYNC;
- return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+ if (ip->i_update_core == 0)
+ return 0;
}
}
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
if (flags & FLUSH_INODE) {
int flush_flags;
- if (xfs_ipincount(ip))
- return EAGAIN;
-
if (flags & FLUSH_SYNC) {
xfs_ilock(ip, XFS_ILOCK_SHARED);
xfs_iflock(ip);