summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/configfs/dir.c32
-rw-r--r--fs/ext2/balloc.c1
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/super.c41
-rw-r--r--fs/ext3/inode.c13
-rw-r--r--fs/ext3/super.c42
-rw-r--r--fs/jffs2/jffs2_fs_i.h4
-rw-r--r--fs/jffs2/nodelist.c6
-rw-r--r--fs/jffs2/nodelist.h1
-rw-r--r--fs/jffs2/summary.c5
-rw-r--r--fs/jffs2/xattr.c1
-rw-r--r--fs/nfs/direct.c50
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/read.c30
-rw-r--r--fs/nfs/write.c41
-rw-r--r--fs/nfsd/nfs4recover.c21
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/alloc.c28
-rw-r--r--fs/ocfs2/aops.c83
-rw-r--r--fs/ocfs2/buffer_head_io.c95
-rw-r--r--fs/ocfs2/buffer_head_io.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dir.c28
-rw-r--r--fs/ocfs2/dlm/dlmast.c10
-rw-r--r--fs/ocfs2/dlmglue.c9
-rw-r--r--fs/ocfs2/dlmglue.h5
-rw-r--r--fs/ocfs2/file.c3
-rw-r--r--fs/ocfs2/inode.c32
-rw-r--r--fs/ocfs2/inode.h3
-rw-r--r--fs/ocfs2/ioctl.c136
-rw-r--r--fs/ocfs2/ioctl.h16
-rw-r--r--fs/ocfs2/namei.c32
-rw-r--r--fs/ocfs2/ocfs2_fs.h24
-rw-r--r--fs/ocfs2/uptodate.c21
-rw-r--r--fs/ocfs2/uptodate.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c27
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_fsops.c16
-rw-r--r--fs/xfs/xfs_mount.c32
-rw-r--r--fs/xfs/xfs_vfsops.c3
43 files changed, 677 insertions, 278 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9faabc..53058162831 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@ config FS_POSIX_ACL
source "fs/xfs/Kconfig"
config OCFS2_FS
- tristate "OCFS2 file system support (EXPERIMENTAL)"
- depends on NET && SYSFS && EXPERIMENTAL
+ tristate "OCFS2 file system support"
+ depends on NET && SYSFS
select CONFIGFS_FS
select JBD
select CRC32
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df025453dd9..816e8ef6456 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
return sd;
}
+/*
+ *
+ * Return -EEXIST if there is already a configfs element with the same
+ * name for the same parent.
+ *
+ * called with parent inode's i_mutex held
+ */
+int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+ const unsigned char *new)
+{
+ struct configfs_dirent * sd;
+
+ list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+ if (sd->s_element) {
+ const unsigned char *existing = configfs_get_name(sd);
+ if (strcmp(existing, new))
+ continue;
+ else
+ return -EEXIST;
+ }
+ }
+
+ return 0;
+}
+
+
int configfs_make_dirent(struct configfs_dirent * parent_sd,
struct dentry * dentry, void * element,
umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
int error;
umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
- error = configfs_make_dirent(p->d_fsdata, d, k, mode,
- CONFIGFS_DIR);
+ error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
+ if (!error)
+ error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+ CONFIGFS_DIR);
if (!error) {
error = configfs_create(d, mode, init_dir);
if (!error) {
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index d4870432ecf..b1981d0e95a 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -539,7 +539,6 @@ unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
#endif /* EXT2FS_DEBUG */
-/* Superblock must be locked */
unsigned long ext2_count_free_blocks (struct super_block * sb)
{
struct ext2_group_desc * desc;
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index de85c61c58c..695f69ccf90 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -637,7 +637,6 @@ fail:
return ERR_PTR(err);
}
-/* Superblock must be locked */
unsigned long ext2_count_free_inodes (struct super_block * sb)
{
struct ext2_group_desc *desc;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 681dea8f953..4286ff6330b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -251,6 +251,44 @@ static struct super_operations ext2_sops = {
#endif
};
+static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp)
+{
+ __u32 *objp = vobjp;
+ unsigned long ino = objp[0];
+ __u32 generation = objp[1];
+ struct inode *inode;
+ struct dentry *result;
+
+ if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO)
+ return ERR_PTR(-ESTALE);
+ if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
+ return ERR_PTR(-ESTALE);
+
+ /* iget isn't really right if the inode is currently unallocated!!
+ * ext2_read_inode currently does appropriate checks, but
+ * it might be "neater" to call ext2_get_inode first and check
+ * if the inode is valid.....
+ */
+ inode = iget(sb, ino);
+ if (inode == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (is_bad_inode(inode) ||
+ (generation && inode->i_generation != generation)) {
+ /* we didn't find the right inode.. */
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ /* now to find a dentry.
+ * If possible, get a well-connected one
+ */
+ result = d_alloc_anon(inode);
+ if (!result) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ return result;
+}
+
/* Yes, most of these are left as NULL!!
* A NULL value implies the default, which works with ext2-like file
* systems, but can be improved upon.
@@ -258,6 +296,7 @@ static struct super_operations ext2_sops = {
*/
static struct export_operations ext2_export_ops = {
.get_parent = ext2_get_parent,
+ .get_dentry = ext2_get_dentry,
};
static unsigned long get_sb_block(void **data)
@@ -1044,7 +1083,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
unsigned long overhead;
int i;
- lock_super(sb);
if (test_opt (sb, MINIX_DF))
overhead = 0;
else {
@@ -1085,7 +1123,6 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count);
buf->f_ffree = ext2_count_free_inodes (sb);
buf->f_namelen = EXT2_NAME_LEN;
- unlock_super(sb);
return 0;
}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index c5ee9f0691e..84be02e9365 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -925,7 +925,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
set_buffer_new(bh_result);
got_it:
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
- if (blocks_to_boundary == 0)
+ if (count > blocks_to_boundary)
set_buffer_boundary(bh_result);
err = count;
/* Clean up and exit */
@@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
buffer_trace_init(&dummy.b_history);
err = ext3_get_blocks_handle(handle, inode, block, 1,
&dummy, create, 1);
- if (err == 1) {
+ /*
+ * ext3_get_blocks_handle() returns number of blocks
+ * mapped. 0 in case of a HOLE.
+ */
+ if (err > 0) {
+ if (err > 1)
+ WARN_ON(1);
err = 0;
- } else if (err >= 0) {
- WARN_ON(1);
- err = -EIO;
}
*errp = err;
if (!err && buffer_mapped(&dummy)) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 813d589cc6c..3559086eee5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -554,6 +554,47 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
return 0;
}
+
+static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
+{
+ __u32 *objp = vobjp;
+ unsigned long ino = objp[0];
+ __u32 generation = objp[1];
+ struct inode *inode;
+ struct dentry *result;
+
+ if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
+ return ERR_PTR(-ESTALE);
+ if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
+ return ERR_PTR(-ESTALE);
+
+ /* iget isn't really right if the inode is currently unallocated!!
+ *
+ * ext3_read_inode will return a bad_inode if the inode had been
+ * deleted, so we should be safe.
+ *
+ * Currently we don't know the generation for parent directory, so
+ * a generation of 0 means "accept any"
+ */
+ inode = iget(sb, ino);
+ if (inode == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (is_bad_inode(inode) ||
+ (generation && inode->i_generation != generation)) {
+ iput(inode);
+ return ERR_PTR(-ESTALE);
+ }
+ /* now to find a dentry.
+ * If possible, get a well-connected one
+ */
+ result = d_alloc_anon(inode);
+ if (!result) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ return result;
+}
+
#ifdef CONFIG_QUOTA
#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
@@ -622,6 +663,7 @@ static struct super_operations ext3_sops = {
static struct export_operations ext3_export_ops = {
.get_parent = ext3_get_parent,
+ .get_dentry = ext3_get_dentry,
};
enum {
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e00b8..3a566077ac9 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
uint16_t flags;
uint8_t usercompr;
-#if !defined (__ECOS)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
struct inode vfs_inode;
-#endif
-#endif
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
struct posix_acl *i_acl_access;
struct posix_acl *i_acl_default;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 7675b33396c..5a6b4d64206 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -21,6 +21,9 @@
#include <linux/pagemap.h>
#include "nodelist.h"
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+ struct jffs2_node_frag *this);
+
void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list)
{
struct jffs2_full_dirent **prev = list;
@@ -87,7 +90,8 @@ void jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list, uint
}
}
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this)
+static void jffs2_obsolete_node_frag(struct jffs2_sb_info *c,
+ struct jffs2_node_frag *this)
{
if (this->node) {
this->node->frags--;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index cae92c14116..0ddfd70307f 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -334,7 +334,6 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c_delete);
struct rb_node *rb_next(struct rb_node *);
struct rb_node *rb_prev(struct rb_node *);
void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
-void jffs2_obsolete_node_frag(struct jffs2_sb_info *c, struct jffs2_node_frag *this);
int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c19bd476e8e..e52cef526d9 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -252,6 +252,11 @@ int jffs2_sum_add_kvec(struct jffs2_sb_info *c, const struct kvec *invecs,
union jffs2_node_union *node;
struct jffs2_eraseblock *jeb;
+ if (c->summary->sum_size == JFFS2_SUMMARY_NOSUM_SIZE) {
+ dbg_summary("Summary is disabled for this jeb! Skipping summary info!\n");
+ return 0;
+ }
+
node = invecs[0].iov_base;
jeb = &c->blocks[ofs / c->sector_size];
ofs -= jeb->offset;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 25bc1ae0864..4da09ce1d1f 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1215,7 +1215,6 @@ int jffs2_garbage_collect_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xatt
rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XATTR_SIZE);
if (rc) {
JFFS2_WARNING("jffs2_reserve_space_gc()=%d, request=%u\n", rc, totlen);
- rc = rc ? rc : -EBADFD;
goto out;
}
rc = save_xattr_datum(c, xd);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fecd3b095de..76ca1cbc38f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
- int page_count;
-
- page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- page_count -= user_addr >> PAGE_SHIFT;
- BUG_ON(page_count < 0);
-
- return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
- return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
size_t rsize = NFS_SERVER(inode)->rsize;
- unsigned int rpages = nfs_max_pages(rsize);
unsigned int pgbase;
int result;
ssize_t started = 0;
get_dreq(dreq);
- pgbase = user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
size_t bytes;
+ pgbase = user_addr & ~PAGE_MASK;
+ bytes = min(rsize,count);
+
result = -ENOMEM;
- data = nfs_readdata_alloc(rpages);
+ data = nfs_readdata_alloc(pgbase + bytes);
if (unlikely(!data))
break;
- bytes = rsize;
- if (count < rsize)
- bytes = count;
-
- data->npages = nfs_direct_count_pages(user_addr, bytes);
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
started += bytes;
user_addr += bytes;
pos += bytes;
+ /* FIXME: Remove this unnecessary math from final patch */
pgbase += bytes;
pgbase &= ~PAGE_MASK;
+ BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
count -= bytes;
} while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
{
- dreq->commit_data = nfs_commit_alloc(0);
+ dreq->commit_data = nfs_commit_alloc();
if (dreq->commit_data != NULL)
dreq->commit_data->req = (struct nfs_page *) dreq;
}
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
size_t wsize = NFS_SERVER(inode)->wsize;
- unsigned int wpages = nfs_max_pages(wsize);
unsigned int pgbase;
int result;
ssize_t started = 0;
get_dreq(dreq);
- pgbase = user_addr & ~PAGE_MASK;
do {
struct nfs_write_data *data;
size_t bytes;
+ pgbase = user_addr & ~PAGE_MASK;
+ bytes = min(wsize,count);
+
result = -ENOMEM;
- data = nfs_writedata_alloc(wpages);
+ data = nfs_writedata_alloc(pgbase + bytes);
if (unlikely(!data))
break;
- bytes = wsize;
- if (count < wsize)
- bytes = count;
-
- data->npages = nfs_direct_count_pages(user_addr, bytes);
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
started += bytes;
user_addr += bytes;
pos += bytes;
+
+ /* FIXME: Remove this useless math from the final patch */
pgbase += bytes;
pgbase &= ~PAGE_MASK;
+ BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
count -= bytes;
} while (count != 0);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 153898e1331..b14145b7b87 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -970,7 +970,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
status = -ENOMEM;
opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
if (opendata == NULL)
- goto err_put_state_owner;
+ goto err_release_rwsem;
status = _nfs4_proc_open(opendata);
if (status != 0)
@@ -989,11 +989,11 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
return 0;
err_opendata_free:
nfs4_opendata_free(opendata);
+err_release_rwsem:
+ up_read(&clp->cl_sem);
err_put_state_owner:
nfs4_put_state_owner(sp);
out_err:
- /* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
- up_read(&clp->cl_sem);
*res = NULL;
return status;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index da9cf11c326..f0aff824a29 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
{
+ unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
+ p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
p->pagevec = p->page_array;
else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result;
struct nfs_read_data *rdata;
- rdata = nfs_readdata_alloc(1);
+ rdata = nfs_readdata_alloc(count);
if (!rdata)
return -ENOMEM;
@@ -202,9 +204,11 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&inode->i_lock);
- nfs_readpage_truncate_uninitialised_page(rdata);
- if (rdata->res.eof || rdata->res.count == rdata->args.count)
+ if (rdata->res.eof || rdata->res.count == rdata->args.count) {
SetPageUptodate(page);
+ if (rdata->res.eof && count != 0)
+ memclear_highpage_flush(page, rdata->args.pgbase, count);
+ }
result = 0;
io_error:
@@ -336,25 +340,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- unsigned int rsize = NFS_SERVER(inode)->rsize;
- unsigned int nbytes, offset;
+ size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+ unsigned int offset;
int requests = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = req->wb_bytes;
- for(;;) {
- data = nfs_readdata_alloc(1);
+ do {
+ size_t len = min(nbytes,rsize);
+
+ data = nfs_readdata_alloc(len);
if (!data)
goto out_bad;
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, &list);
requests++;
- if (nbytes <= rsize)
- break;
- nbytes -= rsize;
- }
+ nbytes -= len;
+ } while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
@@ -402,7 +406,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
return nfs_pagein_multi(head, inode);
- data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+ data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
if (!data)
goto out_bad;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 50774991f8d..7084ac9a645 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
- else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
- if (!p->pagevec) {
- mempool_free(p, nfs_commit_mempool);
- p = NULL;
- }
- }
}
return p;
}
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
mempool_free(p, nfs_commit_mempool);
}
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(size_t len)
{
+ unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
+ p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
p->pagevec = p->page_array;
else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result, written = 0;
struct nfs_write_data *wdata;
- wdata = nfs_writedata_alloc(1);
+ wdata = nfs_writedata_alloc(wsize);
if (!wdata)
return -ENOMEM;
@@ -597,8 +590,8 @@ static void nfs_cancel_commit_list(struct list_head *head)
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_inode_remove_request(req);
- nfs_clear_page_writeback(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ nfs_clear_page_writeback(req);
}
}
@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
struct nfs_page *req = nfs_list_entry(head->next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- unsigned int wsize = NFS_SERVER(inode)->wsize;
- unsigned int nbytes, offset;
+ size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+ unsigned int offset;
int requests = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = req->wb_bytes;
- for (;;) {
- data = nfs_writedata_alloc(1);
+ do {
+ size_t len = min(nbytes, wsize);
+
+ data = nfs_writedata_alloc(len);
if (!data)
goto out_bad;
list_add(&data->pages, &list);
requests++;
- if (nbytes <= wsize)
- break;
- nbytes -= wsize;
- }
+ nbytes -= len;
+ } while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
struct nfs_write_data *data;
unsigned int count;
- data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
+ data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
if (!data)
goto out_bad;
@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
struct nfs_write_data *data;
struct nfs_page *req;
- data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
+ data = nfs_commit_alloc();
if (!data)
goto out_bad;
@@ -1393,8 +1386,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_mark_request_commit(req);
- nfs_clear_page_writeback(req);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ nfs_clear_page_writeback(req);
}
return -ENOMEM;
}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da7506363..e35d7e52fde 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
*
*/
-
+#include <linux/err.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfs4.h>
@@ -87,34 +87,35 @@ int
nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
- struct crypto_tfm *tfm;
+ struct hash_desc desc;
struct scatterlist sg[1];
int status = nfserr_resource;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
- tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
- if (tfm == NULL)
- goto out;
- cksum.len = crypto_tfm_alg_digestsize(tfm);
+ desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(desc.tfm))
+ goto out_no_tfm;
+ cksum.len = crypto_hash_digestsize(desc.tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL)
goto out;
- crypto_digest_init(tfm);
sg[0].page = virt_to_page(clname->data);
sg[0].offset = offset_in_page(clname->data);
sg[0].length = clname->len;
- crypto_digest_update(tfm, sg, 1);
- crypto_digest_final(tfm, cksum.data);
+ if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+ goto out;
md5_to_hex(dname, cksum.data);
kfree(cksum.data);
status = nfs_ok;
out:
- crypto_free_tfm(tfm);
+ crypto_free_hash(desc.tfm);
+out_no_tfm:
return status;
}
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be845a61..9fb8132f19b 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
file.o \
heartbeat.o \
inode.o \
+ ioctl.o \
journal.o \
localalloc.o \
mmap.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05a93e..f43bc5f18a3 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
ocfs2_remove_from_cache(inode, eb_bh);
- BUG_ON(eb->h_suballoc_slot);
BUG_ON(el->l_recs[0].e_clusters);
BUG_ON(el->l_recs[0].e_cpos);
BUG_ON(el->l_recs[0].e_blkno);
- status = ocfs2_free_extent_block(handle,
- tc->tc_ext_alloc_inode,
- tc->tc_ext_alloc_bh,
- eb);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
+ if (eb->h_suballoc_slot == 0) {
+ /*
+ * This code only understands how to
+ * lock the suballocator in slot 0,
+ * which is fine because allocation is
+ * only ever done out of that
+ * suballocator too. A future version
+ * might change that however, so avoid
+ * a free if we don't know how to
+ * handle it. This way an fs incompat
+ * bit will not be necessary.
+ */
+ status = ocfs2_free_extent_block(handle,
+ tc->tc_ext_alloc_inode,
+ tc->tc_ext_alloc_bh,
+ eb);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
}
}
brelse(eb_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c342ce0..3d7c082a8f5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ out:
static int ocfs2_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
- int ret, extending = 0, locklevel = 0;
- loff_t new_i_size;
+ int ret;
struct buffer_head *di_bh = NULL;
struct inode *inode = page->mapping->host;
struct ocfs2_journal_handle *handle = NULL;
+ struct ocfs2_dinode *di;
mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
- * us to sample inode->i_size here without the metadata lock:
+ * us to continue here without rechecking the I/O against
+ * changed inode values.
*
* 1) We're currently holding the inode alloc lock, so no
* nodes can change it underneath us.
*
* 2) We've had to take the metadata lock at least once
- * already to check for extending writes, hence insuring
- * that our current copy is also up to date.
+ * already to check for extending writes, suid removal, etc.
+ * The meta data update code then ensures that we don't get a
+ * stale inode allocation image (i_size, i_clusters, etc).
*/
- new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
- if (new_i_size > i_size_read(inode)) {
- extending = 1;
- locklevel = 1;
- }
- ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
+ ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
if (ret != 0) {
mlog_errno(ret);
goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
goto out_unlock_meta;
}
- if (extending) {
- handle = ocfs2_start_walk_page_trans(inode, page, from, to);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
- goto out_unlock_data;
- }
+ handle = ocfs2_start_walk_page_trans(inode, page, from, to);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out_unlock_data;
+ }
- /* Mark our buffer early. We'd rather catch this error up here
- * as opposed to after a successful commit_write which would
- * require us to set back inode->i_size. */
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_commit;
- }
+ /* Mark our buffer early. We'd rather catch this error up here
+ * as opposed to after a successful commit_write which would
+ * require us to set back inode->i_size. */
+ ret = ocfs2_journal_access(handle, inode, di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out_commit;
}
/* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
goto out_commit;
}
- if (extending) {
- loff_t size = (u64) i_size_read(inode);
- struct ocfs2_dinode *di =
- (struct ocfs2_dinode *)di_bh->b_data;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
- /* ocfs2_mark_inode_dirty is too heavy to use here. */
- inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ /* ocfs2_mark_inode_dirty() is too heavy to use here. */
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- di->i_size = cpu_to_le64(size);
- di->i_ctime = di->i_mtime =
- cpu_to_le64(inode->i_mtime.tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec =
- cpu_to_le32(inode->i_mtime.tv_nsec);
+ inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+ di->i_size = cpu_to_le64((u64)i_size_read(inode));
- ret = ocfs2_journal_dirty(handle, di_bh);
- if (ret < 0) {
- mlog_errno(ret);
- goto out_commit;
- }
+ ret = ocfs2_journal_dirty(handle, di_bh);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out_commit;
}
- BUG_ON(extending && (i_size_read(inode) != new_i_size));
-
out_commit:
- if (handle)
- ocfs2_commit_trans(handle);
+ ocfs2_commit_trans(handle);
out_unlock_data:
ocfs2_data_unlock(inode, 1);
out_unlock_meta:
- ocfs2_meta_unlock(inode, locklevel);
+ ocfs2_meta_unlock(inode, 1);
out:
if (di_bh)
brelse(di_bh);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf9be6..c9037414f4f 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
(unsigned long long)block, nr, flags, inode);
+ BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+ (!inode || !(flags & OCFS2_BH_CACHED)));
+
if (osb == NULL || osb->sb == NULL || bhs == NULL) {
status = -EINVAL;
mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
bh = bhs[i];
ignore_cache = 0;
+ /* There are three read-ahead cases here which we need to
+ * be concerned with. All three assume a buffer has
+ * previously been submitted with OCFS2_BH_READAHEAD
+ * and it hasn't yet completed I/O.
+ *
+ * 1) The current request is sync to disk. This rarely
+ * happens these days, and never when performance
+ * matters - the code can just wait on the buffer
+ * lock and re-submit.
+ *
+ * 2) The current request is cached, but not
+ * readahead. ocfs2_buffer_uptodate() will return
+ * false anyway, so we'll wind up waiting on the
+ * buffer lock to do I/O. We re-check the request
+ * with after getting the lock to avoid a re-submit.
+ *
+ * 3) The current request is readahead (and so must
+ * also be a caching one). We short circuit if the
+ * buffer is locked (under I/O) and if it's in the
+ * uptodate cache. The re-check from #2 catches the
+ * case that the previous read-ahead completes just
+ * before our is-it-in-flight check.
+ */
+
if (flags & OCFS2_BH_CACHED &&
!ocfs2_buffer_uptodate(inode, bh)) {
mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
continue;
}
+ /* A read-ahead request was made - if the
+ * buffer is already under read-ahead from a
+ * previously submitted request than we are
+ * done here. */
+ if ((flags & OCFS2_BH_READAHEAD)
+ && ocfs2_buffer_read_ahead(inode, bh))
+ continue;
+
lock_buffer(bh);
if (buffer_jbd(bh)) {
#ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
continue;
#endif
}
+
+ /* Re-check ocfs2_buffer_uptodate() as a
+ * previously read-ahead buffer may have
+ * completed I/O while we were waiting for the
+ * buffer lock. */
+ if ((flags & OCFS2_BH_CACHED)
+ && !(flags & OCFS2_BH_READAHEAD)
+ && ocfs2_buffer_uptodate(inode, bh)) {
+ unlock_buffer(bh);
+ continue;
+ }
+
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
bh->b_end_io = end_buffer_read_sync;
- if (flags & OCFS2_BH_READAHEAD)
- submit_bh(READA, bh);
- else
- submit_bh(READ, bh);
+ submit_bh(READ, bh);
continue;
}
}
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
for (i = (nr - 1); i >= 0; i--) {
bh = bhs[i];
- /* We know this can't have changed as we hold the
- * inode sem. Avoid doing any work on the bh if the
- * journal has it. */
- if (!buffer_jbd(bh))
- wait_on_buffer(bh);
-
- if (!buffer_uptodate(bh)) {
- /* Status won't be cleared from here on out,
- * so we can safely record this and loop back
- * to cleanup the other buffers. Don't need to
- * remove the clustered uptodate information
- * for this bh as it's not marked locally
- * uptodate. */
- status = -EIO;
- brelse(bh);
- bhs[i] = NULL;
- continue;
+ if (!(flags & OCFS2_BH_READAHEAD)) {
+ /* We know this can't have changed as we hold the
+ * inode sem. Avoid doing any work on the bh if the
+ * journal has it. */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
+
+ if (!buffer_uptodate(bh)) {
+ /* Status won't be cleared from here on out,
+ * so we can safely record this and loop back
+ * to cleanup the other buffers. Don't need to
+ * remove the clustered uptodate information
+ * for this bh as it's not marked locally
+ * uptodate. */
+ status = -EIO;
+ brelse(bh);
+ bhs[i] = NULL;
+ continue;
+ }
}
+ /* Always set the buffer in the cache, even if it was
+ * a forced read, or read-ahead which hasn't yet
+ * completed. */
if (inode)
ocfs2_set_buffer_uptodate(inode, bh);
}
if (inode)
mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
- mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n",
+ mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr,
- (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+ (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
bail:
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb90937b6..6cc20930fac 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb,
#define OCFS2_BH_CACHED 1
-#define OCFS2_BH_READAHEAD 8 /* use this to pass READA down to submit_bh */
+#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d6cf6..305cba3681f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
max_pages = q->max_hw_segments;
max_pages--; /* Handle I/Os that straddle a page */
- max_sectors = max_pages << (PAGE_SHIFT - 9);
-
+ if (max_pages) {
+ max_sectors = max_pages << (PAGE_SHIFT - 9);
+ } else {
+ /* If BIO contains 1 or less than 1 page. */
+ max_sectors = q->max_sectors;
+ }
/* Why is fls() 1-based???? */
pow_two_sectors = 1 << (fls(max_sectors) - 1);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1a5f3..04e01915b86 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
int error = 0;
- unsigned long offset, blk;
- int i, num, stored;
+ unsigned long offset, blk, last_ra_blk = 0;
+ int i, stored;
struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de;
int err;
struct inode *inode = filp->f_dentry->d_inode;
struct super_block * sb = inode->i_sb;
- int have_disk_lock = 0;
+ unsigned int ra_sectors = 16;
mlog_entry("dirino=%llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
mlog_errno(error);
/* we haven't got any yet, so propagate the error. */
stored = error;
- goto bail;
+ goto bail_nolock;
}
- have_disk_lock = 1;
offset = filp->f_pos & (sb->s_blocksize - 1);
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
continue;
}
- /*
- * Do the readahead (8k)
- */
- if (!offset) {
- for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+ /* The idea here is to begin with 8k read-ahead and to stay
+ * 4k ahead of our current position.
+ *
+ * TODO: Use the pagecache for this. We just need to
+ * make sure it's cluster-safe... */
+ if (!last_ra_blk
+ || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
+ for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
tmp = ocfs2_bread(inode, ++blk, &err, 1);
if (tmp)
brelse(tmp);
}
+ last_ra_blk = blk;
+ ra_sectors = 8;
}
revalidate:
@@ -194,9 +198,9 @@ revalidate:
stored = 0;
bail:
- if (have_disk_lock)
- ocfs2_meta_unlock(inode, 0);
+ ocfs2_meta_unlock(inode, 0);
+bail_nolock:
mlog_exit(stored);
return stored;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2bbe2..f13a4bac41f 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
goto do_ast;
}
- mlog(ML_ERROR, "got %sast for unknown lock! cookie=%u:%llu, "
- "name=%.*s, namelen=%u\n",
- past->type == DLM_AST ? "" : "b",
- dlm_get_lock_cookie_node(cookie),
- dlm_get_lock_cookie_seq(cookie),
- locklen, name, locklen);
+ mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
+ "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
+ dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+ locklen, name, locklen);
ret = DLM_NORMAL;
unlock_out:
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1fbb34..151b41781ea 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+ lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
mlog_meta_lvb(0, lockres);
@@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
+ oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
+ ocfs2_set_inode_flags(inode);
+
/* fast-symlinks are a special case */
if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
be16_to_cpu(lvb->lvb_imode));
mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
- "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+ "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
(long long)be64_to_cpu(lvb->lvb_iatime_packed),
(long long)be64_to_cpu(lvb->lvb_ictime_packed),
- (long long)be64_to_cpu(lvb->lvb_imtime_packed));
+ (long long)be64_to_cpu(lvb->lvb_imtime_packed),
+ be32_to_cpu(lvb->lvb_iattr));
}
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db2d9e..243ae862ece 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
#ifndef DLMGLUE_H
#define DLMGLUE_H
-#define OCFS2_LVB_VERSION 2
+#define OCFS2_LVB_VERSION 3
struct ocfs2_meta_lvb {
__be32 lvb_version;
@@ -40,7 +40,8 @@ struct ocfs2_meta_lvb {
__be64 lvb_isize;
__be16 lvb_imode;
__be16 lvb_inlink;
- __be32 lvb_reserved[3];
+ __be32 lvb_iattr;
+ __be32 lvb_reserved[2];
};
/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c87453..2bbfa17090c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
#include "file.h"
#include "sysfile.h"
#include "inode.h"
+#include "ioctl.h"
#include "journal.h"
#include "mmap.h"
#include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
.open = ocfs2_file_open,
.aio_read = ocfs2_file_aio_read,
.aio_write = ocfs2_file_aio_write,
+ .ioctl = ocfs2_ioctl,
};
const struct file_operations ocfs2_dops = {
.read = generic_read_dir,
.readdir = ocfs2_readdir,
.fsync = ocfs2_sync_file,
+ .ioctl = ocfs2_ioctl,
};
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7b86e..7bcf6915459 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh);
+void ocfs2_set_inode_flags(struct inode *inode)
+{
+ unsigned int flags = OCFS2_I(inode)->ip_attr;
+
+ inode->i_flags &= ~(S_IMMUTABLE |
+ S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
+ if (flags & OCFS2_IMMUTABLE_FL)
+ inode->i_flags |= S_IMMUTABLE;
+
+ if (flags & OCFS2_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+ if (flags & OCFS2_APPEND_FL)
+ inode->i_flags |= S_APPEND;
+ if (flags & OCFS2_NOATIME_FL)
+ inode->i_flags |= S_NOATIME;
+ if (flags & OCFS2_DIRSYNC_FL)
+ inode->i_flags |= S_DIRSYNC;
+}
+
struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
u64 blkno,
int delete_vote)
@@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_blocks =
ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
inode->i_mapping->a_ops = &ocfs2_aops;
- inode->i_flags |= S_NOATIME;
inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
+ OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
if (create_ino)
inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
OCFS2_LOCK_TYPE_DATA, inode);
+ ocfs2_set_inode_flags(inode);
+ inode->i_flags |= S_NOATIME;
+
status = 0;
bail:
mlog_exit(status);
@@ -1027,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
u64 p_blkno;
int readflags = OCFS2_BH_CACHED;
-#if 0
- /* only turn this on if we know we can deal with read_block
- * returning nothing */
if (reada)
readflags |= OCFS2_BH_READAHEAD;
-#endif
if (((u64)block << inode->i_sb->s_blocksize_bits) >=
i_size_read(inode)) {
@@ -1131,6 +1150,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
spin_lock(&OCFS2_I(inode)->ip_lock);
fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
+ fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
spin_unlock(&OCFS2_I(inode)->ip_lock);
fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1189,8 @@ void ocfs2_refresh_inode(struct inode *inode,
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
+ OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
+ ocfs2_set_inode_flags(inode);
i_size_write(inode, le64_to_cpu(fe->i_size));
inode->i_nlink = le16_to_cpu(fe->i_links_count);
inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6cf84..4d1e5399256 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
struct ocfs2_journal_handle *ip_handle;
u32 ip_flags; /* see below */
+ u32 ip_attr; /* inode attributes */
/* protected by recovery_lock. */
struct inode *ip_next_orphan;
@@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
+void ocfs2_set_inode_flags(struct inode *inode);
+
#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 00000000000..3663cef8068
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,136 @@
+/*
+ * linux/fs/ocfs2/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "inode.h"
+#include "journal.h"
+
+#include "ocfs2_fs.h"
+#include "ioctl.h"
+
+#include <linux/ext2_fs.h>
+
+static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
+{
+ int status;
+
+ status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ return status;
+ }
+ *flags = OCFS2_I(inode)->ip_attr;
+ ocfs2_meta_unlock(inode, 0);
+
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
+ unsigned mask)
+{
+ struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_journal_handle *handle = NULL;
+ struct buffer_head *bh = NULL;
+ unsigned oldflags;
+ int status;
+
+ mutex_lock(&inode->i_mutex);
+
+ status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ status = -EROFS;
+ if (IS_RDONLY(inode))
+ goto bail_unlock;
+
+ status = -EACCES;
+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+ goto bail_unlock;
+
+ if (!S_ISDIR(inode->i_mode))
+ flags &= ~OCFS2_DIRSYNC_FL;
+
+ handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
+ }
+
+ oldflags = ocfs2_inode->ip_attr;
+ flags = flags & mask;
+ flags |= oldflags & ~mask;
+
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ */
+ status = -EPERM;
+ if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
+ (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
+ if (!capable(CAP_LINUX_IMMUTABLE))
+ goto bail_unlock;
+ }
+
+ ocfs2_inode->ip_attr = flags;
+ ocfs2_set_inode_flags(inode);
+
+ status = ocfs2_mark_inode_dirty(handle, inode, bh);
+ if (status < 0)
+ mlog_errno(status);
+
+ ocfs2_commit_trans(handle);
+bail_unlock:
+ ocfs2_meta_unlock(inode, 1);
+bail:
+ mutex_unlock(&inode->i_mutex);
+
+ if (bh)
+ brelse(bh);
+
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long arg)
+{
+ unsigned int flags;
+ int status;
+
+ switch (cmd) {
+ case OCFS2_IOC_GETFLAGS:
+ status = ocfs2_get_inode_attr(inode, &flags);
+ if (status < 0)
+ return status;
+
+ flags &= OCFS2_FL_VISIBLE;
+ return put_user(flags, (int __user *) arg);
+ case OCFS2_IOC_SETFLAGS:
+ if (get_user(flags, (int __user *) arg))
+ return -EFAULT;
+
+ return ocfs2_set_inode_attr(inode, flags,
+ OCFS2_FL_MODIFIABLE);
+ default:
+ return -ENOTTY;
+ }
+}
+
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 00000000000..4a7c82931db
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
+/*
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ *
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+ unsigned int cmd, unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862c8bd..0d3e939b1f5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
#include "journal.h"
#include "namei.h"
#include "suballoc.h"
+#include "super.h"
#include "symlink.h"
#include "sysfile.h"
#include "uptodate.h"
@@ -310,13 +311,6 @@ static int ocfs2_mknod(struct inode *dir,
/* get our super block */
osb = OCFS2_SB(dir->i_sb);
- if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
- mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
- (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
- status = -EMLINK;
- goto leave;
- }
-
handle = ocfs2_alloc_handle(osb);
if (handle == NULL) {
status = -ENOMEM;
@@ -331,6 +325,11 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+ status = -EMLINK;
+ goto leave;
+ }
+
dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
if (!dirfe->i_links_count) {
/* can't make a file in a deleted directory. */
@@ -643,11 +642,6 @@ static int ocfs2_link(struct dentry *old_dentry,
goto bail;
}
- if (inode->i_nlink >= OCFS2_LINK_MAX) {
- err = -EMLINK;
- goto bail;
- }
-
handle = ocfs2_alloc_handle(osb);
if (handle == NULL) {
err = -ENOMEM;
@@ -661,6 +655,11 @@ static int ocfs2_link(struct dentry *old_dentry,
goto bail;
}
+ if (!dir->i_nlink) {
+ err = -ENOENT;
+ goto bail;
+ }
+
err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
dentry->d_name.len);
if (err)
@@ -1964,13 +1963,8 @@ restart:
}
num++;
- /* XXX: questionable readahead stuff here */
bh = ocfs2_bread(dir, b++, &err, 1);
bh_use[ra_max] = bh;
-#if 0 // ???
- if (bh)
- ll_rw_block(READ, 1, &bh);
-#endif
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1978,6 +1972,10 @@ restart:
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
+ ocfs2_error(dir->i_sb, "reading directory %llu, "
+ "offset %lu\n",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ block);
brelse(bh);
goto next;
}
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac547c1..3330a5dc6be 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
+/* Inode attributes, keep in sync with EXT2 */
+#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
+#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
+#define OCFS2_COMPR_FL (0x00000004) /* Compress file */
+#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */
+#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */
+#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */
+#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */
+#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */
+
+#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */
+#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */
+
+/*
+ * ioctl commands
+ */
+#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long)
+#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long)
+
/*
* Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
*/
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
__le32 i_atime_nsec;
__le32 i_ctime_nsec;
__le32 i_mtime_nsec;
-/*70*/ __le64 i_reserved1[9];
+ __le32 i_attr;
+ __le32 i_reserved1;
+/*70*/ __le64 i_reserved2[8];
/*B8*/ union {
__le64 i_pad1; /* Generic way to refer to this
64bit union */
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a79332..9707ed7a320 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
}
/* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache.
+ *
+ * This can be called under lock_buffer()
+ */
int ocfs2_buffer_uptodate(struct inode *inode,
struct buffer_head *bh)
{
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
return ocfs2_buffer_cached(OCFS2_I(inode), bh);
}
+/*
+ * Determine whether a buffer is currently out on a read-ahead request.
+ * ip_io_sem should be held to serialize submitters with the logic here.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode,
+ struct buffer_head *bh)
+{
+ return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+}
+
/* Requires ip_lock */
static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
sector_t block)
@@ -403,7 +416,11 @@ out_free:
*
* Note that this function may actually fail to insert the block if
* memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty)
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
void ocfs2_set_buffer_uptodate(struct inode *inode,
struct buffer_head *bh)
{
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d26b0..2e73206059a 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
struct buffer_head *bh);
void ocfs2_remove_from_cache(struct inode *inode,
struct buffer_head *bh);
+int ocfs2_buffer_read_ahead(struct inode *inode,
+ struct buffer_head *bh);
#endif /* OCFS2_UPTODATE_H */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c40f81ba9b1..34dcb43a783 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1390,11 +1390,19 @@ xfs_vm_direct_IO(
iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
- ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
- iomap.iomap_target->bt_bdev,
- iov, offset, nr_segs,
- xfs_get_blocks_direct,
- xfs_end_io_direct);
+ if (rw == WRITE) {
+ ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
+ iomap.iomap_target->bt_bdev,
+ iov, offset, nr_segs,
+ xfs_get_blocks_direct,
+ xfs_end_io_direct);
+ } else {
+ ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+ iomap.iomap_target->bt_bdev,
+ iov, offset, nr_segs,
+ xfs_get_blocks_direct,
+ xfs_end_io_direct);
+ }
if (unlikely(ret <= 0 && iocb->private))
xfs_destroy_ioend(iocb->private);
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5d9cfd91ad0..ee788b1cb36 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -264,7 +264,9 @@ xfs_read(
dmflags, &locktype);
if (ret) {
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- goto unlock_mutex;
+ if (unlikely(ioflags & IO_ISDIRECT))
+ mutex_unlock(&inode->i_mutex);
+ return ret;
}
}
@@ -272,6 +274,9 @@ xfs_read(
bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)),
-1, FI_REMAPF_LOCKED);
+ if (unlikely(ioflags & IO_ISDIRECT))
+ mutex_unlock(&inode->i_mutex);
+
xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
ret = __generic_file_aio_read(iocb, iovp, segs, offset);
@@ -281,10 +286,6 @@ xfs_read(
XFS_STATS_ADD(xs_read_bytes, ret);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-unlock_mutex:
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_unlock(&inode->i_mutex);
return ret;
}
@@ -390,6 +391,8 @@ xfs_splice_write(
xfs_inode_t *ip = XFS_BHVTOI(bdp);
xfs_mount_t *mp = ip->i_mount;
ssize_t ret;
+ struct inode *inode = outfilp->f_mapping->host;
+ xfs_fsize_t isize;
XFS_STATS_INC(xs_write_calls);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -416,6 +419,20 @@ xfs_splice_write(
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
+ isize = i_size_read(inode);
+ if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
+ *ppos = isize;
+
+ if (*ppos > ip->i_d.di_size) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (*ppos > ip->i_d.di_size) {
+ ip->i_d.di_size = *ppos;
+ i_size_write(inode, *ppos);
+ ip->i_update_core = 1;
+ ip->i_update_size = 1;
+ }
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index f137856c326..db8872be8c8 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -203,7 +203,7 @@ xfs_qm_statvfs(
if (error || !vnode)
return error;
- mp = XFS_BHVTOM(bhv);
+ mp = xfs_vfstom(bhvtovfs(bhv));
ip = xfs_vtoi(vnode);
if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 650591f999a..5a4256120cc 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -44,6 +44,26 @@ typedef enum xfs_alloctype
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
/*
+ * In order to avoid ENOSPC-related deadlock caused by
+ * out-of-order locking of AGF buffer (PV 947395), we place
+ * constraints on the relationship among actual allocations for
+ * data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no
+ * actual space allocated for a delayed extent, for example, a data
+ * block in a certain AG is allocated but there is no additional
+ * block for the additional bmap btree block due to a split of the
+ * bmap btree of the file. The result of this may lead to an
+ * infinite loop in xfssyncd when the file gets flushed to disk and
+ * all delayed extents need to be actually allocated. To get around
+ * this, we explicitly set aside a few blocks which will not be
+ * reserved in delayed allocation. Considering the minimum number of
+ * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
+ * btree requires 1 fsb, so we set the number of set-aside blocks
+ * to 4 + 4*agcount.
+ */
+#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
+
+/*
* Argument structure for xfs_alloc routines.
* This is turned into a structure to avoid having 20 arguments passed
* down several levels of the stack.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 077629bab53..c064e72ada9 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
xfs_icsb_sync_counters_lazy(mp);
s = XFS_SB_LOCK(mp);
- cnt->freedata = mp->m_sb.sb_fdblocks;
+ cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents;
cnt->freeino = mp->m_sb.sb_ifree;
cnt->allocino = mp->m_sb.sb_icount;
@@ -519,15 +519,19 @@ xfs_reserve_blocks(
}
mp->m_resblks = request;
} else {
+ __int64_t free;
+
+ free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
delta = request - mp->m_resblks;
- lcounter = mp->m_sb.sb_fdblocks - delta;
+ lcounter = free - delta;
if (lcounter < 0) {
/* We can't satisfy the request, just get what we can */
- mp->m_resblks += mp->m_sb.sb_fdblocks;
- mp->m_resblks_avail += mp->m_sb.sb_fdblocks;
- mp->m_sb.sb_fdblocks = 0;
+ mp->m_resblks += free;
+ mp->m_resblks_avail += free;
+ mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
} else {
- mp->m_sb.sb_fdblocks = lcounter;
+ mp->m_sb.sb_fdblocks =
+ lcounter + XFS_ALLOC_SET_ASIDE(mp);
mp->m_resblks = request;
mp->m_resblks_avail += delta;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4be5c0b2d29..9dfae18d995 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
xfs_trans_log_buf(tp, bp, first, last);
}
-/*
- * In order to avoid ENOSPC-related deadlock caused by
- * out-of-order locking of AGF buffer (PV 947395), we place
- * constraints on the relationship among actual allocations for
- * data blocks, freelist blocks, and potential file data bmap
- * btree blocks. However, these restrictions may result in no
- * actual space allocated for a delayed extent, for example, a data
- * block in a certain AG is allocated but there is no additional
- * block for the additional bmap btree block due to a split of the
- * bmap btree of the file. The result of this may lead to an
- * infinite loop in xfssyncd when the file gets flushed to disk and
- * all delayed extents need to be actually allocated. To get around
- * this, we explicitly set aside a few blocks which will not be
- * reserved in delayed allocation. Considering the minimum number of
- * needed freelist blocks is 4 fsbs, a potential split of file's bmap
- * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
-*/
-#define SET_ASIDE_BLOCKS 8
/*
* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
return 0;
case XFS_SBS_FDBLOCKS:
- lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
+ lcounter = (long long)
+ mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
if (delta > 0) { /* Putting blocks back */
@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
}
}
- mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
+ mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
return 0;
case XFS_SBS_FREXTENTS:
lcounter = (long long)mp->m_sb.sb_frextents;
@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy(
* when we get near ENOSPC.
*/
#define XFS_ICSB_INO_CNTR_REENABLE 64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE 512
+#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
+ (512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
xfs_icsb_balance_counter(
xfs_mount_t *mp,
@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter(
case XFS_SBS_FDBLOCKS:
count = mp->m_sb.sb_fdblocks;
resid = do_div(count, weight);
- if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
+ if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp))
goto out;
break;
default:
@@ -2110,11 +2094,11 @@ again:
case XFS_SBS_FDBLOCKS:
BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
- lcounter = icsbp->icsb_fdblocks;
+ lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
lcounter += delta;
if (unlikely(lcounter < 0))
goto slow_path;
- icsbp->icsb_fdblocks = lcounter;
+ icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
break;
default:
BUG();
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b427d220a16..a34796e57af 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -811,7 +811,8 @@ xfs_statvfs(
statp->f_bsize = sbp->sb_blocksize;
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
statp->f_blocks = sbp->sb_dblocks - lsize;
- statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks;
+ statp->f_bfree = statp->f_bavail =
+ sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
fakeinos = statp->f_bfree << sbp->sb_inopblog;
#if XFS_BIG_INUMS
fakeinos += mp->m_inoadd;