diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/fuse/dev.c | 72 | ||||
-rw-r--r-- | fs/fuse/dir.c | 278 | ||||
-rw-r--r-- | fs/fuse/file.c | 49 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 12 | ||||
-rw-r--r-- | fs/fuse/inode.c | 14 | ||||
-rw-r--r-- | fs/hfsplus/super.c | 2 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 3 | ||||
-rw-r--r-- | fs/jbd/checkpoint.c | 418 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 11 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 47 | ||||
-rw-r--r-- | fs/nfsd/nfsxdr.c | 48 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 40 | ||||
-rw-r--r-- | fs/partitions/Kconfig | 2 | ||||
-rw-r--r-- | fs/partitions/ibm.c | 30 | ||||
-rw-r--r-- | fs/proc/array.c | 2 | ||||
-rw-r--r-- | fs/ramfs/Makefile | 4 | ||||
-rw-r--r-- | fs/ramfs/file-mmu.c | 57 | ||||
-rw-r--r-- | fs/ramfs/file-nommu.c | 292 | ||||
-rw-r--r-- | fs/ramfs/inode.c | 22 | ||||
-rw-r--r-- | fs/ramfs/internal.h | 15 |
21 files changed, 987 insertions, 433 deletions
diff --git a/fs/exec.c b/fs/exec.c index 22533cce061..e75a9548da8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -324,7 +324,7 @@ void install_arg_page(struct vm_area_struct *vma, lru_cache_add_active(page); set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte( page, vma->vm_page_prot)))); - page_add_anon_rmap(page, vma, address); + page_add_new_anon_rmap(page, vma, address); pte_unmap_unlock(pte, ptl); /* no need for flush_tlb */ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8f873e621f4..e08ab4702d9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -148,6 +148,26 @@ void fuse_release_background(struct fuse_req *req) spin_unlock(&fuse_lock); } +static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + int i; + struct fuse_init_out *arg = &req->misc.init_out; + + if (arg->major != FUSE_KERNEL_VERSION) + fc->conn_error = 1; + else { + fc->minor = arg->minor; + fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + } + + /* After INIT reply is received other requests can go + out. So do (FUSE_MAX_OUTSTANDING - 1) number of + up()s on outstanding_sem. The last up() is done in + fuse_putback_request() */ + for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) + up(&fc->outstanding_sem); +} + /* * This function is called when a request is finished. Either a reply * has arrived or it was interrupted (and not yet sent) or some error @@ -172,19 +192,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) up_read(&fc->sbput_sem); } wake_up(&req->waitq); - if (req->in.h.opcode == FUSE_INIT) { - int i; - - if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION) - fc->conn_error = 1; - - /* After INIT reply is received other requests can go - out. So do (FUSE_MAX_OUTSTANDING - 1) number of - up()s on outstanding_sem. The last up() is done in - fuse_putback_request() */ - for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) - up(&fc->outstanding_sem); - } else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { + if (req->in.h.opcode == FUSE_INIT) + process_init_reply(fc, req); + else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { /* Special case for failed iget in CREATE */ u64 nodeid = req->in.h.nodeid; __fuse_get_request(req); @@ -357,7 +367,7 @@ void fuse_send_init(struct fuse_conn *fc) /* This is called from fuse_read_super() so there's guaranteed to be a request available */ struct fuse_req *req = do_get_request(fc); - struct fuse_init_in_out *arg = &req->misc.init_in_out; + struct fuse_init_in *arg = &req->misc.init_in; arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; req->in.h.opcode = FUSE_INIT; @@ -365,8 +375,12 @@ void fuse_send_init(struct fuse_conn *fc) req->in.args[0].size = sizeof(*arg); req->in.args[0].value = arg; req->out.numargs = 1; - req->out.args[0].size = sizeof(*arg); - req->out.args[0].value = arg; + /* Variable length arguement used for backward compatibility + with interface version < 7.5. Rest of init_out is zeroed + by do_get_request(), so a short reply is not a problem */ + req->out.argvar = 1; + req->out.args[0].size = sizeof(struct fuse_init_out); + req->out.args[0].value = &req->misc.init_out; request_send_background(fc, req); } @@ -615,6 +629,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, struct fuse_copy_state cs; unsigned reqsize; + restart: spin_lock(&fuse_lock); fc = file->private_data; err = -EPERM; @@ -630,20 +645,25 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, req = list_entry(fc->pending.next, struct fuse_req, list); list_del_init(&req->list); - spin_unlock(&fuse_lock); in = &req->in; - reqsize = req->in.h.len; - fuse_copy_init(&cs, 1, req, iov, nr_segs); - err = -EINVAL; - if (iov_length(iov, nr_segs) >= reqsize) { - err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); - if (!err) - err = fuse_copy_args(&cs, in->numargs, in->argpages, - (struct fuse_arg *) in->args, 0); + reqsize = in->h.len; + /* If request is too large, reply with an error and restart the read */ + if (iov_length(iov, nr_segs) < reqsize) { + req->out.h.error = -EIO; + /* SETXATTR is special, since it may contain too large data */ + if (in->h.opcode == FUSE_SETXATTR) + req->out.h.error = -E2BIG; + request_end(fc, req); + goto restart; } + spin_unlock(&fuse_lock); + fuse_copy_init(&cs, 1, req, iov, nr_segs); + err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); + if (!err) + err = fuse_copy_args(&cs, in->numargs, in->argpages, + (struct fuse_arg *) in->args, 0); fuse_copy_finish(&cs); - spin_lock(&fuse_lock); req->locked = 0; if (!err && req->interrupted) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 51f5da65277..417bcee466f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -13,8 +13,16 @@ #include <linux/gfp.h> #include <linux/sched.h> #include <linux/namei.h> -#include <linux/mount.h> +/* + * FUSE caches dentries and attributes with separate timeout. The + * time in jiffies until the dentry/attributes are valid is stored in + * dentry->d_time and fuse_inode->i_time respectively. + */ + +/* + * Calculate the time in jiffies until a dentry/attributes are valid + */ static inline unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) { @@ -22,6 +30,50 @@ static inline unsigned long time_to_jiffies(unsigned long sec, return jiffies + timespec_to_jiffies(&ts); } +/* + * Set dentry and possibly attribute timeouts from the lookup/mk* + * replies + */ +static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) +{ + entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); + if (entry->d_inode) + get_fuse_inode(entry->d_inode)->i_time = + time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +/* + * Mark the attributes as stale, so that at the next call to + * ->getattr() they will be fetched from userspace + */ +void fuse_invalidate_attr(struct inode *inode) +{ + get_fuse_inode(inode)->i_time = jiffies - 1; +} + +/* + * Just mark the entry as stale, so that a next attempt to look it up + * will result in a new lookup call to userspace + * + * This is called when a dentry is about to become negative and the + * timeout is unknown (unlink, rmdir, rename and in some cases + * lookup) + */ +static void fuse_invalidate_entry_cache(struct dentry *entry) +{ + entry->d_time = jiffies - 1; +} + +/* + * Same as fuse_invalidate_entry_cache(), but also try to remove the + * dentry from the hash + */ +static void fuse_invalidate_entry(struct dentry *entry) +{ + d_invalidate(entry); + fuse_invalidate_entry_cache(entry); +} + static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, struct dentry *entry, struct fuse_entry_out *outarg) @@ -37,17 +89,34 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, req->out.args[0].value = outarg; } +/* + * Check whether the dentry is still valid + * + * If the entry validity timeout has expired and the dentry is + * positive, try to redo the lookup. If the lookup results in a + * different inode, then let the VFS invalidate the dentry and redo + * the lookup once more. If the lookup results in the same inode, + * then refresh the attributes, timeouts and mark the dentry valid. + */ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) { - if (!entry->d_inode || is_bad_inode(entry->d_inode)) + struct inode *inode = entry->d_inode; + + if (inode && is_bad_inode(inode)) return 0; else if (time_after(jiffies, entry->d_time)) { int err; struct fuse_entry_out outarg; - struct inode *inode = entry->d_inode; - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); + struct fuse_conn *fc; + struct fuse_req *req; + + /* Doesn't hurt to "reset" the validity timeout */ + fuse_invalidate_entry_cache(entry); + if (!inode) + return 0; + + fc = get_fuse_conn(inode); + req = fuse_get_request(fc); if (!req) return 0; @@ -55,6 +124,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) request_send(fc, req); err = req->out.h.error; if (!err) { + struct fuse_inode *fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode)) { fuse_send_forget(fc, req, outarg.nodeid, 1); return 0; @@ -66,18 +136,18 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) return 0; fuse_change_attributes(inode, &outarg.attr); - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); + fuse_change_timeout(entry, &outarg); } return 1; } +/* + * Check if there's already a hashed alias of this directory inode. + * If yes, then lookup and mkdir must not create a new alias. + */ static int dir_alias(struct inode *inode) { if (S_ISDIR(inode->i_mode)) { - /* Don't allow creating an alias to a directory */ struct dentry *alias = d_find_alias(inode); if (alias) { dput(alias); @@ -96,8 +166,14 @@ static struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, }; -static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, - struct inode **inodep) +static inline int valid_mode(int m) +{ + return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || + S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); +} + +static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, + struct nameidata *nd) { int err; struct fuse_entry_out outarg; @@ -106,53 +182,49 @@ static int fuse_lookup_iget(struct inode *dir, struct dentry *entry, struct fuse_req *req; if (entry->d_name.len > FUSE_NAME_MAX) - return -ENAMETOOLONG; + return ERR_PTR(-ENAMETOOLONG); req = fuse_get_request(fc); if (!req) - return -EINTR; + return ERR_PTR(-EINTR); fuse_lookup_init(req, dir, entry, &outarg); request_send(fc, req); err = req->out.h.error; - if (!err && invalid_nodeid(outarg.nodeid)) + if (!err && ((outarg.nodeid && invalid_nodeid(outarg.nodeid)) || + !valid_mode(outarg.attr.mode))) err = -EIO; - if (!err) { + if (!err && outarg.nodeid) { inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { fuse_send_forget(fc, req, outarg.nodeid, 1); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } } fuse_put_request(fc, req); if (err && err != -ENOENT) - return err; + return ERR_PTR(err); - if (inode) { - struct fuse_inode *fi = get_fuse_inode(inode); - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); + if (inode && dir_alias(inode)) { + iput(inode); + return ERR_PTR(-EIO); } - + d_add(entry, inode); entry->d_op = &fuse_dentry_operations; - *inodep = inode; - return 0; -} - -void fuse_invalidate_attr(struct inode *inode) -{ - get_fuse_inode(inode)->i_time = jiffies - 1; -} - -static void fuse_invalidate_entry(struct dentry *entry) -{ - d_invalidate(entry); - entry->d_time = jiffies - 1; + if (!err) + fuse_change_timeout(entry, &outarg); + else + fuse_invalidate_entry_cache(entry); + return NULL; } +/* + * Atomic create+open operation + * + * If the filesystem doesn't support this, then fall back to separate + * 'mknod' + 'open' requests. + */ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct nameidata *nd) { @@ -163,7 +235,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_open_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; - struct fuse_inode *fi; struct fuse_file *ff; struct file *file; int flags = nd->intent.open.flags - 1; @@ -172,10 +243,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) goto out; - err = -ENAMETOOLONG; - if (entry->d_name.len > FUSE_NAME_MAX) - goto out; - err = -EINTR; req = fuse_get_request(fc); if (!req) @@ -220,17 +287,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); ff->fh = outopen.fh; + /* Special release, with inode = NULL, this will + trigger a 'forget' request when the release is + complete */ fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0); goto out_put_request; } fuse_put_request(fc, req); - entry->d_time = time_to_jiffies(outentry.entry_valid, - outentry.entry_valid_nsec); - fi = get_fuse_inode(inode); - fi->i_time = time_to_jiffies(outentry.attr_valid, - outentry.attr_valid_nsec); - d_instantiate(entry, inode); + fuse_change_timeout(entry, &outentry); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; @@ -248,13 +313,15 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, return err; } +/* + * Code shared between mknod, mkdir, symlink and link + */ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, struct inode *dir, struct dentry *entry, int mode) { struct fuse_entry_out outarg; struct inode *inode; - struct fuse_inode *fi; int err; req->in.h.nodeid = get_node_id(dir); @@ -268,10 +335,13 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, fuse_put_request(fc, req); return err; } - if (invalid_nodeid(outarg.nodeid)) { - fuse_put_request(fc, req); - return -EIO; - } + err = -EIO; + if (invalid_nodeid(outarg.nodeid)) + goto out_put_request; + + if ((outarg.attr.mode ^ mode) & S_IFMT) + goto out_put_request; + inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr); if (!inode) { @@ -280,22 +350,19 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, } fuse_put_request(fc, req); - /* Don't allow userspace to do really stupid things... */ - if (((inode->i_mode ^ mode) & S_IFMT) || dir_alias(inode)) { + if (dir_alias(inode)) { iput(inode); return -EIO; } - entry->d_time = time_to_jiffies(outarg.entry_valid, - outarg.entry_valid_nsec); - - fi = get_fuse_inode(inode); - fi->i_time = time_to_jiffies(outarg.attr_valid, - outarg.attr_valid_nsec); - d_instantiate(entry, inode); + fuse_change_timeout(entry, &outarg); fuse_invalidate_attr(dir); return 0; + + out_put_request: + fuse_put_request(fc, req); + return err; } static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, @@ -355,12 +422,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, { struct fuse_conn *fc = get_fuse_conn(dir); unsigned len = strlen(link) + 1; - struct fuse_req *req; - - if (len > FUSE_SYMLINK_MAX) - return -ENAMETOOLONG; - - req = fuse_get_request(fc); + struct fuse_req *req = fuse_get_request(fc); if (!req) return -EINTR; @@ -399,6 +461,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) inode->i_nlink = 0; fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); + fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -424,6 +487,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (!err) { entry->d_inode->i_nlink = 0; fuse_invalidate_attr(dir); + fuse_invalidate_entry_cache(entry); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -459,6 +523,10 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, fuse_invalidate_attr(olddir); if (olddir != newdir) fuse_invalidate_attr(newdir); + + /* newent will end up negative */ + if (newent->d_inode) + fuse_invalidate_entry_cache(newent); } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the rename actually took place. If the invalidation @@ -566,6 +634,15 @@ static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) return 0; } +/* + * Check whether the inode attributes are still valid + * + * If the attribute validity timeout has expired, then fetch the fresh + * attributes with a 'getattr' request + * + * I'm not sure why cached attributes are never returned for the root + * inode, this is probably being too cautious. + */ static int fuse_revalidate(struct dentry *entry) { struct inode *inode = entry->d_inode; @@ -613,6 +690,19 @@ static int fuse_access(struct inode *inode, int mask) return err; } +/* + * Check permission. The two basic access models of FUSE are: + * + * 1) Local access checking ('default_permissions' mount option) based + * on file mode. This is the plain old disk filesystem permission + * modell. + * + * 2) "Remote" access checking, where server is responsible for + * checking permission in each inode operation. An exception to this + * is if ->permission() was invoked from sys_access() in which case an + * access request is sent. Execute permission is still checked + * locally based on file mode. + */ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -631,14 +721,10 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) err = generic_permission(inode, mask, NULL); } - /* FIXME: Need some mechanism to revoke permissions: - currently if the filesystem suddenly changes the - file mode, we will not be informed about it, and - continue to allow access to the file/directory. - - This is actually not so grave, since the user can - simply keep access to the file/directory anyway by - keeping it open... */ + /* Note: the opposite of the above test does not + exist. So if permissions are revoked this won't be + noticed immediately, only after the attribute + timeout has expired */ return err; } else { @@ -691,7 +777,12 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct page *page; struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = fuse_get_request(fc); + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; @@ -806,6 +897,15 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) } } +/* + * Set attributes, and at the same time refresh them. + * + * Truncation is slightly complicated, because the 'truncate' request + * may fail, in which case we don't want to touch the mapping. + * vmtruncate() doesn't allow for this case. So do the rlimit + * checking by hand and call vmtruncate() only after the file has + * actually been truncated. + */ static int fuse_setattr(struct dentry *entry, struct iattr *attr) { struct inode *inode = entry->d_inode; @@ -883,23 +983,6 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, return err; } -static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) -{ - struct inode *inode; - int err; - - err = fuse_lookup_iget(dir, entry, &inode); - if (err) - return ERR_PTR(err); - if (inode && dir_alias(inode)) { - iput(inode); - return ERR_PTR(-EIO); - } - d_add(entry, inode); - return NULL; -} - static int fuse_setxattr(struct dentry *entry, const char *name, const void *value, size_t size, int flags) { @@ -909,9 +992,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name, struct fuse_setxattr_in inarg; int err; - if (size > FUSE_XATTR_SIZE_MAX) - return -E2BIG; - if (fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2ca86141d13..05dedddf428 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -163,6 +163,9 @@ static int fuse_flush(struct file *file) struct fuse_flush_in inarg; int err; + if (is_bad_inode(inode)) + return -EIO; + if (fc->no_flush) return 0; @@ -199,6 +202,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, struct fuse_fsync_in inarg; int err; + if (is_bad_inode(inode)) + return -EIO; + if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) return 0; @@ -272,16 +278,22 @@ static int fuse_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT; - struct fuse_req *req = fuse_get_request(fc); - int err = -EINTR; + struct fuse_req *req; + int err; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + err = -EINTR; + req = fuse_get_request(fc); if (!req) goto out; req->out.page_zeroing = 1; req->num_pages = 1; req->pages[0] = page; - fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE); + fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE); err = req->out.h.error; fuse_put_request(fc, req); if (!err) @@ -295,7 +307,7 @@ static int fuse_readpage(struct file *file, struct page *page) static int fuse_send_readpages(struct fuse_req *req, struct file *file, struct inode *inode) { - loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT; + loff_t pos = page_offset(req->pages[0]); size_t count = req->num_pages << PAGE_CACHE_SHIFT; unsigned i; req->out.page_zeroing = 1; @@ -345,6 +357,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_readpages_data data; int err; + + if (is_bad_inode(inode)) + return -EIO; + data.file = file; data.inode = inode; data.req = fuse_get_request(fc); @@ -402,8 +418,13 @@ static int fuse_commit_write(struct file *file, struct page *page, unsigned count = to - offset; struct inode *inode = page->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset; - struct fuse_req *req = fuse_get_request(fc); + loff_t pos = page_offset(page) + offset; + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; @@ -454,7 +475,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = min(npages, FUSE_MAX_PAGES_PER_REQ); + npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ); down_read(¤t->mm->mmap_sem); npages = get_user_pages(current, current->mm, user_addr, npages, write, 0, req->pages, NULL); @@ -475,12 +496,16 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; - struct fuse_req *req = fuse_get_request(fc); + struct fuse_req *req; + + if (is_bad_inode(inode)) + return -EIO; + + req = fuse_get_request(fc); if (!req) return -EINTR; while (count) { - size_t tmp; size_t nres; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, nbytes, !write); @@ -488,8 +513,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, res = err; break; } - tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset; - nbytes = min(nbytes, tmp); + nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset; + nbytes = min(count, nbytes); if (write) nres = fuse_send_write(req, file, inode, pos, nbytes); else diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0ea5301f86b..74c8d098a14 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -21,6 +21,9 @@ /** If more requests are outstanding, then the operation will block */ #define FUSE_MAX_OUTSTANDING 10 +/** It could be as large as PATH_MAX, but would that have any uses? */ +#define FUSE_NAME_MAX 1024 + /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no permission checking is done in the kernel */ @@ -108,9 +111,6 @@ struct fuse_out { struct fuse_arg args[3]; }; -struct fuse_req; -struct fuse_conn; - /** * A request to the client */ @@ -159,7 +159,8 @@ struct fuse_req { union { struct fuse_forget_in forget_in; struct fuse_release_in release_in; - struct fuse_init_in_out init_in_out; + struct fuse_init_in init_in; + struct fuse_init_out init_out; } misc; /** page vector */ @@ -272,6 +273,9 @@ struct fuse_conn { /** Is create not implemented by fs? */ unsigned no_create : 1; + /** Negotiated minor version */ + unsigned minor; + /** Backing dev info */ struct backing_dev_info bdi; }; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e69a546844d..04c80cc957a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -135,12 +135,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) fuse_init_common(inode); init_special_inode(inode, inode->i_mode, new_decode_dev(attr->rdev)); - } else { - /* Don't let user create weird files */ - inode->i_mode = S_IFREG; - fuse_init_common(inode); - fuse_init_file_inode(inode); - } + } else + BUG(); } static int fuse_inode_eq(struct inode *inode, void *_nodeidp) @@ -218,6 +214,7 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr { stbuf->f_type = FUSE_SUPER_MAGIC; stbuf->f_bsize = attr->bsize; + stbuf->f_frsize = attr->frsize; stbuf->f_blocks = attr->blocks; stbuf->f_bfree = attr->bfree; stbuf->f_bavail = attr->bavail; @@ -238,10 +235,12 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) if (!req) return -EINTR; + memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; req->out.numargs = 1; - req->out.args[0].size = sizeof(outarg); + req->out.args[0].size = + fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); req->out.args[0].value = &outarg; request_send(fc, req); err = req->out.h.error; @@ -482,7 +481,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->max_read = d.max_read; if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages) fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; - fc->max_write = FUSE_MAX_IN / 2; err = -ENOMEM; root = get_root_inode(sb, d.rootmode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 8093351bd7c..6daaf7c755a 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -320,7 +320,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) /* temporarily use utf8 to correctly find the hidden dir below */ nls = sbi->nls; sbi->nls = load_nls("utf8"); - if (!nls) { + if (!sbi->nls) { printk("HFS+: unable to load nls for utf8\n"); err = -EINVAL; goto cleanup; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 8c1cef3bb67..8c41315a6e4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -100,9 +100,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; - if ((vma->vm_flags & (VM_MAYSHARE | VM_WRITE)) == VM_WRITE) - return -EINVAL; - if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) return -EINVAL; diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 014a51fd00d..cb3cef525c3 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -24,29 +24,75 @@ #include <linux/slab.h> /* - * Unlink a buffer from a transaction. + * Unlink a buffer from a transaction checkpoint list. * * Called with j_list_lock held. */ -static inline void __buffer_unlink(struct journal_head *jh) +static void __buffer_unlink_first(struct journal_head *jh) { transaction_t *transaction; transaction = jh->b_cp_transaction; - jh->b_cp_transaction = NULL; jh->b_cpnext->b_cpprev = jh->b_cpprev; jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) + if (transaction->t_checkpoint_list == jh) { transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ + +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ + +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction; + + transaction = jh->b_cp_transaction; + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; } /* * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. + * * Requires j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ @@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); + ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); - ret = 1; } else { jbd_unlock_bh_state(bh); } @@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) } /* - * Clean up a transaction's checkpoint list. - * - * We wait for any pending IO to complete and make sure any clean - * buffers are removed from the transaction. - * - * Return 1 if we performed any actions which might have destroyed the - * checkpoint. (journal_remove_checkpoint() deletes the transaction when - * the last checkpoint buffer is cleansed) + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. * * Called with j_list_lock held. */ -static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) + +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) { - struct journal_head *jh, *next_jh, *last_jh; + struct journal_head *jh; struct buffer_head *bh; - int ret = 0; - - assert_spin_locked(&journal->j_list_lock); - jh = transaction->t_checkpoint_list; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - next_jh = jh; - do { - jh = next_jh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Didn't somebody clean up the transaction in the meanwhile */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); wait_on_buffer(bh); /* the journal_head may have gone by now */ BUFFER_TRACE(bh, "brelse"); __brelse(bh); - goto out_return_1; - } - - /* - * This is foul - */ - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - goto out_return_1; + spin_lock(&journal->j_list_lock); + goto restart; } - - if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - goto out_return_1; - } - /* - * AKPM: I think the buffer_jbddirty test is redundant - it - * shouldn't have NULL b_transaction? + * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list */ - next_jh = jh->b_cpnext; - if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - __brelse(bh); - ret = 1; - } else { - jbd_unlock_bh_state(bh); - } - } while (jh != last_jh); - - return ret; -out_return_1: - spin_lock(&journal->j_list_lock); - return 1; + released = __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + } } #define NR_BATCH 64 @@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - spin_unlock(&journal->j_list_lock); ll_rw_block(SWRITE, *batch_count, bhs); - spin_lock(&journal->j_list_lock); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); @@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Return 1 if something happened which requires us to abort the current * scan of the checkpoint list. * - * Called with j_list_lock held. + * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it */ -static int __flush_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count, - int *drop_count) +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) { struct buffer_head *bh = jh2bh(jh); int ret = 0; - if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { - J_ASSERT_JH(jh, jh->b_transaction == NULL); + if (buffer_locked(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + put_bh(bh); + ret = 1; + } + else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + ret = 1; + } + else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + put_bh(bh); + ret = 1; + } + else { /* * Important: we are about to write the buffer, and * possibly block, while still holding the journal lock. @@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, J_ASSERT_BH(bh, !buffer_jwrite(bh)); set_buffer_jwrite(bh); bhs[*batch_count] = bh; + __buffer_relink_io(jh); jbd_unlock_bh_state(bh); (*batch_count)++; if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); __flush_batch(journal, bhs, batch_count); ret = 1; } - } else { - int last_buffer = 0; - if (jh->b_cpnext == jh) { - /* We may be about to drop the transaction. Tell the - * caller that the lists have changed. - */ - last_buffer = 1; - } - if (__try_to_free_cp_buf(jh)) { - (*drop_count)++; - ret = last_buffer; - } } return ret; } /* - * Perform an actual checkpoint. We don't write out only enough to - * satisfy the current blocked requests: rather we submit a reasonably - * sized chunk of the outstanding data to disk at once for - * efficiency. __log_wait_for_space() will retry if we didn't free enough. + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. * - * However, we _do_ take into account the amount requested so that once - * the IO has been queued, we can return as soon as enough of it has - * completed to disk. - * * The journal should be locked before calling this function. */ int log_do_checkpoint(journal_t *journal) { + transaction_t *transaction; + tid_t this_tid; int result; - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; jbd_debug(1, "Start checkpoint\n"); @@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal) return result; /* - * OK, we need to start writing disk blocks. Try to free up a - * quarter of the log in a single checkpoint if we can. + * OK, we need to start writing disk blocks. Take one transaction + * and write it. */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: /* - * AKPM: check this code. I had a feeling a while back that it - * degenerates into a busy loop at unmount time. + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). */ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions) { - transaction_t *transaction; - struct journal_head *jh, *last_jh, *next_jh; - int drop_count = 0; - int cleanup_ret, retry = 0; - tid_t this_tid; - - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; - jh = transaction->t_checkpoint_list; - last_jh = jh->b_cpprev; - next_jh = jh; - do { + if (journal->j_checkpoint_transactions == transaction || + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; - jh = next_jh; - next_jh = jh->b_cpnext; + jh = transaction->t_checkpoint_list; bh = jh2bh(jh); if (!jbd_trylock_bh_state(bh)) { jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); retry = 1; break; } - retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); - if (cond_resched_lock(&journal->j_list_lock)) { + retry = __process_buffer(journal, jh, bhs, + &batch_count); + if (!retry && + lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); retry = 1; break; } - } while (jh != last_jh && !retry); + } if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } __flush_batch(journal, bhs, &batch_count); - retry = 1; } + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } /* - * If someone cleaned up this transaction while we slept, we're - * done - */ - if (journal->j_checkpoint_transactions != transaction) - break; - if (retry) - continue; - /* - * Maybe it's a new transaction, but it fell at the same - * address - */ - if (transaction->t_tid != this_tid) - continue; - /* - * We have walked the whole transaction list without - * finding anything to write to disk. We had better be - * able to make some progress or we are in trouble. + * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one. */ - cleanup_ret = __cleanup_transaction(journal, transaction); - J_ASSERT(drop_count != 0 || cleanup_ret != 0); - if (journal->j_checkpoint_transactions != transaction) - break; + __wait_cp_io(journal, transaction); } +out: spin_unlock(&journal->j_list_lock); result = cleanup_journal_tail(journal); if (result < 0) return result; - return 0; } @@ -456,52 +472,91 @@ int cleanup_journal_tail(journal_t *journal) /* Checkpoint list management */ /* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of bufers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + +/* * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. * * Called with the journal locked. * Called with j_list_lock held. - * Returns number of bufers reaped (for debug) + * Returns number of buffers reaped (for debug) */ int __journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; + int ret = 0, released; transaction = journal->j_checkpoint_transactions; - if (transaction == 0) + if (!transaction) goto out; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { - struct journal_head *jh; - transaction = next_transaction; next_transaction = transaction->t_cpnext; - jh = transaction->t_checkpoint_list; - if (jh) { - struct journal_head *last_jh = jh->b_cpprev; - struct journal_head *next_jh = jh; - - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranknig */ - if (jbd_trylock_bh_state(jh2bh(jh))) - ret += __try_to_free_cp_buf(jh); - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. Bail out if preemption - * requested: - */ - if (need_resched()) - goto out; - } while (jh != last_jh); - } + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; } while (transaction != last_transaction); out: return ret; @@ -516,18 +571,22 @@ out: * buffer updates committed in that transaction have safely been stored * elsewhere on disk. To achieve this, all of the buffers in a * transaction need to be maintained on the transaction's checkpoint - * list until they have been rewritten, at which point this function is + * lists until they have been rewritten, at which point this function is * called to remove the buffer from the existing transaction's - * checkpoint list. + * checkpoint lists. + * + * The function returns 1 if it frees the transaction, 0 otherwise. * * This function is called with the journal locked. * This function is called with j_list_lock held. + * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -void __journal_remove_checkpoint(struct journal_head *jh) +int __journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; + int ret = 0; JBUFFER_TRACE(jh, "entry"); @@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) journal = transaction->t_journal; __buffer_unlink(jh); + jh->b_cp_transaction = NULL; - if (transaction->t_checkpoint_list != NULL) + if (transaction->t_checkpoint_list != NULL || + transaction->t_checkpoint_io_list != NULL) goto out; JBUFFER_TRACE(jh, "transaction has no more buffers"); @@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh) /* Just in case anybody was waiting for more transactions to be checkpointed... */ wake_up(&journal->j_wait_logspace); + ret = 1; out: JBUFFER_TRACE(jh, "exit"); + return ret; } /* @@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(transaction->t_shadow_list == NULL); J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); + J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_updates == 0); J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 041380fe667..6d2dfed1de0 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -56,13 +56,20 @@ static int nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { - int nfserr; + int err, nfserr; dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); + SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + if (nfserr) + RETURN_STATUS(nfserr); + + err = vfs_getattr(resp->fh.fh_export->ex_mnt, + resp->fh.fh_dentry, &resp->stat); + nfserr = nfserrno(err); + RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 9147b8524d0..243d94b9653 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -154,37 +154,34 @@ decode_sattr3(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - - *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); - if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { + *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); } else { - p = xdr_encode_hyper(p, (u64) stat.size); + p = xdr_encode_hyper(p, (u64) stat->size); } - p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9); - *p++ = htonl((u32) MAJOR(stat.rdev)); - *p++ = htonl((u32) MINOR(stat.rdev)); + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + *p++ = htonl((u32) MAJOR(stat->rdev)); + *p++ = htonl((u32) MINOR(stat->rdev)); if (is_fsid(fhp, rqstp->rq_reffh)) p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); else - p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev)); - p = xdr_encode_hyper(p, (u64) stat.ino); - p = encode_time3(p, &stat.atime); + p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev)); + p = xdr_encode_hyper(p, (u64) stat->ino); + p = encode_time3(p, &stat->atime); lease_get_mtime(dentry->d_inode, &time); p = encode_time3(p, &time); - p = encode_time3(p, &stat.ctime); + p = encode_time3(p, &stat->ctime); return p; } @@ -232,8 +229,14 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode != NULL) { - *p++ = xdr_one; /* attributes follow */ - return encode_fattr3(rqstp, p, fhp); + int err; + struct kstat stat; + + err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat); + if (!err) { + *p++ = xdr_one; /* attributes follow */ + return encode_fattr3(rqstp, p, fhp, &stat); + } } *p++ = xdr_zero; return p; @@ -616,7 +619,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd3_attrstat *resp) { if (resp->status == 0) - p = encode_fattr3(rqstp, p, &resp->fh); + p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b45999ff33e..aa7bb41b293 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -152,46 +152,44 @@ decode_sattr(u32 *p, struct iattr *iap) } static inline u32 * -encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, + struct kstat *stat) { - struct vfsmount *mnt = fhp->fh_export->ex_mnt; struct dentry *dentry = fhp->fh_dentry; - struct kstat stat; int type; struct timespec time; - vfs_getattr(mnt, dentry, &stat); - type = (stat.mode & S_IFMT); + type = (stat->mode & S_IFMT); *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat.mode); - *p++ = htonl((u32) stat.nlink); - *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); - *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + *p++ = htonl((u32) stat->mode); + *p++ = htonl((u32) stat->nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid)); - if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { *p++ = htonl(NFS_MAXPATHLEN); } else { - *p++ = htonl((u32) stat.size); + *p++ = htonl((u32) stat->size); } - *p++ = htonl((u32) stat.blksize); + *p++ = htonl((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat.rdev)); + *p++ = htonl(new_encode_dev(stat->rdev)); else *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat.blocks); + *p++ = htonl((u32) stat->blocks); if (is_fsid(fhp, rqstp->rq_reffh)) *p++ = htonl((u32) fhp->fh_export->ex_fsid); else - *p++ = htonl(new_encode_dev(stat.dev)); - *p++ = htonl((u32) stat.ino); - *p++ = htonl((u32) stat.atime.tv_sec); - *p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0); + *p++ = htonl(new_encode_dev(stat->dev)); + *p++ = htonl((u32) stat->ino); + *p++ = htonl((u32) stat->atime.tv_sec); + *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); lease_get_mtime(dentry->d_inode, &time); *p++ = htonl((u32) time.tv_sec); *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat.ctime.tv_sec); - *p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0); + *p++ = htonl((u32) stat->ctime.tv_sec); + *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0); return p; } @@ -199,7 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) /* Helper function for NFSv2 ACL code */ u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) { - return encode_fattr(rqstp, p, fhp); + struct kstat stat; + vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat); + return encode_fattr(rqstp, p, fhp, &stat); } /* @@ -394,7 +394,7 @@ int nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, struct nfsd_attrstat *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -403,7 +403,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, struct nfsd_diropres *resp) { p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); return xdr_ressize_check(rqstp, p); } @@ -428,7 +428,7 @@ int nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, struct nfsd_readres *resp) { - p = encode_fattr(rqstp, p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); *p++ = htonl(resp->count); xdr_ressize_check(rqstp, p); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index af7c3c3074b..df4019f0456 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -717,27 +717,33 @@ nfsd_close(struct file *filp) * As this calls fsync (not fdatasync) there is no need for a write_inode * after it. */ -static inline void nfsd_dosync(struct file *filp, struct dentry *dp, - struct file_operations *fop) +static inline int nfsd_dosync(struct file *filp, struct dentry *dp, + struct file_operations *fop) { struct inode *inode = dp->d_inode; int (*fsync) (struct file *, struct dentry *, int); + int err = nfs_ok; filemap_fdatawrite(inode->i_mapping); if (fop && (fsync = fop->fsync)) - fsync(filp, dp, 0); + err=fsync(filp, dp, 0); filemap_fdatawait(inode->i_mapping); + + return nfserrno(err); } -static void +static int nfsd_sync(struct file *filp) { + int err; struct inode *inode = filp->f_dentry->d_inode; dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); down(&inode->i_sem); - nfsd_dosync(filp, filp->f_dentry, filp->f_op); + err=nfsd_dosync(filp, filp->f_dentry, filp->f_op); up(&inode->i_sem); + + return err; } void @@ -874,6 +880,16 @@ out: return err; } +static void kill_suid(struct dentry *dentry) +{ + struct iattr ia; + ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; + + down(&dentry->d_inode->i_sem); + notify_change(dentry, &ia); + up(&dentry->d_inode->i_sem); +} + static inline int nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@ -927,14 +943,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, } /* clear setuid/setgid flag after write */ - if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) { - struct iattr ia; - ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; - - down(&inode->i_sem); - notify_change(dentry, &ia); - up(&inode->i_sem); - } + if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + kill_suid(dentry); if (err >= 0 && stable) { static ino_t last_ino; @@ -962,7 +972,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", current->pid); - nfsd_sync(file); + err=nfsd_sync(file); } #if 0 wake_up(&inode->i_wait); @@ -1066,7 +1076,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; if (EX_ISSYNC(fhp->fh_export)) { if (file->f_op && file->f_op->fsync) { - nfsd_sync(file); + err = nfsd_sync(file); } else { err = nfserr_notsupp; } diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index 656bc43431b..e227a04261a 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -85,7 +85,7 @@ config ATARI_PARTITION config IBM_PARTITION bool "IBM disk label and partition support" - depends on PARTITION_ADVANCED && ARCH_S390 + depends on PARTITION_ADVANCED && S390 help Say Y here if you would like to be able to read the hard disk partition table format used by IBM DASD disks operating under CMS. diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c index 6327bcb2d73..78010ad60e4 100644 --- a/fs/partitions/ibm.c +++ b/fs/partitions/ibm.c @@ -56,7 +56,10 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) struct hd_geometry *geo; char type[5] = {0,}; char name[7] = {0,}; - struct vtoc_volume_label *vlabel; + union label_t { + struct vtoc_volume_label vol; + struct vtoc_cms_label cms; + } *label; unsigned char *data; Sector sect; @@ -64,9 +67,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) goto out_noinfo; if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL) goto out_nogeo; - if ((vlabel = kmalloc(sizeof(struct vtoc_volume_label), - GFP_KERNEL)) == NULL) - goto out_novlab; + if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL) + goto out_nolab; if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 || ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) @@ -87,7 +89,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) strncpy(name, data + 8, 6); else strncpy(name, data + 4, 6); - memcpy (vlabel, data, sizeof(struct vtoc_volume_label)); + memcpy(label, data, sizeof(union label_t)); put_dev_sector(sect); EBCASC(type, 4); @@ -100,14 +102,12 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) /* * VM style CMS1 labeled disk */ - int *label = (int *) vlabel; - - if (label[13] != 0) { + if (label->cms.disk_offset != 0) { printk("CMS1/%8s(MDSK):", name); /* disk is reserved minidisk */ - blocksize = label[3]; - offset = label[13]; - size = (label[7] - 1)*(blocksize >> 9); + blocksize = label->cms.block_size; + offset = label->cms.disk_offset; + size = (label->cms.block_count - 1) * (blocksize >> 9); } else { printk("CMS1/%8s:", name); offset = (info->label_block + 1); @@ -126,7 +126,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) printk("VOL1/%8s:", name); /* get block number and read then go through format1 labels */ - blk = cchhb2blk(&vlabel->vtoc, geo) + 1; + blk = cchhb2blk(&label->vol.vtoc, geo) + 1; counter = 0; while ((data = read_dev_sector(bdev, blk*(blocksize/512), §)) != NULL) { @@ -174,7 +174,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) } printk("\n"); - kfree(vlabel); + kfree(label); kfree(geo); kfree(info); return 1; @@ -182,8 +182,8 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev) out_readerr: out_badsect: out_noioctl: - kfree(vlabel); -out_novlab: + kfree(label); +out_nolab: kfree(geo); out_nogeo: kfree(info); diff --git a/fs/proc/array.c b/fs/proc/array.c index 3e1239e4b30..5e9251f6531 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -308,7 +308,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); buffer = cpuset_task_status_allowed(task, buffer); -#if defined(CONFIG_ARCH_S390) +#if defined(CONFIG_S390) buffer = task_show_regs(task, buffer); #endif return buffer - orig; diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile index f096f300709..5a0236e02ee 100644 --- a/fs/ramfs/Makefile +++ b/fs/ramfs/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_RAMFS) += ramfs.o -ramfs-objs := inode.o +file-mmu-y := file-nommu.o +file-mmu-$(CONFIG_MMU) := file-mmu.o +ramfs-objs += inode.o $(file-mmu-y) diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c new file mode 100644 index 00000000000..2115383dcc8 --- /dev/null +++ b/fs/ramfs/file-mmu.c @@ -0,0 +1,57 @@ +/* file-mmu.c: ramfs MMU-based file operations + * + * Resizable simple ram filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + * + * Usage limits added by David Gibson, Linuxcare Australia. + * This file is released under the GPL. + */ + +/* + * NOTE! This filesystem is probably most useful + * not as a real filesystem, but as an example of + * how virtual filesystems can be written. + * + * It doesn't get much simpler than this. Consider + * that this file implements the full semantics of + * a POSIX-compliant read-write filesystem. + * + * Note in particular how the filesystem does not + * need to implement any data structures of its own + * to keep track of the virtual data: using the VFS + * caches is sufficient. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/backing-dev.h> +#include <linux/ramfs.h> + +#include <asm/uaccess.h> +#include "internal.h" + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .read = generic_file_read, + .write = generic_file_write, + .mmap = generic_file_mmap, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .getattr = simple_getattr, +}; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c new file mode 100644 index 00000000000..3f810acd0bf --- /dev/null +++ b/fs/ramfs/file-nommu.c @@ -0,0 +1,292 @@ +/* file-nommu.c: no-MMU version of ramfs + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <linux/string.h> +#include <linux/smp_lock.h> +#include <linux/backing-dev.h> +#include <linux/ramfs.h> +#include <linux/quotaops.h> +#include <linux/pagevec.h> +#include <linux/mman.h> + +#include <asm/uaccess.h> +#include "internal.h" + +static int ramfs_nommu_setattr(struct dentry *, struct iattr *); + +struct address_space_operations ramfs_aops = { + .readpage = simple_readpage, + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write +}; + +struct file_operations ramfs_file_operations = { + .mmap = ramfs_nommu_mmap, + .get_unmapped_area = ramfs_nommu_get_unmapped_area, + .read = generic_file_read, + .write = generic_file_write, + .fsync = simple_sync_file, + .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, +}; + +struct inode_operations ramfs_file_inode_operations = { + .setattr = ramfs_nommu_setattr, + .getattr = simple_getattr, +}; + +/*****************************************************************************/ +/* + * add a contiguous set of pages into a ramfs inode when it's truncated from + * size 0 on the assumption that it's going to be used for an mmap of shared + * memory + */ +static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +{ + struct pagevec lru_pvec; + unsigned long npages, xpages, loop, limit; + struct page *pages; + unsigned order; + void *data; + int ret; + + /* make various checks */ + order = get_order(newsize); + if (unlikely(order >= MAX_ORDER)) + goto too_big; + + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (limit != RLIM_INFINITY && newsize > limit) + goto fsize_exceeded; + + if (newsize > inode->i_sb->s_maxbytes) + goto too_big; + + i_size_write(inode, newsize); + + /* allocate enough contiguous pages to be able to satisfy the + * request */ + pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order); + if (!pages) + return -ENOMEM; + + /* split the high-order page into an array of single pages */ + xpages = 1UL << order; + npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + + for (loop = 0; loop < npages; loop++) + set_page_count(pages + loop, 1); + + /* trim off any pages we don't actually require */ + for (loop = npages; loop < xpages; loop++) + __free_page(pages + loop); + + /* clear the memory we allocated */ + newsize = PAGE_SIZE * npages; + data = page_address(pages); + memset(data, 0, newsize); + + /* attach all the pages to the inode's address space */ + pagevec_init(&lru_pvec, 0); + for (loop = 0; loop < npages; loop++) { + struct page *page = pages + loop; + + ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); + if (ret < 0) + goto add_error; + + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + + unlock_page(page); + } + + pagevec_lru_add(&lru_pvec); + return 0; + + fsize_exceeded: + send_sig(SIGXFSZ, current, 0); + too_big: + return -EFBIG; + + add_error: + page_cache_release(pages + loop); + for (loop++; loop < npages; loop++) + __free_page(pages + loop); + return ret; +} + +/*****************************************************************************/ +/* + * check that file shrinkage doesn't leave any VMAs dangling in midair + */ +static int ramfs_nommu_check_mappings(struct inode *inode, + size_t newsize, size_t size) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + + /* search for VMAs that fall within the dead zone */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + newsize >> PAGE_SHIFT, + (size + PAGE_SIZE - 1) >> PAGE_SHIFT + ) { + /* found one - only interested if it's shared out of the page + * cache */ + if (vma->vm_flags & VM_SHARED) + return -ETXTBSY; /* not quite true, but near enough */ + } + + return 0; +} + +/*****************************************************************************/ +/* + * + */ +static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) +{ + int ret; + + /* assume a truncate from zero size is going to be for the purposes of + * shared mmap */ + if (size == 0) { + if (unlikely(newsize >> 32)) + return -EFBIG; + + return ramfs_nommu_expand_for_mapping(inode, newsize); + } + + /* check that a decrease in size doesn't cut off any shared mappings */ + if (newsize < size) { + ret = ramfs_nommu_check_mappings(inode, newsize, size); + if (ret < 0) + return ret; + } + + ret = vmtruncate(inode, size); + + return ret; +} + +/*****************************************************************************/ +/* + * handle a change of attributes + * - we're specifically interested in a change of size + */ +static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) +{ + struct inode *inode = dentry->d_inode; + unsigned int old_ia_valid = ia->ia_valid; + int ret = 0; + + /* by providing our own setattr() method, we skip this quotaism */ + if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) || + (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid)) + ret = DQUOT_TRANSFER(inode, ia) ? -EDQUOT : 0; + + /* pick out size-changing events */ + if (ia->ia_valid & ATTR_SIZE) { + loff_t size = i_size_read(inode); + if (ia->ia_size != size) { + ret = ramfs_nommu_resize(inode, ia->ia_size, size); + if (ret < 0 || ia->ia_valid == ATTR_SIZE) + goto out; + } else { + /* we skipped the truncate but must still update + * timestamps + */ + ia->ia_valid |= ATTR_MTIME|ATTR_CTIME; + } + } + + ret = inode_setattr(inode, ia); + out: + ia->ia_valid = old_ia_valid; + return ret; +} + +/*****************************************************************************/ +/* + * try to determine where a shared mapping can be made + * - we require that: + * - the pages to be mapped must exist + * - the pages be physically contiguous in sequence + */ +unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + unsigned long maxpages, lpages, nr, loop, ret; + struct inode *inode = file->f_dentry->d_inode; + struct page **pages = NULL, **ptr, *page; + loff_t isize; + + if (!(flags & MAP_SHARED)) + return addr; + + /* the mapping mustn't extend beyond the EOF */ + lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + isize = i_size_read(inode); + + ret = -EINVAL; + maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (pgoff >= maxpages) + goto out; + + if (maxpages - pgoff < lpages) + goto out; + + /* gang-find the pages */ + ret = -ENOMEM; + pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out; + + nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); + if (nr != lpages) + goto out; /* leave if some pages were missing */ + + /* check the pages for physical adjacency */ + ptr = pages; + page = *ptr++; + page++; + for (loop = lpages; loop > 1; loop--) + if (*ptr++ != page++) + goto out; + + /* okay - all conditions fulfilled */ + ret = (unsigned long) page_address(pages[0]); + + out: + if (pages) { + ptr = pages; + for (loop = lpages; loop > 0; loop--) + put_page(*ptr++); + kfree(pages); + } + + return ret; +} + +/*****************************************************************************/ +/* + * set up a mapping + */ +int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +{ + return 0; +} diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 0a88917605a..c66bd5e4c05 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -34,13 +34,12 @@ #include <linux/ramfs.h> #include <asm/uaccess.h> +#include "internal.h" /* some random number */ #define RAMFS_MAGIC 0x858458f6 static struct super_operations ramfs_ops; -static struct address_space_operations ramfs_aops; -static struct inode_operations ramfs_file_inode_operations; static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { @@ -142,25 +141,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * return error; } -static struct address_space_operations ramfs_aops = { - .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write -}; - -struct file_operations ramfs_file_operations = { - .read = generic_file_read, - .write = generic_file_write, - .mmap = generic_file_mmap, - .fsync = simple_sync_file, - .sendfile = generic_file_sendfile, - .llseek = generic_file_llseek, -}; - -static struct inode_operations ramfs_file_inode_operations = { - .getattr = simple_getattr, -}; - static struct inode_operations ramfs_dir_inode_operations = { .create = ramfs_create, .lookup = simple_lookup, diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h new file mode 100644 index 00000000000..272c8a7120b --- /dev/null +++ b/fs/ramfs/internal.h @@ -0,0 +1,15 @@ +/* internal.h: ramfs internal definitions + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + + +extern struct address_space_operations ramfs_aops; +extern struct file_operations ramfs_file_operations; +extern struct inode_operations ramfs_file_inode_operations; |