diff options
Diffstat (limited to 'fs')
193 files changed, 2891 insertions, 1979 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index b364da70ff2..dfebdbe7440 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -175,7 +175,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) if (!wnames) return ERR_PTR(-ENOMEM); - for (d = dentry, i = n; i >= 0; i--, d = d->d_parent) + for (d = dentry, i = (n-1); i >= 0; i--, d = d->d_parent) wnames[i] = (char *) d->d_name.name; clone = 1; @@ -183,7 +183,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) while (i < n) { l = min(n - i, P9_MAXWELEM); fid = p9_client_walk(fid, l, &wnames[i], clone); - if (!fid) { + if (IS_ERR(fid)) { kfree(wnames); return fid; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index fbb12dadba8..9b0f0222e8b 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -3,7 +3,7 @@ * * This file contains functions assisting in mapping VFS to 9P2000 * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * * This program is free software; you can redistribute it and/or modify @@ -31,7 +31,6 @@ #include <linux/idr.h> #include <net/9p/9p.h> #include <net/9p/transport.h> -#include <net/9p/conn.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" @@ -43,11 +42,11 @@ enum { /* Options that take integer arguments */ - Opt_debug, Opt_msize, Opt_dfltuid, Opt_dfltgid, Opt_afid, + Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, /* String options */ Opt_uname, Opt_remotename, Opt_trans, /* Options that take no arguments */ - Opt_legacy, Opt_nodevmap, + Opt_nodevmap, /* Cache options */ Opt_cache_loose, /* Access options */ @@ -58,14 +57,11 @@ enum { static match_table_t tokens = { {Opt_debug, "debug=%x"}, - {Opt_msize, "msize=%u"}, {Opt_dfltuid, "dfltuid=%u"}, {Opt_dfltgid, "dfltgid=%u"}, {Opt_afid, "afid=%u"}, {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, - {Opt_trans, "trans=%s"}, - {Opt_legacy, "noextend"}, {Opt_nodevmap, "nodevmap"}, {Opt_cache_loose, 
"cache=loose"}, {Opt_cache_loose, "loose"}, @@ -85,16 +81,14 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) char *options; substring_t args[MAX_OPT_ARGS]; char *p; - int option; - int ret; + int option = 0; char *s, *e; + int ret; /* setup defaults */ - v9ses->maxdata = 8192; v9ses->afid = ~0; v9ses->debug = 0; v9ses->cache = 0; - v9ses->trans = v9fs_default_trans(); if (!v9ses->options) return; @@ -106,7 +100,8 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) continue; token = match_token(p, tokens, args); if (token < Opt_uname) { - if ((ret = match_int(&args[0], &option)) < 0) { + ret = match_int(&args[0], &option); + if (ret < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); continue; @@ -119,9 +114,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) p9_debug_level = option; #endif break; - case Opt_msize: - v9ses->maxdata = option; - break; + case Opt_dfltuid: v9ses->dfltuid = option; break; @@ -131,18 +124,12 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) case Opt_afid: v9ses->afid = option; break; - case Opt_trans: - v9ses->trans = v9fs_match_trans(&args[0]); - break; case Opt_uname: match_strcpy(v9ses->uname, &args[0]); break; case Opt_remotename: match_strcpy(v9ses->aname, &args[0]); break; - case Opt_legacy: - v9ses->flags &= ~V9FS_EXTENDED; - break; case Opt_nodevmap: v9ses->nodev = 1; break; @@ -185,7 +172,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { int retval = -EINVAL; - struct p9_trans *trans = NULL; struct p9_fid *fid; v9ses->uname = __getname(); @@ -207,24 +193,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->options = kstrdup(data, GFP_KERNEL); v9fs_parse_options(v9ses); - if (v9ses->trans == NULL) { - retval = -EPROTONOSUPPORT; - P9_DPRINTK(P9_DEBUG_ERROR, - "No transport defined or default transport\n"); - goto error; - } - - trans = 
v9ses->trans->create(dev_name, v9ses->options); - if (IS_ERR(trans)) { - retval = PTR_ERR(trans); - trans = NULL; - goto error; - } - if ((v9ses->maxdata+P9_IOHDRSZ) > v9ses->trans->maxsize) - v9ses->maxdata = v9ses->trans->maxsize-P9_IOHDRSZ; - - v9ses->clnt = p9_client_create(trans, v9ses->maxdata+P9_IOHDRSZ, - v9fs_extended(v9ses)); + v9ses->clnt = p9_client_create(dev_name, v9ses->options); if (IS_ERR(v9ses->clnt)) { retval = PTR_ERR(v9ses->clnt); @@ -236,6 +205,8 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, if (!v9ses->clnt->dotu) v9ses->flags &= ~V9FS_EXTENDED; + v9ses->maxdata = v9ses->clnt->msize; + /* for legacy mode, fall back to V9FS_ACCESS_ANY */ if (!v9fs_extended(v9ses) && ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index db4b4193f2e..7d3a1018db5 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -1,7 +1,7 @@ /* * V9FS definitions. * - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * * This program is free software; you can redistribute it and/or modify @@ -28,7 +28,6 @@ struct v9fs_session_info { /* options */ - unsigned int maxdata; unsigned char flags; /* session flags */ unsigned char nodev; /* set to 1 if no disable device mapping */ unsigned short debug; /* debug level */ @@ -38,10 +37,10 @@ struct v9fs_session_info { char *options; /* copy of mount options */ char *uname; /* user name to mount as */ char *aname; /* name of remote hierarchy being mounted */ + unsigned int maxdata; /* max data for client interface */ unsigned int dfltuid; /* default uid/muid for legacy support */ unsigned int dfltgid; /* default gid for legacy support */ u32 uid; /* if ACCESS_SINGLE, the uid that has access */ - struct p9_trans_module *trans; /* 9p transport */ struct p9_client *clnt; /* 9p client */ struct dentry *debugfs_dir; }; diff --git 
a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index ba4b1caa9c4..a616fff8906 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -184,7 +184,7 @@ static const struct file_operations v9fs_cached_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_mmap, + .mmap = generic_file_readonly_mmap, }; const struct file_operations v9fs_file_operations = { @@ -194,5 +194,5 @@ const struct file_operations v9fs_file_operations = { .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, - .mmap = generic_file_mmap, + .mmap = generic_file_readonly_mmap, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 23581bcb599..6a28842052e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -77,6 +77,8 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) res |= P9_DMSETGID; + if ((mode & S_ISVTX) == S_ISVTX) + res |= P9_DMSETVTX; if ((mode & P9_DMLINK)) res |= P9_DMLINK; } @@ -119,6 +121,9 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; + + if ((mode & P9_DMSETVTX) == P9_DMSETVTX) + res |= S_ISVTX; } return res; @@ -568,7 +573,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, v9ses = v9fs_inode2v9ses(dir); dfid = v9fs_fid_lookup(dentry->d_parent); if (IS_ERR(dfid)) - return ERR_PTR(PTR_ERR(dfid)); + return ERR_CAST(dfid); name = (char *) dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); diff --git a/fs/Kconfig b/fs/Kconfig index 987b5d7cb21..3bf6ace1720 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -463,40 +463,18 @@ config OCFS2_DEBUG_FS this option for debugging only as it is likely to decrease performance of the filesystem. -config MINIX_FS - tristate "Minix fs support" - help - Minix is a simple operating system used in many classes about OS's. 
- The minix file system (method to organize files on a hard disk - partition or a floppy disk) was the original file system for Linux, - but has been superseded by the second extended file system ext2fs. - You don't want to use the minix file system on your hard disk - because of certain built-in restrictions, but it is sometimes found - on older Linux floppy disks. This option will enlarge your kernel - by about 28 KB. If unsure, say N. - - To compile this file system support as a module, choose M here: the - module will be called minix. Note that the file system of your root - partition (the one containing the directory /) cannot be compiled as - a module. - -config ROMFS_FS - tristate "ROM file system support" - ---help--- - This is a very small read-only file system mainly intended for - initial ram disks of installation disks, but it could be used for - other read-only media as well. Read - <file:Documentation/filesystems/romfs.txt> for details. - - To compile this file system support as a module, choose M here: the - module will be called romfs. Note that the file system of your - root partition (the one containing the directory /) cannot be a - module. +endif # BLOCK - If you don't know whether you need it, then you don't need it: - answer N. +config DNOTIFY + bool "Dnotify support" + default y + help + Dnotify is a directory-based per-fd file change notification system + that uses signals to communicate events to user-space. There exist + superior alternatives, but some applications may still rely on + dnotify. -endif + If unsure, say Y. config INOTIFY bool "Inotify file change notification support" @@ -577,17 +555,6 @@ config QUOTACTL depends on XFS_QUOTA || QUOTA default y -config DNOTIFY - bool "Dnotify support" - default y - help - Dnotify is a directory-based per-fd file change notification system - that uses signals to communicate events to user-space. There exist - superior alternatives, but some applications may still rely on - dnotify. 
- - If unsure, say Y. - config AUTOFS_FS tristate "Kernel automounter support" help @@ -713,7 +680,7 @@ config UDF_NLS depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y) endmenu -endif +endif # BLOCK if BLOCK menu "DOS/FAT/NT Filesystems" @@ -896,7 +863,7 @@ config NTFS_RW It is perfectly safe to say N here. endmenu -endif +endif # BLOCK menu "Pseudo filesystems" @@ -1152,7 +1119,7 @@ config BEFS_DEBUG depends on BEFS_FS help If you say Y here, you can use the 'debug' mount option to enable - debugging output from the driver. + debugging output from the driver. config BFS_FS tristate "BFS file system support (EXPERIMENTAL)" @@ -1263,7 +1230,7 @@ config JFFS2_FS_XATTR Extended attributes are name:value pairs associated with inodes by the kernel or by users (see the attr(5) manual page, or visit <http://acl.bestbits.at/> for details). - + If unsure, say N. config JFFS2_FS_POSIX_ACL @@ -1274,10 +1241,10 @@ config JFFS2_FS_POSIX_ACL help Posix Access Control Lists (ACLs) support permissions for users and groups beyond the owner/group/world scheme. - + To learn more about Access Control Lists, visit the Posix ACLs for Linux website <http://acl.bestbits.at/>. - + If you don't know what Access Control Lists are, say N config JFFS2_FS_SECURITY @@ -1289,7 +1256,7 @@ config JFFS2_FS_SECURITY implemented by security modules like SELinux. This option enables an extended attribute handler for file security labels in the jffs2 filesystem. - + If you are not using a security module that requires using extended attributes for file security labels, say N. @@ -1417,6 +1384,24 @@ config VXFS_FS To compile this as a module, choose M here: the module will be called freevxfs. If unsure, say N. +config MINIX_FS + tristate "Minix file system support" + depends on BLOCK + help + Minix is a simple operating system used in many classes about OS's. 
+ The minix file system (method to organize files on a hard disk + partition or a floppy disk) was the original file system for Linux, + but has been superseded by the second extended file system ext2fs. + You don't want to use the minix file system on your hard disk + because of certain built-in restrictions, but it is sometimes found + on older Linux floppy disks. This option will enlarge your kernel + by about 28 KB. If unsure, say N. + + To compile this file system support as a module, choose M here: the + module will be called minix. Note that the file system of your root + partition (the one containing the directory /) cannot be compiled as + a module. + config HPFS_FS tristate "OS/2 HPFS file system support" @@ -1434,7 +1419,6 @@ config HPFS_FS module will be called hpfs. If unsure, say N. - config QNX4FS_FS tristate "QNX4 file system support (read only)" depends on BLOCK @@ -1461,6 +1445,22 @@ config QNX4FS_RW It's currently broken, so for now: answer N. +config ROMFS_FS + tristate "ROM file system support" + depends on BLOCK + ---help--- + This is a very small read-only file system mainly intended for + initial ram disks of installation disks, but it could be used for + other read-only media as well. Read + <file:Documentation/filesystems/romfs.txt> for details. + + To compile this file system support as a module, choose M here: the + module will be called romfs. Note that the file system of your + root partition (the one containing the directory /) cannot be a + module. + + If you don't know whether you need it, then you don't need it: + answer N. config SYSV_FS @@ -1501,7 +1501,6 @@ config SYSV_FS If you haven't heard about all of this before, it's safe to say N. - config UFS_FS tristate "UFS file system support (read only)" depends on BLOCK @@ -1835,7 +1834,7 @@ config RPCSEC_GSS_SPKM3 If unsure, say N. 
config SMB_FS - tristate "SMB file system support (to mount Windows shares etc.)" + tristate "SMB file system support (OBSOLETE, please use CIFS)" depends on INET select NLS help @@ -1858,8 +1857,8 @@ config SMB_FS General information about how to connect Linux, Windows machines and Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. - To compile the SMB support as a module, choose M here: the module will - be called smbfs. Most people say N, however. + To compile the SMB support as a module, choose M here: + the module will be called smbfs. Most people say N, however. config SMB_NLS_DEFAULT bool "Use a default NLS" @@ -1891,7 +1890,7 @@ config SMB_NLS_REMOTE smbmount from samba 2.2.0 or later supports this. config CIFS - tristate "CIFS support (advanced network filesystem for Samba, Window and other CIFS compliant servers)" + tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS help @@ -1949,16 +1948,16 @@ config CIFS_WEAK_PW_HASH LANMAN based servers such as OS/2 and Windows 95, but such mounts may be less secure than mounts using NTLM or more recent security mechanisms if you are on a public network. Unless you - have a need to access old SMB servers (and are on a private + have a need to access old SMB servers (and are on a private network) you probably want to say N. Even if this support is enabled in the kernel build, LANMAN authentication will not be used automatically. At runtime LANMAN mounts are disabled but can be set to required (or optional) either in /proc/fs/cifs (see fs/cifs/README for more detail) or via an - option on the mount command. This support is disabled by + option on the mount command. This support is disabled by default in order to reduce the possibility of a downgrade attack. - + If unsure, say N. config CIFS_XATTR @@ -1999,7 +1998,7 @@ config CIFS_DEBUG2 messages in some error paths, slowing performance. This option can be turned off unless you are debugging cifs problems. 
If unsure, say N. - + config CIFS_EXPERIMENTAL bool "CIFS Experimental Features (EXPERIMENTAL)" depends on CIFS && EXPERIMENTAL @@ -2090,7 +2089,7 @@ config CODA_FS_OLD_API However this new API is not backward compatible with older clients. If you really need to run the old Coda userspace cache manager then say Y. - + For most cases you probably want to say N. config AFS_FS diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 232c6949368..d5bd497ab9c 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -174,7 +174,8 @@ extern void affs_put_inode(struct inode *inode); extern void affs_drop_inode(struct inode *inode); extern void affs_delete_inode(struct inode *inode); extern void affs_clear_inode(struct inode *inode); -extern void affs_read_inode(struct inode *inode); +extern struct inode *affs_iget(struct super_block *sb, + unsigned long ino); extern int affs_write_inode(struct inode *inode, int); extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type); diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index f4de4b98004..805573005de 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -170,9 +170,11 @@ affs_remove_link(struct dentry *dentry) if (!link_bh) goto done; - dir = iget(sb, be32_to_cpu(AFFS_TAIL(sb, link_bh)->parent)); - if (!dir) + dir = affs_iget(sb, be32_to_cpu(AFFS_TAIL(sb, link_bh)->parent)); + if (IS_ERR(dir)) { + retval = PTR_ERR(dir); goto done; + } affs_lock_dir(dir); affs_fix_dcache(dentry, link_ino); diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 4609a6c13fe..27fe6cbe43a 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -15,20 +15,25 @@ extern const struct inode_operations affs_symlink_inode_operations; extern struct timezone sys_tz; -void -affs_read_inode(struct inode *inode) +struct inode *affs_iget(struct super_block *sb, unsigned long ino) { - struct super_block *sb = inode->i_sb; struct affs_sb_info *sbi = AFFS_SB(sb); struct buffer_head *bh; struct affs_head *head; struct 
affs_tail *tail; + struct inode *inode; u32 block; u32 size; u32 prot; u16 id; - pr_debug("AFFS: read_inode(%lu)\n",inode->i_ino); + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + pr_debug("AFFS: affs_iget(%lu)\n", inode->i_ino); block = inode->i_ino; bh = affs_bread(sb, block); @@ -154,12 +159,13 @@ affs_read_inode(struct inode *inode) sys_tz.tz_minuteswest * 60; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0; affs_brelse(bh); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); affs_brelse(bh); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } int diff --git a/fs/affs/namei.c b/fs/affs/namei.c index a42143ca016..2218f1ee71c 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -208,9 +208,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) affs_lock_dir(dir); bh = affs_find_entry(dir, dentry); affs_unlock_dir(dir); - if (IS_ERR(bh)) { - return ERR_PTR(PTR_ERR(bh)); - } + if (IS_ERR(bh)) + return ERR_CAST(bh); if (bh) { u32 ino = bh->b_blocknr; @@ -223,10 +222,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ino = be32_to_cpu(AFFS_TAIL(sb, bh)->original); } affs_brelse(bh); - inode = iget(sb, ino); - if (!inode) { - return ERR_PTR(-EACCES); - } + inode = affs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); } dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? 
&affs_intl_dentry_operations : &affs_dentry_operations; d_add(dentry, inode); diff --git a/fs/affs/super.c b/fs/affs/super.c index b53e5d0ec65..3c45d49c0d2 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -113,7 +113,6 @@ static void destroy_inodecache(void) static const struct super_operations affs_sops = { .alloc_inode = affs_alloc_inode, .destroy_inode = affs_destroy_inode, - .read_inode = affs_read_inode, .write_inode = affs_write_inode, .put_inode = affs_put_inode, .drop_inode = affs_drop_inode, @@ -271,6 +270,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) unsigned long mount_flags; int tmp_flags; /* fix remount prototype... */ u8 sig[4]; + int ret = -EINVAL; pr_debug("AFFS: read_super(%s)\n",data ? (const char *)data : "no options"); @@ -444,7 +444,12 @@ got_root: /* set up enough so that it can read an inode */ - root_inode = iget(sb, root_block); + root_inode = affs_iget(sb, root_block); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); + goto out_error_noinode; + } + sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) { printk(KERN_ERR "AFFS: Get root inode failed\n"); @@ -461,12 +466,13 @@ got_root: out_error: if (root_inode) iput(root_inode); +out_error_noinode: kfree(sbi->s_bitmap); affs_brelse(root_bh); kfree(sbi->s_prefix); kfree(sbi); sb->s_fs_info = NULL; - return -EINVAL; + return ret; } static int diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0cc3597c119..b58af8f18bc 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -512,7 +512,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, key = afs_request_key(vnode->volume->cell); if (IS_ERR(key)) { _leave(" = %ld [key]", PTR_ERR(key)); - return ERR_PTR(PTR_ERR(key)); + return ERR_CAST(key); } ret = afs_validate(vnode, key); @@ -540,7 +540,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, key_put(key); if (IS_ERR(inode)) { _leave(" = %ld", PTR_ERR(inode)); - return ERR_PTR(PTR_ERR(inode)); + return 
ERR_CAST(inode); } dentry->d_op = &afs_fs_dentry_operations; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 84750c8e9f9..08db82e1343 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -196,10 +196,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, /* failure */ bad_inode: - make_bad_inode(inode); - unlock_new_inode(inode); - iput(inode); - + iget_failed(inode); _leave(" = %d [bad]", ret); return ERR_PTR(ret); } diff --git a/fs/afs/security.c b/fs/afs/security.c index 566fe712c68..9446a1fd108 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -95,7 +95,7 @@ static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode, auth_inode = afs_iget(vnode->vfs_inode.i_sb, key, &vnode->status.parent, NULL, NULL); if (IS_ERR(auth_inode)) - return ERR_PTR(PTR_ERR(auth_inode)); + return ERR_CAST(auth_inode); } auth_vnode = AFS_FS_I(auth_inode); diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 8b4cca3c470..901a3e67ec4 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -150,6 +150,7 @@ extern const struct file_operations autofs_root_operations; int autofs_fill_super(struct super_block *, void *, int); void autofs_kill_sb(struct super_block *sb); +struct inode *autofs_iget(struct super_block *, unsigned long); /* Queue management functions */ diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 45f5992a095..708bdb89fea 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -52,10 +52,7 @@ out_kill_sb: kill_anon_super(sb); } -static void autofs_read_inode(struct inode *inode); - static const struct super_operations autofs_sops = { - .read_inode = autofs_read_inode, .statfs = simple_statfs, }; @@ -164,7 +161,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) s->s_time_gran = 1; sbi->sb = s; - root_inode = iget(s, AUTOFS_ROOT_INO); + root_inode = autofs_iget(s, AUTOFS_ROOT_INO); + if (IS_ERR(root_inode)) + goto fail_free; root = d_alloc_root(root_inode); pipe = NULL; @@ -230,11 +229,17 @@ 
fail_unlock: return -EINVAL; } -static void autofs_read_inode(struct inode *inode) +struct inode *autofs_iget(struct super_block *sb, unsigned long ino) { - ino_t ino = inode->i_ino; unsigned int n; - struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); + struct autofs_sb_info *sbi = autofs_sbi(sb); + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; /* Initialize to the default case (stub directory) */ @@ -250,7 +255,7 @@ static void autofs_read_inode(struct inode *inode) inode->i_op = &autofs_root_inode_operations; inode->i_fop = &autofs_root_operations; inode->i_uid = inode->i_gid = 0; /* Changed in read_super */ - return; + goto done; } inode->i_uid = inode->i_sb->s_root->d_inode->i_uid; @@ -263,7 +268,7 @@ static void autofs_read_inode(struct inode *inode) n = ino - AUTOFS_FIRST_SYMLINK; if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) { printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino); - return; + goto done; } inode->i_op = &autofs_symlink_inode_operations; @@ -275,4 +280,8 @@ static void autofs_read_inode(struct inode *inode) inode->i_size = sl->len; inode->i_nlink = 1; } + +done: + unlock_new_inode(inode); + return inode; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 5efff3c0d88..8aacade5695 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -114,8 +114,8 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str dentry->d_time = (unsigned long) ent; if (!dentry->d_inode) { - inode = iget(sb, ent->ino); - if (!inode) { + inode = autofs_iget(sb, ent->ino); + if (IS_ERR(inode)) { /* Failed, but leave pending for next time */ return 1; } @@ -274,6 +274,7 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c unsigned int n; int slsize; struct autofs_symlink *sl; + struct inode *inode; DPRINTK(("autofs_root_symlink: %s <- ", symname)); 
autofs_say(dentry->d_name.name,dentry->d_name.len); @@ -331,7 +332,12 @@ static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const c ent->dentry = NULL; /* We don't keep the dentry for symlinks */ autofs_hash_insert(dh,ent); - d_instantiate(dentry, iget(dir->i_sb,ent->ino)); + + inode = autofs_iget(dir->i_sb, ent->ino); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + d_instantiate(dentry, inode); unlock_kernel(); return 0; } @@ -428,6 +434,7 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); struct autofs_dirhash *dh = &sbi->dirhash; struct autofs_dir_ent *ent; + struct inode *inode; ino_t ino; lock_kernel(); @@ -469,7 +476,14 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode) autofs_hash_insert(dh,ent); inc_nlink(dir); - d_instantiate(dentry, iget(dir->i_sb,ino)); + + inode = autofs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { + drop_nlink(dir); + return PTR_ERR(inode); + } + + d_instantiate(dentry, inode); unlock_kernel(); return 0; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 521ff7caadb..f1c2ea8342f 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -359,3 +359,17 @@ int is_bad_inode(struct inode *inode) } EXPORT_SYMBOL(is_bad_inode); + +/** + * iget_failed - Mark an under-construction inode as dead and release it + * @inode: The inode to discard + * + * Mark an under-construction inode as dead and release it. 
+ */ +void iget_failed(struct inode *inode) +{ + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); +} +EXPORT_SYMBOL(iget_failed); diff --git a/fs/befs/btree.c b/fs/befs/btree.c index af5bb93276f..4202db7496c 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -232,7 +232,7 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, * @key: Key string to lookup in btree * @value: Value stored with @key * - * On sucess, returns BEFS_OK and sets *@value to the value stored + * On success, returns BEFS_OK and sets *@value to the value stored * with @key (usually the disk block number of an inode). * * On failure, returns BEFS_ERR or BEFS_BT_NOT_FOUND. diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index aacb4da6298..e3287d0d1a5 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -236,7 +236,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds) as in the indirect region code). When/if blockno is found, if blockno is inside of a block - run as stored on disk, we offset the start and lenght members + run as stored on disk, we offset the start and length members of the block run, so that blockno is the start and len is still valid (the run ends in the same place). 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index b28a20e61b8..403fe661c14 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -35,7 +35,7 @@ static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); -static void befs_read_inode(struct inode *ino); +static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); static int befs_init_inodecache(void); @@ -52,7 +52,6 @@ static int befs_statfs(struct dentry *, struct kstatfs *); static int parse_options(char *, befs_mount_options *); static const struct super_operations befs_sops = { - .read_inode = befs_read_inode, /* initialize & read inode */ .alloc_inode = befs_alloc_inode, /* allocate a new inode */ .destroy_inode = befs_destroy_inode, /* deallocate an inode */ .put_super = befs_put_super, /* uninit super */ @@ -198,9 +197,9 @@ befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) return ERR_PTR(-ENODATA); } - inode = iget(dir->i_sb, (ino_t) offset); - if (!inode) - return ERR_PTR(-EACCES); + inode = befs_iget(dir->i_sb, (ino_t) offset); + if (IS_ERR(inode)) + return ERR_CAST(inode); d_add(dentry, inode); @@ -296,17 +295,23 @@ static void init_once(struct kmem_cache *cachep, void *foo) inode_init_once(&bi->vfs_inode); } -static void -befs_read_inode(struct inode *inode) +static struct inode *befs_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh = NULL; befs_inode *raw_inode = NULL; - struct super_block *sb = inode->i_sb; befs_sb_info *befs_sb = BEFS_SB(sb); befs_inode_info *befs_ino = NULL; + struct inode *inode; + long ret = -EIO; - befs_debug(sb, "---> befs_read_inode() " "inode = %lu", 
inode->i_ino); + befs_debug(sb, "---> befs_read_inode() " "inode = %lu", ino); + + inode = iget_locked(sb, ino); + if (IS_ERR(inode)) + return inode; + if (!(inode->i_state & I_NEW)) + return inode; befs_ino = BEFS_I(inode); @@ -402,15 +407,16 @@ befs_read_inode(struct inode *inode) brelse(bh); befs_debug(sb, "<--- befs_read_inode()"); - return; + unlock_new_inode(inode); + return inode; unacquire_bh: brelse(bh); unacquire_none: - make_bad_inode(inode); + iget_failed(inode); befs_debug(sb, "<--- befs_read_inode() - Bad inode"); - return; + return ERR_PTR(ret); } /* Initialize the inode cache. Called at fs setup. @@ -752,6 +758,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) befs_sb_info *befs_sb; befs_super_block *disk_sb; struct inode *root; + long ret = -EINVAL; const unsigned long sb_block = 0; const off_t x86_sb_off = 512; @@ -833,7 +840,11 @@ befs_fill_super(struct super_block *sb, void *data, int silent) /* Set real blocksize of fs */ sb_set_blocksize(sb, (ulong) befs_sb->block_size); sb->s_op = (struct super_operations *) &befs_sops; - root = iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); + root = befs_iget(sb, iaddr2blockno(sb, &(befs_sb->root_dir))); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto unacquire_priv_sbp; + } sb->s_root = d_alloc_root(root); if (!sb->s_root) { iput(root); @@ -868,7 +879,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) unacquire_none: sb->s_fs_info = NULL; - return -EINVAL; + return ret; } static int diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index ac7a8b1d6c3..71faf4d2390 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -44,6 +44,8 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode) #define printf(format, args...) 
\ printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args) +/* inode.c */ +extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino); /* file.c */ extern const struct inode_operations bfs_file_inops; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 1fd056d0fc3..034950cb3cb 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -148,10 +148,10 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, if (bh) { unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); - inode = iget(dir->i_sb, ino); - if (!inode) { + inode = bfs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index a64a71d444f..8db623838b5 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -32,17 +32,22 @@ MODULE_LICENSE("GPL"); void dump_imap(const char *prefix, struct super_block *s); -static void bfs_read_inode(struct inode *inode) +struct inode *bfs_iget(struct super_block *sb, unsigned long ino) { - unsigned long ino = inode->i_ino; struct bfs_inode *di; + struct inode *inode; struct buffer_head *bh; int block, off; + inode = iget_locked(sb, ino); + if (IS_ERR(inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); - make_bad_inode(inode); - return; + goto error; } block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; @@ -50,8 +55,7 @@ static void bfs_read_inode(struct inode *inode) if (!bh) { printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); - make_bad_inode(inode); - return; + goto error; } off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; @@ -85,6 +89,12 @@ static void bfs_read_inode(struct inode *inode) inode->i_ctime.tv_nsec = 0; brelse(bh); + unlock_new_inode(inode); + return inode; + +error: + iget_failed(inode); + return 
ERR_PTR(-EIO); } static int bfs_write_inode(struct inode *inode, int unused) @@ -276,7 +286,6 @@ static void destroy_inodecache(void) static const struct super_operations bfs_sops = { .alloc_inode = bfs_alloc_inode, .destroy_inode = bfs_destroy_inode, - .read_inode = bfs_read_inode, .write_inode = bfs_write_inode, .delete_inode = bfs_delete_inode, .put_super = bfs_put_super, @@ -312,6 +321,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct inode *inode; unsigned i, imap_len; struct bfs_sb_info *info; + long ret = -EINVAL; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) @@ -346,14 +356,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) set_bit(i, info->si_imap); s->s_op = &bfs_sops; - inode = iget(s, BFS_ROOT_INO); - if (!inode) { + inode = bfs_iget(s, BFS_ROOT_INO); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); kfree(info->si_imap); goto out; } s->s_root = d_alloc_root(inode); if (!s->s_root) { iput(inode); + ret = -ENOMEM; kfree(info->si_imap); goto out; } @@ -404,7 +416,7 @@ out: brelse(bh); kfree(info); s->s_fs_info = NULL; - return -EINVAL; + return ret; } static int bfs_get_sb(struct file_system_type *fs_type, diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 7596e1e94cd..7f65e71bf85 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -115,7 +115,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u current->flags |= PF_DUMPCORE; strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm)); #ifndef __sparc__ - dump.u_ar0 = (void *)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump))); + dump.u_ar0 = offsetof(struct user, regs); #endif dump.signal = signr; dump_thread(regs, &dump); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 18ed6dd906c..111771d38e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -117,7 +117,7 @@ static int padzero(unsigned long elf_bss) return 0; } -/* Let's use some macros to make this stack manipulation a litle 
clearer */ +/* Let's use some macros to make this stack manipulation a little clearer */ #ifdef CONFIG_STACK_GROWSUP #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items)) #define STACK_ROUND(sp, items) \ @@ -1077,7 +1077,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if (current->flags & PF_RANDOMIZE) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); #endif diff --git a/fs/block_dev.c b/fs/block_dev.c index e48a630ae26..e63067d25cd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -534,7 +534,6 @@ void __init bdev_cache_init(void) if (err) panic("Cannot register bdev pseudo-fs"); bd_mnt = kern_mount(&bd_type); - err = PTR_ERR(bd_mnt); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ diff --git a/fs/buffer.c b/fs/buffer.c index 456c9ab7705..826baf4f04b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1798,7 +1798,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) start = max(from, block_start); size = min(to, block_end) - start; - zero_user_page(page, start, size, KM_USER0); + zero_user(page, start, size); set_buffer_uptodate(bh); } @@ -1861,19 +1861,10 @@ static int __block_prepare_write(struct inode *inode, struct page *page, mark_buffer_dirty(bh); continue; } - if (block_end > to || block_start < from) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_end > to) - memset(kaddr+to, 0, - block_end-to); - if (block_start < from) - memset(kaddr+block_start, - 0, from-block_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } + if (block_end > to || block_start < from) + zero_user_segments(page, + to, block_end, + block_start, from); continue; } } @@ -2104,8 +2095,7 @@ int block_read_full_page(struct page *page, get_block_t 
*get_block) SetPageError(page); } if (!buffer_mapped(bh)) { - zero_user_page(page, i * blocksize, blocksize, - KM_USER0); + zero_user(page, i * blocksize, blocksize); if (!err) set_buffer_uptodate(bh); continue; @@ -2218,7 +2208,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2245,7 +2235,7 @@ int cont_expand_zero(struct file *file, struct address_space *mapping, &page, &fsdata); if (err) goto out; - zero_user_page(page, zerofrom, len, KM_USER0); + zero_user(page, zerofrom, len); err = pagecache_write_end(file, mapping, curpos, len, len, page, fsdata); if (err < 0) @@ -2422,7 +2412,6 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, unsigned block_in_page; unsigned block_start, block_end; sector_t block_in_file; - char *kaddr; int nr_reads = 0; int ret = 0; int is_mapped_to_disk = 1; @@ -2493,13 +2482,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, continue; } if (buffer_new(bh) || !buffer_mapped(bh)) { - kaddr = kmap_atomic(page, KM_USER0); - if (block_start < from) - memset(kaddr+block_start, 0, from-block_start); - if (block_end > to) - memset(kaddr + to, 0, block_end - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_segments(page, block_start, from, + to, block_end); continue; } if (buffer_uptodate(bh)) @@ -2636,7 +2620,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." 
*/ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) @@ -2709,7 +2693,7 @@ has_buffers: if (page_has_buffers(page)) goto has_buffers; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); err = 0; @@ -2785,7 +2769,7 @@ int block_truncate_page(struct address_space *mapping, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); mark_buffer_dirty(bh); err = 0; @@ -2831,7 +2815,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * the page size, the remaining memory is zeroed when mapped, and * writes to that region are not written out to the file." */ - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); return __block_write_full_page(inode, page, get_block, wbc); } @@ -3169,7 +3153,7 @@ static void recalc_bh_state(void) struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) { - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); @@ -3257,12 +3241,24 @@ int bh_submit_read(struct buffer_head *bh) } EXPORT_SYMBOL(bh_submit_read); +static void +init_buffer_head(struct kmem_cache *cachep, void *data) +{ + struct buffer_head *bh = data; + + memset(bh, 0, sizeof(*bh)); + INIT_LIST_HEAD(&bh->b_assoc_buffers); +} + void __init buffer_init(void) { int nrpages; - bh_cachep = KMEM_CACHE(buffer_head, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); + bh_cachep = kmem_cache_create("buffer_head", + sizeof(struct buffer_head), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| + SLAB_MEM_SPREAD), + init_buffer_head); /* * Limit the bh occupancy to 10% of ZONE_NORMAL diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c 
index e9f4ec70109..fcc43422769 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -147,10 +147,11 @@ cifs_read_super(struct super_block *sb, void *data, #endif sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ - inode = iget(sb, ROOT_I); + inode = cifs_iget(sb, ROOT_I); - if (!inode) { - rc = -ENOMEM; + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + inode = NULL; goto out_no_root; } @@ -520,7 +521,6 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data) } static const struct super_operations cifs_super_ops = { - .read_inode = cifs_read_inode, .put_super = cifs_put_super, .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 195b14de556..68978306c3c 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -44,6 +44,7 @@ extern void cifs_read_inode(struct inode *); /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; +extern struct inode *cifs_iget(struct super_block *, unsigned long); extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index d9567ba2960..b1a4a65eaa0 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -586,10 +586,18 @@ static const struct inode_operations cifs_ipc_inode_ops = { }; /* gets root inode */ -void cifs_read_inode(struct inode *inode) +struct inode *cifs_iget(struct super_block *sb, unsigned long ino) { - int xid, rc; + int xid; struct cifs_sb_info *cifs_sb; + struct inode *inode; + long rc; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; cifs_sb = CIFS_SB(inode->i_sb); xid = GetXid(); @@ -606,10 +614,18 @@ void cifs_read_inode(struct inode *inode) inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + 
_FreeXid(xid); + iget_failed(inode); + return ERR_PTR(rc); } - /* can not call macro FreeXid here since in a void func */ + unlock_new_inode(inode); + + /* can not call macro FreeXid here since in a void func + * TODO: This is no longer true + */ _FreeXid(xid); + return inode; } int cifs_unlink(struct inode *inode, struct dentry *direntry) @@ -1386,7 +1402,7 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from) if (!page) return -ENOMEM; - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); unlock_page(page); page_cache_release(page); return rc; diff --git a/fs/compat.c b/fs/compat.c index 5216c3fd751..ee80ff341d3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -2083,51 +2083,6 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) #ifdef CONFIG_EPOLL -#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT -asmlinkage long compat_sys_epoll_ctl(int epfd, int op, int fd, - struct compat_epoll_event __user *event) -{ - long err = 0; - struct compat_epoll_event user; - struct epoll_event __user *kernel = NULL; - - if (event) { - if (copy_from_user(&user, event, sizeof(user))) - return -EFAULT; - kernel = compat_alloc_user_space(sizeof(struct epoll_event)); - err |= __put_user(user.events, &kernel->events); - err |= __put_user(user.data, &kernel->data); - } - - return err ? 
err : sys_epoll_ctl(epfd, op, fd, kernel); -} - - -asmlinkage long compat_sys_epoll_wait(int epfd, - struct compat_epoll_event __user *events, - int maxevents, int timeout) -{ - long i, ret, err = 0; - struct epoll_event __user *kbuf; - struct epoll_event ev; - - if ((maxevents <= 0) || - (maxevents > (INT_MAX / sizeof(struct epoll_event)))) - return -EINVAL; - kbuf = compat_alloc_user_space(sizeof(struct epoll_event) * maxevents); - ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout); - for (i = 0; i < ret; i++) { - err |= __get_user(ev.events, &kbuf[i].events); - err |= __get_user(ev.data, &kbuf[i].data); - err |= __put_user(ev.events, &events->events); - err |= __put_user_unaligned(ev.data, &events->data); - events++; - } - - return err ? -EFAULT: ret; -} -#endif /* CONFIG_HAS_COMPAT_EPOLL_EVENT */ - #ifdef TIF_RESTORE_SIGMASK asmlinkage long compat_sys_epoll_pwait(int epfd, struct compat_epoll_event __user *events, @@ -2153,11 +2108,7 @@ asmlinkage long compat_sys_epoll_pwait(int epfd, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } -#ifdef CONFIG_HAS_COMPAT_EPOLL_EVENT - err = compat_sys_epoll_wait(epfd, events, maxevents, timeout); -#else err = sys_epoll_wait(epfd, events, maxevents, timeout); -#endif /* * If we changed the signal mask, we need to restore the original one. 
@@ -2206,19 +2157,41 @@ asmlinkage long compat_sys_signalfd(int ufd, #ifdef CONFIG_TIMERFD -asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, - const struct compat_itimerspec __user *utmr) +asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, + const struct compat_itimerspec __user *utmr, + struct compat_itimerspec __user *otmr) { + int error; struct itimerspec t; struct itimerspec __user *ut; if (get_compat_itimerspec(&t, utmr)) return -EFAULT; - ut = compat_alloc_user_space(sizeof(*ut)); - if (copy_to_user(ut, &t, sizeof(t))) + ut = compat_alloc_user_space(2 * sizeof(struct itimerspec)); + if (copy_to_user(&ut[0], &t, sizeof(t))) return -EFAULT; + error = sys_timerfd_settime(ufd, flags, &ut[0], &ut[1]); + if (!error && otmr) + error = (copy_from_user(&t, &ut[1], sizeof(struct itimerspec)) || + put_compat_itimerspec(otmr, &t)) ? -EFAULT: 0; + + return error; +} + +asmlinkage long compat_sys_timerfd_gettime(int ufd, + struct compat_itimerspec __user *otmr) +{ + int error; + struct itimerspec t; + struct itimerspec __user *ut; - return sys_timerfd(ufd, clockid, flags, ut); + ut = compat_alloc_user_space(sizeof(struct itimerspec)); + error = sys_timerfd_gettime(ufd, ut); + if (!error) + error = (copy_from_user(&t, ut, sizeof(struct itimerspec)) || + put_compat_itimerspec(otmr, &t)) ? 
-EFAULT: 0; + + return error; } #endif /* CONFIG_TIMERFD */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ffdc022cae6..614bd75b5a4 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2986,7 +2986,7 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, } do_ioctl: - error = vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(filp, fd, cmd, arg); out_fput: fput_light(filp, fput_needed); out: diff --git a/fs/dcache.c b/fs/dcache.c index d9ca1e5ceb9..44f6cf23b70 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -89,7 +89,7 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ - if (dentry->d_hash.pprev == NULL) + if (hlist_unhashed(&dentry->d_hash)) __d_free(dentry); else call_rcu(&dentry->d_u.d_rcu, d_callback); @@ -1408,9 +1408,6 @@ void d_delete(struct dentry * dentry) if (atomic_read(&dentry->d_count) == 1) { dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); - - /* remove this and other inotify debug checks after 2.6.18 */ - dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; return; } diff --git a/fs/direct-io.c b/fs/direct-io.c index acf0da1bd25..9e81addbd6e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -878,8 +878,8 @@ do_holes: page_cache_release(page); goto out; } - zero_user_page(page, block_in_page << blkbits, - 1 << blkbits, KM_USER0); + zero_user(page, block_in_page << blkbits, + 1 << blkbits); dio->block_in_file++; block_in_page++; goto next_block; diff --git a/fs/dquot.c b/fs/dquot.c index cee7c6f428f..def4e969df7 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -696,9 +696,8 @@ static int dqinit_needed(struct inode *inode, int type) /* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { - struct inode *inode; + struct inode *inode, *old_inode = NULL; -restart: spin_lock(&inode_lock); list_for_each_entry(inode, 
&sb->s_inodes, i_sb_list) { if (!atomic_read(&inode->i_writecount)) @@ -711,12 +710,18 @@ restart: __iget(inode); spin_unlock(&inode_lock); + iput(old_inode); sb->dq_op->initialize(inode, type); - iput(inode); - /* As we may have blocked we had better restart... */ - goto restart; + /* We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the inode_lock. + * We cannot iput the inode now as we can be holding the last + * reference and we cannot iput it under inode_lock. So we + * keep the reference and iput it later. */ + old_inode = inode; + spin_lock(&inode_lock); } spin_unlock(&inode_lock); + iput(old_inode); } /* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */ diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f8ef0af919e..a066e109ad9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -355,8 +355,11 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, } /* Consider doing this once, when the file is opened */ mutex_lock(&crypt_stat->cs_tfm_mutex); - rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { + rc = crypto_blkcipher_setkey(crypt_stat->tfm, crypt_stat->key, + crypt_stat->key_size); + crypt_stat->flags |= ECRYPTFS_KEY_SET; + } if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", rc); @@ -376,11 +379,10 @@ out: * * Convert an eCryptfs page index into a lower byte offset */ -void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat) +static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, + struct ecryptfs_crypt_stat *crypt_stat) { - (*offset) = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + (*offset) = (crypt_stat->num_header_bytes_at_front + (crypt_stat->extent_size * extent_num)); } @@ -842,15 +844,13 @@ void 
ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat) set_extent_mask_and_shift(crypt_stat); crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES; if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - crypt_stat->num_header_extents_at_front = 0; + crypt_stat->num_header_bytes_at_front = 0; else { if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) - crypt_stat->num_header_extents_at_front = - (ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE - / crypt_stat->extent_size); + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; else - crypt_stat->num_header_extents_at_front = - (PAGE_CACHE_SIZE / crypt_stat->extent_size); + crypt_stat->num_header_bytes_at_front = PAGE_CACHE_SIZE; } } @@ -1128,7 +1128,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_cipher_code_str_map_elem { char cipher_str[16]; - u16 cipher_code; + u8 cipher_code; }; /* Add support for additional ciphers by adding elements here. The @@ -1152,10 +1152,10 @@ ecryptfs_cipher_code_str_map[] = { * * Returns zero on no match, or the cipher code on match */ -u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) +u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) { int i; - u16 code = 0; + u8 code = 0; struct ecryptfs_cipher_code_str_map_elem *map = ecryptfs_cipher_code_str_map; @@ -1187,7 +1187,7 @@ u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat) * * Returns zero on success */ -int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code) +int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code) { int rc = 0; int i; @@ -1236,7 +1236,8 @@ ecryptfs_write_header_metadata(char *virt, header_extent_size = (u32)crypt_stat->extent_size; num_header_extents_at_front = - (u16)crypt_stat->num_header_extents_at_front; + (u16)(crypt_stat->num_header_bytes_at_front + / crypt_stat->extent_size); header_extent_size = cpu_to_be32(header_extent_size); memcpy(virt, 
&header_extent_size, 4); virt += 4; @@ -1311,40 +1312,16 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry, - char *page_virt) + char *virt) { - int current_header_page; - int header_pages; int rc; - rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt, - 0, PAGE_CACHE_SIZE); - if (rc) { + rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt, + 0, crypt_stat->num_header_bytes_at_front); + if (rc) printk(KERN_ERR "%s: Error attempting to write header " "information to lower file; rc = [%d]\n", __FUNCTION__, rc); - goto out; - } - header_pages = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) - / PAGE_CACHE_SIZE); - memset(page_virt, 0, PAGE_CACHE_SIZE); - current_header_page = 1; - while (current_header_page < header_pages) { - loff_t offset; - - offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT); - if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, - page_virt, offset, - PAGE_CACHE_SIZE))) { - printk(KERN_ERR "%s: Error attempting to write header " - "information to lower file; rc = [%d]\n", - __FUNCTION__, rc); - goto out; - } - current_header_page++; - } -out: return rc; } @@ -1370,15 +1347,13 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry, * retrieved via a prompt. Exactly what happens at this point should * be policy-dependent. 
* - * TODO: Support header information spanning multiple pages - * * Returns zero on success; non-zero on error */ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; - char *page_virt; + char *virt; size_t size = 0; int rc = 0; @@ -1389,40 +1364,39 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out; } } else { + printk(KERN_WARNING "%s: Encrypted flag not set\n", + __FUNCTION__); rc = -EINVAL; - ecryptfs_printk(KERN_WARNING, - "Called with crypt_stat->encrypted == 0\n"); goto out; } /* Released in this function */ - page_virt = kmem_cache_zalloc(ecryptfs_header_cache_0, GFP_USER); - if (!page_virt) { - ecryptfs_printk(KERN_ERR, "Out of memory\n"); + virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); + if (!virt) { + printk(KERN_ERR "%s: Out of memory\n", __FUNCTION__); rc = -ENOMEM; goto out; } - rc = ecryptfs_write_headers_virt(page_virt, &size, crypt_stat, - ecryptfs_dentry); + rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, + ecryptfs_dentry); if (unlikely(rc)) { - ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n"); - memset(page_virt, 0, PAGE_CACHE_SIZE); + printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", + __FUNCTION__, rc); goto out_free; } if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry, - crypt_stat, page_virt, - size); + crypt_stat, virt, size); else rc = ecryptfs_write_metadata_to_contents(crypt_stat, - ecryptfs_dentry, - page_virt); + ecryptfs_dentry, virt); if (rc) { - printk(KERN_ERR "Error writing metadata out to lower file; " - "rc = [%d]\n", rc); + printk(KERN_ERR "%s: Error writing metadata out to lower file; " + "rc = [%d]\n", __FUNCTION__, rc); goto out_free; } out_free: - kmem_cache_free(ecryptfs_header_cache_0, page_virt); + memset(virt, 0, crypt_stat->num_header_bytes_at_front); + kfree(virt); out: 
return rc; } @@ -1442,16 +1416,16 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, virt += sizeof(u32); memcpy(&num_header_extents_at_front, virt, sizeof(u16)); num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front); - crypt_stat->num_header_extents_at_front = - (int)num_header_extents_at_front; + crypt_stat->num_header_bytes_at_front = + (((size_t)num_header_extents_at_front + * (size_t)header_extent_size)); (*bytes_read) = (sizeof(u32) + sizeof(u16)); if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) - && ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + && (crypt_stat->num_header_bytes_at_front < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { rc = -EINVAL; - printk(KERN_WARNING "Invalid number of header extents: [%zd]\n", - crypt_stat->num_header_extents_at_front); + printk(KERN_WARNING "Invalid header size: [%zd]\n", + crypt_stat->num_header_bytes_at_front); } return rc; } @@ -1466,7 +1440,8 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat, */ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat) { - crypt_stat->num_header_extents_at_front = 2; + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; } /** @@ -1552,9 +1527,10 @@ int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME, page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); if (size < 0) { - printk(KERN_ERR "Error attempting to read the [%s] " - "xattr from the lower file; return value = [%zd]\n", - ECRYPTFS_XATTR_NAME, size); + if (unlikely(ecryptfs_verbosity > 0)) + printk(KERN_INFO "Error attempting to read the [%s] " + "xattr from the lower file; return value = " + "[%zd]\n", ECRYPTFS_XATTR_NAME, size); rc = -EINVAL; goto out; } @@ -1802,7 +1778,7 @@ out: } struct kmem_cache *ecryptfs_key_tfm_cache; -struct list_head key_tfm_list; +static struct list_head key_tfm_list; struct mutex 
key_tfm_list_mutex; int ecryptfs_init_crypto(void) @@ -1812,6 +1788,11 @@ int ecryptfs_init_crypto(void) return 0; } +/** + * ecryptfs_destroy_crypto - free all cached key_tfms on key_tfm_list + * + * Called only at module unload time + */ int ecryptfs_destroy_crypto(void) { struct ecryptfs_key_tfm *key_tfm, *key_tfm_tmp; @@ -1835,6 +1816,8 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, struct ecryptfs_key_tfm *tmp_tfm; int rc = 0; + BUG_ON(!mutex_is_locked(&key_tfm_list_mutex)); + tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL); if (key_tfm != NULL) (*key_tfm) = tmp_tfm; @@ -1861,13 +1844,50 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, (*key_tfm) = NULL; goto out; } - mutex_lock(&key_tfm_list_mutex); list_add(&tmp_tfm->key_tfm_list, &key_tfm_list); - mutex_unlock(&key_tfm_list_mutex); out: return rc; } +/** + * ecryptfs_tfm_exists - Search for existing tfm for cipher_name. + * @cipher_name: the name of the cipher to search for + * @key_tfm: set to corresponding tfm if found + * + * Searches for cached key_tfm matching @cipher_name + * Must be called with &key_tfm_list_mutex held + * Returns 1 if found, with @key_tfm set + * Returns 0 if not found, with @key_tfm set to NULL + */ +int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm) +{ + struct ecryptfs_key_tfm *tmp_key_tfm; + + BUG_ON(!mutex_is_locked(&key_tfm_list_mutex)); + + list_for_each_entry(tmp_key_tfm, &key_tfm_list, key_tfm_list) { + if (strcmp(tmp_key_tfm->cipher_name, cipher_name) == 0) { + if (key_tfm) + (*key_tfm) = tmp_key_tfm; + return 1; + } + } + if (key_tfm) + (*key_tfm) = NULL; + return 0; +} + +/** + * ecryptfs_get_tfm_and_mutex_for_cipher_name + * + * @tfm: set to cached tfm found, or new tfm created + * @tfm_mutex: set to mutex for cached tfm found, or new tfm created + * @cipher_name: the name of the cipher to search for and/or add + * + * Sets pointers to @tfm & @tfm_mutex matching 
@cipher_name. + * Searches for cached item first, and creates new if not found. + * Returns 0 on success, non-zero if adding new cipher failed + */ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, struct mutex **tfm_mutex, char *cipher_name) @@ -1877,22 +1897,17 @@ int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, (*tfm) = NULL; (*tfm_mutex) = NULL; + mutex_lock(&key_tfm_list_mutex); - list_for_each_entry(key_tfm, &key_tfm_list, key_tfm_list) { - if (strcmp(key_tfm->cipher_name, cipher_name) == 0) { - (*tfm) = key_tfm->key_tfm; - (*tfm_mutex) = &key_tfm->key_tfm_mutex; - mutex_unlock(&key_tfm_list_mutex); + if (!ecryptfs_tfm_exists(cipher_name, &key_tfm)) { + rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0); + if (rc) { + printk(KERN_ERR "Error adding new key_tfm to list; " + "rc = [%d]\n", rc); goto out; } } mutex_unlock(&key_tfm_list_mutex); - rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0); - if (rc) { - printk(KERN_ERR "Error adding new key_tfm to list; rc = [%d]\n", - rc); - goto out; - } (*tfm) = key_tfm->key_tfm; (*tfm_mutex) = &key_tfm->key_tfm_mutex; out: diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index ce7a5d4aec3..5007f788da0 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -234,10 +234,11 @@ struct ecryptfs_crypt_stat { #define ECRYPTFS_KEY_VALID 0x00000080 #define ECRYPTFS_METADATA_IN_XATTR 0x00000100 #define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200 +#define ECRYPTFS_KEY_SET 0x00000400 u32 flags; unsigned int file_version; size_t iv_bytes; - size_t num_header_extents_at_front; + size_t num_header_bytes_at_front; size_t extent_size; /* Data extent size; default is 4096 */ size_t key_size; size_t extent_shift; @@ -322,7 +323,6 @@ struct ecryptfs_key_tfm { unsigned char cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; -extern struct list_head key_tfm_list; extern struct mutex key_tfm_list_mutex; /** @@ -521,11 +521,9 @@ 
extern struct kmem_cache *ecryptfs_file_info_cache; extern struct kmem_cache *ecryptfs_dentry_info_cache; extern struct kmem_cache *ecryptfs_inode_info_cache; extern struct kmem_cache *ecryptfs_sb_info_cache; -extern struct kmem_cache *ecryptfs_header_cache_0; extern struct kmem_cache *ecryptfs_header_cache_1; extern struct kmem_cache *ecryptfs_header_cache_2; extern struct kmem_cache *ecryptfs_xattr_cache; -extern struct kmem_cache *ecryptfs_lower_page_cache; extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; extern struct kmem_cache *ecryptfs_global_auth_tok_cache; @@ -562,8 +560,8 @@ int ecryptfs_read_and_validate_header_region(char *data, struct inode *ecryptfs_inode); int ecryptfs_read_and_validate_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry); -u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); -int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code); +u8 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat); +int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_generate_key_packet_set(char *dest_base, struct ecryptfs_crypt_stat *crypt_stat, @@ -576,8 +574,6 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); int ecryptfs_inode_set(struct inode *inode, void *lower_inode); void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); -ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size); ssize_t ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, void *value, size_t size); @@ -623,6 +619,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name, size_t key_size); int ecryptfs_init_crypto(void); int ecryptfs_destroy_crypto(void); +int ecryptfs_tfm_exists(char 
*cipher_name, struct ecryptfs_key_tfm **key_tfm); int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm, struct mutex **tfm_mutex, char *cipher_name); @@ -631,8 +628,6 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, char *sig); int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); -void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat); int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data, loff_t offset, size_t size); int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode, @@ -646,8 +641,6 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, pgoff_t page_index, size_t offset_in_page, size_t size, struct inode *ecryptfs_inode); -int ecryptfs_read(char *data, loff_t offset, size_t size, - struct file *ecryptfs_file); struct page *ecryptfs_get_locked_page(struct file *file, loff_t index); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index c98c4690a77..2b8f5ed4ade 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -209,9 +209,10 @@ static int ecryptfs_open(struct inode *inode, struct file *file) if (!(mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { rc = -EIO; - printk(KERN_WARNING "Attempt to read file that " + printk(KERN_WARNING "Either the lower file " "is not in a valid eCryptfs format, " - "and plaintext passthrough mode is not " + "or the key could not be retrieved. 
" + "Plaintext passthrough mode is not " "enabled; returning -EIO\n"); mutex_unlock(&crypt_stat->cs_mutex); goto out_free; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5a719180983..edd1e44e9d4 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -365,8 +365,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, dentry->d_sb)->mount_crypt_stat; if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) { if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) - file_size = ((crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front) + file_size = (crypt_stat->num_header_bytes_at_front + i_size_read(lower_dentry->d_inode)); else file_size = i_size_read(lower_dentry->d_inode); @@ -685,7 +684,7 @@ ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) * @crypt_stat: Crypt_stat associated with file * @upper_size: Size of the upper file * - * Calculate the requried size of the lower file based on the + * Calculate the required size of the lower file based on the * specified size of the upper file. This calculation is based on the * number of headers in the underlying file and the extent size. * @@ -697,8 +696,7 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat, { loff_t lower_size; - lower_size = (crypt_stat->extent_size - * crypt_stat->num_header_extents_at_front); + lower_size = crypt_stat->num_header_bytes_at_front; if (upper_size != 0) { loff_t num_extents; @@ -875,11 +873,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) if (!(mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { rc = -EIO; - printk(KERN_WARNING "Attempt to read file that " + printk(KERN_WARNING "Either the lower file " "is not in a valid eCryptfs format, " - "and plaintext passthrough mode is not " + "or the key could not be retrieved. 
" + "Plaintext passthrough mode is not " "enabled; returning -EIO\n"); - mutex_unlock(&crypt_stat->cs_mutex); goto out; } @@ -954,7 +952,7 @@ out: return rc; } -ssize_t +static ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index f458c1f3556..682b1b2482c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -189,7 +189,7 @@ out: } static int -parse_tag_65_packet(struct ecryptfs_session_key *session_key, u16 *cipher_code, +parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, struct ecryptfs_message *msg) { size_t i = 0; @@ -275,7 +275,7 @@ out: static int -write_tag_66_packet(char *signature, size_t cipher_code, +write_tag_66_packet(char *signature, u8 cipher_code, struct ecryptfs_crypt_stat *crypt_stat, char **packet, size_t *packet_len) { @@ -428,7 +428,7 @@ static int decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_crypt_stat *crypt_stat) { - u16 cipher_code = 0; + u8 cipher_code = 0; struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; @@ -1537,7 +1537,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, struct scatterlist dst_sg; struct scatterlist src_sg; struct mutex *tfm_mutex = NULL; - size_t cipher_code; + u8 cipher_code; size_t packet_size_length; size_t max_packet_size; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 0249aa4ae18..778c420e4ca 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...) 
* * Returns zero on success; non-zero otherwise */ -int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) +static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) { struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); @@ -226,17 +226,15 @@ out: return rc; } -enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug, - ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher, - ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, +enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, + ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, + ecryptfs_opt_ecryptfs_key_bytes, ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; static match_table_t tokens = { {ecryptfs_opt_sig, "sig=%s"}, {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, - {ecryptfs_opt_debug, "debug=%u"}, - {ecryptfs_opt_ecryptfs_debug, "ecryptfs_debug=%u"}, {ecryptfs_opt_cipher, "cipher=%s"}, {ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"}, {ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"}, @@ -313,7 +311,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) substring_t args[MAX_OPT_ARGS]; int token; char *sig_src; - char *debug_src; char *cipher_name_dst; char *cipher_name_src; char *cipher_key_bytes_src; @@ -341,16 +338,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) } sig_set = 1; break; - case ecryptfs_opt_debug: - case ecryptfs_opt_ecryptfs_debug: - debug_src = args[0].from; - ecryptfs_verbosity = - (int)simple_strtol(debug_src, &debug_src, - 0); - ecryptfs_printk(KERN_DEBUG, - "Verbosity set to [%d]" "\n", - ecryptfs_verbosity); - break; case ecryptfs_opt_cipher: case ecryptfs_opt_ecryptfs_cipher: cipher_name_src = args[0].from; @@ -423,9 +410,13 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) if (!cipher_key_bytes_set) { mount_crypt_stat->global_default_cipher_key_size = 
0; } - rc = ecryptfs_add_new_key_tfm( - NULL, mount_crypt_stat->global_default_cipher_name, - mount_crypt_stat->global_default_cipher_key_size); + mutex_lock(&key_tfm_list_mutex); + if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, + NULL)) + rc = ecryptfs_add_new_key_tfm( + NULL, mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + mutex_unlock(&key_tfm_list_mutex); if (rc) { printk(KERN_ERR "Error attempting to initialize cipher with " "name = [%s] and key size = [%td]; rc = [%d]\n", @@ -654,11 +645,6 @@ static struct ecryptfs_cache_info { .size = sizeof(struct ecryptfs_sb_info), }, { - .cache = &ecryptfs_header_cache_0, - .name = "ecryptfs_headers_0", - .size = PAGE_CACHE_SIZE, - }, - { .cache = &ecryptfs_header_cache_1, .name = "ecryptfs_headers_1", .size = PAGE_CACHE_SIZE, @@ -821,6 +807,10 @@ static int __init ecryptfs_init(void) "rc = [%d]\n", rc); goto out_release_messaging; } + if (ecryptfs_verbosity > 0) + printk(KERN_CRIT "eCryptfs verbosity set to %d. 
Secret values " + "will be written to the syslog!\n", ecryptfs_verbosity); + goto out; out_release_messaging: ecryptfs_release_messaging(ecryptfs_transport); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 32c5711d79a..dc74b186145 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -34,8 +34,6 @@ #include <linux/scatterlist.h> #include "ecryptfs_kernel.h" -struct kmem_cache *ecryptfs_lower_page_cache; - /** * ecryptfs_get_locked_page * @@ -102,13 +100,14 @@ static void set_header_info(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat) { size_t written; - int save_num_header_extents_at_front = - crypt_stat->num_header_extents_at_front; + size_t save_num_header_bytes_at_front = + crypt_stat->num_header_bytes_at_front; - crypt_stat->num_header_extents_at_front = 1; + crypt_stat->num_header_bytes_at_front = + ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE; ecryptfs_write_header_metadata(page_virt + 20, crypt_stat, &written); - crypt_stat->num_header_extents_at_front = - save_num_header_extents_at_front; + crypt_stat->num_header_bytes_at_front = + save_num_header_bytes_at_front; } /** @@ -134,8 +133,11 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, loff_t view_extent_num = ((((loff_t)page->index) * num_extents_per_page) + extent_num_in_page); + size_t num_header_extents_at_front = + (crypt_stat->num_header_bytes_at_front + / crypt_stat->extent_size); - if (view_extent_num < crypt_stat->num_header_extents_at_front) { + if (view_extent_num < num_header_extents_at_front) { /* This is a header extent */ char *page_virt; @@ -157,9 +159,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, } else { /* This is an encrypted data extent */ loff_t lower_offset = - ((view_extent_num - - crypt_stat->num_header_extents_at_front) - * crypt_stat->extent_size); + ((view_extent_num * crypt_stat->extent_size) + - crypt_stat->num_header_bytes_at_front); rc = ecryptfs_read_lower_page_segment( page, (lower_offset >> PAGE_CACHE_SHIFT), @@ -257,8 +258,7 @@ 
static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; if (to > end_byte_in_page) end_byte_in_page = to; - zero_user_page(page, end_byte_in_page, - PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0); + zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE); out: return 0; } @@ -307,7 +307,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, */ if ((i_size_read(page->mapping->host) == prev_page_end_size) && (from != 0)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); } out: return rc; diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 948f57624c0..0c4928623bb 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -293,6 +293,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, return rc; } +#if 0 /** * ecryptfs_read * @data: The virtual address into which to write the data read (and @@ -371,3 +372,4 @@ int ecryptfs_read(char *data, loff_t offset, size_t size, out: return rc; } +#endif /* 0 */ diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 4859c4eecd6..c27ac2b358a 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -156,32 +156,38 @@ static void ecryptfs_clear_inode(struct inode *inode) /** * ecryptfs_show_options * - * Prints the directory we are currently mounted over. - * Returns zero on success; non-zero otherwise + * Prints the mount options for a given superblock. + * Returns zero; does not fail. 
*/ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; - struct dentry *lower_root_dentry = ecryptfs_dentry_to_lower(sb->s_root); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(sb->s_root); - char *tmp_page; - char *path; - int rc = 0; - - tmp_page = (char *)__get_free_page(GFP_KERNEL); - if (!tmp_page) { - rc = -ENOMEM; - goto out; - } - path = d_path(lower_root_dentry, lower_mnt, tmp_page, PAGE_SIZE); - if (IS_ERR(path)) { - rc = PTR_ERR(path); - goto out; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat = + &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; + struct ecryptfs_global_auth_tok *walker; + + mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); + list_for_each_entry(walker, + &mount_crypt_stat->global_auth_tok_list, + mount_crypt_stat_list) { + seq_printf(m, ",ecryptfs_sig=%s", walker->sig); } - seq_printf(m, ",dir=%s", path); - free_page((unsigned long)tmp_page); -out: - return rc; + mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); + + seq_printf(m, ",ecryptfs_cipher=%s", + mount_crypt_stat->global_default_cipher_name); + + if (mount_crypt_stat->global_default_cipher_key_size) + seq_printf(m, ",ecryptfs_key_bytes=%zd", + mount_crypt_stat->global_default_cipher_key_size); + if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) + seq_printf(m, ",ecryptfs_passthrough"); + if (mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) + seq_printf(m, ",ecryptfs_xattr_metadata"); + if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) + seq_printf(m, ",ecryptfs_encrypted_view"); + + return 0; } const struct super_operations ecryptfs_sops = { diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 174696f9bf1..627c3026946 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -45,17 +45,26 @@ static inline void extent_copy(efs_extent *src, efs_extent *dst) { return; } -void efs_read_inode(struct inode *inode) +struct inode *efs_iget(struct 
super_block *super, unsigned long ino) { int i, inode_index; dev_t device; u32 rdev; struct buffer_head *bh; - struct efs_sb_info *sb = SUPER_INFO(inode->i_sb); - struct efs_inode_info *in = INODE_INFO(inode); + struct efs_sb_info *sb = SUPER_INFO(super); + struct efs_inode_info *in; efs_block_t block, offset; struct efs_dinode *efs_inode; - + struct inode *inode; + + inode = iget_locked(super, ino); + if (IS_ERR(inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + in = INODE_INFO(inode); + /* ** EFS layout: ** @@ -159,13 +168,13 @@ void efs_read_inode(struct inode *inode) break; } - return; + unlock_new_inode(inode); + return inode; read_inode_error: printk(KERN_WARNING "EFS: failed to read inode %lu\n", inode->i_ino); - make_bad_inode(inode); - - return; + iget_failed(inode); + return ERR_PTR(-EIO); } static inline efs_block_t diff --git a/fs/efs/namei.c b/fs/efs/namei.c index f7f407075be..e26704742d4 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -66,9 +66,10 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei lock_kernel(); inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); if (inodenum) { - if (!(inode = iget(dir->i_sb, inodenum))) { + inode = efs_iget(dir->i_sb, inodenum); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); @@ -84,12 +85,11 @@ static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, if (ino == 0) return ERR_PTR(-ESTALE); - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + inode = efs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -116,7 +116,7 @@ struct dentry *efs_get_parent(struct dentry *child) struct dentry *parent; struct inode *inode; efs_ino_t ino; - 
int error; + long error; lock_kernel(); @@ -125,10 +125,11 @@ struct dentry *efs_get_parent(struct dentry *child) if (!ino) goto fail; - error = -EACCES; - inode = iget(child->d_inode->i_sb, ino); - if (!inode) + inode = efs_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto fail; + } error = -ENOMEM; parent = d_alloc_anon(inode); diff --git a/fs/efs/super.c b/fs/efs/super.c index c79bc627f10..14082405cdd 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -107,7 +107,6 @@ static int efs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations efs_superblock_operations = { .alloc_inode = efs_alloc_inode, .destroy_inode = efs_destroy_inode, - .read_inode = efs_read_inode, .put_super = efs_put_super, .statfs = efs_statfs, .remount_fs = efs_remount, @@ -247,6 +246,7 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) struct efs_sb_info *sb; struct buffer_head *bh; struct inode *root; + int ret = -EINVAL; sb = kzalloc(sizeof(struct efs_sb_info), GFP_KERNEL); if (!sb) @@ -303,12 +303,18 @@ static int efs_fill_super(struct super_block *s, void *d, int silent) } s->s_op = &efs_superblock_operations; s->s_export_op = &efs_export_ops; - root = iget(s, EFS_ROOTINODE); + root = efs_iget(s, EFS_ROOTINODE); + if (IS_ERR(root)) { + printk(KERN_ERR "EFS: get root inode failed\n"); + ret = PTR_ERR(root); + goto out_no_fs; + } + s->s_root = d_alloc_root(root); - if (!(s->s_root)) { - printk(KERN_ERR "EFS: get root inode failed\n"); + printk(KERN_ERR "EFS: get root dentry failed\n"); iput(root); + ret = -ENOMEM; goto out_no_fs; } @@ -318,7 +324,7 @@ out_no_fs_ul: out_no_fs: s->s_fs_info = NULL; kfree(sb); - return -EINVAL; + return ret; } static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/eventfd.c b/fs/eventfd.c index 2ce19c000d2..a9f130cd50a 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include 
<linux/anon_inodes.h> #include <linux/eventfd.h> +#include <linux/syscalls.h> struct eventfd_ctx { wait_queue_head_t wqh; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 81c04abfb1a..a415f42d32c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -353,7 +353,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) spin_unlock_irqrestore(&psw->lock, flags); /* Do really wake up now */ - wake_up(wq); + wake_up_nested(wq, 1 + wake_nests); /* Remove the current task from the list */ spin_lock_irqsave(&psw->lock, flags); diff --git a/fs/exec.c b/fs/exec.c index 282240afe99..be923e4bc38 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -760,7 +760,7 @@ static int de_thread(struct task_struct *tsk) */ read_lock(&tasklist_lock); spin_lock_irq(lock); - if (sig->flags & SIGNAL_GROUP_EXIT) { + if (signal_group_exit(sig)) { /* * Another group action in progress, just * return so that the signal is processed. @@ -778,6 +778,7 @@ static int de_thread(struct task_struct *tsk) if (unlikely(tsk->group_leader == task_child_reaper(tsk))) task_active_pid_ns(tsk)->child_reaper = tsk; + sig->group_exit_task = tsk; zap_other_threads(tsk); read_unlock(&tasklist_lock); @@ -802,7 +803,6 @@ static int de_thread(struct task_struct *tsk) } sig->notify_count = count; - sig->group_exit_task = tsk; while (atomic_read(&sig->count) > count) { __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(lock); @@ -871,15 +871,10 @@ static int de_thread(struct task_struct *tsk) leader->exit_state = EXIT_DEAD; write_unlock_irq(&tasklist_lock); - } + } sig->group_exit_task = NULL; sig->notify_count = 0; - /* - * There may be one thread left which is just exiting, - * but it's safe to stop telling the group to kill themselves. 
- */ - sig->flags = 0; no_thread_group: exit_itimers(sig); @@ -947,12 +942,13 @@ static void flush_old_files(struct files_struct * files) spin_unlock(&files->file_lock); } -void get_task_comm(char *buf, struct task_struct *tsk) +char *get_task_comm(char *buf, struct task_struct *tsk) { /* buf must be at least sizeof(tsk->comm) in size */ task_lock(tsk); strncpy(buf, tsk->comm, sizeof(tsk->comm)); task_unlock(tsk); + return buf; } void set_task_comm(struct task_struct *tsk, char *buf) @@ -1548,7 +1544,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, int err = -EAGAIN; spin_lock_irq(&tsk->sighand->siglock); - if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) { + if (!signal_group_exit(tsk->signal)) { tsk->signal->group_exit_code = exit_code; zap_process(tsk); err = 0; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 377ad172d74..e7b2bafa1dd 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -69,9 +69,53 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, return desc + offset; } +static int ext2_valid_block_bitmap(struct super_block *sb, + struct ext2_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + ext2_grpblk_t offset; + ext2_grpblk_t next_zero_bit; + ext2_fsblk_t bitmap_blk; + ext2_fsblk_t group_first_block; + + group_first_block = ext2_group_first_block_no(sb, block_group); + + /* check whether block bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode table block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_table); + offset = bitmap_blk - 
group_first_block; + next_zero_bit = ext2_find_next_zero_bit(bh->b_data, + offset + EXT2_SB(sb)->s_itb_per_group, + offset); + if (next_zero_bit >= offset + EXT2_SB(sb)->s_itb_per_group) + /* good bitmap for inode tables */ + return 1; + +err_out: + ext2_error(sb, __FUNCTION__, + "Invalid block bitmap - " + "block_group = %d, block = %lu", + block_group, bitmap_blk); + return 0; +} + /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group,and validate the + * bits for block/inode/inode tables are set in the bitmaps * * Return buffer_head on success or NULL in case of failure. */ @@ -80,17 +124,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext2_group_desc * desc; struct buffer_head * bh = NULL; - - desc = ext2_get_group_desc (sb, block_group, NULL); + ext2_fsblk_t bitmap_blk; + + desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); - if (!bh) - ext2_error (sb, "read_block_bitmap", + return NULL; + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext2_error(sb, __FUNCTION__, + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); + return NULL; + } + if (likely(bh_uptodate_or_lock(bh))) + return bh; + + if (bh_submit_read(bh) < 0) { + brelse(bh); + ext2_error(sb, __FUNCTION__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); -error_out: + return NULL; + } + if (!ext2_valid_block_bitmap(sb, desc, block_group, bh)) { + brelse(bh); + return NULL; + } + return bh; } @@ -474,11 +537,13 @@ do_more: in_range (block, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - 
sbi->s_itb_per_group)) + sbi->s_itb_per_group)) { ext2_error (sb, "ext2_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", block, count); + goto error_return; + } for (i = 0, group_freed = 0; i < count; i++) { if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), @@ -1250,8 +1315,8 @@ retry_alloc: smp_rmb(); /* - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * Now search the rest of the groups. We assume that + * group_no and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -1311,11 +1376,13 @@ allocated: in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), EXT2_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT2_SB(sb)->s_itb_per_group)) + EXT2_SB(sb)->s_itb_per_group)) { ext2_error(sb, "ext2_new_blocks", "Allocating block in system zone - " "blocks from "E2FSBLK", length %lu", ret_block, num); + goto out; + } performed_allocation = 1; @@ -1466,9 +1533,6 @@ int ext2_bg_has_super(struct super_block *sb, int group) */ unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) { - if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& - !ext2_group_sparse(group)) - return 0; - return EXT2_SB(sb)->s_gdb_count; + return ext2_bg_has_super(sb, group) ? 
EXT2_SB(sb)->s_gdb_count : 0; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index d868e26c15e..8dededd80fe 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -703,7 +703,7 @@ const struct file_operations ext2_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = ext2_readdir, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index c87ae29c19c..f1e5705e75f 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -124,7 +124,7 @@ extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ -extern void ext2_read_inode (struct inode *); +extern struct inode *ext2_iget (struct super_block *, unsigned long); extern int ext2_write_inode (struct inode *, int); extern void ext2_put_inode (struct inode *); extern void ext2_delete_inode (struct inode *); @@ -139,8 +139,7 @@ int __ext2_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); /* ioctl.c */ -extern int ext2_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext2_ioctl(struct file *, unsigned int, unsigned long); extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index c051798459a..5f2fa9c3629 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -48,7 +48,7 @@ const struct file_operations ext2_file_operations = { .write = do_sync_write, .aio_read = generic_file_aio_read, .aio_write = generic_file_aio_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif @@ -65,7 +65,7 @@ const struct file_operations ext2_xip_file_operations = { .llseek = generic_file_llseek, .read = xip_file_read, .write = xip_file_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, #ifdef 
CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, #endif diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b1ab32ab5a7..c6200680542 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -286,15 +286,12 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) * ext2_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain * * Returns preferred place for a block (the goal). */ -static inline int ext2_find_goal(struct inode *inode, - long block, - Indirect chain[4], +static inline int ext2_find_goal(struct inode *inode, long block, Indirect *partial) { struct ext2_block_alloc_info *block_i; @@ -569,7 +566,6 @@ static void ext2_splice_branch(struct inode *inode, * * `handle' can be NULL if create == 0. * - * The BKL may not be held on entry here. Be sure to take it early. * return > 0, # of blocks mapped or allocated. * return = 0, if plain lookup failed. * return < 0, error case. 
@@ -639,7 +635,7 @@ reread: if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext2_init_block_alloc_info(inode); - goal = ext2_find_goal(inode, iblock, chain, partial); + goal = ext2_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -1185,22 +1181,33 @@ void ext2_get_inode_flags(struct ext2_inode_info *ei) ei->i_flags |= EXT2_DIRSYNC_FL; } -void ext2_read_inode (struct inode * inode) +struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { - struct ext2_inode_info *ei = EXT2_I(inode); - ino_t ino = inode->i_ino; + struct ext2_inode_info *ei; struct buffer_head * bh; - struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + struct ext2_inode *raw_inode; + struct inode *inode; + long ret = -EIO; int n; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT2_I(inode); #ifdef CONFIG_EXT2_FS_POSIX_ACL ei->i_acl = EXT2_ACL_NOT_CACHED; ei->i_default_acl = EXT2_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (IS_ERR(raw_inode)) + raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + if (IS_ERR(raw_inode)) { + ret = PTR_ERR(raw_inode); goto bad_inode; + } inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); @@ -1224,6 +1231,7 @@ void ext2_read_inode (struct inode * inode) if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); @@ -1290,11 +1298,12 @@ void ext2_read_inode (struct inode * inode) } brelse (bh); ext2_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } static int ext2_update_inode(struct inode * inode, int do_sync) diff --git 
a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 320b2cb3d4d..b8ea11fee5c 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -17,9 +17,9 @@ #include <asm/uaccess.h> -int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = filp->f_dentry->d_inode; struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; unsigned short rsv_window_size; @@ -141,9 +141,6 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, #ifdef CONFIG_COMPAT long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case EXT2_IOC32_GETFLAGS: @@ -161,9 +158,6 @@ long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) default: return -ENOIOCTLCMD; } - lock_kernel(); - ret = ext2_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; + return ext2_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e69beed839a..80c97fd8c57 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -63,9 +63,9 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str ino = ext2_inode_by_name(dir, dentry); inode = NULL; if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + inode = ext2_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -83,10 +83,10 @@ struct dentry *ext2_get_parent(struct dentry *child) ino = ext2_inode_by_name(child->d_inode, &dotdot); if (!ino) return ERR_PTR(-ENOENT); - inode = iget(child->d_inode->i_sb, ino); + inode = ext2_iget(child->d_inode->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + if (IS_ERR(inode)) + return 
ERR_CAST(inode); parent = d_alloc_anon(inode); if (!parent) { iput(inode); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 6abaf75163f..22f1010bf79 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -234,16 +234,16 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) le16_to_cpu(es->s_def_resgid) != EXT2_DEF_RESGID) { seq_printf(seq, ",resgid=%u", sbi->s_resgid); } - if (test_opt(sb, ERRORS_CONT)) { + if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); if (def_errors == EXT2_ERRORS_PANIC || - def_errors == EXT2_ERRORS_RO) { - seq_puts(seq, ",errors=continue"); + def_errors == EXT2_ERRORS_CONTINUE) { + seq_puts(seq, ",errors=remount-ro"); } } - if (test_opt(sb, ERRORS_RO)) - seq_puts(seq, ",errors=remount-ro"); + if (test_opt(sb, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); if (test_opt(sb, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (test_opt(sb, NO_UID32)) @@ -296,7 +296,6 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *da static const struct super_operations ext2_sops = { .alloc_inode = ext2_alloc_inode, .destroy_inode = ext2_destroy_inode, - .read_inode = ext2_read_inode, .write_inode = ext2_write_inode, .delete_inode = ext2_delete_inode, .put_super = ext2_put_super, @@ -326,11 +325,10 @@ static struct inode *ext2_nfs_get_inode(struct super_block *sb, * it might be "neater" to call ext2_get_inode first and check * if the inode is valid..... */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext2_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. 
*/ iput(inode); return ERR_PTR(-ESTALE); @@ -617,27 +615,24 @@ static int ext2_setup_super (struct super_block * sb, return res; } -static int ext2_check_descriptors (struct super_block * sb) +static int ext2_check_descriptors(struct super_block *sb) { int i; - int desc_block = 0; struct ext2_sb_info *sbi = EXT2_SB(sb); unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block); unsigned long last_block; - struct ext2_group_desc * gdp = NULL; ext2_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext2_group_desc *gdp = ext2_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { @@ -667,7 +662,6 @@ static int ext2_check_descriptors (struct super_block * sb) return 0; } first_block += EXT2_BLOCKS_PER_GROUP(sb); - gdp++; } return 1; } @@ -750,6 +744,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; + long ret = -EINVAL; int blocksize = BLOCK_SIZE; int db_count; int i, j; @@ -820,10 +815,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) - set_opt(sbi->s_mount_opt, ERRORS_RO); - else + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE) set_opt(sbi->s_mount_opt, ERRORS_CONT); + else + set_opt(sbi->s_mount_opt, ERRORS_RO); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = 
le16_to_cpu(es->s_def_resgid); @@ -868,8 +863,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) || - (sb->s_blocksize != blocksize))) { + if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { if (!silent) printk("XIP: Unsupported blocksize\n"); goto failed_mount; @@ -1046,19 +1040,24 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &ext2_sops; sb->s_export_op = &ext2_export_ops; sb->s_xattr = ext2_xattr_handlers; - root = iget(sb, EXT2_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + root = ext2_iget(sb, EXT2_ROOT_INO); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); goto failed_mount3; } + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + ret = -ENOMEM; + goto failed_mount3; + } if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) ext2_warning(sb, __FUNCTION__, "mounting ext3 filesystem as ext2"); @@ -1085,7 +1084,7 @@ failed_mount: failed_sbi: sb->s_fs_info = NULL; kfree(sbi); - return -EINVAL; + return ret; } static void ext2_commit_super (struct super_block * sb, diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index a8ba7e83127..a7571303110 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -80,13 +80,57 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, return desc + offset; } +static int ext3_valid_block_bitmap(struct super_block *sb, + struct ext3_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + ext3_grpblk_t offset; + ext3_grpblk_t 
next_zero_bit; + ext3_fsblk_t bitmap_blk; + ext3_fsblk_t group_first_block; + + group_first_block = ext3_group_first_block_no(sb, block_group); + + /* check whether block bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext3_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext3_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode table block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_table); + offset = bitmap_blk - group_first_block; + next_zero_bit = ext3_find_next_zero_bit(bh->b_data, + offset + EXT3_SB(sb)->s_itb_per_group, + offset); + if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group) + /* good bitmap for inode tables */ + return 1; + +err_out: + ext3_error(sb, __FUNCTION__, + "Invalid block bitmap - " + "block_group = %d, block = %lu", + block_group, bitmap_blk); + return 0; +} + /** * read_block_bitmap() * @sb: super block * @block_group: given block group * - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group,and validate the + * bits for block/inode/inode tables are set in the bitmaps * * Return buffer_head on success or NULL in case of failure. 
*/ @@ -95,17 +139,35 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; struct buffer_head * bh = NULL; + ext3_fsblk_t bitmap_blk; - desc = ext3_get_group_desc (sb, block_group, NULL); + desc = ext3_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); - if (!bh) - ext3_error (sb, "read_block_bitmap", + return NULL; + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext3_error(sb, __FUNCTION__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); -error_out: + return NULL; + } + if (likely(bh_uptodate_or_lock(bh))) + return bh; + + if (bh_submit_read(bh) < 0) { + brelse(bh); + ext3_error(sb, __FUNCTION__, + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); + return NULL; + } + if (!ext3_valid_block_bitmap(sb, desc, block_group, bh)) { + brelse(bh); + return NULL; + } return bh; } /* @@ -468,11 +530,13 @@ do_more: in_range (block, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) + sbi->s_itb_per_group)) { ext3_error (sb, "ext3_free_blocks", "Freeing blocks in system zones - " "Block = "E3FSBLK", count = %lu", block, count); + goto error_return; + } /* * We are about to start releasing blocks in the bitmap, @@ -1508,7 +1572,7 @@ retry_alloc: /* * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * group_no and gdp correctly point to the last group visited. 
*/ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -1575,11 +1639,13 @@ allocated: in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), EXT3_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group)) + EXT3_SB(sb)->s_itb_per_group)) { ext3_error(sb, "ext3_new_block", "Allocating block in system zone - " "blocks from "E3FSBLK", length %lu", ret_block, num); + goto out; + } performed_allocation = 1; @@ -1782,11 +1848,7 @@ static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext3_group_sparse(group)) - return 0; - return EXT3_SB(sb)->s_gdb_count; + return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0; } /** diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 1bc8cd89c51..58ae2f943f1 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -642,14 +642,15 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); unsigned long block_group; int bit; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; struct inode *inode = NULL; + long err = -EIO; /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext3_warning(sb, __FUNCTION__, "bad orphan ino %lu! e2fsck was run?", ino); - goto out; + goto error; } block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); @@ -658,38 +659,49 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) if (!bitmap_bh) { ext3_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); - goto out; + goto error; } /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. 
*/ - if (!ext3_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext3_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext3_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } + if (!ext3_test_bit(bit, bitmap_bh->b_data)) + goto bad_orphan; + + inode = ext3_iget(sb, ino); + if (IS_ERR(inode)) + goto iget_failed; + + if (NEXT_ORPHAN(inode) > max_ino) + goto bad_orphan; + brelse(bitmap_bh); + return inode; + +iget_failed: + err = PTR_ERR(inode); + inode = NULL; +bad_orphan: + ext3_warning(sb, __FUNCTION__, + "bad orphan inode %lu! e2fsck was run?", ino); + printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext3_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) + if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); - inode = NULL; } -out: brelse(bitmap_bh); - return inode; +error: + return ERR_PTR(err); } unsigned long ext3_count_free_inodes (struct super_block * sb) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9b162cd6c16..eb95670a27e 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -439,16 +439,14 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) * ext3_find_goal - find a prefered place for 
allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. * * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. + * returns it. */ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, - Indirect chain[4], Indirect *partial) + Indirect *partial) { struct ext3_block_alloc_info *block_i; @@ -884,7 +882,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext3_init_block_alloc_info(inode); - goal = ext3_find_goal(inode, iblock, chain, partial); + goal = ext3_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -941,55 +939,45 @@ out: return err; } -#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 +/* + * Number of credits we need for writing DIO_MAX_BLOCKS: + * We need sb + group descriptor + bitmap + inode -> 4 + * For B blocks with A block pointers per block we need: + * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). + * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. + */ +#define DIO_CREDITS 25 static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext3_journal_current_handle(); - int ret = 0; + int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - if (!create) - goto get_block; /* A read */ - - if (max_blocks == 1) - goto get_block; /* A single block get */ - - if (handle->h_transaction->t_state == T_LOCKED) { - /* - * Huge direct-io writes can hold off commits for long - * periods of time. Let this commit run. 
- */ - ext3_journal_stop(handle); - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) + if (create && !handle) { /* Direct IO write... */ + if (max_blocks > DIO_MAX_BLOCKS) + max_blocks = DIO_MAX_BLOCKS; + handle = ext3_journal_start(inode, DIO_CREDITS + + 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto get_block; - } - - if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) { - /* - * Getting low on buffer credits... - */ - ret = ext3_journal_extend(handle, DIO_CREDITS); - if (ret > 0) { - /* - * Couldn't extend the transaction. Start a new one. - */ - ret = ext3_journal_restart(handle, DIO_CREDITS); + goto out; } + started = 1; } -get_block: - if (ret == 0) { - ret = ext3_get_blocks_handle(handle, inode, iblock, + ret = ext3_get_blocks_handle(handle, inode, iblock, max_blocks, bh_result, create, 0); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; } + if (started) + ext3_journal_stop(handle); +out: return ret; } @@ -1680,7 +1668,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * if the machine crashes during the write. * * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. + * crashes then stale disk data _may_ be exposed inside the file. But current + * VFS code falls back into buffered path in that case so we are safe. 
*/ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, @@ -1689,7 +1678,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct ext3_inode_info *ei = EXT3_I(inode); - handle_t *handle = NULL; + handle_t *handle; ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); @@ -1697,17 +1686,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { loff_t final_size = offset + count; - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } if (final_size > inode->i_size) { + /* Credits for sb + inode write */ + handle = ext3_journal_start(inode, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } ret = ext3_orphan_add(handle, inode); - if (ret) - goto out_stop; + if (ret) { + ext3_journal_stop(handle); + goto out; + } orphan = 1; ei->i_disksize = inode->i_size; + ext3_journal_stop(handle); } } @@ -1715,18 +1708,21 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, ext3_get_block, NULL); - /* - * Reacquire the handle: ext3_get_block() can restart the transaction - */ - handle = ext3_journal_current_handle(); - -out_stop: - if (handle) { + if (orphan) { int err; - if (orphan && inode->i_nlink) + /* Credits for sb + inode write */ + handle = ext3_journal_start(inode, 2); + if (IS_ERR(handle)) { + /* This is really bad luck. We've written the data + * but cannot extend i_size. Bail out and pretend + * the write failed... 
*/ + ret = PTR_ERR(handle); + goto out; + } + if (inode->i_nlink) ext3_orphan_del(handle, inode); - if (orphan && ret > 0) { + if (ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { ei->i_disksize = end; @@ -1845,7 +1841,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode) && PageUptodate(page)) { - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); goto unlock; } @@ -1898,7 +1894,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); BUFFER_TRACE(bh, "zeroed end of block"); err = 0; @@ -2658,21 +2654,31 @@ void ext3_get_inode_flags(struct ext3_inode_info *ei) ei->i_flags |= EXT3_DIRSYNC_FL; } -void ext3_read_inode(struct inode * inode) +struct inode *ext3_iget(struct super_block *sb, unsigned long ino) { struct ext3_iloc iloc; struct ext3_inode *raw_inode; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_inode_info *ei; struct buffer_head *bh; + struct inode *inode; + long ret; int block; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT3_I(inode); #ifdef CONFIG_EXT3_FS_POSIX_ACL ei->i_acl = EXT3_ACL_NOT_CACHED; ei->i_default_acl = EXT3_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext3_get_inode_loc(inode, &iloc, 0)) + ret = __ext3_get_inode_loc(inode, &iloc, 0); + if (ret < 0) goto bad_inode; bh = iloc.bh; raw_inode = ext3_raw_inode(&iloc); @@ -2703,6 +2709,7 @@ void ext3_read_inode(struct inode * inode) !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -2746,6 +2753,7 @@ void ext3_read_inode(struct inode 
* inode) if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT3_INODE_SIZE(inode->i_sb)) { brelse (bh); + ret = -EIO; goto bad_inode; } if (ei->i_extra_isize == 0) { @@ -2787,11 +2795,12 @@ void ext3_read_inode(struct inode * inode) } brelse (iloc.bh); ext3_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } /* diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4ab6f76e63d..dec3e0d88ab 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -860,14 +860,10 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry, int nblocks, i, err; struct inode *dir = dentry->d_parent->d_inode; int namelen; - const u8 *name; - unsigned blocksize; *res_dir = NULL; sb = dir->i_sb; - blocksize = sb->s_blocksize; namelen = dentry->d_name.len; - name = dentry->d_name.name; if (namelen > EXT3_NAME_LEN) return NULL; if (is_dx(dir)) { @@ -1041,17 +1037,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str if (!ext3_valid_inum(dir->i_sb, ino)) { ext3_error(dir->i_sb, "ext3_lookup", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext3_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -1080,18 +1070,13 @@ struct dentry *ext3_get_parent(struct dentry *child) if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { ext3_error(child->d_inode->i_sb, "ext3_get_parent", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext3_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return 
ERR_CAST(inode); + parent = d_alloc_anon(inode); if (!parent) { iput(inode); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 44de1453c30..ebc05af7343 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -795,12 +795,11 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT3_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { + inode = ext3_iget(sb, EXT3_RESIZE_INO); + if (IS_ERR(inode)) { ext3_warning(sb, __FUNCTION__, "Error opening resize inode"); - iput(inode); - return -ENOENT; + return PTR_ERR(inode); } } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f3675cc630e..cf2a2c3660e 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -575,16 +575,16 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { seq_printf(seq, ",resgid=%u", sbi->s_resgid); } - if (test_opt(sb, ERRORS_CONT)) { + if (test_opt(sb, ERRORS_RO)) { int def_errors = le16_to_cpu(es->s_errors); if (def_errors == EXT3_ERRORS_PANIC || - def_errors == EXT3_ERRORS_RO) { - seq_puts(seq, ",errors=continue"); + def_errors == EXT3_ERRORS_CONTINUE) { + seq_puts(seq, ",errors=remount-ro"); } } - if (test_opt(sb, ERRORS_RO)) - seq_puts(seq, ",errors=remount-ro"); + if (test_opt(sb, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); if (test_opt(sb, ERRORS_PANIC)) seq_puts(seq, ",errors=panic"); if (test_opt(sb, NO_UID32)) @@ -649,11 +649,10 @@ static struct inode *ext3_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext3_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return 
ERR_PTR(-ESTALE); } @@ -722,7 +721,6 @@ static struct quotactl_ops ext3_qctl_operations = { static const struct super_operations ext3_sops = { .alloc_inode = ext3_alloc_inode, .destroy_inode = ext3_destroy_inode, - .read_inode = ext3_read_inode, .write_inode = ext3_write_inode, .dirty_inode = ext3_dirty_inode, .delete_inode = ext3_delete_inode, @@ -1252,28 +1250,24 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, } /* Called at mount-time, super-block is locked */ -static int ext3_check_descriptors (struct super_block * sb) +static int ext3_check_descriptors(struct super_block *sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext3_fsblk_t last_block; - struct ext3_group_desc * gdp = NULL; - int desc_block = 0; int i; ext3_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext3_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { @@ -1306,7 +1300,6 @@ static int ext3_check_descriptors (struct super_block * sb) return 0; } first_block += EXT3_BLOCKS_PER_GROUP(sb); - gdp++; } sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); @@ -1383,8 +1376,8 @@ static void ext3_orphan_cleanup (struct super_block * sb, while (es->s_last_orphan) { struct inode *inode; - if (!(inode = - ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); + if (IS_ERR(inode)) { es->s_last_orphan = 0; break; } @@ -1513,6 +1506,7 @@ static int 
ext3_fill_super (struct super_block *sb, void *data, int silent) int db_count; int i; int needs_recovery; + int ret = -EINVAL; __le32 features; int err; @@ -1583,10 +1577,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) - set_opt(sbi->s_mount_opt, ERRORS_RO); - else + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) set_opt(sbi->s_mount_opt, ERRORS_CONT); + else + set_opt(sbi->s_mount_opt, ERRORS_RO); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); @@ -1882,19 +1876,24 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. */ - root = iget(sb, EXT3_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root = ext3_iget(sb, EXT3_ROOT_INO); + if (IS_ERR(root)) { printk(KERN_ERR "EXT3-fs: get root inode failed\n"); - iput(root); + ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT3-fs: get root dentry failed\n"); + iput(root); + ret = -ENOMEM; + goto failed_mount4; + } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); /* @@ -1946,7 +1945,7 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi); lock_kernel(); - return -EINVAL; + return ret; } /* @@ -1982,8 +1981,8 @@ static journal_t *ext3_get_journal(struct super_block *sb, * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. 
*/ - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { + journal_inode = ext3_iget(sb, journal_inum); + if (IS_ERR(journal_inode)) { printk(KERN_ERR "EXT3-fs: no journal found.\n"); return NULL; } @@ -1996,7 +1995,7 @@ static journal_t *ext3_get_journal(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index ac75ea953d8..0737e05ba3d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1700,7 +1700,7 @@ retry_alloc: /* * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. + * group_no and gdp correctly point to the last group visited. */ for (bgi = 0; bgi < ngroups; bgi++) { group_no++; @@ -2011,11 +2011,7 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, ext4_group_t group) { - if (EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext4_group_sparse(group)) - return 0; - return EXT4_SB(sb)->s_gdb_count; + return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; } /** diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 575b5215c80..da18a74b966 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -782,14 +782,15 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); ext4_group_t block_group; int bit; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; struct inode *inode = NULL; + long err = -EIO; /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { ext4_warning(sb, __FUNCTION__, "bad orphan ino %lu! 
e2fsck was run?", ino); - goto out; + goto error; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -798,38 +799,49 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) if (!bitmap_bh) { ext4_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); - goto out; + goto error; } /* Having the inode bit set should be a 100% indicator that this * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ - if (!ext4_test_bit(bit, bitmap_bh->b_data) || - !(inode = iget(sb, ino)) || is_bad_inode(inode) || - NEXT_ORPHAN(inode) > max_ino) { - ext4_warning(sb, __FUNCTION__, - "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } + if (!ext4_test_bit(bit, bitmap_bh->b_data)) + goto bad_orphan; + + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + goto iget_failed; + + if (NEXT_ORPHAN(inode) > max_ino) + goto bad_orphan; + brelse(bitmap_bh); + return inode; + +iget_failed: + err = PTR_ERR(inode); + inode = NULL; +bad_orphan: + ext4_warning(sb, __FUNCTION__, + "bad orphan inode %lu! 
e2fsck was run?", ino); + printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext4_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) + if (inode->i_nlink == 0) inode->i_blocks = 0; iput(inode); - inode = NULL; } -out: brelse(bitmap_bh); - return inode; +error: + return ERR_PTR(err); } unsigned long ext4_count_free_inodes (struct super_block * sb) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bb717cbb749..f4e38745224 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -429,16 +429,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) * ext4_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. * * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. + * returns it. 
*/ - static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, - Indirect chain[4], Indirect *partial) + Indirect *partial) { struct ext4_block_alloc_info *block_i; @@ -839,7 +836,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) ext4_init_block_alloc_info(inode); - goal = ext4_find_goal(inode, iblock, chain, partial); + goal = ext4_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -1840,7 +1837,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode) && PageUptodate(page)) { - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); set_page_dirty(page); goto unlock; } @@ -1893,7 +1890,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); BUFFER_TRACE(bh, "zeroed end of block"); @@ -2683,21 +2680,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, } } -void ext4_read_inode(struct inode * inode) +struct inode *ext4_iget(struct super_block *sb, unsigned long ino) { struct ext4_iloc iloc; struct ext4_inode *raw_inode; - struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_inode_info *ei; struct buffer_head *bh; + struct inode *inode; + long ret; int block; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT4_I(inode); #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext4_get_inode_loc(inode, &iloc, 0)) + ret = __ext4_get_inode_loc(inode, &iloc, 0); + if (ret < 0) goto bad_inode; bh = iloc.bh; raw_inode = 
ext4_raw_inode(&iloc); @@ -2723,6 +2730,7 @@ void ext4_read_inode(struct inode * inode) !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -2761,6 +2769,7 @@ void ext4_read_inode(struct inode * inode) if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { brelse (bh); + ret = -EIO; goto bad_inode; } if (ei->i_extra_isize == 0) { @@ -2814,11 +2823,12 @@ void ext4_read_inode(struct inode * inode) } brelse (iloc.bh); ext4_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } static int ext4_inode_blocks_set(handle_t *handle, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 67b6d8a1cef..d153bb5922f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1039,17 +1039,11 @@ static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, str if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } @@ -1078,18 +1072,13 @@ struct dentry *ext4_get_parent(struct dentry *child) if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { ext4_error(child->d_inode->i_sb, "ext4_get_parent", "bad inode number: %lu", ino); - inode = NULL; - } else - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - - if (is_bad_inode(inode)) { - iput(inode); - return ERR_PTR(-ENOENT); + return ERR_PTR(-EIO); } + inode = ext4_iget(child->d_inode->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + 
parent = d_alloc_anon(inode); if (!parent) { iput(inode); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4fbba60816f..9477a2bd6ff 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -779,12 +779,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT4_RESIZE_INO); - if (!inode || is_bad_inode(inode)) { + inode = ext4_iget(sb, EXT4_RESIZE_INO); + if (IS_ERR(inode)) { ext4_warning(sb, __FUNCTION__, "Error opening resize inode"); - iput(inode); - return -ENOENT; + return PTR_ERR(inode); } } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 055a0cd0168..93beb865c20 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -777,11 +777,10 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, * Currently we don't know the generation for parent directory, so * a generation of 0 means "accept any" */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext4_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -850,7 +849,6 @@ static struct quotactl_ops ext4_qctl_operations = { static const struct super_operations ext4_sops = { .alloc_inode = ext4_alloc_inode, .destroy_inode = ext4_destroy_inode, - .read_inode = ext4_read_inode, .write_inode = ext4_write_inode, .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, @@ -1458,7 +1456,7 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, } /* Called at mount-time, super-block is locked */ -static int ext4_check_descriptors (struct super_block * sb) +static int ext4_check_descriptors(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -1466,8 +1464,6 
@@ static int ext4_check_descriptors (struct super_block * sb) ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; - struct ext4_group_desc * gdp = NULL; - int desc_block = 0; int flexbg_flag = 0; ext4_group_t i; @@ -1476,17 +1472,15 @@ static int ext4_check_descriptors (struct super_block * sb) ext4_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); + if (i == sbi->s_groups_count - 1 || flexbg_flag) last_block = ext4_blocks_count(sbi->s_es) - 1; else last_block = first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext4_group_desc *) - sbi->s_group_desc[desc_block++]->b_data; block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { @@ -1524,8 +1518,6 @@ static int ext4_check_descriptors (struct super_block * sb) } if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); - gdp = (struct ext4_group_desc *) - ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); @@ -1811,6 +1803,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; + int ret = -EINVAL; int blocksize; int db_count; int i; @@ -2243,19 +2236,24 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. 
*/ - root = iget(sb, EXT4_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root = ext4_iget(sb, EXT4_ROOT_INO); + if (IS_ERR(root)) { printk(KERN_ERR "EXT4-fs: get root inode failed\n"); - iput(root); + ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; + iput(root); printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + iput(root); + ret = -ENOMEM; + goto failed_mount4; + } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -2336,7 +2334,7 @@ out_fail: sb->s_fs_info = NULL; kfree(sbi); lock_kernel(); - return -EINVAL; + return ret; } /* @@ -2372,8 +2370,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. */ - journal_inode = iget(sb, journal_inum); - if (!journal_inode) { + journal_inode = ext4_iget(sb, journal_inum); + if (IS_ERR(journal_inode)) { printk(KERN_ERR "EXT4-fs: no journal found.\n"); return NULL; } @@ -2386,7 +2384,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); - if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; diff --git a/fs/fat/file.c b/fs/fat/file.c index 69a83b59dce..c614175876e 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -155,6 +155,42 @@ out: return err; } +static int check_mode(const struct msdos_sb_info *sbi, mode_t mode) +{ + mode_t req = mode & ~S_IFMT; + + /* + * Of the r and x bits, all (subject to umask) must be present. Of the + * w bits, either all (subject to umask) or none must be present. 
+ */ + + if (S_ISREG(mode)) { + req &= ~sbi->options.fs_fmask; + + if ((req & (S_IRUGO | S_IXUGO)) != + ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_fmask)) + return -EPERM; + + if ((req & S_IWUGO) != 0 && + (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_fmask)) + return -EPERM; + } else if (S_ISDIR(mode)) { + req &= ~sbi->options.fs_dmask; + + if ((req & (S_IRUGO | S_IXUGO)) != + ((S_IRUGO | S_IXUGO) & ~sbi->options.fs_dmask)) + return -EPERM; + + if ((req & S_IWUGO) != 0 && + (req & S_IWUGO) != (S_IWUGO & ~sbi->options.fs_dmask)) + return -EPERM; + } else { + return -EPERM; + } + + return 0; +} + int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -186,9 +222,7 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) if (((attr->ia_valid & ATTR_UID) && (attr->ia_uid != sbi->options.fs_uid)) || ((attr->ia_valid & ATTR_GID) && - (attr->ia_gid != sbi->options.fs_gid)) || - ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_gid != sbi->options.fs_gid))) error = -EPERM; if (error) { @@ -196,6 +230,13 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr) error = 0; goto out; } + + if (attr->ia_valid & ATTR_MODE) { + error = check_mode(sbi, attr->ia_mode); + if (error != 0 && !sbi->options.quiet) + goto out; + } + error = inode_setattr(inode, attr); if (error) goto out; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 920a576e1c2..085269e07fb 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -634,8 +634,6 @@ static const struct super_operations fat_sops = { .clear_inode = fat_clear_inode, .remount_fs = fat_remount, - .read_inode = make_bad_inode, - .show_options = fat_show_options, }; @@ -663,8 +661,8 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, if (fh_len < 5 || fh_type != 3) return NULL; - inode = iget(sb, fh[0]); - if (!inode || is_bad_inode(inode) || inode->i_generation != fh[1]) { + inode = ilookup(sb, fh[0]); + if 
(!inode || inode->i_generation != fh[1]) { if (inode) iput(inode); inode = NULL; @@ -760,7 +758,7 @@ static struct dentry *fat_get_parent(struct dentry *child) inode = fat_build_inode(child->d_sb, de, i_pos); brelse(bh); if (IS_ERR(inode)) { - parent = ERR_PTR(PTR_ERR(inode)); + parent = ERR_CAST(inode); goto out; } parent = d_alloc_anon(inode); @@ -1295,10 +1293,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; if (!IS_FSINFO(fsinfo)) { - printk(KERN_WARNING - "FAT: Did not find valid FSINFO signature.\n" - " Found signature1 0x%08x signature2 0x%08x" - " (sector = %lu)\n", + printk(KERN_WARNING "FAT: Invalid FSINFO signature: " + "0x%08x, 0x%08x (sector = %lu)\n", le32_to_cpu(fsinfo->signature1), le32_to_cpu(fsinfo->signature2), sbi->fsinfo_sector); diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 308f2b6b502..61f23511eac 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -55,9 +55,8 @@ void fat_clusters_flush(struct super_block *sb) fsinfo = (struct fat_boot_fsinfo *)bh->b_data; /* Sanity check */ if (!IS_FSINFO(fsinfo)) { - printk(KERN_ERR "FAT: Did not find valid FSINFO signature.\n" - " Found signature1 0x%08x signature2 0x%08x" - " (sector = %lu)\n", + printk(KERN_ERR "FAT: Invalid FSINFO signature: " + "0x%08x, 0x%08x (sector = %lu)\n", le32_to_cpu(fsinfo->signature1), le32_to_cpu(fsinfo->signature2), sbi->fsinfo_sector); diff --git a/fs/file.c b/fs/file.c index c5575de0111..5110acb1c9e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -24,6 +24,8 @@ struct fdtable_defer { struct fdtable *next; }; +int sysctl_nr_open __read_mostly = 1024*1024; + /* * We use this list to defer free fdtables that have vmalloced * sets/arrays. 
By keeping a per-cpu list, we avoid having to embed @@ -147,8 +149,8 @@ static struct fdtable * alloc_fdtable(unsigned int nr) nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > NR_OPEN) - nr = NR_OPEN; + if (nr > sysctl_nr_open) + nr = sysctl_nr_open; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -233,7 +235,7 @@ int expand_files(struct files_struct *files, int nr) if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (nr >= NR_OPEN) + if (nr >= sysctl_nr_open) return -EMFILE; /* All good, so we try */ diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h index 3c96d6e6397..aaf1fb09863 100644 --- a/fs/freevxfs/vxfs_dir.h +++ b/fs/freevxfs/vxfs_dir.h @@ -41,7 +41,7 @@ * VxFS directory block header. * * This entry is the head of every filesystem block in a directory. - * It is used for free space managment and additionally includes + * It is used for free space management and additionally includes * a hash for speeding up directory search (lookup). 
* * The hash may be empty and in fact we do not use it all in the diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 91ccee8723f..2b46064f66b 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -58,7 +58,7 @@ extern struct inode * vxfs_get_fake_inode(struct super_block *, extern void vxfs_put_fake_inode(struct inode *); extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); -extern void vxfs_read_inode(struct inode *); +extern struct inode * vxfs_iget(struct super_block *, ino_t); extern void vxfs_clear_inode(struct inode *); /* vxfs_lookup.c */ diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 24b5a775ff9..8a5959a61ba 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -54,7 +54,7 @@ const struct inode_operations vxfs_immed_symlink_iops = { }; /* - * Adress space operations for immed files and directories. + * Address space operations for immed files and directories. */ const struct address_space_operations vxfs_immed_aops = { .readpage = vxfs_immed_readpage, diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index d1f7c5b5b3c..ad88d2364bc 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -129,7 +129,7 @@ fail: * Description: * Search the for inode number @ino in the filesystem * described by @sbp. Use the specified inode table (@ilistp). - * Returns the matching VxFS inode on success, else a NULL pointer. + * Returns the matching VxFS inode on success, else an error code. 
*/ static struct vxfs_inode_info * __vxfs_iget(ino_t ino, struct inode *ilistp) @@ -157,12 +157,12 @@ __vxfs_iget(ino_t ino, struct inode *ilistp) } printk(KERN_WARNING "vxfs: error on page %p\n", pp); - return NULL; + return ERR_CAST(pp); fail: printk(KERN_WARNING "vxfs: unable to read inode %ld\n", (unsigned long)ino); vxfs_put_page(pp); - return NULL; + return ERR_PTR(-ENOMEM); } /** @@ -178,7 +178,10 @@ fail: struct vxfs_inode_info * vxfs_stiget(struct super_block *sbp, ino_t ino) { - return __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist); + struct vxfs_inode_info *vip; + + vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist); + return IS_ERR(vip) ? NULL : vip; } /** @@ -282,23 +285,32 @@ vxfs_put_fake_inode(struct inode *ip) } /** - * vxfs_read_inode - fill in inode information - * @ip: inode pointer to fill + * vxfs_iget - get an inode + * @sbp: the superblock to get the inode for + * @ino: the number of the inode to get * * Description: - * vxfs_read_inode reads the disk inode for @ip and fills - * in all relevant fields in @ip. + * vxfs_read_inode creates an inode, reads the disk inode for @ino and fills + * in all relevant fields in the new inode. 
*/ -void -vxfs_read_inode(struct inode *ip) +struct inode * +vxfs_iget(struct super_block *sbp, ino_t ino) { - struct super_block *sbp = ip->i_sb; struct vxfs_inode_info *vip; const struct address_space_operations *aops; - ino_t ino = ip->i_ino; - - if (!(vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist))) - return; + struct inode *ip; + + ip = iget_locked(sbp, ino); + if (!ip) + return ERR_PTR(-ENOMEM); + if (!(ip->i_state & I_NEW)) + return ip; + + vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist); + if (IS_ERR(vip)) { + iget_failed(ip); + return ERR_CAST(vip); + } vxfs_iinit(ip, vip); @@ -323,7 +335,8 @@ vxfs_read_inode(struct inode *ip) } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); - return; + unlock_new_inode(ip); + return ip; } /** diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index bf86e5444ea..aee049cb9f8 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -213,10 +213,10 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) lock_kernel(); ino = vxfs_inode_by_name(dip, dp); if (ino) { - ip = iget(dip->i_sb, ino); - if (!ip) { + ip = vxfs_iget(dip->i_sb, ino); + if (IS_ERR(ip)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(ip); } } unlock_kernel(); diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 4f95572d272..1dacda83157 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -60,7 +60,6 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *); static int vxfs_remount(struct super_block *, int *, char *); static const struct super_operations vxfs_super_ops = { - .read_inode = vxfs_read_inode, .clear_inode = vxfs_clear_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, @@ -153,6 +152,7 @@ static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) struct buffer_head *bp = NULL; u_long bsize; struct inode *root; + int ret = -EINVAL; sbp->s_flags |= MS_RDONLY; @@ -219,7 +219,11 @@ static int 
vxfs_fill_super(struct super_block *sbp, void *dp, int silent) } sbp->s_op = &vxfs_super_ops; - root = iget(sbp, VXFS_ROOT_INO); + root = vxfs_iget(sbp, VXFS_ROOT_INO); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out; + } sbp->s_root = d_alloc_root(root); if (!sbp->s_root) { iput(root); @@ -236,7 +240,7 @@ out_free_ilist: out: brelse(bp); kfree(infp); - return -EINVAL; + return ret; } /* diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 300324bd563..db80ce9eb1d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -284,7 +284,17 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) * soon as the queue becomes uncongested. */ inode->i_state |= I_DIRTY_PAGES; - requeue_io(inode); + if (wbc->nr_to_write <= 0) { + /* + * slice used up: queue for next turn + */ + requeue_io(inode); + } else { + /* + * somehow blocked: retry later + */ + redirty_tail(inode); + } } else { /* * Otherwise fully redirty the inode so that @@ -334,9 +344,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(inode->i_state & I_WILL_FREE); if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { - struct address_space *mapping = inode->i_mapping; - int ret; - /* * We're skipping this inode because it's locked, and we're not * doing writeback-for-data-integrity. Move it to s_more_io so @@ -345,15 +352,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * completed a full scan of s_io. */ requeue_io(inode); - - /* - * Even if we don't actually write the inode itself here, - * we can at least start some of the data writeout.. 
- */ - spin_unlock(&inode_lock); - ret = do_writepages(mapping, wbc); - spin_lock(&inode_lock); - return ret; + return 0; } /* @@ -479,8 +478,12 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) iput(inode); cond_resched(); spin_lock(&inode_lock); - if (wbc->nr_to_write <= 0) + if (wbc->nr_to_write <= 0) { + wbc->more_io = 1; break; + } + if (!list_empty(&sb->s_more_io)) + wbc->more_io = 1; } return; /* Leave any unwritten inodes on s_io */ } @@ -512,8 +515,7 @@ writeback_inodes(struct writeback_control *wbc) might_sleep(); spin_lock(&sb_lock); restart: - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) { if (sb_has_dirty_inodes(sb)) { /* we're making our own get_super here */ sb->s_count++; @@ -578,10 +580,8 @@ static void set_sb_syncing(int val) { struct super_block *sb; spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { + list_for_each_entry_reverse(sb, &super_blocks, s_list) sb->s_syncing = val; - } spin_unlock(&sb_lock); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index db534bcde45..af639807524 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -201,6 +201,55 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) } } +static unsigned len_args(unsigned numargs, struct fuse_arg *args) +{ + unsigned nbytes = 0; + unsigned i; + + for (i = 0; i < numargs; i++) + nbytes += args[i].size; + + return nbytes; +} + +static u64 fuse_get_unique(struct fuse_conn *fc) +{ + fc->reqctr++; + /* zero is special */ + if (fc->reqctr == 0) + fc->reqctr = 1; + + return fc->reqctr; +} + +static void queue_request(struct fuse_conn *fc, struct fuse_req *req) +{ + req->in.h.unique = fuse_get_unique(fc); + req->in.h.len = sizeof(struct fuse_in_header) + + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); + list_add_tail(&req->list, &fc->pending); + 
req->state = FUSE_REQ_PENDING; + if (!req->waiting) { + req->waiting = 1; + atomic_inc(&fc->num_waiting); + } + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); +} + +static void flush_bg_queue(struct fuse_conn *fc) +{ + while (fc->active_background < FUSE_MAX_BACKGROUND && + !list_empty(&fc->bg_queue)) { + struct fuse_req *req; + + req = list_entry(fc->bg_queue.next, struct fuse_req, list); + list_del(&req->list); + fc->active_background++; + queue_request(fc, req); + } +} + /* * This function is called when a request is finished. Either a reply * has arrived or it was aborted (and not yet sent) or some error @@ -229,6 +278,8 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) clear_bdi_congested(&fc->bdi, WRITE); } fc->num_background--; + fc->active_background--; + flush_bg_queue(fc); } spin_unlock(&fc->lock); wake_up(&req->waitq); @@ -320,42 +371,6 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) } } -static unsigned len_args(unsigned numargs, struct fuse_arg *args) -{ - unsigned nbytes = 0; - unsigned i; - - for (i = 0; i < numargs; i++) - nbytes += args[i].size; - - return nbytes; -} - -static u64 fuse_get_unique(struct fuse_conn *fc) - { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - - return fc->reqctr; -} - -static void queue_request(struct fuse_conn *fc, struct fuse_req *req) -{ - req->in.h.unique = fuse_get_unique(fc); - req->in.h.len = sizeof(struct fuse_in_header) + - len_args(req->in.numargs, (struct fuse_arg *) req->in.args); - list_add_tail(&req->list, &fc->pending); - req->state = FUSE_REQ_PENDING; - if (!req->waiting) { - req->waiting = 1; - atomic_inc(&fc->num_waiting); - } - wake_up(&fc->waitq); - kill_fasync(&fc->fasync, SIGIO, POLL_IN); -} - void request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -375,20 +390,26 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fc->lock); } +static void 
request_send_nowait_locked(struct fuse_conn *fc, + struct fuse_req *req) +{ + req->background = 1; + fc->num_background++; + if (fc->num_background == FUSE_MAX_BACKGROUND) + fc->blocked = 1; + if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { + set_bdi_congested(&fc->bdi, READ); + set_bdi_congested(&fc->bdi, WRITE); + } + list_add_tail(&req->list, &fc->bg_queue); + flush_bg_queue(fc); +} + static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fc->lock); if (fc->connected) { - req->background = 1; - fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) - fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); - } - - queue_request(fc, req); + request_send_nowait_locked(fc, req); spin_unlock(&fc->lock); } else { req->out.h.error = -ENOTCONN; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 80d2f5292cf..7fb514b6d85 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -269,12 +269,12 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, req = fuse_get_req(fc); if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); forget_req = fuse_get_req(fc); if (IS_ERR(forget_req)) { fuse_put_request(fc, req); - return ERR_PTR(PTR_ERR(forget_req)); + return ERR_CAST(forget_req); } attr_version = fuse_get_attr_version(fc); @@ -416,6 +416,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_put_request(fc, forget_req); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); + fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; @@ -1005,7 +1006,7 @@ static char *read_link(struct dentry *dentry) char *link; if (IS_ERR(req)) - return ERR_PTR(PTR_ERR(req)); + return ERR_CAST(req); link = (char *) __get_free_page(GFP_KERNEL); if (!link) { diff --git a/fs/fuse/file.c 
b/fs/fuse/file.c index bb05d227cf3..676b0bc8a86 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -77,8 +77,8 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - dput(req->dentry); - mntput(req->vfsmount); + dput(req->misc.release.dentry); + mntput(req->misc.release.vfsmount); fuse_put_request(fc, req); } @@ -86,7 +86,8 @@ static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - struct fuse_conn *fc = get_fuse_conn(req->dentry->d_inode); + struct inode *inode = req->misc.release.dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); req->end = fuse_release_end; request_send_background(fc, req); kfree(ff); @@ -137,7 +138,7 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) { struct fuse_req *req = ff->reserved_req; - struct fuse_release_in *inarg = &req->misc.release_in; + struct fuse_release_in *inarg = &req->misc.release.in; inarg->fh = ff->fh; inarg->flags = flags; @@ -153,13 +154,14 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff = file->private_data; if (ff) { struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = ff->reserved_req; fuse_release_fill(ff, get_node_id(inode), file->f_flags, isdir ? 
FUSE_RELEASEDIR : FUSE_RELEASE); /* Hold vfsmount and dentry until release is finished */ - ff->reserved_req->vfsmount = mntget(file->f_path.mnt); - ff->reserved_req->dentry = dget(file->f_path.dentry); + req->misc.release.vfsmount = mntget(file->f_path.mnt); + req->misc.release.dentry = dget(file->f_path.dentry); spin_lock(&fc->lock); list_del(&ff->write_entry); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3ab8a3048e8..67aaf6ee38e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -215,7 +215,11 @@ struct fuse_req { /** Data for asynchronous requests */ union { struct fuse_forget_in forget_in; - struct fuse_release_in release_in; + struct { + struct fuse_release_in in; + struct vfsmount *vfsmount; + struct dentry *dentry; + } release; struct fuse_init_in init_in; struct fuse_init_out init_out; struct fuse_read_in read_in; @@ -238,12 +242,6 @@ struct fuse_req { /** File used in the request (or NULL) */ struct fuse_file *ff; - /** vfsmount used in release */ - struct vfsmount *vfsmount; - - /** dentry used in release */ - struct dentry *dentry; - /** Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); @@ -298,6 +296,12 @@ struct fuse_conn { /** Number of requests currently in the background */ unsigned num_background; + /** Number of background requests currently queued for userspace */ + unsigned active_background; + + /** The list of background requests set aside for later queuing */ + struct list_head bg_queue; + /** Pending interrupts */ struct list_head interrupts; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e5e80d1a468..574707409bb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -76,11 +76,6 @@ static void fuse_destroy_inode(struct inode *inode) kmem_cache_free(fuse_inode_cachep, inode); } -static void fuse_read_inode(struct inode *inode) -{ - /* No op */ -} - void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, unsigned long nodeid, u64 nlookup) { @@ -465,6 +460,7 @@ static struct 
fuse_conn *new_conn(void) INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); INIT_LIST_HEAD(&fc->interrupts); + INIT_LIST_HEAD(&fc->bg_queue); atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; @@ -514,7 +510,6 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode) static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, - .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, .drop_inode = generic_delete_inode, .remount_fs = fuse_remount_fs, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e4effc47abf..e9456ebd3bb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -932,7 +932,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping) if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); unlock: unlock_page(page); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 57e2ed932ad..c34709512b1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1498,7 +1498,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) - return ERR_PTR(PTR_ERR(dent)); + return ERR_CAST(dent); inode = gfs2_inode_lookup(dir->i_sb, be16_to_cpu(dent->de_type), be64_to_cpu(dent->de_inum.no_addr), diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 80e09c50590..7175a4d0643 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -334,7 +334,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_state = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; - gl->gl_owner_pid = 0; + gl->gl_owner_pid = NULL; gl->gl_ip = 0; gl->gl_ops = glops; gl->gl_req_gh = NULL; @@ -399,7 +399,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, 
unsigned flags, INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner_pid = current->pid; + gh->gh_owner_pid = get_pid(task_pid(current)); gh->gh_state = state; gh->gh_flags = flags; gh->gh_error = 0; @@ -433,6 +433,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * void gfs2_holder_uninit(struct gfs2_holder *gh) { + put_pid(gh->gh_owner_pid); gfs2_glock_put(gh->gh_gl); gh->gh_gl = NULL; gh->gh_ip = 0; @@ -631,7 +632,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) wait_on_holder(&gh); gfs2_holder_uninit(&gh); } else { - gl->gl_owner_pid = current->pid; + gl->gl_owner_pid = get_pid(task_pid(current)); gl->gl_ip = (unsigned long)__builtin_return_address(0); spin_unlock(&gl->gl_spin); } @@ -652,7 +653,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { acquired = 0; } else { - gl->gl_owner_pid = current->pid; + gl->gl_owner_pid = get_pid(task_pid(current)); gl->gl_ip = (unsigned long)__builtin_return_address(0); } spin_unlock(&gl->gl_spin); @@ -668,12 +669,17 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) static void gfs2_glmutex_unlock(struct gfs2_glock *gl) { + struct pid *pid; + spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); - gl->gl_owner_pid = 0; + pid = gl->gl_owner_pid; + gl->gl_owner_pid = NULL; gl->gl_ip = 0; run_queue(gl); spin_unlock(&gl->gl_spin); + + put_pid(pid); } /** @@ -1045,7 +1051,7 @@ static int glock_wait_internal(struct gfs2_holder *gh) } static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, pid_t pid) +find_holder_by_owner(struct list_head *head, struct pid *pid) { struct gfs2_holder *gh; @@ -1082,7 +1088,7 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_holder *existing; - BUG_ON(!gh->gh_owner_pid); + BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) 
BUG(); @@ -1092,12 +1098,14 @@ static void add_to_queue(struct gfs2_holder *gh) if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); + printk(KERN_INFO "pid : %d\n", + pid_nr(existing->gh_owner_pid)); printk(KERN_INFO "lock type : %d lock state : %d\n", existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); + printk(KERN_INFO "pid : %d\n", + pid_nr(gh->gh_owner_pid)); printk(KERN_INFO "lock type : %d lock state : %d\n", gl->gl_name.ln_type, gl->gl_state); BUG(); @@ -1798,8 +1806,9 @@ static int dump_holder(struct glock_iter *gi, char *str, print_dbg(gi, " %s\n", str); if (gh->gh_owner_pid) { - print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); - gh_owner = find_task_by_pid(gh->gh_owner_pid); + print_dbg(gi, " owner = %ld ", + (long)pid_nr(gh->gh_owner_pid)); + gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); if (gh_owner) print_dbg(gi, "(%s)\n", gh_owner->comm); else @@ -1877,13 +1886,13 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); print_dbg(gi, " gl_state = %u\n", gl->gl_state); if (gl->gl_owner_pid) { - gl_owner = find_task_by_pid(gl->gl_owner_pid); + gl_owner = pid_task(gl->gl_owner_pid, PIDTYPE_PID); if (gl_owner) print_dbg(gi, " gl_owner = pid %d (%s)\n", - gl->gl_owner_pid, gl_owner->comm); + pid_nr(gl->gl_owner_pid), gl_owner->comm); else print_dbg(gi, " gl_owner = %d (ended)\n", - gl->gl_owner_pid); + pid_nr(gl->gl_owner_pid)); } else print_dbg(gi, " gl_owner = -1\n"); print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index b16f604eea9..2f9c6d136b3 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -36,11 +36,13 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) { struct gfs2_holder *gh; int locked = 0; + struct 
pid *pid; /* Look in glock's list of holders for one with current task as owner */ spin_lock(&gl->gl_spin); + pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (gh->gh_owner_pid == current->pid) { + if (gh->gh_owner_pid == pid) { locked = 1; break; } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 513aaf0dc0a..525dcae352d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -151,7 +151,7 @@ struct gfs2_holder { struct list_head gh_list; struct gfs2_glock *gh_gl; - pid_t gh_owner_pid; + struct pid *gh_owner_pid; unsigned int gh_state; unsigned gh_flags; @@ -182,7 +182,7 @@ struct gfs2_glock { unsigned int gl_hash; unsigned int gl_demote_state; /* state requested by remote node */ unsigned long gl_demote_time; /* time of first demote request */ - pid_t gl_owner_pid; + struct pid *gl_owner_pid; unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 728d3169e7b..37725ade3c5 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -240,7 +240,7 @@ fail_put: ip->i_gl->gl_object = NULL; gfs2_glock_put(ip->i_gl); fail: - iput(inode); + iget_failed(inode); return ERR_PTR(error); } diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 38dbe99a30e..ac772b6d9db 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -446,7 +446,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) * so we need to supply one here. It doesn't happen often. */ if (unlikely(page->index)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); return 0; } diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b9da62348a8..334c7f85351 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -143,7 +143,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) * have to return that as a(n invalid) pointer to dentry. 
*/ if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); dentry = d_alloc_anon(inode); if (!dentry) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 9f71372c175..e87412902be 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -111,7 +111,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); if (inode && IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); if (inode) { struct gfs2_glock *gl = GFS2_I(inode)->i_gl; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index b249e294a95..6fb07d67ca8 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -450,7 +450,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", jd->jd_jid); - /* Aquire the journal lock so we can do recovery */ + /* Acquire the journal lock so we can do recovery */ error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index f8452a0eab5..4129cdb3f0d 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -52,9 +52,9 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) rec = (e + b) / 2; len = hfs_brec_lenoff(bnode, rec, &off); keylen = hfs_brec_keylen(bnode, rec); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; - goto done; + goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); cmpval = bnode->tree->keycmp(fd->key, fd->search_key); @@ -71,9 +71,9 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) if (rec != e && e >= 0) { len = hfs_brec_lenoff(bnode, e, &off); keylen = hfs_brec_keylen(bnode, e); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; - goto done; + goto fail; } hfs_bnode_read(bnode, fd->key, off, keylen); } @@ -83,6 +83,7 @@ done: fd->keylength = keylen; fd->entryoffset = off + keylen; fd->entrylength = len - keylen; 
+fail: return res; } @@ -206,7 +207,7 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) len = hfs_brec_lenoff(bnode, fd->record, &off); keylen = hfs_brec_keylen(bnode, fd->record); - if (keylen == HFS_BAD_KEYLEN) { + if (keylen == 0) { res = -EINVAL; goto out; } diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index 8626ee375ea..878bf25dbc6 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -49,14 +49,14 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) if (retval > node->tree->max_key_len + 2) { printk(KERN_ERR "hfs: keylen %d too large\n", retval); - retval = HFS_BAD_KEYLEN; + retval = 0; } } else { retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; if (retval > node->tree->max_key_len + 1) { printk(KERN_ERR "hfs: keylen %d too large\n", retval); - retval = HFS_BAD_KEYLEN; + retval = 0; } } } diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 110dd3515dc..24cf6fc4302 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -81,15 +81,23 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke goto fail_page; if (!tree->node_count) goto fail_page; - if ((id == HFS_EXT_CNID) && (tree->max_key_len != HFS_MAX_EXT_KEYLEN)) { - printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", - tree->max_key_len); - goto fail_page; - } - if ((id == HFS_CAT_CNID) && (tree->max_key_len != HFS_MAX_CAT_KEYLEN)) { - printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", - tree->max_key_len); - goto fail_page; + switch (id) { + case HFS_EXT_CNID: + if (tree->max_key_len != HFS_MAX_EXT_KEYLEN) { + printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + break; + case HFS_CAT_CNID: + if (tree->max_key_len != HFS_MAX_CAT_KEYLEN) { + printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", + tree->max_key_len); + goto fail_page; + } + break; + default: + BUG(); } tree->node_size_shift = ffs(size) - 1; diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h index c6aae61adfe..6f194d0768b 100644 --- a/fs/hfs/hfs.h +++ 
b/fs/hfs/hfs.h @@ -28,8 +28,6 @@ #define HFS_MAX_NAMELEN 128 #define HFS_MAX_VALENCE 32767U -#define HFS_BAD_KEYLEN 0xFF - /* Meanings of the drAtrb field of the MDB, * Reference: _Inside Macintosh: Files_ p. 2-61 */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 16cbd902f8b..32de44ed002 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -6,7 +6,7 @@ * This file may be distributed under the terms of the GNU General Public License. * * This file contains hfs_read_super(), some of the super_ops and - * init_module() and cleanup_module(). The remaining super_ops are in + * init_hfs_fs() and exit_hfs_fs(). The remaining super_ops are in * inode.c since they deal with inodes. * * Based on the minix file system code, (C) 1991, 1992 by Linus Torvalds diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 050d29c0a5b..bb5433608a4 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -22,6 +22,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) struct hfs_btree *tree; struct hfs_btree_header_rec *head; struct address_space *mapping; + struct inode *inode; struct page *page; unsigned int size; @@ -33,9 +34,10 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) spin_lock_init(&tree->hash_lock); tree->sb = sb; tree->cnid = id; - tree->inode = iget(sb, id); - if (!tree->inode) + inode = hfsplus_iget(sb, id); + if (IS_ERR(inode)) goto free_tree; + tree->inode = inode; mapping = tree->inode->i_mapping; page = read_mapping_page(mapping, 0, NULL); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 1955ee61251..29683645fa0 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -97,9 +97,9 @@ again: goto fail; } hfs_find_exit(&fd); - inode = iget(dir->i_sb, cnid); - if (!inode) - return ERR_PTR(-EACCES); + inode = hfsplus_iget(dir->i_sb, cnid); + if (IS_ERR(inode)) + return ERR_CAST(inode); if (S_ISREG(inode->i_mode)) HFSPLUS_I(inode).dev = linkid; out: diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 
d9f5eda6d03..d72d0a8b25a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -345,6 +345,9 @@ int hfsplus_parse_options(char *, struct hfsplus_sb_info *); void hfsplus_fill_defaults(struct hfsplus_sb_info *); int hfsplus_show_options(struct seq_file *, struct vfsmount *); +/* super.c */ +struct inode *hfsplus_iget(struct super_block *, unsigned long); + /* tables.c */ extern u16 hfsplus_case_fold_table[]; extern u16 hfsplus_decompose_table[]; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index ecf70dafb64..b0f9ad362d1 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -20,11 +20,18 @@ static void hfsplus_destroy_inode(struct inode *inode); #include "hfsplus_fs.h" -static void hfsplus_read_inode(struct inode *inode) +struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) { struct hfs_find_data fd; struct hfsplus_vh *vhdr; - int err; + struct inode *inode; + long err = -EIO; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); init_MUTEX(&HFSPLUS_I(inode).extents_lock); @@ -41,7 +48,7 @@ static void hfsplus_read_inode(struct inode *inode) hfs_find_exit(&fd); if (err) goto bad_inode; - return; + goto done; } vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; switch(inode->i_ino) { @@ -70,10 +77,13 @@ static void hfsplus_read_inode(struct inode *inode) goto bad_inode; } - return; +done: + unlock_new_inode(inode); + return inode; - bad_inode: - make_bad_inode(inode); +bad_inode: + iget_failed(inode); + return ERR_PTR(err); } static int hfsplus_write_inode(struct inode *inode, int unused) @@ -262,7 +272,6 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations hfsplus_sops = { .alloc_inode = hfsplus_alloc_inode, .destroy_inode = hfsplus_destroy_inode, - .read_inode = hfsplus_read_inode, .write_inode = hfsplus_write_inode, .clear_inode = hfsplus_clear_inode, 
.put_super = hfsplus_put_super, @@ -278,7 +287,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) struct hfsplus_sb_info *sbi; hfsplus_cat_entry entry; struct hfs_find_data fd; - struct inode *root; + struct inode *root, *inode; struct qstr str; struct nls_table *nls = NULL; int err = -EINVAL; @@ -366,18 +375,25 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) goto cleanup; } - HFSPLUS_SB(sb).alloc_file = iget(sb, HFSPLUS_ALLOC_CNID); - if (!HFSPLUS_SB(sb).alloc_file) { + inode = hfsplus_iget(sb, HFSPLUS_ALLOC_CNID); + if (IS_ERR(inode)) { printk(KERN_ERR "hfs: failed to load allocation file\n"); + err = PTR_ERR(inode); goto cleanup; } + HFSPLUS_SB(sb).alloc_file = inode; /* Load the root directory */ - root = iget(sb, HFSPLUS_ROOT_CNID); + root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); + if (IS_ERR(root)) { + printk(KERN_ERR "hfs: failed to load root directory\n"); + err = PTR_ERR(root); + goto cleanup; + } sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "hfs: failed to load root directory\n"); iput(root); + err = -ENOMEM; goto cleanup; } sb->s_root->d_op = &hfsplus_dentry_operations; @@ -390,9 +406,12 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) goto cleanup; - HFSPLUS_SB(sb).hidden_dir = iget(sb, be32_to_cpu(entry.folder.id)); - if (!HFSPLUS_SB(sb).hidden_dir) + inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto cleanup; + } + HFSPLUS_SB(sb).hidden_dir = inode; } else hfs_find_exit(&fd); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 8966b050196..2b9b35733aa 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -202,7 +202,7 @@ static char *follow_link(char *link) return ERR_PTR(n); } -static int read_inode(struct inode *ino) +static int hostfs_read_inode(struct inode *ino) { char 
*name; int err = 0; @@ -233,6 +233,25 @@ static int read_inode(struct inode *ino) return err; } +static struct inode *hostfs_iget(struct super_block *sb) +{ + struct inode *inode; + long ret; + + inode = iget_locked(sb, 0); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) { + ret = hostfs_read_inode(inode); + if (ret < 0) { + iget_failed(inode); + return ERR_PTR(ret); + } + unlock_new_inode(inode); + } + return inode; +} + int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) { /* @@ -303,17 +322,11 @@ static void hostfs_destroy_inode(struct inode *inode) kfree(HOSTFS_I(inode)); } -static void hostfs_read_inode(struct inode *inode) -{ - read_inode(inode); -} - static const struct super_operations hostfs_sbops = { .alloc_inode = hostfs_alloc_inode, .drop_inode = generic_delete_inode, .delete_inode = hostfs_delete_inode, .destroy_inode = hostfs_destroy_inode, - .read_inode = hostfs_read_inode, .statfs = hostfs_statfs, }; @@ -571,10 +584,11 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, char *name; int error, fd; - error = -ENOMEM; - inode = iget(dir->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(dir->i_sb); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); goto out; + } error = init_inode(inode, dentry); if (error) @@ -615,10 +629,11 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, char *name; int err; - err = -ENOMEM; - inode = iget(ino->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(ino->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out; + } err = init_inode(inode, dentry); if (err) @@ -736,11 +751,13 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { struct inode *inode; char *name; - int err = -ENOMEM; + int err; - inode = iget(dir->i_sb, 0); - if (inode == NULL) + inode = hostfs_iget(dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out; + } err = init_inode(inode, dentry); if (err) @@ -952,9 +969,11 @@ static 
int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sprintf(host_root_path, "%s/%s", root_ino, req_root); - root_inode = iget(sb, 0); - if (root_inode == NULL) + root_inode = hostfs_iget(sb); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); goto out_free; + } err = init_inode(root_inode, NULL); if (err) @@ -972,7 +991,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) if (sb->s_root == NULL) goto out_put; - err = read_inode(root_inode); + err = hostfs_read_inode(root_inode); if (err) { /* No iput in this case because the dput does that for us */ dput(sb->s_root); diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 35c1a9f33f4..53fd0a67c11 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c @@ -285,17 +285,17 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) return err; times[0].tv_sec = atime_ts.tv_sec; - times[0].tv_usec = atime_ts.tv_nsec * 1000; + times[0].tv_usec = atime_ts.tv_nsec / 1000; times[1].tv_sec = mtime_ts.tv_sec; - times[1].tv_usec = mtime_ts.tv_nsec * 1000; + times[1].tv_usec = mtime_ts.tv_nsec / 1000; if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { times[0].tv_sec = attrs->ia_atime.tv_sec; - times[0].tv_usec = attrs->ia_atime.tv_nsec * 1000; + times[0].tv_usec = attrs->ia_atime.tv_nsec / 1000; } if (attrs->ia_valid & HOSTFS_ATTR_MTIME_SET) { times[1].tv_sec = attrs->ia_mtime.tv_sec; - times[1].tv_usec = attrs->ia_mtime.tv_nsec * 1000; + times[1].tv_usec = attrs->ia_mtime.tv_nsec / 1000; } if (fd >= 0) { diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index affb7412125..a1e1f0f61aa 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -155,6 +155,20 @@ static void hppfs_read_inode(struct inode *ino) ino->i_blocks = proc_ino->i_blocks; } +static struct inode *hppfs_iget(struct super_block *sb) +{ + struct inode *inode; + + inode = iget_locked(sb, 0); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) 
{ + hppfs_read_inode(inode); + unlock_new_inode(inode); + } + return inode; +} + static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, struct nameidata *nd) { @@ -190,9 +204,11 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, if(IS_ERR(proc_dentry)) return(proc_dentry); - inode = iget(ino->i_sb, 0); - if(inode == NULL) + inode = hppfs_iget(ino->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); goto out_dput; + } err = init_inode(inode, proc_dentry); if(err) @@ -652,7 +668,6 @@ static void hppfs_destroy_inode(struct inode *inode) static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, - .read_inode = hppfs_read_inode, .delete_inode = hppfs_delete_inode, .statfs = hppfs_statfs, }; @@ -745,9 +760,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) sb->s_magic = HPPFS_SUPER_MAGIC; sb->s_op = &hppfs_sbops; - root_inode = iget(sb, 0); - if(root_inode == NULL) + root_inode = hppfs_iget(sb); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); goto out; + } err = init_inode(root_inode, proc_sb->s_root); if(err) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 09ee07f0266..3b3cc28cdef 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -768,7 +768,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) case Opt_mode: if (match_octal(&args[0], &option)) goto bad_val; - pconfig->mode = option & 0777U; + pconfig->mode = option & 01777U; break; case Opt_size: { diff --git a/fs/inode.c b/fs/inode.c index 276ffd6b6fd..53245ffcf93 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -928,8 +928,6 @@ EXPORT_SYMBOL(ilookup); * @set: callback used to initialize a new struct inode * @data: opaque data pointer to pass to @test and @set * - * This is iget() without the read_inode() portion of get_new_inode(). 
- * * iget5_locked() uses ifind() to search for the inode specified by @hashval * and @data in the inode cache and if present it is returned with an increased * reference count. This is a generalized version of iget_locked() for file @@ -966,8 +964,6 @@ EXPORT_SYMBOL(iget5_locked); * @sb: super block of file system * @ino: inode number to get * - * This is iget() without the read_inode() portion of get_new_inode_fast(). - * * iget_locked() uses ifind_fast() to search for the inode specified by @ino in * the inode cache and if present it is returned with an increased reference * count. This is for file systems where the inode number is sufficient for diff --git a/fs/inotify.c b/fs/inotify.c index 2c5b9215287..690e72595e6 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -168,20 +168,14 @@ static void set_dentry_child_flags(struct inode *inode, int watched) struct dentry *child; list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { - if (!child->d_inode) { - WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); + if (!child->d_inode) continue; - } + spin_lock(&child->d_lock); - if (watched) { - WARN_ON(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED); + if (watched) child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; - } else { - WARN_ON(!(child->d_flags & - DCACHE_INOTIFY_PARENT_WATCHED)); - child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; - } + else + child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } } @@ -253,7 +247,6 @@ void inotify_d_instantiate(struct dentry *entry, struct inode *inode) if (!inode) return; - WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); spin_lock(&entry->d_lock); parent = entry->d_parent; if (parent->d_inode && inotify_inode_watched(parent->d_inode)) @@ -627,6 +620,7 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, struct inode *inode, u32 mask) { int ret = 0; + int newly_watched; /* don't allow invalid bits: we don't want flags set */ mask &= IN_ALL_EVENTS | 
IN_ONESHOT; @@ -653,12 +647,18 @@ s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch, */ watch->inode = igrab(inode); - if (!inotify_inode_watched(inode)) - set_dentry_child_flags(inode, 1); - /* Add the watch to the handle's and the inode's list */ + newly_watched = !inotify_inode_watched(inode); list_add(&watch->h_list, &ih->watches); list_add(&watch->i_list, &inode->inotify_watches); + /* + * Set child flags _after_ adding the watch, so there is no race + * windows where newly instantiated children could miss their parent's + * watched flag. + */ + if (newly_watched) + set_dentry_child_flags(inode, 1); + out: mutex_unlock(&ih->mutex); mutex_unlock(&inode->inotify_mutex); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 5e009331c01..a336c9709f3 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -79,6 +79,7 @@ struct inotify_device { atomic_t count; /* reference count */ struct user_struct *user; /* user who opened this dev */ struct inotify_handle *ih; /* inotify handle */ + struct fasync_struct *fa; /* async notification */ unsigned int queue_size; /* size of the queue (bytes) */ unsigned int event_count; /* number of pending events */ unsigned int max_events; /* maximum number of events */ @@ -248,6 +249,19 @@ inotify_dev_get_event(struct inotify_device *dev) } /* + * inotify_dev_get_last_event - return the last event in the given dev's queue + * + * Caller must hold dev->ev_mutex. 
+ */ +static inline struct inotify_kernel_event * +inotify_dev_get_last_event(struct inotify_device *dev) +{ + if (list_empty(&dev->events)) + return NULL; + return list_entry(dev->events.prev, struct inotify_kernel_event, list); +} + +/* * inotify_dev_queue_event - event handler registered with core inotify, adds * a new event to the given device * @@ -273,7 +287,7 @@ static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, put_inotify_watch(w); /* final put */ /* coalescing: drop this event if it is a dupe of the previous */ - last = inotify_dev_get_event(dev); + last = inotify_dev_get_last_event(dev); if (last && last->event.mask == mask && last->event.wd == wd && last->event.cookie == cookie) { const char *lastname = last->name; @@ -302,6 +316,7 @@ static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask, dev->queue_size += sizeof(struct inotify_event) + kevent->event.len; list_add_tail(&kevent->list, &dev->events); wake_up_interruptible(&dev->wq); + kill_fasync(&dev->fa, SIGIO, POLL_IN); out: mutex_unlock(&dev->ev_mutex); @@ -490,6 +505,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, return ret; } +static int inotify_fasync(int fd, struct file *file, int on) +{ + struct inotify_device *dev = file->private_data; + + return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 
0 : -EIO; +} + static int inotify_release(struct inode *ignored, struct file *file) { struct inotify_device *dev = file->private_data; @@ -502,6 +524,9 @@ static int inotify_release(struct inode *ignored, struct file *file) inotify_dev_event_dequeue(dev); mutex_unlock(&dev->ev_mutex); + if (file->f_flags & FASYNC) + inotify_fasync(-1, file, 0); + /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); @@ -530,6 +555,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, static const struct file_operations inotify_fops = { .poll = inotify_poll, .read = inotify_read, + .fasync = inotify_fasync, .release = inotify_release, .unlocked_ioctl = inotify_ioctl, .compat_ioctl = inotify_ioctl, @@ -577,6 +603,7 @@ asmlinkage long sys_inotify_init(void) goto out_free_dev; } dev->ih = ih; + dev->fa = NULL; filp->f_op = &inotify_fops; filp->f_path.mnt = mntget(inotify_mnt); diff --git a/fs/ioctl.c b/fs/ioctl.c index c2a773e8620..683002fefa5 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -12,12 +12,24 @@ #include <linux/fs.h> #include <linux/security.h> #include <linux/module.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/ioctls.h> -static long do_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +/** + * vfs_ioctl - call filesystem specific ioctl methods + * @filp: [in] open file to invoke ioctl method on + * @cmd: [in] ioctl command to execute + * @arg: [in/out] command-specific argument for ioctl + * + * Invokes filesystem specific ->unlocked_ioctl, if one exists; otherwise + * invokes * filesystem specific ->ioctl method. If neither method exists, + * returns -ENOTTY. + * + * Returns 0 on success, -errno on error. 
+ */ +long vfs_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) { int error = -ENOTTY; @@ -40,123 +52,148 @@ static long do_ioctl(struct file *filp, unsigned int cmd, return error; } +static int ioctl_fibmap(struct file *filp, int __user *p) +{ + struct address_space *mapping = filp->f_mapping; + int res, block; + + /* do we support this mess? */ + if (!mapping->a_ops->bmap) + return -EINVAL; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + res = get_user(block, p); + if (res) + return res; + lock_kernel(); + res = mapping->a_ops->bmap(mapping, block); + unlock_kernel(); + return put_user(res, p); +} + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - int error; - int block; - struct inode * inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_path.dentry->d_inode; int __user *p = (int __user *)arg; switch (cmd) { - case FIBMAP: - { - struct address_space *mapping = filp->f_mapping; - int res; - /* do we support this mess? */ - if (!mapping->a_ops->bmap) - return -EINVAL; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - if ((error = get_user(block, p)) != 0) - return error; + case FIBMAP: + return ioctl_fibmap(filp, p); + case FIGETBSZ: + return put_user(inode->i_sb->s_blocksize, p); + case FIONREAD: + return put_user(i_size_read(inode) - filp->f_pos, p); + } + return vfs_ioctl(filp, cmd, arg); +} + +static int ioctl_fionbio(struct file *filp, int __user *argp) +{ + unsigned int flag; + int on, error; + + error = get_user(on, argp); + if (error) + return error; + flag = O_NONBLOCK; +#ifdef __sparc__ + /* SunOS compatibility item. */ + if (O_NONBLOCK != O_NDELAY) + flag |= O_NDELAY; +#endif + if (on) + filp->f_flags |= flag; + else + filp->f_flags &= ~flag; + return error; +} + +static int ioctl_fioasync(unsigned int fd, struct file *filp, + int __user *argp) +{ + unsigned int flag; + int on, error; + + error = get_user(on, argp); + if (error) + return error; + flag = on ? 
FASYNC : 0; + + /* Did FASYNC state change ? */ + if ((flag ^ filp->f_flags) & FASYNC) { + if (filp->f_op && filp->f_op->fasync) { lock_kernel(); - res = mapping->a_ops->bmap(mapping, block); + error = filp->f_op->fasync(fd, filp, on); unlock_kernel(); - return put_user(res, p); - } - case FIGETBSZ: - return put_user(inode->i_sb->s_blocksize, p); - case FIONREAD: - return put_user(i_size_read(inode) - filp->f_pos, p); + } else + error = -ENOTTY; } + if (error) + return error; - return do_ioctl(filp, cmd, arg); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + return error; } /* * When you add any new common ioctls to the switches above and below * please update compat_sys_ioctl() too. * - * vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. + * do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d. * It's just a simple helper for sys_ioctl and compat_sys_ioctl. */ -int vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) +int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, + unsigned long arg) { - unsigned int flag; - int on, error = 0; + int error = 0; + int __user *argp = (int __user *)arg; switch (cmd) { - case FIOCLEX: - set_close_on_exec(fd, 1); - break; - - case FIONCLEX: - set_close_on_exec(fd, 0); - break; - - case FIONBIO: - if ((error = get_user(on, (int __user *)arg)) != 0) - break; - flag = O_NONBLOCK; -#ifdef __sparc__ - /* SunOS compatibility item. */ - if(O_NONBLOCK != O_NDELAY) - flag |= O_NDELAY; -#endif - if (on) - filp->f_flags |= flag; - else - filp->f_flags &= ~flag; - break; - - case FIOASYNC: - if ((error = get_user(on, (int __user *)arg)) != 0) - break; - flag = on ? FASYNC : 0; - - /* Did FASYNC state change ? 
*/ - if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - lock_kernel(); - error = filp->f_op->fasync(fd, filp, on); - unlock_kernel(); - } - else error = -ENOTTY; - } - if (error != 0) - break; - - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - break; - - case FIOQSIZE: - if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || - S_ISREG(filp->f_path.dentry->d_inode->i_mode) || - S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { - loff_t res = inode_get_bytes(filp->f_path.dentry->d_inode); - error = copy_to_user((loff_t __user *)arg, &res, sizeof(res)) ? -EFAULT : 0; - } - else - error = -ENOTTY; - break; - default: - if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - error = file_ioctl(filp, cmd, arg); - else - error = do_ioctl(filp, cmd, arg); - break; + case FIOCLEX: + set_close_on_exec(fd, 1); + break; + + case FIONCLEX: + set_close_on_exec(fd, 0); + break; + + case FIONBIO: + error = ioctl_fionbio(filp, argp); + break; + + case FIOASYNC: + error = ioctl_fioasync(fd, filp, argp); + break; + + case FIOQSIZE: + if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || + S_ISREG(filp->f_path.dentry->d_inode->i_mode) || + S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { + loff_t res = + inode_get_bytes(filp->f_path.dentry->d_inode); + error = copy_to_user((loff_t __user *)arg, &res, + sizeof(res)) ? 
-EFAULT : 0; + } else + error = -ENOTTY; + break; + default: + if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + error = file_ioctl(filp, cmd, arg); + else + error = vfs_ioctl(filp, cmd, arg); + break; } return error; } asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct file * filp; + struct file *filp; int error = -EBADF; int fput_needed; @@ -168,7 +205,7 @@ asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) if (error) goto out_fput; - error = vfs_ioctl(filp, fd, cmd, arg); + error = do_vfs_ioctl(filp, fd, cmd, arg); out_fput: fput_light(filp, fput_needed); out: diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 29f9753ae5e..bb219138331 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -26,11 +26,9 @@ isofs_export_iget(struct super_block *sb, if (block == 0) return ERR_PTR(-ESTALE); inode = isofs_iget(sb, block, offset); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) - || (generation && inode->i_generation != generation)) - { + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -110,8 +108,10 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) parent_inode = isofs_iget(child_inode->i_sb, parent_block, parent_offset); - if (parent_inode == NULL) { - rv = ERR_PTR(-EACCES); + if (IS_ERR(parent_inode)) { + rv = ERR_CAST(parent_inode); + if (rv != ERR_PTR(-ENOMEM)) + rv = ERR_PTR(-EACCES); goto out; } diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 09e3d306e96..875d37fb6c7 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -54,7 +54,7 @@ static void isofs_put_super(struct super_block *sb) return; } -static void isofs_read_inode(struct inode *); +static int isofs_read_inode(struct inode *); static int isofs_statfs (struct dentry *, struct kstatfs *); static struct kmem_cache *isofs_inode_cachep; @@ -107,7 +107,6 @@ static int 
isofs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations isofs_sops = { .alloc_inode = isofs_alloc_inode, .destroy_inode = isofs_destroy_inode, - .read_inode = isofs_read_inode, .put_super = isofs_put_super, .statfs = isofs_statfs, .remount_fs = isofs_remount, @@ -552,7 +551,7 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) int joliet_level = 0; int iso_blknum, block; int orig_zonesize; - int table; + int table, error = -EINVAL; unsigned int vol_desc_start; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); @@ -810,6 +809,8 @@ root_found: * we then decide whether to use the Joliet descriptor. */ inode = isofs_iget(s, sbi->s_firstdatazone, 0); + if (IS_ERR(inode)) + goto out_no_root; /* * If this disk has both Rock Ridge and Joliet on it, then we @@ -829,6 +830,8 @@ root_found: "ISOFS: changing to secondary root\n"); iput(inode); inode = isofs_iget(s, sbi->s_firstdatazone, 0); + if (IS_ERR(inode)) + goto out_no_root; } } @@ -842,8 +845,6 @@ root_found: sbi->s_joliet_level = joliet_level; /* check the root inode */ - if (!inode) - goto out_no_root; if (!inode->i_op) goto out_bad_root; @@ -876,11 +877,14 @@ root_found: */ out_bad_root: printk(KERN_WARNING "%s: root inode not initialized\n", __func__); - goto out_iput; -out_no_root: - printk(KERN_WARNING "%s: get root inode failed\n", __func__); out_iput: iput(inode); + goto out_no_inode; +out_no_root: + error = PTR_ERR(inode); + if (error != -ENOMEM) + printk(KERN_WARNING "%s: get root inode failed\n", __func__); +out_no_inode: #ifdef CONFIG_JOLIET if (sbi->s_nls_iocharset) unload_nls(sbi->s_nls_iocharset); @@ -908,7 +912,7 @@ out_freesbi: kfree(opt.iocharset); kfree(sbi); s->s_fs_info = NULL; - return -EINVAL; + return error; } static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) @@ -930,7 +934,7 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) /* * Get a set of blocks; filling in buffer_heads if already allocated * 
or getblk() if they are not. Returns the number of blocks inserted - * (0 == error.) + * (-ve == error.) */ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, struct buffer_head **bh, unsigned long nblocks) @@ -940,11 +944,12 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, unsigned int firstext; unsigned long nextblk, nextoff; long iblock = (long)iblock_s; - int section, rv; + int section, rv, error; struct iso_inode_info *ei = ISOFS_I(inode); lock_kernel(); + error = -EIO; rv = 0; if (iblock < 0 || iblock != iblock_s) { printk(KERN_DEBUG "%s: block number too large\n", __func__); @@ -983,8 +988,10 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, offset += sect_size; ninode = isofs_iget(inode->i_sb, nextblk, nextoff); - if (!ninode) + if (IS_ERR(ninode)) { + error = PTR_ERR(ninode); goto abort; + } firstext = ISOFS_I(ninode)->i_first_extent; sect_size = ISOFS_I(ninode)->i_section_size >> ISOFS_BUFFER_BITS(ninode); nextblk = ISOFS_I(ninode)->i_next_section_block; @@ -1015,9 +1022,10 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, rv++; } + error = 0; abort: unlock_kernel(); - return rv; + return rv != 0 ? rv : error; } /* @@ -1026,12 +1034,15 @@ abort: static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { + int ret; + if (create) { printk(KERN_DEBUG "%s: Kernel tries to allocate a block\n", __func__); return -EROFS; } - return isofs_get_blocks(inode, iblock, &bh_result, 1) ? 0 : -EIO; + ret = isofs_get_blocks(inode, iblock, &bh_result, 1); + return ret < 0 ? 
ret : 0; } static int isofs_bmap(struct inode *inode, sector_t block) @@ -1186,7 +1197,7 @@ out_toomany: goto out; } -static void isofs_read_inode(struct inode *inode) +static int isofs_read_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct isofs_sb_info *sbi = ISOFS_SB(sb); @@ -1199,6 +1210,7 @@ static void isofs_read_inode(struct inode *inode) unsigned int de_len; unsigned long offset; struct iso_inode_info *ei = ISOFS_I(inode); + int ret = -EIO; block = ei->i_iget5_block; bh = sb_bread(inode->i_sb, block); @@ -1216,6 +1228,7 @@ static void isofs_read_inode(struct inode *inode) tmpde = kmalloc(de_len, GFP_KERNEL); if (tmpde == NULL) { printk(KERN_INFO "%s: out of memory\n", __func__); + ret = -ENOMEM; goto fail; } memcpy(tmpde, bh->b_data + offset, frag1); @@ -1259,8 +1272,10 @@ static void isofs_read_inode(struct inode *inode) ei->i_section_size = isonum_733(de->size); if (de->flags[-high_sierra] & 0x80) { - if(isofs_read_level3_size(inode)) + ret = isofs_read_level3_size(inode); + if (ret < 0) goto fail; + ret = -EIO; } else { ei->i_next_section_block = 0; ei->i_next_section_offset = 0; @@ -1346,16 +1361,16 @@ static void isofs_read_inode(struct inode *inode) /* XXX - parse_rock_ridge_inode() had already set i_rdev. 
*/ init_special_inode(inode, inode->i_mode, inode->i_rdev); + ret = 0; out: kfree(tmpde); if (bh) brelse(bh); - return; + return ret; out_badread: printk(KERN_WARNING "ISOFS: unable to read i-node block\n"); fail: - make_bad_inode(inode); goto out; } @@ -1394,9 +1409,10 @@ struct inode *isofs_iget(struct super_block *sb, unsigned long hashval; struct inode *inode; struct isofs_iget5_callback_data data; + long ret; if (offset >= 1ul << sb->s_blocksize_bits) - return NULL; + return ERR_PTR(-EINVAL); data.block = block; data.offset = offset; @@ -1406,9 +1422,17 @@ struct inode *isofs_iget(struct super_block *sb, inode = iget5_locked(sb, hashval, &isofs_iget5_test, &isofs_iget5_set, &data); - if (inode && (inode->i_state & I_NEW)) { - sb->s_op->read_inode(inode); - unlock_new_inode(inode); + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) { + ret = isofs_read_inode(inode); + if (ret < 0) { + iget_failed(inode); + inode = ERR_PTR(ret); + } else { + unlock_new_inode(inode); + } } return inode; diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index e2b4dad39ca..344b247bc29 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -179,9 +179,9 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam inode = NULL; if (found) { inode = isofs_iget(dir->i_sb, block, offset); - if (!inode) { + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index f3a1db3098d..6bd48f0a704 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c @@ -474,8 +474,10 @@ repeat: isofs_iget(inode->i_sb, ISOFS_I(inode)->i_first_extent, 0); - if (!reloc) + if (IS_ERR(reloc)) { + ret = PTR_ERR(reloc); goto out; + } inode->i_mode = reloc->i_mode; inode->i_nlink = reloc->i_nlink; inode->i_uid = reloc->i_uid; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5d14243499d..3943a8905eb 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1457,7 
+1457,7 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, * and don't attempt to make any other journal updates. */ -void __journal_abort_hard(journal_t *journal) +static void __journal_abort_hard(journal_t *journal) { transaction_t *transaction; char b[BDEVNAME_SIZE]; diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index c5d9694b6a2..2b8edf4d6ea 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -354,7 +354,7 @@ static int do_one_pass(journal_t *journal, struct buffer_head * obh; struct buffer_head * nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 921680663fa..d36356f7d22 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -397,7 +397,7 @@ static int do_one_pass(journal_t *journal, struct buffer_head * obh; struct buffer_head * nbh; - cond_resched(); /* We're under lock_kernel() */ + cond_resched(); /* If we already know where to stop the log traversal, * check right now that we haven't gone past the end of diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 787e392ffd4..f948f7e6ec8 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -101,10 +101,10 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, ino = fd->ino; up(&dir_f->sem); if (ino) { - inode = iget(dir_i->i_sb, ino); - if (!inode) { + inode = jffs2_iget(dir_i->i_sb, ino); + if (IS_ERR(inode)) { printk(KERN_WARNING "iget() failed for ino #%u\n", ino); - return (ERR_PTR(-EIO)); + return ERR_CAST(inode); } } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index ee192af0b8b..e26ea78c789 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -226,16 +226,23 @@ void jffs2_clear_inode (struct inode *inode) jffs2_do_clear_inode(c, f); } -void jffs2_read_inode (struct inode *inode) +struct 
inode *jffs2_iget(struct super_block *sb, unsigned long ino) { struct jffs2_inode_info *f; struct jffs2_sb_info *c; struct jffs2_raw_inode latest_node; union jffs2_device_node jdev; + struct inode *inode; dev_t rdev = 0; int ret; - D1(printk(KERN_DEBUG "jffs2_read_inode(): inode->i_ino == %lu\n", inode->i_ino)); + D1(printk(KERN_DEBUG "jffs2_iget(): ino == %lu\n", ino)); + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; f = JFFS2_INODE_INFO(inode); c = JFFS2_SB_INFO(inode->i_sb); @@ -246,9 +253,9 @@ void jffs2_read_inode (struct inode *inode) ret = jffs2_do_read_inode(c, f, inode->i_ino, &latest_node); if (ret) { - make_bad_inode(inode); up(&f->sem); - return; + iget_failed(inode); + return ERR_PTR(ret); } inode->i_mode = jemode_to_cpu(latest_node.mode); inode->i_uid = je16_to_cpu(latest_node.uid); @@ -299,19 +306,14 @@ void jffs2_read_inode (struct inode *inode) if (f->metadata->size != sizeof(jdev.old) && f->metadata->size != sizeof(jdev.new)) { printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size); - up(&f->sem); - jffs2_do_clear_inode(c, f); - make_bad_inode(inode); - return; + goto error_io; } D1(printk(KERN_DEBUG "Reading device numbers from flash\n")); - if (jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size) < 0) { + ret = jffs2_read_dnode(c, f, f->metadata, (char *)&jdev, 0, f->metadata->size); + if (ret < 0) { /* Eep */ printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino); - up(&f->sem); - jffs2_do_clear_inode(c, f); - make_bad_inode(inode); - return; + goto error; } if (f->metadata->size == sizeof(jdev.old)) rdev = old_decode_dev(je16_to_cpu(jdev.old)); @@ -331,6 +333,16 @@ void jffs2_read_inode (struct inode *inode) up(&f->sem); D1(printk(KERN_DEBUG "jffs2_read_inode() returning\n")); + unlock_new_inode(inode); + return inode; + +error_io: + ret = -EIO; +error: + up(&f->sem); + 
jffs2_do_clear_inode(c, f); + iget_failed(inode); + return ERR_PTR(ret); } void jffs2_dirty_inode(struct inode *inode) @@ -518,15 +530,16 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) if ((ret = jffs2_do_mount_fs(c))) goto out_inohash; - ret = -EINVAL; - D1(printk(KERN_DEBUG "jffs2_do_fill_super(): Getting root inode\n")); - root_i = iget(sb, 1); - if (is_bad_inode(root_i)) { + root_i = jffs2_iget(sb, 1); + if (IS_ERR(root_i)) { D1(printk(KERN_WARNING "get root inode failed\n")); - goto out_root_i; + ret = PTR_ERR(root_i); + goto out_root; } + ret = -ENOMEM; + D1(printk(KERN_DEBUG "jffs2_do_fill_super(): d_alloc_root()\n")); sb->s_root = d_alloc_root(root_i); if (!sb->s_root) @@ -542,6 +555,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent) out_root_i: iput(root_i); +out_root: jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); if (jffs2_blocks_use_vmalloc(c)) @@ -611,9 +625,9 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c, jffs2_do_unlink() would need the alloc_sem and we have it. Just iget() it, and if read_inode() is necessary that's OK. */ - inode = iget(OFNI_BS_2SFFJ(c), inum); - if (!inode) - return ERR_PTR(-ENOMEM); + inode = jffs2_iget(OFNI_BS_2SFFJ(c), inum); + if (IS_ERR(inode)) + return ERR_CAST(inode); } if (is_bad_inode(inode)) { printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. 
nlink %d\n", diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index bf64686cf09..1b10d259409 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -175,7 +175,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations; /* fs.c */ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); -void jffs2_read_inode (struct inode *); +struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_clear_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index fb89ab5e1d5..e512a93d624 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -742,7 +742,7 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref * are not obsolete. * * Of course, this optimization only makes sense in case - * of NAND flashes (or other flashes whith + * of NAND flashes (or other flashes with * !jffs2_can_mark_obsolete()), since on NOR flashes * nodes are marked obsolete physically. 
* diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index ffa447511e6..4677355996c 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -65,7 +65,6 @@ static const struct super_operations jffs2_super_operations = { .alloc_inode = jffs2_alloc_inode, .destroy_inode =jffs2_destroy_inode, - .read_inode = jffs2_read_inode, .put_super = jffs2_put_super, .write_super = jffs2_write_super, .statfs = jffs2_statfs, diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c index ecdf18d0486..776f13cbf2b 100644 --- a/fs/jffs2/write.c +++ b/fs/jffs2/write.c @@ -177,7 +177,7 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2 void *hold_err = fn->raw; /* Release the full_dnode which is now useless, and return */ jffs2_free_full_dnode(fn); - return ERR_PTR(PTR_ERR(hold_err)); + return ERR_CAST(hold_err); } fn->ofs = je32_to_cpu(ri->offset); fn->size = je32_to_cpu(ri->dsize); @@ -313,7 +313,7 @@ struct jffs2_full_dirent *jffs2_write_dirent(struct jffs2_sb_info *c, struct jff void *hold_err = fd->raw; /* Release the full_dirent which is now useless, and return */ jffs2_free_full_dirent(fd); - return ERR_PTR(PTR_ERR(hold_err)); + return ERR_CAST(hold_err); } if (retried) { diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 4672013802e..210339784b5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -31,11 +31,21 @@ #include "jfs_debug.h" -void jfs_read_inode(struct inode *inode) +struct inode *jfs_iget(struct super_block *sb, unsigned long ino) { - if (diRead(inode)) { - make_bad_inode(inode); - return; + struct inode *inode; + int ret; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ret = diRead(inode); + if (ret < 0) { + iget_failed(inode); + return ERR_PTR(ret); } if (S_ISREG(inode->i_mode)) { @@ -55,6 +65,8 @@ void jfs_read_inode(struct inode *inode) inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } + unlock_new_inode(inode); 
+ return inode; } /* diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 8e2cf2cde18..95a6a11425e 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -24,7 +24,7 @@ extern struct inode *ialloc(struct inode *, umode_t); extern int jfs_fsync(struct file *, struct dentry *, int); extern int jfs_ioctl(struct inode *, struct file *, unsigned int, unsigned long); -extern void jfs_read_inode(struct inode *); +extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode*, int); extern void jfs_delete_inode(struct inode *); diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 1543906a2e0..a000aaa7513 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -3965,7 +3965,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) * xtTruncate_pmap() * * function: - * Perform truncate to zero lenghth for deleted file, leaving the + * Perform truncate to zero length for deleted file, leaving the * the xtree and working map untouched. This allows the file to * be accessed via open file handles, while the delete of the file * is committed to disk. 
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index f8718de3505..403cfc24c6f 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1462,12 +1462,10 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc } } - ip = iget(dip->i_sb, inum); - if (ip == NULL || is_bad_inode(ip)) { + ip = jfs_iget(dip->i_sb, inum); + if (IS_ERR(ip)) { jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum); - if (ip) - iput(ip); - return ERR_PTR(-EACCES); + return ERR_CAST(ip); } dentry = d_splice_alias(ip, dentry); @@ -1485,12 +1483,11 @@ static struct inode *jfs_nfs_get_inode(struct super_block *sb, if (ino == 0) return ERR_PTR(-ESTALE); - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + inode = jfs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + if (generation && inode->i_generation != generation) { iput(inode); return ERR_PTR(-ESTALE); } @@ -1521,17 +1518,14 @@ struct dentry *jfs_get_parent(struct dentry *dentry) parent_ino = le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); - inode = iget(sb, parent_ino); - if (inode) { - if (is_bad_inode(inode)) { + inode = jfs_iget(sb, parent_ino); + if (IS_ERR(inode)) { + parent = ERR_CAST(inode); + } else { + parent = d_alloc_anon(inode); + if (!parent) { + parent = ERR_PTR(-ENOMEM); iput(inode); - parent = ERR_PTR(-EACCES); - } else { - parent = d_alloc_anon(inode); - if (!parent) { - parent = ERR_PTR(-ENOMEM); - iput(inode); - } } } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 70a14001c98..50ea6545173 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -414,7 +414,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; int rc; s64 newLVSize = 0; - int flag; + int flag, ret = -EINVAL; jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); @@ -461,8 +461,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int 
silent) * Initialize direct-mapping inode/address-space */ inode = new_inode(sb); - if (inode == NULL) + if (inode == NULL) { + ret = -ENOMEM; goto out_kfree; + } inode->i_ino = 0; inode->i_nlink = 1; inode->i_size = sb->s_bdev->bd_inode->i_size; @@ -494,9 +496,11 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = JFS_SUPER_MAGIC; - inode = iget(sb, ROOT_I); - if (!inode || is_bad_inode(inode)) + inode = jfs_iget(sb, ROOT_I); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto out_no_root; + } sb->s_root = d_alloc_root(inode); if (!sb->s_root) goto out_no_root; @@ -536,7 +540,7 @@ out_kfree: if (sbi->nls_tab) unload_nls(sbi->nls_tab); kfree(sbi); - return -EINVAL; + return ret; } static void jfs_write_super_lockfs(struct super_block *sb) @@ -726,7 +730,6 @@ out: static const struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, - .read_inode = jfs_read_inode, .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, .delete_inode = jfs_delete_inode, diff --git a/fs/libfs.c b/fs/libfs.c index 6e68b700958..5523bde9638 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -341,13 +341,10 @@ int simple_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { if (!PageUptodate(page)) { - if (to - from != PAGE_CACHE_SIZE) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr, 0, from); - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } + if (to - from != PAGE_CACHE_SIZE) + zero_user_segments(page, + 0, from, + to, PAGE_CACHE_SIZE); } return 0; } diff --git a/fs/locks.c b/fs/locks.c index 8b8388eca05..49354b9c7dc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -125,6 +125,7 @@ #include <linux/syscalls.h> #include <linux/time.h> #include <linux/rcupdate.h> +#include <linux/pid_namespace.h> #include <asm/semaphore.h> #include <asm/uaccess.h> @@ -185,6 +186,7 @@ void locks_init_lock(struct 
file_lock *fl) fl->fl_fasync = NULL; fl->fl_owner = NULL; fl->fl_pid = 0; + fl->fl_nspid = NULL; fl->fl_file = NULL; fl->fl_flags = 0; fl->fl_type = 0; @@ -553,6 +555,8 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) { list_add(&fl->fl_link, &file_lock_list); + fl->fl_nspid = get_pid(task_tgid(current)); + /* insert into file's list */ fl->fl_next = *pos; *pos = fl; @@ -584,6 +588,11 @@ static void locks_delete_lock(struct file_lock **thisfl_p) if (fl->fl_ops && fl->fl_ops->fl_remove) fl->fl_ops->fl_remove(fl); + if (fl->fl_nspid) { + put_pid(fl->fl_nspid); + fl->fl_nspid = NULL; + } + locks_wake_up_blocks(fl); locks_free_lock(fl); } @@ -634,33 +643,6 @@ static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *s return (locks_conflict(caller_fl, sys_fl)); } -static int interruptible_sleep_on_locked(wait_queue_head_t *fl_wait, int timeout) -{ - int result = 0; - DECLARE_WAITQUEUE(wait, current); - - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(fl_wait, &wait); - if (timeout == 0) - schedule(); - else - result = schedule_timeout(timeout); - if (signal_pending(current)) - result = -ERESTARTSYS; - remove_wait_queue(fl_wait, &wait); - __set_current_state(TASK_RUNNING); - return result; -} - -static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *waiter, int time) -{ - int result; - locks_insert_block(blocker, waiter); - result = interruptible_sleep_on_locked(&waiter->fl_wait, time); - __locks_delete_block(waiter); - return result; -} - void posix_test_lock(struct file *filp, struct file_lock *fl) { @@ -673,55 +655,67 @@ posix_test_lock(struct file *filp, struct file_lock *fl) if (posix_locks_conflict(fl, cfl)) break; } - if (cfl) + if (cfl) { __locks_copy_lock(fl, cfl); - else + if (cfl->fl_nspid) + fl->fl_pid = pid_nr_ns(cfl->fl_nspid, + task_active_pid_ns(current)); + } else fl->fl_type = F_UNLCK; unlock_kernel(); return; } - EXPORT_SYMBOL(posix_test_lock); -/* This function 
tests for deadlock condition before putting a process to - * sleep. The detection scheme is no longer recursive. Recursive was neat, - * but dangerous - we risked stack corruption if the lock data was bad, or - * if the recursion was too deep for any other reason. +/* + * Deadlock detection: + * + * We attempt to detect deadlocks that are due purely to posix file + * locks. * - * We rely on the fact that a task can only be on one lock's wait queue - * at a time. When we find blocked_task on a wait queue we can re-search - * with blocked_task equal to that queue's owner, until either blocked_task - * isn't found, or blocked_task is found on a queue owned by my_task. + * We assume that a task can be waiting for at most one lock at a time. + * So for any acquired lock, the process holding that lock may be + * waiting on at most one other lock. That lock in turns may be held by + * someone waiting for at most one other lock. Given a requested lock + * caller_fl which is about to wait for a conflicting lock block_fl, we + * follow this chain of waiters to ensure we are not about to create a + * cycle. * - * Note: the above assumption may not be true when handling lock requests - * from a broken NFS client. But broken NFS clients have a lot more to - * worry about than proper deadlock detection anyway... --okir + * Since we do this before we ever put a process to sleep on a lock, we + * are ensured that there is never a cycle; that is what guarantees that + * the while() loop in posix_locks_deadlock() eventually completes. * - * However, the failure of this assumption (also possible in the case of - * multiple tasks sharing the same open file table) also means there's no - * guarantee that the loop below will terminate. As a hack, we give up - * after a few iterations. + * Note: the above assumption may not be true when handling lock + * requests from a broken NFS client. 
It may also fail in the presence + * of tasks (such as posix threads) sharing the same open file table. + * + * To handle those cases, we just bail out after a few iterations. */ #define MAX_DEADLK_ITERATIONS 10 +/* Find a lock that the owner of the given block_fl is blocking on. */ +static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl) +{ + struct file_lock *fl; + + list_for_each_entry(fl, &blocked_list, fl_link) { + if (posix_same_owner(fl, block_fl)) + return fl->fl_next; + } + return NULL; +} + static int posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { - struct file_lock *fl; int i = 0; -next_task: - if (posix_same_owner(caller_fl, block_fl)) - return 1; - list_for_each_entry(fl, &blocked_list, fl_link) { - if (posix_same_owner(fl, block_fl)) { - if (i++ > MAX_DEADLK_ITERATIONS) - return 0; - fl = fl->fl_next; - block_fl = fl; - goto next_task; - } + while ((block_fl = what_owner_is_waiting_for(block_fl))) { + if (i++ > MAX_DEADLK_ITERATIONS) + return 0; + if (posix_same_owner(caller_fl, block_fl)) + return 1; } return 0; } @@ -1256,7 +1250,10 @@ restart: if (break_time == 0) break_time++; } - error = locks_block_on_timeout(flock, new_fl, break_time); + locks_insert_block(flock, new_fl); + error = wait_event_interruptible_timeout(new_fl->fl_wait, + !new_fl->fl_next, break_time); + __locks_delete_block(new_fl); if (error >= 0) { if (error == 0) time_out_leases(inode); @@ -2084,6 +2081,12 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, int id, char *pfx) { struct inode *inode = NULL; + unsigned int fl_pid; + + if (fl->fl_nspid) + fl_pid = pid_nr_ns(fl->fl_nspid, task_active_pid_ns(current)); + else + fl_pid = fl->fl_pid; if (fl->fl_file != NULL) inode = fl->fl_file->f_path.dentry->d_inode; @@ -2124,16 +2127,16 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, } if (inode) { #ifdef WE_CAN_BREAK_LSLK_NOW - seq_printf(f, "%d %s:%ld ", fl->fl_pid, + seq_printf(f, 
"%d %s:%ld ", fl_pid, inode->i_sb->s_id, inode->i_ino); #else /* userspace relies on this representation of dev_t ;-( */ - seq_printf(f, "%d %02x:%02x:%ld ", fl->fl_pid, + seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); #endif } else { - seq_printf(f, "%d <none>:0 ", fl->fl_pid); + seq_printf(f, "%d <none>:0 ", fl_pid); } if (IS_POSIX(fl)) { if (fl->fl_end == OFFSET_MAX) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index bf4cd316af8..84f6242ba6f 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -18,7 +18,6 @@ #include <linux/highuid.h> #include <linux/vfs.h> -static void minix_read_inode(struct inode * inode); static int minix_write_inode(struct inode * inode, int wait); static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); static int minix_remount (struct super_block * sb, int * flags, char * data); @@ -96,7 +95,6 @@ static void destroy_inodecache(void) static const struct super_operations minix_sops = { .alloc_inode = minix_alloc_inode, .destroy_inode = minix_destroy_inode, - .read_inode = minix_read_inode, .write_inode = minix_write_inode, .delete_inode = minix_delete_inode, .put_super = minix_put_super, @@ -149,6 +147,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) unsigned long i, block; struct inode *root_inode; struct minix_sb_info *sbi; + int ret = -EINVAL; sbi = kzalloc(sizeof(struct minix_sb_info), GFP_KERNEL); if (!sbi) @@ -246,10 +245,13 @@ static int minix_fill_super(struct super_block *s, void *data, int silent) /* set up enough so that it can read an inode */ s->s_op = &minix_sops; - root_inode = iget(s, MINIX_ROOT_INO); - if (!root_inode || is_bad_inode(root_inode)) + root_inode = minix_iget(s, MINIX_ROOT_INO); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); goto out_no_root; + } + ret = -ENOMEM; s->s_root = d_alloc_root(root_inode); if (!s->s_root) goto out_iput; @@ -290,6 +292,7 @@ out_freemap: goto out_release; 
out_no_map: + ret = -ENOMEM; if (!silent) printk("MINIX-fs: can't allocate map\n"); goto out_release; @@ -316,7 +319,7 @@ out_bad_sb: out: s->s_fs_info = NULL; kfree(sbi); - return -EINVAL; + return ret; } static int minix_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -409,7 +412,7 @@ void minix_set_inode(struct inode *inode, dev_t rdev) /* * The minix V1 function to read an inode. */ -static void V1_minix_read_inode(struct inode * inode) +static struct inode *V1_minix_iget(struct inode *inode) { struct buffer_head * bh; struct minix_inode * raw_inode; @@ -418,8 +421,8 @@ static void V1_minix_read_inode(struct inode * inode) raw_inode = minix_V1_raw_inode(inode->i_sb, inode->i_ino, &bh); if (!raw_inode) { - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; inode->i_uid = (uid_t)raw_inode->i_uid; @@ -435,12 +438,14 @@ static void V1_minix_read_inode(struct inode * inode) minix_inode->u.i1_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); brelse(bh); + unlock_new_inode(inode); + return inode; } /* * The minix V2 function to read an inode. */ -static void V2_minix_read_inode(struct inode * inode) +static struct inode *V2_minix_iget(struct inode *inode) { struct buffer_head * bh; struct minix2_inode * raw_inode; @@ -449,8 +454,8 @@ static void V2_minix_read_inode(struct inode * inode) raw_inode = minix_V2_raw_inode(inode->i_sb, inode->i_ino, &bh); if (!raw_inode) { - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } inode->i_mode = raw_inode->i_mode; inode->i_uid = (uid_t)raw_inode->i_uid; @@ -468,17 +473,27 @@ static void V2_minix_read_inode(struct inode * inode) minix_inode->u.i2_data[i] = raw_inode->i_zone[i]; minix_set_inode(inode, old_decode_dev(raw_inode->i_zone[0])); brelse(bh); + unlock_new_inode(inode); + return inode; } /* * The global function to read an inode. 
*/ -static void minix_read_inode(struct inode * inode) +struct inode *minix_iget(struct super_block *sb, unsigned long ino) { + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + if (INODE_VERSION(inode) == MINIX_V1) - V1_minix_read_inode(inode); + return V1_minix_iget(inode); else - V2_minix_read_inode(inode); + return V2_minix_iget(inode); } /* diff --git a/fs/minix/minix.h b/fs/minix/minix.h index ac5d3a75cb0..326edfe9610 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -45,6 +45,7 @@ struct minix_sb_info { unsigned short s_version; }; +extern struct inode *minix_iget(struct super_block *, unsigned long); extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); extern struct inode * minix_new_inode(const struct inode * dir, int * error); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f4aa7a93904..102241bc9c7 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -54,10 +54,9 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st ino = minix_inode_by_name(dentry); if (ino) { - inode = iget(dir->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); + inode = minix_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } d_add(dentry, inode); return NULL; diff --git a/fs/mpage.c b/fs/mpage.c index d54f8f89722..5df564366f3 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -276,9 +276,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, } if (first_hole != blocks_per_page) { - zero_user_page(page, first_hole << blkbits, - PAGE_CACHE_SIZE - (first_hole << blkbits), - KM_USER0); + zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE); if (first_hole == 0) { SetPageUptodate(page); unlock_page(page); @@ -571,8 +569,7 @@ page_is_mapped: if 
(page->index > end_index || !offset) goto confused; - zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, - KM_USER0); + zero_user_segment(page, offset, PAGE_CACHE_SIZE); } /* diff --git a/fs/namei.c b/fs/namei.c index 73e2e665817..241cff42365 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2188,6 +2188,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + fsnotify_link_count(dentry->d_inode); d_delete(dentry); } @@ -2360,7 +2361,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de error = dir->i_op->link(old_dentry, dir, new_dentry); mutex_unlock(&old_dentry->d_inode->i_mutex); if (!error) - fsnotify_create(dir, new_dentry); + fsnotify_link(dir, old_dentry->d_inode, new_dentry); return error; } diff --git a/fs/namespace.c b/fs/namespace.c index 61bf376e29e..e9c10cd01e1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -25,18 +25,21 @@ #include <linux/security.h> #include <linux/mount.h> #include <linux/ramfs.h> +#include <linux/log2.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include "pnode.h" #include "internal.h" +#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) +#define HASH_SIZE (1UL << HASH_SHIFT) + /* spinlock for vfsmount related operations, inplace of dcache_lock */ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; static struct list_head *mount_hashtable __read_mostly; -static int hash_mask __read_mostly, hash_bits __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; @@ -48,8 +51,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> hash_bits); - return tmp & hash_mask; + tmp = tmp + (tmp >> HASH_SHIFT); + return tmp & 
(HASH_SIZE - 1); } struct vfsmount *alloc_vfsmnt(const char *name) @@ -1813,9 +1816,7 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { - struct list_head *d; - unsigned int nr_hash; - int i; + unsigned u; int err; init_rwsem(&namespace_sem); @@ -1828,35 +1829,11 @@ void __init mnt_init(void) if (!mount_hashtable) panic("Failed to allocate mount hash table\n"); - /* - * Find the power-of-two list-heads that can fit into the allocation.. - * We don't guarantee that "sizeof(struct list_head)" is necessarily - * a power-of-two. - */ - nr_hash = PAGE_SIZE / sizeof(struct list_head); - hash_bits = 0; - do { - hash_bits++; - } while ((nr_hash >> hash_bits) != 0); - hash_bits--; + printk("Mount-cache hash table entries: %lu\n", HASH_SIZE); + + for (u = 0; u < HASH_SIZE; u++) + INIT_LIST_HEAD(&mount_hashtable[u]); - /* - * Re-calculate the actual number of entries and the mask - * from the number of bits we can fit. - */ - nr_hash = 1UL << hash_bits; - hash_mask = nr_hash - 1; - - printk("Mount-cache hash table entries: %d\n", nr_hash); - - /* And initialize the newly allocated array */ - d = mount_hashtable; - i = nr_hash; - do { - INIT_LIST_HEAD(d); - d++; - i--; - } while (i); err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index e1cb70c643f..eff1f18d034 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -987,7 +987,7 @@ static struct file_system_type ncp_fs_type = { static int __init init_ncp_fs(void) { int err; - DPRINTK("ncpfs: init_module called\n"); + DPRINTK("ncpfs: init_ncp_fs called\n"); err = init_inodecache(); if (err) @@ -1004,7 +1004,7 @@ out1: static void __exit exit_ncp_fs(void) { - DPRINTK("ncpfs: cleanup_module called\n"); + DPRINTK("ncpfs: exit_ncp_fs called\n"); unregister_filesystem(&ncp_fs_type); destroy_inodecache(); } diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c index a94473d3072..5d8dcb9ee32 100644 --- a/fs/ncpfs/mmap.c +++ 
b/fs/ncpfs/mmap.c @@ -50,10 +50,6 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area, pos = vmf->pgoff << PAGE_SHIFT; count = PAGE_SIZE; - if ((unsigned long)vmf->virtual_address + PAGE_SIZE > area->vm_end) { - WARN_ON(1); /* shouldn't happen? */ - count = area->vm_end - (unsigned long)vmf->virtual_address; - } /* what we can read in one go */ bufsize = NCP_SERVER(inode)->buffer_size; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index e6242cdbaf9..fae97196daa 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -96,7 +96,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) inode = nfs_fhget(sb, mntfh, fsinfo.fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } error = nfs_superblock_set_dummy_root(sb, inode); @@ -266,7 +266,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) inode = nfs_fhget(sb, mntfh, &fattr); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } error = nfs_superblock_set_dummy_root(sb, inode); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8fd6dfbe1bc..3d7d9631e12 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -79,7 +79,7 @@ void nfs_readdata_release(void *data) static int nfs_return_empty_page(struct page *page) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); SetPageUptodate(page); unlock_page(page); return 0; @@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) pglen = PAGE_CACHE_SIZE - base; for (;;) { if (remainder <= pglen) { - zero_user_page(*pages, base, remainder, KM_USER0); + zero_user(*pages, base, remainder); break; } - zero_user_page(*pages, base, pglen, KM_USER0); + zero_user(*pages, base, pglen); pages++; remainder -= pglen; pglen = PAGE_CACHE_SIZE; @@ -130,7 +130,7 @@ static int 
nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, return PTR_ERR(new); } if (len < PAGE_CACHE_SIZE) - zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); + zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) @@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page) goto out_error; if (len < PAGE_CACHE_SIZE) - zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); + zero_user_segment(page, len, PAGE_CACHE_SIZE); nfs_pageio_add_request(desc->pgio, new); return 0; out_error: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 522efff3e2c..b144b1957dd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -665,9 +665,7 @@ zero_page: * then we need to zero any uninitalised data. */ if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE && !PageUptodate(req->wb_page)) - zero_user_page(req->wb_page, req->wb_bytes, - PAGE_CACHE_SIZE - req->wb_bytes, - KM_USER0); + zero_user_segment(req->wb_page, req->wb_bytes, PAGE_CACHE_SIZE); return req; } diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 21928056e35..d13403e3362 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -11,8 +11,6 @@ #include <linux/nfsd/nfsd.h> #include <linux/nfsd/export.h> -#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) - int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) { struct exp_flavor_info *f; @@ -69,10 +67,12 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) ret = set_current_groups(cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; + current->cap_effective = + cap_drop_nfsd_set(current->cap_effective); } else { - cap_t(current->cap_effective) |= (CAP_NFSD_MASK & - current->cap_permitted); + current->cap_effective = + cap_raise_nfsd_set(current->cap_effective, + current->cap_permitted); } return ret; } diff --git a/fs/nfsd/export.c 
b/fs/nfsd/export.c index 79b4bf81296..346570f6d84 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1218,13 +1218,13 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type, struct svc_export *exp; struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp); if (IS_ERR(ek)) - return ERR_PTR(PTR_ERR(ek)); + return ERR_CAST(ek); exp = exp_get_by_name(clp, ek->ek_mnt, ek->ek_dentry, reqp); cache_put(&ek->h, &svc_expkey_cache); if (IS_ERR(exp)) - return ERR_PTR(PTR_ERR(exp)); + return ERR_CAST(exp); return exp; } diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index ad87cb01299..00e9ccde8e4 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -87,13 +87,17 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) /* Check for the current buffer head overflowing. */ if (unlikely(file_ofs + bh->b_size > init_size)) { int ofs; + void *kaddr; ofs = 0; if (file_ofs < init_size) ofs = init_size - file_ofs; local_irq_save(flags); - zero_user_page(page, bh_offset(bh) + ofs, - bh->b_size - ofs, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + memset(kaddr + bh_offset(bh) + ofs, 0, + bh->b_size - ofs); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); local_irq_restore(flags); } } else { @@ -334,7 +338,7 @@ handle_hole: bh->b_blocknr = -1UL; clear_buffer_mapped(bh); handle_zblock: - zero_user_page(page, i * blocksize, blocksize, KM_USER0); + zero_user(page, i * blocksize, blocksize); if (likely(!err)) set_buffer_uptodate(bh); } while (i++, iblock++, (bh = bh->b_this_page) != head); @@ -410,7 +414,7 @@ retry_readpage: /* Is the page fully outside i_size? (truncate in progress) */ if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); ntfs_debug("Read outside i_size - truncated?"); goto done; } @@ -459,7 +463,7 @@ retry_readpage: * ok to ignore the compressed flag here. 
*/ if (unlikely(page->index > 0)) { - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); goto done; } if (!NInoAttr(ni)) @@ -788,8 +792,7 @@ lock_retry_remap: if (err == -ENOENT || lcn == LCN_ENOENT) { bh->b_blocknr = -1; clear_buffer_dirty(bh); - zero_user_page(page, bh_offset(bh), blocksize, - KM_USER0); + zero_user(page, bh_offset(bh), blocksize); set_buffer_uptodate(bh); err = 0; continue; @@ -1414,8 +1417,7 @@ retry_writepage: if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) { /* The page straddles i_size. */ unsigned int ofs = i_size & ~PAGE_CACHE_MASK; - zero_user_page(page, ofs, PAGE_CACHE_SIZE - ofs, - KM_USER0); + zero_user_segment(page, ofs, PAGE_CACHE_SIZE); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index d1619d05eb2..33ff314cc50 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -565,7 +565,7 @@ int ntfs_read_compressed_block(struct page *page) if (xpage >= max_page) { kfree(bhs); kfree(pages); - zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_CACHE_SIZE); ntfs_debug("Compressed read outside i_size - truncated?"); SetPageUptodate(page); unlock_page(page); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 6cd08dfdc2e..3c5550cd11d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -607,8 +607,8 @@ do_next_page: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -683,9 +683,8 @@ map_buffer_cached: ntfs_submit_bh_for_read(bh); *wait_bh++ = bh; } else { - zero_user_page(page, - bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -703,8 +702,8 @@ map_buffer_cached: */ if (bh_end <= pos || bh_pos >= end) { if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + 
zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } mark_buffer_dirty(bh); @@ -743,8 +742,7 @@ map_buffer_cached: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), blocksize, - KM_USER0); + zero_user(page, bh_offset(bh), blocksize); set_buffer_uptodate(bh); } continue; @@ -868,8 +866,8 @@ rl_not_mapped_enoent: if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh)) { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } continue; @@ -1128,8 +1126,8 @@ rl_not_mapped_enoent: if (likely(bh_pos < initialized_size)) ofs = initialized_size - bh_pos; - zero_user_page(page, bh_offset(bh) + ofs, - blocksize - ofs, KM_USER0); + zero_user_segment(page, bh_offset(bh) + ofs, + blocksize); } } else /* if (unlikely(!buffer_uptodate(bh))) */ err = -EIO; @@ -1269,8 +1267,8 @@ rl_not_mapped_enoent: if (PageUptodate(page)) set_buffer_uptodate(bh); else { - zero_user_page(page, bh_offset(bh), - blocksize, KM_USER0); + zero_user(page, bh_offset(bh), + blocksize); set_buffer_uptodate(bh); } } @@ -1330,7 +1328,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - zero_user_page(*pages, 0, len, KM_USER0); + zero_user(*pages, 0, len); } goto out; } @@ -1451,7 +1449,7 @@ err_out: len = PAGE_CACHE_SIZE; if (len > bytes) len = bytes; - zero_user_page(*pages, 0, len, KM_USER0); + zero_user(*pages, 0, len); } goto out; } diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index e38e402e410..cd0be3f5c3c 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -85,8 +85,7 @@ static inline void *ntfs_malloc_nofs_nofail(unsigned long size) static inline void ntfs_free(void *addr) { - if (likely(((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END ))) { + if (!is_vmalloc_addr(addr)) { kfree(addr); /* free_page((unsigned long)addr); */ return; diff --git 
a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index e6df06ac640..447206eb5c2 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -3338,7 +3338,7 @@ static int ocfs2_insert_path(struct inode *inode, if (insert->ins_split != SPLIT_NONE) { /* * We could call ocfs2_insert_at_leaf() for some types - * of splits, but it's easier to just let one seperate + * of splits, but it's easier to just let one separate * function sort it all out. */ ocfs2_split_record(inode, left_path, right_path, @@ -5670,7 +5670,7 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, mlog_errno(ret); if (zero) - zero_user_page(page, from, to - from, KM_USER0); + zero_user_segment(page, from, to); /* * Need to set the buffers we zero'd into uptodate diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index bc7b4cbbe8e..82243127eeb 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -307,7 +307,7 @@ static int ocfs2_readpage(struct file *file, struct page *page) * XXX sys_readahead() seems to get that wrong? 
*/ if (start >= i_size_read(inode)) { - zero_user_page(page, 0, PAGE_SIZE, KM_USER0); + zero_user(page, 0, PAGE_SIZE); SetPageUptodate(page); ret = 0; goto out_alloc; @@ -869,7 +869,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (block_start >= to) break; - zero_user_page(page, block_start, bh->b_size, KM_USER0); + zero_user(page, block_start, bh->b_size); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -1034,7 +1034,7 @@ static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to start = max(from, block_start); end = min(to, block_end); - zero_user_page(page, start, end - start, KM_USER0); + zero_user_segment(page, start, end); set_buffer_uptodate(bh); } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 6b0107f2134..e280833ceb9 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1215,7 +1215,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, down_write(&oi->ip_alloc_sem); /* - * Prepare for worst case allocation scenario of two seperate + * Prepare for worst case allocation scenario of two separate * extents. */ if (alloc == 2) diff --git a/fs/ocfs2/ocfs1_fs_compat.h b/fs/ocfs2/ocfs1_fs_compat.h index 0b499bccec5..dfb313bda5d 100644 --- a/fs/ocfs2/ocfs1_fs_compat.h +++ b/fs/ocfs2/ocfs1_fs_compat.h @@ -77,7 +77,7 @@ struct ocfs1_disk_lock { /*00*/ __u32 curr_master; __u8 file_lock; - __u8 compat_pad[3]; /* Not in orignal definition. Used to + __u8 compat_pad[3]; /* Not in original definition. Used to make the already existing alignment explicit */ __u64 last_write_time; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 7e397e2c25d..72c198a004d 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -646,7 +646,7 @@ bail: * sync-data inodes." 
* * Note: OCFS2 already does this differently for metadata vs data - * allocations, as those bitmaps are seperate and undo access is never + * allocations, as those bitmaps are separate and undo access is never * called on a metadata group descriptor. */ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 6b7ff161894..d17b4fd204e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -38,6 +38,8 @@ struct op_inode_info { union op_inode_data u; }; +static struct inode *openprom_iget(struct super_block *sb, ino_t ino); + static inline struct op_inode_info *OP_I(struct inode *inode) { return container_of(inode, struct op_inode_info, vfs_inode); @@ -226,10 +228,10 @@ static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry return ERR_PTR(-ENOENT); found: - inode = iget(dir->i_sb, ino); + inode = openprom_iget(dir->i_sb, ino); mutex_unlock(&op_mutex); - if (!inode) - return ERR_PTR(-EINVAL); + if (IS_ERR(inode)) + return ERR_CAST(inode); ent_oi = OP_I(inode); ent_oi->type = ent_type; ent_oi->u = ent_data; @@ -348,14 +350,23 @@ static void openprom_destroy_inode(struct inode *inode) kmem_cache_free(op_inode_cachep, OP_I(inode)); } -static void openprom_read_inode(struct inode * inode) +static struct inode *openprom_iget(struct super_block *sb, ino_t ino) { - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - if (inode->i_ino == OPENPROM_ROOT_INO) { - inode->i_op = &openprom_inode_operations; - inode->i_fop = &openprom_operations; - inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + if (inode->i_ino == OPENPROM_ROOT_INO) { + inode->i_op = &openprom_inode_operations; + inode->i_fop = &openprom_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + 
} + unlock_new_inode(inode); } + return inode; } static int openprom_remount(struct super_block *sb, int *flags, char *data) @@ -367,7 +378,6 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations openprom_sops = { .alloc_inode = openprom_alloc_inode, .destroy_inode = openprom_destroy_inode, - .read_inode = openprom_read_inode, .statfs = simple_statfs, .remount_fs = openprom_remount, }; @@ -376,6 +386,7 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) { struct inode *root_inode; struct op_inode_info *oi; + int ret; s->s_flags |= MS_NOATIME; s->s_blocksize = 1024; @@ -383,9 +394,11 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) s->s_magic = OPENPROM_SUPER_MAGIC; s->s_op = &openprom_sops; s->s_time_gran = 1; - root_inode = iget(s, OPENPROM_ROOT_INO); - if (!root_inode) + root_inode = openprom_iget(s, OPENPROM_ROOT_INO); + if (IS_ERR(root_inode)) { + ret = PTR_ERR(root_inode); goto out_no_root; + } oi = OP_I(root_inode); oi->type = op_inode_node; @@ -393,13 +406,15 @@ static int openprom_fill_super(struct super_block *s, void *data, int silent) s->s_root = d_alloc_root(root_inode); if (!s->s_root) - goto out_no_root; + goto out_no_root_dentry; return 0; +out_no_root_dentry: + iput(root_inode); + ret = -ENOMEM; out_no_root: printk("openprom_fill_super: get root inode failed\n"); - iput(root_inode); - return -ENOMEM; + return ret; } static int openprom_get_sb(struct file_system_type *fs_type, diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig index a99acd8de35..cb5f0a3f1b0 100644 --- a/fs/partitions/Kconfig +++ b/fs/partitions/Kconfig @@ -198,7 +198,7 @@ config LDM_DEBUG config SGI_PARTITION bool "SGI partition support" if PARTITION_ADVANCED - default y if (SGI_IP22 || SGI_IP27 || ((MACH_JAZZ || SNI_RM) && !CPU_LITTLE_ENDIAN)) + default y if DEFAULT_SGI_PARTITION help Say Y here if you would like to be able to read the hard disk 
partition table format used by SGI machines. diff --git a/fs/pnode.c b/fs/pnode.c index 89940f243fc..05ba692bc54 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -83,6 +83,8 @@ void change_mnt_propagation(struct vfsmount *mnt, int type) mnt->mnt_master = NULL; if (type == MS_UNBINDABLE) mnt->mnt_flags |= MNT_UNBINDABLE; + else + mnt->mnt_flags &= ~MNT_UNBINDABLE; } } diff --git a/fs/proc/array.c b/fs/proc/array.c index b380313092b..6ba2746e451 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -281,14 +281,23 @@ static inline char *task_sig(struct task_struct *p, char *buffer) return buffer; } +static char *render_cap_t(const char *header, kernel_cap_t *a, char *buffer) +{ + unsigned __capi; + + buffer += sprintf(buffer, "%s", header); + CAP_FOR_EACH_U32(__capi) { + buffer += sprintf(buffer, "%08x", + a->cap[(_LINUX_CAPABILITY_U32S-1) - __capi]); + } + return buffer + sprintf(buffer, "\n"); +} + static inline char *task_cap(struct task_struct *p, char *buffer) { - return buffer + sprintf(buffer, "CapInh:\t%016x\n" - "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", - cap_t(p->cap_inheritable), - cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + buffer = render_cap_t("CapInh:\t", &p->cap_inheritable, buffer); + buffer = render_cap_t("CapPrm:\t", &p->cap_permitted, buffer); + return render_cap_t("CapEff:\t", &p->cap_effective, buffer); } static inline char *task_context_switch_counts(struct task_struct *p, diff --git a/fs/proc/base.c b/fs/proc/base.c index 33537487f5a..c59852b3878 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,10 +88,6 @@ * in /proc for a task before it execs a suid executable. */ - -/* Worst case buffer size needed for holding an integer. 
*/ -#define PROC_NUMBUF 13 - struct pid_entry { char *name; int len; @@ -787,7 +783,7 @@ out_no_task: } #endif -static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: @@ -935,42 +931,6 @@ static const struct file_operations proc_oom_adjust_operations = { .write = oom_adjust_write, }; -#ifdef CONFIG_MMU -static ssize_t clear_refs_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - struct mm_struct *mm; - - memset(buffer, 0, sizeof(buffer)); - if (count > sizeof(buffer) - 1) - count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - if (!simple_strtol(buffer, &end, 0)) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); - if (!task) - return -ESRCH; - mm = get_task_mm(task); - if (mm) { - clear_refs_smap(mm); - mmput(mm); - } - put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; -} - -static struct file_operations proc_clear_refs_operations = { - .write = clear_refs_write, -}; -#endif - #ifdef CONFIG_AUDITSYSCALL #define TMPBUFLEN 21 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, @@ -2289,9 +2249,10 @@ static const struct pid_entry tgid_base_stuff[] = { LNK("exe", exe), REG("mounts", S_IRUGO, mounts), REG("mountstats", S_IRUSR, mountstats), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), @@ -2360,7 +2321,8 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); + if (!(current->flags & 
PF_EXITING)) + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } @@ -2617,9 +2579,10 @@ static const struct pid_entry tid_base_stuff[] = { LNK("root", root), LNK("exe", exe), REG("mounts", S_IRUGO, mounts), -#ifdef CONFIG_MMU +#ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, clear_refs), REG("smaps", S_IRUGO, smaps), + REG("pagemap", S_IRUSR, pagemap), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, attr_dir), diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 1a551d92e1d..6ecf6396f07 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -73,11 +73,6 @@ static void proc_delete_inode(struct inode *inode) struct vfsmount *proc_mnt; -static void proc_read_inode(struct inode * inode) -{ - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -} - static struct kmem_cache * proc_inode_cachep; static struct inode *proc_alloc_inode(struct super_block *sb) @@ -128,7 +123,6 @@ static int proc_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, - .read_inode = proc_read_inode, .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, @@ -401,39 +395,41 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, if (de != NULL && !try_module_get(de->owner)) goto out_mod; - inode = iget(sb, ino); + inode = iget_locked(sb, ino); if (!inode) goto out_ino; - - PROC_I(inode)->fd = 0; - PROC_I(inode)->pde = de; - if (de) { - if (de->mode) { - inode->i_mode = de->mode; - inode->i_uid = de->uid; - inode->i_gid = de->gid; - } - if (de->size) - inode->i_size = de->size; - if (de->nlink) - inode->i_nlink = de->nlink; - if (de->proc_iops) - inode->i_op = de->proc_iops; - if (de->proc_fops) { - if (S_ISREG(inode->i_mode)) { + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + PROC_I(inode)->fd = 0; + PROC_I(inode)->pde = 
de; + if (de) { + if (de->mode) { + inode->i_mode = de->mode; + inode->i_uid = de->uid; + inode->i_gid = de->gid; + } + if (de->size) + inode->i_size = de->size; + if (de->nlink) + inode->i_nlink = de->nlink; + if (de->proc_iops) + inode->i_op = de->proc_iops; + if (de->proc_fops) { + if (S_ISREG(inode->i_mode)) { #ifdef CONFIG_COMPAT - if (!de->proc_fops->compat_ioctl) - inode->i_fop = - &proc_reg_file_ops_no_compat; - else + if (!de->proc_fops->compat_ioctl) + inode->i_fop = + &proc_reg_file_ops_no_compat; + else #endif - inode->i_fop = &proc_reg_file_ops; + inode->i_fop = &proc_reg_file_ops; + } else { + inode->i_fop = de->proc_fops; + } } - else - inode->i_fop = de->proc_fops; } + unlock_new_inode(inode); } - return inode; out_ino: diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 05b3e900626..7d57e806992 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -52,15 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *); extern int proc_tgid_stat(struct task_struct *, char *); extern int proc_pid_status(struct task_struct *, char *); extern int proc_pid_statm(struct task_struct *, char *); +extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); extern const struct file_operations proc_maps_operations; extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; - -extern const struct file_operations proc_maps_operations; -extern const struct file_operations proc_numa_maps_operations; -extern const struct file_operations proc_smaps_operations; - +extern const struct file_operations proc_clear_refs_operations; +extern const struct file_operations proc_pagemap_operations; void free_proc_entry(struct proc_dir_entry *de); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 1be73082edd..7dd26e18cbf 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -325,7 +325,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (m == NULL) { if 
(clear_user(buffer, tsz)) return -EFAULT; - } else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { + } else if (is_vmalloc_addr((void *)start)) { char * elf_buf; struct vm_struct *m; unsigned long curstart = start; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 3462bfde89f..2686592dbcb 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -29,6 +29,7 @@ #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/pagemap.h> +#include <linux/interrupt.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/smp.h> @@ -46,6 +47,7 @@ #include <linux/vmalloc.h> #include <linux/crash_dump.h> #include <linux/pid_namespace.h> +#include <linux/bootmem.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -63,7 +65,6 @@ */ extern int get_hardware_list(char *); extern int get_stram_list(char *); -extern int get_filesystem_list(char *); extern int get_exec_domain_list(char *); extern int get_dma_list(char *); @@ -83,10 +84,15 @@ static int loadavg_read_proc(char *page, char **start, off_t off, { int a, b, c; int len; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + } while (read_seqretry(&xtime_lock, seq)); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), @@ -598,7 +604,6 @@ static void int_seq_stop(struct seq_file *f, void *v) } -extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */ static struct seq_operations int_seq_ops = { .start = int_seq_start, .next = int_seq_next, @@ -675,6 +680,137 @@ static const struct file_operations proc_sysrq_trigger_operations = { }; #endif +#ifdef CONFIG_PROC_PAGE_MONITOR +#define KPMSIZE sizeof(u64) +#define KPMMASK (KPMSIZE - 1) +/* /proc/kpagecount - an array exposing page 
counts + * + * Each entry is a u64 representing the corresponding + * physical page count. + */ +static ssize_t kpagecount_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 pcount; + + pfn = src / KPMSIZE; + count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + pcount = 0; + else + pcount = atomic_read(&ppage->_count); + + if (put_user(pcount, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpagecount_operations = { + .llseek = mem_lseek, + .read = kpagecount_read, +}; + +/* /proc/kpageflags - an array exposing page flags + * + * Each entry is a u64 representing the corresponding + * physical page flags. 
+ */ + +/* These macros are used to decouple internal flags from exported ones */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) + +static ssize_t kpageflags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 kflags, uflags; + + pfn = src / KPMSIZE; + count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EIO; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + kflags = 0; + else + kflags = ppage->flags; + + uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | + kpf_copy_bit(kflags, KPF_ERROR, PG_error) | + kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | + kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | + kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | + kpf_copy_bit(kflags, KPF_LRU, PG_lru) | + kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | + kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | + kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | + kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | + kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); + + if (put_user(uflags, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static struct file_operations proc_kpageflags_operations = { + .llseek = mem_lseek, + .read = kpageflags_read, +}; +#endif /* CONFIG_PROC_PAGE_MONITOR */ + struct proc_dir_entry *proc_root_kcore; void create_seq_entry(char *name, mode_t mode, const struct file_operations *f) @@ -755,6 
+891,10 @@ void __init proc_misc_init(void) (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; } #endif +#ifdef CONFIG_PROC_PAGE_MONITOR + create_seq_entry("kpagecount", S_IRUSR, &proc_kpagecount_operations); + create_seq_entry("kpageflags", S_IRUSR, &proc_kpageflags_operations); +#endif #ifdef CONFIG_PROC_VMCORE proc_vmcore = create_proc_entry("vmcore", S_IRUSR, NULL); if (proc_vmcore) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8043a3eab52..38338ed98cc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -5,7 +5,10 @@ #include <linux/highmem.h> #include <linux/ptrace.h> #include <linux/pagemap.h> +#include <linux/ptrace.h> #include <linux/mempolicy.h> +#include <linux/swap.h> +#include <linux/swapops.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } -struct mem_size_stats +static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) { - unsigned long resident; - unsigned long shared_clean; - unsigned long shared_dirty; - unsigned long private_clean; - unsigned long private_dirty; - unsigned long referenced; -}; + if (vma && vma != priv->tail_vma) { + struct mm_struct *mm = vma->vm_mm; + up_read(&mm->mmap_sem); + mmput(mm); + } +} -struct pmd_walker { - struct vm_area_struct *vma; - void *private; - void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, - unsigned long, void *); -}; +static void *m_start(struct seq_file *m, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + unsigned long last_addr = m->version; + struct mm_struct *mm; + struct vm_area_struct *vma, *tail_vma = NULL; + loff_t l = *pos; + + /* Clear the per syscall fields in priv */ + priv->task = NULL; + priv->tail_vma = NULL; + + /* + * We remember last_addr rather than next_addr to hit with + * mmap_cache most of the time. We have zero last_addr at + * the beginning and also after lseek. 
We will have -1 last_addr + * after the end of the vmas. + */ + + if (last_addr == -1UL) + return NULL; + + priv->task = get_pid_task(priv->pid, PIDTYPE_PID); + if (!priv->task) + return NULL; + + mm = mm_for_maps(priv->task); + if (!mm) + return NULL; + + tail_vma = get_gate_vma(priv->task); + priv->tail_vma = tail_vma; + + /* Start with last addr hint */ + vma = find_vma(mm, last_addr); + if (last_addr && vma) { + vma = vma->vm_next; + goto out; + } + + /* + * Check the vma index is within the range and do + * sequential scan until m_index. + */ + vma = NULL; + if ((unsigned long)l < mm->map_count) { + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; + goto out; + } + + if (l != mm->map_count) + tail_vma = NULL; /* After gate vma */ + +out: + if (vma) + return vma; + + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; + up_read(&mm->mmap_sem); + mmput(mm); + return tail_vma; +} -static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = priv->tail_vma; + + (*pos)++; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; + vma_stop(priv, vma); + return (vma != tail_vma)? 
tail_vma: NULL; +} + +static void m_stop(struct seq_file *m, void *v) +{ + struct proc_maps_private *priv = m->private; + struct vm_area_struct *vma = v; + + vma_stop(priv, vma); + if (priv->task) + put_task_struct(priv->task); +} + +static int do_maps_open(struct inode *inode, struct file *file, + struct seq_operations *ops) +{ + struct proc_maps_private *priv; + int ret = -ENOMEM; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (priv) { + priv->pid = proc_pid(inode); + ret = seq_open(file, ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = priv; + } else { + kfree(priv); + } + } + return ret; +} + +static int show_map(struct seq_file *m, void *v) { struct proc_maps_private *priv = m->private; struct task_struct *task = priv->task; @@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats } seq_putc(m, '\n'); - if (mss) - seq_printf(m, - "Size: %8lu kB\n" - "Rss: %8lu kB\n" - "Shared_Clean: %8lu kB\n" - "Shared_Dirty: %8lu kB\n" - "Private_Clean: %8lu kB\n" - "Private_Dirty: %8lu kB\n" - "Referenced: %8lu kB\n", - (vma->vm_end - vma->vm_start) >> 10, - mss->resident >> 10, - mss->shared_clean >> 10, - mss->shared_dirty >> 10, - mss->private_clean >> 10, - mss->private_dirty >> 10, - mss->referenced >> 10); - if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? 
vma->vm_start: 0; return 0; } -static int show_map(struct seq_file *m, void *v) +static struct seq_operations proc_pid_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_map +}; + +static int maps_open(struct inode *inode, struct file *file) { - return show_map_internal(m, v, NULL); + return do_maps_open(inode, file, &proc_pid_maps_op); } -static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +const struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +/* + * Proportional Set Size(PSS): my share of RSS. + * + * PSS of a process is the count of pages it has in memory, where each + * page is divided by the number of processes sharing it. So if a + * process has 1000 pages all to itself, and 1000 shared with one other + * process, its PSS will be 1500. + * + * To keep (accumulated) division errors low, we adopt a 64bit + * fixed-point pss counter to minimize division errors. So (pss >> + * PSS_SHIFT) would be the real byte count. + * + * A shift of 12 before division means (assuming 4K page size): + * - 1M 3-user-pages add up to 8KB errors; + * - supports mapcount up to 2^24, or 16M; + * - supports PSS up to 2^52 bytes, or 4PB. 
+ */ +#define PSS_SHIFT 12 + +#ifdef CONFIG_PROC_PAGE_MONITOR +struct mem_size_stats +{ + struct vm_area_struct *vma; + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; + unsigned long referenced; + u64 pss; +}; + +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { struct mem_size_stats *mss = private; + struct vm_area_struct *vma = mss->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; + int mapcount; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { @@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, /* Accumulate the size in pages that have been accessed. */ if (pte_young(ptent) || PageReferenced(page)) mss->referenced += PAGE_SIZE; - if (page_mapcount(page) >= 2) { + mapcount = page_mapcount(page); + if (mapcount >= 2) { if (pte_dirty(ptent)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; } else { if (pte_dirty(ptent)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; + mss->pss += (PAGE_SIZE << PSS_SHIFT); } } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - void *private) +static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; + +static int show_smap(struct seq_file *m, void *v) { + struct vm_area_struct *vma = v; + struct mem_size_stats mss; + int ret; + + memset(&mss, 0, sizeof mss); + mss.vma = vma; + if (vma->vm_mm && !is_vm_hugetlb_page(vma)) + walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, + &smaps_walk, &mss); + + ret = show_map(m, v); + if (ret) + return ret; + + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Pss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + 
"Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n" + "Referenced: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + mss.resident >> 10, + (unsigned long)(mss.pss >> (10 + PSS_SHIFT)), + mss.shared_clean >> 10, + mss.shared_dirty >> 10, + mss.private_clean >> 10, + mss.private_dirty >> 10, + mss.referenced >> 10); + + return ret; +} + +static struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + +static int smaps_open(struct inode *inode, struct file *file) +{ + return do_maps_open(inode, file, &proc_pid_smaps_op); +} + +const struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, void *private) +{ + struct vm_area_struct *vma = private; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } pte_unmap_unlock(pte - 1, ptl); cond_resched(); + return 0; } -static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, - unsigned long addr, unsigned long end) +static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; + +static ssize_t clear_refs_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { - pmd_t *pmd; - unsigned long next; + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + struct mm_struct *mm; + struct vm_area_struct *vma; - for (pmd = pmd_offset(pud, addr); addr != end; - pmd++, addr = next) { - next = pmd_addr_end(addr, end); - if (pmd_none_or_clear_bad(pmd)) - continue; - walker->action(walker->vma, pmd, addr, next, walker->private); + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + if 
(!simple_strtol(buffer, &end, 0)) + return -EINVAL; + if (*end == '\n') + end++; + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) + return -ESRCH; + mm = get_task_mm(task); + if (mm) { + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) + if (!is_vm_hugetlb_page(vma)) + walk_page_range(mm, vma->vm_start, vma->vm_end, + &clear_refs_walk, vma); + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); } + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; } -static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, - unsigned long addr, unsigned long end) -{ - pud_t *pud; - unsigned long next; +const struct file_operations proc_clear_refs_operations = { + .write = clear_refs_write, +}; - for (pud = pud_offset(pgd, addr); addr != end; - pud++, addr = next) { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - continue; - walk_pmd_range(walker, pud, addr, next); +struct pagemapread { + char __user *out, *end; +}; + +#define PM_ENTRY_BYTES sizeof(u64) +#define PM_RESERVED_BITS 3 +#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS) +#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET) +#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK) +#define PM_NOT_PRESENT PM_SPECIAL(1LL) +#define PM_SWAP PM_SPECIAL(2LL) +#define PM_END_OF_BUFFER 1 + +static int add_to_pagemap(unsigned long addr, u64 pfn, + struct pagemapread *pm) +{ + /* + * Make sure there's room in the buffer for an + * entire entry. Otherwise, only copy part of + * the pfn. 
+ */ + if (pm->out + PM_ENTRY_BYTES >= pm->end) { + if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) + return -EFAULT; + pm->out = pm->end; + return PM_END_OF_BUFFER; } + + if (put_user(pfn, pm->out)) + return -EFAULT; + pm->out += PM_ENTRY_BYTES; + return 0; } -/* - * walk_page_range - walk the page tables of a VMA with a callback - * @vma - VMA to walk - * @action - callback invoked for every bottom-level (PTE) page table - * @private - private data passed to the callback function - * - * Recursively walk the page table for the memory area in a VMA, calling - * a callback for every bottom-level (PTE) page table. - */ -static inline void walk_page_range(struct vm_area_struct *vma, - void (*action)(struct vm_area_struct *, - pmd_t *, unsigned long, - unsigned long, void *), - void *private) +static int pagemap_pte_hole(unsigned long start, unsigned long end, + void *private) { - unsigned long addr = vma->vm_start; - unsigned long end = vma->vm_end; - struct pmd_walker walker = { - .vma = vma, - .private = private, - .action = action, - }; - pgd_t *pgd; - unsigned long next; - - for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; - pgd++, addr = next) { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - walk_pud_range(&walker, pgd, addr, next); + struct pagemapread *pm = private; + unsigned long addr; + int err = 0; + for (addr = start; addr < end; addr += PAGE_SIZE) { + err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); + if (err) + break; } + return err; } -static int show_smap(struct seq_file *m, void *v) +u64 swap_pte_to_pagemap_entry(pte_t pte) { - struct vm_area_struct *vma = v; - struct mem_size_stats mss; - - memset(&mss, 0, sizeof mss); - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, smaps_pte_range, &mss); - return show_map_internal(m, v, &mss); + swp_entry_t e = pte_to_swp_entry(pte); + return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -void clear_refs_smap(struct mm_struct *mm) 
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) { - struct vm_area_struct *vma; + struct pagemapread *pm = private; + pte_t *pte; + int err = 0; + + for (; addr != end; addr += PAGE_SIZE) { + u64 pfn = PM_NOT_PRESENT; + pte = pte_offset_map(pmd, addr); + if (is_swap_pte(*pte)) + pfn = swap_pte_to_pagemap_entry(*pte); + else if (pte_present(*pte)) + pfn = pte_pfn(*pte); + /* unmap so we're not in atomic when we copy to userspace */ + pte_unmap(pte); + err = add_to_pagemap(addr, pfn, pm); + if (err) + return err; + } - down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma, clear_refs_pte_range, NULL); - flush_tlb_mm(mm); - up_read(&mm->mmap_sem); + cond_resched(); + + return err; } -static void *m_start(struct seq_file *m, loff_t *pos) +static struct mm_walk pagemap_walk = { + .pmd_entry = pagemap_pte_range, + .pte_hole = pagemap_pte_hole +}; + +/* + * /proc/pid/pagemap - an array mapping virtual pages to pfns + * + * For each page in the address space, this file contains one 64-bit + * entry representing the corresponding physical page frame number + * (PFN) if the page is present. If there is a swap entry for the + * physical page, then an encoding of the swap file number and the + * page's offset into the swap file are returned. If no page is + * present at all, PM_NOT_PRESENT is returned. This allows determining + * precisely which pages are mapped (or in swap) and comparing mapped + * pages between processes. + * + * Efficient users of this interface will use /proc/pid/maps to + * determine which areas of memory are actually mapped and llseek to + * skip over unmapped regions. 
+ */ +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { - struct proc_maps_private *priv = m->private; - unsigned long last_addr = m->version; + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct page **pages, *page; + unsigned long uaddr, uend; struct mm_struct *mm; - struct vm_area_struct *vma, *tail_vma = NULL; - loff_t l = *pos; - - /* Clear the per syscall fields in priv */ - priv->task = NULL; - priv->tail_vma = NULL; + struct pagemapread pm; + int pagecount; + int ret = -ESRCH; - /* - * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at - * the beginning and also after lseek. We will have -1 last_addr - * after the end of the vmas. - */ + if (!task) + goto out; - if (last_addr == -1UL) - return NULL; + ret = -EACCES; + if (!ptrace_may_attach(task)) + goto out; - priv->task = get_pid_task(priv->pid, PIDTYPE_PID); - if (!priv->task) - return NULL; + ret = -EINVAL; + /* file position must be aligned */ + if (*ppos % PM_ENTRY_BYTES) + goto out; - mm = mm_for_maps(priv->task); + ret = 0; + mm = get_task_mm(task); if (!mm) - return NULL; - - priv->tail_vma = tail_vma = get_gate_vma(priv->task); - - /* Start with last addr hint */ - if (last_addr && (vma = find_vma(mm, last_addr))) { - vma = vma->vm_next; goto out; - } - /* - * Check the vma index is within the range and do - * sequential scan until m_index. 
- */ - vma = NULL; - if ((unsigned long)l < mm->map_count) { - vma = mm->mmap; - while (l-- && vma) - vma = vma->vm_next; - goto out; - } + ret = -ENOMEM; + uaddr = (unsigned long)buf & PAGE_MASK; + uend = (unsigned long)(buf + count); + pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; + pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out_task; - if (l != mm->map_count) - tail_vma = NULL; /* After gate vma */ + down_read(&current->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, pagecount, + 1, 0, pages, NULL); + up_read(&current->mm->mmap_sem); -out: - if (vma) - return vma; + if (ret < 0) + goto out_free; - /* End of vmas has been reached */ - m->version = (tail_vma != NULL)? 0: -1UL; - up_read(&mm->mmap_sem); - mmput(mm); - return tail_vma; -} + pm.out = buf; + pm.end = buf + count; -static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) -{ - if (vma && vma != priv->tail_vma) { - struct mm_struct *mm = vma->vm_mm; - up_read(&mm->mmap_sem); - mmput(mm); + if (!ptrace_may_attach(task)) { + ret = -EIO; + } else { + unsigned long src = *ppos; + unsigned long svpfn = src / PM_ENTRY_BYTES; + unsigned long start_vaddr = svpfn << PAGE_SHIFT; + unsigned long end_vaddr = TASK_SIZE_OF(task); + + /* watch out for wraparound */ + if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) + start_vaddr = end_vaddr; + + /* + * The odds are that this will stop walking way + * before end_vaddr, because the length of the + * user buffer is tracked in "pm", and the walk + * will stop when we hit the end of the buffer. 
+ */ + ret = walk_page_range(mm, start_vaddr, end_vaddr, + &pagemap_walk, &pm); + if (ret == PM_END_OF_BUFFER) + ret = 0; + /* don't need mmap_sem for these, but this looks cleaner */ + *ppos += pm.out - buf; + if (!ret) + ret = pm.out - buf; } -} - -static void *m_next(struct seq_file *m, void *v, loff_t *pos) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - struct vm_area_struct *tail_vma = priv->tail_vma; - - (*pos)++; - if (vma && (vma != tail_vma) && vma->vm_next) - return vma->vm_next; - vma_stop(priv, vma); - return (vma != tail_vma)? tail_vma: NULL; -} - -static void m_stop(struct seq_file *m, void *v) -{ - struct proc_maps_private *priv = m->private; - struct vm_area_struct *vma = v; - vma_stop(priv, vma); - if (priv->task) - put_task_struct(priv->task); -} - -static struct seq_operations proc_pid_maps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_map -}; - -static struct seq_operations proc_pid_smaps_op = { - .start = m_start, - .next = m_next, - .stop = m_stop, - .show = show_smap -}; - -static int do_maps_open(struct inode *inode, struct file *file, - struct seq_operations *ops) -{ - struct proc_maps_private *priv; - int ret = -ENOMEM; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (priv) { - priv->pid = proc_pid(inode); - ret = seq_open(file, ops); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = priv; - } else { - kfree(priv); - } + for (; pagecount; pagecount--) { + page = pages[pagecount-1]; + if (!PageReserved(page)) + SetPageDirty(page); + page_cache_release(page); } + mmput(mm); +out_free: + kfree(pages); +out_task: + put_task_struct(task); +out: return ret; } -static int maps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_maps_op); -} - -const struct file_operations proc_maps_operations = { - .open = maps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, +const struct 
file_operations proc_pagemap_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_read, }; +#endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA extern int show_numa_map(struct seq_file *m, void *v); @@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = { .release = seq_release_private, }; #endif - -static int smaps_open(struct inode *inode, struct file *file) -{ - return do_maps_open(inode, file, &proc_pid_smaps_op); -} - -const struct file_operations proc_smaps_operations = { - .open = smaps_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 638bdb96321..b31ab78052b 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -125,7 +125,6 @@ static int qnx4_write_inode(struct inode *inode, int unused) static void qnx4_put_super(struct super_block *sb); static struct inode *qnx4_alloc_inode(struct super_block *sb); static void qnx4_destroy_inode(struct inode *inode); -static void qnx4_read_inode(struct inode *); static int qnx4_remount(struct super_block *sb, int *flags, char *data); static int qnx4_statfs(struct dentry *, struct kstatfs *); @@ -133,7 +132,6 @@ static const struct super_operations qnx4_sops = { .alloc_inode = qnx4_alloc_inode, .destroy_inode = qnx4_destroy_inode, - .read_inode = qnx4_read_inode, .put_super = qnx4_put_super, .statfs = qnx4_statfs, .remount_fs = qnx4_remount, @@ -357,6 +355,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) struct inode *root; const char *errmsg; struct qnx4_sb_info *qs; + int ret = -EINVAL; qs = kzalloc(sizeof(struct qnx4_sb_info), GFP_KERNEL); if (!qs) @@ -396,12 +395,14 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) } /* does root not have inode number QNX4_ROOT_INO ?? 
*/ - root = iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); - if (!root) { + root = qnx4_iget(s, QNX4_ROOT_INO * QNX4_INODES_PER_BLOCK); + if (IS_ERR(root)) { printk("qnx4: get inode failed\n"); + ret = PTR_ERR(root); goto out; } + ret = -ENOMEM; s->s_root = d_alloc_root(root); if (s->s_root == NULL) goto outi; @@ -417,7 +418,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) outnobh: kfree(qs); s->s_fs_info = NULL; - return -EINVAL; + return ret; } static void qnx4_put_super(struct super_block *sb) @@ -462,29 +463,38 @@ static const struct address_space_operations qnx4_aops = { .bmap = qnx4_bmap }; -static void qnx4_read_inode(struct inode *inode) +struct inode *qnx4_iget(struct super_block *sb, unsigned long ino) { struct buffer_head *bh; struct qnx4_inode_entry *raw_inode; - int block, ino; - struct super_block *sb = inode->i_sb; - struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); + int block; + struct qnx4_inode_entry *qnx4_inode; + struct inode *inode; - ino = inode->i_ino; + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + qnx4_inode = qnx4_raw_inode(inode); inode->i_mode = 0; QNX4DEBUG(("Reading inode : [%d]\n", ino)); if (!ino) { - printk("qnx4: bad inode number on dev %s: %d is out of range\n", + printk(KERN_ERR "qnx4: bad inode number on dev %s: %lu is " + "out of range\n", sb->s_id, ino); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } block = ino / QNX4_INODES_PER_BLOCK; if (!(bh = sb_bread(sb, block))) { printk("qnx4: major problem: unable to read inode from dev " "%s\n", sb->s_id); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } raw_inode = ((struct qnx4_inode_entry *) bh->b_data) + (ino % QNX4_INODES_PER_BLOCK); @@ -515,9 +525,16 @@ static void qnx4_read_inode(struct inode *inode) inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &qnx4_aops; qnx4_i(inode)->mmu_private = inode->i_size; - } else - 
printk("qnx4: bad inode %d on dev %s\n",ino,sb->s_id); + } else { + printk(KERN_ERR "qnx4: bad inode %lu on dev %s\n", + ino, sb->s_id); + iget_failed(inode); + brelse(bh); + return ERR_PTR(-EIO); + } brelse(bh); + unlock_new_inode(inode); + return inode; } static struct kmem_cache *qnx4_inode_cachep; diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 733cdf01d64..775eed3a408 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -128,10 +128,12 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam } brelse(bh); - if ((foundinode = iget(dir->i_sb, ino)) == NULL) { + foundinode = qnx4_iget(dir->i_sb, ino); + if (IS_ERR(foundinode)) { unlock_kernel(); - QNX4DEBUG(("qnx4: lookup->iget -> NULL\n")); - return ERR_PTR(-EACCES); + QNX4DEBUG(("qnx4: lookup->iget -> error %ld\n", + PTR_ERR(foundinode))); + return ERR_CAST(foundinode); } out: unlock_kernel(); diff --git a/fs/quota.c b/fs/quota.c index 99b24b52bfc..84f28dd7211 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -341,11 +341,11 @@ static inline struct super_block *quotactl_block(const char __user *special) char *tmp = getname(special); if (IS_ERR(tmp)) - return ERR_PTR(PTR_ERR(tmp)); + return ERR_CAST(tmp); bdev = lookup_bdev(tmp); putname(tmp); if (IS_ERR(bdev)) - return ERR_PTR(PTR_ERR(bdev)); + return ERR_CAST(bdev); sb = get_super(bdev); bdput(bdev); if (!sb) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 16b331dd991..f491ceb5af0 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -272,7 +272,7 @@ static inline int block_group_used(struct super_block *s, u32 id) /* If we don't have cached information on this bitmap block, we're * going to have to load it later anyway. Loading it here allows us - * to make a better decision. This favors long-term performace gain + * to make a better decision. This favors long-term performance gain * with a better on-disk layout vs. a short term gain of skipping the * read and potentially having a bad placement. 
*/ if (info->free_count == UINT_MAX) { @@ -663,7 +663,7 @@ static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) /* * Relocation based on dirid, hashing them into a given bitmap block - * files. Formatted nodes are unaffected, a seperate policy covers them + * files. Formatted nodes are unaffected, a separate policy covers them */ static void dirid_groups(reiserfs_blocknr_hint_t * hint) { @@ -688,7 +688,7 @@ static void dirid_groups(reiserfs_blocknr_hint_t * hint) /* * Relocation based on oid, hashing them into a given bitmap block - * files. Formatted nodes are unaffected, a seperate policy covers them + * files. Formatted nodes are unaffected, a separate policy covers them */ static void oid_groups(reiserfs_blocknr_hint_t * hint) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 231fd5ccadc..57917932212 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1536,7 +1536,7 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, if (!inode) inode = ERR_PTR(-ESTALE); if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -2143,7 +2143,7 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) /* if we are not on a block boundary */ if (length) { length = blocksize - length; - zero_user_page(page, offset, length, KM_USER0); + zero_user(page, offset, length); if (buffer_mapped(bh) && bh->b_blocknr != 0) { mark_buffer_dirty(bh); } @@ -2367,7 +2367,7 @@ static int reiserfs_write_full_page(struct page *page, unlock_page(page); return 0; } - zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0); + zero_user_segment(page, last_offset, PAGE_CACHE_SIZE); } bh = head; block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits); diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 5e7388b32d0..740bb8c0c1a 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -575,6 +575,8 @@ 
void print_block(struct buffer_head *bh, ...) //int print_mode, int first, int l printk ("Block %llu contains unformatted data\n", (unsigned long long)bh->b_blocknr); + + va_end(args); } static char print_tb_buf[2048]; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 1597f6b649e..eba037b3338 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, xadir = open_xa_dir(inode, flags); if (IS_ERR(xadir)) { - return ERR_PTR(PTR_ERR(xadir)); + return ERR_CAST(xadir); } else if (xadir && !xadir->d_inode) { dput(xadir); return ERR_PTR(-ENODATA); @@ -164,7 +164,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, xafile = lookup_one_len(name, xadir, strlen(name)); if (IS_ERR(xafile)) { dput(xadir); - return ERR_PTR(PTR_ERR(xafile)); + return ERR_CAST(xafile); } if (xafile->d_inode) { /* file exists */ @@ -1084,7 +1084,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) } /* This is the implementation for the xattr plugin infrastructure */ -static struct list_head xattr_handlers = LIST_HEAD_INIT(xattr_handlers); +static LIST_HEAD(xattr_handlers); static DEFINE_RWLOCK(handler_lock); static struct reiserfs_xattr_handler *find_xattr_handler_prefix(const char diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index a49cf5b9a19..00b6f0a518c 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -84,6 +84,8 @@ struct romfs_inode_info { struct inode vfs_inode; }; +static struct inode *romfs_iget(struct super_block *, unsigned long); + /* instead of private superblock data */ static inline unsigned long romfs_maxsize(struct super_block *sb) { @@ -117,7 +119,7 @@ static int romfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head *bh; struct romfs_super_block *rsb; struct inode *root; - int sz; + int sz, ret = -EINVAL; /* I would parse the options here, but there are none.. 
:) */ @@ -157,10 +159,13 @@ static int romfs_fill_super(struct super_block *s, void *data, int silent) & ROMFH_MASK; s->s_op = &romfs_ops; - root = iget(s, sz); - if (!root) + root = romfs_iget(s, sz); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } + ret = -ENOMEM; s->s_root = d_alloc_root(root); if (!s->s_root) goto outiput; @@ -173,7 +178,7 @@ outiput: out: brelse(bh); outnobh: - return -EINVAL; + return ret; } /* That's simple too. */ @@ -389,8 +394,11 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD) offset = be32_to_cpu(ri.spec) & ROMFH_MASK; - if ((inode = iget(dir->i_sb, offset))) - goto outi; + inode = romfs_iget(dir->i_sb, offset); + if (IS_ERR(inode)) { + res = PTR_ERR(inode); + goto out; + } /* * it's a bit funky, _lookup needs to return an error code @@ -402,7 +410,7 @@ romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) */ out0: inode = NULL; -outi: res = 0; + res = 0; d_add (dentry, inode); out: unlock_kernel(); @@ -478,20 +486,29 @@ static mode_t romfs_modemap[] = S_IFBLK+0600, S_IFCHR+0600, S_IFSOCK+0644, S_IFIFO+0644 }; -static void -romfs_read_inode(struct inode *i) +static struct inode * +romfs_iget(struct super_block *sb, unsigned long ino) { - int nextfh, ino; + int nextfh; struct romfs_inode ri; + struct inode *i; + + ino &= ROMFH_MASK; + i = iget_locked(sb, ino); + if (!i) + return ERR_PTR(-ENOMEM); + if (!(i->i_state & I_NEW)) + return i; - ino = i->i_ino & ROMFH_MASK; i->i_mode = 0; /* Loop for finding the real hard link */ for(;;) { if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) { - printk("romfs: read error for inode 0x%x\n", ino); - return; + printk(KERN_ERR "romfs: read error for inode 0x%lx\n", + ino); + iget_failed(i); + return ERR_PTR(-EIO); } /* XXX: do romfs_checksum here too (with name) */ @@ -548,6 +565,8 @@ romfs_read_inode(struct inode *i) init_special_inode(i, ino, MKDEV(nextfh>>16,nextfh&0xffff)); } + 
unlock_new_inode(i); + return i; } static struct kmem_cache * romfs_inode_cachep; @@ -599,7 +618,6 @@ static int romfs_remount(struct super_block *sb, int *flags, char *data) static const struct super_operations romfs_ops = { .alloc_inode = romfs_alloc_inode, .destroy_inode = romfs_destroy_inode, - .read_inode = romfs_read_inode, .statfs = romfs_statfs, .remount_fs = romfs_remount, }; diff --git a/fs/select.c b/fs/select.c index 47f47925aea..5633fe98078 100644 --- a/fs/select.c +++ b/fs/select.c @@ -739,7 +739,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, timeout_jiffies = -1; else #endif - timeout_jiffies = msecs_to_jiffies(timeout_msecs); + timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; } else { /* Infinite (< 0) or no (0) timeout */ timeout_jiffies = timeout_msecs; diff --git a/fs/signalfd.c b/fs/signalfd.c index fb7f7e8034d..cb2b63ae0bf 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -27,6 +27,7 @@ #include <linux/list.h> #include <linux/anon_inodes.h> #include <linux/signalfd.h> +#include <linux/syscalls.h> struct signalfd_ctx { sigset_t sigmask; @@ -66,7 +67,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128); /* - * Unused memebers should be zero ... + * Unused members should be zero ... */ err = __clear_user(uinfo, sizeof(*uinfo)); diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 9416ead0c7a..4e5c22ca802 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -500,6 +500,13 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) struct smb_fattr root; int ver; void *mem; + static int warn_count; + + if (warn_count < 5) { + warn_count++; + printk(KERN_EMERG "smbfs is deprecated and will be removed" + "from the 2.6.27 kernel. 
Please migrate to cifs\n"); + } if (!raw_data) goto out_no_data; diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index e48bd8235a8..e37fe4deebd 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -329,9 +329,8 @@ smb_receive(struct smb_sb_info *server, struct smb_request *req) msg.msg_control = NULL; /* Dont repeat bytes and count available bufferspace */ - rlen = smb_move_iov(&p, &num, iov, req->rq_bytes_recvd); - if (req->rq_rlen < rlen) - rlen = req->rq_rlen; + rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd), + (req->rq_rlen - req->rq_bytes_recvd)); result = kernel_recvmsg(sock, &msg, p, num, rlen, flags); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 81ec6c548c0..c5d60de0658 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -169,20 +169,27 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) init_special_inode(inode, inode->i_mode, rdev); } -static void sysv_read_inode(struct inode *inode) +struct inode *sysv_iget(struct super_block *sb, unsigned int ino) { - struct super_block * sb = inode->i_sb; struct sysv_sb_info * sbi = SYSV_SB(sb); struct buffer_head * bh; struct sysv_inode * raw_inode; struct sysv_inode_info * si; - unsigned int block, ino = inode->i_ino; + struct inode *inode; + unsigned int block; if (!ino || ino > sbi->s_ninodes) { printk("Bad inode number on dev %s: %d is out of range\n", - inode->i_sb->s_id, ino); - goto bad_inode; + sb->s_id, ino); + return ERR_PTR(-EIO); } + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + raw_inode = sysv_raw_inode(sb, ino, &bh); if (!raw_inode) { printk("Major problem: unable to read inode from dev %s\n", @@ -214,11 +221,12 @@ static void sysv_read_inode(struct inode *inode) old_decode_dev(fs32_to_cpu(sbi, si->i_data[0]))); else sysv_set_inode(inode, 0); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(-EIO); } static 
struct buffer_head * sysv_update_inode(struct inode * inode) @@ -328,7 +336,6 @@ static void init_once(struct kmem_cache *cachep, void *p) const struct super_operations sysv_sops = { .alloc_inode = sysv_alloc_inode, .destroy_inode = sysv_destroy_inode, - .read_inode = sysv_read_inode, .write_inode = sysv_write_inode, .delete_inode = sysv_delete_inode, .put_super = sysv_put_super, diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 6bd850b7641..a1f1ef33e81 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -53,9 +53,9 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st ino = sysv_inode_by_name(dentry); if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + inode = sysv_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); } d_add(dentry, inode); return NULL; diff --git a/fs/sysv/super.c b/fs/sysv/super.c index 6f9707a1b95..5a903da5455 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -332,8 +332,8 @@ static int complete_read_super(struct super_block *sb, int silent, int size) sb->s_magic = SYSV_MAGIC_BASE + sbi->s_type; /* set up enough so that it can read an inode */ sb->s_op = &sysv_sops; - root_inode = iget(sb,SYSV_ROOT_INO); - if (!root_inode || is_bad_inode(root_inode)) { + root_inode = sysv_iget(sb, SYSV_ROOT_INO); + if (IS_ERR(root_inode)) { printk("SysV FS: get root inode failed\n"); return 0; } diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 64c03bdf06a..42d51d1c05c 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -141,6 +141,7 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata); /* inode.c */ +extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, int); extern int sysv_sync_inode(struct inode *); extern int sysv_sync_file(struct file *, struct dentry *, int); diff --git a/fs/timerfd.c b/fs/timerfd.c index 61983f3b107..10c80b59ec4 100644 --- 
a/fs/timerfd.c +++ b/fs/timerfd.c @@ -25,13 +25,15 @@ struct timerfd_ctx { struct hrtimer tmr; ktime_t tintv; wait_queue_head_t wqh; + u64 ticks; int expired; + int clockid; }; /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, - * tintv.tv64 != 0) until the timer is read. + * tintv.tv64 != 0) until the timer is accessed. */ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { @@ -40,13 +42,24 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; + ctx->ticks++; wake_up_locked(&ctx->wqh); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return HRTIMER_NORESTART; } -static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t now, remaining; + + now = ctx->tmr.base->get_time(); + remaining = ktime_sub(ctx->tmr.expires, now); + + return remaining.tv64 < 0 ? 
ktime_set(0, 0): remaining; +} + +static void timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec *ktmr) { enum hrtimer_mode htmode; @@ -57,8 +70,9 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags, texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; + ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); - hrtimer_init(&ctx->tmr, clockid, htmode); + hrtimer_init(&ctx->tmr, ctx->clockid, htmode); ctx->tmr.expires = texp; ctx->tmr.function = timerfd_tmrproc; if (texp.tv64 != 0) @@ -83,7 +97,7 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->wqh, wait); spin_lock_irqsave(&ctx->wqh.lock, flags); - if (ctx->expired) + if (ctx->ticks) events |= POLLIN; spin_unlock_irqrestore(&ctx->wqh.lock, flags); @@ -102,11 +116,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, return -EINVAL; spin_lock_irq(&ctx->wqh.lock); res = -EAGAIN; - if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { + if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) { __add_wait_queue(&ctx->wqh, &wait); for (res = 0;;) { set_current_state(TASK_INTERRUPTIBLE); - if (ctx->expired) { + if (ctx->ticks) { res = 0; break; } @@ -121,22 +135,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (ctx->expired) { - ctx->expired = 0; - if (ctx->tintv.tv64 != 0) { + if (ctx->ticks) { + ticks = ctx->ticks; + if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. 
*/ - ticks = (u64) - hrtimer_forward(&ctx->tmr, - hrtimer_cb_get_time(&ctx->tmr), - ctx->tintv); + ticks += hrtimer_forward_now(&ctx->tmr, + ctx->tintv) - 1; hrtimer_restart(&ctx->tmr); - } else - ticks = 1; + } + ctx->expired = 0; + ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); if (ticks) @@ -150,76 +163,132 @@ static const struct file_operations timerfd_fops = { .read = timerfd_read, }; -asmlinkage long sys_timerfd(int ufd, int clockid, int flags, - const struct itimerspec __user *utmr) +static struct file *timerfd_fget(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADF); + if (file->f_op != &timerfd_fops) { + fput(file); + return ERR_PTR(-EINVAL); + } + + return file; +} + +asmlinkage long sys_timerfd_create(int clockid, int flags) { - int error; + int error, ufd; struct timerfd_ctx *ctx; struct file *file; struct inode *inode; - struct itimerspec ktmr; - - if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) - return -EFAULT; + if (flags) + return -EINVAL; if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME) return -EINVAL; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + init_waitqueue_head(&ctx->wqh); + ctx->clockid = clockid; + hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); + + error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", + &timerfd_fops, ctx); + if (error) { + kfree(ctx); + return error; + } + + return ufd; +} + +asmlinkage long sys_timerfd_settime(int ufd, int flags, + const struct itimerspec __user *utmr, + struct itimerspec __user *otmr) +{ + struct file *file; + struct timerfd_ctx *ctx; + struct itimerspec ktmr, kotmr; + + if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) + return -EFAULT; + if (!timespec_valid(&ktmr.it_value) || !timespec_valid(&ktmr.it_interval)) return -EINVAL; - if (ufd == -1) { - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - - init_waitqueue_head(&ctx->wqh); - - timerfd_setup(ctx, clockid, flags, &ktmr); - - /* - * 
When we call this, the initialization must be complete, since - * anon_inode_getfd() will install the fd. - */ - error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]", - &timerfd_fops, ctx); - if (error) - goto err_tmrcancel; - } else { - file = fget(ufd); - if (!file) - return -EBADF; - ctx = file->private_data; - if (file->f_op != &timerfd_fops) { - fput(file); - return -EINVAL; - } - /* - * We need to stop the existing timer before reprogramming - * it to the new values. - */ - for (;;) { - spin_lock_irq(&ctx->wqh.lock); - if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) - break; - spin_unlock_irq(&ctx->wqh.lock); - cpu_relax(); - } - /* - * Re-program the timer to the new value ... - */ - timerfd_setup(ctx, clockid, flags, &ktmr); + file = timerfd_fget(ufd); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + /* + * We need to stop the existing timer before reprogramming + * it to the new values. + */ + for (;;) { + spin_lock_irq(&ctx->wqh.lock); + if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) + break; spin_unlock_irq(&ctx->wqh.lock); - fput(file); + cpu_relax(); } - return ufd; + /* + * If the timer is expired and it's periodic, we need to advance it + * because the caller may want to know the previous expiration time. + * We do not update "ticks" and "expired" since the timer will be + * re-programmed again in the following timerfd_setup() call. + */ + if (ctx->expired && ctx->tintv.tv64) + hrtimer_forward_now(&ctx->tmr, ctx->tintv); -err_tmrcancel: - hrtimer_cancel(&ctx->tmr); - kfree(ctx); - return error; + kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); + kotmr.it_interval = ktime_to_timespec(ctx->tintv); + + /* + * Re-program the timer to the new value ... 
+ */ + timerfd_setup(ctx, flags, &ktmr); + + spin_unlock_irq(&ctx->wqh.lock); + fput(file); + if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) + return -EFAULT; + + return 0; +} + +asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr) +{ + struct file *file; + struct timerfd_ctx *ctx; + struct itimerspec kotmr; + + file = timerfd_fget(ufd); + if (IS_ERR(file)) + return PTR_ERR(file); + ctx = file->private_data; + + spin_lock_irq(&ctx->wqh.lock); + if (ctx->expired && ctx->tintv.tv64) { + ctx->expired = 0; + ctx->ticks += + hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1; + hrtimer_restart(&ctx->tmr); + } + kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); + kotmr.it_interval = ktime_to_timespec(ctx->tintv); + spin_unlock_irq(&ctx->wqh.lock); + fput(file); + + return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; } diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 4320782761a..489f26bc26d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -714,26 +714,30 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode) return 0; } -void ufs_read_inode(struct inode * inode) +struct inode *ufs_iget(struct super_block *sb, unsigned long ino) { - struct ufs_inode_info *ufsi = UFS_I(inode); - struct super_block * sb; - struct ufs_sb_private_info * uspi; + struct ufs_inode_info *ufsi; + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; struct buffer_head * bh; + struct inode *inode; int err; - UFSD("ENTER, ino %lu\n", inode->i_ino); - - sb = inode->i_sb; - uspi = UFS_SB(sb)->s_uspi; + UFSD("ENTER, ino %lu\n", ino); - if (inode->i_ino < UFS_ROOTINO || - inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) { + if (ino < UFS_ROOTINO || ino > (uspi->s_ncg * uspi->s_ipg)) { ufs_warning(sb, "ufs_read_inode", "bad inode number (%lu)\n", - inode->i_ino); - goto bad_inode; + ino); + return ERR_PTR(-EIO); } + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + 
return inode; + + ufsi = UFS_I(inode); + bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino)); if (!bh) { ufs_warning(sb, "ufs_read_inode", "unable to read inode %lu\n", @@ -765,10 +769,12 @@ void ufs_read_inode(struct inode * inode) brelse(bh); UFSD("EXIT\n"); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); + iget_failed(inode); + return ERR_PTR(-EIO); } static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index d8bfbee2fe2..747a4de6c69 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -57,10 +57,10 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru lock_kernel(); ino = ufs_inode_by_name(dir, dentry); if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) { + inode = ufs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(-EACCES); + return ERR_CAST(inode); } } unlock_kernel(); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 0072cb33ebe..73deff475e6 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -633,6 +633,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) unsigned block_size, super_block_size; unsigned flags; unsigned super_block_offset; + int ret = -EINVAL; uspi = NULL; ubh = NULL; @@ -1065,12 +1066,16 @@ magic_found: uspi->s_maxsymlinklen = fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen); - inode = iget(sb, UFS_ROOTINO); - if (!inode || is_bad_inode(inode)) + inode = ufs_iget(sb, UFS_ROOTINO); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); goto failed; + } sb->s_root = d_alloc_root(inode); - if (!sb->s_root) + if (!sb->s_root) { + ret = -ENOMEM; goto dalloc_failed; + } ufs_setup_cstotal(sb); /* @@ -1092,7 +1097,7 @@ failed: kfree(sbi); sb->s_fs_info = NULL; UFSD("EXIT (FAILED)\n"); - return -EINVAL; + return ret; failed_nomem: UFSD("EXIT (NOMEM)\n"); @@ -1326,7 +1331,6 @@ static ssize_t ufs_quota_write(struct super_block *, int, const 
char *, size_t, static const struct super_operations ufs_super_ops = { .alloc_inode = ufs_alloc_inode, .destroy_inode = ufs_destroy_inode, - .read_inode = ufs_read_inode, .write_inode = ufs_write_inode, .delete_inode = ufs_delete_inode, .put_super = ufs_put_super, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 7faa4cd71a2..fcb9231bb9e 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -106,7 +106,7 @@ extern void ufs_free_inode (struct inode *inode); extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ -extern void ufs_read_inode (struct inode *); +extern struct inode *ufs_iget(struct super_block *, unsigned long); extern void ufs_put_inode (struct inode *); extern int ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); diff --git a/fs/utimes.c b/fs/utimes.c index b9912ecbee2..e5588cd8530 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/stat.h> #include <linux/utime.h> +#include <linux/syscalls.h> #include <asm/uaccess.h> #include <asm/unistd.h> diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index c28add2fbe9..cd450bea9f1 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -705,7 +705,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, brelse(sinfo.bh); if (IS_ERR(inode)) { unlock_kernel(); - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); } alias = d_find_alias(inode); if (alias) { diff --git a/fs/xattr.c b/fs/xattr.c index 6645b7313b3..f7c8f87bb39 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -105,6 +105,33 @@ out: EXPORT_SYMBOL_GPL(vfs_setxattr); ssize_t +xattr_getsecurity(struct inode *inode, const char *name, void *value, + size_t size) +{ + void *buffer = NULL; + ssize_t len; + + if (!value || !size) { + len = security_inode_getsecurity(inode, name, &buffer, false); + goto out_noalloc; + } + + len = security_inode_getsecurity(inode, name, &buffer, true); + if (len < 0) + return len; + if (size < len) { + len = -ERANGE; + 
goto out; + } + memcpy(value, buffer, len); +out: + security_release_secctx(buffer, len); +out_noalloc: + return len; +} +EXPORT_SYMBOL_GPL(xattr_getsecurity); + +ssize_t vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; @@ -118,23 +145,23 @@ vfs_getxattr(struct dentry *dentry, char *name, void *value, size_t size) if (error) return error; - if (inode->i_op->getxattr) - error = inode->i_op->getxattr(dentry, name, value, size); - else - error = -EOPNOTSUPP; - if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - int ret = security_inode_getsecurity(inode, suffix, value, - size, error); + int ret = xattr_getsecurity(inode, suffix, value, size); /* * Only overwrite the return value if a security module * is actually active. */ - if (ret != -EOPNOTSUPP) - error = ret; + if (ret == -EOPNOTSUPP) + goto nolsm; + return ret; } +nolsm: + if (inode->i_op->getxattr) + error = inode->i_op->getxattr(dentry, name, value, size); + else + error = -EOPNOTSUPP; return error; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index ed2b16dff91..e040f1ce1b6 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -92,8 +92,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, void kmem_free(void *ptr, size_t size) { - if (((unsigned long)ptr < VMALLOC_START) || - ((unsigned long)ptr >= VMALLOC_END)) { + if (!is_vmalloc_addr(ptr)) { kfree(ptr); } else { vfree(ptr); diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index a49dd8d4b06..0382c19d652 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -709,8 +709,7 @@ static inline struct page * mem_to_page( void *addr) { - if (((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END)) { + if ((!is_vmalloc_addr(addr))) { return virt_to_page(addr); } else { return vmalloc_to_page(addr); diff --git 
a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index d6a8dddb226..6f614f35f65 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -155,7 +155,7 @@ xfs_iozero( if (status) break; - zero_user_page(page, offset, bytes, KM_USER0); + zero_user(page, offset, bytes); status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, page, fsdata); |