From d37dc42ab6f040b8f0f2962ab219c5b2accf748d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 30 Apr 2009 06:45:08 -0400 Subject: nls: add a nls_nullsize inline It's possible for character sets to require a multi-byte null string terminator. Add a helper function that determines the size of the null terminator at runtime. Signed-off-by: Jeff Layton Acked-by: Suresh Jayaraman Signed-off-by: Steve French --- include/linux/nls.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nls.h b/include/linux/nls.h index 6a882208301..52b1a76c1b4 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -58,6 +58,25 @@ static inline int nls_strnicmp(struct nls_table *t, const unsigned char *s1, return 0; } +/* + * nls_nullsize - return length of null character for codepage + * @codepage - codepage for which to return length of NULL terminator + * + * Since we can't guarantee that the null terminator will be a particular + * length, we have to check against the codepage. If there's a problem + * determining it, assume a single-byte NULL terminator. + */ +static inline int +nls_nullsize(const struct nls_table *codepage) +{ + int charlen; + char tmp[NLS_MAX_CHARSET_SIZE]; + + charlen = codepage->uni2char(0, tmp, NLS_MAX_CHARSET_SIZE); + + return charlen > 0 ? charlen : 1; +} + #define MODULE_ALIAS_NLS(name) MODULE_ALIAS("nls_" __stringify(name)) #endif /* _LINUX_NLS_H */ -- cgit v1.2.3-70-g09d2 From a7ca7fccacc029958fd09985e7f3529b90ec791d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 5 May 2009 14:31:12 +0200 Subject: netfilter: add missing linux/types.h include to xt_LED.h Pointed out by Dave Miller: CHECK include/linux/netfilter (57 files) /home/davem/src/GIT/net-2.6/usr/include/linux/netfilter/xt_LED.h:6: found __[us]{8,16,32,64} type without #include Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_LED.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_LED.h b/include/linux/netfilter/xt_LED.h index 4c91a0d770d..f5509e7524d 100644 --- a/include/linux/netfilter/xt_LED.h +++ b/include/linux/netfilter/xt_LED.h @@ -1,6 +1,8 @@ #ifndef _XT_LED_H #define _XT_LED_H +#include + struct xt_led_info { char id[27]; /* Unique ID for this trigger in the LED class */ __u8 always_blink; /* Blink even if the LED is already on */ -- cgit v1.2.3-70-g09d2 From 280f37afa2c270ff029cb420b34396aa002909c3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 May 2009 17:46:07 +0200 Subject: netfilter: xt_cluster: fix use of cluster match with 32 nodes This patch fixes a problem when you use 32 nodes in the cluster match: % iptables -I PREROUTING -t mangle -i eth0 -m cluster \ --cluster-total-nodes 32 --cluster-local-node 32 \ --cluster-hash-seed 0xdeadbeef -j MARK --set-mark 0xffff iptables: Invalid argument. Run `dmesg' for more information. % dmesg | tail -1 xt_cluster: this node mask cannot be higher than the total number of nodes The problem is related to this checking: if (info->node_mask >= (1 << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; } (1 << 32) is 1. Thus, the checking fails. BTW, I said this before but I insist: I have only tested the cluster match with 2 nodes getting ~45% extra performance in an active-active setup. The maximum limit of 32 nodes is still completely arbitrary. I'd really appreciate if people that have more nodes in their setups let me know. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_cluster.h | 2 ++ net/netfilter/xt_cluster.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h index 5e0a0d07b52..886682656f0 100644 --- a/include/linux/netfilter/xt_cluster.h +++ b/include/linux/netfilter/xt_cluster.h @@ -12,4 +12,6 @@ struct xt_cluster_match_info { u_int32_t flags; }; +#define XT_CLUSTER_NODES_MAX 32 + #endif /* _XT_CLUSTER_MATCH_H */ diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c4847662b8..69a639f3540 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; - if (info->node_mask >= (1 << info->total_nodes)) { + if (info->total_nodes > XT_CLUSTER_NODES_MAX) { + printk(KERN_ERR "xt_cluster: you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); + return false; + } + if (info->node_mask >= (1ULL << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; -- cgit v1.2.3-70-g09d2 From e67c85626cd02e306da1b4195bfaf68d61050796 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 8 Mar 2009 23:13:32 +0800 Subject: Revert driver core: move platform_data into platform_device This reverts commit 006f4571a15fae3a0575f2a0f9e9b63b3d1012f8: This patch moves platform_data from struct device into struct platform_device, based on the two ideas: 1. Now all platform_driver is registered by platform_driver_register, which makes probe()/release()/... of platform_driver passed parameter of platform_device *, so platform driver can get platform_data from platform_device; 2. Other kind of devices do not need to use platform_data, we can decrease size of device if moving it to platform_device. Taking into consideration of thousands of files to be fixed and they can't be finished in one night(maybe it will take a long time), so we keep platform_data in device to allow two kind of cases coexist until all platform devices pass its platfrom data from platform_device->platform_data. All patches to do this kind of conversion are welcome. As we don't really want to do it, it was a bad idea. Cc: David Brownell Cc: Ming Lei Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 3 --- include/linux/device.h | 9 ++------- include/linux/platform_device.h | 1 - 3 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d1d0ee43192..8b4708e0624 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -217,7 +217,6 @@ int platform_device_add_data(struct platform_device *pdev, const void *data, if (d) { memcpy(d, data, size); pdev->dev.platform_data = d; - pdev->platform_data = d; } return d ? 0 : -ENOMEM; } @@ -247,8 +246,6 @@ int platform_device_add(struct platform_device *pdev) else dev_set_name(&pdev->dev, pdev->name); - pdev->platform_data = pdev->dev.platform_data; - for (i = 0; i < pdev->num_resources; i++) { struct resource *p, *r = &pdev->resource[i]; diff --git a/include/linux/device.h b/include/linux/device.h index 6a69caaac18..5d5c197bad4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -384,13 +384,8 @@ struct device { struct device_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - - void *platform_data; /* We will remove platform_data - field if all platform devices - pass its platform specific data - from platform_device->platform_data, - other kind of devices should not - use platform_data. */ + void *platform_data; /* Platform specific data, device + core doesn't touch it */ struct dev_pm_info power; #ifdef CONFIG_NUMA diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 72736fd8223..b67bb5d7b22 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -20,7 +20,6 @@ struct platform_device { struct device dev; u32 num_resources; struct resource * resource; - void *platform_data; struct platform_device_id *id_entry; }; -- cgit v1.2.3-70-g09d2 From edcc37a0478836b4a51eafb1bcec6a52708f681d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 May 2009 06:00:05 -0400 Subject: Always lookup priv_root on reiserfs mount and keep it ... even if it's a negative dentry. That way we can set ->d_op on root before anyone could race with us. Simplify d_compare(), while we are at it. Signed-off-by: Al Viro --- fs/reiserfs/super.c | 6 ++- fs/reiserfs/xattr.c | 86 ++++++++++++++++++------------------------ include/linux/reiserfs_xattr.h | 1 + 3 files changed, 41 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0ae6486d904..d444fe0013a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1842,7 +1842,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error; } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; @@ -1855,7 +1856,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) reiserfs_info(s, "using 3.5.x disk format\n"); } - if ((errval = reiserfs_xattr_init(s, s->s_flags))) { + if ((errval = reiserfs_lookup_privroot(s)) || + (errval = reiserfs_xattr_init(s, s->s_flags))) { dput(s->s_root); s->s_root = NULL; goto error; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 31a3dbb120e..2891f789f54 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -903,16 +903,19 @@ static int create_privroot(struct dentry *dentry) WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); err = xattr_mkdir(inode, dentry, 0700); - if (err) { - dput(dentry); - dentry = NULL; + if (err || !dentry->d_inode) { + reiserfs_warning(dentry->d_sb, "jdm-20006", + "xattrs/ACLs enabled and couldn't " + "find/create .reiserfs_priv. " + "Failing mount."); + return -EOPNOTSUPP; } - if (dentry && dentry->d_inode) - reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " - "storage.\n", PRIVROOT_NAME); + dentry->d_inode->i_flags |= S_PRIVATE; + reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " + "storage.\n", PRIVROOT_NAME); - return err; + return 0; } static int xattr_mount_check(struct super_block *s) @@ -944,11 +947,9 @@ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) { struct dentry *priv_root = REISERFS_SB(dentry->d_sb)->priv_root; - if (name->len == priv_root->d_name.len && - name->hash == priv_root->d_name.hash && - !memcmp(name->name, priv_root->d_name.name, name->len)) { + if (container_of(q1, struct dentry, d_name) == priv_root) return -ENOENT; - } else if (q1->len == name->len && + if (q1->len == name->len && !memcmp(q1->name, name->name, name->len)) return 0; return 1; @@ -958,6 +959,27 @@ static const struct dentry_operations xattr_lookup_poison_ops = { .d_compare = xattr_lookup_poison, }; +int reiserfs_lookup_privroot(struct super_block *s) +{ + struct dentry *dentry; + int err = 0; + + /* If we don't have the privroot located yet - go find it */ + mutex_lock(&s->s_root->d_inode->i_mutex); + dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, + strlen(PRIVROOT_NAME)); + if (!IS_ERR(dentry)) { + REISERFS_SB(s)->priv_root = dentry; + s->s_root->d_op = &xattr_lookup_poison_ops; + if (dentry->d_inode) + dentry->d_inode->i_flags |= S_PRIVATE; + } else + err = PTR_ERR(dentry); + mutex_unlock(&s->s_root->d_inode->i_mutex); + + return err; +} + /* We need to take a copy of the mount flags since things like * MS_RDONLY don't get set until *after* we're called. * mount_flags != mount_options */ @@ -969,48 +991,12 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) err = xattr_mount_check(s); if (err) goto error; -#endif - /* If we don't have the privroot located yet - go find it */ - if (!REISERFS_SB(s)->priv_root) { - struct dentry *dentry; - mutex_lock_nested(&s->s_root->d_inode->i_mutex, I_MUTEX_CHILD); - dentry = lookup_one_len(PRIVROOT_NAME, s->s_root, - strlen(PRIVROOT_NAME)); - if (!IS_ERR(dentry)) { -#ifdef CONFIG_REISERFS_FS_XATTR - if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) - err = create_privroot(dentry); -#endif - if (!dentry->d_inode) { - dput(dentry); - dentry = NULL; - } - } else - err = PTR_ERR(dentry); + if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + mutex_lock(&s->s_root->d_inode->i_mutex); + err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); - - if (!err && dentry) { - s->s_root->d_op = &xattr_lookup_poison_ops; - dentry->d_inode->i_flags |= S_PRIVATE; - REISERFS_SB(s)->priv_root = dentry; -#ifdef CONFIG_REISERFS_FS_XATTR - /* xattrs are unavailable */ - } else if (!(mount_flags & MS_RDONLY)) { - /* If we're read-only it just means that the dir - * hasn't been created. Not an error -- just no - * xattrs on the fs. We'll check again if we - * go read-write */ - reiserfs_warning(s, "jdm-20006", - "xattrs/ACLs enabled and couldn't " - "find/create .reiserfs_priv. " - "Failing mount."); - err = -EOPNOTSUPP; -#endif - } } - -#ifdef CONFIG_REISERFS_FS_XATTR if (!err) s->s_xattr = reiserfs_xattr_handlers; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index dcae01e63e4..fea1a8e65be 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -38,6 +38,7 @@ struct nameidata; int reiserfs_xattr_register_handlers(void) __init; void reiserfs_xattr_unregister_handlers(void); int reiserfs_xattr_init(struct super_block *sb, int mount_flags); +int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); -- cgit v1.2.3-70-g09d2 From ab17c4f02156c4f75d7fa43a5aa2a7f942d47201 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:15 -0400 Subject: reiserfs: fixup xattr_root caching The xattr_root caching was broken from my previous patch set. It wouldn't cause corruption, but could cause decreased performance due to allocating a larger chunk of the journal (~ 27 blocks) than it would actually use. This patch loads the xattr root dentry at xattr initialization and creates it on-demand. Since we're using the cached dentry, there's no point in keeping lookup_or_create_dir around, so that's removed. Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/xattr.c | 73 ++++++++++++++++++++++++++---------------- include/linux/reiserfs_fs_sb.h | 2 +- include/linux/reiserfs_xattr.h | 2 +- 3 files changed, 48 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 2891f789f54..c77984473db 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -113,36 +113,28 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) #define xattr_may_create(flags) (!flags || flags & XATTR_CREATE) -/* Returns and possibly creates the xattr dir. */ -static struct dentry *lookup_or_create_dir(struct dentry *parent, - const char *name, int flags) +static struct dentry *open_xa_root(struct super_block *sb, int flags) { - struct dentry *dentry; - BUG_ON(!parent); + struct dentry *privroot = REISERFS_SB(sb)->priv_root; + struct dentry *xaroot; + if (!privroot->d_inode) + return ERR_PTR(-ENODATA); - mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_XATTR); - dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(dentry) && !dentry->d_inode) { - int err = -ENODATA; + mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR); + xaroot = dget(REISERFS_SB(sb)->xattr_root); + if (!xaroot->d_inode) { + int err = -ENODATA; if (xattr_may_create(flags)) - err = xattr_mkdir(parent->d_inode, dentry, 0700); - + err = xattr_mkdir(privroot->d_inode, xaroot, 0700); if (err) { - dput(dentry); - dentry = ERR_PTR(err); + dput(xaroot); + xaroot = ERR_PTR(err); } } - mutex_unlock(&parent->d_inode->i_mutex); - return dentry; -} -static struct dentry *open_xa_root(struct super_block *sb, int flags) -{ - struct dentry *privroot = REISERFS_SB(sb)->priv_root; - if (!privroot) - return ERR_PTR(-ENODATA); - return lookup_or_create_dir(privroot, XAROOT_NAME, flags); + mutex_unlock(&privroot->d_inode->i_mutex); + return xaroot; } static struct dentry *open_xa_dir(const struct inode *inode, int flags) @@ -158,10 +150,22 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) le32_to_cpu(INODE_PKEY(inode)->k_objectid), inode->i_generation); - xadir = lookup_or_create_dir(xaroot, namebuf, flags); + mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR); + + xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf)); + if (!IS_ERR(xadir) && !xadir->d_inode) { + int err = -ENODATA; + if (xattr_may_create(flags)) + err = xattr_mkdir(xaroot->d_inode, xadir, 0700); + if (err) { + dput(xadir); + xadir = ERR_PTR(err); + } + } + + mutex_unlock(&xaroot->d_inode->i_mutex); dput(xaroot); return xadir; - } /* The following are side effects of other operations that aren't explicitly @@ -986,19 +990,33 @@ int reiserfs_lookup_privroot(struct super_block *s) int reiserfs_xattr_init(struct super_block *s, int mount_flags) { int err = 0; + struct dentry *privroot = REISERFS_SB(s)->priv_root; #ifdef CONFIG_REISERFS_FS_XATTR err = xattr_mount_check(s); if (err) goto error; - if (!REISERFS_SB(s)->priv_root->d_inode && !(mount_flags & MS_RDONLY)) { + if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) { mutex_lock(&s->s_root->d_inode->i_mutex); err = create_privroot(REISERFS_SB(s)->priv_root); mutex_unlock(&s->s_root->d_inode->i_mutex); } - if (!err) + + if (privroot->d_inode) { s->s_xattr = reiserfs_xattr_handlers; + mutex_lock(&privroot->d_inode->i_mutex); + if (!REISERFS_SB(s)->xattr_root) { + struct dentry *dentry; + dentry = lookup_one_len(XAROOT_NAME, privroot, + strlen(XAROOT_NAME)); + if (!IS_ERR(dentry)) + REISERFS_SB(s)->xattr_root = dentry; + else + err = PTR_ERR(dentry); + } + mutex_unlock(&privroot->d_inode->i_mutex); + } error: if (err) { @@ -1008,11 +1026,12 @@ error: #endif /* The super_block MS_POSIXACL must mirror the (no)acl mount option. */ - s->s_flags = s->s_flags & ~MS_POSIXACL; #ifdef CONFIG_REISERFS_FS_POSIX_ACL if (reiserfs_posixacl(s)) s->s_flags |= MS_POSIXACL; + else #endif + s->s_flags &= ~MS_POSIXACL; return err; } diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6b361d23a49..8651640868a 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -402,7 +402,7 @@ struct reiserfs_sb_info { int reserved_blocks; /* amount of blocks reserved for further allocations */ spinlock_t bitmap_lock; /* this lock on now only used to protect reserved_blocks variable */ struct dentry *priv_root; /* root of /.reiserfs_priv */ - struct dentry *xattr_root; /* root of /.reiserfs_priv/.xa */ + struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; #ifdef CONFIG_QUOTA char *s_qf_names[MAXQUOTAS]; diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index fea1a8e65be..cdedc01036e 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -98,7 +98,7 @@ static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode) if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) { nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); - if (REISERFS_SB(inode->i_sb)->xattr_root == NULL) + if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode) nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb); } -- cgit v1.2.3-70-g09d2 From 677c9b2e393a0cd203bd54e9c18b012b2c73305a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Tue, 5 May 2009 15:30:17 -0400 Subject: reiserfs: remove privroot hiding in lookup With Al Viro's patch to move privroot lookup to fs mount, there's no need to have special code to hide the privroot in reiserfs_lookup. I've also cleaned up the privroot hiding in reiserfs_readdir_dentry and removed the last user of reiserfs_xattrs(). Signed-off-by: Jeff Mahoney Signed-off-by: Al Viro --- fs/reiserfs/dir.c | 24 +++++++++++++----------- fs/reiserfs/namei.c | 17 ++--------------- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs_sb.h | 1 - 4 files changed, 16 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 67a80d7e59e..45ee3d357c7 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -41,6 +41,18 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, #define store_ih(where,what) copy_item_head (where, what) +static inline bool is_privroot_deh(struct dentry *dir, + struct reiserfs_de_head *deh) +{ + int ret = 0; +#ifdef CONFIG_REISERFS_FS_XATTR + struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; + ret = (dir == dir->d_parent && privroot->d_inode && + deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); +#endif + return ret; +} + int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, filldir_t filldir, loff_t *pos) { @@ -138,18 +150,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, } /* Ignore the .reiserfs_priv entry */ - if (reiserfs_xattrs(inode->i_sb) && - !old_format_only(inode->i_sb) && - dentry == inode->i_sb->s_root && - REISERFS_SB(inode->i_sb)->priv_root && - REISERFS_SB(inode->i_sb)->priv_root->d_inode - && deh_objectid(deh) == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(inode->i_sb)-> - priv_root->d_inode)-> - k_objectid)) { + if (is_privroot_deh(dentry, deh)) continue; - } d_off = deh_offset(deh); *pos = d_off; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index efd4d720718..27157912863 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -338,21 +338,8 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, &path_to_entry, &de); pathrelse(&path_to_entry); if (retval == NAME_FOUND) { - /* Hide the .reiserfs_priv directory */ - if (reiserfs_xattrs(dir->i_sb) && - !old_format_only(dir->i_sb) && - REISERFS_SB(dir->i_sb)->priv_root && - REISERFS_SB(dir->i_sb)->priv_root->d_inode && - de.de_objectid == - le32_to_cpu(INODE_PKEY - (REISERFS_SB(dir->i_sb)->priv_root->d_inode)-> - k_objectid)) { - reiserfs_write_unlock(dir->i_sb); - return ERR_PTR(-EACCES); - } - - inode = - reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + inode = reiserfs_iget(dir->i_sb, + (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index c77984473db..2237e10c7c7 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -841,7 +841,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (!dentry->d_inode) return -EINVAL; - if (!reiserfs_xattrs(dentry->d_sb) || + if (!dentry->d_sb->s_xattr || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1) return -EOPNOTSUPP; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 8651640868a..6473650c28f 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -488,7 +488,6 @@ enum reiserfs_mount_options { #define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) #define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) -#define reiserfs_xattrs(s) ((s)->s_xattr != NULL) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) -- cgit v1.2.3-70-g09d2 From 74dbbdd7fdc11763f4698d2f3e684cf4446951e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 6 May 2009 01:07:50 -0400 Subject: New helper: deactivate_locked_super() Does equivalent of up_write(&s->s_umount); deactivate_super(s); However, it does not does not unlock it until it's all over. As the result, it's safe to use to dispose of new superblock on ->get_sb() failure exits - nobody will see the sucker until it's all over. Equivalent using up_write/deactivate_super is safe for that purpose if superblock is either safe to use or has NULL ->s_root when we unlock. Normally filesystems take the required precautions, but a) we do have bugs in that area in some of them. b) up_write/deactivate_super sequence is extremely common, so the helper makes sense anyway. Signed-off-by: Al Viro --- fs/super.c | 46 ++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 1 + 2 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/super.c b/fs/super.c index 786fe7d7279..a9dc4c33ef4 100644 --- a/fs/super.c +++ b/fs/super.c @@ -207,6 +207,34 @@ void deactivate_super(struct super_block *s) EXPORT_SYMBOL(deactivate_super); +/** + * deactivate_locked_super - drop an active reference to superblock + * @s: superblock to deactivate + * + * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that + * it does not unlock it until it's all over. As the result, it's safe to + * use to dispose of new superblock on ->get_sb() failure exits - nobody + * will see the sucker until it's all over. Equivalent using up_write + + * deactivate_super is safe for that purpose only if superblock is either + * safe to use or has NULL ->s_root when we unlock. + */ +void deactivate_locked_super(struct super_block *s) +{ + struct file_system_type *fs = s->s_type; + if (atomic_dec_and_lock(&s->s_active, &sb_lock)) { + s->s_count -= S_BIAS-1; + spin_unlock(&sb_lock); + vfs_dq_off(s, 0); + fs->kill_sb(s); + put_filesystem(fs); + put_super(s); + } else { + up_write(&s->s_umount); + } +} + +EXPORT_SYMBOL(deactivate_locked_super); + /** * grab_super - acquire an active reference * @s: reference we are trying to make active @@ -797,8 +825,7 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data, sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; } @@ -854,8 +881,7 @@ int get_sb_bdev(struct file_system_type *fs_type, if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); error = -EBUSY; goto error_bdev; } @@ -870,8 +896,7 @@ int get_sb_bdev(struct file_system_type *fs_type, sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); goto error; } @@ -921,8 +946,7 @@ int get_sb_nodev(struct file_system_type *fs_type, error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -952,8 +976,7 @@ int get_sb_single(struct file_system_type *fs_type, s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { - up_write(&s->s_umount); - deactivate_super(s); + deactivate_locked_super(s); return error; } s->s_flags |= MS_ACTIVE; @@ -1006,8 +1029,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void return mnt; out_sb: dput(mnt->mnt_root); - up_write(&mnt->mnt_sb->s_umount); - deactivate_super(mnt->mnt_sb); + deactivate_locked_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: diff --git a/include/linux/fs.h b/include/linux/fs.h index 5bed436f435..11484d08042 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1775,6 +1775,7 @@ void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); +void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), -- cgit v1.2.3-70-g09d2 From db6c1fbb92eeb4cb52c6133e0c533602f49fc4bd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 18:07:08 +0200 Subject: romfs: cleanup romfs_fs.h There's no kernel-only content in it anymore, so move it to header-y and remove the superflous #ifdef __KERNEL__. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/Kbuild | 2 +- include/linux/romfs_fs.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ca9b9b9bd33..3f0eaa397ef 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -138,6 +138,7 @@ header-y += qnxtypes.h header-y += radeonfb.h header-y += raw.h header-y += resource.h +header-y += romfs_fs.h header-y += rose.h header-y += serial_reg.h header-y += smbno.h @@ -314,7 +315,6 @@ unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h -unifdef-y += romfs_fs.h unifdef-y += route.h unifdef-y += rtc.h unifdef-y += rtnetlink.h diff --git a/include/linux/romfs_fs.h b/include/linux/romfs_fs.h index e20bbf9eb36..c490fbc43fe 100644 --- a/include/linux/romfs_fs.h +++ b/include/linux/romfs_fs.h @@ -53,9 +53,4 @@ struct romfs_inode { #define ROMFH_PAD (ROMFH_SIZE-1) #define ROMFH_MASK (~ROMFH_PAD) -#ifdef __KERNEL__ - -/* Not much now */ - -#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-70-g09d2 From 6e8341a11eb21826b7192d0bb88cb5b44900a9af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2009 11:16:22 -0400 Subject: Switch open_exec() and sys_uselib() to do_open_filp() ... and make path_lookup_open() static Signed-off-by: Al Viro --- fs/exec.c | 72 ++++++++++++++++++--------------------------------- fs/namei.c | 13 +++++----- fs/open.c | 2 +- include/linux/fs.h | 2 +- include/linux/namei.h | 1 - 5 files changed, 34 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 41ae8e0de72..895823d0149 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -105,36 +105,28 @@ static inline void put_binfmt(struct linux_binfmt * fmt) SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; - struct nameidata nd; char *tmp = getname(library); int error = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - error = path_lookup_open(AT_FDCWD, tmp, - LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - putname(tmp); - } - if (error) + if (IS_ERR(tmp)) + goto out; + + file = do_filp_open(AT_FDCWD, tmp, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_READ | MAY_EXEC | MAY_OPEN); + putname(tmp); + error = PTR_ERR(file); + if (IS_ERR(file)) goto out; error = -EINVAL; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) goto exit; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto exit; - - error = may_open(&nd.path, MAY_READ | MAY_EXEC | MAY_OPEN, 0); - if (error) + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; - fsnotify_open(file->f_path.dentry); error = -ENOEXEC; @@ -156,13 +148,10 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) } read_unlock(&binfmt_lock); } +exit: fput(file); out: return error; -exit: - release_open_intent(&nd); - path_put(&nd.path); - goto out; } #ifdef CONFIG_MMU @@ -657,44 +646,33 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { - struct nameidata nd; struct file *file; int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, - FMODE_READ|FMODE_EXEC); - if (err) + file = do_filp_open(AT_FDCWD, name, + O_LARGEFILE | O_RDONLY | FMODE_EXEC, 0, + MAY_EXEC | MAY_OPEN); + if (IS_ERR(file)) goto out; err = -EACCES; - if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) - goto out_path_put; - - if (nd.path.mnt->mnt_flags & MNT_NOEXEC) - goto out_path_put; - - err = may_open(&nd.path, MAY_EXEC | MAY_OPEN, 0); - if (err) - goto out_path_put; + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) + goto exit; - file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); - if (IS_ERR(file)) - return file; + if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; fsnotify_open(file->f_path.dentry); err = deny_write_access(file); - if (err) { - fput(file); - goto out; - } + if (err) + goto exit; +out: return file; - out_path_put: - release_open_intent(&nd); - path_put(&nd.path); - out: +exit: + fput(file); return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); diff --git a/fs/namei.c b/fs/namei.c index 78f253cd2d4..967c3db9272 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1130,8 +1130,8 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, * @nd: pointer to nameidata * @open_flags: open intent flags */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) +static int path_lookup_open(int dfd, const char *name, + unsigned int lookup_flags, struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1637,18 +1637,19 @@ static int open_will_write_to_fs(int flag, struct inode *inode) * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode) + int open_flag, int mode, int acc_mode) { struct file *filp; struct nameidata nd; - int acc_mode, error; + int error; struct path path; struct dentry *dir; int count = 0; int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = MAY_OPEN | ACC_MODE(flag); + if (!acc_mode) + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -1869,7 +1870,7 @@ do_link: */ struct file *filp_open(const char *filename, int flags, int mode) { - return do_filp_open(AT_FDCWD, filename, flags, mode); + return do_filp_open(AT_FDCWD, filename, flags, mode, 0); } EXPORT_SYMBOL(filp_open); diff --git a/fs/open.c b/fs/open.c index 377eb25b6ab..bdfbf03615a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1033,7 +1033,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode); + struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index 11484d08042..ed788426f46 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2118,7 +2118,7 @@ extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode); + int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, unsigned long, char *, unsigned long); diff --git a/include/linux/namei.h b/include/linux/namei.h index fc2e0357987..518098fe63a 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -69,7 +69,6 @@ extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); -extern int path_lookup_open(int dfd, const char *name, unsigned lookup_flags, struct nameidata *, int open_flags); extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)); extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); -- cgit v1.2.3-70-g09d2 From 2a32cebd6cbcc43996c3e2d114fa32ba1e71192a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 May 2009 16:05:57 -0400 Subject: Fix races around the access to ->s_options Put generic_show_options read access to s_options under rcu_read_lock, split save_mount_options() into "we are setting it the first time" (uses in foo_fill_super()) and "we are relacing and freeing the old one", synchronize_rcu() before kfree() in the latter. Signed-off-by: Al Viro --- drivers/isdn/capi/capifs.c | 3 +-- fs/affs/super.c | 3 +-- fs/afs/super.c | 4 ++-- fs/hpfs/super.c | 3 +-- fs/namespace.c | 21 ++++++++++++++++++--- fs/reiserfs/super.c | 3 +-- include/linux/fs.h | 1 + 7 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index b129409925a..8f9f3b5a3e8 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -75,8 +75,7 @@ static int capifs_remount(struct super_block *s, int *flags, char *data) } } - kfree(s->s_options); - s->s_options = new_opt; + replace_mount_options(s, new_opt); config.setuid = setuid; config.setgid = setgid; diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ce695e707f..63f5183f263 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -507,8 +507,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) kfree(new_opts); return -EINVAL; } - kfree(sb->s_options); - sb->s_options = new_opts; + replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; sbi->s_mode = mode; diff --git a/fs/afs/super.c b/fs/afs/super.c index 2753f16dd31..76828e5f8a3 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -408,17 +408,17 @@ static int afs_get_sb(struct file_system_type *fs_type, deactivate_locked_super(sb); goto error; } - sb->s_options = new_opts; + save_mount_options(sb, new_opts); sb->s_flags |= MS_ACTIVE; } else { _debug("reuse"); - kfree(new_opts); ASSERTCMP(sb->s_flags, &, MS_ACTIVE); } simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.cell); + kfree(new_opts); _leave(" = 0 [%p]", sb); return 0; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index fecf402d7b8..fc77965be84 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -423,8 +423,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) if (!(*flags & MS_RDONLY)) mark_dirty(s); - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; diff --git a/fs/namespace.c b/fs/namespace.c index 0d2003fb437..134d494158d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,12 +695,16 @@ static inline void mangle(struct seq_file *m, const char *s) */ int generic_show_options(struct seq_file *m, struct vfsmount *mnt) { - const char *options = mnt->mnt_sb->s_options; + const char *options; + + rcu_read_lock(); + options = rcu_dereference(mnt->mnt_sb->s_options); if (options != NULL && options[0]) { seq_putc(m, ','); mangle(m, options); } + rcu_read_unlock(); return 0; } @@ -721,11 +725,22 @@ EXPORT_SYMBOL(generic_show_options); */ void save_mount_options(struct super_block *sb, char *options) { - kfree(sb->s_options); - sb->s_options = kstrdup(options, GFP_KERNEL); + BUG_ON(sb->s_options); + rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); } EXPORT_SYMBOL(save_mount_options); +void replace_mount_options(struct super_block *sb, char *options) +{ + char *old = sb->s_options; + rcu_assign_pointer(sb->s_options, options); + if (old) { + synchronize_rcu(); + kfree(old); + } +} +EXPORT_SYMBOL(replace_mount_options); + #ifdef CONFIG_PROC_FS /* iterator */ static void *m_start(struct seq_file *m, loff_t *pos) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d444fe0013a..1215a4f50cd 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1316,8 +1316,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) } out_ok: - kfree(s->s_options); - s->s_options = new_opts; + replace_mount_options(s, new_opts); return 0; out_err: diff --git a/include/linux/fs.h b/include/linux/fs.h index ed788426f46..3b534e527e0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2368,6 +2368,7 @@ extern void file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); extern void save_mount_options(struct super_block *sb, char *options); +extern void replace_mount_options(struct super_block *sb, char *options); static inline ino_t parent_ino(struct dentry *dentry) { -- cgit v1.2.3-70-g09d2 From ecf4667d30dd63fa130e22f8f2da3e6ce003358b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 May 2009 13:19:37 -0700 Subject: syscalls.h add the missing sys_pipe2 declaration In order to build the generic syscall table, we need a declaration for every system call. sys_pipe2 was added without a proper declaration, so add this to syscalls.h now. Signed-off-by: Arnd Bergmann Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 40617c1d897..30520844b8d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -433,6 +433,7 @@ asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); #endif +asmlinkage long sys_pipe2(int __user *fildes, int flags); asmlinkage long sys_dup(unsigned int fildes); asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags); -- cgit v1.2.3-70-g09d2 From 4f005dbe5584fe54c9f6d6d4f0acd3fb29be84da Mon Sep 17 00:00:00 2001 From: Maciej Sosnowski Date: Thu, 23 Apr 2009 12:31:51 +0200 Subject: ioatdma: fix "ioatdma frees DMA memory with wrong function" as reported by Alexander Beregalov ioatdma 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong function [device address=0x000000007f76f800] [size=2000 bytes] [map ped as single] [unmapped as page] The ioatdma driver was unmapping all regions (either allocated as page or single) using unmap_page. This patch lets dma driver recognize if unmap_single or unmap_page should be used. It introduces two new dma control flags: DMA_COMPL_SRC_UNMAP_SINGLE and DMA_COMPL_DEST_UNMAP_SINGLE. They should be set to indicate dma driver to do dma-unmapping as single (first one for the source, tha latter for the destination). If respective flag is not set, the driver assumes dma-unmapping as page. Signed-off-by: Maciej Sosnowski Reported-by: Alexander Beregalov Tested-by: Alexander Beregalov Signed-off-by: Dan Williams --- drivers/dma/dmaengine.c | 17 +++++++++++------ drivers/dma/ioat_dma.c | 45 ++++++++++++++++++++++++++++----------------- include/linux/dmaengine.h | 6 ++++++ 3 files changed, 45 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 92438e9dacc..5a87384ea4f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -804,11 +804,14 @@ dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | + DMA_COMPL_SRC_UNMAP_SINGLE | + DMA_COMPL_DEST_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -850,11 +853,12 @@ dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); @@ -898,12 +902,13 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, dma_addr_t dma_dest, dma_src; dma_cookie_t cookie; int cpu; + unsigned long flags; dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE); - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, - DMA_CTRL_ACK); + flags = DMA_CTRL_ACK; + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); if (!tx) { dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index e4fc33c1c32..1955ee8d6d2 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -1063,22 +1063,31 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) static void ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) { - /* - * yes we are unmapping both _page and _single - * alloc'd regions with unmap_page. Is this - * *really* that bad? - */ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + } + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) + pci_unmap_single(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + else + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); + } } /** @@ -1363,6 +1372,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) int err = 0; struct completion cmp; unsigned long tmo; + unsigned long flags; src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!src) @@ -1392,8 +1402,9 @@ static int ioat_dma_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, 0); + IOAT_TEST_SIZE, flags); if (!tx) { dev_err(&device->pdev->dev, "Self-test prep failed, disabling\n"); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170..ffefba81c81 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -78,12 +78,18 @@ enum dma_transaction_type { * dependency chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single + * (if not set, do the source dma-unmapping as page) + * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single + * (if not set, do the destination dma-unmapping as page) */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), + DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), }; /** -- cgit v1.2.3-70-g09d2 From cd17cbfda004fe5f406c01b318c6378d9895896f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 May 2009 11:32:24 +0200 Subject: Revert "mm: add /proc controls for pdflush threads" This reverts commit fafd688e4c0c34da0f3de909881117d374e4c7af. Work is progressing to switch away from pdflush as the process backing for flushing out dirty data. So it seems pointless to add more knobs to control pdflush threads. The original author of the patch did not have any specific use cases for adding the knobs, so we can easily revert this before 2.6.30 to avoid having to maintain this API forever. Signed-off-by: Jens Axboe --- Documentation/sysctl/vm.txt | 28 ---------------------------- include/linux/writeback.h | 2 -- kernel/sysctl.c | 23 ----------------------- mm/pdflush.c | 31 ++++++++++++------------------- 4 files changed, 12 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index b716d33912d..c302ddf629a 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -39,8 +39,6 @@ Currently, these files are in /proc/sys/vm: - nr_hugepages - nr_overcommit_hugepages - nr_pdflush_threads -- nr_pdflush_threads_min -- nr_pdflush_threads_max - nr_trim_pages (only if CONFIG_MMU=n) - numa_zonelist_order - oom_dump_tasks @@ -469,32 +467,6 @@ The default value is 0. ============================================================== -nr_pdflush_threads_min - -This value controls the minimum number of pdflush threads. - -At boot time, the kernel will create and maintain 'nr_pdflush_threads_min' -threads for the kernel's lifetime. - -The default value is 2. The minimum value you can specify is 1, and -the maximum value is the current setting of 'nr_pdflush_threads_max'. - -See 'nr_pdflush_threads_max' below for more information. - -============================================================== - -nr_pdflush_threads_max - -This value controls the maximum number of pdflush threads that can be -created. The pdflush algorithm will create a new pdflush thread (up to -this maximum) if no pdflush threads have been available for >= 1 second. - -The default value is 8. The minimum value you can specify is the -current value of 'nr_pdflush_threads_min' and the -maximum is 1000. - -============================================================== - overcommit_memory: This value contains a flag that enables memory overcommitment. diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9c1ed1fb6dd..93445477f86 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -168,8 +168,6 @@ void writeback_set_ratelimit(void); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl read-only. */ -extern int nr_pdflush_threads_max; /* Global so it can be exported to sysctl */ -extern int nr_pdflush_threads_min; /* Global so it can be exported to sysctl */ #endif /* WRITEBACK_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ea78fa101ad..b2970d56fb7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -101,7 +101,6 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static unsigned long one_ul = 1; static int one_hundred = 100; -static int one_thousand = 1000; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -1033,28 +1032,6 @@ static struct ctl_table vm_table[] = { .mode = 0444 /* read-only*/, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_min", - .data = &nr_pdflush_threads_min, - .maxlen = sizeof nr_pdflush_threads_min, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &nr_pdflush_threads_max, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "nr_pdflush_threads_max", - .data = &nr_pdflush_threads_max, - .maxlen = sizeof nr_pdflush_threads_max, - .mode = 0644 /* read-write */, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &nr_pdflush_threads_min, - .extra2 = &one_thousand, - }, { .ctl_name = VM_SWAPPINESS, .procname = "swappiness", diff --git a/mm/pdflush.c b/mm/pdflush.c index f2caf96993f..235ac440c44 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -57,14 +57,6 @@ static DEFINE_SPINLOCK(pdflush_lock); */ int nr_pdflush_threads = 0; -/* - * The max/min number of pdflush threads. R/W by sysctl at - * /proc/sys/vm/nr_pdflush_threads_max/min - */ -int nr_pdflush_threads_max __read_mostly = MAX_PDFLUSH_THREADS; -int nr_pdflush_threads_min __read_mostly = MIN_PDFLUSH_THREADS; - - /* * The time at which the pdflush thread pool last went empty */ @@ -76,7 +68,7 @@ static unsigned long last_empty_jifs; * Thread pool management algorithm: * * - The minimum and maximum number of pdflush instances are bound - * by nr_pdflush_threads_min and nr_pdflush_threads_max. + * by MIN_PDFLUSH_THREADS and MAX_PDFLUSH_THREADS. * * - If there have been no idle pdflush instances for 1 second, create * a new one. @@ -142,13 +134,14 @@ static int __pdflush(struct pdflush_work *my_work) * To throttle creation, we reset last_empty_jifs. */ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) { - if (list_empty(&pdflush_list) && - nr_pdflush_threads < nr_pdflush_threads_max) { - last_empty_jifs = jiffies; - nr_pdflush_threads++; - spin_unlock_irq(&pdflush_lock); - start_one_pdflush_thread(); - spin_lock_irq(&pdflush_lock); + if (list_empty(&pdflush_list)) { + if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) { + last_empty_jifs = jiffies; + nr_pdflush_threads++; + spin_unlock_irq(&pdflush_lock); + start_one_pdflush_thread(); + spin_lock_irq(&pdflush_lock); + } } } @@ -160,7 +153,7 @@ static int __pdflush(struct pdflush_work *my_work) */ if (list_empty(&pdflush_list)) continue; - if (nr_pdflush_threads <= nr_pdflush_threads_min) + if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS) continue; pdf = list_entry(pdflush_list.prev, struct pdflush_work, list); if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) { @@ -266,9 +259,9 @@ static int __init pdflush_init(void) * Pre-set nr_pdflush_threads... If we fail to create, * the count will be decremented. */ - nr_pdflush_threads = nr_pdflush_threads_min; + nr_pdflush_threads = MIN_PDFLUSH_THREADS; - for (i = 0; i < nr_pdflush_threads_min; i++) + for (i = 0; i < MIN_PDFLUSH_THREADS; i++) start_one_pdflush_thread(); return 0; } -- cgit v1.2.3-70-g09d2 From 4bca3286433585b5f1c3e7d8ac37a2f4b3def9ca Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 15 May 2009 00:40:35 -0400 Subject: libata: Media rotation rate and form factor heuristics This patch provides new heuristics for parsing both the form factor and media rotation rate ATA IDENFITY words. The reported ATA version must be 7 or greater and the device must return values defined as valid in the standard. Only then are the characteristics reported to SCSI via the VPD B1 page. This seems like a reasonable compromise to me considering that we have been shipping several kernel releases that key off the rotation rate bit without any version checking whatsoever. With no complaints so far. Signed-off-by: Martin K. Petersen Signed-off-by: Jeff Garzik --- drivers/ata/libata-scsi.c | 11 ++++++----- include/linux/ata.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d1718a1f278..342316064e9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2142,13 +2142,14 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) { + int form_factor = ata_id_form_factor(args->id); + int media_rotation_rate = ata_id_rotation_rate(args->id); + rbuf[1] = 0xb1; rbuf[3] = 0x3c; - if (ata_id_major_version(args->id) > 7) { - rbuf[4] = args->id[217] >> 8; - rbuf[5] = args->id[217]; - rbuf[7] = args->id[168] & 0xf; - } + rbuf[4] = media_rotation_rate >> 8; + rbuf[5] = media_rotation_rate; + rbuf[7] = form_factor; return 0; } diff --git a/include/linux/ata.h b/include/linux/ata.h index cb79b7a208e..915da43edee 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -730,6 +730,34 @@ static inline int ata_id_has_unload(const u16 *id) return 0; } +static inline int ata_id_form_factor(const u16 *id) +{ + u16 val = id[168]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + val &= 0xf; + + if (val > 5) + return 0; + + return val; +} + +static inline int ata_id_rotation_rate(const u16 *id) +{ + u16 val = id[217]; + + if (ata_id_major_version(id) < 7 || val == 0 || val == 0xffff) + return 0; + + if (val > 1 && val < 0x401) + return 0; + + return val; +} + static inline int ata_id_has_trim(const u16 *id) { if (ata_id_major_version(id) >= 7 && -- cgit v1.2.3-70-g09d2 From b83674c0da6558e357c6b482ccf299eeea77d8ef Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sun, 17 May 2009 01:02:03 -0400 Subject: reiserfs: fixup perms when xattrs are disabled This adds CONFIG_REISERFS_FS_XATTR protection from reiserfs_permission. This is needed to avoid warnings during file deletions and chowns with xattrs disabled. Signed-off-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/xattr.c | 36 +++++++++++++++++++----------------- include/linux/reiserfs_xattr.h | 4 +--- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 628075ca82c..8e7deb0e696 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -871,23 +871,6 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask) -{ - /* - * We don't do permission checks on the internal objects. - * Permissions are determined by the "owning" object. - */ - if (IS_PRIVATE(inode)) - return 0; - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return generic_permission(inode, mask, NULL); - else - return generic_permission(inode, mask, reiserfs_check_acl); -} - static int create_privroot(struct dentry *dentry) { int err; @@ -951,6 +934,25 @@ static int xattr_mount_check(struct super_block *s) return 0; } +int reiserfs_permission(struct inode *inode, int mask) +{ + /* + * We don't do permission checks on the internal objects. + * Permissions are determined by the "owning" object. + */ + if (IS_PRIVATE(inode)) + return 0; + +#ifdef CONFIG_REISERFS_FS_XATTR + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) != STAT_DATA_V1) + return generic_permission(inode, mask, reiserfs_check_acl); +#endif + return generic_permission(inode, mask, NULL); +} + /* This will catch lookups from the fs root to .reiserfs_priv */ static int xattr_lookup_poison(struct dentry *dentry, struct qstr *q1, struct qstr *name) diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h index cdedc01036e..99928dce37e 100644 --- a/include/linux/reiserfs_xattr.h +++ b/include/linux/reiserfs_xattr.h @@ -41,6 +41,7 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags); int reiserfs_lookup_privroot(struct super_block *sb); int reiserfs_delete_xattrs(struct inode *inode); int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); +int reiserfs_permission(struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) @@ -50,7 +51,6 @@ int reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int reiserfs_removexattr(struct dentry *dentry, const char *name); -int reiserfs_permission(struct inode *inode, int mask); int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); @@ -117,8 +117,6 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode) #define reiserfs_listxattr NULL #define reiserfs_removexattr NULL -#define reiserfs_permission NULL - static inline void reiserfs_init_xattr_rwsem(struct inode *inode) { } -- cgit v1.2.3-70-g09d2 From eb33575cf67d3f35fa2510210ef92631266e2465 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 13 May 2009 17:34:48 +0100 Subject: [ARM] Double check memmap is actually valid with a memmap has unexpected holes V2 pfn_valid() is meant to be able to tell if a given PFN has valid memmap associated with it or not. In FLATMEM, it is expected that holes always have valid memmap as long as there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed that a valid section has a memmap for the entire section. However, ARM and maybe other embedded architectures in the future free memmap backing holes to save memory on the assumption the memmap is never used. The page_zone linkages are then broken even though pfn_valid() returns true. A walker of the full memmap must then do this additional check to ensure the memmap they are looking at is sane by making sure the zone and PFN linkages are still valid. This is expensive, but walkers of the full memmap are extremely rare. This was caught before for FLATMEM and hacked around but it hits again for SPARSEMEM because the page_zone linkages can look ok where the PFN linkages are totally screwed. This looks like a hatchet job but the reality is that any clean solution would end up consumning all the memory saved by punching these unexpected holes in the memmap. For example, we tried marking the memmap within the section invalid but the section size exceeds the size of the hole in most cases so pfn_valid() starts returning false where valid memmap exists. Shrinking the size of the section would increase memory consumption offsetting the gains. This patch identifies when an architecture is punching unexpected holes in the memmap that the memory model cannot automatically detect and sets ARCH_HAS_HOLES_MEMORYMODEL. At the moment, this is restricted to EP93xx which is the model sub-architecture this has been reported on but may expand later. When set, walkers of the full memmap must call memmap_valid_within() for each PFN and passing in what it expects the page and zone to be for that PFN. If it finds the linkages to be broken, it assumes the memmap is invalid for that PFN. Signed-off-by: Mel Gorman Signed-off-by: Russell King --- arch/arm/Kconfig | 6 +++--- include/linux/mmzone.h | 26 ++++++++++++++++++++++++++ mm/mmzone.c | 15 +++++++++++++++ mm/vmstat.c | 19 ++++--------------- 4 files changed, 48 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e60ec54df33..9d02cdb15b2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -273,6 +273,7 @@ config ARCH_EP93XX select HAVE_CLK select COMMON_CLKDEV select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_HOLES_MEMORYMODEL help This enables support for the Cirrus EP93xx series of CPUs. @@ -976,10 +977,9 @@ config OABI_COMPAT UNPREDICTABLE (in fact it can be predicted that it won't work at all). If in doubt say Y. -config ARCH_FLATMEM_HAS_HOLES +config ARCH_HAS_HOLES_MEMORYMODEL bool - default y - depends on FLATMEM + default n # Discontigmem is deprecated config ARCH_DISCONTIGMEM_ENABLE diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 186ec6ab334..a47c879e130 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1097,6 +1097,32 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #define pfn_valid_within(pfn) (1) #endif +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +/* + * pfn_valid() is meant to be able to tell if a given PFN has valid memmap + * associated with it or not. In FLATMEM, it is expected that holes always + * have valid memmap as long as there is valid PFNs either side of the hole. + * In SPARSEMEM, it is assumed that a valid section has a memmap for the + * entire section. + * + * However, an ARM, and maybe other embedded architectures in the future + * free memmap backing holes to save memory on the assumption the memmap is + * never used. The page_zone linkages are then broken even though pfn_valid() + * returns true. A walker of the full memmap must then do this additional + * check to ensure the memmap they are looking at is sane by making sure + * the zone and PFN linkages are still valid. This is expensive, but walkers + * of the full memmap are extremely rare. + */ +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone); +#else +static inline int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + #endif /* !__GENERATING_BOUNDS.H */ #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/mm/mmzone.c b/mm/mmzone.c index 16ce8b955dc..f5b7d176021 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -6,6 +6,7 @@ #include +#include #include #include @@ -72,3 +73,17 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, *zone = zonelist_zone(z); return z; } + +#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL +int memmap_valid_within(unsigned long pfn, + struct page *page, struct zone *zone) +{ + if (page_to_pfn(page) != pfn) + return 0; + + if (page_zone(page) != zone) + return 0; + + return 1; +} +#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 66f6130976c..74d66dba0cb 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -509,22 +509,11 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m, continue; page = pfn_to_page(pfn); -#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES - /* - * Ordinarily, memory holes in flatmem still have a valid - * memmap for the PFN range. However, an architecture for - * embedded systems (e.g. ARM) can free up the memmap backing - * holes to save memory on the assumption the memmap is - * never used. The page_zone linkages are then broken even - * though pfn_valid() returns true. Skip the page if the - * linkages are broken. Even if this test passed, the impact - * is that the counters for the movable type are off but - * fragmentation monitoring is likely meaningless on small - * systems. - */ - if (page_zone(page) != zone) + + /* Watch for unexpected holes punched in the memmap */ + if (!memmap_valid_within(pfn, page, zone)) continue; -#endif + mtype = get_pageblock_migratetype(page); if (mtype < MIGRATE_TYPES) -- cgit v1.2.3-70-g09d2 From 03fbdb15c14e9746c63168e3ff2c64b9c8336d33 Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Wed, 20 May 2009 22:39:08 +0100 Subject: [ARM] 5519/1: amba probe: pass "struct amba_id *" instead of void * The second argument of the probe method points to the amba_id structure, so it's better passed with the correct type. None of the current in-tree drivers uses the pointer, so they have only been checked for a clean compile. Change suggested by Russell King. Signed-off-by: Alessandro Rubini Signed-off-by: Russell King --- drivers/input/serio/ambakmi.c | 2 +- drivers/mmc/host/mmci.c | 2 +- drivers/rtc/rtc-pl030.c | 2 +- drivers/rtc/rtc-pl031.c | 2 +- drivers/serial/amba-pl010.c | 2 +- drivers/serial/amba-pl011.c | 2 +- drivers/video/amba-clcd.c | 2 +- include/linux/amba/bus.h | 2 +- sound/arm/aaci.c | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/serio/ambakmi.c b/drivers/input/serio/ambakmi.c index e29cdc13a19..a28c06d686e 100644 --- a/drivers/input/serio/ambakmi.c +++ b/drivers/input/serio/ambakmi.c @@ -107,7 +107,7 @@ static void amba_kmi_close(struct serio *io) clk_disable(kmi->clk); } -static int amba_kmi_probe(struct amba_device *dev, void *id) +static int amba_kmi_probe(struct amba_device *dev, struct amba_id *id) { struct amba_kmi_port *kmi; struct serio *io; diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 36875dcfa49..7d4febdab28 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -490,7 +490,7 @@ static void mmci_check_status(unsigned long data) mod_timer(&host->timer, jiffies + HZ); } -static int __devinit mmci_probe(struct amba_device *dev, void *id) +static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id) { struct mmc_platform_data *plat = dev->dev.platform_data; struct mmci_host *host; diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index 82615355215..aaf1f75fa29 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -102,7 +102,7 @@ static const struct rtc_class_ops pl030_ops = { .set_alarm = pl030_set_alarm, }; -static int pl030_probe(struct amba_device *dev, void *id) +static int pl030_probe(struct amba_device *dev, struct amba_id *id) { struct pl030_rtc *rtc; int ret; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 333eec689d2..451fc13784d 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -127,7 +127,7 @@ static int pl031_remove(struct amba_device *adev) return 0; } -static int pl031_probe(struct amba_device *adev, void *id) +static int pl031_probe(struct amba_device *adev, struct amba_id *id) { int ret; struct pl031_local *ldata; diff --git a/drivers/serial/amba-pl010.c b/drivers/serial/amba-pl010.c index e3a5ad5ef1d..cdc049d4350 100644 --- a/drivers/serial/amba-pl010.c +++ b/drivers/serial/amba-pl010.c @@ -665,7 +665,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl010_probe(struct amba_device *dev, void *id) +static int pl010_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c index 8b2b9700f3e..88fdac51b6c 100644 --- a/drivers/serial/amba-pl011.c +++ b/drivers/serial/amba-pl011.c @@ -729,7 +729,7 @@ static struct uart_driver amba_reg = { .cons = AMBA_CONSOLE, }; -static int pl011_probe(struct amba_device *dev, void *id) +static int pl011_probe(struct amba_device *dev, struct amba_id *id) { struct uart_amba_port *uap; void __iomem *base; diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 61050ab1412..d1f80bac54f 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -437,7 +437,7 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } -static int clcdfb_probe(struct amba_device *dev, void *id) +static int clcdfb_probe(struct amba_device *dev, struct amba_id *id) { struct clcd_board *board = dev->dev.platform_data; struct clcd_fb *fb; diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 51e6e54b2aa..9b93cafa82a 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -28,7 +28,7 @@ struct amba_id { struct amba_driver { struct device_driver drv; - int (*probe)(struct amba_device *, void *); + int (*probe)(struct amba_device *, struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); int (*suspend)(struct amba_device *, pm_message_t); diff --git a/sound/arm/aaci.c b/sound/arm/aaci.c index 7fbd68fab94..5c48e36038f 100644 --- a/sound/arm/aaci.c +++ b/sound/arm/aaci.c @@ -1074,7 +1074,7 @@ static unsigned int __devinit aaci_size_fifo(struct aaci *aaci) return i; } -static int __devinit aaci_probe(struct amba_device *dev, void *id) +static int __devinit aaci_probe(struct amba_device *dev, struct amba_id *id) { struct aaci *aaci; int ret, i; -- cgit v1.2.3-70-g09d2