summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-12-04 09:07:19 +0100
committerIngo Molnar <mingo@elte.hu>2008-12-04 09:07:19 +0100
commitb8307db2477f9c551e54e0c7b643ea349a3349cd (patch)
tree88654f8bd73857bbd40f75013ce41d8882d16ce6 /fs
parentf0461d0146ee30927bc7efa2ae24ea8c6693b725 (diff)
parent061e41fdb5047b1fb161e89664057835935ca1d2 (diff)
Merge commit 'v2.6.28-rc7' into tracing/core
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cifs/file.c77
-rw-r--r--fs/eventpoll.c85
-rw-r--r--fs/ntfs/debug.h8
-rw-r--r--fs/ocfs2/buffer_head_io.c15
-rw-r--r--fs/ocfs2/dlm/dlmfs.c4
-rw-r--r--fs/ocfs2/dlm/userdlm.h2
-rw-r--r--fs/ocfs2/dlmglue.c3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/stack_user.c3
-rw-r--r--fs/udf/inode.c1
11 files changed, 150 insertions, 51 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 6569fda5cfe..10179cfa115 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -878,6 +878,7 @@ void invalidate_inode_buffers(struct inode *inode)
spin_unlock(&buffer_mapping->private_lock);
}
}
+EXPORT_SYMBOL(invalidate_inode_buffers);
/*
* Remove any clean buffers from the inode's buffer list. This is called
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b691b893a84..f0a81e631ae 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1475,7 +1475,11 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
page, pos, copied));
- if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+ if (PageChecked(page)) {
+ if (copied == len)
+ SetPageUptodate(page);
+ ClearPageChecked(page);
+ } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
SetPageUptodate(page);
if (!PageUptodate(page)) {
@@ -2062,39 +2066,70 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
{
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+ loff_t page_start = pos & PAGE_MASK;
+ loff_t i_size;
+ struct page *page;
+ int rc = 0;
cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
- *pagep = __grab_cache_page(mapping, index);
- if (!*pagep)
- return -ENOMEM;
-
- if (PageUptodate(*pagep))
- return 0;
+ page = __grab_cache_page(mapping, index);
+ if (!page) {
+ rc = -ENOMEM;
+ goto out;
+ }
- /* If we are writing a full page it will be up to date,
- no need to read from the server */
- if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE)
- return 0;
+ if (PageUptodate(page))
+ goto out;
- if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
- int rc;
+ /*
+ * If we write a full page it will be up to date, no need to read from
+ * the server. If the write is short, we'll end up doing a sync write
+ * instead.
+ */
+ if (len == PAGE_CACHE_SIZE)
+ goto out;
- /* might as well read a page, it is fast enough */
- rc = cifs_readpage_worker(file, *pagep, &offset);
+ /*
+ * optimize away the read when we have an oplock, and we're not
+ * expecting to use any of the data we'd be reading in. That
+ * is, when the page lies beyond the EOF, or straddles the EOF
+ * and the write will cover all of the existing data.
+ */
+ if (CIFS_I(mapping->host)->clientCanCacheRead) {
+ i_size = i_size_read(mapping->host);
+ if (page_start >= i_size ||
+ (offset == 0 && (pos + len) >= i_size)) {
+ zero_user_segments(page, 0, offset,
+ offset + len,
+ PAGE_CACHE_SIZE);
+ /*
+ * PageChecked means that the parts of the page
+ * to which we're not writing are considered up
+ * to date. Once the data is copied to the
+ * page, it can be set uptodate.
+ */
+ SetPageChecked(page);
+ goto out;
+ }
+ }
- /* we do not need to pass errors back
- e.g. if we do not have read access to the file
- because cifs_write_end will attempt synchronous writes
- -- shaggy */
+ if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+ /*
+ * might as well read a page, it is fast enough. If we get
+ * an error, we don't need to return it. cifs_write_end will
+ * do a sync write instead since PG_uptodate isn't set.
+ */
+ cifs_readpage_worker(file, page, &page_start);
} else {
/* we could try using another file handle if there is one -
but how would we lock it to prevent close of that handle
racing with this read? In any case
this will be written out by write_end so is fine */
}
-
- return 0;
+out:
+ *pagep = page;
+ return rc;
}
const struct address_space_operations cifs_addr_ops = {
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aec5c13f634..96355d50534 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -102,6 +102,8 @@
#define EP_UNACTIVE_PTR ((void *) -1L)
+#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
+
struct epoll_filefd {
struct file *file;
int fd;
@@ -200,6 +202,9 @@ struct eventpoll {
* holding ->lock.
*/
struct epitem *ovflist;
+
+ /* The user that created the eventpoll descriptor */
+ struct user_struct *user;
};
/* Wait structure used by the poll hooks */
@@ -227,9 +232,17 @@ struct ep_pqueue {
};
/*
+ * Configuration options available inside /proc/sys/fs/epoll/
+ */
+/* Maximum number of epoll devices, per user */
+static int max_user_instances __read_mostly;
+/* Maximum number of epoll watched descriptors, per user */
+static int max_user_watches __read_mostly;
+
+/*
* This mutex is used to serialize ep_free() and eventpoll_release_file().
*/
-static struct mutex epmutex;
+static DEFINE_MUTEX(epmutex);
/* Safe wake up implementation */
static struct poll_safewake psw;
@@ -240,6 +253,33 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
+#ifdef CONFIG_SYSCTL
+
+#include <linux/sysctl.h>
+
+static int zero;
+
+ctl_table epoll_table[] = {
+ {
+ .procname = "max_user_instances",
+ .data = &max_user_instances,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ {
+ .procname = "max_user_watches",
+ .data = &max_user_watches,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &zero,
+ },
+ { .ctl_name = 0 }
+};
+#endif /* CONFIG_SYSCTL */
+
/* Setup the structure that is used as key for the RB tree */
static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -402,6 +442,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* At this point it is safe to free the eventpoll item */
kmem_cache_free(epi_cache, epi);
+ atomic_dec(&ep->user->epoll_watches);
+
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
current, ep, file));
@@ -449,6 +491,8 @@ static void ep_free(struct eventpoll *ep)
mutex_unlock(&epmutex);
mutex_destroy(&ep->mtx);
+ atomic_dec(&ep->user->epoll_devs);
+ free_uid(ep->user);
kfree(ep);
}
@@ -532,10 +576,19 @@ void eventpoll_release_file(struct file *file)
static int ep_alloc(struct eventpoll **pep)
{
- struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ int error;
+ struct user_struct *user;
+ struct eventpoll *ep;
- if (!ep)
- return -ENOMEM;
+ user = get_current_user();
+ error = -EMFILE;
+ if (unlikely(atomic_read(&user->epoll_devs) >=
+ max_user_instances))
+ goto free_uid;
+ error = -ENOMEM;
+ ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ if (unlikely(!ep))
+ goto free_uid;
spin_lock_init(&ep->lock);
mutex_init(&ep->mtx);
@@ -544,12 +597,17 @@ static int ep_alloc(struct eventpoll **pep)
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;
ep->ovflist = EP_UNACTIVE_PTR;
+ ep->user = user;
*pep = ep;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
current, ep));
return 0;
+
+free_uid:
+ free_uid(user);
+ return error;
}
/*
@@ -703,9 +761,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
struct epitem *epi;
struct ep_pqueue epq;
- error = -ENOMEM;
+ if (unlikely(atomic_read(&ep->user->epoll_watches) >=
+ max_user_watches))
+ return -ENOSPC;
if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
- goto error_return;
+ return -ENOMEM;
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
@@ -735,6 +795,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
* install process. Namely an allocation for a wait queue failed due
* high memory pressure.
*/
+ error = -ENOMEM;
if (epi->nwait < 0)
goto error_unregister;
@@ -765,6 +826,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
spin_unlock_irqrestore(&ep->lock, flags);
+ atomic_inc(&ep->user->epoll_watches);
+
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(&psw, &ep->poll_wait);
@@ -789,7 +852,7 @@ error_unregister:
spin_unlock_irqrestore(&ep->lock, flags);
kmem_cache_free(epi_cache, epi);
-error_return:
+
return error;
}
@@ -1078,6 +1141,7 @@ asmlinkage long sys_epoll_create1(int flags)
flags & O_CLOEXEC);
if (fd < 0)
ep_free(ep);
+ atomic_inc(&ep->user->epoll_devs);
error_return:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
@@ -1299,7 +1363,12 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
static int __init eventpoll_init(void)
{
- mutex_init(&epmutex);
+ struct sysinfo si;
+
+ si_meminfo(&si);
+ max_user_instances = 128;
+ max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) /
+ EP_ITEM_COST;
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_poll_safewake_init(&psw);
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 5e6724c1afd..2142b1c68b6 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -30,7 +30,8 @@
extern int debug_msgs;
-#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+extern void __ntfs_debug(const char *file, int line, const char *function,
+ const char *format, ...) __attribute__ ((format (printf, 4, 5)));
/**
* ntfs_debug - write a debug level message to syslog
* @f: a printf format string containing the message
@@ -39,11 +40,6 @@ extern int debug_msgs;
* ntfs_debug() writes a DEBUG level message to the syslog but only if the
* driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP.
*/
-static void ntfs_debug(const char *f, ...);
-#endif
-
-extern void __ntfs_debug (const char *file, int line, const char *function,
- const char *format, ...) __attribute__ ((format (printf, 4, 5)));
#define ntfs_debug(f, a...) \
__ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 7e947c67246..3a178ec48d7 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -112,7 +112,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
bh = bhs[i];
if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
+ mlog(ML_BH_IO,
"trying to sync read a jbd "
"managed bh (blocknr = %llu), skipping\n",
(unsigned long long)bh->b_blocknr);
@@ -147,15 +147,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
for (i = nr; i > 0; i--) {
bh = bhs[i - 1];
- if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
- "the journal got the buffer while it was "
- "locked for io! (blocknr = %llu)\n",
- (unsigned long long)bh->b_blocknr);
- BUG();
- }
+ /* No need to wait on the buffer if it's managed by JBD. */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
- wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
@@ -251,8 +246,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
ignore_cache = 1;
}
- /* XXX: Can we ever get this and *not* have the cached
- * flag set? */
if (buffer_jbd(bh)) {
if (ignore_cache)
mlog(ML_BH_IO, "trying to sync read a jbd "
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 533a789c3ef..ba962d71b34 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void)
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
dlmfs_init_once);
- if (!dlmfs_inode_cache)
+ if (!dlmfs_inode_cache) {
+ status = -ENOMEM;
goto bail;
+ }
cleanup_inode = 1;
user_dlm_worker = create_singlethread_workqueue("user_dlm");
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 39ec2773849..0c3cc03c61f 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -33,7 +33,7 @@
#include <linux/workqueue.h>
/* user_lock_res->l_flags flags. */
-#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define USER_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ec684426034..6e6cc0a2e5f 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2841,9 +2841,8 @@ static void ocfs2_unlock_ast(void *opaque, int error)
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
-
wake_up(&lockres->l_event);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
mlog_exit_void();
}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fef7ece3237..3fed9e3d899 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -85,7 +85,7 @@ enum ocfs2_unlock_action {
};
/* ocfs2_lock_res->l_flags flags. */
-#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index faec2d87935..9b76d41a8ac 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
+ if (!lksb->lksb_fsdlm.sb_lvbptr)
+ lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
+ sizeof(struct dlm_lksb);
return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6e74b117aaf..30ebde490f7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -106,6 +106,7 @@ void udf_clear_inode(struct inode *inode)
udf_truncate_tail_extent(inode);
unlock_kernel();
write_inode_now(inode, 0);
+ invalidate_inode_buffers(inode);
}
iinfo = UDF_I(inode);
kfree(iinfo->i_ext.i_data);