diff options
Diffstat (limited to 'fs/locks.c')
-rw-r--r-- | fs/locks.c | 389 |
1 files changed, 247 insertions, 142 deletions
diff --git a/fs/locks.c b/fs/locks.c index 92a0f0a52b0..13fc7a6d380 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -135,6 +135,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) +#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) static bool lease_breaking(struct file_lock *fl) { @@ -344,48 +345,43 @@ static int assign_type(struct file_lock *fl, long type) return 0; } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock *l) +static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock64 *l) { - off_t start, end; - switch (l->l_whence) { case SEEK_SET: - start = 0; + fl->fl_start = 0; break; case SEEK_CUR: - start = filp->f_pos; + fl->fl_start = filp->f_pos; break; case SEEK_END: - start = i_size_read(file_inode(filp)); + fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } + if (l->l_start > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_start += l->l_start; + if (fl->fl_start < 0) + return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; if (l->l_len > 0) { - end = start + l->l_len - 1; - fl->fl_end = end; + if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_end = fl->fl_start + l->l_len - 1; + } else if (l->l_len < 0) { - end = start - 1; - fl->fl_end = end; - start += l->l_len; - if (start < 0) + if (fl->fl_start + l->l_len < 0) return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - + fl->fl_end = fl->fl_start - 1; + fl->fl_start += l->l_len; + } else + fl->fl_end = OFFSET_MAX; + fl->fl_owner = current->files; fl->fl_pid = current->tgid; fl->fl_file = filp; @@ -393,55 +389,36 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, fl->fl_ops = NULL; fl->fl_lmops = NULL; - return assign_type(fl, l->l_type); -} - -#if BITS_PER_LONG == 32 -static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock64 *l) -{ - loff_t start; - - switch (l->l_whence) { - case SEEK_SET: - start = 0; - break; - case SEEK_CUR: - start = filp->f_pos; + /* Ensure that fl->fl_filp has compatible f_mode */ + switch (l->l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; break; - case SEEK_END: - start = i_size_read(file_inode(filp)); + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; break; - default: - return -EINVAL; } - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; - if (l->l_len > 0) { - fl->fl_end = start + l->l_len - 1; - } else if (l->l_len < 0) { - fl->fl_end = start - 1; - start += l->l_len; - if (start < 0) - return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - - fl->fl_owner = current->files; - fl->fl_pid = current->tgid; - fl->fl_file = filp; - fl->fl_flags = FL_POSIX; - fl->fl_ops = NULL; - fl->fl_lmops = NULL; - return assign_type(fl, l->l_type); } -#endif + +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. + */ +static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock *l) +{ + struct flock64 ll = { + .l_type = l->l_type, + .l_whence = l->l_whence, + .l_start = l->l_start, + .l_len = l->l_len, + }; + + return flock64_to_posix_lock(filp, fl, &ll); +} /* default lease lock manager operations */ static void lease_break_callback(struct file_lock *fl) @@ -511,8 +488,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) } /* Must be called with the i_lock held! */ -static inline void -locks_insert_global_locks(struct file_lock *fl) +static void locks_insert_global_locks(struct file_lock *fl) { lg_local_lock(&file_lock_lglock); fl->fl_link_cpu = smp_processor_id(); @@ -521,8 +497,7 @@ locks_insert_global_locks(struct file_lock *fl) } /* Must be called with the i_lock held! */ -static inline void -locks_delete_global_locks(struct file_lock *fl) +static void locks_delete_global_locks(struct file_lock *fl) { /* * Avoid taking lock if already unhashed. This is safe since this check @@ -544,14 +519,12 @@ posix_owner_key(struct file_lock *fl) return (unsigned long)fl->fl_owner; } -static inline void -locks_insert_global_blocked(struct file_lock *waiter) +static void locks_insert_global_blocked(struct file_lock *waiter) { hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); } -static inline void -locks_delete_global_blocked(struct file_lock *waiter) +static void locks_delete_global_blocked(struct file_lock *waiter) { hash_del(&waiter->fl_link); } @@ -581,7 +554,7 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the file_lock_list, but by ensuring that the + * list itself is protected by the blocked_lock_lock, but by ensuring that the * i_lock is also held on insertions we can avoid taking the blocked_lock_lock * in some cases when we see that the fl_block list is empty. */ @@ -591,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, BUG_ON(!list_empty(&waiter->fl_block)); waiter->fl_next = blocker; list_add_tail(&waiter->fl_block, &blocker->fl_block); - if (IS_POSIX(blocker)) + if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) locks_insert_global_blocked(waiter); } @@ -652,15 +625,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) locks_insert_global_locks(fl); } -/* - * Delete a lock and then free it. - * Wake up processes that are blocked waiting for this lock, - * notify the FS that the lock has been cleared and - * finally free the lock. +/** + * locks_delete_lock - Delete a lock and then free it. + * @thisfl_p: pointer that points to the fl_next field of the previous + * inode->i_flock list entry + * + * Unlink a lock from all lists and free the namespace reference, but don't + * free it yet. Wake up processes that are blocked waiting for this lock and + * notify the FS that the lock has been cleared. * * Must be called with the i_lock held! */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void locks_unlink_lock(struct file_lock **thisfl_p) { struct file_lock *fl = *thisfl_p; @@ -675,6 +651,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p) } locks_wake_up_blocks(fl); +} + +/* + * Unlink a lock from all lists and free it. + * + * Must be called with i_lock held! + */ +static void locks_delete_lock(struct file_lock **thisfl_p) +{ + struct file_lock *fl = *thisfl_p; + + locks_unlink_lock(thisfl_p); locks_free_lock(fl); } @@ -769,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock); * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. - * * To handle those cases, we just bail out after a few iterations. + * + * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * Because the owner is not even nominally tied to a thread of + * execution, the deadlock detection below can't reasonably work well. Just + * skip it for those. + * + * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * locks that just checks for the case where two tasks are attempting to + * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 @@ -793,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, { int i = 0; + /* + * This deadlock detector can't reasonably detect deadlocks with + * FL_FILE_PVT locks, since they aren't owned by a process, per-se. + */ + if (IS_FILE_PVT(caller_fl)) + return 0; + while ((block_fl = what_owner_is_waiting_for(block_fl))) { if (i++ > MAX_DEADLK_ITERATIONS) return 0; @@ -1152,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait); /** * locks_mandatory_locked - Check for an active lock - * @inode: the file to check + * @file: the file to check * * Searches the inode's list of locks to find any POSIX locks which conflict. * This function is called from locks_verify_locked() only. */ -int locks_mandatory_locked(struct inode *inode) +int locks_mandatory_locked(struct file *file) { + struct inode *inode = file_inode(file); fl_owner_t owner = current->files; struct file_lock *fl; @@ -1169,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode) for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!IS_POSIX(fl)) continue; - if (fl->fl_owner != owner) + if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) break; } spin_unlock(&inode->i_lock); @@ -1195,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode, { struct file_lock fl; int error; + bool sleep = false; locks_init_lock(&fl); - fl.fl_owner = current->files; fl.fl_pid = current->tgid; fl.fl_file = filp; fl.fl_flags = FL_POSIX | FL_ACCESS; if (filp && !(filp->f_flags & O_NONBLOCK)) - fl.fl_flags |= FL_SLEEP; + sleep = true; fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; fl.fl_start = offset; fl.fl_end = offset + count - 1; for (;;) { + if (filp) { + fl.fl_owner = (fl_owner_t)filp; + fl.fl_flags &= ~FL_SLEEP; + error = __posix_lock_file(inode, &fl, NULL); + if (!error) + break; + } + + if (sleep) + fl.fl_flags |= FL_SLEEP; + fl.fl_owner = current->files; error = __posix_lock_file(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; @@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp) return type; } +/** + * check_conflicting_open - see if the given dentry points to a file that has + * an existing open that would conflict with the + * desired lease. + * @dentry: dentry to check + * @arg: type of lease that we're trying to acquire + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. + */ +static int +check_conflicting_open(const struct dentry *dentry, const long arg) +{ + int ret = 0; + struct inode *inode = dentry->d_inode; + + if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) + return -EAGAIN; + + if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || + (atomic_read(&inode->i_count) > 1))) + ret = -EAGAIN; + + return ret; +} + static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; @@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp return -EINVAL; } - error = -EAGAIN; - if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) - goto out; - if ((arg == F_WRLCK) - && ((d_count(dentry) > 1) - || (atomic_read(&inode->i_count) > 1))) + error = check_conflicting_open(dentry, arg); + if (error) goto out; /* @@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp goto out; locks_insert_lock(before, lease); - error = 0; + /* + * The check in break_lease() is lockless. It's possible for another + * open to race in after we did the earlier check for a conflicting + * open but before the lease was inserted. Check again for a + * conflicting open and cancel the lease if there is one. + * + * We also add a barrier here to ensure that the insertion of the lock + * precedes these checks. + */ + smp_mb(); + error = check_conflicting_open(dentry, arg); + if (error) + locks_unlink_lock(flp); out: if (is_deleg) mutex_unlock(&inode->i_mutex); @@ -1842,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1864,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1876,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk(struct file *filp, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) { struct file_lock file_lock; struct flock flock; @@ -1893,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (error) goto out; + if (cmd == F_GETLKP) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK; + file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_owner = (fl_owner_t)filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -2012,25 +2071,32 @@ again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + /* + * If the cmd is requesting file-private locks, then set the + * FL_FILE_PVT flag and override the owner. + */ + switch (cmd) { + case F_SETLKP: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; break; - case F_UNLCK: - break; - default: + case F_SETLKPW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; + /* Fallthrough */ + case F_SETLKW: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -2061,7 +2127,7 @@ out: /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk64(struct file *filp, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) { struct file_lock file_lock; struct flock64 flock; @@ -2078,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (error) goto out; + if (cmd == F_GETLKP) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK64; + file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_owner = (fl_owner_t)filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -2130,25 +2206,32 @@ again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW64) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + /* + * If the cmd is requesting file-private locks, then set the + * FL_FILE_PVT flag and override the owner. + */ + switch (cmd) { + case F_SETLKP: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK64; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; break; - case F_UNLCK: - break; - default: + case F_SETLKPW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW64; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; + /* Fallthrough */ + case F_SETLKW64: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -2209,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix); /* * This function is called on the last close of an open file. */ -void locks_remove_flock(struct file *filp) +void locks_remove_file(struct file *filp) { struct inode * inode = file_inode(filp); struct file_lock *fl; @@ -2218,6 +2301,8 @@ void locks_remove_flock(struct file *filp) if (!inode->i_flock) return; + locks_remove_posix(filp, (fl_owner_t)filp); + if (filp->f_op->flock) { struct file_lock fl = { .fl_pid = current->tgid, @@ -2236,16 +2321,28 @@ void locks_remove_flock(struct file *filp) while ((fl = *before) != NULL) { if (fl->fl_file == filp) { - if (IS_FLOCK(fl)) { - locks_delete_lock(before); - continue; - } if (IS_LEASE(fl)) { lease_modify(before, F_UNLCK); continue; } - /* What? */ - BUG(); + + /* + * There's a leftover lock on the list of a type that + * we didn't expect to see. Most likely a classic + * POSIX lock that ended up not getting released + * properly, or that raced onto the list somehow. Log + * some info about it and then just remove it from + * the list. + */ + WARN(!IS_FLOCK(fl), + "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), inode->i_ino, + fl->fl_type, fl->fl_flags, + fl->fl_start, fl->fl_end); + + locks_delete_lock(before); + continue; } before = &fl->fl_next; } @@ -2314,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { - seq_printf(f, "%6s %s ", - (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", + if (fl->fl_flags & FL_ACCESS) + seq_printf(f, "ACCESS"); + else if (IS_FILE_PVT(fl)) + seq_printf(f, "FLPVT "); + else + seq_printf(f, "POSIX "); + + seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { @@ -2385,6 +2488,7 @@ static int locks_show(struct seq_file *f, void *v) } static void *locks_start(struct seq_file *f, loff_t *pos) + __acquires(&blocked_lock_lock) { struct locks_iterator *iter = f->private; @@ -2403,6 +2507,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos) } static void locks_stop(struct seq_file *f, void *v) + __releases(&blocked_lock_lock) { spin_unlock(&blocked_lock_lock); lg_global_unlock(&file_lock_lglock); |