summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_inode.c18
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/bio-integrity.c719
-rw-r--r--fs/bio.c88
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/buffer.c13
-rw-r--r--fs/cifs/cifsacl.c10
-rw-r--r--fs/cifs/inode.c20
-rw-r--r--fs/dcache.c68
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext3/super.c4
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/libfs.c28
-rw-r--r--fs/locks.c6
-rw-r--r--fs/namei.c26
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/dlmglue.c14
-rw-r--r--fs/open.c37
-rw-r--r--fs/pipe.c10
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/task_mmu.c86
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/ramfs/file-mmu.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/super.c4
-rw-r--r--fs/splice.c17
-rw-r--r--fs/udf/super.c57
-rw-r--r--fs/utimes.c59
-rw-r--r--fs/xfs/xfs_log.c15
36 files changed, 1081 insertions, 278 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index fd01d90cada..57997fa14e6 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -51,4 +51,4 @@ int v9fs_dir_release(struct inode *inode, struct file *filp);
int v9fs_file_open(struct inode *inode, struct file *file);
void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
void v9fs_dentry_release(struct dentry *);
-int v9fs_uflags2omode(int uflags);
+int v9fs_uflags2omode(int uflags, int extended);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 0d55affe37d..52944d2249a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -59,7 +59,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p \n", inode, file);
v9ses = v9fs_inode2v9ses(inode);
- omode = v9fs_uflags2omode(file->f_flags);
+ omode = v9fs_uflags2omode(file->f_flags, v9fs_extended(v9ses));
fid = file->private_data;
if (!fid) {
fid = v9fs_fid_clone(file->f_path.dentry);
@@ -75,6 +75,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
inode->i_size = 0;
inode->i_blocks = 0;
}
+ if ((file->f_flags & O_APPEND) && (!v9fs_extended(v9ses)))
+ generic_file_llseek(file, 0, SEEK_END);
}
file->private_data = fid;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 40fa807bd92..c95295c6504 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -132,10 +132,10 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
/**
* v9fs_uflags2omode- convert posix open flags to plan 9 mode bits
* @uflags: flags to convert
- *
+ * @extended: if .u extensions are active
*/
-int v9fs_uflags2omode(int uflags)
+int v9fs_uflags2omode(int uflags, int extended)
{
int ret;
@@ -155,14 +155,16 @@ int v9fs_uflags2omode(int uflags)
break;
}
- if (uflags & O_EXCL)
- ret |= P9_OEXCL;
-
if (uflags & O_TRUNC)
ret |= P9_OTRUNC;
- if (uflags & O_APPEND)
- ret |= P9_OAPPEND;
+ if (extended) {
+ if (uflags & O_EXCL)
+ ret |= P9_OEXCL;
+
+ if (uflags & O_APPEND)
+ ret |= P9_OAPPEND;
+ }
return ret;
}
@@ -506,7 +508,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
flags = O_RDWR;
fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
- v9fs_uflags2omode(flags));
+ v9fs_uflags2omode(flags, v9fs_extended(v9ses)));
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
fid = NULL;
diff --git a/fs/Kconfig b/fs/Kconfig
index cf12c403b8c..2694648cbd1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -830,7 +830,7 @@ config NTFS_FS
from the project web site.
For more information see <file:Documentation/filesystems/ntfs.txt>
- and <http://linux-ntfs.sourceforge.net/>.
+ and <http://www.linux-ntfs.org/>.
To compile this file system support as a module, choose M here: the
module will be called ntfs.
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11bd4da..277b079dec9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,7 @@ else
obj-y += no-block.o
endif
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
obj-$(CONFIG_INOTIFY) += inotify.o
obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
new file mode 100644
index 00000000000..63e2ee63058
--- /dev/null
+++ b/fs/bio-integrity.c
@@ -0,0 +1,719 @@
+/*
+ * bio-integrity.c - bio data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+
+static struct kmem_cache *bio_integrity_slab __read_mostly;
+static struct workqueue_struct *kintegrityd_wq;
+
+/**
+ * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
+ * @bio: bio to attach integrity metadata to
+ * @gfp_mask: Memory allocation mask
+ * @nr_vecs: Number of integrity metadata scatter-gather elements
+ * @bs: bio_set to allocate from
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata. nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs,
+ struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip;
+ struct bio_vec *iv;
+ unsigned long idx;
+
+ BUG_ON(bio == NULL);
+
+ bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+ if (unlikely(bip == NULL)) {
+ printk(KERN_ERR "%s: could not alloc bip\n", __func__);
+ return NULL;
+ }
+
+ memset(bip, 0, sizeof(*bip));
+
+ iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
+ if (unlikely(iv == NULL)) {
+ printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
+ mempool_free(bip, bs->bio_integrity_pool);
+ return NULL;
+ }
+
+ bip->bip_pool = idx;
+ bip->bip_vec = iv;
+ bip->bip_bio = bio;
+ bio->bi_integrity = bip;
+
+ return bip;
+}
+EXPORT_SYMBOL(bio_integrity_alloc_bioset);
+
+/**
+ * bio_integrity_alloc - Allocate integrity payload and attach it to bio
+ * @bio: bio to attach integrity metadata to
+ * @gfp_mask: Memory allocation mask
+ * @nr_vecs: Number of integrity metadata scatter-gather elements
+ *
+ * Description: This function prepares a bio for attaching integrity
+ * metadata. nr_vecs specifies the maximum number of pages containing
+ * integrity metadata that can be attached.
+ */
+struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
+ gfp_t gfp_mask,
+ unsigned int nr_vecs)
+{
+ return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
+}
+EXPORT_SYMBOL(bio_integrity_alloc);
+
+/**
+ * bio_integrity_free - Free bio integrity payload
+ * @bio: bio containing bip to be freed
+ * @bs: bio_set this bio was allocated from
+ *
+ * Description: Used to free the integrity portion of a bio. Usually
+ * called from bio_free().
+ */
+void bio_integrity_free(struct bio *bio, struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+
+ BUG_ON(bip == NULL);
+
+ /* A cloned bio doesn't own the integrity metadata */
+ if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+ kfree(bip->bip_buf);
+
+ mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
+ mempool_free(bip, bs->bio_integrity_pool);
+
+ bio->bi_integrity = NULL;
+}
+EXPORT_SYMBOL(bio_integrity_free);
+
+/**
+ * bio_integrity_add_page - Attach integrity metadata
+ * @bio: bio to update
+ * @page: page containing integrity metadata
+ * @len: number of bytes of integrity metadata in page
+ * @offset: start offset within page
+ *
+ * Description: Attach a page containing integrity metadata to bio.
+ */
+int bio_integrity_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct bio_vec *iv;
+
+ if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
+ printk(KERN_ERR "%s: bip_vec full\n", __func__);
+ return 0;
+ }
+
+ iv = bip_vec_idx(bip, bip->bip_vcnt);
+ BUG_ON(iv == NULL);
+ BUG_ON(iv->bv_page != NULL);
+
+ iv->bv_page = page;
+ iv->bv_len = len;
+ iv->bv_offset = offset;
+ bip->bip_vcnt++;
+
+ return len;
+}
+EXPORT_SYMBOL(bio_integrity_add_page);
+
+/**
+ * bio_integrity_enabled - Check whether integrity can be passed
+ * @bio: bio to check
+ *
+ * Description: Determines whether bio_integrity_prep() can be called
+ * on this bio or not. bio data direction and target device must be
+ * set prior to calling. The functions honors the write_generate and
+ * read_verify flags in sysfs.
+ */
+int bio_integrity_enabled(struct bio *bio)
+{
+ /* Already protected? */
+ if (bio_integrity(bio))
+ return 0;
+
+ return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
+}
+EXPORT_SYMBOL(bio_integrity_enabled);
+
+/**
+ * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
+ * @bi: blk_integrity profile for device
+ * @sectors: Number of 512 sectors to convert
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the hardware
+ * sector size of the storage device. Convert the block layer sectors
+ * to physical sectors.
+ */
+static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
+ unsigned int sectors)
+{
+ /* At this point there are only 512b or 4096b DIF/EPP devices */
+ if (bi->sector_size == 4096)
+ return sectors >>= 3;
+
+ return sectors;
+}
+
+/**
+ * bio_integrity_tag_size - Retrieve integrity tag space
+ * @bio: bio to inspect
+ *
+ * Description: Returns the maximum number of tag bytes that can be
+ * attached to this bio. Filesystems can use this to determine how
+ * much metadata to attach to an I/O.
+ */
+unsigned int bio_integrity_tag_size(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+
+ BUG_ON(bio->bi_size == 0);
+
+ return bi->tag_size * (bio->bi_size / bi->sector_size);
+}
+EXPORT_SYMBOL(bio_integrity_tag_size);
+
+int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip->bip_buf == NULL);
+
+ if (bi->tag_size == 0)
+ return -1;
+
+ nr_sectors = bio_integrity_hw_sectors(bi,
+ DIV_ROUND_UP(len, bi->tag_size));
+
+ if (nr_sectors * bi->tuple_size > bip->bip_size) {
+ printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
+ __func__, nr_sectors * bi->tuple_size, bip->bip_size);
+ return -1;
+ }
+
+ if (set)
+ bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+ else
+ bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
+
+ return 0;
+}
+
+/**
+ * bio_integrity_set_tag - Attach a tag buffer to a bio
+ * @bio: bio to attach buffer to
+ * @tag_buf: Pointer to a buffer containing tag data
+ * @len: Length of the included buffer
+ *
+ * Description: Use this function to tag a bio by leveraging the extra
+ * space provided by devices formatted with integrity protection. The
+ * size of the integrity buffer must be <= to the size reported by
+ * bio_integrity_tag_size().
+ */
+int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
+{
+ BUG_ON(bio_data_dir(bio) != WRITE);
+
+ return bio_integrity_tag(bio, tag_buf, len, 1);
+}
+EXPORT_SYMBOL(bio_integrity_set_tag);
+
+/**
+ * bio_integrity_get_tag - Retrieve a tag buffer from a bio
+ * @bio: bio to retrieve buffer from
+ * @tag_buf: Pointer to a buffer for the tag data
+ * @len: Length of the target buffer
+ *
+ * Description: Use this function to retrieve the tag buffer from a
+ * completed I/O. The size of the integrity buffer must be <= to the
+ * size reported by bio_integrity_tag_size().
+ */
+int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
+{
+ BUG_ON(bio_data_dir(bio) != READ);
+
+ return bio_integrity_tag(bio, tag_buf, len, 0);
+}
+EXPORT_SYMBOL(bio_integrity_get_tag);
+
+/**
+ * bio_integrity_generate - Generate integrity metadata for a bio
+ * @bio: bio to generate integrity metadata for
+ *
+ * Description: Generates integrity metadata for a bio by calling the
+ * block device's generation callback function. The bio must have a
+ * bip attached with enough room to accommodate the generated
+ * integrity metadata.
+ */
+static void bio_integrity_generate(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity_exchg bix;
+ struct bio_vec *bv;
+ sector_t sector = bio->bi_sector;
+ unsigned int i, sectors, total;
+ void *prot_buf = bio->bi_integrity->bip_buf;
+
+ total = 0;
+ bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ bix.sector_size = bi->sector_size;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ bix.data_buf = kaddr + bv->bv_offset;
+ bix.data_size = bv->bv_len;
+ bix.prot_buf = prot_buf;
+ bix.sector = sector;
+
+ bi->generate_fn(&bix);
+
+ sectors = bv->bv_len / bi->sector_size;
+ sector += sectors;
+ prot_buf += sectors * bi->tuple_size;
+ total += sectors * bi->tuple_size;
+ BUG_ON(total > bio->bi_integrity->bip_size);
+
+ kunmap_atomic(kaddr, KM_USER0);
+ }
+}
+
+/**
+ * bio_integrity_prep - Prepare bio for integrity I/O
+ * @bio: bio to prepare
+ *
+ * Description: Allocates a buffer for integrity metadata, maps the
+ * pages and attaches them to a bio. The bio must have data
+ * direction, target device and start sector set priot to calling. In
+ * the WRITE case, integrity metadata will be generated using the
+ * block device's integrity function. In the READ case, the buffer
+ * will be prepared for DMA and a suitable end_io handler set up.
+ */
+int bio_integrity_prep(struct bio *bio)
+{
+ struct bio_integrity_payload *bip;
+ struct blk_integrity *bi;
+ struct request_queue *q;
+ void *buf;
+ unsigned long start, end;
+ unsigned int len, nr_pages;
+ unsigned int bytes, offset, i;
+ unsigned int sectors;
+
+ bi = bdev_get_integrity(bio->bi_bdev);
+ q = bdev_get_queue(bio->bi_bdev);
+ BUG_ON(bi == NULL);
+ BUG_ON(bio_integrity(bio));
+
+ sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
+
+ /* Allocate kernel buffer for protection data */
+ len = sectors * blk_integrity_tuple_size(bi);
+ buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
+ if (unlikely(buf == NULL)) {
+ printk(KERN_ERR "could not allocate integrity buffer\n");
+ return -EIO;
+ }
+
+ end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = ((unsigned long) buf) >> PAGE_SHIFT;
+ nr_pages = end - start;
+
+ /* Allocate bio integrity payload and integrity vectors */
+ bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+ if (unlikely(bip == NULL)) {
+ printk(KERN_ERR "could not allocate data integrity bioset\n");
+ kfree(buf);
+ return -EIO;
+ }
+
+ bip->bip_buf = buf;
+ bip->bip_size = len;
+ bip->bip_sector = bio->bi_sector;
+
+ /* Map it */
+ offset = offset_in_page(buf);
+ for (i = 0 ; i < nr_pages ; i++) {
+ int ret;
+ bytes = PAGE_SIZE - offset;
+
+ if (len <= 0)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ ret = bio_integrity_add_page(bio, virt_to_page(buf),
+ bytes, offset);
+
+ if (ret == 0)
+ return 0;
+
+ if (ret < bytes)
+ break;
+
+ buf += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ /* Install custom I/O completion handler if read verify is enabled */
+ if (bio_data_dir(bio) == READ) {
+ bip->bip_end_io = bio->bi_end_io;
+ bio->bi_end_io = bio_integrity_endio;
+ }
+
+ /* Auto-generate integrity metadata if this is a write */
+ if (bio_data_dir(bio) == WRITE)
+ bio_integrity_generate(bio);
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_integrity_prep);
+
+/**
+ * bio_integrity_verify - Verify integrity metadata for a bio
+ * @bio: bio to verify
+ *
+ * Description: This function is called to verify the integrity of a
+ * bio. The data in the bio io_vec is compared to the integrity
+ * metadata returned by the HBA.
+ */
+static int bio_integrity_verify(struct bio *bio)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ struct blk_integrity_exchg bix;
+ struct bio_vec *bv;
+ sector_t sector = bio->bi_integrity->bip_sector;
+ unsigned int i, sectors, total, ret;
+ void *prot_buf = bio->bi_integrity->bip_buf;
+
+ ret = total = 0;
+ bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ bix.sector_size = bi->sector_size;
+
+ bio_for_each_segment(bv, bio, i) {
+ void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
+ bix.data_buf = kaddr + bv->bv_offset;
+ bix.data_size = bv->bv_len;
+ bix.prot_buf = prot_buf;
+ bix.sector = sector;
+
+ ret = bi->verify_fn(&bix);
+
+ if (ret) {
+ kunmap_atomic(kaddr, KM_USER0);
+ break;
+ }
+
+ sectors = bv->bv_len / bi->sector_size;
+ sector += sectors;
+ prot_buf += sectors * bi->tuple_size;
+ total += sectors * bi->tuple_size;
+ BUG_ON(total > bio->bi_integrity->bip_size);
+
+ kunmap_atomic(kaddr, KM_USER0);
+ }
+
+ return ret;
+}
+
+/**
+ * bio_integrity_verify_fn - Integrity I/O completion worker
+ * @work: Work struct stored in bio to be verified
+ *
+ * Description: This workqueue function is called to complete a READ
+ * request. The function verifies the transferred integrity metadata
+ * and then calls the original bio end_io function.
+ */
+static void bio_integrity_verify_fn(struct work_struct *work)
+{
+ struct bio_integrity_payload *bip =
+ container_of(work, struct bio_integrity_payload, bip_work);
+ struct bio *bio = bip->bip_bio;
+ int error = bip->bip_error;
+
+ if (bio_integrity_verify(bio)) {
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ error = -EIO;
+ }
+
+ /* Restore original bio completion handler */
+ bio->bi_end_io = bip->bip_end_io;
+
+ if (bio->bi_end_io)
+ bio->bi_end_io(bio, error);
+}
+
+/**
+ * bio_integrity_endio - Integrity I/O completion function
+ * @bio: Protected bio
+ * @error: Pointer to errno
+ *
+ * Description: Completion for integrity I/O
+ *
+ * Normally I/O completion is done in interrupt context. However,
+ * verifying I/O integrity is a time-consuming task which must be run
+ * in process context. This function postpones completion
+ * accordingly.
+ */
+void bio_integrity_endio(struct bio *bio, int error)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+
+ BUG_ON(bip->bip_bio != bio);
+
+ bip->bip_error = error;
+ INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
+ queue_work(kintegrityd_wq, &bip->bip_work);
+}
+EXPORT_SYMBOL(bio_integrity_endio);
+
+/**
+ * bio_integrity_mark_head - Advance bip_vec skip bytes
+ * @bip: Integrity vector to advance
+ * @skip: Number of bytes to advance it
+ */
+void bio_integrity_mark_head(struct bio_integrity_payload *bip,
+ unsigned int skip)
+{
+ struct bio_vec *iv;
+ unsigned int i;
+
+ bip_for_each_vec(iv, bip, i) {
+ if (skip == 0) {
+ bip->bip_idx = i;
+ return;
+ } else if (skip >= iv->bv_len) {
+ skip -= iv->bv_len;
+ } else { /* skip < iv->bv_len) */
+ iv->bv_offset += skip;
+ iv->bv_len -= skip;
+ bip->bip_idx = i;
+ return;
+ }
+ }
+}
+
+/**
+ * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
+ * @bip: Integrity vector to truncate
+ * @len: New length of integrity vector
+ */
+void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
+ unsigned int len)
+{
+ struct bio_vec *iv;
+ unsigned int i;
+
+ bip_for_each_vec(iv, bip, i) {
+ if (len == 0) {
+ bip->bip_vcnt = i;
+ return;
+ } else if (len >= iv->bv_len) {
+ len -= iv->bv_len;
+ } else { /* len < iv->bv_len) */
+ iv->bv_len = len;
+ len = 0;
+ }
+ }
+}
+
+/**
+ * bio_integrity_advance - Advance integrity vector
+ * @bio: bio whose integrity vector to update
+ * @bytes_done: number of data bytes that have been completed
+ *
+ * Description: This function calculates how many integrity bytes the
+ * number of completed data bytes correspond to and advances the
+ * integrity vector accordingly.
+ */
+void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip == NULL);
+ BUG_ON(bi == NULL);
+
+ nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
+ bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
+}
+EXPORT_SYMBOL(bio_integrity_advance);
+
+/**
+ * bio_integrity_trim - Trim integrity vector
+ * @bio: bio whose integrity vector to update
+ * @offset: offset to first data sector
+ * @sectors: number of data sectors
+ *
+ * Description: Used to trim the integrity vector in a cloned bio.
+ * The ivec will be advanced corresponding to 'offset' data sectors
+ * and the length will be truncated corresponding to 'len' data
+ * sectors.
+ */
+void bio_integrity_trim(struct bio *bio, unsigned int offset,
+ unsigned int sectors)
+{
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
+ unsigned int nr_sectors;
+
+ BUG_ON(bip == NULL);
+ BUG_ON(bi == NULL);
+ BUG_ON(!bio_flagged(bio, BIO_CLONED));
+
+ nr_sectors = bio_integrity_hw_sectors(bi, sectors);
+ bip->bip_sector = bip->bip_sector + offset;
+ bio_integrity_mark_head(bip, offset * bi->tuple_size);
+ bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
+}
+EXPORT_SYMBOL(bio_integrity_trim);
+
+/**
+ * bio_integrity_split - Split integrity metadata
+ * @bio: Protected bio
+ * @bp: Resulting bio_pair
+ * @sectors: Offset
+ *
+ * Description: Splits an integrity page into a bio_pair.
+ */
+void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
+{
+ struct blk_integrity *bi;
+ struct bio_integrity_payload *bip = bio->bi_integrity;
+ unsigned int nr_sectors;
+
+ if (bio_integrity(bio) == 0)
+ return;
+
+ bi = bdev_get_integrity(bio->bi_bdev);
+ BUG_ON(bi == NULL);
+ BUG_ON(bip->bip_vcnt != 1);
+
+ nr_sectors = bio_integrity_hw_sectors(bi, sectors);
+
+ bp->bio1.bi_integrity = &bp->bip1;
+ bp->bio2.bi_integrity = &bp->bip2;
+
+ bp->iv1 = bip->bip_vec[0];
+ bp->iv2 = bip->bip_vec[0];
+
+ bp->bip1.bip_vec = &bp->iv1;
+ bp->bip2.bip_vec = &bp->iv2;
+
+ bp->iv1.bv_len = sectors * bi->tuple_size;
+ bp->iv2.bv_offset += sectors * bi->tuple_size;
+ bp->iv2.bv_len -= sectors * bi->tuple_size;
+
+ bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
+ bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
+
+ bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
+ bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
+}
+EXPORT_SYMBOL(bio_integrity_split);
+
+/**
+ * bio_integrity_clone - Callback for cloning bios with integrity metadata
+ * @bio: New bio
+ * @bio_src: Original bio
+ * @bs: bio_set to allocate bip from
+ *
+ * Description: Called to allocate a bip when cloning a bio
+ */
+int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
+ struct bio_set *bs)
+{
+ struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
+ struct bio_integrity_payload *bip;
+
+ BUG_ON(bip_src == NULL);
+
+ bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
+
+ if (bip == NULL)
+ return -EIO;
+
+ memcpy(bip->bip_vec, bip_src->bip_vec,
+ bip_src->bip_vcnt * sizeof(struct bio_vec));
+
+ bip->bip_sector = bip_src->bip_sector;
+ bip->bip_vcnt = bip_src->bip_vcnt;
+ bip->bip_idx = bip_src->bip_idx;
+
+ return 0;
+}
+EXPORT_SYMBOL(bio_integrity_clone);
+
+int bioset_integrity_create(struct bio_set *bs, int pool_size)
+{
+ bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
+ bio_integrity_slab);
+ if (!bs->bio_integrity_pool)
+ return -1;
+
+ return 0;
+}
+EXPORT_SYMBOL(bioset_integrity_create);
+
+void bioset_integrity_free(struct bio_set *bs)
+{
+ if (bs->bio_integrity_pool)
+ mempool_destroy(bs->bio_integrity_pool);
+}
+EXPORT_SYMBOL(bioset_integrity_free);
+
+void __init bio_integrity_init_slab(void)
+{
+ bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+}
+EXPORT_SYMBOL(bio_integrity_init_slab);
+
+static int __init integrity_init(void)
+{
+ kintegrityd_wq = create_workqueue("kintegrityd");
+
+ if (!kintegrityd_wq)
+ panic("Failed to create kintegrityd\n");
+
+ return 0;
+}
+subsys_initcall(integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb5..88322b066ac 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,25 +28,10 @@
#include <linux/blktrace_api.h>
#include <scsi/sg.h> /* for struct sg_iovec */
-#define BIO_POOL_SIZE 2
-
static struct kmem_cache *bio_slab __read_mostly;
-#define BIOVEC_NR_POOLS 6
-
-/*
- * a small number of entries is fine, not going to be performance critical.
- * basically we just need to survive
- */
-#define BIO_SPLIT_ENTRIES 2
mempool_t *bio_split_pool __read_mostly;
-struct biovec_slab {
- int nr_vecs;
- char *name;
- struct kmem_cache *slab;
-};
-
/*
* if you change this list, also change bvec_alloc or things will
* break badly! cannot be bigger than what you can fit into an
@@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
#undef BV
/*
- * bio_set is used to allow other portions of the IO system to
- * allocate their own private memory pools for bio and iovec structures.
- * These memory pools in turn all allocate from the bio_slab
- * and the bvec_slabs[].
- */
-struct bio_set {
- mempool_t *bio_pool;
- mempool_t *bvec_pools[BIOVEC_NR_POOLS];
-};
-
-/*
* fs_bio_set is the bio_set containing bio and iovec memory pools used by
* IO code that does not need private memory pools.
*/
-static struct bio_set *fs_bio_set;
+struct bio_set *fs_bio_set;
+
+unsigned int bvec_nr_vecs(unsigned short idx)
+{
+ return bvec_slabs[idx].nr_vecs;
+}
-static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
+struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
{
struct bio_vec *bvl;
@@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
}
+ if (bio_integrity(bio))
+ bio_integrity_free(bio, bio_set);
+
mempool_free(bio, bio_set->bio_pool);
}
@@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
{
struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
- if (b) {
- b->bi_destructor = bio_fs_destructor;
- __bio_clone(b, bio);
+ if (!b)
+ return NULL;
+
+ b->bi_destructor = bio_fs_destructor;
+ __bio_clone(b, bio);
+
+ if (bio_integrity(bio)) {
+ int ret;
+
+ ret = bio_integrity_clone(b, bio, fs_bio_set);
+
+ if (ret < 0)
+ return NULL;
}
return b;
@@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
if (page == prev->bv_page &&
offset == prev->bv_offset + prev->bv_len) {
prev->bv_len += len;
- if (q->merge_bvec_fn &&
- q->merge_bvec_fn(q, bio, prev) < len) {
- prev->bv_len -= len;
- return 0;
+
+ if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+ .bi_size = bio->bi_size,
+ .bi_rw = bio->bi_rw,
+ };
+
+ if (q->merge_bvec_fn(q, &bvm, prev) < len) {
+ prev->bv_len -= len;
+ return 0;
+ }
}
goto done;
@@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* queue to get further control
*/
if (q->merge_bvec_fn) {
+ struct bvec_merge_data bvm = {
+ .bi_bdev = bio->bi_bdev,
+ .bi_sector = bio->bi_sector,
+ .bi_size = bio->bi_size,
+ .bi_rw = bio->bi_rw,
+ };
+
/*
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, bio, bvec) < len) {
+ if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
bvec->bv_page = NULL;
bvec->bv_len = 0;
bvec->bv_offset = 0;
@@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
bp->bio1.bi_private = bi;
bp->bio2.bi_private = pool;
+ if (bio_integrity(bi))
+ bio_integrity_split(bi, bp, first_sectors);
+
return bp;
}
@@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs)
if (bs->bio_pool)
mempool_destroy(bs->bio_pool);
+ bioset_integrity_free(bs);
biovec_free_pools(bs);
kfree(bs);
@@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
if (!bs->bio_pool)
goto bad;
+ if (bioset_integrity_create(bs, bio_pool_size))
+ goto bad;
+
if (!biovec_create_pools(bs, bvec_pool_size))
return bs;
@@ -1332,6 +1347,7 @@ static int __init init_bio(void)
{
bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+ bio_integrity_init_slab();
biovec_init_slabs();
fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 470c10ceb0f..10d8a0aa871 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -931,8 +931,16 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
struct gendisk *disk;
int ret;
int part;
+ int perm = 0;
- ret = devcgroup_inode_permission(bdev->bd_inode, file->f_mode);
+ if (file->f_mode & FMODE_READ)
+ perm |= MAY_READ;
+ if (file->f_mode & FMODE_WRITE)
+ perm |= MAY_WRITE;
+ /*
+ * hooks: /n/, see "layering violations".
+ */
+ ret = devcgroup_inode_permission(bdev->bd_inode, perm);
if (ret != 0)
return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index a073f3f4f01..0f51c0f7c26 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -821,7 +821,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
* contents - it is a noop if I/O is still in
* flight on potentially older contents.
*/
- ll_rw_block(SWRITE, 1, &bh);
+ ll_rw_block(SWRITE_SYNC, 1, &bh);
brelse(bh);
spin_lock(lock);
}
@@ -2940,16 +2940,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
- if (rw == SWRITE)
+ if (rw == SWRITE || rw == SWRITE_SYNC)
lock_buffer(bh);
else if (test_set_buffer_locked(bh))
continue;
- if (rw == WRITE || rw == SWRITE) {
+ if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- submit_bh(WRITE, bh);
+ if (rw == SWRITE_SYNC)
+ submit_bh(WRITE_SYNC, bh);
+ else
+ submit_bh(WRITE, bh);
continue;
}
} else {
@@ -2978,7 +2981,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(WRITE, bh);
+ ret = submit_bh(WRITE_SYNC, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 34902cff540..0e9fc2ba90e 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -34,11 +34,11 @@
static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
- {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
- {{1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(544), 0, 0, 0} }, "root"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(545), 0, 0, 0} }, "users"},
- {{1, 2, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(32), cpu_to_le32(546), 0, 0, 0} }, "guest"} }
+ {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
+ {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
+ {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
+ {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
+ {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 722be543cee..2e904bd111c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -219,15 +219,15 @@ int cifs_get_inode_info_unix(struct inode **pinode,
rc = CIFSSMBUnixQPathInfo(xid, pTcon, full_path, &find_data,
cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc) {
- if (rc == -EREMOTE && !is_dfs_referral) {
- is_dfs_referral = true;
- cFYI(DBG2, ("DFS ref"));
- /* for DFS, server does not give us real inode data */
- fill_fake_finddataunix(&find_data, sb);
- rc = 0;
- }
- }
+ if (rc == -EREMOTE && !is_dfs_referral) {
+ is_dfs_referral = true;
+ cFYI(DBG2, ("DFS ref"));
+ /* for DFS, server does not give us real inode data */
+ fill_fake_finddataunix(&find_data, sb);
+ rc = 0;
+ } else if (rc)
+ goto cgiiu_exit;
+
num_of_bytes = le64_to_cpu(find_data.NumOfBytes);
end_of_file = le64_to_cpu(find_data.EndOfFile);
@@ -236,7 +236,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
*pinode = new_inode(sb);
if (*pinode == NULL) {
rc = -ENOMEM;
- goto cgiiu_exit;
+ goto cgiiu_exit;
}
/* Is an i_ino of zero legal? */
/* note ino incremented to unique num in new_inode */
diff --git a/fs/dcache.c b/fs/dcache.c
index 3ee588d5f58..6068c25b393 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -17,6 +17,7 @@
#include <linux/syscalls.h>
#include <linux/string.h>
#include <linux/mm.h>
+#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
@@ -106,9 +107,10 @@ static void dentry_lru_remove(struct dentry *dentry)
/*
* Release the dentry's inode, using the filesystem
* d_iput() operation if defined.
- * Called with dcache_lock and per dentry lock held, drops both.
*/
static void dentry_iput(struct dentry * dentry)
+ __releases(dentry->d_lock)
+ __releases(dcache_lock)
{
struct inode *inode = dentry->d_inode;
if (inode) {
@@ -132,12 +134,13 @@ static void dentry_iput(struct dentry * dentry)
* d_kill - kill dentry and return parent
* @dentry: dentry to kill
*
- * Called with dcache_lock and d_lock, releases both. The dentry must
- * already be unhashed and removed from the LRU.
+ * The dentry must already be unhashed and removed from the LRU.
*
* If this is the root of the dentry tree, return NULL.
*/
static struct dentry *d_kill(struct dentry *dentry)
+ __releases(dentry->d_lock)
+ __releases(dcache_lock)
{
struct dentry *parent;
@@ -383,11 +386,11 @@ restart:
* Try to prune ancestors as well. This is necessary to prevent
* quadratic behavior of shrink_dcache_parent(), but is also expected
* to be beneficial in reducing dentry cache fragmentation.
- *
- * Called with dcache_lock, drops it and then regains.
- * Called with dentry->d_lock held, drops it.
*/
static void prune_one_dentry(struct dentry * dentry)
+ __releases(dentry->d_lock)
+ __releases(dcache_lock)
+ __acquires(dcache_lock)
{
__d_drop(dentry);
dentry = d_kill(dentry);
@@ -1604,10 +1607,9 @@ static int d_isparent(struct dentry *p1, struct dentry *p2)
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
- *
- * On return, dcache_lock will have been unlocked.
*/
static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
+ __releases(dcache_lock)
{
struct mutex *m1 = NULL, *m2 = NULL;
struct dentry *ret;
@@ -1743,11 +1745,9 @@ out_nolock:
shouldnt_be_hashed:
spin_unlock(&dcache_lock);
BUG();
- goto shouldnt_be_hashed;
}
-static int prepend(char **buffer, int *buflen, const char *str,
- int namelen)
+static int prepend(char **buffer, int *buflen, const char *str, int namelen)
{
*buflen -= namelen;
if (*buflen < 0)
@@ -1757,8 +1757,13 @@ static int prepend(char **buffer, int *buflen, const char *str,
return 0;
}
+static int prepend_name(char **buffer, int *buflen, struct qstr *name)
+{
+ return prepend(buffer, buflen, name->name, name->len);
+}
+
/**
- * d_path - return the path of a dentry
+ * __d_path - return the path of a dentry
* @path: the dentry/vfsmount to report
* @root: root vfsmnt/dentry (may be modified by this function)
* @buffer: buffer to return value in
@@ -1779,9 +1784,10 @@ char *__d_path(const struct path *path, struct path *root,
{
struct dentry *dentry = path->dentry;
struct vfsmount *vfsmnt = path->mnt;
- char * end = buffer+buflen;
- char * retval;
+ char *end = buffer + buflen;
+ char *retval;
+ spin_lock(&vfsmount_lock);
prepend(&end, &buflen, "\0", 1);
if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
(prepend(&end, &buflen, " (deleted)", 10) != 0))
@@ -1800,38 +1806,37 @@ char *__d_path(const struct path *path, struct path *root,
break;
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
/* Global root? */
- spin_lock(&vfsmount_lock);
if (vfsmnt->mnt_parent == vfsmnt) {
- spin_unlock(&vfsmount_lock);
goto global_root;
}
dentry = vfsmnt->mnt_mountpoint;
vfsmnt = vfsmnt->mnt_parent;
- spin_unlock(&vfsmount_lock);
continue;
}
parent = dentry->d_parent;
prefetch(parent);
- if ((prepend(&end, &buflen, dentry->d_name.name,
- dentry->d_name.len) != 0) ||
+ if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
(prepend(&end, &buflen, "/", 1) != 0))
goto Elong;
retval = end;
dentry = parent;
}
+out:
+ spin_unlock(&vfsmount_lock);
return retval;
global_root:
retval += 1; /* hit the slash */
- if (prepend(&retval, &buflen, dentry->d_name.name,
- dentry->d_name.len) != 0)
+ if (prepend_name(&retval, &buflen, &dentry->d_name) != 0)
goto Elong;
root->mnt = vfsmnt;
root->dentry = dentry;
- return retval;
+ goto out;
+
Elong:
- return ERR_PTR(-ENAMETOOLONG);
+ retval = ERR_PTR(-ENAMETOOLONG);
+ goto out;
}
/**
@@ -1845,9 +1850,9 @@ Elong:
*
* Returns the buffer or an error code if the path was too long.
*
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * "buflen" should be positive.
*/
-char *d_path(struct path *path, char *buf, int buflen)
+char *d_path(const struct path *path, char *buf, int buflen)
{
char *res;
struct path root;
@@ -1915,16 +1920,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
retval = end-1;
*retval = '/';
- for (;;) {
- struct dentry *parent;
- if (IS_ROOT(dentry))
- break;
+ while (!IS_ROOT(dentry)) {
+ struct dentry *parent = dentry->d_parent;
- parent = dentry->d_parent;
prefetch(parent);
-
- if ((prepend(&end, &buflen, dentry->d_name.name,
- dentry->d_name.len) != 0) ||
+ if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
(prepend(&end, &buflen, "/", 1) != 0))
goto Elong;
@@ -1975,7 +1975,7 @@ asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
error = -ENOENT;
/* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (pwd.dentry->d_parent == pwd.dentry || !d_unhashed(pwd.dentry)) {
+ if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
char * cwd;
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 50c994a249a..09a4522f65e 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -575,13 +575,11 @@ int ecryptfs_init_ecryptfs_miscdev(void)
int rc;
atomic_set(&ecryptfs_num_miscdev_opens, 0);
- mutex_lock(&ecryptfs_daemon_hash_mux);
rc = misc_register(&ecryptfs_miscdev);
if (rc)
printk(KERN_ERR "%s: Failed to register miscellaneous device "
"for communications with userspace daemons; rc = [%d]\n",
__func__, rc);
- mutex_unlock(&ecryptfs_daemon_hash_mux);
return rc;
}
diff --git a/fs/exec.c b/fs/exec.c
index da94a6f05df..fd9234379e8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -610,7 +610,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
bprm->exec -= stack_shift;
down_write(&mm->mmap_sem);
- vm_flags = vma->vm_flags;
+ vm_flags = VM_STACK_FLAGS;
/*
* Adjust stack execute permissions; explicitly enable for
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index fe3119a71ad..2845425077e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2875,8 +2875,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
blk++;
}
out:
- if (len == towrite)
+ if (len == towrite) {
+ mutex_unlock(&inode->i_mutex);
return err;
+ }
if (inode->i_size < off+len-towrite) {
i_size_write(inode, off+len-towrite);
EXT3_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cb96f127c36..02bf2434397 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3337,8 +3337,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
blk++;
}
out:
- if (len == towrite)
+ if (len == towrite) {
+ mutex_unlock(&inode->i_mutex);
return err;
+ }
if (inode->i_size < off+len-towrite) {
i_size_write(inode, off+len-towrite);
EXT4_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/libfs.c b/fs/libfs.c
index 892d41cb338..baeb71ee1cd 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -512,6 +512,20 @@ void simple_release_fs(struct vfsmount **mount, int *count)
mntput(mnt);
}
+/**
+ * simple_read_from_buffer - copy data from the buffer to user space
+ * @to: the user space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @ppos: the current position in the buffer
+ * @from: the buffer to read from
+ * @available: the size of the buffer
+ *
+ * The simple_read_from_buffer() function reads up to @count bytes from the
+ * buffer @from at offset @ppos into the user space address starting at @to.
+ *
+ * On success, the number of bytes read is returned and the offset @ppos is
+ * advanced by this number, or negative value is returned on error.
+ **/
ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
const void *from, size_t available)
{
@@ -528,6 +542,20 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
return count;
}
+/**
+ * memory_read_from_buffer - copy data from the buffer
+ * @to: the kernel space buffer to read to
+ * @count: the maximum number of bytes to read
+ * @ppos: the current position in the buffer
+ * @from: the buffer to read from
+ * @available: the size of the buffer
+ *
+ * The memory_read_from_buffer() function reads up to @count bytes from the
+ * buffer @from at offset @ppos into the kernel space address starting at @to.
+ *
+ * On success, the number of bytes read is returned and the offset @ppos is
+ * advanced by this number, or negative value is returned on error.
+ **/
ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
const void *from, size_t available)
{
diff --git a/fs/locks.c b/fs/locks.c
index 11dbf08651b..dce8c747371 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -561,9 +561,6 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)
/* insert into file's list */
fl->fl_next = *pos;
*pos = fl;
-
- if (fl->fl_ops && fl->fl_ops->fl_insert)
- fl->fl_ops->fl_insert(fl);
}
/*
@@ -586,9 +583,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
fl->fl_fasync = NULL;
}
- if (fl->fl_ops && fl->fl_ops->fl_remove)
- fl->fl_ops->fl_remove(fl);
-
if (fl->fl_nspid) {
put_pid(fl->fl_nspid);
fl->fl_nspid = NULL;
diff --git a/fs/namei.c b/fs/namei.c
index c7e43536c49..01e67dddcc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -581,15 +581,13 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
int result;
/* make sure the stuff we saved doesn't go away */
- dget(save.dentry);
- mntget(save.mnt);
+ path_get(&save);
result = __link_path_walk(name, nd);
if (result == -ESTALE) {
/* nd->path had been dropped */
nd->path = save;
- dget(nd->path.dentry);
- mntget(nd->path.mnt);
+ path_get(&nd->path);
nd->flags |= LOOKUP_REVAL;
result = __link_path_walk(name, nd);
}
@@ -1216,8 +1214,9 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
nd->flags = flags;
nd->depth = 0;
- nd->path.mnt = mntget(mnt);
- nd->path.dentry = dget(dentry);
+ nd->path.dentry = dentry;
+ nd->path.mnt = mnt;
+ path_get(&nd->path);
retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
@@ -2857,16 +2856,17 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
struct nameidata nd;
void *cookie;
+ int res;
nd.depth = 0;
cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
- if (!IS_ERR(cookie)) {
- int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
- if (dentry->d_inode->i_op->put_link)
- dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
- cookie = ERR_PTR(res);
- }
- return PTR_ERR(cookie);
+ if (IS_ERR(cookie))
+ return PTR_ERR(cookie);
+
+ res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+ if (dentry->d_inode->i_op->put_link)
+ dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
+ return res;
}
int vfs_follow_link(struct nameidata *nd, const char *link)
diff --git a/fs/namespace.c b/fs/namespace.c
index 4fc302c2a0e..4f6f7635b59 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -750,7 +750,7 @@ struct proc_fs_info {
const char *str;
};
-static void show_sb_opts(struct seq_file *m, struct super_block *sb)
+static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{
static const struct proc_fs_info fs_info[] = {
{ MS_SYNCHRONOUS, ",sync" },
@@ -764,6 +764,8 @@ static void show_sb_opts(struct seq_file *m, struct super_block *sb)
if (sb->s_flags & fs_infop->flag)
seq_puts(m, fs_infop->str);
}
+
+ return security_sb_show_options(m, sb);
}
static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
@@ -806,11 +808,14 @@ static int show_vfsmnt(struct seq_file *m, void *v)
seq_putc(m, ' ');
show_type(m, mnt->mnt_sb);
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
- show_sb_opts(m, mnt->mnt_sb);
+ err = show_sb_opts(m, mnt->mnt_sb);
+ if (err)
+ goto out;
show_mnt_opts(m, mnt);
if (mnt->mnt_sb->s_op->show_options)
err = mnt->mnt_sb->s_op->show_options(m, mnt);
seq_puts(m, " 0 0\n");
+out:
return err;
}
@@ -865,10 +870,13 @@ static int show_mountinfo(struct seq_file *m, void *v)
seq_putc(m, ' ');
mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
- show_sb_opts(m, sb);
+ err = show_sb_opts(m, sb);
+ if (err)
+ goto out;
if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt);
seq_putc(m, '\n');
+out:
return err;
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 58d43daec08..982a2064fe4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -204,7 +204,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
* Note: assumes we have exclusive access to this mapping either
* through inode->i_mutex or some other mechanism.
*/
- if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) {
+ if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
/* Should never happen */
nfs_zap_mapping(inode, inode->i_mapping);
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index efc015c6128..44f87caf368 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -606,7 +606,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->last_used = 0;
+ spin_lock(&dlm->spinlock);
list_add_tail(&res->tracking, &dlm->tracking_list);
+ spin_unlock(&dlm->spinlock);
memset(res->lvb, 0, DLM_LVB_LEN);
memset(res->refmap, 0, sizeof(res->refmap));
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 394d25a131a..80e20d9f278 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1554,8 +1554,8 @@ out:
*/
int ocfs2_file_lock(struct file *file, int ex, int trylock)
{
- int ret, level = ex ? LKM_EXMODE : LKM_PRMODE;
- unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0;
+ int ret, level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+ unsigned int lkm_flags = trylock ? DLM_LKF_NOQUEUE : 0;
unsigned long flags;
struct ocfs2_file_private *fp = file->private_data;
struct ocfs2_lock_res *lockres = &fp->fp_flock;
@@ -1582,7 +1582,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
* Get the lock at NLMODE to start - that way we
* can cancel the upconvert request if need be.
*/
- ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
+ ret = ocfs2_lock_create(osb, lockres, DLM_LOCK_NL, 0);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1597,7 +1597,7 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock)
}
lockres->l_action = OCFS2_AST_CONVERT;
- lkm_flags |= LKM_CONVERT;
+ lkm_flags |= DLM_LKF_CONVERT;
lockres->l_requested = level;
lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
@@ -1664,7 +1664,7 @@ void ocfs2_file_unlock(struct file *file)
if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED))
return;
- if (lockres->l_level == LKM_NLMODE)
+ if (lockres->l_level == DLM_LOCK_NL)
return;
mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n",
@@ -1678,11 +1678,11 @@ void ocfs2_file_unlock(struct file *file)
lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
lockres->l_blocking = DLM_LOCK_EX;
- gen = ocfs2_prepare_downconvert(lockres, LKM_NLMODE);
+ gen = ocfs2_prepare_downconvert(lockres, DLM_LOCK_NL);
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
spin_unlock_irqrestore(&lockres->l_lock, flags);
- ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0, gen);
+ ret = ocfs2_downconvert_lock(osb, lockres, DLM_LOCK_NL, 0, gen);
if (ret) {
mlog_errno(ret);
return;
diff --git a/fs/open.c b/fs/open.c
index a1450086e92..a99ad09c319 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -16,6 +16,7 @@
#include <linux/namei.h>
#include <linux/backing-dev.h>
#include <linux/capability.h>
+#include <linux/securebits.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/vfs.h>
@@ -425,7 +426,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
{
struct nameidata nd;
int old_fsuid, old_fsgid;
- kernel_cap_t old_cap;
+ kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
int res;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
@@ -433,23 +434,27 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
old_fsuid = current->fsuid;
old_fsgid = current->fsgid;
- old_cap = current->cap_effective;
current->fsuid = current->uid;
current->fsgid = current->gid;
- /*
- * Clear the capabilities if we switch to a non-root user
- *
- * FIXME: There is a race here against sys_capset. The
- * capabilities can change yet we will restore the old
- * value below. We should hold task_capabilities_lock,
- * but we cannot because user_path_walk can sleep.
- */
- if (current->uid)
- cap_clear(current->cap_effective);
- else
- current->cap_effective = current->cap_permitted;
+ if (!issecure(SECURE_NO_SETUID_FIXUP)) {
+ /*
+ * Clear the capabilities if we switch to a non-root user
+ */
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+ /*
+ * FIXME: There is a race here against sys_capset. The
+ * capabilities can change yet we will restore the old
+ * value below. We should hold task_capabilities_lock,
+ * but we cannot because user_path_walk can sleep.
+ */
+#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
+ if (current->uid)
+ old_cap = cap_set_effective(__cap_empty_set);
+ else
+ old_cap = cap_set_effective(current->cap_permitted);
+ }
res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
if (res)
@@ -478,7 +483,9 @@ out_path_release:
out:
current->fsuid = old_fsuid;
current->fsgid = old_fsgid;
- current->cap_effective = old_cap;
+
+ if (!issecure(SECURE_NO_SETUID_FIXUP))
+ cap_set_effective(old_cap);
return res;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index ec228bc9f88..700f4e0d957 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1003,8 +1003,7 @@ struct file *create_write_pipe(void)
void free_write_pipe(struct file *f)
{
free_pipe_info(f->f_dentry->d_inode);
- dput(f->f_path.dentry);
- mntput(f->f_path.mnt);
+ path_put(&f->f_path);
put_filp(f);
}
@@ -1015,8 +1014,8 @@ struct file *create_read_pipe(struct file *wrf)
return ERR_PTR(-ENFILE);
/* Grab pipe from the writer */
- f->f_path.mnt = mntget(wrf->f_path.mnt);
- f->f_path.dentry = dget(wrf->f_path.dentry);
+ f->f_path = wrf->f_path;
+ path_get(&wrf->f_path);
f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
f->f_pos = 0;
@@ -1068,8 +1067,7 @@ int do_pipe(int *fd)
err_fdr:
put_unused_fd(fdr);
err_read_pipe:
- dput(fr->f_dentry);
- mntput(fr->f_vfsmnt);
+ path_put(&fr->f_path);
put_filp(fr);
err_write_pipe:
free_write_pipe(fw);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3b455371e7f..58c3e6a8e15 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -233,7 +233,7 @@ static int check_mem_permission(struct task_struct *task)
*/
if (task->parent == current && (task->ptrace & PT_PTRACED) &&
task_is_stopped_or_traced(task) &&
- ptrace_may_attach(task))
+ ptrace_may_access(task, PTRACE_MODE_ATTACH))
return 0;
/*
@@ -251,7 +251,8 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
task_lock(task);
if (task->mm != mm)
goto out;
- if (task->mm != current->mm && __ptrace_may_attach(task) < 0)
+ if (task->mm != current->mm &&
+ __ptrace_may_access(task, PTRACE_MODE_READ) < 0)
goto out;
task_unlock(task);
return mm;
@@ -518,7 +519,7 @@ static int proc_fd_access_allowed(struct inode *inode)
*/
task = get_proc_task(inode);
if (task) {
- allowed = ptrace_may_attach(task);
+ allowed = ptrace_may_access(task, PTRACE_MODE_READ);
put_task_struct(task);
}
return allowed;
@@ -904,7 +905,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!task)
goto out_no_task;
- if (!ptrace_may_attach(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out;
ret = -ENOMEM;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ab8ccc9d14f..164bd9f9ede 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -210,7 +210,7 @@ static int show_map(struct seq_file *m, void *v)
dev_t dev = 0;
int len;
- if (maps_protect && !ptrace_may_attach(task))
+ if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
return -EACCES;
if (file) {
@@ -476,10 +476,10 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
- static struct mm_walk clear_refs_walk;
- memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
- clear_refs_walk.pmd_entry = clear_refs_pte_range;
- clear_refs_walk.mm = mm;
+ struct mm_walk clear_refs_walk = {
+ .pmd_entry = clear_refs_pte_range,
+ .mm = mm,
+ };
down_read(&mm->mmap_sem);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
clear_refs_walk.private = vma;
@@ -602,11 +602,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
return err;
}
-static struct mm_walk pagemap_walk = {
- .pmd_entry = pagemap_pte_range,
- .pte_hole = pagemap_pte_hole
-};
-
/*
* /proc/pid/pagemap - an array mapping virtual pages to pfns
*
@@ -641,12 +636,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
struct pagemapread pm;
int pagecount;
int ret = -ESRCH;
+ struct mm_walk pagemap_walk;
+ unsigned long src;
+ unsigned long svpfn;
+ unsigned long start_vaddr;
+ unsigned long end_vaddr;
if (!task)
goto out;
ret = -EACCES;
- if (!ptrace_may_attach(task))
+ if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_task;
ret = -EINVAL;
@@ -659,11 +659,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!mm)
goto out_task;
- ret = -ENOMEM;
+
uaddr = (unsigned long)buf & PAGE_MASK;
uend = (unsigned long)(buf + count);
pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
- pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+ ret = 0;
+ if (pagecount == 0)
+ goto out_mm;
+ pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ ret = -ENOMEM;
if (!pages)
goto out_mm;
@@ -684,33 +688,33 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
pm.out = (u64 *)buf;
pm.end = (u64 *)(buf + count);
- if (!ptrace_may_attach(task)) {
- ret = -EIO;
- } else {
- unsigned long src = *ppos;
- unsigned long svpfn = src / PM_ENTRY_BYTES;
- unsigned long start_vaddr = svpfn << PAGE_SHIFT;
- unsigned long end_vaddr = TASK_SIZE_OF(task);
-
- /* watch out for wraparound */
- if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
- start_vaddr = end_vaddr;
-
- /*
- * The odds are that this will stop walking way
- * before end_vaddr, because the length of the
- * user buffer is tracked in "pm", and the walk
- * will stop when we hit the end of the buffer.
- */
- ret = walk_page_range(start_vaddr, end_vaddr,
- &pagemap_walk);
- if (ret == PM_END_OF_BUFFER)
- ret = 0;
- /* don't need mmap_sem for these, but this looks cleaner */
- *ppos += (char *)pm.out - buf;
- if (!ret)
- ret = (char *)pm.out - buf;
- }
+ pagemap_walk.pmd_entry = pagemap_pte_range;
+ pagemap_walk.pte_hole = pagemap_pte_hole;
+ pagemap_walk.mm = mm;
+ pagemap_walk.private = &pm;
+
+ src = *ppos;
+ svpfn = src / PM_ENTRY_BYTES;
+ start_vaddr = svpfn << PAGE_SHIFT;
+ end_vaddr = TASK_SIZE_OF(task);
+
+ /* watch out for wraparound */
+ if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+
+ /*
+ * The odds are that this will stop walking way
+ * before end_vaddr, because the length of the
+ * user buffer is tracked in "pm", and the walk
+ * will stop when we hit the end of the buffer.
+ */
+ ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk);
+ if (ret == PM_END_OF_BUFFER)
+ ret = 0;
+ /* don't need mmap_sem for these, but this looks cleaner */
+ *ppos += (char *)pm.out - buf;
+ if (!ret)
+ ret = (char *)pm.out - buf;
out_pages:
for (; pagecount; pagecount--) {
@@ -743,7 +747,7 @@ static int show_numa_map_checked(struct seq_file *m, void *v)
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
- if (maps_protect && !ptrace_may_attach(task))
+ if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
return -EACCES;
return show_numa_map(m, v);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4b4f9cc2f18..5d84e7121df 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -113,7 +113,7 @@ static int show_map(struct seq_file *m, void *_vml)
struct proc_maps_private *priv = m->private;
struct task_struct *task = priv->task;
- if (maps_protect && !ptrace_may_attach(task))
+ if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ))
return -EACCES;
return nommu_vma_show(m, vml->vma);
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 9590b902430..78f613cb9c7 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = {
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0989bc2c2f6..52312ec93ff 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = {
.aio_write = generic_file_aio_write,
.fsync = simple_sync_file,
.splice_read = generic_file_splice_read,
+ .splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 57917932212..192269698a8 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -45,6 +45,8 @@ void reiserfs_delete_inode(struct inode *inode)
goto out;
reiserfs_update_inode_transaction(inode);
+ reiserfs_discard_prealloc(&th, inode);
+
err = reiserfs_delete_object(&th, inode);
/* Do quota update inside a transaction for journaled quotas. We must do that
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index ed424d708e6..1d40f2bd197 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2165,8 +2165,10 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
blk++;
}
out:
- if (len == towrite)
+ if (len == towrite) {
+ mutex_unlock(&inode->i_mutex);
return err;
+ }
if (inode->i_size < off + len - towrite)
i_size_write(inode, off + len - towrite);
inode->i_version++;
diff --git a/fs/splice.c b/fs/splice.c
index aa5f6f60b30..399442179d8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
lock_page(page);
/*
- * page was truncated, stop here. if this isn't the
- * first page, we'll just complete what we already
- * added
+ * Page was truncated, or invalidated by the
+ * filesystem. Redo the find/create, but this time the
+ * page is kept locked, so there's no chance of another
+ * race with truncate/invalidate.
*/
if (!page->mapping) {
unlock_page(page);
- break;
+ page = find_or_create_page(mapping, index,
+ mapping_gfp_mask(mapping));
+
+ if (!page) {
+ error = -ENOMEM;
+ break;
+ }
+ page_cache_release(pages[page_nr]);
+ pages[page_nr] = page;
}
/*
* page was already under io and is now done, great
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7a5f69be6ac..44cc702f96c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -682,38 +682,26 @@ static int udf_vrs(struct super_block *sb, int silent)
/*
* Check whether there is an anchor block in the given block
*/
-static int udf_check_anchor_block(struct super_block *sb, sector_t block,
- bool varconv)
+static int udf_check_anchor_block(struct super_block *sb, sector_t block)
{
- struct buffer_head *bh = NULL;
- tag *t;
+ struct buffer_head *bh;
uint16_t ident;
- uint32_t location;
- if (varconv) {
- if (udf_fixed_to_variable(block) >=
- sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
- return 0;
- bh = sb_bread(sb, udf_fixed_to_variable(block));
- }
- else
- bh = sb_bread(sb, block);
+ if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
+ udf_fixed_to_variable(block) >=
+ sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
+ return 0;
+ bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
return 0;
-
- t = (tag *)bh->b_data;
- ident = le16_to_cpu(t->tagIdent);
- location = le32_to_cpu(t->tagLocation);
brelse(bh);
- if (ident != TAG_IDENT_AVDP)
- return 0;
- return location == block;
+
+ return ident == TAG_IDENT_AVDP;
}
/* Search for an anchor volume descriptor pointer */
-static sector_t udf_scan_anchors(struct super_block *sb, bool varconv,
- sector_t lastblock)
+static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock)
{
sector_t last[6];
int i;
@@ -739,7 +727,7 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv,
sb->s_blocksize_bits)
continue;
- if (udf_check_anchor_block(sb, last[i], varconv)) {
+ if (udf_check_anchor_block(sb, last[i])) {
sbi->s_anchor[0] = last[i];
sbi->s_anchor[1] = last[i] - 256;
return last[i];
@@ -748,17 +736,17 @@ static sector_t udf_scan_anchors(struct super_block *sb, bool varconv,
if (last[i] < 256)
continue;
- if (udf_check_anchor_block(sb, last[i] - 256, varconv)) {
+ if (udf_check_anchor_block(sb, last[i] - 256)) {
sbi->s_anchor[1] = last[i] - 256;
return last[i];
}
}
- if (udf_check_anchor_block(sb, sbi->s_session + 256, varconv)) {
+ if (udf_check_anchor_block(sb, sbi->s_session + 256)) {
sbi->s_anchor[0] = sbi->s_session + 256;
return last[0];
}
- if (udf_check_anchor_block(sb, sbi->s_session + 512, varconv)) {
+ if (udf_check_anchor_block(sb, sbi->s_session + 512)) {
sbi->s_anchor[0] = sbi->s_session + 512;
return last[0];
}
@@ -780,23 +768,24 @@ static void udf_find_anchor(struct super_block *sb)
int i;
struct udf_sb_info *sbi = UDF_SB(sb);
- lastblock = udf_scan_anchors(sb, 0, sbi->s_last_block);
+ lastblock = udf_scan_anchors(sb, sbi->s_last_block);
if (lastblock)
goto check_anchor;
/* No anchor found? Try VARCONV conversion of block numbers */
+ UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
/* Firstly, we try to not convert number of the last block */
- lastblock = udf_scan_anchors(sb, 1,
+ lastblock = udf_scan_anchors(sb,
udf_variable_to_fixed(sbi->s_last_block));
- if (lastblock) {
- UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+ if (lastblock)
goto check_anchor;
- }
/* Secondly, we try with converted number of the last block */
- lastblock = udf_scan_anchors(sb, 1, sbi->s_last_block);
- if (lastblock)
- UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+ lastblock = udf_scan_anchors(sb, sbi->s_last_block);
+ if (!lastblock) {
+ /* VARCONV didn't help. Clear it. */
+ UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
+ }
check_anchor:
/*
diff --git a/fs/utimes.c b/fs/utimes.c
index af059d5cb48..b6b664e7145 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,14 +40,9 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
#endif
-static bool nsec_special(long nsec)
-{
- return nsec == UTIME_OMIT || nsec == UTIME_NOW;
-}
-
static bool nsec_valid(long nsec)
{
- if (nsec_special(nsec))
+ if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
return true;
return nsec >= 0 && nsec <= 999999999;
@@ -102,7 +97,11 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
if (error)
goto dput_and_out;
- /* Don't worry, the checks are done in inode_change_ok() */
+ if (times && times[0].tv_nsec == UTIME_NOW &&
+ times[1].tv_nsec == UTIME_NOW)
+ times = NULL;
+
+ /* In most cases, the checks are done in inode_change_ok() */
newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
if (times) {
error = -EPERM;
@@ -124,28 +123,34 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
newattrs.ia_valid |= ATTR_MTIME_SET;
}
- }
- /*
- * If times is NULL or both times are either UTIME_OMIT or
- * UTIME_NOW, then need to check permissions, because
- * inode_change_ok() won't do it.
- */
- if (!times || (nsec_special(times[0].tv_nsec) &&
- nsec_special(times[1].tv_nsec))) {
+ /*
+ * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT
+ * cases, we need to make an extra check that is not done by
+ * inode_change_ok().
+ */
+ if (((times[0].tv_nsec == UTIME_NOW &&
+ times[1].tv_nsec == UTIME_OMIT)
+ ||
+ (times[0].tv_nsec == UTIME_OMIT &&
+ times[1].tv_nsec == UTIME_NOW))
+ && !is_owner_or_cap(inode))
+ goto mnt_drop_write_and_out;
+ } else {
+
+ /*
+ * If times is NULL (or both times are UTIME_NOW),
+ * then we need to check permissions, because
+ * inode_change_ok() won't do it.
+ */
error = -EACCES;
if (IS_IMMUTABLE(inode))
goto mnt_drop_write_and_out;
if (!is_owner_or_cap(inode)) {
- if (f) {
- if (!(f->f_mode & FMODE_WRITE))
- goto mnt_drop_write_and_out;
- } else {
- error = vfs_permission(&nd, MAY_WRITE);
- if (error)
- goto mnt_drop_write_and_out;
- }
+ error = permission(inode, MAY_WRITE, NULL);
+ if (error)
+ goto mnt_drop_write_and_out;
}
}
mutex_lock(&inode->i_mutex);
@@ -169,14 +174,6 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __
if (utimes) {
if (copy_from_user(&tstimes, utimes, sizeof(tstimes)))
return -EFAULT;
- if ((tstimes[0].tv_nsec == UTIME_OMIT ||
- tstimes[0].tv_nsec == UTIME_NOW) &&
- tstimes[0].tv_sec != 0)
- return -EINVAL;
- if ((tstimes[1].tv_nsec == UTIME_OMIT ||
- tstimes[1].tv_nsec == UTIME_NOW) &&
- tstimes[1].tv_sec != 0)
- return -EINVAL;
/* Nothing to do, we must not even check the path. */
if (tstimes[0].tv_nsec == UTIME_OMIT &&
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index afaee301b0e..ad3d26ddfe3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2427,13 +2427,20 @@ restart:
if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
- /* If I'm the only one writing to this iclog, sync it to disk */
- if (atomic_read(&iclog->ic_refcnt) == 1) {
+ /*
+ * If I'm the only one writing to this iclog, sync it to disk.
+ * We need to do an atomic compare and decrement here to avoid
+ * racing with concurrent atomic_dec_and_lock() calls in
+ * xlog_state_release_iclog() when there is more than one
+ * reference to the iclog.
+ */
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
+ /* we are the only one */
spin_unlock(&log->l_icloglock);
- if ((error = xlog_state_release_iclog(log, iclog)))
+ error = xlog_state_release_iclog(log, iclog);
+ if (error)
return error;
} else {
- atomic_dec(&iclog->ic_refcnt);
spin_unlock(&log->l_icloglock);
}
goto restart;