summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/file.c270
-rw-r--r--fs/ext4/indirect.c18
-rw-r--r--fs/ext4/inode.c159
-rw-r--r--fs/ext4/namei.c10
-rw-r--r--fs/ext4/resize.c24
-rw-r--r--fs/ext4/super.c95
8 files changed, 383 insertions, 203 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a75fba67bb1..982d934fd9a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -965,6 +965,11 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_MASK 0x00070
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
+#ifdef CONFIG_FS_DAX
+#define EXT4_MOUNT_DAX 0x00200 /* Direct Access */
+#else
+#define EXT4_MOUNT_DAX 0
+#endif
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
@@ -2578,6 +2583,7 @@ extern const struct file_operations ext4_dir_operations;
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
+extern const struct file_operations ext4_dax_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e5d3eadf47b..bed43081720 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* fallback to generic here if not in extents fmt */
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- return __generic_block_fiemap(inode, fieinfo, start, len,
- ext4_get_block);
+ return generic_block_fiemap(inode, fieinfo, start, len,
+ ext4_get_block);
if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
return -EBADR;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 513c12cf444..33a09da16c9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(iocb->ki_filp);
struct mutex *aio_mutex = NULL;
struct blk_plug plug;
- int o_direct = file->f_flags & O_DIRECT;
+ int o_direct = io_is_direct(file);
int overwrite = 0;
size_t length = iov_iter_count(from);
ssize_t ret;
@@ -191,17 +191,41 @@ errout:
return ret;
}
+#ifdef CONFIG_FS_DAX
+static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return dax_fault(vma, vmf, ext4_get_block);
+ /* Is this the right get_block? */
+}
+
+static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ return dax_mkwrite(vma, vmf, ext4_get_block);
+}
+
+static const struct vm_operations_struct ext4_dax_vm_ops = {
+ .fault = ext4_dax_fault,
+ .page_mkwrite = ext4_dax_mkwrite,
+};
+#else
+#define ext4_dax_vm_ops ext4_file_vm_ops
+#endif
+
static const struct vm_operations_struct ext4_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
- .remap_pages = generic_file_remap_pages,
};
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
file_accessed(file);
- vma->vm_ops = &ext4_file_vm_ops;
+ if (IS_DAX(file_inode(file))) {
+ vma->vm_ops = &ext4_dax_vm_ops;
+ vma->vm_flags |= VM_MIXEDMAP;
+ } else {
+ vma->vm_ops = &ext4_file_vm_ops;
+ }
return 0;
}
@@ -273,19 +297,24 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
* we determine this extent as a data or a hole according to whether the
* page cache has data or not.
*/
-static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
- loff_t endoff, loff_t *offset)
+static int ext4_find_unwritten_pgoff(struct inode *inode,
+ int whence,
+ struct ext4_map_blocks *map,
+ loff_t *offset)
{
struct pagevec pvec;
+ unsigned int blkbits;
pgoff_t index;
pgoff_t end;
+ loff_t endoff;
loff_t startoff;
loff_t lastoff;
int found = 0;
+ blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
-
+ endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT;
@@ -403,144 +432,147 @@ out:
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct fiemap_extent_info fie;
- struct fiemap_extent ext[2];
- loff_t next;
- int i, ret = 0;
+ struct ext4_map_blocks map;
+ struct extent_status es;
+ ext4_lblk_t start, last, end;
+ loff_t dataoff, isize;
+ int blkbits;
+ int ret = 0;
mutex_lock(&inode->i_mutex);
- if (offset >= inode->i_size) {
+
+ isize = i_size_read(inode);
+ if (offset >= isize) {
mutex_unlock(&inode->i_mutex);
return -ENXIO;
}
- fie.fi_flags = 0;
- fie.fi_extents_max = 2;
- fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
- while (1) {
- mm_segment_t old_fs = get_fs();
-
- fie.fi_extents_mapped = 0;
- memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
-
- set_fs(get_ds());
- ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
- set_fs(old_fs);
- if (ret)
+
+ blkbits = inode->i_sb->s_blocksize_bits;
+ start = offset >> blkbits;
+ last = start;
+ end = isize >> blkbits;
+ dataoff = offset;
+
+ do {
+ map.m_lblk = last;
+ map.m_len = end - last + 1;
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+ if (last != start)
+ dataoff = (loff_t)last << blkbits;
break;
+ }
- /* No extents found, EOF */
- if (!fie.fi_extents_mapped) {
- ret = -ENXIO;
+ /*
+ * If there is a delay extent at this offset,
+ * it will be as a data.
+ */
+ ext4_es_find_delayed_extent_range(inode, last, last, &es);
+ if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+ if (last != start)
+ dataoff = (loff_t)last << blkbits;
break;
}
- for (i = 0; i < fie.fi_extents_mapped; i++) {
- next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
- if (offset < (loff_t)ext[i].fe_logical)
- offset = (loff_t)ext[i].fe_logical;
- /*
- * If extent is not unwritten, then it contains valid
- * data, mapped or delayed.
- */
- if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
- goto out;
+ /*
+ * If there is a unwritten extent at this offset,
+ * it will be as a data or a hole according to page
+ * cache that has data or not.
+ */
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ int unwritten;
+ unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+ &map, &dataoff);
+ if (unwritten)
+ break;
+ }
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- next, &offset))
- goto out;
+ last++;
+ dataoff = (loff_t)last << blkbits;
+ } while (last <= end);
- if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
- ret = -ENXIO;
- goto out;
- }
- offset = next;
- }
- }
- if (offset > inode->i_size)
- offset = inode->i_size;
-out:
mutex_unlock(&inode->i_mutex);
- if (ret)
- return ret;
- return vfs_setpos(file, offset, maxsize);
+ if (dataoff > isize)
+ return -ENXIO;
+
+ return vfs_setpos(file, dataoff, maxsize);
}
/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
*/
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct fiemap_extent_info fie;
- struct fiemap_extent ext[2];
- loff_t next;
- int i, ret = 0;
+ struct ext4_map_blocks map;
+ struct extent_status es;
+ ext4_lblk_t start, last, end;
+ loff_t holeoff, isize;
+ int blkbits;
+ int ret = 0;
mutex_lock(&inode->i_mutex);
- if (offset >= inode->i_size) {
+
+ isize = i_size_read(inode);
+ if (offset >= isize) {
mutex_unlock(&inode->i_mutex);
return -ENXIO;
}
- fie.fi_flags = 0;
- fie.fi_extents_max = 2;
- fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
- while (1) {
- mm_segment_t old_fs = get_fs();
+ blkbits = inode->i_sb->s_blocksize_bits;
+ start = offset >> blkbits;
+ last = start;
+ end = isize >> blkbits;
+ holeoff = offset;
- fie.fi_extents_mapped = 0;
- memset(ext, 0, sizeof(*ext));
-
- set_fs(get_ds());
- ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
- set_fs(old_fs);
- if (ret)
- break;
+ do {
+ map.m_lblk = last;
+ map.m_len = end - last + 1;
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+ last += ret;
+ holeoff = (loff_t)last << blkbits;
+ continue;
+ }
- /* No extents found */
- if (!fie.fi_extents_mapped)
- break;
+ /*
+ * If there is a delay extent at this offset,
+ * we will skip this extent.
+ */
+ ext4_es_find_delayed_extent_range(inode, last, last, &es);
+ if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+ last = es.es_lblk + es.es_len;
+ holeoff = (loff_t)last << blkbits;
+ continue;
+ }
- for (i = 0; i < fie.fi_extents_mapped; i++) {
- next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
- /*
- * If extent is not unwritten, then it contains valid
- * data, mapped or delayed.
- */
- if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
- if (offset < (loff_t)ext[i].fe_logical)
- goto out;
- offset = next;
+ /*
+ * If there is a unwritten extent at this offset,
+ * it will be as a data or a hole according to page
+ * cache that has data or not.
+ */
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ int unwritten;
+ unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+ &map, &holeoff);
+ if (!unwritten) {
+ last += ret;
+ holeoff = (loff_t)last << blkbits;
continue;
}
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- next, &offset))
- goto out;
-
- offset = next;
- if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
- goto out;
}
- }
- if (offset > inode->i_size)
- offset = inode->i_size;
-out:
+
+ /* find a hole */
+ break;
+ } while (last <= end);
+
mutex_unlock(&inode->i_mutex);
- if (ret)
- return ret;
- return vfs_setpos(file, offset, maxsize);
+ if (holeoff > isize)
+ holeoff = isize;
+
+ return vfs_setpos(file, holeoff, maxsize);
}
/*
@@ -592,6 +624,26 @@ const struct file_operations ext4_file_operations = {
.fallocate = ext4_fallocate,
};
+#ifdef CONFIG_FS_DAX
+const struct file_operations ext4_dax_file_operations = {
+ .llseek = ext4_llseek,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = ext4_file_write_iter,
+ .unlocked_ioctl = ext4_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext4_compat_ioctl,
+#endif
+ .mmap = ext4_file_mmap,
+ .open = ext4_file_open,
+ .release = ext4_release_file,
+ .fsync = ext4_sync_file,
+ /* Splice not yet supported with DAX */
+ .fallocate = ext4_fallocate,
+};
+#endif
+
const struct inode_operations ext4_file_inode_operations = {
.setattr = ext4_setattr,
.getattr = ext4_getattr,
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 36b369697a1..6b9878a2418 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -689,14 +689,22 @@ retry:
inode_dio_done(inode);
goto locked;
}
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iter, offset,
- ext4_get_block, NULL, NULL, 0);
+ if (IS_DAX(inode))
+ ret = dax_do_io(rw, iocb, inode, iter, offset,
+ ext4_get_block, NULL, 0);
+ else
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev, iter, offset,
+ ext4_get_block, NULL, NULL, 0);
inode_dio_done(inode);
} else {
locked:
- ret = blockdev_direct_IO(rw, iocb, inode, iter,
- offset, ext4_get_block);
+ if (IS_DAX(inode))
+ ret = dax_do_io(rw, iocb, inode, iter, offset,
+ ext4_get_block, NULL, DIO_LOCKING);
+ else
+ ret = blockdev_direct_IO(rw, iocb, inode, iter,
+ offset, ext4_get_block);
if (unlikely((rw & WRITE) && ret < 0)) {
loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5653fa42930..85404f15e53 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -657,6 +657,18 @@ has_zeroout:
return retval;
}
+static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
+{
+ struct inode *inode = bh->b_assoc_map->host;
+ /* XXX: breaks on 32-bit > 16GB. Is that even supported? */
+ loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
+ int err;
+ if (!uptodate)
+ return;
+ WARN_ON(!buffer_unwritten(bh));
+ err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
+}
+
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
@@ -694,6 +706,11 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+ if (IS_DAX(inode) && buffer_unwritten(bh) && !io_end) {
+ bh->b_assoc_map = inode->i_mapping;
+ bh->b_private = (void *)(unsigned long)iblock;
+ bh->b_end_io = ext4_end_io_unwritten;
+ }
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -3010,13 +3027,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
- ret = __blockdev_direct_IO(rw, iocb, inode,
- inode->i_sb->s_bdev, iter,
- offset,
- get_block_func,
- ext4_end_io_dio,
- NULL,
- dio_flags);
+ if (IS_DAX(inode))
+ ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func,
+ ext4_end_io_dio, dio_flags);
+ else
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ inode->i_sb->s_bdev, iter, offset,
+ get_block_func,
+ ext4_end_io_dio, NULL, dio_flags);
/*
* Put our reference to io_end. This can free the io_end structure e.g.
@@ -3180,19 +3198,12 @@ void ext4_set_aops(struct inode *inode)
inode->i_mapping->a_ops = &ext4_aops;
}
-/*
- * ext4_block_zero_page_range() zeros out a mapping of length 'length'
- * starting from file offset 'from'. The range to be zero'd must
- * be contained with in one block. If the specified range exceeds
- * the end of the block it will be shortened to end of the block
- * that cooresponds to 'from'
- */
-static int ext4_block_zero_page_range(handle_t *handle,
+static int __ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
- unsigned blocksize, max, pos;
+ unsigned blocksize, pos;
ext4_lblk_t iblock;
struct inode *inode = mapping->host;
struct buffer_head *bh;
@@ -3205,14 +3216,6 @@ static int ext4_block_zero_page_range(handle_t *handle,
return -ENOMEM;
blocksize = inode->i_sb->s_blocksize;
- max = blocksize - (offset & (blocksize - 1));
-
- /*
- * correct length if it does not fall between
- * 'from' and the end of the block
- */
- if (length > max || length < 0)
- length = max;
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -3278,6 +3281,33 @@ unlock:
}
/*
+ * ext4_block_zero_page_range() zeros out a mapping of length 'length'
+ * starting from file offset 'from'. The range to be zero'd must
+ * be contained with in one block. If the specified range exceeds
+ * the end of the block it will be shortened to end of the block
+ * that cooresponds to 'from'
+ */
+static int ext4_block_zero_page_range(handle_t *handle,
+ struct address_space *mapping, loff_t from, loff_t length)
+{
+ struct inode *inode = mapping->host;
+ unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned blocksize = inode->i_sb->s_blocksize;
+ unsigned max = blocksize - (offset & (blocksize - 1));
+
+ /*
+ * correct length if it does not fall between
+ * 'from' and the end of the block
+ */
+ if (length > max || length < 0)
+ length = max;
+
+ if (IS_DAX(inode))
+ return dax_zero_page_range(inode, from, length, ext4_get_block);
+ return __ext4_block_zero_page_range(handle, mapping, from, length);
+}
+
+/*
* ext4_block_truncate_page() zeroes out a mapping from file offset `from'
* up to the end of the block which corresponds to `from'.
* This required during truncate. We need to physically zero the tail end
@@ -3798,8 +3828,10 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
+ if (test_opt(inode->i_sb, DAX))
+ new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4052,7 +4084,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
- inode->i_fop = &ext4_file_operations;
+ if (test_opt(inode->i_sb, DAX))
+ inode->i_fop = &ext4_dax_file_operations;
+ else
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ext4_dir_inode_operations;
@@ -4139,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0;
}
+struct other_inode {
+ unsigned long orig_ino;
+ struct ext4_inode *raw_inode;
+};
+
+static int other_inode_match(struct inode * inode, unsigned long ino,
+ void *data)
+{
+ struct other_inode *oi = (struct other_inode *) data;
+
+ if ((inode->i_ino != ino) ||
+ (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+ I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ ((inode->i_state & I_DIRTY_TIME) == 0))
+ return 0;
+ spin_lock(&inode->i_lock);
+ if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
+ I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
+ (inode->i_state & I_DIRTY_TIME)) {
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ spin_unlock(&inode->i_lock);
+
+ spin_lock(&ei->i_raw_lock);
+ EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
+ EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
+ EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
+ ext4_inode_csum_set(inode, oi->raw_inode, ei);
+ spin_unlock(&ei->i_raw_lock);
+ trace_ext4_other_inode_update_time(inode, oi->orig_ino);
+ return -1;
+ }
+ spin_unlock(&inode->i_lock);
+ return -1;
+}
+
+/*
+ * Opportunistically update the other time fields for other inodes in
+ * the same inode table block.
+ */
+static void ext4_update_other_inodes_time(struct super_block *sb,
+ unsigned long orig_ino, char *buf)
+{
+ struct other_inode oi;
+ unsigned long ino;
+ int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ int inode_size = EXT4_INODE_SIZE(sb);
+
+ oi.orig_ino = orig_ino;
+ ino = orig_ino & ~(inodes_per_block - 1);
+ for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
+ if (ino == orig_ino)
+ continue;
+ oi.raw_inode = (struct ext4_inode *) buf;
+ (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
+ }
+}
+
/*
* Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the
@@ -4248,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize);
}
}
-
ext4_inode_csum_set(inode, raw_inode, ei);
-
spin_unlock(&ei->i_raw_lock);
+ if (inode->i_sb->s_flags & MS_LAZYTIME)
+ ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
+ bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -4534,7 +4629,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
*/
- truncate_pagecache(inode, inode->i_size);
+ truncate_pagecache(inode, inode->i_size);
}
/*
* We want to call ext4_truncate() even if attr->ia_size ==
@@ -4840,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
* If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level.
+ *
+ * If only the I_DIRTY_TIME flag is set, we can skip everything. If
+ * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
+ * to copy into the on-disk inode structure are the timestamp files.
*/
void ext4_dirty_inode(struct inode *inode, int flags)
{
handle_t *handle;
+ if (flags == I_DIRTY_TIME)
+ return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
goto out;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2291923dae4..28fe71a2904 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2235,7 +2235,10 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
- inode->i_fop = &ext4_file_operations;
+ if (test_opt(inode->i_sb, DAX))
+ inode->i_fop = &ext4_dax_file_operations;
+ else
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
err = ext4_add_nondir(handle, dentry, inode);
if (!err && IS_DIRSYNC(dir))
@@ -2299,7 +2302,10 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext4_file_inode_operations;
- inode->i_fop = &ext4_file_operations;
+ if (test_opt(inode->i_sb, DAX))
+ inode->i_fop = &ext4_dax_file_operations;
+ else
+ inode->i_fop = &ext4_file_operations;
ext4_set_aops(inode);
d_tmpfile(dentry, inode);
err = ext4_orphan_add(handle, inode);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bf76f405a5f..8a8ec6293b1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -24,6 +24,18 @@ int ext4_resize_begin(struct super_block *sb)
return -EPERM;
/*
+ * If we are not using the primary superblock/GDT copy don't resize,
+ * because the user tools have no way of handling this. Probably a
+ * bad time to do it anyways.
+ */
+ if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+ ext4_warning(sb, "won't resize using backup superblock at %llu",
+ (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
+ return -EPERM;
+ }
+
+ /*
* We are not allowed to do online-resizing on a filesystem mounted
* with error, because it can destroy the filesystem easily.
*/
@@ -758,18 +770,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
"EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
gdb_num);
- /*
- * If we are not using the primary superblock/GDT copy don't resize,
- * because the user tools have no way of handling this. Probably a
- * bad time to do it anyways.
- */
- if (EXT4_SB(sb)->s_sbh->b_blocknr !=
- le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
- ext4_warning(sb, "won't resize using backup superblock at %llu",
- (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
- return -EPERM;
- }
-
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43c92b1685c..1adac6868e6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -334,7 +334,7 @@ static void save_error_info(struct super_block *sb, const char *func,
static int block_device_ejected(struct super_block *sb)
{
struct inode *bd_inode = sb->s_bdev->bd_inode;
- struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info;
+ struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
return bdi->dev == NULL;
}
@@ -1046,10 +1046,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
struct path *path);
-static int ext4_quota_on_sysfile(struct super_block *sb, int type,
- int format_id);
static int ext4_quota_off(struct super_block *sb, int type);
-static int ext4_quota_off_sysfile(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1084,16 +1081,6 @@ static const struct quotactl_ops ext4_qctl_operations = {
.get_dqblk = dquot_get_dqblk,
.set_dqblk = dquot_set_dqblk
};
-
-static const struct quotactl_ops ext4_qctl_sysfile_operations = {
- .quota_on_meta = ext4_quota_on_sysfile,
- .quota_off = ext4_quota_off_sysfile,
- .quota_sync = dquot_quota_sync,
- .get_info = dquot_get_dqinfo,
- .set_info = dquot_set_dqinfo,
- .get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk
-};
#endif
static const struct super_operations ext4_sops = {
@@ -1137,8 +1124,9 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_i_version,
+ Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+ Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
@@ -1200,8 +1188,11 @@ static const match_table_t tokens = {
{Opt_barrier, "barrier"},
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
+ {Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
+ {Opt_lazytime, "lazytime"},
+ {Opt_nolazytime, "nolazytime"},
{Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"},
@@ -1384,6 +1375,7 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
+ {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -1459,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
case Opt_i_version:
sb->s_flags |= MS_I_VERSION;
return 1;
+ case Opt_lazytime:
+ sb->s_flags |= MS_LAZYTIME;
+ return 1;
+ case Opt_nolazytime:
+ sb->s_flags &= ~MS_LAZYTIME;
+ return 1;
}
for (m = ext4_mount_opts; m->token != Opt_err; m++)
@@ -1620,6 +1618,11 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
+#ifndef CONFIG_FS_DAX
+ } else if (token == Opt_dax) {
+ ext4_msg(sb, KERN_INFO, "dax option not supported");
+ return -1;
+#endif
} else {
if (!args->from)
arg = 1;
@@ -3482,7 +3485,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
- ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
+ ext4_warning(sb, "metadata_csum and uninit_bg are "
"redundant flags; please run fsck.");
/* Check for a known checksum algorithm */
@@ -3602,6 +3605,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and dioread_nolock");
goto failed_mount;
}
+ if (test_opt(sb, DAX)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ goto failed_mount;
+ }
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
}
@@ -3665,6 +3673,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (blocksize != PAGE_SIZE) {
+ ext4_msg(sb, KERN_ERR,
+ "error: unsupported blocksize for dax");
+ goto failed_mount;
+ }
+ if (!sb->s_bdev->bd_disk->fops->direct_access) {
+ ext4_msg(sb, KERN_ERR,
+ "error: device does not support dax");
+ goto failed_mount;
+ }
+ }
+
if (sb->s_blocksize != blocksize) {
/* Validate the filesystem blocksize */
if (!sb_set_blocksize(sb, blocksize)) {
@@ -3935,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- sb->s_qcop = &ext4_qctl_sysfile_operations;
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
@@ -4882,6 +4903,18 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
+ if (test_opt(sb, DAX)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ }
+
+ if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
+ ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
+ "dax flag with busy inodes while remounting");
+ sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
}
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
@@ -5020,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#endif
+ *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
@@ -5288,21 +5322,6 @@ static int ext4_enable_quotas(struct super_block *sb)
return 0;
}
-/*
- * quota_on function that is used when QUOTA feature is set.
- */
-static int ext4_quota_on_sysfile(struct super_block *sb, int type,
- int format_id)
-{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- return -EINVAL;
-
- /*
- * USAGE was enabled at mount time. Only need to enable LIMITS now.
- */
- return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED);
-}
-
static int ext4_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
@@ -5329,18 +5348,6 @@ out:
return dquot_quota_off(sb, type);
}
-/*
- * quota_off function that is used when QUOTA feature is set.
- */
-static int ext4_quota_off_sysfile(struct super_block *sb, int type)
-{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
- return -EINVAL;
-
- /* Disable only the limits. */
- return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
-}
-
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and no one else should touch the files)