diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_aops.c | 28 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_dinode.h | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 13 |
5 files changed, 41 insertions, 37 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 596ec71da00..e11d654af78 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -86,14 +86,6 @@ xfs_destroy_ioend( bh->b_end_io(bh, !ioend->io_error); } - if (ioend->io_iocb) { - inode_dio_done(ioend->io_inode); - if (ioend->io_isasync) { - aio_complete(ioend->io_iocb, ioend->io_error ? - ioend->io_error : ioend->io_result, 0); - } - } - mempool_free(ioend, xfs_ioend_pool); } @@ -281,7 +273,6 @@ xfs_alloc_ioend( * all the I/O from calling the completion routine too early. */ atomic_set(&ioend->io_remaining, 1); - ioend->io_isasync = 0; ioend->io_isdirect = 0; ioend->io_error = 0; ioend->io_list = NULL; @@ -291,8 +282,6 @@ xfs_alloc_ioend( ioend->io_buffer_tail = NULL; ioend->io_offset = 0; ioend->io_size = 0; - ioend->io_iocb = NULL; - ioend->io_result = 0; ioend->io_append_trans = NULL; INIT_WORK(&ioend->io_work, xfs_end_io); @@ -1292,8 +1281,10 @@ __xfs_get_blocks( if (create || !ISUNWRITTEN(&imap)) xfs_map_buffer(inode, bh_result, &imap, offset); if (create && ISUNWRITTEN(&imap)) { - if (direct) + if (direct) { bh_result->b_private = inode; + set_buffer_defer_completion(bh_result); + } set_buffer_unwritten(bh_result); } } @@ -1390,9 +1381,7 @@ xfs_end_io_direct_write( struct kiocb *iocb, loff_t offset, ssize_t size, - void *private, - int ret, - bool is_async) + void *private) { struct xfs_ioend *ioend = iocb->private; @@ -1414,17 +1403,10 @@ xfs_end_io_direct_write( ioend->io_offset = offset; ioend->io_size = size; - ioend->io_iocb = iocb; - ioend->io_result = ret; if (private && size > 0) ioend->io_type = XFS_IO_UNWRITTEN; - if (is_async) { - ioend->io_isasync = 1; - xfs_finish_ioend(ioend); - } else { - xfs_finish_ioend_sync(ioend); - } + xfs_finish_ioend_sync(ioend); } STATIC ssize_t diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index c325abb8d61..f94dd459dff 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -45,7 +45,6 @@ typedef struct xfs_ioend { unsigned int io_type; /* delalloc / 
unwritten */ int io_error; /* I/O error code */ atomic_t io_remaining; /* hold count */ - unsigned int io_isasync : 1; /* needs aio_complete */ unsigned int io_isdirect : 1;/* direct I/O */ struct inode *io_inode; /* file being written to */ struct buffer_head *io_buffer_head;/* buffer linked list head */ @@ -54,8 +53,6 @@ typedef struct xfs_ioend { xfs_off_t io_offset; /* offset in the file */ struct work_struct io_work; /* xfsdatad work queue */ struct xfs_trans *io_append_trans;/* xact. for size update */ - struct kiocb *io_iocb; - int io_result; } xfs_ioend_t; extern const struct address_space_operations xfs_address_space_operations; diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index 07d735a80a0..e5869b50dc4 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -39,6 +39,9 @@ typedef struct xfs_timestamp { * There is a very similar struct icdinode in xfs_inode which matches the * layout of the first 96 bytes of this structure, but is kept in native * format instead of big endian. + * + * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed + * padding field for v3 inodes. 
*/ typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b78481f99d9..bb262c25c8d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -896,7 +896,6 @@ xfs_dinode_to_disk( to->di_projid_lo = cpu_to_be16(from->di_projid_lo); to->di_projid_hi = cpu_to_be16(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - to->di_flushiter = cpu_to_be16(from->di_flushiter); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); @@ -924,6 +923,9 @@ xfs_dinode_to_disk( to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; + } else { + to->di_flushiter = cpu_to_be16(from->di_flushiter); } } @@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc( /* * Read the disk inode attributes into the in-core inode structure. * - * If we are initialising a new inode and we are not utilising the - * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core - * with a random generation number. If we are keeping inodes around, we need to - * read the inode cluster to get the existing generation number off disk. + * For version 5 superblocks, if we are initialising a new inode and we are not + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new + * inode core with a random generation number. If we are keeping inodes around, + * we need to read the inode cluster to get the existing generation number off + * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode + * format) then log recovery is dependent on the di_flushiter field being + * initialised from the current on-disk value and hence we must also read the + * inode off disk. 
*/ int xfs_iread( @@ -1054,6 +1060,7 @@ xfs_iread( /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && + xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); @@ -2882,12 +2889,18 @@ xfs_iflush_int( __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } + /* - * bump the flush iteration count, used to detect flushes which - * postdate a log record during recovery. This is redundant as we now - * log every change and hence this can't happen. Still, it doesn't hurt. + * Inode item log recovery for v1/v2 inodes is dependent on the + * di_flushiter count for correct sequencing. We bump the flush + * iteration count so we can detect flushes which postdate a log record + * during recovery. This is redundant as we now log every change and + * hence this can't happen, but we still need to do it to ensure + * backwards compatibility with old kernels that predate logging all + * inode changes. */ - ip->i_d.di_flushiter++; + if (ip->i_d.di_version < 3) + ip->i_d.di_flushiter++; /* * Copy the dirty parts of the inode into the on-disk diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6fcc910a50b..7681b19aa5d 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2( goto error; } - /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes + * are transactional and if ordering is necessary we can determine that + * more accurately by the LSN field in the V3 inode core. 
Don't trust + * the inode versions as we might be changing them here - use the + * superblock flag to determine whether we need to look at di_flushiter + * to skip replay when the on disk inode is newer than the log one. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb) && + dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers @@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2( goto error; } } + /* Take the opportunity to reset the flush iteration count */ dicp->di_flushiter = 0; |