diff options
Diffstat (limited to 'fs')
53 files changed, 1578 insertions, 489 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b104a98..b03dd23feda 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -43,23 +43,6 @@ #include "fid.h" /** - * v9fs_dentry_delete - called when dentry refcount equals 0 - * @dentry: dentry in question - * - * By returning 1 here we should remove cacheing of unused - * dentry components. - * - */ - -static int v9fs_dentry_delete(const struct dentry *dentry) -{ - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); - - return 1; -} - -/** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question * @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_delete = v9fs_dentry_delete, + .d_delete = always_delete_dentry, .d_release = v9fs_dentry_release, }; @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, - unsigned short max_sectors) + unsigned int max_sectors) { int retried_segments = 0; struct bio_vec *bvec; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index fc6f4f3a1a9..4934347321d 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -548,7 +548,13 @@ static int CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) { int rc; - unsigned int offset = CIFS_SESS_KEY_SIZE + 8; + struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *) + (ses->auth_key.response + CIFS_SESS_KEY_SIZE); + unsigned int hash_len; + + /* The MD5 hash starts at challenge_key.key */ + hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE + + offsetof(struct ntlmv2_resp, challenge.key[0])); if (!ses->server->secmech.sdeschmacmd5) { cifs_dbg(VFS, "%s: can't generate ntlmv2 hash\n", __func__); @@ -556,7 +562,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, - ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n", __func__); @@ -570,20 +576,21 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) } if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) - memcpy(ses->auth_key.response + offset, - ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ntlmv2->challenge.key, + ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); else - memcpy(ses->auth_key.response + offset, - ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); + memcpy(ntlmv2->challenge.key, + ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + offset, ses->auth_key.len - offset); + ntlmv2->challenge.key, hash_len); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } + /* Note that the MD5 digest over writes anon.challenge_key.key */ rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + CIFS_SESS_KEY_SIZE); + ntlmv2->ntlmv2_hash); if (rc) cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__); @@ -627,7 +634,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) int rc; int baselen; unsigned int tilen; - struct ntlmv2_resp *buf; + struct ntlmv2_resp *ntlmv2; char ntlmv2_hash[16]; unsigned char *tiblob = NULL; /* target info blob */ @@ -660,13 +667,14 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } ses->auth_key.len += baselen; - buf = (struct ntlmv2_resp *) + ntlmv2 = (struct ntlmv2_resp *) (ses->auth_key.response + CIFS_SESS_KEY_SIZE); - buf->blob_signature = cpu_to_le32(0x00000101); - buf->reserved = 0; - buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); - get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); - buf->reserved2 = 0; + ntlmv2->blob_signature = cpu_to_le32(0x00000101); + ntlmv2->reserved = 0; + /* Must be within 5 minutes of the server */ + ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); + get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal)); + ntlmv2->reserved2 = 0; memcpy(ses->auth_key.response + baselen, tiblob, tilen); @@ -706,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, - ses->auth_key.response + CIFS_SESS_KEY_SIZE, + ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 26b1c1dc93f..d9ea7ada137 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -261,7 +261,7 @@ struct smb_version_operations { /* query path data from the server */ int (*query_path_info)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *, - FILE_ALL_INFO *, bool *); + FILE_ALL_INFO *, bool *, bool *); /* query file data from the server */ int (*query_file_info)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, FILE_ALL_INFO *); @@ -381,6 +381,9 @@ struct smb_version_operations { char * (*create_lease_buf)(u8 *, u8); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *, unsigned int *); + int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, + struct cifsFileInfo *target_file, u64 src_off, u64 len, + u64 dest_off); }; struct smb_version_values { @@ -855,6 +858,9 @@ struct cifs_tcon { __le64 vol_create_time; __u32 ss_flags; /* sector size flags */ __u32 perf_sector_size; /* best sector size for perf */ + __u32 max_chunks; + __u32 max_bytes_chunk; + __u32 max_bytes_copy; #endif /* CONFIG_CIFS_SMB2 */ #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 9e5ee34de98..33df36ef9d5 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -697,7 +697,13 @@ struct ntlmssp2_name { } __attribute__((packed)); struct ntlmv2_resp { - char ntlmv2_hash[CIFS_ENCPWD_SIZE]; + union { + char ntlmv2_hash[CIFS_ENCPWD_SIZE]; + struct { + __u8 reserved[8]; + __u8 key[CIFS_SERVER_CHALLENGE_SIZE]; + } __attribute__((packed)) challenge; + } __attribute__((packed)); __le32 blob_signature; __u32 reserved; __le64 time; diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 93b29474714..124aa0230c1 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3369,11 +3369,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; } cifs_acl->version = cpu_to_le16(1); - if (acl_type == ACL_TYPE_ACCESS) + if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); - else if (acl_type == ACL_TYPE_DEFAULT) + cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); + } else if (acl_type == ACL_TYPE_DEFAULT) { cifs_acl->default_entry_count = cpu_to_le16(count); - else { + cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); + } else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5384c2a640c..11ff5f116b2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -756,7 +756,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ } else if (rc != -EACCES) { - cifs_dbg(VFS, "Unexpected lookup error %d\n", rc); + cifs_dbg(FYI, "Unexpected lookup error %d\n", rc); /* We special case check for Access Denied - since that is a common return code */ } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7ddddf2e250..5a5a87240fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3663,6 +3663,27 @@ void cifs_oplock_break(struct work_struct *work) } } +/* + * The presence of cifs_direct_io() in the address space ops vector + * allowes open() O_DIRECT flags which would have failed otherwise. + * + * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests + * so this method should never be called. + * + * Direct IO is not yet supported in the cached mode. + */ +static ssize_t +cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t pos, unsigned long nr_segs) +{ + /* + * FIXME + * Eventually need to support direct IO for non forcedirectio mounts + */ + return -EINVAL; +} + + const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, @@ -3672,6 +3693,7 @@ const struct address_space_operations cifs_addr_ops = { .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, .releasepage = cifs_release_page, + .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, }; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 867b7cdc794..36f9ebb93ce 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -542,7 +542,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, /* Fill a cifs_fattr struct with info from FILE_ALL_INFO */ static void cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, - struct cifs_sb_info *cifs_sb, bool adjust_tz) + struct cifs_sb_info *cifs_sb, bool adjust_tz, + bool symlink) { struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); @@ -569,7 +570,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_createtime = le64_to_cpu(info->CreationTime); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); - if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { + + if (symlink) { + fattr->cf_mode = S_IFLNK; + fattr->cf_dtype = DT_LNK; + } else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; /* @@ -578,10 +583,6 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, */ if (!tcon->unix_ext) fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; - } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { - fattr->cf_mode = S_IFLNK; - fattr->cf_dtype = DT_LNK; - fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; @@ -626,7 +627,8 @@ cifs_get_file_info(struct file *filp) rc = server->ops->query_file_info(xid, tcon, &cfile->fid, &find_data); switch (rc) { case 0: - cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false); + cifs_all_info_to_fattr(&fattr, &find_data, cifs_sb, false, + false); break; case -EREMOTE: cifs_create_dfs_fattr(&fattr, inode->i_sb); @@ -673,6 +675,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, bool adjust_tz = false; struct cifs_fattr fattr; struct cifs_search_info *srchinf = NULL; + bool symlink = false; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) @@ -702,12 +705,12 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } data = (FILE_ALL_INFO *)buf; rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, - data, &adjust_tz); + data, &adjust_tz, &symlink); } if (!rc) { - cifs_all_info_to_fattr(&fattr, (FILE_ALL_INFO *)data, cifs_sb, - adjust_tz); + cifs_all_info_to_fattr(&fattr, data, cifs_sb, adjust_tz, + symlink); } else if (rc == -EREMOTE) { cifs_create_dfs_fattr(&fattr, sb); rc = 0; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index ba54bf6ab11..409b45eefe7 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -22,12 +22,120 @@ */ #include <linux/fs.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/btrfs.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" #include "cifsfs.h" +static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, + unsigned long srcfd, u64 off, u64 len, u64 destoff) +{ + int rc; + struct cifsFileInfo *smb_file_target = dst_file->private_data; + struct inode *target_inode = file_inode(dst_file); + struct cifs_tcon *target_tcon; + struct fd src_file; + struct cifsFileInfo *smb_file_src; + struct inode *src_inode; + struct cifs_tcon *src_tcon; + + cifs_dbg(FYI, "ioctl clone range\n"); + /* the destination must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) { + cifs_dbg(FYI, "file target not open for write\n"); + return -EINVAL; + } + + /* check if target volume is readonly and take reference */ + rc = mnt_want_write_file(dst_file); + if (rc) { + cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc); + return rc; + } + + src_file = fdget(srcfd); + if (!src_file.file) { + rc = -EBADF; + goto out_drop_write; + } + + if ((!src_file.file->private_data) || (!dst_file->private_data)) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out_fput; + } + + rc = -EXDEV; + smb_file_target = dst_file->private_data; + smb_file_src = src_file.file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); + target_tcon = tlink_tcon(smb_file_target->tlink); + + /* check if source and target are on same tree connection */ + if (src_tcon != target_tcon) { + cifs_dbg(VFS, "file copy src and target on different volume\n"); + goto out_fput; + } + + src_inode = src_file.file->f_dentry->d_inode; + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + + /* so we do not deadlock racing two ioctls on same files */ + if (target_inode < src_inode) { + mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&target_inode->i_mutex, I_MUTEX_CHILD); + } + + /* determine range to clone */ + rc = -EINVAL; + if (off + len > src_inode->i_size || off + len < off) + goto out_unlock; + if (len == 0) + len = src_inode->i_size - off; + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages_range(&target_inode->i_data, destoff, + PAGE_CACHE_ALIGN(destoff + len)-1); + + if (target_tcon->ses->server->ops->clone_range) + rc = target_tcon->ses->server->ops->clone_range(xid, + smb_file_src, smb_file_target, off, len, destoff); + + /* force revalidate of size and timestamps of target file now + that target is updated on the server */ + CIFS_I(target_inode)->time = 0; +out_unlock: + /* although unlocking in the reverse order from locking is not + strictly necessary here it is a little cleaner to be consistent */ + if (target_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&target_inode->i_mutex); + } else { + mutex_unlock(&target_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return rc; +} + long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) { struct inode *inode = file_inode(filep); @@ -105,6 +213,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) cifs_dbg(FYI, "set compress flag rc %d\n", rc); } break; + case BTRFS_IOC_CLONE: + rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); + break; default: cifs_dbg(FYI, "unsupported ioctl\n"); break; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 651a5279607..049884552e7 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -51,7 +51,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = { {ERRnoaccess, -EACCES}, {ERRbadfid, -EBADF}, {ERRbadmcb, -EIO}, - {ERRnomem, -ENOMEM}, + {ERRnomem, -EREMOTEIO}, {ERRbadmem, -EFAULT}, {ERRbadenv, -EFAULT}, {ERRbadformat, -EINVAL}, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 53a75f3d017..5940ecabbe6 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -134,22 +134,6 @@ out: dput(dentry); } -/* - * Is it possible that this directory might turn out to be a DFS referral - * once we go to try and use it? - */ -static bool -cifs_dfs_is_possible(struct cifs_sb_info *cifs_sb) -{ -#ifdef CONFIG_CIFS_DFS_UPCALL - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - - if (tcon->Flags & SMB_SHARE_IS_IN_DFS) - return true; -#endif - return false; -} - static void cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) { @@ -159,27 +143,19 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) if (fattr->cf_cifsattrs & ATTR_DIRECTORY) { fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode; fattr->cf_dtype = DT_DIR; - /* - * Windows CIFS servers generally make DFS referrals look - * like directories in FIND_* responses with the reparse - * attribute flag also set (since DFS junctions are - * reparse points). We must revalidate at least these - * directory inodes before trying to use them (if - * they are DFS we will get PATH_NOT_COVERED back - * when queried directly and can then try to connect - * to the DFS target) - */ - if (cifs_dfs_is_possible(cifs_sb) && - (fattr->cf_cifsattrs & ATTR_REPARSE)) - fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; - } else if (fattr->cf_cifsattrs & ATTR_REPARSE) { - fattr->cf_mode = S_IFLNK; - fattr->cf_dtype = DT_LNK; } else { fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode; fattr->cf_dtype = DT_REG; } + /* + * We need to revalidate it further to make a decision about whether it + * is a symbolic link, DFS referral or a reparse point with a direct + * access like junctions, deduplicated files, NFS symlinks. + */ + if (fattr->cf_cifsattrs & ATTR_REPARSE) + fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; + /* non-unix readdir doesn't provide nlink */ fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 384cffe4285..5f5ba0dc2ee 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -534,10 +534,12 @@ cifs_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjustTZ) + FILE_ALL_INFO *data, bool *adjustTZ, bool *symlink) { int rc; + *symlink = false; + /* could do find first instead but this returns more info */ rc = CIFSSMBQPathInfo(xid, tcon, full_path, data, 0 /* not legacy */, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -554,6 +556,23 @@ cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, CIFS_MOUNT_MAP_SPECIAL_CHR); *adjustTZ = true; } + + if (!rc && (le32_to_cpu(data->Attributes) & ATTR_REPARSE)) { + int tmprc; + int oplock = 0; + __u16 netfid; + + /* Need to check if this is a symbolic link or not */ + tmprc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, + FILE_READ_ATTRIBUTES, 0, &netfid, &oplock, + NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tmprc == -EOPNOTSUPP) + *symlink = true; + else + CIFSSMBClose(xid, tcon, netfid); + } + return rc; } diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 78ff88c467b..84c012a6aba 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -123,12 +123,13 @@ move_smb2_info_to_cifs(FILE_ALL_INFO *dst, struct smb2_file_all_info *src) int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, - FILE_ALL_INFO *data, bool *adjust_tz) + FILE_ALL_INFO *data, bool *adjust_tz, bool *symlink) { int rc; struct smb2_file_all_info *smb2_data; *adjust_tz = false; + *symlink = false; smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, GFP_KERNEL); @@ -136,9 +137,16 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_READ_ATTRIBUTES, FILE_OPEN, - OPEN_REPARSE_POINT, smb2_data, - SMB2_OP_QUERY_INFO); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, + smb2_data, SMB2_OP_QUERY_INFO); + if (rc == -EOPNOTSUPP) { + *symlink = true; + /* Failed on a symbolic link - query a reparse point info */ + rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, + FILE_READ_ATTRIBUTES, FILE_OPEN, + OPEN_REPARSE_POINT, smb2_data, + SMB2_OP_QUERY_INFO); + } if (rc) goto out; diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 7c2f45c06fc..94bd4fbb13d 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -306,7 +306,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_NONEXISTENT_SECTOR, -EIO, "STATUS_NONEXISTENT_SECTOR"}, {STATUS_MORE_PROCESSING_REQUIRED, -EIO, "STATUS_MORE_PROCESSING_REQUIRED"}, - {STATUS_NO_MEMORY, -ENOMEM, "STATUS_NO_MEMORY"}, + {STATUS_NO_MEMORY, -EREMOTEIO, "STATUS_NO_MEMORY"}, {STATUS_CONFLICTING_ADDRESSES, -EADDRINUSE, "STATUS_CONFLICTING_ADDRESSES"}, {STATUS_NOT_MAPPED_VIEW, -EIO, "STATUS_NOT_MAPPED_VIEW"}, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c571be8cb76..11dde4b24f8 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -494,6 +494,85 @@ smb2_close_file(const unsigned int xid, struct cifs_tcon *tcon, } static int +SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, + struct copychunk_ioctl *pcchunk) +{ + int rc; + unsigned int ret_data_len; + struct resume_key_req *res_key; + + rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, + FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, + NULL, 0 /* no input */, + (char **)&res_key, &ret_data_len); + + if (rc) { + cifs_dbg(VFS, "refcpy ioctl error %d getting resume key\n", rc); + goto req_res_key_exit; + } + if (ret_data_len < sizeof(struct resume_key_req)) { + cifs_dbg(VFS, "Invalid refcopy resume key length\n"); + rc = -EINVAL; + goto req_res_key_exit; + } + memcpy(pcchunk->SourceKey, res_key->ResumeKey, COPY_CHUNK_RES_KEY_SIZE); + +req_res_key_exit: + kfree(res_key); + return rc; +} + +static int +smb2_clone_range(const unsigned int xid, + struct cifsFileInfo *srcfile, + struct cifsFileInfo *trgtfile, u64 src_off, + u64 len, u64 dest_off) +{ + int rc; + unsigned int ret_data_len; + struct copychunk_ioctl *pcchunk; + char *retbuf = NULL; + + pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); + + if (pcchunk == NULL) + return -ENOMEM; + + cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n"); + /* Request a key from the server to identify the source of the copy */ + rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), + srcfile->fid.persistent_fid, + srcfile->fid.volatile_fid, pcchunk); + + /* Note: request_res_key sets res_key null only if rc !=0 */ + if (rc) + return rc; + + /* For now array only one chunk long, will make more flexible later */ + pcchunk->ChunkCount = __constant_cpu_to_le32(1); + pcchunk->Reserved = 0; + pcchunk->SourceOffset = cpu_to_le64(src_off); + pcchunk->TargetOffset = cpu_to_le64(dest_off); + pcchunk->Length = cpu_to_le32(len); + pcchunk->Reserved2 = 0; + + /* Request that server copy to target from src file identified by key */ + rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink), + trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, + true /* is_fsctl */, (char *)pcchunk, + sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len); + + /* BB need to special case rc = EINVAL to alter chunk size */ + + cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len); + + kfree(pcchunk); + return rc; +} + +static int smb2_flush_file(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *fid) { @@ -1017,6 +1096,7 @@ struct smb_version_operations smb20_operations = { .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_operations smb21_operations = { @@ -1090,6 +1170,7 @@ struct smb_version_operations smb21_operations = { .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, .parse_lease_buf = smb2_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_operations smb30_operations = { @@ -1165,6 +1246,7 @@ struct smb_version_operations smb30_operations = { .set_oplock_level = smb3_set_oplock_level, .create_lease_buf = smb3_create_lease_buf, .parse_lease_buf = smb3_parse_lease_buf, + .clone_range = smb2_clone_range, }; struct smb_version_values smb20_values = { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8ab05b0d677..d65270c290a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -630,6 +630,8 @@ ssetup_ntlmssp_authenticate: goto ssetup_exit; ses->session_flags = le16_to_cpu(rsp->SessionFlags); + if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA) + cifs_dbg(VFS, "SMB3 encryption not supported yet\n"); ssetup_exit: free_rsp_buf(resp_buftype, rsp); @@ -717,6 +719,14 @@ static inline void cifs_stats_fail_inc(struct cifs_tcon *tcon, uint16_t code) #define MAX_SHARENAME_LENGTH (255 /* server */ + 80 /* share */ + 1 /* NULL */) +/* These are similar values to what Windows uses */ +static inline void init_copy_chunk_defaults(struct cifs_tcon *tcon) +{ + tcon->max_chunks = 256; + tcon->max_bytes_chunk = 1048576; + tcon->max_bytes_copy = 16777216; +} + int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *cp) @@ -818,7 +828,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) && ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); - + init_copy_chunk_defaults(tcon); tcon_exit: free_rsp_buf(resp_buftype, rsp); kfree(unc_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 6183b1b7550..f88320bbb47 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -122,6 +122,23 @@ struct smb2_pdu { __le16 StructureSize2; /* size of wct area (varies, request specific) */ } __packed; +struct smb2_transform_hdr { + __be32 smb2_buf_length; /* big endian on wire */ + /* length is only two or three bytes - with + one or two byte type preceding it that MBZ */ + __u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */ + __u8 Signature[16]; + __u8 Nonce[11]; + __u8 Reserved[5]; + __le32 OriginalMessageSize; + __u16 Reserved1; + __le16 EncryptionAlgorithm; + __u64 SessionId; +} __packed; + +/* Encryption Algorithms */ +#define SMB2_ENCRYPTION_AES128_CCM __constant_cpu_to_le16(0x0001) + /* * SMB2 flag definitions */ @@ -237,6 +254,7 @@ struct smb2_sess_setup_req { /* Currently defined SessionFlags */ #define SMB2_SESSION_FLAG_IS_GUEST 0x0001 #define SMB2_SESSION_FLAG_IS_NULL 0x0002 +#define SMB2_SESSION_FLAG_ENCRYPT_DATA 0x0004 struct smb2_sess_setup_rsp { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 9 */ @@ -534,9 +552,16 @@ struct create_durable { } Data; } __packed; +#define COPY_CHUNK_RES_KEY_SIZE 24 +struct resume_key_req { + char ResumeKey[COPY_CHUNK_RES_KEY_SIZE]; + __le32 ContextLength; /* MBZ */ + char Context[0]; /* ignored, Windows sets to 4 bytes of zero */ +} __packed; + /* this goes in the ioctl buffer when doing a copychunk request */ struct copychunk_ioctl { - char SourceKey[24]; + char SourceKey[COPY_CHUNK_RES_KEY_SIZE]; __le32 ChunkCount; /* we are only sending 1 */ __le32 Reserved; /* array will only be one chunk long for us */ @@ -546,6 +571,12 @@ struct copychunk_ioctl { __u32 Reserved2; } __packed; +struct copychunk_ioctl_rsp { + __le32 ChunksWritten; + __le32 ChunkBytesWritten; + __le32 TotalBytesWritten; +} __packed; + /* Response and Request are the same format */ struct validate_negotiate_info { __le32 Capabilities; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 313813e4c19..b4eea105b08 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -61,7 +61,7 @@ extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst, extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, FILE_ALL_INFO *data, - bool *adjust_tz); + bool *adjust_tz, bool *symlink); extern int smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, __u64 size, struct cifs_sb_info *cifs_sb, bool set_alloc); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1be21f..4522e075577 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -66,19 +66,9 @@ static void configfs_d_iput(struct dentry * dentry, iput(inode); } -/* - * We _must_ delete our dentries on last dput, as the chain-to-parent - * behavior is required to clear the parents of default_groups. - */ -static int configfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - const struct dentry_operations configfs_dentry_ops = { .d_iput = configfs_d_iput, - /* simple_delete_dentry() isn't exported */ - .d_delete = configfs_d_delete, + .d_delete = always_delete_dentry, }; #ifdef CONFIG_LOCKDEP diff --git a/fs/coredump.c b/fs/coredump.c index 62406b6959b..bc3fbcd3255 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) while (nr) { if (dump_interrupted()) return 0; - n = vfs_write(file, addr, nr, &pos); + n = __kernel_write(file, addr, nr, &pos); if (n <= 0) return 0; file->f_pos = pos; @@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align) { unsigned mod = cprm->written & (align - 1); if (align & (align - 1)) - return -EINVAL; - return mod ? dump_skip(cprm, align - mod) : 0; + return 0; + return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); diff --git a/fs/dcache.c b/fs/dcache.c index 0a38ef8d7f0..4bdb300b16e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. - */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - /* * This is the single most critical data structure when it comes * to the dcache: the hashtable for lookups. Somebody should try @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) * This hash-function tries to avoid losing too many bits of hash * information, yet avoid using a prime hash-size or similar. */ -#define D_HASHBITS d_hash_shift -#define D_HASHMASK d_hash_mask static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); + hash = hash + (hash >> d_hash_shift); + return dentry_hashtable + (hash & d_hash_mask); } /* Statistics gathering. */ @@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) { list_del(&dentry->d_u.d_child); /* - * Inform try_to_ascend() that we are no longer attached to the + * Inform d_walk() that we are no longer attached to the * dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; @@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb) } EXPORT_SYMBOL(shrink_dcache_sb); -/* - * This tries to ascend one level of parenthood, but - * we can race with renaming, so we need to re-check - * the parenthood after dropping the lock and check - * that the sequence number still matches. - */ -static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) -{ - struct dentry *new = old->d_parent; - - rcu_read_lock(); - spin_unlock(&old->d_lock); - spin_lock(&new->d_lock); - - /* - * might go back up the wrong parent if we have had a rename - * or deletion - */ - if (new != old->d_parent || - (old->d_flags & DCACHE_DENTRY_KILLED) || - need_seqretry(&rename_lock, seq)) { - spin_unlock(&new->d_lock); - new = NULL; - } - rcu_read_unlock(); - return new; -} - /** * enum d_walk_ret - action to talke during tree walk * @D_WALK_CONTINUE: contrinue walk @@ -1185,9 +1126,24 @@ resume: */ if (this_parent != parent) { struct dentry *child = this_parent; - this_parent = try_to_ascend(this_parent, seq); - if (!this_parent) + this_parent = child->d_parent; + + rcu_read_lock(); + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* + * might go back up the wrong parent if we have had a rename + * or deletion + */ + if (this_parent != child->d_parent || + (child->d_flags & DCACHE_DENTRY_KILLED) || + need_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); goto rename_retry; + } + rcu_read_unlock(); next = child->d_u.d_child.next; goto resume; } diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 60a327863b1..e7cfbaf8d0e 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -74,14 +74,16 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } -static struct genl_ops dlm_nl_ops = { - .cmd = DLM_CMD_HELLO, - .doit = user_cmd, +static struct genl_ops dlm_nl_ops[] = { + { + .cmd = DLM_CMD_HELLO, + .doit = user_cmd, + }, }; int __init dlm_netlink_init(void) { - return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); + return genl_register_family_with_ops(&family, dlm_nl_ops); } void dlm_netlink_exit(void) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b880c0..becc725a195 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) return 0; } -/* - * Retaining negative dentries for an in-memory filesystem just wastes - * memory and lookup time: arrange for them to be deleted immediately. - */ -static int efivarfs_delete_dentry(const struct dentry *dentry) -{ - return 1; -} - static struct dentry_operations efivarfs_d_ops = { .d_compare = efivarfs_d_compare, .d_hash = efivarfs_d_hash, - .d_delete = efivarfs_delete_dentry, + .d_delete = always_delete_dentry, }; static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6de6c..2a6ba06bee6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(uint32_t)); + memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); + memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 453b50eadde..98236d0df3c 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q, *qp; + struct gfs2_quota q; int err, nbytes; u64 size; @@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - qp = &q; - qp->qu_value = be64_to_cpu(qp->qu_value); - qp->qu_value += change; - qp->qu_value = cpu_to_be64(qp->qu_value); - qd->qd_qb.qb_value = qp->qu_value; + be64_add_cpu(&q.qu_value, change); + qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = qp->qu_warn; + q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = q.qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = qp->qu_limit; + q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = q.qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; + q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = q.qu_value; } } /* Write the quota into the quota file on disk */ - ptr = qp; + ptr = &q; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d83abdd563..c8d6161bd68 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; } - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25437280a20..db23ce1bd90 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) -static int hostfs_d_delete(const struct dentry *dentry) -{ - return 1; -} - -static const struct dentry_operations hostfs_dentry_ops = { - .d_delete = hostfs_d_delete, -}; - /* Changed in hostfs_args before the kernel starts running */ static char *root_ino = ""; static int append = 0; @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; - sb->s_d_op = &hostfs_dentry_ops; + sb->s_d_op = &simple_dentry_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ diff --git a/fs/libfs.c b/fs/libfs.c index 5de06947ba5..a1844244246 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); * Retaining negative dentries for an in-memory filesystem just wastes * memory and lookup time: arrange for them to be deleted immediately. */ -static int simple_delete_dentry(const struct dentry *dentry) +int always_delete_dentry(const struct dentry *dentry) { return 1; } +EXPORT_SYMBOL(always_delete_dentry); + +const struct dentry_operations simple_dentry_operations = { + .d_delete = always_delete_dentry, +}; +EXPORT_SYMBOL(simple_dentry_operations); /* * Lookup the data. This is trivial - if the dentry didn't already @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - static const struct dentry_operations simple_dentry_operations = { - .d_delete = simple_delete_dentry, - }; - if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); if (!dentry->d_sb->s_d_op) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e15615b0..cca93b6fb9a 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = { }; /* - * As some entries in /proc are volatile, we want to - * get rid of unused dentries. This could be made - * smarter: we could keep a "volatile" flag in the - * inode to indicate which ones to keep. - */ -static int proc_delete_dentry(const struct dentry * dentry) -{ - return 1; -} - -static const struct dentry_operations proc_dentry_operations = -{ - .d_delete = proc_delete_dentry, -}; - -/* * Don't create negative dentries here, return -ENOENT by hand * instead. */ @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, inode = proc_get_inode(dir->i_sb, de); if (!inode) return ERR_PTR(-ENOMEM); - d_set_d_op(dentry, &proc_dentry_operations); + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, inode); return NULL; } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83..9ae46b87470 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { .setattr = proc_setattr, }; -static int ns_delete_dentry(const struct dentry *dentry) -{ - /* Don't cache namespace inodes when not in use */ - return 1; -} - static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) { struct inode *inode = dentry->d_inode; @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) const struct dentry_operations ns_dentry_operations = { - .d_delete = ns_delete_dentry, + .d_delete = always_delete_dentry, .d_dname = ns_dname, }; diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 16e8abb7709..72d29177998 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c @@ -9,13 +9,25 @@ #include <net/netlink.h> #include <net/genetlink.h> +static const struct genl_multicast_group quota_mcgrps[] = { + { .name = "events", }, +}; + /* Netlink family structure for quota */ static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, + /* + * Needed due to multicast group ID abuse - old code assumed + * the family ID was also a valid multicast group ID (which + * isn't true) and userspace might thus rely on it. Assign a + * static ID for this group to make dealing with that easier. + */ + .id = GENL_ID_VFS_DQUOT, .hdrsize = 0, .name = "VFS_DQUOT", .version = 1, .maxattr = QUOTA_NL_A_MAX, + .mcgrps = quota_mcgrps, + .n_mcgrps = ARRAY_SIZE(quota_mcgrps), }; /** @@ -78,7 +90,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, goto attr_err_out; genlmsg_end(skb, msg_head); - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + genlmsg_multicast("a_genl_family, skb, 0, 0, GFP_NOFS); return; attr_err_out: printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); diff --git a/fs/seq_file.c b/fs/seq_file.c index 1cd2388ca5b..1d641bb108d 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -136,6 +136,7 @@ static int traverse(struct seq_file *m, loff_t offset) Eoverflow: m->op->stop(m, p); kfree(m->buf); + m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); return !m->buf ? -ENOMEM : -EAGAIN; } @@ -232,10 +233,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Fill; m->op->stop(m, p); kfree(m->buf); + m->count = 0; m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); if (!m->buf) goto Enomem; - m->count = 0; m->version = 0; pos = m->index; p = m->op->start(m, &pos); diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd..b6fa8657dcb 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,78 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b..4132520b4ff 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,6 +5,11 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9b..0cea9b9236d 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,6 +36,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). */ -int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength, int pages) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail; + int bytes, compressed, b = 0, k = 0, avail, i; - bh = kcalloc(((srclength + msblk->devblksize - 1) + bh = kcalloc(((output->length + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, srclength); + index, compressed ? "" : "un", length, output->length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto read_failure; @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto block_release; @@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, ll_rw_block(READ, b - 1, bh + 1); } + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + length = squashfs_decompress(msblk, bh, b, offset, length, + output); if (length < 0) goto read_failure; } else { @@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, * Block is uncompressed. */ int in, pg_offset = 0; + void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - page++; + data = squashfs_next_page(output); pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(buffer[page] + pg_offset, - bh[k]->b_data + offset, avail); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); in -= avail; pg_offset += avail; offset += avail; @@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; put_bh(bh[k]); } + squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b7380259..1cb70a0b216 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,6 +56,7 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" +#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, entry->data, - block, length, &entry->next_index, - cache->block_size, cache->pages); + entry->length = squashfs_read_data(sb, block, length, + &entry->next_index, entry->actor); spin_lock(&cache->lock); @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } + kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); + if (entry->actor == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } } return cache; @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; + struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); + res = squashfs_read_data(sb, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); + kfree(actor); if (res < 0) goto failed; return table; +failed2: + kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86ab..ac22fe73b0a 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,6 +30,7 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" +#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -37,29 +38,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) +static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *strm, *buffer = NULL; + void *buffer = NULL, *comp_opts; + struct squashfs_page_actor *actor = NULL; int length = 0; /* @@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) - return ERR_PTR(-ENOMEM); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } - length = squashfs_read_data(sb, &buffer, - sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); if (length < 0) { - strm = ERR_PTR(length); - goto finished; + comp_opts = ERR_PTR(length); + goto out; } } - strm = msblk->decompressor->init(msblk, buffer, length); + comp_opts = squashfs_comp_opts(msblk, buffer, length); -finished: +out: + kfree(actor); kfree(buffer); + return comp_opts; +} + + +void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + void *stream, *comp_opts = get_comp_opts(sb, flags); + + if (IS_ERR(comp_opts)) + return comp_opts; + + stream = squashfs_decompressor_create(msblk, comp_opts); + if (IS_ERR(stream)) + kfree(comp_opts); - return strm; + return stream; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e2902..af098532180 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,28 +24,22 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *); + void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void **, - struct buffer_head **, int, int, int, int, int); + int (*decompress)(struct squashfs_sb_info *, void *, + struct buffer_head **, int, int, int, + struct squashfs_page_actor *); int id; char *name; int supported; }; -static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, - void *s) +static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int length) { - if (msblk->decompressor) - msblk->decompressor->free(s); -} - -static inline int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) -{ - return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + return msblk->decompressor->comp_opts ? + msblk->decompressor->comp_opts(msblk, buff, length) : NULL; } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 00000000000..d6008a63647 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2013 + * Minchan Kim <minchan@kernel.org> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/cpumask.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression in the + * decompressor framework + */ + + +/* + * The reason that multiply two is that a CPU can request new I/O + * while it is waiting previous request. + */ +#define MAX_DECOMPRESSOR (num_online_cpus() * 2) + + +int squashfs_max_decompressors(void) +{ + return MAX_DECOMPRESSOR; +} + + +struct squashfs_stream { + void *comp_opts; + struct list_head strm_list; + struct mutex mutex; + int avail_decomp; + wait_queue_head_t wait; +}; + + +struct decomp_stream { + void *stream; + struct list_head list; +}; + + +static void put_decomp_stream(struct decomp_stream *decomp_strm, + struct squashfs_stream *stream) +{ + mutex_lock(&stream->mutex); + list_add(&decomp_strm->list, &stream->strm_list); + mutex_unlock(&stream->mutex); + wake_up(&stream->wait); +} + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct decomp_stream *decomp_strm = NULL; + int err = -ENOMEM; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + goto out; + + stream->comp_opts = comp_opts; + mutex_init(&stream->mutex); + INIT_LIST_HEAD(&stream->strm_list); + init_waitqueue_head(&stream->wait); + + /* + * We should have a decompressor at least as default + * so if we fail to allocate new decompressor dynamically, + * we could always fall back to default decompressor and + * file system works. + */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto out; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + err = PTR_ERR(decomp_strm->stream); + goto out; + } + + list_add(&decomp_strm->list, &stream->strm_list); + stream->avail_decomp = 1; + return stream; + +out: + kfree(decomp_strm); + kfree(stream); + return ERR_PTR(err); +} + + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + if (stream) { + struct decomp_stream *decomp_strm; + + while (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + msblk->decompressor->free(decomp_strm->stream); + kfree(decomp_strm); + stream->avail_decomp--; + } + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); + } +} + + +static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, + struct squashfs_stream *stream) +{ + struct decomp_stream *decomp_strm; + + while (1) { + mutex_lock(&stream->mutex); + + /* There is available decomp_stream */ + if (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + mutex_unlock(&stream->mutex); + break; + } + + /* + * If there is no available decomp and already full, + * let's wait for releasing decomp from other users. + */ + if (stream->avail_decomp >= MAX_DECOMPRESSOR) + goto wait; + + /* Let's allocate new decomp */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto wait; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + kfree(decomp_strm); + goto wait; + } + + stream->avail_decomp++; + WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + + mutex_unlock(&stream->mutex); + break; +wait: + /* + * If system memory is tough, let's for other's + * releasing instead of hurting VM because it could + * make page cache thrashing. + */ + mutex_unlock(&stream->mutex); + wait_event(stream->wait, + !list_empty(&stream->strm_list)); + } + + return decomp_strm; +} + + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); + res = msblk->decompressor->decompress(msblk, decomp_stream->stream, + bh, b, offset, length, output); + put_decomp_stream(decomp_stream, stream); + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + return res; +} diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 00000000000..23a9c28ad8e --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/percpu.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 00000000000..a6c75929a00 --- /dev/null +++ b/fs/squashfs/decompressor_single.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/buffer_head.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements single-threaded decompression in the + * decompressor framework + */ + +struct squashfs_stream { + void *stream; + struct mutex mutex; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + int err = -ENOMEM; + + stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto out; + + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + + kfree(comp_opts); + mutex_init(&stream->mutex); + return stream; + +out: + kfree(stream); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + + if (stream) { + msblk->decompressor->free(stream->stream); + kfree(stream); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + + mutex_lock(&stream->mutex); + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); + mutex_unlock(&stream->mutex); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return 1; +} diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe1..e5c9689062b 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } - -static int squashfs_readpage(struct file *file, struct page *page) +/* Copy data into page cache */ +void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, + int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes, i, offset = 0, sparse = 0; - struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int start_index = page->index & ~mask; - int end_index = start_index | mask; - int file_end = i_size_read(inode) >> msblk->block_log; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); - - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - /* - * Reading a datablock from disk. Need to read block list - * to get location and block size. - */ - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) { /* hole */ - bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - sparse = 1; - } else { - /* - * Read and decompress datablock. - */ - buffer = squashfs_get_datablock(inode->i_sb, - block, bsize); - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x" - "\n", block, bsize); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = buffer->length; - } - } else { - /* - * Datablock is stored inside a fragment (tail-end packed - * block). - */ - buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = i_size_read(inode) & (msblk->block_size - 1); - offset = squashfs_i(inode)->fragment_offset; - } + int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = page->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -475,11 +413,75 @@ skip_page: if (i != page->index) page_cache_release(push_page); } +} + +/* Read datablock stored packed inside a fragment (tail-end packed block) */ +static int squashfs_readpage_fragment(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + else + squashfs_copy_cache(page, buffer, i_size_read(inode) & + (msblk->block_size - 1), + squashfs_i(inode)->fragment_offset); + + squashfs_cache_put(buffer); + return res; +} - if (!sparse) - squashfs_cache_put(buffer); +static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + squashfs_copy_cache(page, NULL, bytes, 0); return 0; +} + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int file_end = i_size_read(inode) >> msblk->block_log; + int res; + void *pageaddr; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 00000000000..f2310d2a201 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 00000000000..2943b2bfae4 --- /dev/null +++ b/fs/squashfs/file_direct.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "page_actor.h" + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); + +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) + +{ + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; + } + + if (PageUptodate(page[i])) { + unlock_page(page[i]); + page_cache_release(page[i]); + page[i] = NULL; + missing_pages++; + } + } + + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + goto out; + } + + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. Target_page is + * dealt with by the caller + */ + for (i = 0; i < pages; i++) { + if (page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } + +out: + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f08..244b9fbfff7 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,13 +31,14 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_lzo { void *input; void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) { int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -74,22 +75,16 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - struct squashfs_lzo *stream = msblk->stream; - void *buff = stream->input; + struct squashfs_lzo *stream = strm; + void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t out_len = srclength; - - mutex_lock(&msblk->read_data_mutex); + size_t out_len = output->length; for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, goto failed; res = bytes = (int)out_len; - for (i = 0, buff = stream->output; bytes && i < pages; i++) { - avail = min_t(int, bytes, PAGE_CACHE_SIZE); - memcpy(buffer[i], buff, avail); - buff += avail; - bytes -= avail; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } } + squashfs_finish_page(output); - mutex_unlock(&msblk->read_data_mutex); return res; -block_release: - for (; i < b; i++) - put_bh(bh[i]); - failed: - mutex_unlock(&msblk->read_data_mutex); - - ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 00000000000..5a1c11f5644 --- /dev/null +++ b/fs/squashfs/page_actor.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include "page_actor.h" + +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ + +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; +} + +static void *cache_next_page(struct squashfs_page_actor *actor) +{ + if (actor->next_page == actor->pages) + return NULL; + + return actor->buffer[actor->next_page++]; +} + +static void cache_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} + +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; +} + +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); +} + +static void *direct_next_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); +} + +static void direct_finish_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); +} + +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; +} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 00000000000..26dd82008b8 --- /dev/null +++ b/fs/squashfs/page_actor.h @@ -0,0 +1,81 @@ +#ifndef PAGE_ACTOR_H +#define PAGE_ACTOR_H +/* + * Copyright (c) 2013 + * Phillip Lougher <phillip@squashfs.org.uk> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#ifndef CONFIG_SQUASHFS_FILE_DIRECT +struct squashfs_page_actor { + void **page; + int pages; + int length; + int next_page; +}; + +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} + +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->page[0]; +} + +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; +} + +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; + +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif +#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed0..9e1bb79f7e6 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int, int); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_init(struct super_block *, unsigned short); +extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); + +/* decompressor_xxx.c */ +extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); +extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); +extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, + int, int, int, struct squashfs_page_actor *); +extern int squashfs_max_decompressors(void); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, @@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); +/* file.c */ +void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, + int); + +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); + /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f29..1da565cb50c 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,6 +50,7 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; + struct squashfs_page_actor *actor; }; struct squashfs_sb_info { @@ -63,10 +64,9 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; - struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - void *stream; + struct squashfs_stream *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053c..202df6312d4 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); - mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + msblk->read_page = squashfs_cache_init("data", + squashfs_max_decompressors(), msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_init(sb, flags); + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_free(msblk, msblk->stream); + squashfs_decompressor_destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_free(sbi, sbi->stream); + squashfs_decompressor_destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f..c609624e4b8 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,44 +32,70 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; struct xz_buf buf; }; -struct comp_opts { +struct disk_comp_opts { __le32 dictionary_size; __le32 flags; }; -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, - int len) +struct comp_opts { + int dict_size; +}; + +static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) { - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int dict_size = msblk->block_size; - int err, n; + struct disk_comp_opts *comp_opts = buff; + struct comp_opts *opts; + int err = 0, n; + + opts = kmalloc(sizeof(*opts), GFP_KERNEL); + if (opts == NULL) { + err = -ENOMEM; + goto out2; + } if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto failed; + goto out; } - dict_size = le32_to_cpu(comp_opts->dictionary_size); + opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(dict_size) - 1; - if (dict_size != (1 << n) && dict_size != (1 << n) + + n = ffs(opts->dict_size) - 1; + if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto failed; + goto out; } - } + } else + /* use defaults */ + opts->dict_size = max_t(int, msblk->block_size, + SQUASHFS_METADATA_SIZE); + + return opts; + +out: + kfree(opts); +out2: + return ERR_PTR(err); +} + - dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int err; stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { enum xz_ret xz_err; - int avail, total = 0, k = 0, page = 0; - struct squashfs_xz *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int avail, total = 0, k = 0; + struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); stream->buf.in_pos = 0; stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = buffer[page++]; + stream->buf.out = squashfs_first_page(output); do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size - && page < pages) { - stream->buf.out = buffer[page++]; - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + if (stream->buf.out_pos == stream->buf.out_size) { + stream->buf.out = squashfs_next_page(output); + if (stream->buf.out != NULL) { + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (xz_err == XZ_OK); - if (xz_err != XZ_STREAM_END) { - ERROR("xz_dec_run error, data probably corrupt\n"); - goto release_mutex; - } - - if (k < b) { - ERROR("xz_uncompress error, input remaining\n"); - goto release_mutex; - } + squashfs_finish_page(output); - total += stream->buf.out_pos; - mutex_unlock(&msblk->read_data_mutex); - return total; + if (xz_err != XZ_STREAM_END || k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return total + stream->buf.out_pos; +out: for (; k < b; k++) put_bh(bh[k]); @@ -172,6 +184,7 @@ release_mutex: const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, + .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d8..8727caba688 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,8 +32,9 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -61,44 +62,37 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0; - int k = 0, page = 0; - z_stream *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + int zlib_err, zlib_init = 0, k = 0; + z_stream *stream = strm; - stream->avail_out = 0; + stream->avail_out = PAGE_CACHE_SIZE; + stream->next_out = squashfs_first_page(output); stream->avail_in = 0; do { if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; } - if (stream->avail_out == 0 && page < pages) { - stream->next_out = buffer[page++]; - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0) { + stream->next_out = squashfs_next_page(output); + if (stream->next_out != NULL) + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned unexpected " - "result 0x%x, srclength %d\n", - zlib_err, srclength); - goto release_mutex; + squashfs_finish_page(output); + goto out; } zlib_init = 1; } @@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (zlib_err == Z_OK); - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + squashfs_finish_page(output); - zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_STREAM_END) + goto out; - if (k < b) { - ERROR("zlib_uncompress error, data remaining\n"); - goto release_mutex; - } + zlib_err = zlib_inflateEnd(stream); + if (zlib_err != Z_OK) + goto out; - length = stream->total_out; - mutex_unlock(&msblk->read_data_mutex); - return length; + if (k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; +out: for (; k < b; k++) put_bh(bh[k]); |