diff options
Diffstat (limited to 'fs')
104 files changed, 2704 insertions, 1188 deletions
@@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt; static const struct file_operations aio_ring_fops; static const struct address_space_operations aio_ctx_aops; +/* Backing dev info for aio fs. + * -no dirty page accounting or writeback happens + */ +static struct backing_dev_info aio_fs_backing_dev_info = { + .name = "aiofs", + .state = 0, + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY, +}; + static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) { struct qstr this = QSTR_INIT("[aio]", 5); @@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages) inode->i_mapping->a_ops = &aio_ctx_aops; inode->i_mapping->private_data = ctx; + inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info; inode->i_size = PAGE_SIZE * nr_pages; path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this); @@ -220,6 +230,9 @@ static int __init aio_setup(void) if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); + if (bdi_init(&aio_fs_backing_dev_info)) + panic("Failed to init aio fs backing dev info."); + kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); @@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = { .mmap = aio_ring_mmap, }; -static int aio_set_page_dirty(struct page *page) -{ - return 0; -} - #if IS_ENABLED(CONFIG_MIGRATION) static int aio_migratepage(struct address_space *mapping, struct page *new, struct page *old, enum migrate_mode mode) @@ -357,7 +365,7 @@ out: #endif static const struct address_space_operations aio_ctx_aops = { - .set_page_dirty = aio_set_page_dirty, + .set_page_dirty = __set_page_dirty_no_writeback, #if IS_ENABLED(CONFIG_MIGRATION) .migratepage = aio_migratepage, #endif @@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx) pr_debug("pid(%d) page[%d]->count=%d\n", current->pid, i, page_count(page)); SetPageUptodate(page); - SetPageDirty(page); unlock_page(page); ctx->ring_pages[i] = page; diff --git a/fs/block_dev.c b/fs/block_dev.c index 1d9c9f3754f..b48c41bf0f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev) sb = get_active_super(bdev); if (!sb) goto out; - error = freeze_super(sb); + if (sb->s_op->freeze_super) + error = sb->s_op->freeze_super(sb); + else + error = freeze_super(sb); if (error) { deactivate_super(sb); bdev->bd_fsfreeze_count--; @@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (!sb) goto out; - error = thaw_super(sb); + if (sb->s_op->thaw_super) + error = sb->s_op->thaw_super(sb); + else + error = thaw_super(sb); if (error) { bdev->bd_fsfreeze_count++; mutex_unlock(&bdev->bd_fsfreeze_mutex); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d3220d31d3c..dcd9be32ac5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); - if (*pg_index == (vcnt - 1) && *pg_offset == 0) - memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); @@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, return 1; } + +/* + * When uncompressing data, we need to make sure and zero any parts of + * the biovec that were not filled in by the decompression code. pg_index + * and pg_offset indicate the last page and the last offset of that page + * that have been filled in. This will zero everything remaining in the + * biovec. + */ +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset) +{ + while (pg_index < vcnt) { + struct page *page = bvec[pg_index].bv_page; + unsigned long off = bvec[pg_index].bv_offset; + unsigned long len = bvec[pg_index].bv_len; + + if (pg_offset < off) + pg_offset = off; + if (pg_offset < off + len) { + unsigned long bytes = off + len - pg_offset; + char *kaddr; + + kaddr = kmap_atomic(page); + memset(kaddr + pg_offset, 0, bytes); + kunmap_atomic(kaddr); + } + pg_index++; + pg_offset = 0; + } +} diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 0c803b4fbf9..d181f70caae 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, unsigned long nr_pages); int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, int mirror_num, unsigned long bio_flags); - +void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt, + unsigned long pg_index, + unsigned long pg_offset); struct btrfs_compress_op { struct list_head *(*alloc_workspace)(void); diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index 78285f30909..617553cdb7d 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c @@ -373,6 +373,8 @@ cont: } done: kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in, goto out; } + /* + * the caller is already checking against PAGE_SIZE, but lets + * move this check closer to the memcpy/memset + */ + destlen = min_t(unsigned long, destlen, PAGE_SIZE); bytes = min_t(unsigned long, destlen, out_len - start_byte); kaddr = kmap_atomic(dest_page); memcpy(kaddr, workspace->buf + start_byte, bytes); + + /* + * btrfs_getblock is doing a zero on the tail of the page too, + * but this will cover anything missing from the decompressed + * data. + */ + if (bytes < destlen) + memset(kaddr+bytes, 0, destlen-bytes); kunmap_atomic(kaddr); out: return ret; diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index 759fa4e2de8..fb22fd8d8fb 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -299,6 +299,8 @@ done: zlib_inflateEnd(&workspace->strm); if (data_in) kunmap(pages_in[page_in_index]); + if (!ret) + btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset); return ret; } @@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, struct workspace *workspace = list_entry(ws, struct workspace, list); int ret = 0; int wbits = MAX_WBITS; - unsigned long bytes_left = destlen; + unsigned long bytes_left; unsigned long total_out = 0; + unsigned long pg_offset = 0; char *kaddr; + destlen = min_t(unsigned long, destlen, PAGE_SIZE); + bytes_left = destlen; + workspace->strm.next_in = data_in; workspace->strm.avail_in = srclen; workspace->strm.total_in = 0; @@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in, unsigned long buf_start; unsigned long buf_offset; unsigned long bytes; - unsigned long pg_offset = 0; ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH); if (ret != Z_OK && ret != Z_STREAM_END) @@ -384,6 +389,17 @@ next: ret = 0; zlib_inflateEnd(&workspace->strm); + + /* + * this should only happen if zlib returned fewer bytes than we + * expected. btrfs_get_block is responsible for zeroing from the + * end of the inline extent (destlen) to the end of the page + */ + if (pg_offset < destlen) { + kaddr = kmap_atomic(dest_page); + memset(kaddr + pg_offset, 0, destlen - pg_offset); + kunmap_atomic(kaddr); + } return ret; } diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 44ec72684df..9c56ef77640 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -34,27 +34,9 @@ void cifs_dump_mem(char *label, void *data, int length) { - int i, j; - int *intptr = data; - char *charptr = data; - char buf[10], line[80]; - - printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n", - label, length, data); - for (i = 0; i < length; i += 16) { - line[0] = 0; - for (j = 0; (j < 4) && (i + j * 4 < length); j++) { - sprintf(buf, " %08x", intptr[i / 4 + j]); - strcat(line, buf); - } - buf[0] = ' '; - buf[2] = 0; - for (j = 0; (j < 16) && (i + j < length); j++) { - buf[1] = isprint(charptr[i + j]) ? charptr[i + j] : '.'; - strcat(line, buf); - } - printk(KERN_DEBUG "%s\n", line); - } + pr_debug("%s: dump of %d bytes of data at 0x%p\n", label, length, data); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 16, 4, + data, length, true); } #ifdef CONFIG_CIFS_DEBUG @@ -68,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR "CIFS VFS: %pV", &vaf); + pr_err("CIFS VFS: %pV", &vaf); va_end(args); } @@ -274,6 +256,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; struct list_head *tmp1, *tmp2, *tmp3; struct TCP_Server_Info *server; @@ -284,7 +267,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, if (rc) return rc; - if (c == '1' || c == 'y' || c == 'Y' || c == '0') { + if (strtobool(&c, &bv) == 0) { #ifdef CONFIG_CIFS_STATS2 atomic_set(&totBufAllocCount, 0); atomic_set(&totSmBufAllocCount, 0); @@ -451,15 +434,14 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - cifsFYI = 0; - else if (c == '1' || c == 'y' || c == 'Y') - cifsFYI = 1; + if (strtobool(&c, &bv) == 0) + cifsFYI = bv; else if ((c > '1') && (c <= '9')) cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */ @@ -490,15 +472,18 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - linuxExtEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - linuxExtEnabled = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + linuxExtEnabled = bv; return count; } @@ -527,15 +512,18 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - lookupCacheEnabled = 0; - else if (c == '1' || c == 'y' || c == 'Y') - lookupCacheEnabled = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + lookupCacheEnabled = bv; return count; } @@ -564,15 +552,18 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; + bool bv; int rc; rc = get_user(c, buffer); if (rc) return rc; - if (c == '0' || c == 'n' || c == 'N') - traceSMB = 0; - else if (c == '1' || c == 'y' || c == 'Y') - traceSMB = 1; + + rc = strtobool(&c, &bv); + if (rc) + return rc; + + traceSMB = bv; return count; } @@ -630,6 +621,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, unsigned int flags; char flags_string[12]; char c; + bool bv; if ((count < 1) || (count > 11)) return -EINVAL; @@ -642,11 +634,8 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if (count < 3) { /* single char or single char followed by null */ c = flags_string[0]; - if (c == '0' || c == 'n' || c == 'N') { - global_secflags = CIFSSEC_DEF; /* default */ - return count; - } else if (c == '1' || c == 'y' || c == 'Y') { - global_secflags = CIFSSEC_MAX; + if (strtobool(&c, &bv) == 0) { + global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF; return count; } else if (!isdigit(c)) { cifs_dbg(VFS, "Invalid SecurityFlags: %s\n", diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index c99b40fb609..f40fbaca1b2 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h @@ -53,13 +53,12 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...); do { \ if (type == FYI) { \ if (cifsFYI & CIFS_INFO) { \ - printk(KERN_DEBUG "%s: " fmt, \ - __FILE__, ##__VA_ARGS__); \ + pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \ } \ } else if (type == VFS) { \ cifs_vfs_err(fmt, ##__VA_ARGS__); \ } else if (type == NOISY && type != 0) { \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + pr_debug(fmt, ##__VA_ARGS__); \ } \ } while (0) @@ -71,7 +70,7 @@ do { \ #define cifs_dbg(type, fmt, ...) \ do { \ if (0) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ + pr_debug(fmt, ##__VA_ARGS__); \ } while (0) #endif diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 002e0c17393..252f5c15806 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.05" +#define CIFS_VERSION "2.06" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 24fa08d261f..2a772da16b8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1466,9 +1466,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->seal = 1; break; case Opt_noac: - printk(KERN_WARNING "CIFS: Mount option noac not " - "supported. Instead set " - "/proc/fs/cifs/LookupCacheEnabled to 0\n"); + pr_warn("CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n"); break; case Opt_fsc: #ifndef CONFIG_CIFS_FSCACHE @@ -1598,7 +1596,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (strnlen(string, CIFS_MAX_USERNAME_LEN) > CIFS_MAX_USERNAME_LEN) { - printk(KERN_WARNING "CIFS: username too long\n"); + pr_warn("CIFS: username too long\n"); goto cifs_parse_mount_err; } vol->username = kstrdup(string, GFP_KERNEL); @@ -1662,8 +1660,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); if (vol->password == NULL) { - printk(KERN_WARNING "CIFS: no memory " - "for password\n"); + pr_warn("CIFS: no memory for password\n"); goto cifs_parse_mount_err; } @@ -1687,8 +1684,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (!cifs_convert_address(dstaddr, string, strlen(string))) { - printk(KERN_ERR "CIFS: bad ip= option (%s).\n", - string); + pr_err("CIFS: bad ip= option (%s).\n", string); goto cifs_parse_mount_err; } got_ip = true; @@ -1700,15 +1696,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) == CIFS_MAX_DOMAINNAME_LEN) { - printk(KERN_WARNING "CIFS: domain name too" - " long\n"); + pr_warn("CIFS: domain name too long\n"); goto cifs_parse_mount_err; } vol->domainname = kstrdup(string, GFP_KERNEL); if (!vol->domainname) { - printk(KERN_WARNING "CIFS: no memory " - "for domainname\n"); + pr_warn("CIFS: no memory for domainname\n"); goto cifs_parse_mount_err; } cifs_dbg(FYI, "Domain name set\n"); @@ -1721,8 +1715,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (!cifs_convert_address( (struct sockaddr *)&vol->srcaddr, string, strlen(string))) { - printk(KERN_WARNING "CIFS: Could not parse" - " srcaddr: %s\n", string); + pr_warn("CIFS: Could not parse srcaddr: %s\n", + string); goto cifs_parse_mount_err; } break; @@ -1732,8 +1726,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto out_nomem; if (strnlen(string, 1024) >= 65) { - printk(KERN_WARNING "CIFS: iocharset name " - "too long.\n"); + pr_warn("CIFS: iocharset name too long.\n"); goto cifs_parse_mount_err; } @@ -1741,8 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->iocharset = kstrdup(string, GFP_KERNEL); if (!vol->iocharset) { - printk(KERN_WARNING "CIFS: no memory" - "for charset\n"); + pr_warn("CIFS: no memory for charset\n"); goto cifs_parse_mount_err; } } @@ -1773,9 +1765,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, * set at top of the function */ if (i == RFC1001_NAME_LEN && string[i] != 0) - printk(KERN_WARNING "CIFS: netbiosname" - " longer than 15 truncated.\n"); - + pr_warn("CIFS: netbiosname longer than 15 truncated.\n"); break; case Opt_servern: /* servernetbiosname specified override *SMBSERVER */ @@ -1801,8 +1791,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* The string has 16th byte zero still from set at top of the function */ if (i == RFC1001_NAME_LEN && string[i] != 0) - printk(KERN_WARNING "CIFS: server net" - "biosname longer than 15 truncated.\n"); + pr_warn("CIFS: server netbiosname longer than 15 truncated.\n"); break; case Opt_ver: string = match_strdup(args); @@ -1814,8 +1803,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, break; } /* For all other value, error */ - printk(KERN_WARNING "CIFS: Invalid version" - " specified\n"); + pr_warn("CIFS: Invalid version specified\n"); goto cifs_parse_mount_err; case Opt_vers: string = match_strdup(args); @@ -1856,7 +1844,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } if (!sloppy && invalid) { - printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid); + pr_err("CIFS: Unknown mount option \"%s\"\n", invalid); goto cifs_parse_mount_err; } @@ -1882,8 +1870,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* No ip= option specified? Try to get it from UNC */ if (!cifs_convert_address(dstaddr, &vol->UNC[2], strlen(&vol->UNC[2]))) { - printk(KERN_ERR "Unable to determine destination " - "address.\n"); + pr_err("Unable to determine destination address.\n"); goto cifs_parse_mount_err; } } @@ -1894,20 +1881,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (uid_specified) vol->override_uid = override_uid; else if (override_uid == 1) - printk(KERN_NOTICE "CIFS: ignoring forceuid mount option " - "specified with no uid= option.\n"); + pr_notice("CIFS: ignoring forceuid mount option specified with no uid= option.\n"); if (gid_specified) vol->override_gid = override_gid; else if (override_gid == 1) - printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " - "specified with no gid= option.\n"); + pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n"); kfree(mountdata_copy); return 0; out_nomem: - printk(KERN_WARNING "Could not allocate temporary buffer\n"); + pr_warn("Could not allocate temporary buffer\n"); cifs_parse_mount_err: kfree(string); kfree(mountdata_copy); diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index b7415d596db..337946355b2 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -513,39 +513,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv) void dump_smb(void *buf, int smb_buf_length) { - int i, j; - char debug_line[17]; - unsigned char *buffer = buf; - if (traceSMB == 0) return; - for (i = 0, j = 0; i < smb_buf_length; i++, j++) { - if (i % 8 == 0) { - /* have reached the beginning of line */ - printk(KERN_DEBUG "| "); - j = 0; - } - printk("%0#4x ", buffer[i]); - debug_line[2 * j] = ' '; - if (isprint(buffer[i])) - debug_line[1 + (2 * j)] = buffer[i]; - else - debug_line[1 + (2 * j)] = '_'; - - if (i % 8 == 7) { - /* reached end of line, time to print ascii */ - debug_line[16] = 0; - printk(" | %s\n", debug_line); - } - } - for (; j < 8; j++) { - printk(" "); - debug_line[2 * j] = ' '; - debug_line[1 + (2 * j)] = ' '; - } - printk(" | %s\n", debug_line); - return; + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 8, 2, buf, + smb_buf_length, true); } void diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d116ca8ce4c..8eaf20a8064 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -794,10 +794,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) if it before then restart search if after then keep searching till find it */ - if (file->private_data == NULL) { - rc = -EINVAL; - goto rddir2_exit; - } cifsFile = file->private_data; if (cifsFile->srch_inf.endOfSearch) { if (cifsFile->srch_inf.emptyDir) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 57db63ff88d..446cb7fb3f5 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -1303,6 +1303,11 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data) if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN) cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */ + if (ses->Suid != smb_buf->Uid) { + ses->Suid = smb_buf->Uid; + cifs_dbg(FYI, "UID changed! new UID = %llu\n", ses->Suid); + } + bytes_remaining = get_bcc(smb_buf); bcc_ptr = pByteArea(smb_buf); blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c5f521bcdee..568f323665c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1102,6 +1102,64 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, return rc; } +static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, + loff_t off, loff_t len, bool keep_size) +{ + struct inode *inode; + struct cifsInodeInfo *cifsi; + struct cifsFileInfo *cfile = file->private_data; + long rc = -EOPNOTSUPP; + unsigned int xid; + + xid = get_xid(); + + inode = cfile->dentry->d_inode; + cifsi = CIFS_I(inode); + + /* if file not oplocked can't be sure whether asking to extend size */ + if (!CIFS_CACHE_READ(cifsi)) + if (keep_size == false) + return -EOPNOTSUPP; + + /* + * Files are non-sparse by default so falloc may be a no-op + * Must check if file sparse. If not sparse, and not extending + * then no need to do anything since file already allocated + */ + if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) { + if (keep_size == true) + return 0; + /* check if extending file */ + else if (i_size_read(inode) >= off + len) + /* not extending file and already not sparse */ + return 0; + /* BB: in future add else clause to extend file */ + else + return -EOPNOTSUPP; + } + + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { + /* + * Check if falloc starts within first few pages of file + * and ends within a few pages of the end of file to + * ensure that most of file is being forced to be + * fallocated now. If so then setting whole file sparse + * ie potentially making a few extra pages at the beginning + * or end of the file non-sparse via set_sparse is harmless. + */ + if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) + return -EOPNOTSUPP; + + rc = smb2_set_sparse(xid, tcon, cfile, inode, false); + } + /* BB: else ... in future add code to extend file and set sparse */ + + + free_xid(xid); + return rc; +} + + static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, loff_t off, loff_t len) { @@ -1112,7 +1170,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, if (mode & FALLOC_FL_KEEP_SIZE) return smb3_zero_range(file, tcon, off, len, true); return smb3_zero_range(file, tcon, off, len, false); - } + } else if (mode == FALLOC_FL_KEEP_SIZE) + return smb3_simple_falloc(file, tcon, off, len, true); + else if (mode == 0) + return smb3_simple_falloc(file, tcon, off, len, false); return -EOPNOTSUPP; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8f1672bb82d..0ca7f636475 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -431,8 +431,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) if (rc) goto neg_exit; if (blob_length) - rc = decode_neg_token_init(security_blob, blob_length, - &server->sec_type); + rc = decode_negTokenInit(security_blob, blob_length, server); if (rc == 1) rc = 0; else if (rc == 0) { diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index e3188abdafd..d84f46c5b2c 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -836,6 +836,25 @@ struct smb2_query_directory_rsp { #define SMB2_O_INFO_SECURITY 0x03 #define SMB2_O_INFO_QUOTA 0x04 +/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */ +#define OWNER_SECINFO 0x00000001 +#define GROUP_SECINFO 0x00000002 +#define DACL_SECINFO 0x00000004 +#define SACL_SECINFO 0x00000008 +#define LABEL_SECINFO 0x00000010 +#define ATTRIBUTE_SECINFO 0x00000020 +#define SCOPE_SECINFO 0x00000040 +#define BACKUP_SECINFO 0x00010000 +#define UNPROTECTED_SACL_SECINFO 0x10000000 +#define UNPROTECTED_DACL_SECINFO 0x20000000 +#define PROTECTED_SACL_SECINFO 0x40000000 +#define PROTECTED_DACL_SECINFO 0x80000000 + +/* Flags used for FileFullEAinfo */ +#define SL_RESTART_SCAN 0x00000001 +#define SL_RETURN_SINGLE_ENTRY 0x00000002 +#define SL_INDEX_SPECIFIED 0x00000004 + struct smb2_query_info_req { struct smb2_hdr hdr; __le16 StructureSize; /* Must be 41 */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 9d087f4e7d4..126f46b887c 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -99,9 +99,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) something is wrong, unless it is quite a slow link or server */ if ((now - midEntry->when_alloc) > HZ) { if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) { - printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu", + pr_debug(" CIFS slow rsp: cmd %d mid %llu", midEntry->command, midEntry->mid); - printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n", + pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n", now - midEntry->when_alloc, now - midEntry->when_sent, now - midEntry->when_received); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 83f3d552030..35502d4046f 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -5886,6 +5886,78 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, return error; } +/* + * The caller asks for an orphan lock on a given resource with a given mode. + * If a matching lock exists, it's moved to the owner's list of locks and + * the lkid is returned. + */ + +int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, void *name, unsigned int namelen, + unsigned long timeout_cs, uint32_t *lkid) +{ + struct dlm_lkb *lkb; + struct dlm_user_args *ua; + int found_other_mode = 0; + int found = 0; + int rv = 0; + + mutex_lock(&ls->ls_orphans_mutex); + list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) { + if (lkb->lkb_resource->res_length != namelen) + continue; + if (memcmp(lkb->lkb_resource->res_name, name, namelen)) + continue; + if (lkb->lkb_grmode != mode) { + found_other_mode = 1; + continue; + } + + found = 1; + list_del_init(&lkb->lkb_ownqueue); + lkb->lkb_flags &= ~DLM_IFL_ORPHAN; + *lkid = lkb->lkb_id; + break; + } + mutex_unlock(&ls->ls_orphans_mutex); + + if (!found && found_other_mode) { + rv = -EAGAIN; + goto out; + } + + if (!found) { + rv = -ENOENT; + goto out; + } + + lkb->lkb_exflags = flags; + lkb->lkb_ownpid = (int) current->pid; + + ua = lkb->lkb_ua; + + ua->proc = ua_tmp->proc; + ua->xid = ua_tmp->xid; + ua->castparam = ua_tmp->castparam; + ua->castaddr = ua_tmp->castaddr; + ua->bastparam = ua_tmp->bastparam; + ua->bastaddr = ua_tmp->bastaddr; + ua->user_lksb = ua_tmp->user_lksb; + + /* + * The lkb reference from the ls_orphans list was not + * removed above, and is now considered the reference + * for the proc locks list. + */ + + spin_lock(&ua->proc->locks_spin); + list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks); + spin_unlock(&ua->proc->locks_spin); + out: + kfree(ua_tmp); + return rv; +} + int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in) { @@ -6029,7 +6101,7 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb) struct dlm_args args; int error; - hold_lkb(lkb); + hold_lkb(lkb); /* reference for the ls_orphans list */ mutex_lock(&ls->ls_orphans_mutex); list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans); mutex_unlock(&ls->ls_orphans_mutex); @@ -6217,7 +6289,7 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc, { int error = 0; - if (nodeid != dlm_our_nodeid()) { + if (nodeid && (nodeid != dlm_our_nodeid())) { error = send_purge(ls, nodeid, pid); } else { dlm_lock_recovery(ls); diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 5e0c72e36a9..ed8ebd3a859 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -49,6 +49,9 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode, int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, int mode, uint32_t flags, uint32_t lkid, char *lvb_in, unsigned long timeout_cs); +int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, + int mode, uint32_t flags, void *name, unsigned int namelen, + unsigned long timeout_cs, uint32_t *lkid); int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, uint32_t flags, uint32_t lkid, char *lvb_in); int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 142e21655ee..fb85f32e9ec 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -238,6 +238,7 @@ static int device_user_lock(struct dlm_user_proc *proc, { struct dlm_ls *ls; struct dlm_user_args *ua; + uint32_t lkid; int error = -ENOMEM; ls = dlm_find_lockspace_local(proc->lockspace); @@ -260,12 +261,20 @@ static int device_user_lock(struct dlm_user_proc *proc, ua->bastaddr = params->bastaddr; ua->xid = params->xid; - if (params->flags & DLM_LKF_CONVERT) + if (params->flags & DLM_LKF_CONVERT) { error = dlm_user_convert(ls, ua, params->mode, params->flags, params->lkid, params->lvb, (unsigned long) params->timeout); - else { + } else if (params->flags & DLM_LKF_ORPHAN) { + error = dlm_user_adopt_orphan(ls, ua, + params->mode, params->flags, + params->name, params->namelen, + (unsigned long) params->timeout, + &lkid); + if (!error) + error = lkid; + } else { error = dlm_user_request(ls, ua, params->mode, params->flags, params->name, params->namelen, diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 0a48886e069..6dad1176ec5 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb) } static struct file_system_type efivarfs_type = { + .owner = THIS_MODULE, .name = "efivarfs", .mount = efivarfs_mount, .kill_sb = efivarfs_kill_sb, @@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = { static __init int efivarfs_init(void) { if (!efi_enabled(EFI_RUNTIME_SERVICES)) - return 0; + return -ENODEV; if (!efivars_kobject()) - return 0; + return -ENODEV; return register_filesystem(&efivarfs_type); } +static __exit void efivarfs_exit(void) +{ + unregister_filesystem(&efivarfs_type); +} + MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr"); MODULE_DESCRIPTION("EFI Variable Filesystem"); MODULE_LICENSE("GPL"); MODULE_ALIAS_FS("efivarfs"); module_init(efivarfs_init); +module_exit(efivarfs_exit); diff --git a/fs/exec.c b/fs/exec.c index 7302b75a982..01aebe30020 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) goto err; mm->stack_vm = mm->total_vm = 1; + arch_bprm_mm_init(mm, vma); up_write(&mm->mmap_sem); bprm->p = vma->vm_end - sizeof(void *); return 0; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d9a17d0b124..e4279ead4a0 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -689,6 +689,9 @@ struct ext2_inode_info { struct mutex truncate_mutex; struct inode vfs_inode; struct list_head i_orphan; /* unlinked but open inodes */ +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif }; /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 170dc41e8bf..ae55fddc26a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -166,6 +166,10 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) return NULL; ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -303,6 +307,10 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static struct dquot **ext2_get_dquots(struct inode *inode) +{ + return EXT2_I(inode)->i_dquot; +} #endif static const struct super_operations ext2_sops = { @@ -320,6 +328,7 @@ static const struct super_operations ext2_sops = { #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, .quota_write = ext2_quota_write, + .get_dquots = ext2_get_dquots, #endif }; @@ -1090,6 +1099,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; sb->s_qcop = &dquot_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif root = ext2_iget(sb, EXT2_ROOT_INO); diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h index fc3cdcf24ae..f483a80b3fe 100644 --- a/fs/ext3/ext3.h +++ b/fs/ext3/ext3.h @@ -615,6 +615,10 @@ struct ext3_inode_info { atomic_t i_sync_tid; atomic_t i_datasync_tid; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + struct inode vfs_inode; }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index eb742d0e67f..9b4e7d750d4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -485,6 +485,10 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) ei->vfs_inode.i_version = 1; atomic_set(&ei->i_datasync_tid, 0); atomic_set(&ei->i_sync_tid, 0); +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -764,6 +768,10 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext3_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static struct dquot **ext3_get_dquots(struct inode *inode) +{ + return EXT3_I(inode)->i_dquot; +} static const struct dquot_operations ext3_quota_operations = { .write_dquot = ext3_write_dquot, @@ -803,6 +811,7 @@ static const struct super_operations ext3_sops = { #ifdef CONFIG_QUOTA .quota_read = ext3_quota_read, .quota_write = ext3_quota_write, + .get_dquots = ext3_get_dquots, #endif .bdev_try_to_free_page = bdev_try_to_free_page, }; @@ -2001,6 +2010,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->s_qcop = &ext3_qctl_operations; sb->dq_op = &ext3_quota_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c55a1faaed5..db3f772e57a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -941,6 +941,10 @@ struct ext4_inode_info { tid_t i_sync_tid; tid_t i_datasync_tid; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ __u32 i_csum_seed; }; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2c9e6864abd..63e802b8ec6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -892,6 +892,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif ei->jinode = NULL; INIT_LIST_HEAD(&ei->i_rsv_conversion_list); @@ -1068,6 +1069,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags); static int ext4_enable_quotas(struct super_block *sb); +static struct dquot **ext4_get_dquots(struct inode *inode) +{ + return EXT4_I(inode)->i_dquot; +} + static const struct dquot_operations ext4_quota_operations = { .get_reserved_space = ext4_get_reserved_space, .write_dquot = ext4_write_dquot, @@ -1117,6 +1123,7 @@ static const struct super_operations ext4_sops = { #ifdef CONFIG_QUOTA .quota_read = ext4_quota_read, .quota_write = ext4_quota_write, + .get_dquots = ext4_get_dquots, #endif .bdev_try_to_free_page = bdev_try_to_free_page, }; @@ -3932,6 +3939,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_qcop = &ext4_qctl_sysfile_operations; else sb->s_qcop = &ext4_qctl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a8d11..1ccb26bc2a0 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -162,7 +162,8 @@ fail: return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } @@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return __f2fs_set_acl(inode, type, acl, NULL); } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +/* + * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create + * are copied from posix_acl.c + */ +static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, + gfp_t flags) +{ + struct posix_acl *clone = NULL; + + if (acl) { + int size = sizeof(struct posix_acl) + acl->a_count * + sizeof(struct posix_acl_entry); + clone = kmemdup(acl, size, flags); + if (clone) + atomic_set(&clone->a_refcount, 1); + } + return clone; +} + +static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) +{ + struct posix_acl_entry *pa, *pe; + struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; + umode_t mode = *mode_p; + int not_equiv = 0; + + /* assert(atomic_read(acl->a_refcount) == 1); */ + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm &= (mode >> 6) | ~S_IRWXO; + mode &= (pa->e_perm << 6) | ~S_IRWXU; + break; + + case ACL_USER: + case ACL_GROUP: + not_equiv = 1; + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm &= mode | ~S_IRWXO; + mode &= pa->e_perm | ~S_IRWXO; + break; + + case ACL_MASK: + mask_obj = pa; + not_equiv = 1; + break; + + default: + return -EIO; + } + } + + if (mask_obj) { + mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; + } + + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; + return not_equiv; +} + +static int f2fs_acl_create(struct inode *dir, umode_t *mode, + struct posix_acl **default_acl, struct posix_acl **acl, + struct page *dpage) +{ + struct posix_acl *p; + int ret; + + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) + goto no_acl; + + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; + return PTR_ERR(p); + } + + if (!p) + goto apply_umask; + + *acl = f2fs_acl_clone(p, GFP_NOFS); + if (!*acl) + return -ENOMEM; + + ret = f2fs_acl_create_masq(*acl, mode); + if (ret < 0) { + posix_acl_release(*acl); + return -ENOMEM; + } + + if (ret == 0) { + posix_acl_release(*acl); + *acl = NULL; + } + + if (!S_ISDIR(*mode)) { + posix_acl_release(p); + *default_acl = NULL; + } else { + *default_acl = p; + } + return 0; + +apply_umask: + *mode &= ~current_umask(); +no_acl: + *default_acl = NULL; + *acl = NULL; + return 0; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { - struct posix_acl *default_acl, *acl; + struct posix_acl *default_acl = NULL, *acl = NULL; int error = 0; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) return error; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e0864651cdc..997ca8edb6c 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -38,14 +38,15 @@ struct f2fs_acl_header { extern struct posix_acl *f2fs_get_acl(struct inode *, int); extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a031c05..e6c271fefac 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -72,36 +72,36 @@ out: return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && !PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return 0; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type for (; nrpages-- > 0; blkno++) { block_t blk_addr; + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; + switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; + /* get nat block addr */ blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); if (blkno != start && prev_blk_addr + 1 != blk_addr) @@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type case META_SSA: case META_CP: case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SEG0_BLKADDR(sbi))) - goto out; blk_addr = blkno; break; default: @@ -169,6 +166,20 @@ out: return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { @@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: @@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); + if (type != ORPHAN_INO) + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? true : false; } @@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + spin_lock(&im->ino_lock); + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) struct dir_inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; @@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; @@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->n_orphans) + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->n_orphans) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) @@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } /* @@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4cc2cb..7ec697b37f1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; - - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - * should wait this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } @@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else + if (ret == -EAGAIN) ret = mpage_readpage(page, get_data_block); return ret; @@ -859,10 +835,11 @@ write: else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); done: @@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); -repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } +repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { - f2fs_put_page(page, 0); - goto fail; - } -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_fail; } - f2fs_wait_on_page_writeback(page, DATA); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; + } + err = f2fs_reserve_block(&dn, index); + if (err) + goto put_fail; +put_next: + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1010,18 +1002,10 @@ inline_data: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { @@ -1038,6 +1022,12 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; + +put_fail: + f2fs_put_dnode(&dn); +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int err; - /* Let buffer I/O handle the inline data case. */ - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } if (check_direct_IO(inode, rw, iter, offset)) return 0; @@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + invalidate_inmem_page(inode, page); + if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); @@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { @@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; - + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab813a9..91e8f699ab3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -167,8 +170,9 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) @@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d\n", + si->inmem_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", @@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f87149c0..b1a7d5737cd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; @@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) + struct qstr *name, int *max_slots, + struct page **res_page) +{ + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + de = find_target_dentry(name, max_slots, &d); + + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); + return de; +} + +struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, + struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; + f2fs_hash_t namehash = f2fs_dentry_hash(name); int max_len = 0; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) + else if (!test_bit_le(bit_pos - 1, d->bitmap)) max_len++; bit_pos++; continue; } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = dentry_page; - goto found; - } - } - if (max_len > *max_slots) { + de = &d->dentry[bit_pos]; + if (early_match_name(name->len, namehash, de) && + !memcmp(d->filename[bit_pos], name->name, name->len)) + goto found; + + if (max_slots && *max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. - */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } @@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + de = find_in_block(dentry_page, name, &max_slots, res_page); if (de) break; @@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (f2fs_has_inline_dentry(dir)) + return find_in_inline_dir(dir, child, res_page); + if (npages == 0) return NULL; @@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } @@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } -static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) { - struct page *dentry_page; - struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - dentry_page = get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - - dentry_blk = kmap_atomic(dentry_page); - - de = &dentry_blk->dentry[0]; + de = &d->dentry[0]; de->name_len = cpu_to_le16(1); de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); + memcpy(d->filename[0], ".", 1); set_de_type(de, inode); - de = &dentry_blk->dentry[1]; + de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); + memcpy(d->filename[1], "..", 2); set_de_type(de, inode); - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(0, (void *)d->bitmap); + test_and_set_bit_le(1, (void *)d->bitmap); +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_ptr d; + + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = kmap_atomic(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); + kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct page *page; int err; @@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; @@ -395,7 +433,7 @@ error: return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { @@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto next; } @@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, name, inode); + if (!err || err != -EAGAIN) + return err; + else + err = 0; + } + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; @@ -491,7 +533,8 @@ start: return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; @@ -506,7 +549,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -560,26 +603,57 @@ fail: return err; } +void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, @@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); @@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ -640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; @@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir) return true; } +bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos) +{ + unsigned char d_type = DT_UNKNOWN; + unsigned int bit_pos; + struct f2fs_dir_entry *de = NULL; + + bit_pos = ((unsigned long)ctx->pos % d->max); + + while (bit_pos < d->max) { + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) + break; + + de = &d->dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + if (!dir_emit(ctx, d->filename[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + return true; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = start_pos + bit_pos; + } + return false; +} + static int f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); - unsigned char d_type = DT_UNKNOWN; + struct f2fs_dentry_ptr d; - bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + if (f2fs_has_inline_dentry(inode)) + return f2fs_read_inline_dir(file, ctx); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) continue; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto stop; - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; - } - bit_pos = 0; + make_dentry_ptr(&d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK)) + goto stop; + ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80b2ee..ec58bb2373f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,8 +46,10 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_dentry_ptr { + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, + void *src, int type) +{ + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. @@ -269,6 +297,7 @@ struct f2fs_inode_info { struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ }; @@ -303,7 +332,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -433,6 +462,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -470,6 +500,14 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -488,7 +526,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -500,13 +537,9 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ @@ -557,7 +590,8 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -988,6 +1022,13 @@ retry: return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1044,6 +1085,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1057,11 +1107,13 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DATA_EXIST, /* indicate data exists */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { @@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void *inline_dentry_addr(struct page *page) +{ + struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); +} + +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) +{ + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, + struct f2fs_dentry_ptr *); +bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, + unsigned int); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *, struct page *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int room_for_filename(const void *, int, int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); @@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); +void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); @@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); @@ -1405,7 +1499,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ + } while (0) +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) - #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) @@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations; * inline.c */ bool f2fs_may_inline(struct inode *); +void read_inline_data(struct page *, struct page *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct page *); +void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, struct dir_context *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb64f83..3c27e0ecb3b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); @@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + f2fs_put_page(i, 0); + return ret; +} + +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) @@ -196,49 +233,36 @@ go_write: up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; - file_got_pino(inode); - up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. + */ + try_to_fix_pino(inode); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); - - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + sync_node_pages(sbi, ino, &wbc); - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } + + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; @@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; @@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; @@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); - if (!offset) - return; + return 0; page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) - return; + return 0; lock_page(page); if (unlikely(!PageUptodate(page) || @@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); - out: f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) - goto done; - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + ((from + blocksize - 1) >> (sbi->log_blocksize)); if (lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; + goto out; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: if (lock) f2fs_unlock_op(sbi); -done: + /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); + if (!err) + err = truncate_partial_data_page(inode, from); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); @@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; - if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (offset >= inode->i_size) return ret; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); +} + +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + return 0; } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return -EACCES; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - return 0; + + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) @@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = generic_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4acdb86..eec0933a481 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; @@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); +retry: + if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { + cond_resched(); + goto retry; + } + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); kmem_cache_free(winode_slab, ie); @@ -553,7 +556,7 @@ out: * the victim data block is ignored. */ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -605,27 +608,27 @@ next_step: data_page = find_data_page(inode, start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, + add_gc_inode(gc_list, inode); + continue; + } + + /* phase 3 */ + inode = find_gc_inode(gc_list, dni.ino); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); } - continue; -next_iput: - iput(inode); } if (++phase < 4) @@ -646,18 +649,20 @@ next_iput: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); @@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, + struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - INIT_LIST_HEAD(&ilist); + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; + gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -709,7 +716,7 @@ gc_more: write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; @@ -719,7 +726,7 @@ gc_more: META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +742,7 @@ gc_more: stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b2299..6ff7ad38463 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,6 +40,11 @@ struct inode_entry { struct inode *inode; }; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; +}; + /* * inline functions */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd7579..f2d3c581e77 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,35 +15,44 @@ bool f2fs_may_inline(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; - if (inode->i_blocks > nr_blocks) + if (!S_ISREG(inode->i_mode)) return false; - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (i_size_read(inode) > MAX_INLINE_DATA) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; + + f2fs_bug_on(F2FS_P_SB(page), page->index); + + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap_atomic(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); + kunmap_atomic(dst_addr); + SetPageUptodate(page); +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { @@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return PTR_ERR(ipage); } - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } - /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); - f2fs_put_page(ipage, 1); + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); -out: SetPageUptodate(page); + f2fs_put_page(ipage, 1); unlock_page(page); - return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; + int dirty, err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + src_addr = inline_data_addr(dn->inode_page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + flush_dcache_page(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); +no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); - write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + + write_data_page(page, dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); - /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); + /* this converted inline_data should be recovered. */ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + /* clear inline data and flag after data writeback */ + truncate_inline_data(dn->inode_page, 0); +clear_out: + stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, *page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + + f2fs_lock_op(sbi); - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - src_addr = kmap(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); - kunmap(page); - - /* Release the first data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); + src_addr = kmap_atomic(page); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap_atomic(src_addr); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) +void truncate_inline_data(struct page *ipage, u64 from) { - struct page *ipage; + void *addr; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + addr = inline_data_addr(ipage); + memset(addr + from, 0, MAX_INLINE_DATA - from); } bool recover_inline_data(struct inode *inode, struct page *npage) @@ -236,6 +241,10 @@ process_inline: src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -244,16 +253,279 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + truncate_inline_data(ipage, 0); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct qstr *name, struct page **res_page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + struct page *ipage; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + de = find_target_dentry(name, NULL, &d); + + unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(sbi, d.max < 0); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dentry_ptr d; + + dentry_blk = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap_atomic(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap_atomic(dentry_blk); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear inline dir and flag after data writeback */ + truncate_inline_data(ipage, 0); + + stat_dec_inline_dir(dir); + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + struct f2fs_dir_entry *de; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + name_hash = f2fs_dentry_hash(name); + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = inline_data_addr(ipage); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + + f2fs_wait_on_page_writeback(ipage, NODE); + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, 0); +fail: + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, NODE); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +{ + struct inode *inode = file_inode(file); + struct f2fs_inline_dentry *inline_dentry = NULL; + struct page *ipage = NULL; + struct f2fs_dentry_ptr d; + + if (ctx->pos == NR_INLINE_DENTRY) + return 0; + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(ctx, &d, 0)) + ctx->pos = NR_INLINE_DENTRY; + + f2fs_put_page(ipage, 1); + return 0; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0deead4505e..196cc7843aa 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static int __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + struct f2fs_inode *ri; + void *zbuf; + + zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); + if (!zbuf) + return -ENOMEM; + + if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { + kfree(zbuf); + return 0; + } + kfree(zbuf); + + f2fs_wait_on_page_writeback(ipage, NODE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + + ri = F2FS_INODE(ipage); + set_raw_inline(F2FS_I(inode), ri); + set_page_dirty(ipage); + return 0; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode) get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + err = __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); - return 0; + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + + return err; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) @@ -156,7 +190,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e5aa1..547a2deeb1a 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + if (f2fs_may_inline(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); + stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page); if (de) { nid_t ino = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - - stat_inc_inline_inode(inode); } return d_splice_alias(inode, dentry); @@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; @@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return err; out: handle_failed_inode(inode); @@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); + stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { @@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(new_dir); f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_undo: /* Still we may fail to recover name info of f2fs_inode here */ @@ -596,19 +626,19 @@ out_unlock: f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { - kunmap(new_dir_page); + f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afef43d..f83326ca32e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - val.totalhigh; + + /* give 25%, 25%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + if (sbi->sb->s_bdi->dirty_exceeded) + return false; + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; } @@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -140,11 +156,7 @@ retry: INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -155,7 +167,7 @@ retry: static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool fsynced = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) fsynced = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return fsynced; } @@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } e->ni = *ni; f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -310,7 +309,7 @@ retry: set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; @@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; - - down_read(&sbi->node_write); + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: @@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: - write_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - ne = grab_nat_entry(nm_i, nid); + down_write(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); } /* @@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - while ((found = __gang_lookup_nat_set(nm_i, set_idx, NATVEC_SIZE, setvec))) { unsigned idx; @@ -1973,13 +1978,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; @@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0dd84..d10b6448a67 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ }; struct nat_entry_set { @@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd01322578..9160a37e1c7 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -111,7 +111,7 @@ retry: iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } @@ -129,7 +129,7 @@ retry: goto out; out_unmap_put: - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); @@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -227,6 +229,8 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi, if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76fdc4..42607a67992 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; + int err; + + SetPagePrivate(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + mutex_unlock(&fi->inmem_lock); +} + +void invalidate_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur; + + mutex_lock(&fi->inmem_lock); + cur = radix_tree_lookup(&fi->inmem_root, page->index); + if (cur) { + radix_tree_delete(&fi->inmem_root, cur->page->index); + f2fs_put_page(cur->page, 0); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + } mutex_unlock(&fi->inmem_lock); } @@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort) .rw = WRITE_SYNC, }; - f2fs_balance_fs(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); mutex_lock(&fi->inmem_lock); @@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort) do_write_data_page(cur->page, &fio); submit_bio = true; } + radix_tree_delete(&fi->inmem_root, cur->page->index); f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } if (submit_bio) f2fs_submit_merged_bio(sbi, DATA, WRITE); @@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } @@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; + cpc->trimmed += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); @@ -473,13 +538,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) } mutex_unlock(&dirty_i->seglist_lock); - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; + __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); return; } @@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = ~(cur_map[i] | ckpt_map[i]); while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (end - start < cpc->trim_minlen) continue; - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, start, end); } } @@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; @@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } @@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec1c62..7f327c0ba4e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) @@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f700f4e..f71421d7047 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,8 +51,10 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, Opt_err, }; @@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, {Opt_err, NULL}, }; @@ -340,12 +344,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; @@ -922,7 +937,7 @@ retry: static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; + struct f2fs_super_block *raw_super = NULL; struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; @@ -1123,7 +1138,7 @@ try_onemore: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca8728117..5072bf9ae0e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7dbcf5e..969d792ca36 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6df8d3d885e..b8b92c2f968 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, } alias = d_find_alias(inode); - if (alias && !vfat_d_anon_disconn(alias)) { + /* + * Checking "alias->d_parent == dentry->d_parent" to make sure + * FS is not corrupted (especially double linked dir). + */ + if (alias && alias->d_parent == dentry->d_parent && + !vfat_d_anon_disconn(alias)) { /* * This inode has non anonymous-DCACHE_DISCONNECTED * dentry. This means, the user did ->lookup() by an @@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: mutex_unlock(&MSDOS_SB(sb)->s_lock); - dentry->d_time = dentry->d_parent->d_inode->i_version; - dentry = d_splice_alias(inode, dentry); - if (dentry) - dentry->d_time = dentry->d_parent->d_inode->i_version; - return dentry; - + if (!inode) + dentry->d_time = dir->i_version; + return d_splice_alias(inode, dentry); error: mutex_unlock(&MSDOS_SB(sb)->s_lock); return ERR_PTR(err); @@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC; fat_detach(inode); + dentry->d_time = dir->i_version; out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ - dentry->d_time = dentry->d_parent->d_inode->i_version; d_instantiate(dentry, inode); mutex_unlock(&MSDOS_SB(sb)->s_lock); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 5d4261ff5d2..c5a34f09e22 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -365,23 +365,17 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) ret = gfs2_dir_read_data(ip, hc, hsize); if (ret < 0) { - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); + kvfree(hc); return ERR_PTR(ret); } spin_lock(&inode->i_lock); - if (ip->i_hash_cache) { - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); - } else { + if (likely(!ip->i_hash_cache)) { ip->i_hash_cache = hc; + hc = NULL; } spin_unlock(&inode->i_lock); + kvfree(hc); return ip->i_hash_cache; } @@ -396,10 +390,7 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc = ip->i_hash_cache; ip->i_hash_cache = NULL; - if (is_vmalloc_addr(hc)) - vfree(hc); - else - kfree(hc); + kvfree(hc); } static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) @@ -1168,10 +1159,7 @@ fail: gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_kfree: - if (is_vmalloc_addr(hc2)) - vfree(hc2); - else - kfree(hc2); + kvfree(hc2); return error; } @@ -1302,14 +1290,6 @@ static void *gfs2_alloc_sort_buffer(unsigned size) return ptr; } -static void gfs2_free_sort_buffer(void *ptr) -{ - if (is_vmalloc_addr(ptr)) - vfree(ptr); - else - kfree(ptr); -} - static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, int *copied, unsigned *depth, u64 leaf_no) @@ -1393,7 +1373,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, out_free: for(i = 0; i < leaf; i++) brelse(larr[i]); - gfs2_free_sort_buffer(larr); + kvfree(larr); out: return error; } @@ -2004,10 +1984,7 @@ out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out: - if (is_vmalloc_addr(ht)) - vfree(ht); - else - kfree(ht); + kvfree(ht); return error; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 80dd44dca02..6e600abf694 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -337,7 +337,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; int hint = min_t(size_t, INT_MAX, blks); - atomic_set(&ip->i_res->rs_sizehint, hint); + if (hint > atomic_read(&ip->i_res->rs_sizehint)) + atomic_set(&ip->i_res->rs_sizehint, hint); } /** @@ -728,7 +729,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *dibh; int error; - loff_t size = len; unsigned int nr_blks; sector_t lblock = offset >> inode->i_blkbits; @@ -762,11 +762,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, goto out; } } - if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE)) - i_size_write(inode, offset + size); - - mark_inode_dirty(inode); - out: brelse(dibh); return error; @@ -796,8 +791,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, } } -static long gfs2_fallocate(struct file *file, int mode, loff_t offset, - loff_t len) +static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -811,14 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; - struct gfs2_holder gh; next = (next + 1) << sdp->sd_sb.sb_bsize_shift; - /* We only support the FALLOC_FL_KEEP_SIZE mode */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - return -EOPNOTSUPP; - offset &= bsize_mask; len = next - offset; @@ -829,17 +818,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (bytes == 0) bytes = sdp->sd_sb.sb_bsize; - error = gfs2_rs_alloc(ip); - if (error) - return error; - - mutex_lock(&inode->i_mutex); - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - error = gfs2_glock_nq(&gh); - if (unlikely(error)) - goto out_uninit; - gfs2_size_hint(file, offset, len); while (len > 0) { @@ -852,8 +830,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, } error = gfs2_quota_lock_check(ip); if (error) - goto out_unlock; - + return error; retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); @@ -895,20 +872,64 @@ retry: gfs2_quota_unlock(ip); } - if (error == 0) - error = generic_write_sync(file, pos, count); - goto out_unlock; + if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { + i_size_write(inode, pos + count); + /* Marks the inode as dirty */ + file_update_time(file); + } + + return generic_write_sync(file, pos, count); out_trans_fail: gfs2_inplace_release(ip); out_qunlock: gfs2_quota_unlock(ip); + return error; +} + +static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder gh; + int ret; + + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + ret = gfs2_glock_nq(&gh); + if (ret) + goto out_uninit; + + if (!(mode & FALLOC_FL_KEEP_SIZE) && + (offset + len) > inode->i_size) { + ret = inode_newsize_ok(inode, offset + len); + if (ret) + goto out_unlock; + } + + ret = get_write_access(inode); + if (ret) + goto out_unlock; + + ret = gfs2_rs_alloc(ip); + if (ret) + goto out_putw; + + ret = __gfs2_fallocate(file, mode, offset, len); + if (ret) + gfs2_rs_deltree(ip->i_res); +out_putw: + put_write_access(inode); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); mutex_unlock(&inode->i_mutex); - return error; + return ret; } #ifdef CONFIG_GFS2_FS_LOCKING_DLM diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8f0c19d1d94..a23524aa3ea 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -836,8 +836,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * gh->gh_flags = flags; gh->gh_iflags = 0; gh->gh_ip = _RET_IP_; - if (gh->gh_owner_pid) - put_pid(gh->gh_owner_pid); + put_pid(gh->gh_owner_pid); gh->gh_owner_pid = get_pid(task_pid(current)); } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1cc0bba6313..fe91951c336 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -28,6 +28,8 @@ #include "trans.h" #include "dir.h" +struct workqueue_struct *gfs2_freeze_wq; + static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n", @@ -94,11 +96,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = _RET_IP_; - sb_start_intwrite(sdp->sd_vfs); - if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { - sb_end_intwrite(sdp->sd_vfs); + if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) return; - } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; @@ -469,20 +468,19 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) static void freeze_go_sync(struct gfs2_glock *gl) { + int error = 0; struct gfs2_sbd *sdp = gl->gl_sbd; - DEFINE_WAIT(wait); if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - atomic_set(&sdp->sd_log_freeze, 1); - wake_up(&sdp->sd_logd_waitq); - do { - prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&sdp->sd_log_freeze)) - io_schedule(); - } while(atomic_read(&sdp->sd_log_freeze)); - finish_wait(&sdp->sd_log_frozen_wait, &wait); + atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE); + error = freeze_super(sdp->sd_vfs); + if (error) { + printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work); + gfs2_log_flush(sdp, NULL, FREEZE_FLUSH); } } diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index 7455d2629bc..8ed1857c1a8 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -12,6 +12,8 @@ #include "incore.h" +extern struct workqueue_struct *gfs2_freeze_wq; + extern const struct gfs2_glock_operations gfs2_meta_glops; extern const struct gfs2_glock_operations gfs2_inode_glops; extern const struct gfs2_glock_operations gfs2_rgrp_glops; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 39e7e9959b7..7a2dbbc0d63 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -97,6 +97,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ +#define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ @@ -587,6 +588,12 @@ enum { SDF_SKIP_DLM_UNLOCK = 8, }; +enum gfs2_freeze_state { + SFS_UNFROZEN = 0, + SFS_STARTING_FREEZE = 1, + SFS_FROZEN = 2, +}; + #define GFS2_FSNAME_LEN 256 struct gfs2_inum_host { @@ -684,6 +691,7 @@ struct gfs2_sbd { struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; + struct work_struct sd_freeze_work; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; @@ -788,6 +796,9 @@ struct gfs2_sbd { wait_queue_head_t sd_log_flush_wait; int sd_log_error; + atomic_t sd_reserving_log; + wait_queue_head_t sd_reserving_log_wait; + unsigned int sd_log_flush_head; u64 sd_log_flush_wrapped; @@ -797,12 +808,8 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; - struct gfs2_holder sd_freeze_root_gh; - struct gfs2_holder sd_thaw_gh; - atomic_t sd_log_freeze; - atomic_t sd_frozen_root; - wait_queue_head_t sd_frozen_root_wait; - wait_queue_head_t sd_log_frozen_wait; + atomic_t sd_freeze_state; + struct mutex sd_freeze_mutex; char sd_fsname[GFS2_FSNAME_LEN]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6e291743317..9054002ebe7 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1040,11 +1040,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_dir_check(&dip->i_inode, name, ip); - if (error) - return error; - - return 0; + return gfs2_dir_check(&dip->i_inode, name, ip); } /** @@ -1614,26 +1610,18 @@ int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip; struct gfs2_holder i_gh; - struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; - int frozen_root = 0; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && - inode == sdp->sd_root_dir->d_inode && - atomic_inc_not_zero(&sdp->sd_frozen_root))) - frozen_root = 1; - else { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; - } + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) @@ -1642,8 +1630,6 @@ int gfs2_permission(struct inode *inode, int mask) error = generic_permission(inode, mask); if (unlock) gfs2_glock_dq_uninit(&i_gh); - else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) - wake_up(&sdp->sd_frozen_root_wait); return error; } @@ -1816,29 +1802,19 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; - struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; - int frozen_root = 0; if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && - inode == sdp->sd_root_dir->d_inode && - atomic_inc_not_zero(&sdp->sd_frozen_root))) - frozen_root = 1; - else { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; - } + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; } generic_fillattr(inode, stat); if (unlock) gfs2_glock_dq_uninit(&gh); - else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) - wake_up(&sdp->sd_frozen_root_wait); return 0; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3966fadbceb..536e7a6252c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -339,6 +339,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { + int ret = 0; unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); @@ -362,9 +363,13 @@ retry: } while(free_blocks <= wanted); finish_wait(&sdp->sd_log_waitq, &wait); } + atomic_inc(&sdp->sd_reserving_log); if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, - free_blocks - blks) != free_blocks) + free_blocks - blks) != free_blocks) { + if (atomic_dec_and_test(&sdp->sd_reserving_log)) + wake_up(&sdp->sd_reserving_log_wait); goto retry; + } trace_gfs2_log_blocks(sdp, -blks); /* @@ -377,9 +382,11 @@ retry: down_read(&sdp->sd_log_flush_lock); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { gfs2_log_release(sdp, blks); - return -EROFS; + ret = -EROFS; } - return 0; + if (atomic_dec_and_test(&sdp->sd_reserving_log)) + wake_up(&sdp->sd_reserving_log_wait); + return ret; } /** @@ -652,9 +659,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) u32 hash; int rw = WRITE_FLUSH_FUA | REQ_META; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); clear_page(lh); + gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); + tail = current_tail(sdp); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -695,6 +705,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, enum gfs2_flush_type type) { struct gfs2_trans *tr; + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); down_write(&sdp->sd_log_flush_lock); @@ -713,8 +724,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, INIT_LIST_HEAD(&tr->tr_ail1_list); INIT_LIST_HEAD(&tr->tr_ail2_list); tr->tr_first = sdp->sd_log_flush_head; + if (unlikely (state == SFS_FROZEN)) + gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new); } + if (unlikely(state == SFS_FROZEN)) + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); @@ -745,8 +760,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); - if (atomic_read(&sdp->sd_log_freeze)) - type = FREEZE_FLUSH; if (type != NORMAL_FLUSH) { if (!sdp->sd_log_idle) { for (;;) { @@ -763,21 +776,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, } if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) gfs2_log_shutdown(sdp); - if (type == FREEZE_FLUSH) { - int error; - - atomic_set(&sdp->sd_log_freeze, 0); - wake_up(&sdp->sd_log_frozen_wait); - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, 0, - &sdp->sd_thaw_gh); - if (error) { - printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); - gfs2_assert_withdraw(sdp, 0); - } - else - gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); - } + if (type == FREEZE_FLUSH) + atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); } trace_gfs2_log_flush(sdp, 0); @@ -888,7 +888,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze)); + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 82b6ac82965..241a399bf83 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -30,6 +30,7 @@ #include "quota.h" #include "recovery.h" #include "dir.h" +#include "glops.h" struct workqueue_struct *gfs2_control_wq; @@ -161,9 +162,14 @@ static int __init init_gfs2_fs(void) if (!gfs2_control_wq) goto fail_recovery; + gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0); + + if (!gfs2_freeze_wq) + goto fail_control; + gfs2_page_pool = mempool_create_page_pool(64, 0); if (!gfs2_page_pool) - goto fail_control; + goto fail_freeze; gfs2_register_debugfs(); @@ -171,6 +177,8 @@ static int __init init_gfs2_fs(void) return 0; +fail_freeze: + destroy_workqueue(gfs2_freeze_wq); fail_control: destroy_workqueue(gfs2_control_wq); fail_recovery: @@ -224,6 +232,7 @@ static void __exit exit_gfs2_fs(void) unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); + destroy_workqueue(gfs2_freeze_wq); list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index d3eae244076..8633ad328ee 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -129,11 +129,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); + atomic_set(&sdp->sd_reserving_log, 0); + init_waitqueue_head(&sdp->sd_reserving_log_wait); init_waitqueue_head(&sdp->sd_log_flush_wait); - init_waitqueue_head(&sdp->sd_log_frozen_wait); - atomic_set(&sdp->sd_log_freeze, 0); - atomic_set(&sdp->sd_frozen_root, 0); - init_waitqueue_head(&sdp->sd_frozen_root_wait); + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + mutex_init(&sdp->sd_freeze_mutex); return sdp; } @@ -760,15 +760,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - if (!sdp->sd_args.ar_spectator) { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &sdp->sd_thaw_gh); - if (error) { - fs_err(sdp, "can't acquire freeze glock: %d\n", error); - goto fail_jinode_gh; - } - } - gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); + INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func); return 0; fail_jinode_gh: @@ -1082,6 +1074,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sb->s_export_op = &gfs2_export_ops; sb->s_xattr = gfs2_xattr_handlers; sb->s_qcop = &gfs2_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 64b29f7f6b4..c8b148bbdc8 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1360,13 +1360,8 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); - if (sdp->sd_quota_bitmap) { - if (is_vmalloc_addr(sdp->sd_quota_bitmap)) - vfree(sdp->sd_quota_bitmap); - else - kfree(sdp->sd_quota_bitmap); - sdp->sd_quota_bitmap = NULL; - } + kvfree(sdp->sd_quota_bitmap); + sdp->sd_quota_bitmap = NULL; } static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7474c413ffd..9150207f365 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; - rgd->rd_flags &= ~GFS2_RDF_UPTODATE; + rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) sdp->sd_max_rg_data = rgd->rd_data; spin_lock(&sdp->sd_rindex_spin); @@ -955,6 +955,36 @@ fail: } /** + * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use + * @sdp: the GFS2 superblock + * + * The purpose of this function is to select a subset of the resource groups + * and mark them as PREFERRED. We do it in such a way that each node prefers + * to use a unique set of rgrps to minimize glock contention. + */ +static void set_rgrp_preferences(struct gfs2_sbd *sdp) +{ + struct gfs2_rgrpd *rgd, *first; + int i; + + /* Skip an initial number of rgrps, based on this node's journal ID. + That should start each node out on its own set. */ + rgd = gfs2_rgrpd_get_first(sdp); + for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++) + rgd = gfs2_rgrpd_get_next(rgd); + first = rgd; + + do { + rgd->rd_flags |= GFS2_RDF_PREFERRED; + for (i = 0; i < sdp->sd_journals; i++) { + rgd = gfs2_rgrpd_get_next(rgd); + if (rgd == first) + break; + } + } while (rgd != first); +} + +/** * gfs2_ri_update - Pull in a new resource index from the disk * @ip: pointer to the rindex inode * @@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip) if (error < 0) return error; + set_rgrp_preferences(sdp); + sdp->sd_rindex_uptodate = 1; return 0; } @@ -1891,6 +1923,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b } /** + * fast_to_acquire - determine if a resource group will be fast to acquire + * + * If this is one of our preferred rgrps, it should be quicker to acquire, + * because we tried to set ourselves up as dlm lock master. + */ +static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) +{ + struct gfs2_glock *gl = rgd->rd_gl; + + if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) && + !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + return 1; + if (rgd->rd_flags & GFS2_RDF_PREFERRED) + return 1; + return 0; +} + +/** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for * @ap: the allocation parameters @@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a rg_locked = 0; if (skip && skip--) goto next_rgrp; - if (!gfs2_rs_active(rs) && (loops < 2) && - gfs2_rgrp_used_recently(rs, 1000) && - gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) - goto next_rgrp; + if (!gfs2_rs_active(rs)) { + if (loops == 0 && + !fast_to_acquire(rs->rs_rbm.rgd)) + goto next_rgrp; + if ((loops < 2) && + gfs2_rgrp_used_recently(rs, 1000) && + gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) + goto next_rgrp; + } error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, LM_ST_EXCLUSIVE, flags, &rs->rs_rgd_gh); @@ -2195,6 +2251,9 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_free && !ret) goto out; + /* We used up our block reservation, so we should + reserve more blocks next time. */ + atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint); } __rs_deltree(rs); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 5d8f085f7ad..b104f4af3af 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -20,6 +20,7 @@ */ #define RGRP_RSRV_MINBYTES 8 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY)) +#define RGRP_RSRV_ADDBLKS 64 struct gfs2_rgrpd; struct gfs2_sbd; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a346f56c4c6..5b327f837de 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -26,6 +26,7 @@ #include <linux/wait.h> #include <linux/writeback.h> #include <linux/backing-dev.h> +#include <linux/kernel.h> #include "gfs2.h" #include "incore.h" @@ -399,7 +400,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder thaw_gh; + struct gfs2_holder freeze_gh; struct gfs2_log_header_host head; int error; @@ -408,7 +409,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) return error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, - &thaw_gh); + &freeze_gh); if (error) goto fail_threads; @@ -434,13 +435,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_glock_dq_uninit(&freeze_gh); return 0; fail: - thaw_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&thaw_gh); + freeze_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&freeze_gh); fail_threads: kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -580,14 +581,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type) struct buffer_head *m_bh, *l_bh; int error; + sb_start_write(sb); error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) - return error; + goto out; error = gfs2_meta_inode_buffer(m_ip, &m_bh); if (error) - goto out; + goto out_unlock; spin_lock(&sdp->sd_statfs_spin); gfs2_statfs_change_in(m_sc, m_bh->b_data + @@ -615,8 +617,10 @@ out_bh2: brelse(l_bh); out_bh: brelse(m_bh); -out: +out_unlock: gfs2_glock_dq_uninit(&gh); +out: + sb_end_write(sb); return error; } @@ -643,14 +647,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; - struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode); int error; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, - &sdp->sd_freeze_root_gh); - if (error) - return error; - atomic_set(&sdp->sd_frozen_root, 1); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { @@ -692,11 +690,6 @@ out: gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } - if (error) { - atomic_dec(&sdp->sd_frozen_root); - wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); - gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); - } return error; } @@ -834,18 +827,14 @@ out: static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder thaw_gh; + struct gfs2_holder freeze_gh; int error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, - &thaw_gh); + &freeze_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; - down_write(&sdp->sd_log_flush_lock); - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - up_write(&sdp->sd_log_flush_lock); - kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); @@ -853,11 +842,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); + down_write(&sdp->sd_log_flush_lock); + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); + gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); + wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0); gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); - if (thaw_gh.gh_gl) - gfs2_glock_dq_uninit(&thaw_gh); + if (freeze_gh.gh_gl) + gfs2_glock_dq_uninit(&freeze_gh); gfs2_quota_cleanup(sdp); @@ -943,11 +937,41 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp && !atomic_read(&sdp->sd_log_freeze)) + if (wait && sdp) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); return 0; } +void gfs2_freeze_func(struct work_struct *work) +{ + int error; + struct gfs2_holder freeze_gh; + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work); + struct super_block *sb = sdp->sd_vfs; + + atomic_inc(&sb->s_active); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &freeze_gh); + if (error) { + printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + else { + atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); + error = thaw_super(sb); + if (error) { + printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n", + error); + gfs2_assert_withdraw(sdp, 0); + } + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + freeze_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&freeze_gh); + } + deactivate_super(sb); + return; +} + /** * gfs2_freeze - prevent further writes to the filesystem * @sb: the VFS structure for the filesystem @@ -957,10 +981,16 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) static int gfs2_freeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; - int error; + int error = 0; - if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) - return -EINVAL; + mutex_lock(&sdp->sd_freeze_mutex); + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + goto out; + + if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + error = -EINVAL; + goto out; + } for (;;) { error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); @@ -980,7 +1010,10 @@ static int gfs2_freeze(struct super_block *sb) fs_err(sdp, "retrying...\n"); msleep(1000); } - return 0; + error = 0; +out: + mutex_unlock(&sdp->sd_freeze_mutex); + return error; } /** @@ -993,10 +1026,15 @@ static int gfs2_unfreeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; + mutex_lock(&sdp->sd_freeze_mutex); + if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || + sdp->sd_freeze_gh.gh_gl == NULL) { + mutex_unlock(&sdp->sd_freeze_mutex); + return 0; + } + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - atomic_dec(&sdp->sd_frozen_root); - wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); - gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); + mutex_unlock(&sdp->sd_freeze_mutex); return 0; } @@ -1618,8 +1656,8 @@ const struct super_operations gfs2_super_ops = { .evict_inode = gfs2_evict_inode, .put_super = gfs2_put_super, .sync_fs = gfs2_sync_fs, - .freeze_fs = gfs2_freeze, - .unfreeze_fs = gfs2_unfreeze, + .freeze_super = gfs2_freeze, + .thaw_super = gfs2_unfreeze, .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, .drop_inode = gfs2_drop_inode, diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 90e3322ffa1..73c97dccae2 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -45,6 +45,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); extern int gfs2_statfs_sync(struct super_block *sb, int type); +extern void gfs2_freeze_func(struct work_struct *work); extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 42bfd336197..88bff243066 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -89,14 +89,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; s64 nbuf; + int alloced = tr->tr_alloced; + BUG_ON(!tr); current->journal_info = NULL; if (!tr->tr_touched) { gfs2_log_release(sdp, tr->tr_reserved); - if (tr->tr_alloced) + if (alloced) { kfree(tr); - sb_end_intwrite(sdp->sd_vfs); + sb_end_intwrite(sdp->sd_vfs); + } return; } @@ -109,13 +112,14 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); - if (tr->tr_alloced && !tr->tr_attached) + if (alloced && !tr->tr_attached) kfree(tr); up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); - sb_end_intwrite(sdp->sd_vfs); + if (alloced) + sb_end_intwrite(sdp->sd_vfs); } static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, @@ -192,6 +196,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { struct gfs2_meta_header *mh; struct gfs2_trans *tr; + enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); tr = current->journal_info; tr->tr_touched = 1; @@ -205,6 +210,10 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } + if (unlikely(state == SFS_FROZEN)) { + printk(KERN_INFO "GFS2:adding buf while frozen\n"); + gfs2_assert_withdraw(sdp, 0); + } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); diff --git a/fs/inode.c b/fs/inode.c index 26753ba7b6d..2ed95f7caa4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -143,9 +143,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_blocks = 0; inode->i_bytes = 0; inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif inode->i_pipe = NULL; inode->i_bdev = NULL; inode->i_cdev = NULL; diff --git a/fs/ioctl.c b/fs/ioctl.c index 8ac3fad3619..77c9a781254 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -518,10 +518,12 @@ static int ioctl_fsfreeze(struct file *filp) return -EPERM; /* If filesystem doesn't support freeze feature, return. */ - if (sb->s_op->freeze_fs == NULL) + if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL) return -EOPNOTSUPP; /* Freeze */ + if (sb->s_op->freeze_super) + return sb->s_op->freeze_super(sb); return freeze_super(sb); } @@ -533,6 +535,8 @@ static int ioctl_fsthaw(struct file *filp) return -EPERM; /* Thaw */ + if (sb->s_op->thaw_super) + return sb->s_op->thaw_super(sb); return thaw_super(sb); } diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index aab8549591e..c46a79adb6a 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1373,8 +1373,7 @@ int journal_destroy(journal_t *journal) } mutex_unlock(&journal->j_checkpoint_mutex); - if (journal->j_inode) - iput(journal->j_inode); + iput(journal->j_inode); if (journal->j_revoke) journal_destroy_revoke(journal); kfree(journal->j_wbuf); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e4dc74713a4..1df94fabe4e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, journal->j_chksum_driver = NULL; return 0; } - } - /* Precompute checksum seed for all metadata */ - if (jbd2_journal_has_csum_v2or3(journal)) + /* Precompute checksum seed for all metadata */ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); + } } /* If enabling v1 checksums, downgrade superblock */ diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index cf47f09e8ac..fa7e795bd8a 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -94,6 +94,9 @@ struct jfs_inode_info { unchar _inline_ea[128]; /* 128: inline extended attr */ } link; } u; +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif u32 dev; /* will die when we get wide dev_t */ struct inode vfs_inode; }; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 93e897e588a..16c3a955663 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -117,6 +117,9 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); if (!jfs_inode) return NULL; +#ifdef CONFIG_QUOTA + memset(&jfs_inode->i_dquot, 0, sizeof(jfs_inode->i_dquot)); +#endif return &jfs_inode->vfs_inode; } @@ -537,6 +540,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; sb->s_qcop = &dquot_quotactl_ops; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* @@ -836,6 +840,10 @@ out: return len - towrite; } +static struct dquot **jfs_get_dquots(struct inode *inode) +{ + return JFS_IP(inode)->i_dquot; +} #endif static const struct super_operations jfs_super_operations = { @@ -854,6 +862,7 @@ static const struct super_operations jfs_super_operations = { #ifdef CONFIG_QUOTA .quota_read = jfs_quota_read, .quota_write = jfs_quota_write, + .get_dquots = jfs_get_dquots, #endif }; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 13db95f5417..56598742dde 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops; static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); -#ifdef LOCKD_DEBUG +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) { /* diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 4f46f7a0528..77fec6a55f5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) /* Optimize common case that writes from 0 to end of file */ end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); - if (end != NFS_I(inode)->npages) { + if (end != inode->i_mapping->nrpages) { rcu_read_lock(); end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); rcu_read_unlock(); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 73466b93409..e36a9d78ea4 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, goto out_iput; res->size = i_size_read(inode); res->change_attr = delegation->change_attr; - if (nfsi->npages != 0) + if (nfsi->nrequests != 0) res->change_attr++; res->ctime = inode->i_ctime; res->mtime = inode->i_mtime; diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 9bb806a76d9..bfecac781f1 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds) ifdebug(FACILITY) print_ds(ds); - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); + nfs_put_client(ds->ds_clp); while (!list_empty(&ds->ds_addrs)) { da = list_first_entry(&ds->ds_addrs, diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 3ef01f0ba0b..d63bea8bbfb 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } return 1; @@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) BUG_ON(!PageLocked(page)); fscache_uncache_page(cookie, page); - nfs_add_fscache_stats(page->mapping->host, - NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(page->mapping->host, + NFSIOS_FSCACHE_PAGES_UNCACHED); } /* @@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, case 0: /* read BIO submitted (page in fscache) */ dfprintk(FSCACHE, "NFS: readpage_from_fscache: BIO submitted\n"); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK); return ret; case -ENOBUFS: /* inode not in cache */ case -ENODATA: /* page not in cache */ - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); return 1; default: dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL); } return ret; } @@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) if (ret != 0) { fscache_uncache_page(nfs_i_fscache(inode), page); - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); - nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL); + nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED); } else { - nfs_add_fscache_stats(inode, - NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); + nfs_inc_fscache_stats(inode, + NFSIOS_FSCACHE_PAGES_WRITTEN_OK); } } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 00689a8a85e..4bffe637ea3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode) nfs_zap_caches_locked(inode); spin_unlock(&inode->i_lock); } +EXPORT_SYMBOL_GPL(nfs_zap_caches); void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) { @@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) - && nfsi->npages == 0) { + && nfsi->nrequests == 0) { i_size_write(inode, nfs_size_to_loff_t(fattr->size)); ret |= NFS_INO_INVALID_ATTR; } @@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (fattr->valid & NFS_ATTR_FATTR_SIZE) { cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); - if (cur_size != new_isize && nfsi->npages == 0) + if (cur_size != new_isize && nfsi->nrequests == 0) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; } @@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (new_isize != cur_isize) { /* Do we perhaps have any outstanding writes, or has * the file grown beyond our last write? */ - if ((nfsi->npages == 0) || new_isize > cur_isize) { + if ((nfsi->nrequests == 0) || new_isize > cur_isize) { i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid &= ~NFS_INO_REVAL_PAGECACHE; @@ -1784,7 +1785,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->commit_info.list); - nfsi->npages = 0; + nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->silly_count, 1); diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index c5832487c45..0cb806fbd4c 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode, { this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend); } +static inline void nfs_inc_fscache_stats(struct inode *inode, + enum nfs_stat_fscachecounters stat) +{ + this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]); +} #endif static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index d10333a197b..7afb8947dfd 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -6,6 +6,8 @@ #define __LINUX_FS_NFS_NFS4_2_H /* nfs4.2proc.c */ +int nfs42_proc_allocate(struct file *, loff_t, loff_t); +int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); /* nfs4.2xdr.h */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0886f1db591..cb170722769 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, return ret; } +static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + struct nfs42_falloc_args args = { + .falloc_fh = NFS_FH(inode), + .falloc_offset = offset, + .falloc_length = len, + }; + struct nfs42_falloc_res res; + struct nfs_server *server = NFS_SERVER(inode); + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE); + if (status) + return status; + + return nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +} + +static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + loff_t offset, loff_t len) +{ + struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs42_proc_fallocate(msg, filep, offset, len); + if (err == -ENOTSUPP) + return -EOPNOTSUPP; + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; +} + +int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE; + return err; +} + +int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE], + }; + struct inode *inode = file_inode(filep); + int err; + + if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE)) + return -EOPNOTSUPP; + + err = nfs42_proc_fallocate(&msg, filep, offset, len); + if (err == -EOPNOTSUPP) + NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; + return err; +} + loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) { struct inode *inode = file_inode(filep); @@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) struct nfs_server *server = NFS_SERVER(inode); int status; - if (!(server->caps & NFS_CAP_SEEK)) + if (!nfs_server_capable(inode, NFS_CAP_SEEK)) return -ENOTSUPP; status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c90469b604b..038a7e1521f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -4,6 +4,15 @@ #ifndef __LINUX_FS_NFS_NFS4_2XDR_H #define __LINUX_FS_NFS_NFS4_2XDR_H +#define encode_fallocate_maxsz (encode_stateid_maxsz + \ + 2 /* offset */ + \ + 2 /* length */) +#define encode_allocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_allocate_maxsz (op_decode_hdr_maxsz) +#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \ + encode_fallocate_maxsz) +#define decode_deallocate_maxsz (op_decode_hdr_maxsz) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -14,6 +23,18 @@ 2 /* offset */ + \ 2 /* length */) +#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_allocate_maxsz) +#define NFS4_dec_allocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_allocate_maxsz) +#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_deallocate_maxsz) +#define NFS4_dec_deallocate_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_deallocate_maxsz) #define NFS4_enc_seek_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_seek_maxsz) @@ -22,6 +43,30 @@ decode_seek_maxsz) +static void encode_fallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + encode_nfs4_stateid(xdr, &args->falloc_stateid); + encode_uint64(xdr, args->falloc_offset); + encode_uint64(xdr, args->falloc_length); +} + +static void encode_allocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + +static void encode_deallocate(struct xdr_stream *xdr, + struct nfs42_falloc_args *args, + struct compound_hdr *hdr) +{ + encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr); + encode_fallocate(xdr, args); +} + static void encode_seek(struct xdr_stream *xdr, struct nfs42_seek_args *args, struct compound_hdr *hdr) @@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr, } /* + * Encode ALLOCATE request + */ +static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_allocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode DEALLOCATE request + */ +static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_falloc_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->falloc_fh, &hdr); + encode_deallocate(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* * Encode SEEK request */ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, @@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, encode_nops(&hdr); } +static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_ALLOCATE); +} + +static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) +{ + return decode_op_hdr(xdr, OP_DEALLOCATE); +} + static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) { int status; @@ -73,6 +164,54 @@ out_overflow: } /* + * Decode ALLOCATE request + */ +static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_allocate(xdr, res); +out: + return status; +} + +/* + * Decode DEALLOCATE request + */ +static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_falloc_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_deallocate(xdr, res); +out: + return status; +} + +/* * Decode SEEK request */ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index be6cac37ea1..a08178764cf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); /* nfs4proc.c */ +extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, struct rpc_message *, struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index ffdb28d86cf..03311259b0c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp) */ static int nfs4_init_callback(struct nfs_client *clp) { + struct rpc_xprt *xprt; int error; - if (clp->rpc_ops->version == 4) { - struct rpc_xprt *xprt; + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); - - if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel(xprt, - NFS41_BC_MIN_CALLBACKS); - if (error < 0) - return error; - } - - error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); - if (error < 0) { - dprintk("%s: failed to start callback. Error = %d\n", - __func__, error); + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); + if (error < 0) return error; - } - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); } + + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); + if (error < 0) { + dprintk("%s: failed to start callback. Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + return 0; } @@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs4_proc_setclientid_confirm(pos, &clid, cred); @@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new, /* No match found. The server lost our clientid */ out: - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); dprintk("NFS: <-- %s status = %d\n", __func__, status); return status; } @@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new, atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); prev = pos; status = nfs_wait_client_init_complete(pos); @@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new, /* No matching nfs_client found. */ spin_unlock(&nn->nfs_client_lock); dprintk("NFS: <-- %s status = %d\n", __func__, status); - if (prev) - nfs_put_client(prev); + nfs_put_client(prev); return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index c51fb4db9bf..8b46389c4c5 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -3,6 +3,8 @@ * * Copyright (C) 1992 Rick Sladkey */ +#include <linux/fs.h> +#include <linux/falloc.h> #include <linux/nfs_fs.h> #include "internal.h" #include "fscache.h" @@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) return nfs_file_llseek(filep, offset, whence); } } + +static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = file_inode(filep); + long ret; + + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE))) + return -EOPNOTSUPP; + + ret = inode_newsize_ok(inode, offset + len); + if (ret < 0) + return ret; + + mutex_lock(&inode->i_mutex); + if (mode & FALLOC_FL_PUNCH_HOLE) + ret = nfs42_proc_deallocate(filep, offset, len); + else + ret = nfs42_proc_allocate(filep, offset, len); + mutex_unlock(&inode->i_mutex); + + nfs_zap_caches(inode); + return ret; +} #endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { @@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, +#ifdef CONFIG_NFS_V4_2 + .fallocate = nfs42_fallocate, +#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 69dc20a743f..e7f8d5ff258 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -158,8 +158,6 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; - case -NFS4ERR_ACCESS: - return -EACCES; case -NFS4ERR_FILE_OPEN: return -EBUSY; default: @@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -7704,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) dprintk("--> %s\n", __func__); + /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ + pnfs_get_layout_hdr(NFS_I(inode)->layout); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); @@ -7716,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->res.seq_res.sr_slot = NULL; nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); - /* nfs4_layoutget_release calls pnfs_put_layout_hdr */ - pnfs_get_layout_hdr(NFS_I(inode)->layout); - task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return ERR_CAST(task); @@ -8426,6 +8424,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_ALLOCATE + | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 206c08a60c7..cb4376b78ed 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ 1 /* sc_prog */ + \ - XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ - XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ 1) /* sc_cb_ident */ #define decode_setclientid_maxsz \ (op_decode_hdr_maxsz + \ - 2 + \ - 1024) /* large value for CLID_INUSE */ + 2 /* clientid */ + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ + 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define encode_setclientid_confirm_maxsz \ (op_encode_hdr_maxsz + \ 3 + (NFS4_VERIFIER_SIZE >> 2)) @@ -7394,6 +7396,8 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ #ifdef CONFIG_NFS_V4_2 PROC(SEEK, enc_seek, dec_seek), + PROC(ALLOCATE, enc_allocate, dec_allocate), + PROC(DEALLOCATE, enc_deallocate, dec_deallocate), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ed0db61f854..2b5e769beb1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) static inline void nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) { + struct inode *inode; WARN_ON_ONCE(prev == req); if (!prev) { @@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * nfs_page_group_destroy is called */ kref_get(&req->wb_head->wb_kref); - /* grab extra ref if head request has extra ref from - * the write/commit path to handle handoff between write - * and commit lists */ + /* grab extra ref and bump the request count if head request + * has extra ref from the write/commit path to handle handoff + * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + inode = page_file_mapping(req->wb_page)->host; set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + spin_lock(&inode->i_lock); + NFS_I(inode)->nrequests++; + spin_unlock(&inode->i_lock); } } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index beff2769c5c..c91a4799c56 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page) dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", page, PAGE_CACHE_SIZE, page_file_index(page)); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); - nfs_add_stats(inode, NFSIOS_READPAGES, 1); + nfs_inc_stats(inode, NFSIOS_READPAGES); /* * Try to flush any pending writes to the file.. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f83b02dc916..af3af685a9e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st int ret; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); + nfs_inc_stats(inode, NFSIOS_WRITEPAGES); nfs_pageio_cond_complete(pgio, page_file_index(page)); ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); @@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) nfs_lock_request(req); spin_lock(&inode->i_lock); - if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + if (!nfsi->nrequests && + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) inode->i_version++; /* * Swap-space should not get truncated. Hence no need to plug the race @@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); } - nfsi->npages++; + nfsi->nrequests++; /* this a head request for a page group - mark it as having an - * extra reference so sub groups can follow suit */ + * extra reference so sub groups can follow suit. + * This flag also informs pgio layer when to bump nrequests when + * adding subrequests. */ WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); @@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) wake_up_page(head->wb_page, PG_private); clear_bit(PG_MAPPED, &head->wb_flags); } - nfsi->npages--; + nfsi->nrequests--; + spin_unlock(&inode->i_lock); + } else { + spin_lock(&inode->i_lock); + nfsi->nrequests--; spin_unlock(&inode->i_lock); } @@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ - if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) + if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1)) goto out_mark_dirty; /* don't wait for the COMMIT response */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ed2b1151b17..7cbdf1b2e4a 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task) { if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { rpc_sleep_on(&clp->cl_cb_waitq, task, NULL); - dprintk("%s slot is busy\n", __func__); - return false; + /* Race breaker */ + if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) { + dprintk("%s slot is busy\n", __func__); + return false; + } + rpc_wake_up_queued_task(&clp->cl_cb_waitq, task); } return true; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 747f3b95bd1..33a46a8dfaf 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -335,12 +335,15 @@ void nfsd_lockd_shutdown(void); (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ - (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL #else -#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#define NFSD4_2_SECURITY_ATTRS 0 #endif +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ + NFSD4_2_SECURITY_ATTRS) + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index daf76652fe5..283aa312d74 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, struct fsnotify_event *kevent; char __user *start; int ret; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); start = buf; group = file->private_data; + add_wait_queue(&group->notification_waitq, &wait); while (1) { - prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE); - mutex_lock(&group->notification_mutex); kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); @@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf, if (start != buf) break; - schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } + remove_wait_queue(&group->notification_waitq, &wait); - finish_wait(&group->notification_waitq, &wait); if (start != buf && ret != -EFAULT) ret = buf - start; return ret; diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index a9b76de4604..ca3431ee7f2 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -80,6 +80,8 @@ struct ocfs2_inode_info */ tid_t i_sync_tid; tid_t i_datasync_tid; + + struct dquot *i_dquot[MAXQUOTAS]; }; /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 93c85bc745e..0945814ddb7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -143,6 +143,11 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); static int ocfs2_enable_quotas(struct ocfs2_super *osb); static void ocfs2_disable_quotas(struct ocfs2_super *osb); +static struct dquot **ocfs2_get_dquots(struct inode *inode) +{ + return OCFS2_I(inode)->i_dquot; +} + static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, .alloc_inode = ocfs2_alloc_inode, @@ -155,6 +160,7 @@ static const struct super_operations ocfs2_sops = { .show_options = ocfs2_show_options, .quota_read = ocfs2_quota_read, .quota_write = ocfs2_quota_write, + .get_dquots = ocfs2_get_dquots, }; enum { @@ -563,6 +569,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) oi->i_sync_tid = 0; oi->i_datasync_tid = 0; + memset(&oi->i_dquot, 0, sizeof(oi->i_dquot)); jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); return &oi->vfs_inode; @@ -2073,6 +2080,7 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_export_op = &ocfs2_export_ops; sb->s_qcop = &ocfs2_quotactl_ops; sb->dq_op = &ocfs2_quota_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_NOATIME; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4e0388cffe3..f6734c6b66a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -552,6 +552,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", +#ifdef CONFIG_X86_INTEL_MPX + [ilog2(VM_MPX)] = "mp", +#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index fafb7a02a5d..50416602774 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/uaccess.h> +#include <linux/syslog.h> #include "internal.h" @@ -120,6 +121,18 @@ static const struct seq_operations pstore_ftrace_seq_ops = { .show = pstore_ftrace_seq_show, }; +static int pstore_check_syslog_permissions(struct pstore_private *ps) +{ + switch (ps->type) { + case PSTORE_TYPE_DMESG: + case PSTORE_TYPE_CONSOLE: + return check_syslog_permissions(SYSLOG_ACTION_READ_ALL, + SYSLOG_FROM_READER); + default: + return 0; + } +} + static ssize_t pstore_file_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -138,6 +151,10 @@ static int pstore_file_open(struct inode *inode, struct file *file) int err; const struct seq_operations *sops = NULL; + err = pstore_check_syslog_permissions(ps); + if (err) + return err; + if (ps->type == PSTORE_TYPE_FTRACE) sops = &pstore_ftrace_seq_ops; @@ -174,6 +191,11 @@ static const struct file_operations pstore_file_operations = { static int pstore_unlink(struct inode *dir, struct dentry *dentry) { struct pstore_private *p = dentry->d_inode->i_private; + int err; + + err = pstore_check_syslog_permissions(p); + if (err) + return err; if (p->psi->erase) p->psi->erase(p->type, p->id, p->count, diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 3b5744306ed..ec881b31270 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -135,25 +135,27 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, return prz; } -static void ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, +static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, bool *compressed) { char data_type; + int header_length = 0; - if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n", - &time->tv_sec, &time->tv_nsec, &data_type) == 3) { + if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n%n", &time->tv_sec, + &time->tv_nsec, &data_type, &header_length) == 3) { if (data_type == 'C') *compressed = true; else *compressed = false; - } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", - &time->tv_sec, &time->tv_nsec) == 2) { + } else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n%n", + &time->tv_sec, &time->tv_nsec, &header_length) == 2) { *compressed = false; } else { time->tv_sec = 0; time->tv_nsec = 0; *compressed = false; } + return header_length; } static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, @@ -165,6 +167,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, ssize_t ecc_notice_size; struct ramoops_context *cxt = psi->data; struct persistent_ram_zone *prz; + int header_length; prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, @@ -178,7 +181,13 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz) return 0; + if (!persistent_ram_old(prz)) + return 0; + size = persistent_ram_old_size(prz); + header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), time, + compressed); + size -= header_length; /* ECC correction notice */ ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0); @@ -187,8 +196,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (*buf == NULL) return -ENOMEM; - memcpy(*buf, persistent_ram_old(prz), size); - ramoops_read_kmsg_hdr(*buf, time, compressed); + memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size); persistent_ram_ecc_string(prz, *buf + size, ecc_notice_size + 1); return size + ecc_notice_size; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 6b4527216a7..8f0acef3d18 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -893,6 +893,11 @@ out: } EXPORT_SYMBOL(dqget); +static inline struct dquot **i_dquot(struct inode *inode) +{ + return inode->i_sb->s_op->get_dquots(inode); +} + static int dqinit_needed(struct inode *inode, int type) { int cnt; @@ -900,9 +905,9 @@ static int dqinit_needed(struct inode *inode, int type) if (IS_NOQUOTA(inode)) return 0; if (type != -1) - return !inode->i_dquot[type]; + return !i_dquot(inode)[type]; for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if (!inode->i_dquot[cnt]) + if (!i_dquot(inode)[cnt]) return 1; return 0; } @@ -965,9 +970,9 @@ static void add_dquot_ref(struct super_block *sb, int type) static void remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head) { - struct dquot *dquot = inode->i_dquot[type]; + struct dquot *dquot = i_dquot(inode)[type]; - inode->i_dquot[type] = NULL; + i_dquot(inode)[type] = NULL; if (!dquot) return; @@ -1402,7 +1407,7 @@ static void __dquot_initialize(struct inode *inode, int type) * we check it without locking here to avoid unnecessary * dqget()/dqput() calls. */ - if (inode->i_dquot[cnt]) + if (i_dquot(inode)[cnt]) continue; init_needed = 1; @@ -1433,8 +1438,8 @@ static void __dquot_initialize(struct inode *inode, int type) /* We could race with quotaon or dqget() could have failed */ if (!got[cnt]) continue; - if (!inode->i_dquot[cnt]) { - inode->i_dquot[cnt] = got[cnt]; + if (!i_dquot(inode)[cnt]) { + i_dquot(inode)[cnt] = got[cnt]; got[cnt] = NULL; /* * Make quota reservation system happy if someone @@ -1442,7 +1447,7 @@ static void __dquot_initialize(struct inode *inode, int type) */ rsv = inode_get_rsv_space(inode); if (unlikely(rsv)) - dquot_resv_space(inode->i_dquot[cnt], rsv); + dquot_resv_space(i_dquot(inode)[cnt], rsv); } } out_err: @@ -1472,8 +1477,8 @@ static void __dquot_drop(struct inode *inode) spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - put[cnt] = inode->i_dquot[cnt]; - inode->i_dquot[cnt] = NULL; + put[cnt] = i_dquot(inode)[cnt]; + i_dquot(inode)[cnt] = NULL; } spin_unlock(&dq_data_lock); dqput_all(put); @@ -1494,7 +1499,7 @@ void dquot_drop(struct inode *inode) * add quota pointers back anyway. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) + if (i_dquot(inode)[cnt]) break; } @@ -1595,7 +1600,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = inode->i_dquot; + struct dquot **dquots = i_dquot(inode); int reserve = flags & DQUOT_SPACE_RESERVE; if (!dquot_active(inode)) { @@ -1643,11 +1648,11 @@ EXPORT_SYMBOL(__dquot_alloc_space); /* * This operation can block, but only after everything is updated */ -int dquot_alloc_inode(const struct inode *inode) +int dquot_alloc_inode(struct inode *inode) { int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = inode->i_dquot; + struct dquot * const *dquots = i_dquot(inode); if (!dquot_active(inode)) return 0; @@ -1696,14 +1701,14 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_claim_reserved_space(inode->i_dquot[cnt], + if (i_dquot(inode)[cnt]) + dquot_claim_reserved_space(i_dquot(inode)[cnt], number); } /* Update inode bytes */ inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(i_dquot(inode)); srcu_read_unlock(&dquot_srcu, index); return 0; } @@ -1725,14 +1730,14 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_reclaim_reserved_space(inode->i_dquot[cnt], + if (i_dquot(inode)[cnt]) + dquot_reclaim_reserved_space(i_dquot(inode)[cnt], number); } /* Update inode bytes */ inode_reclaim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(i_dquot(inode)); srcu_read_unlock(&dquot_srcu, index); return; } @@ -1745,7 +1750,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot **dquots = inode->i_dquot; + struct dquot **dquots = i_dquot(inode); int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!dquot_active(inode)) { @@ -1784,11 +1789,11 @@ EXPORT_SYMBOL(__dquot_free_space); /* * This operation can block, but only after everything is updated */ -void dquot_free_inode(const struct inode *inode) +void dquot_free_inode(struct inode *inode) { unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; - struct dquot * const *dquots = inode->i_dquot; + struct dquot * const *dquots = i_dquot(inode); int index; if (!dquot_active(inode)) @@ -1865,7 +1870,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) if (!sb_has_quota_active(inode->i_sb, cnt)) continue; is_valid[cnt] = 1; - transfer_from[cnt] = inode->i_dquot[cnt]; + transfer_from[cnt] = i_dquot(inode)[cnt]; ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]); if (ret) goto over_quota; @@ -1901,7 +1906,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) dquot_incr_space(transfer_to[cnt], cur_space); dquot_resv_space(transfer_to[cnt], rsv_space); - inode->i_dquot[cnt] = transfer_to[cnt]; + i_dquot(inode)[cnt] = transfer_to[cnt]; } spin_unlock(&dq_data_lock); @@ -2743,8 +2748,8 @@ static int __init dquot_init(void) for (i = 0; i < nr_hash; i++) INIT_HLIST_HEAD(dquot_hash + i); - printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n", - nr_hash, order, (PAGE_SIZE << order)); + pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld," + " %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); register_shrinker(&dqcache_shrinker); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 75621649dbd..2aa4151f99d 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -47,8 +47,11 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, static void quota_sync_one(struct super_block *sb, void *arg) { - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, *(int *)arg); + int type = *(int *)arg; + + if (sb->s_qcop && sb->s_qcop->quota_sync && + (sb->s_quota_types & (1 << type))) + sb->s_qcop->quota_sync(sb, type); } static int quota_sync_all(int type) @@ -297,8 +300,14 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS)) return -EINVAL; + /* + * Quota not supported on this fs? Check this before s_quota_types + * since they needn't be set if quota is not supported at all. + */ if (!sb->s_qcop) return -ENOSYS; + if (!(sb->s_quota_types & (1 << type))) + return -EINVAL; ret = check_quotactl_permission(sb, type, cmd, id); if (ret < 0) diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 1894d96ccb7..bb79cddf0a1 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -97,6 +97,10 @@ struct reiserfs_inode_info { #ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore i_xattr_sem; #endif +#ifdef CONFIG_QUOTA + struct dquot *i_dquot[MAXQUOTAS]; +#endif + struct inode vfs_inode; }; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f1376c92cf7..ea63ab13ef9 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -594,6 +594,10 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) return NULL; atomic_set(&ei->openers, 0); mutex_init(&ei->tailpack); +#ifdef CONFIG_QUOTA + memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); +#endif + return &ei->vfs_inode; } @@ -750,6 +754,11 @@ static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t, loff_t); + +static struct dquot **reiserfs_get_dquots(struct inode *inode) +{ + return REISERFS_I(inode)->i_dquot; +} #endif static const struct super_operations reiserfs_sops = { @@ -768,6 +777,7 @@ static const struct super_operations reiserfs_sops = { #ifdef CONFIG_QUOTA .quota_read = reiserfs_quota_read, .quota_write = reiserfs_quota_write, + .get_dquots = reiserfs_get_dquots, #endif }; @@ -1633,6 +1643,7 @@ static int read_super_block(struct super_block *s, int offset) #ifdef CONFIG_QUOTA s->s_qcop = &reiserfs_qctl_operations; s->dq_op = &reiserfs_quota_operations; + s->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif /* diff --git a/fs/udf/super.c b/fs/udf/super.c index e229315bbf7..3ccb2f11fc7 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2082,12 +2082,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) mutex_init(&sbi->s_alloc_mutex); if (!udf_parse_options((char *)options, &uopt, false)) - goto error_out; + goto parse_options_failure; if (uopt.flags & (1 << UDF_FLAG_UTF8) && uopt.flags & (1 << UDF_FLAG_NLS_MAP)) { udf_err(sb, "utf8 cannot be combined with iocharset\n"); - goto error_out; + goto parse_options_failure; } #ifdef CONFIG_UDF_NLS if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) { @@ -2237,8 +2237,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) return 0; error_out: - if (sbi->s_vat_inode) - iput(sbi->s_vat_inode); + iput(sbi->s_vat_inode); +parse_options_failure: #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); @@ -2291,8 +2291,7 @@ static void udf_put_super(struct super_block *sb) sbi = UDF_SB(sb); - if (sbi->s_vat_inode) - iput(sbi->s_vat_inode); + iput(sbi->s_vat_inode); #ifdef CONFIG_UDF_NLS if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) unload_nls(sbi->s_nls_map); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9f622feda6a..206b97fd1d8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1425,6 +1425,7 @@ xfs_fs_fill_super( sb->s_export_op = &xfs_export_operations; #ifdef CONFIG_XFS_QUOTA sb->s_qcop = &xfs_quotactl_operations; + sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &xfs_super_operations; |