From c9f3f2d8b3247b7e16b3cd66698e690ab4697301 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Thu, 18 Jul 2013 01:29:12 +0900 Subject: doc: Fix typo in doucmentations Correct typo (double words) in documentations. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- Documentation/filesystems/ext4.txt | 2 +- Documentation/filesystems/nfs/pnfs.txt | 2 +- Documentation/filesystems/relay.txt | 2 +- Documentation/filesystems/sysfs-tagging.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f7cbf574a87..a92c5aa8ce2 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -2,7 +2,7 @@ Ext4 Filesystem =============== -Ext4 is an an advanced level of the ext3 filesystem which incorporates +Ext4 is an advanced level of the ext3 filesystem which incorporates scalability and reliability enhancements for supporting large filesystems (64 bit) in keeping with increasing disk capacities and state-of-the-art feature requirements. diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt index 52ae07f5f57..adc81a35fe2 100644 --- a/Documentation/filesystems/nfs/pnfs.txt +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -12,7 +12,7 @@ struct pnfs_layout_hdr ---------------------- The on-the-wire command LAYOUTGET corresponds to struct pnfs_layout_segment, usually referred to by the variable name lseg. -Each nfs_inode may hold a pointer to a cache of of these layout +Each nfs_inode may hold a pointer to a cache of these layout segments in nfsi->layout, of type struct pnfs_layout_hdr. 
We reference the header for the inode pointing to it, across each diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt index 510b722667a..33e2f369473 100644 --- a/Documentation/filesystems/relay.txt +++ b/Documentation/filesystems/relay.txt @@ -31,7 +31,7 @@ Semantics Each relay channel has one buffer per CPU, each buffer has one or more sub-buffers. Messages are written to the first sub-buffer until it is -too full to contain a new message, in which case it it is written to +too full to contain a new message, in which case it is written to the next (if available). Messages are never split across sub-buffers. At this point, userspace can be notified so it empties the first sub-buffer, while the kernel continues writing to the next. diff --git a/Documentation/filesystems/sysfs-tagging.txt b/Documentation/filesystems/sysfs-tagging.txt index caaaf1266d8..eb843e49c5a 100644 --- a/Documentation/filesystems/sysfs-tagging.txt +++ b/Documentation/filesystems/sysfs-tagging.txt @@ -24,7 +24,7 @@ flag between KOBJ_NS_TYPE_NONE and KOBJ_NS_TYPES, and s_ns will point to the namespace to which it belongs. Each sysfs superblock's sysfs_super_info contains an array void -*ns[KOBJ_NS_TYPES]. When a a task in a tagging namespace +*ns[KOBJ_NS_TYPES]. When a task in a tagging namespace kobj_nstype first mounts sysfs, a new superblock is created. It will be differentiated from other sysfs mounts by having its s_fs_info->ns[kobj_nstype] set to the new namespace. Note that -- cgit v1.2.3-70-g09d2 From d51a7fba254b48aa7090a74d6b1455b6c41bc889 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 4 Jul 2013 17:12:47 +0900 Subject: f2fs: add description for fsck.f2fs and dump.f2fs This patch adds some description on fsck.f2fs and dump.f2fs which is recently merged into f2fs-tools. 
Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 43 ++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b91e2f26b67..0500c198cd0 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -18,8 +18,8 @@ according to its internal geometry or flash memory management scheme, namely FTL F2FS and its tools support various parameters not only for configuring on-disk layout, but also for selecting allocation and cleaning algorithms. -The file system formatting tool, "mkfs.f2fs", is available from the following -git tree: +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). >> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git For reporting bugs and sending patches, please use the following mailing list: @@ -149,8 +149,12 @@ USAGE # mkfs.f2fs -l label /dev/block_device # mount -t f2fs /dev/block_device /mnt/f2fs -Format options --------------- +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: -l [label] : Give a volume label, up to 512 unicode name. -a [0 or 1] : Split start location of each area for heap-based allocation. 1 is set by default, which performs this. @@ -164,6 +168,37 @@ Format options -t [0 or 1] : Disable discard command or not. 1 is set by default, which conducts discard. +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. 
+ +The options consist of: + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information recognized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of: + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples: +# dump.f2fs -i [ino] /dev/sdx +# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) +# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + ================================================================================ DESIGN ================================================================================ -- cgit v1.2.3-70-g09d2 From cf7eff4666629de006c5ed78de79e40f483c3b06 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 31 Jul 2013 14:33:00 -0500 Subject: ext3: allow specifying external journal by pathname mount option It's always been a hassle that if an external journal's device number changes, the filesystem won't mount. And since boot-time enumeration can change, device number changes aren't unusual. The current mechanism to update the journal location is by passing in a mount option w/ a new devnum, but that's a hassle; it's a manual approach, fixing things after the fact. Adding a mount option, "-o journal_path=/dev/$DEVICE" would help, since then we can do i.e. # mount -o journal_path=/dev/disk/by-label/$JOURNAL_LABEL ...
and it'll mount even if the devnum has changed, as shown here: # losetup /dev/loop0 journalfile # mke2fs -L mylabel-journal -O journal_dev /dev/loop0 # mkfs.ext3 -L mylabel -J device=/dev/loop0 /dev/sdb1 Change the journal device number: # losetup -d /dev/loop0 # losetup /dev/loop1 journalfile And today it will fail: # mount /dev/sdb1 /mnt/test mount: wrong fs type, bad option, bad superblock on /dev/sdb1, missing codepage or helper program, or other error In some cases useful info is found in syslog - try dmesg | tail or so # dmesg | tail -n 1 [17343.240702] EXT3-fs (sdb1): error: couldn't read superblock of external journal But with this new mount option, we can specify the new path: # mount -o journal_path=/dev/loop1 /dev/sdb1 /mnt/test # (which does update the encoded device number, incidentally): # umount /dev/sdb1 # dumpe2fs -h /dev/sdb1 | grep "Journal device" dumpe2fs 1.41.12 (17-May-2010) Journal device: 0x0701 But best of all we can just always mount by journal-path, and it'll always work: # mount -o journal_path=/dev/disk/by-label/mylabel-journal /dev/sdb1 /mnt/test # So the journal_path option can be specified in fstab, and as long as the disk is available somewhere, and findable by label (or by UUID), we can mount. Signed-off-by: Eric Sandeen Signed-off-by: Jan Kara --- Documentation/filesystems/ext3.txt | 7 ++++--- fs/ext3/super.c | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 293855e9500..7ed0d17d672 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -26,11 +26,12 @@ journal=inum When a journal already exists, this option is ignored. Otherwise, it specifies the number of the inode which will represent the ext3 file system's journal file. 
+journal_path=path journal_dev=devnum When the external journal device's major/minor numbers - have changed, this option allows the user to specify + have changed, these options allow the user to specify the new journal location. The journal device is - identified through its new major/minor numbers encoded - in devnum. + identified through either its new major/minor numbers + encoded in devnum, or via a path to the device. norecovery Don't load the journal on mounting. Note that this forces noload mount of inconsistent filesystem, which can lead to diff --git a/fs/ext3/super.c b/fs/ext3/super.c index c47f1475072..c50c7619037 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -819,6 +820,7 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_journal_path, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, @@ -860,6 +862,7 @@ static const match_table_t tokens = { {Opt_journal_update, "journal=update"}, {Opt_journal_inum, "journal=%u"}, {Opt_journal_dev, "journal_dev=%u"}, + {Opt_journal_path, "journal_path=%s"}, {Opt_abort, "abort"}, {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, @@ -975,6 +978,11 @@ static int parse_options (char *options, struct super_block *sb, int option; kuid_t uid; kgid_t gid; + char *journal_path; + struct inode *journal_inode; + struct path path; + int error; + #ifdef CONFIG_QUOTA int qfmt; #endif @@ -1129,6 +1137,41 @@ static int parse_options (char *options, struct super_block *sb, return 0; *journal_devnum = option; break; + case Opt_journal_path: + if (is_remount) { + ext3_msg(sb, KERN_ERR, "error: cannot specify " + "journal on remount"); + return 0; + } + + journal_path = 
match_strdup(&args[0]); + if (!journal_path) { + ext3_msg(sb, KERN_ERR, "error: could not dup " + "journal device string"); + return 0; + } + + error = kern_path(journal_path, LOOKUP_FOLLOW, &path); + if (error) { + ext3_msg(sb, KERN_ERR, "error: could not find " + "journal device path: error %d", error); + kfree(journal_path); + return 0; + } + + journal_inode = path.dentry->d_inode; + if (!S_ISBLK(journal_inode->i_mode)) { + ext3_msg(sb, KERN_ERR, "error: journal path %s " + "is not a block device", journal_path); + path_put(&path); + kfree(journal_path); + return 0; + } + + *journal_devnum = new_encode_dev(journal_inode->i_rdev); + path_put(&path); + kfree(journal_path); + break; case Opt_noload: set_opt (sbi->s_mount_opt, NOLOAD); break; -- cgit v1.2.3-70-g09d2 From b59d0bae6ca30c496f298881616258f9cde0d9c6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 4 Aug 2013 23:09:40 +0900 Subject: f2fs: add sysfs support for controlling the gc_thread Add sysfs entries to control the timing parameters for f2fs gc thread. 
Various Sysfs options introduced are: gc_min_sleep_time: Min Sleep time for GC in ms gc_max_sleep_time: Max Sleep time for GC in ms gc_no_gc_sleep_time: Default Sleep time for GC in ms Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: fix an umount bug and some minor changes] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 20 ++++++ Documentation/filesystems/f2fs.txt | 26 +++++++ fs/f2fs/f2fs.h | 4 ++ fs/f2fs/gc.c | 17 +++-- fs/f2fs/gc.h | 33 +++++---- fs/f2fs/super.c | 123 ++++++++++++++++++++++++++++++++ 6 files changed, 203 insertions(+), 20 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-fs-f2fs (limited to 'Documentation/filesystems') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs new file mode 100644 index 00000000000..98e53a09530 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -0,0 +1,20 @@ +What: /sys/fs/f2fs//gc_max_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the maximum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_min_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the minimum sleep time for gc_thread. Time + is in milliseconds. + +What: /sys/fs/f2fs//gc_no_gc_sleep_time +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the default sleep time for gc_thread. Time + is in milliseconds. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0500c198cd0..5daf3bb2eef 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -132,6 +132,32 @@ f2fs. Each file shows the whole f2fs information. - average SIT information about whole segments - current memory footprint consumed by f2fs.
+================================================================================ +SYSFS ENTRIES +================================================================================ + +Information about mounted f2fs file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (e.g., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in the table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) +.............................................................................. + File Content + + gc_max_sleep_time This tuning parameter controls the maximum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_min_sleep_time This tuning parameter controls the minimum sleep + time for the garbage collection thread. Time is + in milliseconds. + + gc_no_gc_sleep_time This tuning parameter controls the default sleep + time for the garbage collection thread. Time is + in milliseconds.
+ ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 78777cdb89d..63813befdd8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -430,6 +430,10 @@ struct f2fs_sb_info { #endif unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ + + /* For sysfs suppport */ + struct kobject s_kobj; + struct completion s_kobj_unregister; }; /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 35f9b1a196a..60d4f674efa 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -29,10 +29,11 @@ static struct kmem_cache *winode_slab; static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; long wait_ms; - wait_ms = GC_THREAD_MIN_SLEEP_TIME; + wait_ms = gc_th->min_sleep_time; do { if (try_to_freeze()) @@ -45,7 +46,7 @@ static int gc_thread_func(void *data) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { - wait_ms = GC_THREAD_MAX_SLEEP_TIME; + wait_ms = increase_sleep_time(gc_th, wait_ms); continue; } @@ -66,15 +67,15 @@ static int gc_thread_func(void *data) continue; if (!is_idle(sbi)) { - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); mutex_unlock(&sbi->gc_mutex); continue; } if (has_enough_invalid_blocks(sbi)) - wait_ms = decrease_sleep_time(wait_ms); + wait_ms = decrease_sleep_time(gc_th, wait_ms); else - wait_ms = increase_sleep_time(wait_ms); + wait_ms = increase_sleep_time(gc_th, wait_ms); #ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; @@ -82,7 +83,7 @@ static int gc_thread_func(void *data) /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) - wait_ms = GC_THREAD_NOGC_SLEEP_TIME; + wait_ms = gc_th->no_gc_sleep_time; } while (!kthread_should_stop()); return 0; } @@ -101,6 +102,10 
@@ int start_gc_thread(struct f2fs_sb_info *sbi) goto out; } + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 2c6a6bd0832..f4bf44c9ded 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -13,9 +13,9 @@ * whether IO subsystem is idle * or not */ -#define GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ -#define GC_THREAD_MAX_SLEEP_TIME 60000 -#define GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */ +#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ @@ -25,6 +25,11 @@ struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; + + /* for gc sleep time */ + unsigned int min_sleep_time; + unsigned int max_sleep_time; + unsigned int no_gc_sleep_time; }; struct inode_entry { @@ -56,25 +61,25 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi) return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100; } -static inline long increase_sleep_time(long wait) +static inline long increase_sleep_time(struct f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) + if (wait == gc_th->no_gc_sleep_time) return wait; - wait += GC_THREAD_MIN_SLEEP_TIME; - if (wait > GC_THREAD_MAX_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + wait += gc_th->min_sleep_time; + if (wait > gc_th->max_sleep_time) + wait = gc_th->max_sleep_time; return wait; } -static inline long decrease_sleep_time(long wait) +static inline long decrease_sleep_time(struct 
f2fs_gc_kthread *gc_th, long wait) { - if (wait == GC_THREAD_NOGC_SLEEP_TIME) - wait = GC_THREAD_MAX_SLEEP_TIME; + if (wait == gc_th->no_gc_sleep_time) + wait = gc_th->max_sleep_time; - wait -= GC_THREAD_MIN_SLEEP_TIME; - if (wait <= GC_THREAD_MIN_SLEEP_TIME) - wait = GC_THREAD_MIN_SLEEP_TIME; + wait -= gc_th->min_sleep_time; + if (wait <= gc_th->min_sleep_time) + wait = gc_th->min_sleep_time; return wait; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 70dbb313a7c..e161a24fbf3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -23,17 +23,21 @@ #include #include #include +#include +#include #include "f2fs.h" #include "node.h" #include "segment.h" #include "xattr.h" +#include "gc.h" #define CREATE_TRACE_POINTS #include static struct proc_dir_entry *f2fs_proc_root; static struct kmem_cache *f2fs_inode_cachep; +static struct kset *f2fs_kset; enum { Opt_gc_background, @@ -59,6 +63,111 @@ static match_table_t f2fs_tokens = { {Opt_err, NULL}, }; +/* Sysfs support for f2fs */ +struct f2fs_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); + ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *, + const char *, size_t); + int offset; +}; + +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned int *ui; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + return snprintf(buf, PAGE_SIZE, "%u\n", *ui); +} + +static ssize_t f2fs_sbi_store(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, + const char *buf, size_t count) +{ + struct f2fs_gc_kthread *gc_kth = sbi->gc_thread; + unsigned long t; + unsigned int *ui; + ssize_t ret; + + if (!gc_kth) + return -EINVAL; + + ui = (unsigned int *)(((char *)gc_kth) + a->offset); + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret < 0) + return ret; + *ui = t; + return count; +} + +static ssize_t f2fs_attr_show(struct kobject *kobj, 
+ struct attribute *attr, char *buf) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->show ? a->show(a, sbi, buf) : 0; +} + +static ssize_t f2fs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + struct f2fs_attr *a = container_of(attr, struct f2fs_attr, attr); + + return a->store ? a->store(a, sbi, buf, len) : 0; +} + +static void f2fs_sb_release(struct kobject *kobj) +{ + struct f2fs_sb_info *sbi = container_of(kobj, struct f2fs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +#define F2FS_ATTR_OFFSET(_name, _mode, _show, _store, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + .offset = offsetof(struct f2fs_gc_kthread, _elname), \ +} + +#define F2FS_RW_ATTR(name, elname) \ + F2FS_ATTR_OFFSET(name, 0644, f2fs_sbi_show, f2fs_sbi_store, elname) + +F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); +F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); +F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); + +#define ATTR_LIST(name) (&f2fs_attr_##name.attr) +static struct attribute *f2fs_attrs[] = { + ATTR_LIST(gc_min_sleep_time), + ATTR_LIST(gc_max_sleep_time), + ATTR_LIST(gc_no_gc_sleep_time), + NULL, +}; + +static const struct sysfs_ops f2fs_attr_ops = { + .show = f2fs_attr_show, + .store = f2fs_attr_store, +}; + +static struct kobj_type f2fs_ktype = { + .default_attrs = f2fs_attrs, + .sysfs_ops = &f2fs_attr_ops, + .release = f2fs_sb_release, +}; + void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) 
{ struct va_format vaf; @@ -229,6 +338,7 @@ static void f2fs_put_super(struct super_block *sb) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } + kobject_del(&sbi->s_kobj); f2fs_destroy_stats(sbi); stop_gc_thread(sbi); @@ -243,6 +353,8 @@ static void f2fs_put_super(struct super_block *sb) destroy_segment_manager(sbi); kfree(sbi->ckpt); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); sb->s_fs_info = NULL; brelse(sbi->raw_super_buf); @@ -818,6 +930,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "the device does not support discard"); } + sbi->s_kobj.kset = f2fs_kset; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, + "%s", sb->s_id); + if (err) + goto fail; + return 0; fail: stop_gc_thread(sbi); @@ -892,6 +1011,9 @@ static int __init init_f2fs_fs(void) err = create_checkpoint_caches(); if (err) goto fail; + f2fs_kset = kset_create_and_add("f2fs", NULL, fs_kobj); + if (!f2fs_kset) + goto fail; err = register_filesystem(&f2fs_fs_type); if (err) goto fail; @@ -910,6 +1032,7 @@ static void __exit exit_f2fs_fs(void) destroy_gc_caches(); destroy_node_manager_caches(); destroy_inodecache(); + kset_unregister(f2fs_kset); } module_init(init_f2fs_fs) -- cgit v1.2.3-70-g09d2 From d2dc095f4280ad5fdea33769e8e119fd16648426 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 4 Aug 2013 23:10:15 +0900 Subject: f2fs: add sysfs entries to select the gc policy Add sysfs entry gc_idle to control the gc policy. Where gc_idle = 1 corresponds to selecting a cost benefit approach, while gc_idle = 2 corresponds to selecting a greedy approach to garbage collection. The selection is mutually exclusive one approach will work at any point. If gc_idle = 0, then this option is disabled. 
Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: change the select_gc_type() flow slightly] Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 6 ++++++ Documentation/filesystems/f2fs.txt | 6 ++++++ fs/f2fs/gc.c | 16 +++++++++++++--- fs/f2fs/gc.h | 3 +++ fs/f2fs/super.c | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 98e53a09530..31942efcaf0 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -18,3 +18,9 @@ Contact: "Namjae Jeon" Description: Controls the default sleep time for gc_thread. Time is in milliseconds. + +What: /sys/fs/f2fs//gc_idle +Date: July 2013 +Contact: "Namjae Jeon" +Description: + Controls the victim selection policy for garbage collection. diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 5daf3bb2eef..3cd27bed634 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -158,6 +158,12 @@ Files in /sys/fs/f2fs/ time for the garbage collection thread. Time is in milliseconds. + gc_idle This parameter controls the selection of victim + policy for garbage collection. Setting gc_idle = 0 + (default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach + & setting gc_idle = 2 will select the greedy approach.
+ ================================================================================ USAGE ================================================================================ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 60d4f674efa..d286d8be8e6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -106,6 +106,8 @@ int start_gc_thread(struct f2fs_sb_info *sbi) gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->gc_idle = 0; + sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, @@ -130,9 +132,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread = NULL; } -static int select_gc_type(int gc_type) +static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type) { - return (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY; + + if (gc_th && gc_th->gc_idle) { + if (gc_th->gc_idle == 1) + gc_mode = GC_CB; + else if (gc_th->gc_idle == 2) + gc_mode = GC_GREEDY; + } + return gc_mode; } static void select_policy(struct f2fs_sb_info *sbi, int gc_type, @@ -145,7 +155,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->dirty_segmap = dirty_i->dirty_segmap[type]; p->ofs_unit = 1; } else { - p->gc_mode = select_gc_type(gc_type); + p->gc_mode = select_gc_type(sbi->gc_thread, gc_type); p->dirty_segmap = dirty_i->dirty_segmap[DIRTY]; p->ofs_unit = sbi->segs_per_sec; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index f4bf44c9ded..c22dee9f142 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -30,6 +30,9 @@ struct f2fs_gc_kthread { unsigned int min_sleep_time; unsigned int max_sleep_time; unsigned int no_gc_sleep_time; + + /* for changing gc mode */ + unsigned int gc_idle; }; struct inode_entry { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e161a24fbf3..94c0e204954 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -148,12 +148,14 @@ static struct 
f2fs_attr f2fs_attr_##_name = { \ F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +F2FS_RW_ATTR(gc_idle, gc_idle); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_idle), NULL, }; -- cgit v1.2.3-70-g09d2 From 9ed354b732689c9dbff6820ce57196e7e499c1fc Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 20 Aug 2013 20:33:17 +0900 Subject: doc: filesystems : Fix typo in Documentations/filesystems Correct spelling typo in Documentations/filesystems. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- Documentation/filesystems/btrfs.txt | 2 +- Documentation/filesystems/nfs/Exporting | 2 +- Documentation/filesystems/qnx6.txt | 2 +- Documentation/filesystems/xfs.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt index b349d57b76e..9dae5940743 100644 --- a/Documentation/filesystems/btrfs.txt +++ b/Documentation/filesystems/btrfs.txt @@ -87,7 +87,7 @@ Unless otherwise specified, all options default to off. device= Specify a device during mount so that ioctls on the control device - can be avoided. Especialy useful when trying to mount a multi-device + can be avoided. Especially useful when trying to mount a multi-device setup as root. May be specified multiple times for multiple devices. 
discard diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting index 09994c24728..e543b1a619c 100644 --- a/Documentation/filesystems/nfs/Exporting +++ b/Documentation/filesystems/nfs/Exporting @@ -93,7 +93,7 @@ For a filesystem to be exportable it must: 2/ make sure that d_splice_alias is used rather than d_add when ->lookup finds an inode for a given parent and name. - If inode is NULL, d_splice_alias(inode, dentry) is eqivalent to + If inode is NULL, d_splice_alias(inode, dentry) is equivalent to d_add(dentry, inode), NULL diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt index 99e90184a72..40867978913 100644 --- a/Documentation/filesystems/qnx6.txt +++ b/Documentation/filesystems/qnx6.txt @@ -149,7 +149,7 @@ Bitmap system area ------------------ The bitmap itself is divided into three parts. -First the system area, that is split into two halfs. +First the system area, that is split into two halves. Then userspace. The requirement for a static, fixed preallocated system area comes from how diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 12525b17d9e..5be51fd888b 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -135,7 +135,7 @@ default behaviour. If the memory cost of 8 log buffers is too high on small systems, then it may be reduced at some cost to performance on metadata intensive workloads. The logbsize option below - controls the size of each buffer and so is also relevent to + controls the size of each buffer and so is also relevant to this case. logbsize=value -- cgit v1.2.3-70-g09d2 From ad4eec613536dc7e5ea0c6e59849e6edca634d8b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 28 Aug 2013 19:05:07 -0400 Subject: ext4: allow specifying external journal by pathname mount option It's always been a hassle that if an external journal's device number changes, the filesystem won't mount. 
And since boot-time enumeration can change, device number changes aren't unusual. The current mechanism to update the journal location is by passing in a mount option w/ a new devnum, but that's a hassle; it's a manual approach, fixing things after the fact. Adding a mount option, "-o journal_path=/dev/$DEVICE" would help, since then we can do i.e. # mount -o journal_path=/dev/disk/by-label/$JOURNAL_LABEL ... and it'll mount even if the devnum has changed, as shown here: # losetup /dev/loop0 journalfile # mke2fs -L mylabel-journal -O journal_dev /dev/loop0 # mkfs.ext4 -L mylabel -J device=/dev/loop0 /dev/sdb1 Change the journal device number: # losetup -d /dev/loop0 # losetup /dev/loop1 journalfile And today it will fail: # mount /dev/sdb1 /mnt/test mount: wrong fs type, bad option, bad superblock on /dev/sdb1, missing codepage or helper program, or other error In some cases useful info is found in syslog - try dmesg | tail or so # dmesg | tail -n 1 [17343.240702] EXT4-fs (sdb1): error: couldn't read superblock of external journal But with this new mount option, we can specify the new path: # mount -o journal_path=/dev/loop1 /dev/sdb1 /mnt/test # (which does update the encoded device number, incidentally): # umount /dev/sdb1 # dumpe2fs -h /dev/sdb1 | grep "Journal device" dumpe2fs 1.41.12 (17-May-2010) Journal device: 0x0701 But best of all we can just always mount by journal-path, and it'll always work: # mount -o journal_path=/dev/disk/by-label/mylabel-journal /dev/sdb1 /mnt/test # So the journal_path option can be specified in fstab, and as long as the disk is available somewhere, and findable by label (or by UUID), we can mount. 
Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" Reviewed-by: Jan Kara Reviewed-by: Carlos Maiolino --- Documentation/filesystems/ext4.txt | 7 +++--- fs/ext4/super.c | 47 +++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 6 deletions(-) (limited to 'Documentation/filesystems') diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index f7cbf574a87..b91cfaaf6a0 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -144,11 +144,12 @@ journal_async_commit Commit block can be written to disk without waiting mount the device. This will enable 'journal_checksum' internally. +journal_path=path journal_dev=devnum When the external journal device's major/minor numbers - have changed, this option allows the user to specify + have changed, these options allow the user to specify the new journal location. The journal device is - identified through its new major/minor numbers encoded - in devnum. + identified through either its new major/minor numbers + encoded in devnum, or via a path to the device. norecovery Don't load the journal on mounting. 
Note that noload if the filesystem was not unmounted cleanly, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b59373b625e..42337141e79 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1134,8 +1134,8 @@ enum { Opt_nouid32, Opt_debug, Opt_removed, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, - Opt_commit, Opt_min_batch_time, Opt_max_batch_time, - Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, + Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, + Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, @@ -1179,6 +1179,7 @@ static const match_table_t tokens = { {Opt_min_batch_time, "min_batch_time=%u"}, {Opt_max_batch_time, "max_batch_time=%u"}, {Opt_journal_dev, "journal_dev=%u"}, + {Opt_journal_path, "journal_path=%s"}, {Opt_journal_checksum, "journal_checksum"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_abort, "abort"}, @@ -1338,6 +1339,7 @@ static int clear_qf_name(struct super_block *sb, int qtype) #define MOPT_NO_EXT2 0x0100 #define MOPT_NO_EXT3 0x0200 #define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3) +#define MOPT_STRING 0x0400 static const struct mount_opts { int token; @@ -1387,6 +1389,7 @@ static const struct mount_opts { {Opt_resuid, 0, MOPT_GTE0}, {Opt_resgid, 0, MOPT_GTE0}, {Opt_journal_dev, 0, MOPT_GTE0}, + {Opt_journal_path, 0, MOPT_STRING}, {Opt_journal_ioprio, 0, MOPT_GTE0}, {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, @@ -1480,7 +1483,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; } - if (args->from && match_int(args, &arg)) + if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) return -1; 
if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) return -1; @@ -1544,6 +1547,44 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, return -1; } *journal_devnum = arg; + } else if (token == Opt_journal_path) { + char *journal_path; + struct inode *journal_inode; + struct path path; + int error; + + if (is_remount) { + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); + return -1; + } + journal_path = match_strdup(&args[0]); + if (!journal_path) { + ext4_msg(sb, KERN_ERR, "error: could not dup " + "journal device string"); + return -1; + } + + error = kern_path(journal_path, LOOKUP_FOLLOW, &path); + if (error) { + ext4_msg(sb, KERN_ERR, "error: could not find " + "journal device path: error %d", error); + kfree(journal_path); + return -1; + } + + journal_inode = path.dentry->d_inode; + if (!S_ISBLK(journal_inode->i_mode)) { + ext4_msg(sb, KERN_ERR, "error: journal path %s " + "is not a block device", journal_path); + path_put(&path); + kfree(journal_path); + return -1; + } + + *journal_devnum = new_encode_dev(journal_inode->i_rdev); + path_put(&path); + kfree(journal_path); } else if (token == Opt_journal_ioprio) { if (arg > 7) { ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" -- cgit v1.2.3-70-g09d2