From b59d0bae6ca30c496f298881616258f9cde0d9c6 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sun, 4 Aug 2013 23:09:40 +0900
Subject: f2fs: add sysfs support for controlling the gc_thread

Add sysfs entries to control the timing parameters for
f2fs gc thread.

Various Sysfs options introduced are:
gc_min_sleep_time: Min Sleep time for GC in ms
gc_max_sleep_time: Max Sleep time for GC in ms
gc_no_gc_sleep_time: Default Sleep time for GC in ms

Cc: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Pankaj Kumar <pankaj.km@samsung.com>
Reviewed-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: fix an umount bug and some minor changes]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/gc.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'fs/f2fs/gc.c')

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 35f9b1a196a..60d4f674efa 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -29,10 +29,11 @@ static struct kmem_cache *winode_slab;
 static int gc_thread_func(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
+	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
 	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
 	long wait_ms;
 
-	wait_ms = GC_THREAD_MIN_SLEEP_TIME;
+	wait_ms = gc_th->min_sleep_time;
 
 	do {
 		if (try_to_freeze())
@@ -45,7 +46,7 @@ static int gc_thread_func(void *data)
 			break;
 
 		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
-			wait_ms = GC_THREAD_MAX_SLEEP_TIME;
+			wait_ms = increase_sleep_time(gc_th, wait_ms);
 			continue;
 		}
 
@@ -66,15 +67,15 @@ static int gc_thread_func(void *data)
 			continue;
 
 		if (!is_idle(sbi)) {
-			wait_ms = increase_sleep_time(wait_ms);
+			wait_ms = increase_sleep_time(gc_th, wait_ms);
 			mutex_unlock(&sbi->gc_mutex);
 			continue;
 		}
 
 		if (has_enough_invalid_blocks(sbi))
-			wait_ms = decrease_sleep_time(wait_ms);
+			wait_ms = decrease_sleep_time(gc_th, wait_ms);
 		else
-			wait_ms = increase_sleep_time(wait_ms);
+			wait_ms = increase_sleep_time(gc_th, wait_ms);
 
 #ifdef CONFIG_F2FS_STAT_FS
 		sbi->bg_gc++;
@@ -82,7 +83,7 @@ static int gc_thread_func(void *data)
 
 		/* if return value is not zero, no victim was selected */
 		if (f2fs_gc(sbi))
-			wait_ms = GC_THREAD_NOGC_SLEEP_TIME;
+			wait_ms = gc_th->no_gc_sleep_time;
 	} while (!kthread_should_stop());
 	return 0;
 }
@@ -101,6 +102,10 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
 		goto out;
 	}
 
+	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
+	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+
 	sbi->gc_thread = gc_th;
 	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
 	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
-- 
cgit v1.2.3-70-g09d2


From d2dc095f4280ad5fdea33769e8e119fd16648426 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Sun, 4 Aug 2013 23:10:15 +0900
Subject: f2fs: add sysfs entries to select the gc policy

Add sysfs entry gc_idle to control the gc policy. Where
gc_idle = 1 corresponds to selecting a cost benefit approach,
while gc_idle = 2 corresponds to selecting a greedy approach
to garbage collection. The selection is mutually exclusive one
approach will work at any point. If gc_idle = 0, then this
option is disabled.

Cc: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Pankaj Kumar <pankaj.km@samsung.com>
Reviewed-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
[Jaegeuk Kim: change the select_gc_type() flow slightly]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 Documentation/filesystems/f2fs.txt      |  6 ++++++
 fs/f2fs/gc.c                            | 16 +++++++++++++---
 fs/f2fs/gc.h                            |  3 +++
 fs/f2fs/super.c                         |  2 ++
 5 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'fs/f2fs/gc.c')

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 98e53a09530..31942efcaf0 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -18,3 +18,9 @@ Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
 Description:
 		 Controls the default sleep time for gc_thread. Time
 		 is in milliseconds.
+
+What:		/sys/fs/f2fs/<disk>/gc_idle
+Date:		July 2013
+Contact:	"Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+		 Controls the victim selection policy for garbage collection.
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 5daf3bb2eef..3cd27bed634 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -158,6 +158,12 @@ Files in /sys/fs/f2fs/<devname>
                               time for the garbage collection thread. Time is
                               in milliseconds.
 
+ gc_idle                      This parameter controls the selection of victim
+                              policy for garbage collection. Setting gc_idle = 0
+                              (default) will disable this option. Setting
+                              gc_idle = 1 will select the Cost Benefit approach
+                              & setting gc_idle = 2 will select the greedy aproach.
+
 ================================================================================
 USAGE
 ================================================================================
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 60d4f674efa..d286d8be8e6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -106,6 +106,8 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
 	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
 	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
 
+	gc_th->gc_idle = 0;
+
 	sbi->gc_thread = gc_th;
 	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
 	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
@@ -130,9 +132,17 @@ void stop_gc_thread(struct f2fs_sb_info *sbi)
 	sbi->gc_thread = NULL;
 }
 
-static int select_gc_type(int gc_type)
+static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
 {
-	return (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+
+	if (gc_th && gc_th->gc_idle) {
+		if (gc_th->gc_idle == 1)
+			gc_mode = GC_CB;
+		else if (gc_th->gc_idle == 2)
+			gc_mode = GC_GREEDY;
+	}
+	return gc_mode;
 }
 
 static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
@@ -145,7 +155,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 		p->dirty_segmap = dirty_i->dirty_segmap[type];
 		p->ofs_unit = 1;
 	} else {
-		p->gc_mode = select_gc_type(gc_type);
+		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
 		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
 		p->ofs_unit = sbi->segs_per_sec;
 	}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index f4bf44c9ded..c22dee9f142 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -30,6 +30,9 @@ struct f2fs_gc_kthread {
 	unsigned int min_sleep_time;
 	unsigned int max_sleep_time;
 	unsigned int no_gc_sleep_time;
+
+	/* for changing gc mode */
+	unsigned int gc_idle;
 };
 
 struct inode_entry {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e161a24fbf3..94c0e204954 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -148,12 +148,14 @@ static struct f2fs_attr f2fs_attr_##_name = {			\
 F2FS_RW_ATTR(gc_min_sleep_time, min_sleep_time);
 F2FS_RW_ATTR(gc_max_sleep_time, max_sleep_time);
 F2FS_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
+F2FS_RW_ATTR(gc_idle, gc_idle);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(gc_min_sleep_time),
 	ATTR_LIST(gc_max_sleep_time),
 	ATTR_LIST(gc_no_gc_sleep_time),
+	ATTR_LIST(gc_idle),
 	NULL,
 };
 
-- 
cgit v1.2.3-70-g09d2


From a569469e967022d9ceeaa4b73619f96614087d2d Mon Sep 17 00:00:00 2001
From: Jin Xu <jinuxstyle@gmail.com>
Date: Mon, 5 Aug 2013 20:02:04 +0800
Subject: f2fs: fix a deadlock in fsync

This patch fixes a deadlock bug that occurs quite often when there are
concurrent write and fsync on a same file.

Following is the simplified call trace when tasks get hung.

fsync thread:
- f2fs_sync_file
 ...
 - f2fs_write_data_pages
 ...
  - update_extent_cache
  ...
   - update_inode
    - wait_on_page_writeback

bdi writeback thread
- __writeback_single_inode
 - f2fs_write_data_pages
  - mutex_lock(sbi->writepages)

The deadlock happens when the fsync thread waits on a inode page that has
been added to the f2fs' cached bio sbi->bio[NODE], and unfortunately,
no one else could be able to submit the cached bio to block layer for
writeback. This is because the fsync thread already hold a sbi->fs_lock and
the sbi->writepages lock, causing the bdi thread being blocked when attempt
to write data pages for the same inode. At the same time, f2fs_gc thread
does not notice the situation and could not help. Even the sync syscall
gets blocked.

To fix it, we could submit the cached bio first before waiting on a inode page
that is being written back.

Signed-off-by: Jin Xu <jinuxstyle@gmail.com>
[Jaegeuk Kim: add more cases to use f2fs_wait_on_page_writeback]
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c    |  2 +-
 fs/f2fs/f2fs.h    |  3 ++-
 fs/f2fs/gc.c      |  8 ++------
 fs/f2fs/inode.c   |  2 +-
 fs/f2fs/segment.c | 10 ++++++++++
 5 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'fs/f2fs/gc.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f458883af81..a7eb5292572 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -37,7 +37,7 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
 	struct page *node_page = dn->node_page;
 	unsigned int ofs_in_node = dn->ofs_in_node;
 
-	wait_on_page_writeback(node_page);
+	f2fs_wait_on_page_writeback(node_page, NODE, false);
 
 	rn = F2FS_NODE(node_page);
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 63813befdd8..13db10b70e6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1023,7 +1023,8 @@ int npages_for_summary_flush(struct f2fs_sb_info *);
 void allocate_new_segments(struct f2fs_sb_info *);
 struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
 struct bio *f2fs_bio_alloc(struct block_device *, int);
-void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync);
+void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
 void write_meta_page(struct f2fs_sb_info *, struct page *);
 void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int,
 					block_t, block_t *);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d286d8be8e6..e6b3ffd5ff6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -422,8 +422,7 @@ next_step:
 
 		/* set page dirty and write it */
 		if (gc_type == FG_GC) {
-			f2fs_submit_bio(sbi, NODE, true);
-			wait_on_page_writeback(node_page);
+			f2fs_wait_on_page_writeback(node_page, NODE, true);
 			set_page_dirty(node_page);
 		} else {
 			if (!PageWriteback(node_page))
@@ -523,10 +522,7 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type)
 	} else {
 		struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 
-		if (PageWriteback(page)) {
-			f2fs_submit_bio(sbi, DATA, true);
-			wait_on_page_writeback(page);
-		}
+		f2fs_wait_on_page_writeback(page, DATA, true);
 
 		if (clear_page_dirty_for_io(page) &&
 			S_ISDIR(inode->i_mode)) {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index debf7430804..9ab81e7472c 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -151,7 +151,7 @@ void update_inode(struct inode *inode, struct page *node_page)
 	struct f2fs_node *rn;
 	struct f2fs_inode *ri;
 
-	wait_on_page_writeback(node_page);
+	f2fs_wait_on_page_writeback(node_page, NODE, false);
 
 	rn = F2FS_NODE(node_page);
 	ri = &(rn->i);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9b74ae2137d..68e344f9e04 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -705,6 +705,16 @@ retry:
 	trace_f2fs_submit_write_page(page, blk_addr, type);
 }
 
+void f2fs_wait_on_page_writeback(struct page *page,
+				enum page_type type, bool sync)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
+	if (PageWriteback(page)) {
+		f2fs_submit_bio(sbi, type, sync);
+		wait_on_page_writeback(page);
+	}
+}
+
 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
-- 
cgit v1.2.3-70-g09d2


From de93653fe31fc9439971296842dcd0280f8ab5f4 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Date: Mon, 12 Aug 2013 21:08:03 +0900
Subject: f2fs: reserve the xattr space dynamically

This patch enables the number of direct pointers inside on-disk inode block to
be changed dynamically according to the size of inline xattr space.

The number of direct pointers, ADDRS_PER_INODE, can be changed only if the file
has inline xattr flag.

The number of direct pointers that will be used by inline xattrs is defined as
F2FS_INLINE_XATTR_ADDRS.
Current patch assigns F2FS_INLINE_XATTR_ADDRS to 0 temporarily.

Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c          |  5 +++--
 fs/f2fs/f2fs.h          | 18 ++++++++----------
 fs/f2fs/file.c          |  2 +-
 fs/f2fs/gc.c            |  9 ++++++---
 fs/f2fs/node.c          |  9 +++++----
 fs/f2fs/recovery.c      | 13 +++++++++----
 fs/f2fs/super.c         |  2 +-
 include/linux/f2fs_fs.h | 16 ++++++++++++----
 8 files changed, 45 insertions(+), 29 deletions(-)

(limited to 'fs/f2fs/gc.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6b328de4172..941f9b9ca3a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -117,7 +117,8 @@ void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
 	block_t start_blkaddr, end_blkaddr;
 
 	BUG_ON(blk_addr == NEW_ADDR);
-	fofs = start_bidx_of_node(ofs_of_node(dn->node_page)) + dn->ofs_in_node;
+	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+							dn->ofs_in_node;
 
 	/* Update the page address in the parent node */
 	__set_data_blkaddr(dn, blk_addr);
@@ -448,7 +449,7 @@ static int get_data_block_ro(struct inode *inode, sector_t iblock,
 		unsigned int end_offset;
 
 		end_offset = IS_INODE(dn.node_page) ?
-				ADDRS_PER_INODE :
+				ADDRS_PER_INODE(F2FS_I(inode)) :
 				ADDRS_PER_BLOCK;
 
 		clear_buffer_new(bh_result);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b82f1419992..0343759ceb4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -300,15 +300,6 @@ struct f2fs_sm_info {
 	unsigned int ovp_segments;	/* # of overprovision segments */
 };
 
-/*
- * For directory operation
- */
-#define	NODE_DIR1_BLOCK		(ADDRS_PER_INODE + 1)
-#define	NODE_DIR2_BLOCK		(ADDRS_PER_INODE + 2)
-#define	NODE_IND1_BLOCK		(ADDRS_PER_INODE + 3)
-#define	NODE_IND2_BLOCK		(ADDRS_PER_INODE + 4)
-#define	NODE_DIND_BLOCK		(ADDRS_PER_INODE + 5)
-
 /*
  * For superblock
  */
@@ -942,6 +933,13 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
 		ri->i_inline |= F2FS_INLINE_XATTR;
 }
 
+static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+{
+	if (is_inode_flag_set(fi, FI_INLINE_XATTR))
+		return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
+	return DEF_ADDRS_PER_INODE;
+}
+
 static inline int f2fs_readonly(struct super_block *sb)
 {
 	return sb->s_flags & MS_RDONLY;
@@ -1108,7 +1106,7 @@ int do_write_data_page(struct page *);
  */
 int start_gc_thread(struct f2fs_sb_info *);
 void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int);
+block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
 int f2fs_gc(struct f2fs_sb_info *);
 void build_gc_manager(struct f2fs_sb_info *);
 int __init create_gc_caches(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index bd4184e3552..02c906971cc 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -290,7 +290,7 @@ static int truncate_blocks(struct inode *inode, u64 from)
 	}
 
 	if (IS_INODE(dn.node_page))
-		count = ADDRS_PER_INODE;
+		count = ADDRS_PER_INODE(F2FS_I(inode));
 	else
 		count = ADDRS_PER_BLOCK;
 
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e6b3ffd5ff6..eb89037e331 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -461,7 +461,7 @@ next_step:
  * as indirect or double indirect node blocks, are given, it must be a caller's
  * bug.
  */
-block_t start_bidx_of_node(unsigned int node_ofs)
+block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
 {
 	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
 	unsigned int bidx;
@@ -478,7 +478,7 @@ block_t start_bidx_of_node(unsigned int node_ofs)
 		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
 		bidx = node_ofs - 5 - dec;
 	}
-	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE;
+	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
 }
 
 static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -586,7 +586,6 @@ next_step:
 			continue;
 		}
 
-		start_bidx = start_bidx_of_node(nofs);
 		ofs_in_node = le16_to_cpu(entry->ofs_in_node);
 
 		if (phase == 2) {
@@ -594,6 +593,8 @@ next_step:
 			if (IS_ERR(inode))
 				continue;
 
+			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+
 			data_page = find_data_page(inode,
 					start_bidx + ofs_in_node, false);
 			if (IS_ERR(data_page))
@@ -604,6 +605,8 @@ next_step:
 		} else {
 			inode = find_gc_inode(dni.ino, ilist);
 			if (inode) {
+				start_bidx = start_bidx_of_node(nofs,
+								F2FS_I(inode));
 				data_page = get_lock_data_page(inode,
 						start_bidx + ofs_in_node);
 				if (IS_ERR(data_page))
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 818ff368de8..f0e733b21b2 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -315,9 +315,10 @@ cache:
  * The maximum depth is four.
  * Offset[0] will have raw inode offset.
  */
-static int get_node_path(long block, int offset[4], unsigned int noffset[4])
+static int get_node_path(struct f2fs_inode_info *fi, long block,
+				int offset[4], unsigned int noffset[4])
 {
-	const long direct_index = ADDRS_PER_INODE;
+	const long direct_index = ADDRS_PER_INODE(fi);
 	const long direct_blks = ADDRS_PER_BLOCK;
 	const long dptrs_per_blk = NIDS_PER_BLOCK;
 	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -405,7 +406,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 	int level, i;
 	int err = 0;
 
-	level = get_node_path(index, offset, noffset);
+	level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
 
 	nids[0] = dn->inode->i_ino;
 	npage[0] = dn->inode_page;
@@ -687,7 +688,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
 
 	trace_f2fs_truncate_inode_blocks_enter(inode, from);
 
-	level = get_node_path(from, offset, noffset);
+	level = get_node_path(F2FS_I(inode), from, offset, noffset);
 restart:
 	page = get_node_page(sbi, inode->i_ino);
 	if (IS_ERR(page)) {
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index fa493bb6416..51ef5eec33d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -213,6 +213,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 	void *kaddr;
 	struct inode *inode;
 	struct page *node_page;
+	unsigned int offset;
 	block_t bidx;
 	int i;
 
@@ -257,8 +258,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 	node_page = get_node_page(sbi, nid);
 	if (IS_ERR(node_page))
 		return PTR_ERR(node_page);
-	bidx = start_bidx_of_node(ofs_of_node(node_page)) +
-					le16_to_cpu(sum.ofs_in_node);
+
+	offset = ofs_of_node(node_page);
 	ino = ino_of_node(node_page);
 	f2fs_put_page(node_page, 1);
 
@@ -267,6 +268,9 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
+	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
+					le16_to_cpu(sum.ofs_in_node);
+
 	truncate_hole(inode, bidx, bidx + 1);
 	iput(inode);
 	return 0;
@@ -275,6 +279,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 					struct page *page, block_t blkaddr)
 {
+	struct f2fs_inode_info *fi = F2FS_I(inode);
 	unsigned int start, end;
 	struct dnode_of_data dn;
 	struct f2fs_summary sum;
@@ -282,9 +287,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 	int err = 0, recovered = 0;
 	int ilock;
 
-	start = start_bidx_of_node(ofs_of_node(page));
+	start = start_bidx_of_node(ofs_of_node(page), fi);
 	if (IS_INODE(page))
-		end = start + ADDRS_PER_INODE;
+		end = start + ADDRS_PER_INODE(fi);
 	else
 		end = start + ADDRS_PER_BLOCK;
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 70ecf484e7e..13d0a0fe49d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -618,7 +618,7 @@ static const struct export_operations f2fs_export_ops = {
 
 static loff_t max_file_size(unsigned bits)
 {
-	loff_t result = ADDRS_PER_INODE;
+	loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
 	loff_t leaf_count = ADDRS_PER_BLOCK;
 
 	/* two direct node blocks */
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 10ab11f8f99..93e7020fb7a 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -140,9 +140,17 @@ struct f2fs_extent {
 } __packed;
 
 #define F2FS_NAME_LEN		255
-#define ADDRS_PER_INODE         923	/* Address Pointers in an Inode */
-#define ADDRS_PER_BLOCK         1018	/* Address Pointers in a Direct Block */
-#define NIDS_PER_BLOCK          1018	/* Node IDs in an Indirect Block */
+#define F2FS_INLINE_XATTR_ADDRS	0	/* 0 bytes for inline xattrs */
+#define DEF_ADDRS_PER_INODE	923	/* Address Pointers in an Inode */
+#define ADDRS_PER_INODE(fi)	addrs_per_inode(fi)
+#define ADDRS_PER_BLOCK		1018	/* Address Pointers in a Direct Block */
+#define NIDS_PER_BLOCK		1018	/* Node IDs in an Indirect Block */
+
+#define	NODE_DIR1_BLOCK		(DEF_ADDRS_PER_INODE + 1)
+#define	NODE_DIR2_BLOCK		(DEF_ADDRS_PER_INODE + 2)
+#define	NODE_IND1_BLOCK		(DEF_ADDRS_PER_INODE + 3)
+#define	NODE_IND2_BLOCK		(DEF_ADDRS_PER_INODE + 4)
+#define	NODE_DIND_BLOCK		(DEF_ADDRS_PER_INODE + 5)
 
 #define F2FS_INLINE_XATTR	0x01	/* file inline xattr flag */
 
@@ -172,7 +180,7 @@ struct f2fs_inode {
 
 	struct f2fs_extent i_ext;	/* caching a largest extent */
 
-	__le32 i_addr[ADDRS_PER_INODE];	/* Pointers to data blocks */
+	__le32 i_addr[DEF_ADDRS_PER_INODE];	/* Pointers to data blocks */
 
 	__le32 i_nid[5];		/* direct(2), indirect(2),
 						double_indirect(1) node id */
-- 
cgit v1.2.3-70-g09d2


From a26b7c8a0149ce1e3b6a10f2801aada6e447e4e7 Mon Sep 17 00:00:00 2001
From: Jin Xu <jinuxstyle@gmail.com>
Date: Thu, 5 Sep 2013 12:45:26 +0800
Subject: f2fs: optimize gc for better performance

This patch improves the gc efficiency by optimizing the victim
selection policy. With this optimization, the random re-write
performance could increase up to 20%.

For f2fs, when disk is in shortage of free spaces, gc will selects
dirty segments and moves valid blocks around for making more space
available. The gc cost of a segment is determined by the valid blocks
in the segment. The less the valid blocks, the higher the efficiency.
The ideal victim segment is the one that has the most garbage blocks.

Currently, it searches up to 20 dirty segments for a victim segment.
The selected victim is not likely the best victim for gc when there
are much more dirty segments. Why not searching more dirty segments
for a better victim? The cost of searching dirty segments is
negligible in comparison to moving blocks.

In this patch, it enlarges the MAX_VICTIM_SEARCH to 4096 to make
the search more aggressively for a possible better victim. Since
it also applies to victim selection for SSR, it will likely improve
the SSR efficiency as well.

The test case is simple. It creates as many files until the disk full.
The size for each file is 32KB. Then it writes as many as 100000
records of 4KB size to random offsets of random files in sync mode.
The testing was done on a 2GB partition of a SDHC card. Let's see the
test result of f2fs without and with the patch.

---------------------------------------
2GB partition, SDHC
create 52023 files of size 32768 bytes
random re-write 100000 records of 4KB
---------------------------------------
| file creation (s) | rewrite time (s) | gc count | gc garbage blocks |
[no patch]  341         4227             1174          174840
[patched]   324         2958             645           106682

It's obvious that, with the patch, f2fs finishes the test in 20+% less
time than without the patch. And internally it does much less gc with
higher efficiency than before.

Since the performance improvement is related to gc, it might not be so
obvious for other tests that do not trigger gc as often as this one (
This is because f2fs selects dirty segments for SSR use most of the
time when free space is in shortage). The well-known iozone test tool
was not used for benchmarking the patch becuase it seems do not have
a test case that performs random re-write on a full disk.

This patch is the revised version based on the suggestion from
Jaegeuk Kim.

Signed-off-by: Jin Xu <jinuxstyle@gmail.com>
[Jaegeuk Kim: suggested simpler solution]
Reviewed-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/gc.c      | 8 +++++++-
 fs/f2fs/gc.h      | 2 +-
 fs/f2fs/segment.h | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs/f2fs/gc.c')

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index eb89037e331..2f157e88368 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -153,12 +153,18 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 	if (p->alloc_mode == SSR) {
 		p->gc_mode = GC_GREEDY;
 		p->dirty_segmap = dirty_i->dirty_segmap[type];
+		p->max_search = dirty_i->nr_dirty[type];
 		p->ofs_unit = 1;
 	} else {
 		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
 		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
+		p->max_search = dirty_i->nr_dirty[DIRTY];
 		p->ofs_unit = sbi->segs_per_sec;
 	}
+
+	if (p->max_search > MAX_VICTIM_SEARCH)
+		p->max_search = MAX_VICTIM_SEARCH;
+
 	p->offset = sbi->last_victim[p->gc_mode];
 }
 
@@ -305,7 +311,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 		if (cost == max_cost)
 			continue;
 
-		if (nsearched++ >= MAX_VICTIM_SEARCH) {
+		if (nsearched++ >= p.max_search) {
 			sbi->last_victim[p.gc_mode] = segno;
 			break;
 		}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index c22dee9f142..507056d2220 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -20,7 +20,7 @@
 #define LIMIT_FREE_BLOCK	40 /* percentage over invalid + free space */
 
 /* Search max. number of dirty segments to select a victim segment */
-#define MAX_VICTIM_SEARCH	20
+#define MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
 struct f2fs_gc_kthread {
 	struct task_struct *f2fs_gc_task;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index e0d6d3abf39..bdd10eab8c4 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -142,6 +142,7 @@ struct victim_sel_policy {
 	int alloc_mode;			/* LFS or SSR */
 	int gc_mode;			/* GC_CB or GC_GREEDY */
 	unsigned long *dirty_segmap;	/* dirty segment bitmap */
+	unsigned int max_search;	/* maximum # of segments to search */
 	unsigned int offset;		/* last scanned bitmap offset */
 	unsigned int ofs_unit;		/* bitmap search unit */
 	unsigned int min_cost;		/* minimum cost */
-- 
cgit v1.2.3-70-g09d2