From 6eedc70150d55b5885800eb6664ea226dc2cb66f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:27 +0200
Subject: vfs: Move noop_backing_dev_info check from sync into writeback

In principle, a filesystem may want to have ->sync_fs() called during sync(1)
although it does not have a bdi (i.e. s_bdi is set to noop_backing_dev_info).
Only writeback code really needs bdi set to something reasonable. So move the
checks to where they are more logical.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/sync.c b/fs/sync.c
index 11e3d1c4490..b3d2a001293 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,13 +29,6 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	/*
-	 * This should be safe, as we require bdi backing to actually
-	 * write out data in the first place
-	 */
-	if (sb->s_bdi == &noop_backing_dev_info)
-		return 0;
-
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
 		sb->s_qcop->quota_sync(sb, -1, wait);
 
-- 
cgit v1.2.3-70-g09d2


From ceed17236a7491b44ee2be21f56a41ab997cbe7d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:28 +0200
Subject: quota: Split dquot_quota_sync() to writeback and cache flushing part

Split off part of dquot_quota_sync() which writes dquots into a quota file
to a separate function. In the next patch we will use the function from
filesystems and we do not want to abuse ->quota_sync quotactl callback more
than necessary.

Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/quota.c          |  4 ++--
 fs/gfs2/quota.h          |  2 +-
 fs/gfs2/super.c          |  2 +-
 fs/gfs2/sys.c            |  2 +-
 fs/quota/dquot.c         | 24 +++++++++++++++++++++---
 fs/quota/quota.c         |  4 ++--
 fs/sync.c                |  2 +-
 include/linux/quota.h    |  2 +-
 include/linux/quotaops.h |  8 +++++++-
 9 files changed, 37 insertions(+), 13 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e7d39..27b5cc7d688 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1108,7 +1108,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 	}
 }
 
-int gfs2_quota_sync(struct super_block *sb, int type, int wait)
+int gfs2_quota_sync(struct super_block *sb, int type)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_data **qda;
@@ -1154,7 +1154,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait)
 
 static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
 {
-	return gfs2_quota_sync(sb, type, 0);
+	return gfs2_quota_sync(sb, type);
 }
 
 int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 90bf1c302a9..f25d98b8790 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
 extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
 			      u32 uid, u32 gid);
 
-extern int gfs2_quota_sync(struct super_block *sb, int type, int wait);
+extern int gfs2_quota_sync(struct super_block *sb, int type);
 extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
 
 extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621c240..313c329490e 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -838,7 +838,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	int error;
 
 	flush_workqueue(gfs2_delete_workqueue);
-	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+	gfs2_quota_sync(sdp->sd_vfs, 0);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
 	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b1d5f..73ecc34c434 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
 	if (simple_strtol(buf, NULL, 0) != 1)
 		return -EINVAL;
 
-	gfs2_quota_sync(sdp->sd_vfs, 0, 1);
+	gfs2_quota_sync(sdp->sd_vfs, 0);
 	return len;
 }
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10cbe841cb7..d679fc48ef2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -595,12 +595,14 @@ out:
 }
 EXPORT_SYMBOL(dquot_scan_active);
 
-int dquot_quota_sync(struct super_block *sb, int type, int wait)
+/* Write all dquot structures to quota files */
+int dquot_writeback_dquots(struct super_block *sb, int type)
 {
 	struct list_head *dirty;
 	struct dquot *dquot;
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
+	int err, ret = 0;
 
 	mutex_lock(&dqopt->dqonoff_mutex);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
 			atomic_inc(&dquot->dq_count);
 			spin_unlock(&dq_list_lock);
 			dqstats_inc(DQST_LOOKUPS);
-			sb->dq_op->write_dquot(dquot);
+			err = sb->dq_op->write_dquot(dquot);
+			if (!ret && err)
+				ret = err;
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
 		}
@@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
 	dqstats_inc(DQST_SYNCS);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
-	if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE))
+	return ret;
+}
+EXPORT_SYMBOL(dquot_writeback_dquots);
+
+/* Write all dquot structures to disk and make them visible from userspace */
+int dquot_quota_sync(struct super_block *sb, int type)
+{
+	struct quota_info *dqopt = sb_dqopt(sb);
+	int cnt;
+	int ret;
+
+	ret = dquot_writeback_dquots(sb, type);
+	if (ret)
+		return ret;
+	if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
 		return 0;
 
 	/* This is not very clever (and fast) but currently I don't know about
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9a391204ca2..c659f92298d 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 static void quota_sync_one(struct super_block *sb, void *arg)
 {
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, *(int *)arg, 1);
+		sb->s_qcop->quota_sync(sb, *(int *)arg);
 }
 
 static int quota_sync_all(int type)
@@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 	case Q_SYNC:
 		if (!sb->s_qcop->quota_sync)
 			return -ENOSYS;
-		return sb->s_qcop->quota_sync(sb, type, 1);
+		return sb->s_qcop->quota_sync(sb, type);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
 	case Q_XQUOTARM:
diff --git a/fs/sync.c b/fs/sync.c
index b3d2a001293..cae145dd801 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -30,7 +30,7 @@
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
 	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, -1, wait);
+		sb->s_qcop->quota_sync(sb, -1);
 
 	if (wait)
 		sync_inodes_sb(sb);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index c09fa042b5e..524ede8a160 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -333,7 +333,7 @@ struct quotactl_ops {
 	int (*quota_on)(struct super_block *, int, int, struct path *);
 	int (*quota_on_meta)(struct super_block *, int, int);
 	int (*quota_off)(struct super_block *, int);
-	int (*quota_sync)(struct super_block *, int, int);
+	int (*quota_sync)(struct super_block *, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 17b977304a0..ec6b65feaab 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -83,7 +83,8 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
 int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
  	int format_id, int type);
 int dquot_quota_off(struct super_block *sb, int type);
-int dquot_quota_sync(struct super_block *sb, int type, int wait);
+int dquot_writeback_dquots(struct super_block *sb, int type);
+int dquot_quota_sync(struct super_block *sb, int type);
 int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
@@ -255,6 +256,11 @@ static inline int dquot_resume(struct super_block *sb, int type)
 
 #define dquot_file_open		generic_file_open
 
+static inline int dquot_writeback_dquots(struct super_block *sb, int type)
+{
+	return 0;
+}
+
 #endif /* CONFIG_QUOTA */
 
 static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr)
-- 
cgit v1.2.3-70-g09d2


From a1177825719ccef3f76ef39bbfd5ebb6087d53c7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:29 +0200
Subject: quota: Move quota syncing to ->sync_fs method

Now that writes to quota files go through the block device page cache and
space for quota structures is reserved when they are first accessed, we have
no reason to sync quota before inode writeback. In fact this order is now
only harmful since quota information can easily change during inode writeback
(either because of conversion of delayed-allocated extents or simply because
of allocation of new blocks for simple filesystems not using page_mkwrite).

So move syncing of quota information after writeback of inodes into ->sync_fs
method. This way we do not have to use ->quota_sync callback which is primarily
intended for use by quotactl syscall anyway and we get rid of calling
->sync_fs() twice unnecessarily. We skip quota syncing for OCFS2 since it does
proper quota journalling in all cases (unlike ext3, ext4, and reiserfs which
also support legacy non-journalled quotas) and thus there are no dirty quota
structures.

CC: "Theodore Ts'o" <tytso@mit.edu>
CC: Joel Becker <jlbec@evilplan.org>
CC: reiserfs-devel@vger.kernel.org
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Acked-by: Dave Kleikamp <shaggy@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/super.c     | 6 ++++++
 fs/ext3/super.c     | 5 +++++
 fs/ext4/super.c     | 5 +++++
 fs/gfs2/super.c     | 2 ++
 fs/jfs/super.c      | 5 +++++
 fs/reiserfs/super.c | 5 +++++
 fs/sync.c           | 3 ---
 7 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b3621cb7ea3..5df3d2d8169 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
 	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
 
+	/*
+	 * Write quota structures to quota file, sync_blockdev() will write
+	 * them to disk later
+	 */
+	dquot_writeback_dquots(sb, -1);
+
 	spin_lock(&sbi->s_lock);
 	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
 		ext2_debug("setting valid to 0\n");
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c3a44b7c37..4ac304c55c5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2526,6 +2526,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
 	tid_t target;
 
 	trace_ext3_sync_fs(sb, wait);
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(sb, -1);
 	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
 		if (wait)
 			log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef0..d8759401eca 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 
 	trace_ext4_sync_fs(sb, wait);
 	flush_workqueue(sbi->dio_unwritten_wq);
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(sb, -1);
 	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
 		if (wait)
 			jbd2_log_wait_commit(sbi->s_journal, target);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 313c329490e..f3d6bbfb32c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -952,6 +952,8 @@ restart:
 static int gfs2_sync_fs(struct super_block *sb, int wait)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
+
+	gfs2_quota_sync(sb, -1);
 	if (wait && sdp)
 		gfs2_log_flush(sdp, NULL);
 	return 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4a82950f412..c55c7452d28 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
 
 	/* log == NULL indicates read-only mount */
 	if (log) {
+		/*
+		 * Write quota structures to quota file, sync_blockdev() will
+		 * write them to disk later
+		 */
+		dquot_writeback_dquots(sb, -1);
 		jfs_flush_journal(log, wait);
 		jfs_syncpt(log, 0);
 	}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 651ce767b55..7a37dabf5a9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait)
 {
 	struct reiserfs_transaction_handle th;
 
+	/*
+	 * Writeback quota in non-journalled quota case - journalled quota has
+	 * no dirty dquots
+	 */
+	dquot_writeback_dquots(s, -1);
 	reiserfs_write_lock(s);
 	if (!journal_begin(&th, s, 1))
 		if (!journal_end_sync(&th, s, 1))
diff --git a/fs/sync.c b/fs/sync.c
index cae145dd801..66acd2ba91c 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,9 +29,6 @@
  */
 static int __sync_filesystem(struct super_block *sb, int wait)
 {
-	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, -1);
-
 	if (wait)
 		sync_inodes_sb(sb);
 	else
-- 
cgit v1.2.3-70-g09d2


From b3de653105180b57af90ef2f5b8441f085f4ff56 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:30 +0200
Subject: vfs: Reorder operations during sys_sync

Change the order of operations during sync from

for_each_sb {
        writeback_inodes_sb();
        sync_fs(nowait);
        __sync_blockdev(nowait);
}
for_each_sb {
        sync_inodes_sb();
        sync_fs(wait);
        __sync_blockdev(wait);
}

to

for_each_sb
        writeback_inodes_sb();
for_each_sb
        sync_fs(nowait);
for_each_sb
        __sync_blockdev(nowait);
for_each_sb
        sync_inodes_sb();
for_each_sb
        sync_fs(wait);
for_each_sb
        __sync_blockdev(wait);

This is a preparation for the following patches in this series.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/sync.c b/fs/sync.c
index 66acd2ba91c..490e9020113 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -67,18 +67,28 @@ int sync_filesystem(struct super_block *sb)
 }
 EXPORT_SYMBOL_GPL(sync_filesystem);
 
-static void sync_one_sb(struct super_block *sb, void *arg)
+static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY))
-		__sync_filesystem(sb, *(int *)arg);
+		sync_inodes_sb(sb);
 }
-/*
- * Sync all the data for all the filesystems (called by sys_sync() and
- * emergency sync)
- */
-static void sync_filesystems(int wait)
+
+static void writeback_inodes_one_sb(struct super_block *sb, void *arg)
 {
-	iterate_supers(sync_one_sb, &wait);
+	if (!(sb->s_flags & MS_RDONLY))
+		writeback_inodes_sb(sb, WB_REASON_SYNC);
+}
+
+static void sync_fs_one_sb(struct super_block *sb, void *arg)
+{
+	if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
+		sb->s_op->sync_fs(sb, *(int *)arg);
+}
+
+static void sync_blkdev_one_sb(struct super_block *sb, void *arg)
+{
+	if (!(sb->s_flags & MS_RDONLY))
+		__sync_blockdev(sb->s_bdev, *(int *)arg);
 }
 
 /*
@@ -87,9 +97,15 @@ static void sync_filesystems(int wait)
  */
 SYSCALL_DEFINE0(sync)
 {
+	int nowait = 0, wait = 1;
+
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	sync_filesystems(0);
-	sync_filesystems(1);
+	iterate_supers(writeback_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &wait);
+	iterate_supers(sync_blkdev_one_sb, &wait);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -97,12 +113,18 @@ SYSCALL_DEFINE0(sync)
 
 static void do_sync_work(struct work_struct *work)
 {
+	int nowait = 0;
+
 	/*
 	 * Sync twice to reduce the possibility we skipped some inodes / pages
 	 * because they were temporarily locked
 	 */
-	sync_filesystems(0);
-	sync_filesystems(0);
+	iterate_supers(sync_inodes_one_sb, &nowait);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_supers(sync_inodes_one_sb, &nowait);
+	iterate_supers(sync_fs_one_sb, &nowait);
+	iterate_supers(sync_blkdev_one_sb, &nowait);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From a8c7176b6ded413d5044a00f1d05477b95a6d7ad Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:32 +0200
Subject: vfs: Make sys_sync writeout also block device inodes

If a block device does not have a filesystem mounted on it, sys_sync will just
ignore it and will not write out its dirty pages. This is because writeback
code avoids writing inodes from a superblock without a backing device, and
blockdev_superblock is such a superblock. Since it is unexpected that sync does
not write out dirty data for block devices, be nice to users and change the
behavior to do so. So now we iterate over all block devices on blockdev_super
instead of iterating over all superblocks when syncing block devices.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/sync.c b/fs/sync.c
index 490e9020113..0b166f26362 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -85,10 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
-static void sync_blkdev_one_sb(struct super_block *sb, void *arg)
+static void flush_one_bdev(struct block_device *bdev, void *arg)
 {
-	if (!(sb->s_flags & MS_RDONLY))
-		__sync_blockdev(sb->s_bdev, *(int *)arg);
+	__sync_blockdev(bdev, 0);
+}
+
+static void sync_one_bdev(struct block_device *bdev, void *arg)
+{
+	sync_blockdev(bdev);
 }
 
 /*
@@ -102,10 +106,10 @@ SYSCALL_DEFINE0(sync)
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	iterate_supers(writeback_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_supers(sync_blkdev_one_sb, &wait);
+	iterate_bdevs(sync_one_bdev, NULL);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -121,10 +125,10 @@ static void do_sync_work(struct work_struct *work)
 	 */
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_supers(sync_blkdev_one_sb, &nowait);
+	iterate_bdevs(flush_one_bdev, NULL);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From d0e91b13eb34d449922124c34f8a05e498daa089 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:33 +0200
Subject: vfs: Remove unnecessary flushing of block devices

It is not necessary to write block devices twice. The reason why we first did
a flush and then a proper sync is that
  for_each_bdev() {
    write_bdev()
    wait_for_completion()
  }
is much slower than
  for_each_bdev()
    write_bdev()
  for_each_bdev()
    wait_for_completion()
when there is a larger amount of data. But as can be seen above, there is no
real need to scan pages and submit them twice. We just need to separate the
submission and waiting parts. This patch does that.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/sync.c b/fs/sync.c
index 0b166f26362..131ddae87a1 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -85,14 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
-static void flush_one_bdev(struct block_device *bdev, void *arg)
+static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
 {
-	__sync_blockdev(bdev, 0);
+	filemap_fdatawrite(bdev->bd_inode->i_mapping);
 }
 
-static void sync_one_bdev(struct block_device *bdev, void *arg)
+static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 {
-	sync_blockdev(bdev);
+	filemap_fdatawait(bdev->bd_inode->i_mapping);
 }
 
 /*
@@ -106,10 +106,10 @@ SYSCALL_DEFINE0(sync)
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
 	iterate_supers(writeback_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_bdevs(sync_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
+	iterate_bdevs(fdatawait_one_bdev, NULL);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 	return 0;
@@ -125,10 +125,10 @@ static void do_sync_work(struct work_struct *work)
 	 */
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(flush_one_bdev, NULL);
+	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
-- 
cgit v1.2.3-70-g09d2


From 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 3 Jul 2012 16:45:34 +0200
Subject: vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and
 reorder sync passes

wakeup_flusher_threads(0) will queue work doing complete writeback for each
flusher thread. Thus there is not much point in submitting another work doing
full inode WB_SYNC_NONE writeback by writeback_inodes_sb().

After this change it does not make sense to call nonblocking ->sync_fs and
block device flush before calling sync_inodes_sb() because
wakeup_flusher_threads() is completely asynchronous and thus these functions
would be called in parallel with inode writeback running which will effectively
void any work they do. So we move sync_inodes_sb() call before these two
functions.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sync.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs/sync.c')

diff --git a/fs/sync.c b/fs/sync.c
index 131ddae87a1..eb8722dc556 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -73,12 +73,6 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg)
 		sync_inodes_sb(sb);
 }
 
-static void writeback_inodes_one_sb(struct super_block *sb, void *arg)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		writeback_inodes_sb(sb, WB_REASON_SYNC);
-}
-
 static void sync_fs_one_sb(struct super_block *sb, void *arg)
 {
 	if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
@@ -96,17 +90,22 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
 }
 
 /*
- * sync everything.  Start out by waking pdflush, because that writes back
- * all queues in parallel.
+ * Sync everything. We start by waking flusher threads so that most of
+ * writeback runs on all devices in parallel. Then we sync all inodes reliably
+ * which effectively also waits for all flusher threads to finish doing
+ * writeback. At this point all data is on disk so metadata should be stable
+ * and we tell filesystems to sync their metadata via ->sync_fs() calls.
+ * Finally, we writeout all block devices because some filesystems (e.g. ext2)
+ * just write metadata (such as inodes or bitmaps) to block device page cache
+ * and do not sync it on their own in ->sync_fs().
  */
 SYSCALL_DEFINE0(sync)
 {
 	int nowait = 0, wait = 1;
 
 	wakeup_flusher_threads(0, WB_REASON_SYNC);
-	iterate_supers(writeback_inodes_one_sb, NULL);
-	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_inodes_one_sb, NULL);
+	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &wait);
 	iterate_bdevs(fdatawrite_one_bdev, NULL);
 	iterate_bdevs(fdatawait_one_bdev, NULL);
-- 
cgit v1.2.3-70-g09d2