From 8d280b98cfe3c0b69c37d355218975c1c0279bb0 Mon Sep 17 00:00:00 2001
From: David Chinner
Date: Tue, 14 Mar 2006 13:13:09 +1100
Subject: [XFS] On machines with more than 8 cpus, when running parallel I/O
 threads, the incore superblock lock becomes the limiting factor for buffered
 write throughput. Make the contended fields in the incore superblock use
 per-cpu counters so that there is no global lock to limit scalability.

SGI-PV: 946630
SGI-Modid: xfs-linux-melb:xfs-kern:25106a

Signed-off-by: David Chinner
Signed-off-by: Nathan Scott
---
 fs/xfs/xfs_mount.c | 560 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 544 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 62188ea392c..9b43b7b3d76 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,11 +51,31 @@ STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
+
+#ifdef HAVE_PERCPU_SB
+STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
+STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
+STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
+STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+
+#else
+
+#define xfs_icsb_destroy_counters(mp)			do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
+#define xfs_icsb_sync_counters(mp)			do { } while (0)
+#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
+#define xfs_icsb_modify_counters_locked(mp, a, b, c)	do { } while (0)
+
+#endif
+
 static const struct {
-	short offset;
-	short type;	/* 0 = integer
-			 * 1 = binary / string (no translation)
-			 */
+	short	offset;
+	short	type;	/* 0 = integer
+			 * 1 = binary / string (no translation)
+			 */
 } xfs_sb_info[] = {
     { offsetof(xfs_sb_t, sb_magicnum),   0 },
     { offsetof(xfs_sb_t, sb_blocksize),  0 },
@@ -113,7 +133,11 @@ xfs_mount_init(void)
 {
 	xfs_mount_t *mp;
 
-	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
+	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
+
+	if (xfs_icsb_init_counters(mp)) {
+		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+	}
 
 	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
 	spinlock_init(&mp->m_sb_lock, "xfs_sb");
@@ -136,8 +160,8 @@ xfs_mount_init(void)
  */
 void
 xfs_mount_free(
-	xfs_mount_t *mp,
-	int	remove_bhv)
+	xfs_mount_t	*mp,
+	int		remove_bhv)
 {
 	if (mp->m_ihash)
 		xfs_ihash_free(mp);
@@ -177,6 +201,7 @@ xfs_mount_free(
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
 
+	xfs_icsb_destroy_counters(mp);
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
@@ -527,6 +552,10 @@ xfs_readsb(xfs_mount_t *mp)
 		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 	}
 
+	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+
 	mp->m_sb_bp = bp;
 	xfs_buf_relse(bp);
 	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
@@ -1154,6 +1183,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	sbp = xfs_getsb(mp, 0);
 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
 		XFS_FORCED_SHUTDOWN(mp))) {
+
+		xfs_icsb_sync_counters(mp);
+
 		/*
 		 * mark shared-readonly if desired
 		 */
@@ -1227,7 +1259,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 
 	xfs_trans_log_buf(tp, bp, first, last);
 }
-
 /*
  * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
  * a delta to a specified field in the in-core superblock.  Simply
@@ -1237,7 +1268,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
  *
  * The SB_LOCK must be held when this routine is called.
  */
-STATIC int
+int
 xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
 	int delta, int rsvd)
 {
@@ -1406,9 +1437,26 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
 	unsigned long	s;
 	int	status;
 
-	s = XFS_SB_LOCK(mp);
-	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	XFS_SB_UNLOCK(mp, s);
+	/* check for per-cpu counters */
+	switch (field) {
+#ifdef HAVE_PERCPU_SB
+	case XFS_SBS_ICOUNT:
+	case XFS_SBS_IFREE:
+	case XFS_SBS_FDBLOCKS:
+		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+			status = xfs_icsb_modify_counters(mp, field,
+							delta, rsvd);
+			break;
+		}
+		/* FALLTHROUGH */
+#endif
+	default:
+		s = XFS_SB_LOCK(mp);
+		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+		XFS_SB_UNLOCK(mp, s);
+		break;
+	}
+
 	return status;
 }
 
@@ -1445,8 +1493,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 		 * from the loop so we'll fall into the undo loop
 		 * below.
 		 */
-		status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-				msbp->msb_delta, rsvd);
+		switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+		case XFS_SBS_ICOUNT:
+		case XFS_SBS_IFREE:
+		case XFS_SBS_FDBLOCKS:
+			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+				status = xfs_icsb_modify_counters_locked(mp,
+							msbp->msb_field,
+							msbp->msb_delta, rsvd);
+				break;
+			}
+			/* FALLTHROUGH */
+#endif
+		default:
+			status = xfs_mod_incore_sb_unlocked(mp,
+						msbp->msb_field,
+						msbp->msb_delta, rsvd);
+			break;
+		}
+
 		if (status != 0) {
 			break;
 		}
@@ -1463,8 +1529,28 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 	if (status != 0) {
 		msbp--;
 		while (msbp >= msb) {
-			status = xfs_mod_incore_sb_unlocked(mp,
-				msbp->msb_field, -(msbp->msb_delta), rsvd);
+			switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+			case XFS_SBS_ICOUNT:
+			case XFS_SBS_IFREE:
+			case XFS_SBS_FDBLOCKS:
+				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+					status =
+					    xfs_icsb_modify_counters_locked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+					break;
+				}
+				/* FALLTHROUGH */
+#endif
+			default:
+				status = xfs_mod_incore_sb_unlocked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+				break;
+			}
 			ASSERT(status == 0);
 			msbp--;
 		}
@@ -1577,3 +1663,445 @@ xfs_mount_log_sbunit(
 	xfs_mod_sb(tp, fields);
 	xfs_trans_commit(tp, 0, NULL);
 }
+
+
+#ifdef HAVE_PERCPU_SB
+/*
+ * Per-cpu incore superblock counters
+ *
+ * Simple concept, difficult implementation
+ *
+ * Basically, replace the incore superblock counters with a distributed per cpu
+ * counter for contended fields (e.g. free block count).
+ *
+ * Difficulties arise in that the incore sb is used for ENOSPC checking, and
+ * hence needs to be accurately read when we are running low on space. Hence
+ * there is a method to enable and disable the per-cpu counters based on how
+ * much "stuff" is available in them.
+ *
+ * Basically, a counter is enabled if there is enough free resource to justify
+ * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
+ * ENOSPC), then we disable the counters to synchronise all callers and
+ * re-distribute the available resources.
+ *
+ * If, once we redistributed the available resources, we still get a failure,
+ * we disable the per-cpu counter and go through the slow path.
+ *
+ * The slow path is the current xfs_mod_incore_sb() function.  This means that
+ * when we disable a per-cpu counter, we need to drain its resources back to
+ * the global superblock. We do this after disabling the counter to prevent
+ * more threads from queueing up on the counter.
+ *
+ * Essentially, this means that we still need a lock in the fast path to enable
+ * synchronisation between the global counters and the per-cpu counters. This
+ * is not a problem because the lock will be local to a CPU almost all the time
+ * and have little contention except when we get to ENOSPC conditions.
+ *
+ * Basically, this lock becomes a barrier that enables us to lock out the fast
+ * path while we do things like enabling and disabling counters and
+ * synchronising the counters.
+ *
+ * Locking rules:
+ *
+ *	1. XFS_SB_LOCK() before picking up per-cpu locks
+ *	2. per-cpu locks always picked up via for_each_online_cpu() order
+ *	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
+ *	4. modifying per-cpu counters requires holding per-cpu lock
+ *	5. modifying global counters requires holding XFS_SB_LOCK
+ *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
+ *	   and _none_ of the per-cpu locks.
+ *
+ * Disabled counters are only ever re-enabled by a balance operation
+ * that results in more free resources per CPU than a given threshold.
+ * To ensure counters don't remain disabled, they are rebalanced when
+ * the global resource goes above a higher threshold (i.e. some hysteresis
+ * is present to prevent thrashing).
+ *
+ * Note: hotplug CPUs not yet supported
+ */
+int
+xfs_icsb_init_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
+	if (mp->m_sb_cnts == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock_init(&cntp->icsb_lock);
+	}
+	/*
+	 * start with all counters disabled so that the
+	 * initial balance kicks us off correctly
+	 */
+	mp->m_icsb_counters = -1;
+	return 0;
+}
+
+STATIC void
+xfs_icsb_destroy_counters(
+	xfs_mount_t	*mp)
+{
+	if (mp->m_sb_cnts)
+		free_percpu(mp->m_sb_cnts);
+}
+
+
+STATIC inline void
+xfs_icsb_lock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock(&cntp->icsb_lock);
+	}
+}
+
+STATIC inline void
+xfs_icsb_unlock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_unlock(&cntp->icsb_lock);
+	}
+}
+
+STATIC void
+xfs_icsb_count(
+	xfs_mount_t	*mp,
+	xfs_icsb_cnts_t	*cnt,
+	int		flags)
+{
+	xfs_icsb_cnts_t *cntp;
+	int		i;
+
+	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_lock_all_counters(mp);
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		cnt->icsb_icount += cntp->icsb_icount;
+		cnt->icsb_ifree += cntp->icsb_ifree;
+		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
+	}
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC int
+xfs_icsb_counter_disabled(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	return test_bit(field, &mp->m_icsb_counters);
+}
+
+STATIC int
+xfs_icsb_disable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	xfs_icsb_cnts_t	cnt;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
+		/* drain back to superblock */
+		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
+		switch (field) {
+		case XFS_SBS_ICOUNT:
+			mp->m_sb.sb_icount = cnt.icsb_icount;
+			break;
+		case XFS_SBS_IFREE:
+			mp->m_sb.sb_ifree = cnt.icsb_ifree;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	xfs_icsb_unlock_all_counters(mp);
+
+	return 0;
+}
+
+STATIC void
+xfs_icsb_enable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	uint64_t	count,
+	uint64_t	resid)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	for_each_online_cpu(i) {
+		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
+		switch (field) {
+		case XFS_SBS_ICOUNT:
+			cntp->icsb_icount = count + resid;
+			break;
+		case XFS_SBS_IFREE:
+			cntp->icsb_ifree = count + resid;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			cntp->icsb_fdblocks = count + resid;
+			break;
+		default:
+			BUG();
+			break;
+		}
+		resid = 0;
+	}
+	clear_bit(field, &mp->m_icsb_counters);
+	xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC void
+xfs_icsb_sync_counters_int(
+	xfs_mount_t	*mp,
+	int		flags)
+{
+	xfs_icsb_cnts_t	cnt;
+	int		s;
+
+	/* Pass 1: lock and sum all the per-cpu counters */
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		s = XFS_SB_LOCK(mp);
+
+	xfs_icsb_count(mp, &cnt, flags);
+
+	/* Pass 2: update the mp->m_sb fields */
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
+		mp->m_sb.sb_icount = cnt.icsb_icount;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
+		mp->m_sb.sb_ifree = cnt.icsb_ifree;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
+		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		XFS_SB_UNLOCK(mp, s);
+}
+
+/*
+ * Accurate update of per-cpu counters to incore superblock
+ */
+STATIC void
+xfs_icsb_sync_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, 0);
+}
+
+/*
+ * lazy addition used for things like df, background sb syncs, etc
+ */
+void
+xfs_icsb_sync_counters_lazy(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
+}
+
+/*
+ * Balance and enable/disable counters as necessary.
+ *
+ * Thresholds for re-enabling counters are somewhat magic.
+ * Inode counts are chosen to be the same number as a single
+ * on-disk allocation chunk per CPU, and free blocks is
+ * something far enough from zero that we aren't going to thrash
+ * when we get near ENOSPC.
+ */
+#define XFS_ICSB_INO_CNTR_REENABLE	64
+#define XFS_ICSB_FDBLK_CNTR_REENABLE	512
+STATIC void
+xfs_icsb_balance_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		flags)
+{
+	uint64_t	count, resid = 0;
+	int		weight = num_online_cpus();
+	int		s;
+
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		s = XFS_SB_LOCK(mp);
+
+	/* disable counter and sync counter */
+	xfs_icsb_disable_counter(mp, field);
+
+	/* update counters - first CPU gets residual */
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		count = mp->m_sb.sb_icount;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_IFREE:
+		count = mp->m_sb.sb_ifree;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_FDBLOCKS:
+		count = mp->m_sb.sb_fdblocks;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
+			goto out;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	xfs_icsb_enable_counter(mp, field, count, resid);
+out:
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		XFS_SB_UNLOCK(mp, s);
+}
+
+STATIC int
+xfs_icsb_modify_counters_int(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd,
+	int		flags)
+{
+	xfs_icsb_cnts_t	*icsbp;
+	long long	lcounter;	/* long counter for 64 bit fields */
+	int		cpu, s, locked = 0;
+	int		ret = 0, balance_done = 0;
+
+again:
+	cpu = get_cpu();
+	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
+	spin_lock(&icsbp->icsb_lock);
+	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
+		goto slow_path;
+
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		lcounter = icsbp->icsb_icount;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_icount = lcounter;
+		break;
+
+	case XFS_SBS_IFREE:
+		lcounter = icsbp->icsb_ifree;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_ifree = lcounter;
+		break;
+
+	case XFS_SBS_FDBLOCKS:
+		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
+
+		lcounter = icsbp->icsb_fdblocks;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_fdblocks = lcounter;
+		break;
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return 0;
+
+	/*
+	 * The slow path needs to be run with the SBLOCK
+	 * held so that we prevent other threads from
+	 * attempting to run this path at the same time.
+	 * This provides exclusion for the balancing code,
+	 * and exclusive fallback if the balance does not
+	 * provide enough resources to continue in an unlocked
+	 * manner.
+	 */
+slow_path:
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+
+	/* need to hold superblock in case we need
+	 * to disable a counter */
+	if (!(flags & XFS_ICSB_SB_LOCKED)) {
+		s = XFS_SB_LOCK(mp);
+		locked = 1;
+		flags |= XFS_ICSB_SB_LOCKED;
+	}
+	if (!balance_done) {
+		xfs_icsb_balance_counter(mp, field, flags);
+		balance_done = 1;
+		goto again;
+	} else {
+		/*
+		 * we might not have enough on this local
+		 * cpu to allocate for a bulk request.
+		 * We need to drain this field from all CPUs
+		 * and disable the counter fastpath
+		 */
+		xfs_icsb_disable_counter(mp, field);
+	}
+
+	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return ret;
+}
+
+STATIC int
+xfs_icsb_modify_counters(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
+}
+
+/*
+ * Called when superblock is already locked
+ */
+STATIC int
+xfs_icsb_modify_counters_locked(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta,
+						rsvd, XFS_ICSB_SB_LOCKED);
+}
+#endif
-- 
cgit v1.2.3-70-g09d2
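The disable-and-drain scheme this patch introduces is easier to see in isolation. Below is a minimal userspace sketch of the same idea, not the kernel implementation: per-thread pools with a global fallback, and a fast path that is switched off and drained on local exhaustion. All names are hypothetical, pthread mutexes stand in for the per-cpu locks and XFS_SB_LOCK, and the threshold/hysteresis rebalancing that re-enables a counter is omitted for brevity.

    /*
     * Minimal sketch of a disable-and-drain per-CPU counter
     * (hypothetical userspace names). Build: cc -pthread sketch.c
     */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NCPUS 4

    struct pcpu_counter {
            pthread_mutex_t global_lock;    /* stands in for XFS_SB_LOCK */
            int64_t         global;         /* authoritative when disabled */
            int             disabled;       /* fast path switched off? */
            pthread_mutex_t lock[NCPUS];    /* per-"cpu" locks */
            int64_t         local[NCPUS];   /* per-"cpu" pools */
    };

    /* Flip 'disabled' with every per-cpu lock held (locking rule 6),
     * then drain the pools back into the global count. */
    static void pcpu_disable_and_drain(struct pcpu_counter *c)
    {
            int i;

            for (i = 0; i < NCPUS; i++)
                    pthread_mutex_lock(&c->lock[i]);
            c->disabled = 1;
            for (i = 0; i < NCPUS; i++) {
                    c->global += c->local[i];
                    c->local[i] = 0;
            }
            for (i = 0; i < NCPUS; i++)
                    pthread_mutex_unlock(&c->lock[i]);
    }

    /* Apply a delta; local exhaustion (the "local ENOSPC") falls back
     * to the serialised global count, as the slow path does. */
    static int pcpu_mod(struct pcpu_counter *c, int cpu, int64_t delta)
    {
            pthread_mutex_lock(&c->lock[cpu]);
            if (!c->disabled && c->local[cpu] + delta >= 0) {
                    c->local[cpu] += delta;         /* fast path */
                    pthread_mutex_unlock(&c->lock[cpu]);
                    return 0;
            }
            pthread_mutex_unlock(&c->lock[cpu]);

            pthread_mutex_lock(&c->global_lock);    /* slow path */
            pcpu_disable_and_drain(c);
            if (c->global + delta < 0) {
                    pthread_mutex_unlock(&c->global_lock);
                    return -1;                      /* genuine ENOSPC */
            }
            c->global += delta;
            pthread_mutex_unlock(&c->global_lock);
            return 0;
    }

    int main(void)
    {
            struct pcpu_counter c = { .global = 0, .disabled = 0 };
            int i;

            pthread_mutex_init(&c.global_lock, NULL);
            for (i = 0; i < NCPUS; i++) {
                    pthread_mutex_init(&c.lock[i], NULL);
                    c.local[i] = 100;       /* pre-distributed resource */
            }
            /* 0: local pool too small, drains to global, then succeeds */
            printf("%d\n", pcpu_mod(&c, 1, -150));
            return 0;
    }

The property the sketch mirrors from xfs_icsb_disable_counter() is that the disabled flag only changes while every per-cpu lock is held, so no fast-path update can race with the drain.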
From e8234a6871aa0de1ed0aeeecb5230ecf3ab414e2 Mon Sep 17 00:00:00 2001
From: David Chinner
Date: Tue, 14 Mar 2006 13:23:52 +1100
Subject: [XFS] Add support for hotplug CPUs to the per-CPU superblock counters
 by registering a notifier callback that listens to CPU up/down events to
 modify the counters appropriately.

SGI-PV: 949726
SGI-Modid: xfs-linux-melb:xfs-kern:25214a

Signed-off-by: David Chinner
Signed-off-by: Nathan Scott
---
 fs/xfs/linux-2.6/xfs_linux.h |  2 ++
 fs/xfs/xfs_mount.c           | 74 ++++++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_mount.h           |  1 +
 3 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e2be64982bc..9fdc14cffb7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -73,6 +73,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include
 #include
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9b43b7b3d76..a64110b9023 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -60,6 +60,7 @@ STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
 						int, int);
 STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
 						int, int);
+STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
 
 #else
 
@@ -1716,9 +1717,72 @@ xfs_mount_log_sbunit(
  * To ensure counters don't remain disabled, they are rebalanced when
  * the global resource goes above a higher threshold (i.e. some hysteresis
  * is present to prevent thrashing).
+ */
+
+/*
+ * hot-plug CPU notifier support.
  *
- * Note: hotplug CPUs not yet supported
+ * We cannot use the hotcpu_register() function because it does
+ * not allow notifier instances. We need a notifier per filesystem
+ * as we need to be able to identify the filesystem to balance
+ * the counters out. This is achieved by having a notifier block
+ * embedded in the xfs_mount_t and doing pointer magic to get the
+ * mount pointer from the notifier block address.
  */
+STATIC int
+xfs_icsb_cpu_notify(
+	struct notifier_block *nfb,
+	unsigned long action,
+	void *hcpu)
+{
+	xfs_icsb_cnts_t *cntp;
+	xfs_mount_t	*mp;
+	int		s;
+
+	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
+	cntp = (xfs_icsb_cnts_t *)
+			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
+	switch (action) {
+	case CPU_UP_PREPARE:
+		/* Easy Case - initialize the area and locks, and
+		 * then rebalance when online does everything else for us. */
+		spin_lock_init(&cntp->icsb_lock);
+		cntp->icsb_icount = 0;
+		cntp->icsb_ifree = 0;
+		cntp->icsb_fdblocks = 0;
+		break;
+	case CPU_ONLINE:
+		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+		break;
+	case CPU_DEAD:
+		/* Disable all the counters, then fold the dead cpu's
+		 * count into the total on the global superblock and
+		 * re-enable the counters. */
+		s = XFS_SB_LOCK(mp);
+		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
+		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
+		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
+
+		mp->m_sb.sb_icount += cntp->icsb_icount;
+		mp->m_sb.sb_ifree += cntp->icsb_ifree;
+		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
+
+		cntp->icsb_icount = 0;
+		cntp->icsb_ifree = 0;
+		cntp->icsb_fdblocks = 0;
+
+		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
+		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
+		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
+		XFS_SB_UNLOCK(mp, s);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
 int
 xfs_icsb_init_counters(
 	xfs_mount_t	*mp)
@@ -1730,6 +1794,10 @@ xfs_icsb_init_counters(
 	if (mp->m_sb_cnts == NULL)
 		return -ENOMEM;
 
+	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
+	mp->m_icsb_notifier.priority = 0;
+	register_cpu_notifier(&mp->m_icsb_notifier);
+
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
 		spin_lock_init(&cntp->icsb_lock);
@@ -1746,8 +1814,10 @@ STATIC void
 xfs_icsb_destroy_counters(
 	xfs_mount_t	*mp)
 {
-	if (mp->m_sb_cnts)
+	if (mp->m_sb_cnts) {
+		unregister_cpu_notifier(&mp->m_icsb_notifier);
 		free_percpu(mp->m_sb_cnts);
+	}
 }
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7cca5110ca4..9d2ffbdc37a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -401,6 +401,7 @@ typedef struct xfs_mount {
 #ifdef HAVE_PERCPU_SB
 	xfs_icsb_cnts_t	*m_sb_cnts;	/* per-cpu superblock counters */
 	unsigned long	m_icsb_counters; /* disabled per-cpu counters */
+	struct notifier_block	m_icsb_notifier; /* hotplug cpu notifier */
 #endif
 } xfs_mount_t;
-- 
cgit v1.2.3-70-g09d2
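The "pointer magic" the comment above refers to is container_of(): because the notifier_block is embedded in the xfs_mount_t, the callback can subtract the member's offset from the member's address to recover the owning mount, which is what gives one notifier instance per filesystem. A stand-alone illustration follows; the struct names are simplified stand-ins, not the kernel definitions.

    /* Stand-alone illustration of recovering the enclosing structure
     * from an embedded member, as xfs_icsb_cpu_notify() does. */
    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct notifier_block {
            int (*notifier_call)(struct notifier_block *,
                                 unsigned long, void *);
    };

    struct mount {
            long                    balanced;  /* imagine per-mount state */
            struct notifier_block   notifier;  /* embedded, like m_icsb_notifier */
    };

    static int cpu_notify(struct notifier_block *nfb, unsigned long action,
                          void *hcpu)
    {
            /* step back from the member address to the owning mount */
            struct mount *mp = container_of(nfb, struct mount, notifier);

            (void)action; (void)hcpu;
            mp->balanced++;
            return 0;
    }

    int main(void)
    {
            struct mount m = { 0, { cpu_notify } };

            /* the caller only ever sees &m.notifier, yet we find 'm' */
            m.notifier.notifier_call(&m.notifier, 0, NULL);
            printf("balanced = %ld\n", m.balanced);  /* prints 1 */
            return 0;
    }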
From 01e1b69cfcdcfdd5b405165eaba29428f8b18a7c Mon Sep 17 00:00:00 2001
From: David Chinner
Date: Tue, 14 Mar 2006 13:29:16 +1100
Subject: [XFS] using a spinlock per cpu for superblock counter exclusion
 results in a preempt counter overflow at 256p and above. Change the exclusion
 mechanism to use atomic bit operations and busy wait loops to emulate the
 spin lock exclusion mechanism but without the preempt count issues.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SGI-PV: 950027
SGI-Modid: xfs-linux-melb:xfs-kern:25338a

Signed-off-by: David Chinner
Signed-off-by: Nathan Scott
---
 fs/xfs/linux-2.6/xfs_linux.h |  1 +
 fs/xfs/xfs_mount.c           | 37 ++++++++++++++++++++++++-------------
 fs/xfs/xfs_mount.h           |  4 +++-
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 9fdc14cffb7..bd88ccb0cad 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a64110b9023..d62aee02736 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1746,10 +1746,7 @@ xfs_icsb_cpu_notify(
 	case CPU_UP_PREPARE:
 		/* Easy Case - initialize the area and locks, and
 		 * then rebalance when online does everything else for us. */
-		spin_lock_init(&cntp->icsb_lock);
-		cntp->icsb_icount = 0;
-		cntp->icsb_ifree = 0;
-		cntp->icsb_fdblocks = 0;
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 		break;
 	case CPU_ONLINE:
 		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
@@ -1769,9 +1766,7 @@ xfs_icsb_cpu_notify(
 		mp->m_sb.sb_ifree += cntp->icsb_ifree;
 		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
 
-		cntp->icsb_icount = 0;
-		cntp->icsb_ifree = 0;
-		cntp->icsb_fdblocks = 0;
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 
 		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
 		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
@@ -1800,7 +1795,7 @@ xfs_icsb_init_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_lock_init(&cntp->icsb_lock);
+		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
 	}
 	/*
 	 * start with all counters disabled so that the
@@ -1820,6 +1815,22 @@ xfs_icsb_destroy_counters(
 	}
 }
 
+STATIC inline void
+xfs_icsb_lock_cntr(
+	xfs_icsb_cnts_t	*icsbp)
+{
+	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
+		ndelay(1000);
+	}
+}
+
+STATIC inline void
+xfs_icsb_unlock_cntr(
+	xfs_icsb_cnts_t	*icsbp)
+{
+	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
+}
+
 
 STATIC inline void
 xfs_icsb_lock_all_counters(
@@ -1830,7 +1841,7 @@ xfs_icsb_lock_all_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_lock(&cntp->icsb_lock);
+		xfs_icsb_lock_cntr(cntp);
 	}
 }
 
@@ -1843,7 +1854,7 @@ xfs_icsb_unlock_all_counters(
 
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-		spin_unlock(&cntp->icsb_lock);
+		xfs_icsb_unlock_cntr(cntp);
 	}
 }
 
@@ -2070,7 +2081,7 @@ xfs_icsb_modify_counters_int(
 again:
 	cpu = get_cpu();
 	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
-	spin_lock(&icsbp->icsb_lock);
+	xfs_icsb_lock_cntr(icsbp);
 	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
 		goto slow_path;
 
@@ -2104,7 +2115,7 @@ again:
 		BUG();
 		break;
 	}
-	spin_unlock(&icsbp->icsb_lock);
+	xfs_icsb_unlock_cntr(icsbp);
 	put_cpu();
 	if (locked)
 		XFS_SB_UNLOCK(mp, s);
@@ -2120,7 +2131,7 @@ again:
 	 * manner.
 	 */
 slow_path:
-	spin_unlock(&icsbp->icsb_lock);
+	xfs_icsb_unlock_cntr(icsbp);
 	put_cpu();
 
 	/* need to hold superblock in case we need
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9d2ffbdc37a..29cfcf0c11b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -280,9 +280,11 @@ typedef struct xfs_icsb_cnts {
 	uint64_t	icsb_fdblocks;
 	uint64_t	icsb_ifree;
 	uint64_t	icsb_icount;
-	spinlock_t	icsb_lock;
+	unsigned long	icsb_flags;
 } xfs_icsb_cnts_t;
 
+#define XFS_ICSB_FLAG_LOCK	(1 << 0)	/* counter lock bit */
+
 #define XFS_ICSB_SB_LOCKED	(1 << 0)	/* sb already locked */
 #define XFS_ICSB_LAZY_COUNT	(1 << 1)	/* accuracy not needed */
 
-- 
cgit v1.2.3-70-g09d2
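The replacement primitive in this patch is essentially a one-bit spinlock: test_and_set_bit() is retried until the caller is the one that flips the lock bit, and clear_bit() releases it, with no preempt-count manipulation to overflow. A hedged userspace analogue using C11 atomics is sketched below; the names mirror the patch but the code is illustrative, with sched_yield() standing in for the kernel's ndelay(1000) back-off.

    /* Userspace analogue of xfs_icsb_lock_cntr()/xfs_icsb_unlock_cntr():
     * a one-bit busy-wait lock over the counter's flag word. C11 atomics
     * stand in for test_and_set_bit()/clear_bit(). */
    #include <stdatomic.h>
    #include <sched.h>
    #include <stdint.h>

    #define ICSB_FLAG_LOCK  (1UL << 0)      /* counter lock bit */

    struct icsb_cnts {
            uint64_t                fdblocks;
            uint64_t                ifree;
            uint64_t                icount;
            _Atomic unsigned long   flags;
    };

    static void icsb_lock_cntr(struct icsb_cnts *icsbp)
    {
            /* loop until we were the caller that set the bit */
            while (atomic_fetch_or(&icsbp->flags, ICSB_FLAG_LOCK) &
                   ICSB_FLAG_LOCK)
                    sched_yield();  /* kernel version busy-waits: ndelay(1000) */
    }

    static void icsb_unlock_cntr(struct icsb_cnts *icsbp)
    {
            atomic_fetch_and(&icsbp->flags, ~ICSB_FLAG_LOCK);
    }

The trade-off against spin_lock() is that the bit lock carries no preempt-count or lock-debugging state; the patch can afford that because the holder is already pinned to its CPU by get_cpu() and each critical section is a handful of instructions.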
From 9f989c9455aac417c34af9c505e6b169055251da Mon Sep 17 00:00:00 2001
From: Nathan Scott
Date: Tue, 14 Mar 2006 13:29:32 +1100
Subject: [XFS] Additional mount time superblock validation checks.

SGI-PV: 950491
SGI-Modid: xfs-linux-melb:xfs-kern:25354a

Signed-off-by: Nathan Scott
---
 fs/xfs/xfs_mount.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d62aee02736..20e8abc16d1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,9 +268,12 @@ xfs_mount_validate_sb(
 	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
 	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
 	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
+	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
+	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
+	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
 	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
-	    sbp->sb_imax_pct > 100)) {
+	    (sbp->sb_imax_pct > 100 || sbp->sb_imax_pct < 1))) {
 		cmn_err(CE_WARN, "XFS: SB sanity check 1 failed");
 		XFS_CORRUPTION_ERROR("xfs_mount_validate_sb(3)",
 				     XFS_ERRLEVEL_LOW, mp, sbp);
-- 
cgit v1.2.3-70-g09d2
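The new consistency check in this last patch ties three geometry fields together: block and inode sizes are powers of two, so inodes-per-block must equal 2^(sb_blocklog - sb_inodelog). Worked example: 4096-byte blocks (blocklog 12) and 256-byte inodes (inodelog 8) give 4096/256 = 16 inodes per block, so inopblog must be 12 - 8 = 4. The stand-alone check below is illustrative only, not the kernel validator.

    /* Worked example of the sb_inopblog consistency check added above.
     * Illustrative struct and values, not the kernel's xfs_sb_t. */
    #include <stdio.h>

    struct sb_geom {
            unsigned char blocklog; /* log2(block size) */
            unsigned char inodelog; /* log2(inode size) */
            unsigned char inopblog; /* log2(inodes per block) */
    };

    static int sb_geometry_ok(const struct sb_geom *sbp)
    {
            return sbp->blocklog - sbp->inodelog == sbp->inopblog;
    }

    int main(void)
    {
            struct sb_geom good = { 12, 8, 4 };     /* 16 inodes/block */
            struct sb_geom bad  = { 12, 8, 5 };     /* claims 32: reject */

            printf("%d %d\n", sb_geometry_ok(&good), sb_geometry_ok(&bad));
            return 0;                               /* prints "1 0" */
    }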