summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/proportions.h119
-rw-r--r--lib/Makefile3
-rw-r--r--lib/proportions.c384
3 files changed, 505 insertions, 1 deletions
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
new file mode 100644
index 00000000000..2c3b3cad92b
--- /dev/null
+++ b/include/linux/proportions.h
@@ -0,0 +1,119 @@
+/*
+ * FLoating proportions
+ *
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * This file contains the public data structure and API definitions.
+ */
+
+#ifndef _LINUX_PROPORTIONS_H
+#define _LINUX_PROPORTIONS_H
+
+#include <linux/percpu_counter.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+
+struct prop_global {
+ /*
+ * The period over which we differentiate
+ *
+ * period = 2^shift
+ */
+ int shift;
+ /*
+ * The total event counter aka 'time'.
+ *
+ * Treated as an unsigned long; the lower 'shift - 1' bits are the
+ * counter bits, the remaining upper bits the period counter.
+ */
+ struct percpu_counter events;
+};
+
+/*
+ * global proportion descriptor
+ *
+ * this is needed to consitently flip prop_global structures.
+ */
+struct prop_descriptor {
+ int index;
+ struct prop_global pg[2];
+ struct mutex mutex; /* serialize the prop_global switch */
+};
+
+int prop_descriptor_init(struct prop_descriptor *pd, int shift);
+void prop_change_shift(struct prop_descriptor *pd, int new_shift);
+
+/*
+ * ----- PERCPU ------
+ */
+
+struct prop_local_percpu {
+ /*
+ * the local events counter
+ */
+ struct percpu_counter events;
+
+ /*
+ * snapshot of the last seen global state
+ */
+ int shift;
+ unsigned long period;
+ spinlock_t lock; /* protect the snapshot state */
+};
+
+int prop_local_init_percpu(struct prop_local_percpu *pl);
+void prop_local_destroy_percpu(struct prop_local_percpu *pl);
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl);
+void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl,
+ long *numerator, long *denominator);
+
+static inline
+void prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __prop_inc_percpu(pd, pl);
+ local_irq_restore(flags);
+}
+
+/*
+ * ----- SINGLE ------
+ */
+
+struct prop_local_single {
+ /*
+ * the local events counter
+ */
+ unsigned long events;
+
+ /*
+ * snapshot of the last seen global state
+ * and a lock protecting this state
+ */
+ int shift;
+ unsigned long period;
+ spinlock_t lock; /* protect the snapshot state */
+};
+
+#define INIT_PROP_LOCAL_SINGLE(name) \
+{ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+}
+
+int prop_local_init_single(struct prop_local_single *pl);
+void prop_local_destroy_single(struct prop_local_single *pl);
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl);
+void prop_fraction_single(struct prop_descriptor *pd, struct prop_local_single *pl,
+ long *numerator, long *denominator);
+
+static inline
+void prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __prop_inc_single(pd, pl);
+ local_irq_restore(flags);
+}
+
+#endif /* _LINUX_PROPORTIONS_H */
diff --git a/lib/Makefile b/lib/Makefile
index 6c4ea33bb2c..c5f215d509d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,8 @@
lib-y := ctype.o string.o vsprintf.o cmdline.o \
rbtree.o radix-tree.o dump_stack.o \
idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
- sha1.o irq_regs.o reciprocal_div.o argv_split.o
+ sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+ proportions.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/proportions.c b/lib/proportions.c
new file mode 100644
index 00000000000..332d8c58184
--- /dev/null
+++ b/lib/proportions.c
@@ -0,0 +1,384 @@
+/*
+ * Floating proportions
+ *
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * The floating proportion is a time derivative with an exponentially decaying
+ * history:
+ *
+ * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
+ *
+ * Where j is an element from {prop_local}, x_{j} is j's number of events,
+ * and i the time period over which the differential is taken. So d/dt_{-i} is
+ * the differential over the i-th last period.
+ *
+ * The decaying history gives smooth transitions. The time differential carries
+ * the notion of speed.
+ *
+ * The denominator is 2^(1+i) because we want the series to be normalised, ie.
+ *
+ * \Sum_{i=0} 1/2^(1+i) = 1
+ *
+ * Further more, if we measure time (t) in the same events as x; so that:
+ *
+ * t = \Sum_{j} x_{j}
+ *
+ * we get that:
+ *
+ * \Sum_{j} p_{j} = 1
+ *
+ * Writing this in an iterative fashion we get (dropping the 'd's):
+ *
+ * if (++x_{j}, ++t > period)
+ * t /= 2;
+ * for_each (j)
+ * x_{j} /= 2;
+ *
+ * so that:
+ *
+ * p_{j} = x_{j} / t;
+ *
+ * We optimize away the '/= 2' for the global time delta by noting that:
+ *
+ * if (++t > period) t /= 2:
+ *
+ * Can be approximated by:
+ *
+ * period/2 + (++t % period/2)
+ *
+ * [ Furthermore, when we choose period to be 2^n it can be written in terms of
+ * binary operations and wraparound artefacts disappear. ]
+ *
+ * Also note that this yields a natural counter of the elapsed periods:
+ *
+ * c = t / (period/2)
+ *
+ * [ Its monotonic increasing property can be applied to mitigate the wrap-
+ * around issue. ]
+ *
+ * This allows us to do away with the loop over all prop_locals on each period
+ * expiration. By remembering the period count under which it was last accessed
+ * as c_{j}, we can obtain the number of 'missed' cycles from:
+ *
+ * c - c_{j}
+ *
+ * We can then lazily catch up to the global period count every time we are
+ * going to use x_{j}, by doing:
+ *
+ * x_{j} /= 2^(c - c_{j}), c_{j} = c
+ */
+
+#include <linux/proportions.h>
+#include <linux/rcupdate.h>
+
+/*
+ * Limit the time part in order to ensure there are some bits left for the
+ * cycle counter.
+ */
+#define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
+
+int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+{
+ int err;
+
+ if (shift > PROP_MAX_SHIFT)
+ shift = PROP_MAX_SHIFT;
+
+ pd->index = 0;
+ pd->pg[0].shift = shift;
+ mutex_init(&pd->mutex);
+ err = percpu_counter_init_irq(&pd->pg[0].events, 0);
+ if (err)
+ goto out;
+
+ err = percpu_counter_init_irq(&pd->pg[1].events, 0);
+ if (err)
+ percpu_counter_destroy(&pd->pg[0].events);
+
+out:
+ return err;
+}
+
+/*
+ * We have two copies, and flip between them to make it seem like an atomic
+ * update. The update is not really atomic wrt the events counter, but
+ * it is internally consistent with the bit layout depending on shift.
+ *
+ * We copy the events count, move the bits around and flip the index.
+ */
+void prop_change_shift(struct prop_descriptor *pd, int shift)
+{
+ int index;
+ int offset;
+ u64 events;
+ unsigned long flags;
+
+ if (shift > PROP_MAX_SHIFT)
+ shift = PROP_MAX_SHIFT;
+
+ mutex_lock(&pd->mutex);
+
+ index = pd->index ^ 1;
+ offset = pd->pg[pd->index].shift - shift;
+ if (!offset)
+ goto out;
+
+ pd->pg[index].shift = shift;
+
+ local_irq_save(flags);
+ events = percpu_counter_sum(&pd->pg[pd->index].events);
+ if (offset < 0)
+ events <<= -offset;
+ else
+ events >>= offset;
+ percpu_counter_set(&pd->pg[index].events, events);
+
+ /*
+ * ensure the new pg is fully written before the switch
+ */
+ smp_wmb();
+ pd->index = index;
+ local_irq_restore(flags);
+
+ synchronize_rcu();
+
+out:
+ mutex_unlock(&pd->mutex);
+}
+
+/*
+ * wrap the access to the data in an rcu_read_lock() section;
+ * this is used to track the active references.
+ */
+static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+{
+ int index;
+
+ rcu_read_lock();
+ index = pd->index;
+ /*
+ * match the wmb from vcd_flip()
+ */
+ smp_rmb();
+ return &pd->pg[index];
+}
+
+static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+{
+ rcu_read_unlock();
+}
+
+static void
+prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
+{
+ int offset = *pl_shift - new_shift;
+
+ if (!offset)
+ return;
+
+ if (offset < 0)
+ *pl_period <<= -offset;
+ else
+ *pl_period >>= offset;
+
+ *pl_shift = new_shift;
+}
+
+/*
+ * PERCPU
+ */
+
+int prop_local_init_percpu(struct prop_local_percpu *pl)
+{
+ spin_lock_init(&pl->lock);
+ pl->shift = 0;
+ pl->period = 0;
+ return percpu_counter_init_irq(&pl->events, 0);
+}
+
+void prop_local_destroy_percpu(struct prop_local_percpu *pl)
+{
+ percpu_counter_destroy(&pl->events);
+}
+
+/*
+ * Catch up with missed period expirations.
+ *
+ * until (c_{j} == c)
+ * x_{j} -= x_{j}/2;
+ * c_{j}++;
+ */
+static
+void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
+{
+ unsigned long period = 1UL << (pg->shift - 1);
+ unsigned long period_mask = ~(period - 1);
+ unsigned long global_period;
+ unsigned long flags;
+
+ global_period = percpu_counter_read(&pg->events);
+ global_period &= period_mask;
+
+ /*
+ * Fast path - check if the local and global period count still match
+ * outside of the lock.
+ */
+ if (pl->period == global_period)
+ return;
+
+ spin_lock_irqsave(&pl->lock, flags);
+ prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+ /*
+ * For each missed period, we half the local counter.
+ * basically:
+ * pl->events >> (global_period - pl->period);
+ *
+ * but since the distributed nature of percpu counters make division
+ * rather hard, use a regular subtraction loop. This is safe, because
+ * the events will only every be incremented, hence the subtraction
+ * can never result in a negative number.
+ */
+ while (pl->period != global_period) {
+ unsigned long val = percpu_counter_read(&pl->events);
+ unsigned long half = (val + 1) >> 1;
+
+ /*
+ * Half of zero won't be much less, break out.
+ * This limits the loop to shift iterations, even
+ * if we missed a million.
+ */
+ if (!val)
+ break;
+
+ percpu_counter_add(&pl->events, -half);
+ pl->period += period;
+ }
+ pl->period = global_period;
+ spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ * ++x_{j}, ++t
+ */
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_percpu(pg, pl);
+ percpu_counter_add(&pl->events, 1);
+ percpu_counter_add(&pg->events, 1);
+ prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ * p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_percpu(struct prop_descriptor *pd,
+ struct prop_local_percpu *pl,
+ long *numerator, long *denominator)
+{
+ struct prop_global *pg = prop_get_global(pd);
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+
+ prop_norm_percpu(pg, pl);
+ *numerator = percpu_counter_read_positive(&pl->events);
+
+ global_count = percpu_counter_read(&pg->events);
+ *denominator = period_2 + (global_count & counter_mask);
+
+ prop_put_global(pd, pg);
+}
+
+/*
+ * SINGLE
+ */
+
+int prop_local_init_single(struct prop_local_single *pl)
+{
+ spin_lock_init(&pl->lock);
+ pl->shift = 0;
+ pl->period = 0;
+ pl->events = 0;
+ return 0;
+}
+
+void prop_local_destroy_single(struct prop_local_single *pl)
+{
+}
+
+/*
+ * Catch up with missed period expirations.
+ */
+static
+void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
+{
+ unsigned long period = 1UL << (pg->shift - 1);
+ unsigned long period_mask = ~(period - 1);
+ unsigned long global_period;
+ unsigned long flags;
+
+ global_period = percpu_counter_read(&pg->events);
+ global_period &= period_mask;
+
+ /*
+ * Fast path - check if the local and global period count still match
+ * outside of the lock.
+ */
+ if (pl->period == global_period)
+ return;
+
+ spin_lock_irqsave(&pl->lock, flags);
+ prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+ /*
+ * For each missed period, we half the local counter.
+ */
+ period = (global_period - pl->period) >> (pg->shift - 1);
+ if (likely(period < BITS_PER_LONG))
+ pl->events >>= period;
+ else
+ pl->events = 0;
+ pl->period = global_period;
+ spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ * ++x_{j}, ++t
+ */
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_single(pg, pl);
+ pl->events++;
+ percpu_counter_add(&pg->events, 1);
+ prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ * p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_single(struct prop_descriptor *pd,
+ struct prop_local_single *pl,
+ long *numerator, long *denominator)
+{
+ struct prop_global *pg = prop_get_global(pd);
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+
+ prop_norm_single(pg, pl);
+ *numerator = pl->events;
+
+ global_count = percpu_counter_read(&pg->events);
+ *denominator = period_2 + (global_count & counter_mask);
+
+ prop_put_global(pd, pg);
+}