summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/s390/appldata/appldata_base.c1
-rw-r--r--arch/s390/appldata/appldata_mem.c20
-rw-r--r--block/ll_rw_blk.c4
-rw-r--r--drivers/parisc/led.c11
-rw-r--r--fs/inode.c9
-rw-r--r--fs/ncpfs/mmap.c2
-rw-r--r--include/linux/vmstat.h170
-rw-r--r--init/Kconfig9
-rw-r--r--mm/filemap.c4
-rw-r--r--mm/memory.c4
-rw-r--r--mm/page_alloc.c21
-rw-r--r--mm/page_io.c4
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/swap.c4
-rw-r--r--mm/vmscan.c23
-rw-r--r--mm/vmstat.c171
16 files changed, 212 insertions, 249 deletions
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 61bc44626c0..2476ca739c1 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -766,7 +766,6 @@ unsigned long nr_iowait(void)
#endif /* MODULE */
EXPORT_SYMBOL_GPL(si_swapinfo);
EXPORT_SYMBOL_GPL(nr_threads);
-EXPORT_SYMBOL_GPL(get_full_page_state);
EXPORT_SYMBOL_GPL(nr_running);
EXPORT_SYMBOL_GPL(nr_iowait);
//EXPORT_SYMBOL_GPL(nr_context_switches);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 180ba79a626..4811e2dac86 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -107,21 +107,21 @@ static void appldata_get_mem_data(void *data)
* serialized through the appldata_ops_lock and can use static
*/
static struct sysinfo val;
- static struct page_state ps;
+ unsigned long ev[NR_VM_EVENT_ITEMS];
struct appldata_mem_data *mem_data;
mem_data = data;
mem_data->sync_count_1++;
- get_full_page_state(&ps);
- mem_data->pgpgin = ps.pgpgin >> 1;
- mem_data->pgpgout = ps.pgpgout >> 1;
- mem_data->pswpin = ps.pswpin;
- mem_data->pswpout = ps.pswpout;
- mem_data->pgalloc = ps.pgalloc_high + ps.pgalloc_normal +
- ps.pgalloc_dma;
- mem_data->pgfault = ps.pgfault;
- mem_data->pgmajfault = ps.pgmajfault;
+ all_vm_events(ev);
+ mem_data->pgpgin = ev[PGPGIN] >> 1;
+ mem_data->pgpgout = ev[PGPGOUT] >> 1;
+ mem_data->pswpin = ev[PSWPIN];
+ mem_data->pswpout = ev[PSWPOUT];
+ mem_data->pgalloc = ev[PGALLOC_HIGH] + ev[PGALLOC_NORMAL] +
+ ev[PGALLOC_DMA];
+ mem_data->pgfault = ev[PGFAULT];
+ mem_data->pgmajfault = ev[PGMAJFAULT];
si_meminfo(&val);
mem_data->sharedram = val.sharedram;
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index eee03a3876a..fb83547f563 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3117,9 +3117,9 @@ void submit_bio(int rw, struct bio *bio)
BIO_BUG_ON(!bio->bi_io_vec);
bio->bi_rw |= rw;
if (rw & WRITE)
- mod_page_state(pgpgout, count);
+ count_vm_events(PGPGOUT, count);
else
- mod_page_state(pgpgin, count);
+ count_vm_events(PGPGIN, count);
if (unlikely(block_dump)) {
char b[BDEVNAME_SIZE];
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 298f2ddb2c1..d7024c7483b 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -411,16 +411,17 @@ static __inline__ int led_get_net_activity(void)
static __inline__ int led_get_diskio_activity(void)
{
static unsigned long last_pgpgin, last_pgpgout;
- struct page_state pgstat;
+ unsigned long events[NR_VM_EVENT_ITEMS];
int changed;
- get_full_page_state(&pgstat); /* get no of sectors in & out */
+ all_vm_events(events);
/* Just use a very simple calculation here. Do not care about overflow,
since we only want to know if there was activity or not. */
- changed = (pgstat.pgpgin != last_pgpgin) || (pgstat.pgpgout != last_pgpgout);
- last_pgpgin = pgstat.pgpgin;
- last_pgpgout = pgstat.pgpgout;
+ changed = (events[PGPGIN] != last_pgpgin) ||
+ (events[PGPGOUT] != last_pgpgout);
+ last_pgpgin = events[PGPGIN];
+ last_pgpgout = events[PGPGOUT];
return (changed ? LED_DISK_IO : 0);
}
diff --git a/fs/inode.c b/fs/inode.c
index f42961eb983..14a6c4147e4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -452,15 +452,14 @@ static void prune_icache(int nr_to_scan)
nr_pruned++;
}
inodes_stat.nr_unused -= nr_pruned;
+ if (current_is_kswapd())
+ __count_vm_events(KSWAPD_INODESTEAL, reap);
+ else
+ __count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lock);
dispose_list(&freeable);
mutex_unlock(&iprune_mutex);
-
- if (current_is_kswapd())
- mod_page_state(kswapd_inodesteal, reap);
- else
- mod_page_state(pginodesteal, reap);
}
/*
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 52d60c3d899..e7d5a3097fe 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -93,7 +93,7 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
*/
if (type)
*type = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
return page;
}
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 16173b63ee6..3e0daf54133 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -7,115 +7,77 @@
#include <linux/mmzone.h>
#include <asm/atomic.h>
+#ifdef CONFIG_VM_EVENT_COUNTERS
/*
- * Global page accounting. One instance per CPU. Only unsigned longs are
- * allowed.
+ * Light weight per cpu counter implementation.
*
- * - Fields can be modified with xxx_page_state and xxx_page_state_zone at
- * any time safely (which protects the instance from modification by
- * interrupt.
- * - The __xxx_page_state variants can be used safely when interrupts are
- * disabled.
- * - The __xxx_page_state variants can be used if the field is only
- * modified from process context and protected from preemption, or only
- * modified from interrupt context. In this case, the field should be
- * commented here.
+ * Counters should only be incremented and no critical kernel component
+ * should rely on the counter values.
+ *
+ * Counters are handled completely inline. On many platforms the code
+ * generated will simply be the increment of a global address.
*/
-struct page_state {
- unsigned long pgpgin; /* Disk reads */
- unsigned long pgpgout; /* Disk writes */
- unsigned long pswpin; /* swap reads */
- unsigned long pswpout; /* swap writes */
-
- unsigned long pgalloc_high; /* page allocations */
- unsigned long pgalloc_normal;
- unsigned long pgalloc_dma32;
- unsigned long pgalloc_dma;
-
- unsigned long pgfree; /* page freeings */
- unsigned long pgactivate; /* pages moved inactive->active */
- unsigned long pgdeactivate; /* pages moved active->inactive */
-
- unsigned long pgfault; /* faults (major+minor) */
- unsigned long pgmajfault; /* faults (major only) */
-
- unsigned long pgrefill_high; /* inspected in refill_inactive_zone */
- unsigned long pgrefill_normal;
- unsigned long pgrefill_dma32;
- unsigned long pgrefill_dma;
-
- unsigned long pgsteal_high; /* total highmem pages reclaimed */
- unsigned long pgsteal_normal;
- unsigned long pgsteal_dma32;
- unsigned long pgsteal_dma;
-
- unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
- unsigned long pgscan_kswapd_normal;
- unsigned long pgscan_kswapd_dma32;
- unsigned long pgscan_kswapd_dma;
-
- unsigned long pgscan_direct_high;/* total highmem pages scanned */
- unsigned long pgscan_direct_normal;
- unsigned long pgscan_direct_dma32;
- unsigned long pgscan_direct_dma;
-
- unsigned long pginodesteal; /* pages reclaimed via inode freeing */
- unsigned long slabs_scanned; /* slab objects scanned */
- unsigned long kswapd_steal; /* pages reclaimed by kswapd */
- unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
- unsigned long pageoutrun; /* kswapd's calls to page reclaim */
- unsigned long allocstall; /* direct reclaim calls */
-
- unsigned long pgrotated; /* pages rotated to tail of the LRU */
+
+#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+
+enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
+ FOR_ALL_ZONES(PGALLOC),
+ PGFREE, PGACTIVATE, PGDEACTIVATE,
+ PGFAULT, PGMAJFAULT,
+ FOR_ALL_ZONES(PGREFILL),
+ FOR_ALL_ZONES(PGSTEAL),
+ FOR_ALL_ZONES(PGSCAN_KSWAPD),
+ FOR_ALL_ZONES(PGSCAN_DIRECT),
+ PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+ PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ NR_VM_EVENT_ITEMS
+};
+
+struct vm_event_state {
+ unsigned long event[NR_VM_EVENT_ITEMS];
};
-extern void get_full_page_state(struct page_state *ret);
-extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
-extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
-
-#define mod_page_state(member, delta) \
- mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define __mod_page_state(member, delta) \
- __mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define inc_page_state(member) mod_page_state(member, 1UL)
-#define dec_page_state(member) mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
-
-#define __inc_page_state(member) __mod_page_state(member, 1UL)
-#define __dec_page_state(member) __mod_page_state(member, 0UL - 1)
-#define __add_page_state(member,delta) __mod_page_state(member, (delta))
-#define __sub_page_state(member,delta) __mod_page_state(member, 0UL - (delta))
-
-#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
-
-#define state_zone_offset(zone, member) \
-({ \
- unsigned offset; \
- if (is_highmem(zone)) \
- offset = offsetof(struct page_state, member##_high); \
- else if (is_normal(zone)) \
- offset = offsetof(struct page_state, member##_normal); \
- else if (is_dma32(zone)) \
- offset = offsetof(struct page_state, member##_dma32); \
- else \
- offset = offsetof(struct page_state, member##_dma); \
- offset; \
-})
-
-#define __mod_page_state_zone(zone, member, delta) \
- do { \
- __mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
-
-#define mod_page_state_zone(zone, member, delta) \
- do { \
- mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
-
-DECLARE_PER_CPU(struct page_state, page_states);
+DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+
+static inline void __count_vm_event(enum vm_event_item item)
+{
+ __get_cpu_var(vm_event_states.event[item])++;
+}
+
+static inline void count_vm_event(enum vm_event_item item)
+{
+ get_cpu_var(vm_event_states.event[item])++;
+ put_cpu();
+}
+
+static inline void __count_vm_events(enum vm_event_item item, long delta)
+{
+ __get_cpu_var(vm_event_states.event[item]) += delta;
+}
+
+static inline void count_vm_events(enum vm_event_item item, long delta)
+{
+ get_cpu_var(vm_event_states.event[item])++;
+ put_cpu();
+}
+
+extern void all_vm_events(unsigned long *);
+extern void vm_events_fold_cpu(int cpu);
+
+#else
+
+/* Disable counters */
+#define get_cpu_vm_events(e) 0L
+#define count_vm_event(e) do { } while (0)
+#define count_vm_events(e,d) do { } while (0)
+#define __count_vm_event(e) do { } while (0)
+#define __count_vm_events(e,d) do { } while (0)
+#define vm_events_fold_cpu(x) do { } while (0)
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
+#define __count_zone_vm_events(item, zone, delta) \
+ __count_vm_events(item##_DMA + zone_idx(zone), delta)
/*
* Zone based page accounting with per cpu differentials.
diff --git a/init/Kconfig b/init/Kconfig
index f70f2fd273c..f515948889a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -379,6 +379,15 @@ config SLAB
SLOB is more space efficient but does not scale well and is
more susceptible to fragmentation.
+config VM_EVENT_COUNTERS
+ default y
+ bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
+ help
+ VM event counters are only needed to for event counts to be
+ shown. They have no function for the kernel itself. This
+ option allows the disabling of the VM event counters.
+ /proc/vmstat will only show page counts.
+
endmenu # General setup
config TINY_SHMEM
diff --git a/mm/filemap.c b/mm/filemap.c
index 87d62c44c3f..796a5471b49 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1416,7 +1416,7 @@ retry_find:
*/
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
did_readaround = 1;
ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1487,7 +1487,7 @@ no_cached_page:
page_not_uptodate:
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
lock_page(page);
diff --git a/mm/memory.c b/mm/memory.c
index 1a78791590f..7e2a4b1580e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1951,7 +1951,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
grab_swap_token();
}
@@ -2324,7 +2324,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
__set_current_state(TASK_RUNNING);
- inc_page_state(pgfault);
+ count_vm_event(PGFAULT);
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d61671260f9..30b0b97ad02 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -456,7 +456,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
kernel_map_pages(page, 1 << order, 0);
local_irq_save(flags);
- __mod_page_state(pgfree, 1 << order);
+ __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order);
local_irq_restore(flags);
}
@@ -729,7 +729,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- __inc_page_state(pgfree);
+ __count_vm_event(PGFREE);
list_add(&page->lru, &pcp->list);
pcp->count++;
if (pcp->count >= pcp->high) {
@@ -805,7 +805,7 @@ again:
goto failed;
}
- __mod_page_state_zone(zone, pgalloc, 1 << order);
+ __count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(zonelist, zone);
local_irq_restore(flags);
put_cpu();
@@ -2101,24 +2101,11 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int cpu = (unsigned long)hcpu;
- unsigned long *src, *dest;
if (action == CPU_DEAD) {
- int i;
-
local_irq_disable();
__drain_pages(cpu);
-
- /* Add dead cpu's page_states to our own. */
- dest = (unsigned long *)&__get_cpu_var(page_states);
- src = (unsigned long *)&per_cpu(page_states, cpu);
-
- for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
- i++) {
- dest[i] += src[i];
- src[i] = 0;
- }
-
+ vm_events_fold_cpu(cpu);
local_irq_enable();
refresh_cpu_vm_stats(cpu);
}
diff --git a/mm/page_io.c b/mm/page_io.c
index bb2b0d53889..88029948d00 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -101,7 +101,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
}
if (wbc->sync_mode == WB_SYNC_ALL)
rw |= (1 << BIO_RW_SYNC);
- inc_page_state(pswpout);
+ count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
submit_bio(rw, bio);
@@ -123,7 +123,7 @@ int swap_readpage(struct file *file, struct page *page)
ret = -ENOMEM;
goto out;
}
- inc_page_state(pswpin);
+ count_vm_event(PSWPIN);
submit_bio(READ, bio);
out:
return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index b14ff817d16..a9c09e0ba70 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1045,12 +1045,12 @@ repeat:
swappage = lookup_swap_cache(swap);
if (!swappage) {
shmem_swp_unmap(entry);
- spin_unlock(&info->lock);
/* here we actually do the io */
if (type && *type == VM_FAULT_MINOR) {
- inc_page_state(pgmajfault);
+ __count_vm_event(PGMAJFAULT);
*type = VM_FAULT_MAJOR;
}
+ spin_unlock(&info->lock);
swappage = shmem_swapin(info, swap, idx);
if (!swappage) {
spin_lock(&info->lock);
diff --git a/mm/swap.c b/mm/swap.c
index 990868afc1c..8fd095c4ae5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -87,7 +87,7 @@ int rotate_reclaimable_page(struct page *page)
spin_lock_irqsave(&zone->lru_lock, flags);
if (PageLRU(page) && !PageActive(page)) {
list_move_tail(&page->lru, &zone->inactive_list);
- inc_page_state(pgrotated);
+ __count_vm_event(PGROTATED);
}
if (!test_clear_page_writeback(page))
BUG();
@@ -107,7 +107,7 @@ void fastcall activate_page(struct page *page)
del_page_from_inactive_list(zone, page);
SetPageActive(page);
add_page_to_active_list(zone, page);
- inc_page_state(pgactivate);
+ __count_vm_event(PGACTIVATE);
}
spin_unlock_irq(&zone->lru_lock);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d6942436ac9..ff2ebe9458a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -215,7 +215,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
break;
if (shrink_ret < nr_before)
ret += nr_before - shrink_ret;
- mod_page_state(slabs_scanned, this_scan);
+ count_vm_events(SLABS_SCANNED, this_scan);
total_scan -= this_scan;
cond_resched();
@@ -569,7 +569,7 @@ keep:
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
__pagevec_release_nonlru(&freed_pvec);
- mod_page_state(pgactivate, pgactivate);
+ count_vm_events(PGACTIVATE, pgactivate);
return nr_reclaimed;
}
@@ -659,11 +659,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
nr_reclaimed += nr_freed;
local_irq_disable();
if (current_is_kswapd()) {
- __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
- __mod_page_state(kswapd_steal, nr_freed);
+ __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+ __count_vm_events(KSWAPD_STEAL, nr_freed);
} else
- __mod_page_state_zone(zone, pgscan_direct, nr_scan);
- __mod_page_state_zone(zone, pgsteal, nr_freed);
+ __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
+ __count_vm_events(PGACTIVATE, nr_freed);
if (nr_taken == 0)
goto done;
@@ -841,11 +841,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
}
}
zone->nr_active += pgmoved;
- spin_unlock(&zone->lru_lock);
- __mod_page_state_zone(zone, pgrefill, pgscanned);
- __mod_page_state(pgdeactivate, pgdeactivate);
- local_irq_enable();
+ __count_zone_vm_events(PGREFILL, zone, pgscanned);
+ __count_vm_events(PGDEACTIVATE, pgdeactivate);
+ spin_unlock_irq(&zone->lru_lock);
pagevec_release(&pvec);
}
@@ -977,7 +976,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
.swappiness = vm_swappiness,
};
- inc_page_state(allocstall);
+ count_vm_event(ALLOCSTALL);
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
@@ -1074,7 +1073,7 @@ loop_again:
total_scanned = 0;
nr_reclaimed = 0;
sc.may_writepage = !laptop_mode;
- inc_page_state(pageoutrun);
+ count_vm_event(PAGEOUTRUN);
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ee7f8966625..73b83d67bab 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -13,66 +13,6 @@
#include <linux/mm.h>
#include <linux/module.h>
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-DEFINE_PER_CPU(struct page_state, page_states) = {0};
-
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
-{
- unsigned cpu;
-
- memset(ret, 0, nr * sizeof(unsigned long));
- cpus_and(*cpumask, *cpumask, cpu_online_map);
-
- for_each_cpu_mask(cpu, *cpumask) {
- unsigned long *in;
- unsigned long *out;
- unsigned off;
- unsigned next_cpu;
-
- in = (unsigned long *)&per_cpu(page_states, cpu);
-
- next_cpu = next_cpu(cpu, *cpumask);
- if (likely(next_cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, next_cpu));
-
- out = (unsigned long *)ret;
- for (off = 0; off < nr; off++)
- *out++ += *in++;
- }
-}
-
-void get_full_page_state(struct page_state *ret)
-{
- cpumask_t mask = CPU_MASK_ALL;
-
- __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
-}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- void *ptr;
-
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- unsigned long flags;
- void *ptr;
-
- local_irq_save(flags);
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
-
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free, struct pglist_data *pgdat)
{
@@ -106,6 +46,63 @@ void get_zone_counts(unsigned long *active,
}
}
+#ifdef CONFIG_VM_EVENT_COUNTERS
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
+EXPORT_PER_CPU_SYMBOL(vm_event_states);
+
+static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+{
+ int cpu = 0;
+ int i;
+
+ memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
+
+ cpu = first_cpu(*cpumask);
+ while (cpu < NR_CPUS) {
+ struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
+
+ cpu = next_cpu(cpu, *cpumask);
+
+ if (cpu < NR_CPUS)
+ prefetch(&per_cpu(vm_event_states, cpu));
+
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ ret[i] += this->event[i];
+ }
+}
+
+/*
+ * Accumulate the vm event counters across all CPUs.
+ * The result is unavoidably approximate - it can change
+ * during and after execution of this function.
+*/
+void all_vm_events(unsigned long *ret)
+{
+ sum_vm_events(ret, &cpu_online_map);
+}
+
+#ifdef CONFIG_HOTPLUG
+/*
+ * Fold the foreign cpu events into our own.
+ *
+ * This is adding to the events on one processor
+ * but keeps the global counts constant.
+ */
+void vm_events_fold_cpu(int cpu)
+{
+ struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
+ int i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+ count_vm_events(i, fold_state->event[i]);
+ fold_state->event[i] = 0;
+ }
+}
+#endif /* CONFIG_HOTPLUG */
+
+#endif /* CONFIG_VM_EVENT_COUNTERS */
+
/*
* Manage combined zone based / global counters
*
@@ -405,16 +402,16 @@ static char *vmstat_text[] = {
"numa_other",
#endif
- /* Event counters */
+#ifdef CONFIG_VM_EVENT_COUNTERS
"pgpgin",
"pgpgout",
"pswpin",
"pswpout",
- "pgalloc_high",
- "pgalloc_normal",
- "pgalloc_dma32",
"pgalloc_dma",
+ "pgalloc_dma32",
+ "pgalloc_normal",
+ "pgalloc_high",
"pgfree",
"pgactivate",
@@ -423,25 +420,25 @@ static char *vmstat_text[] = {
"pgfault",
"pgmajfault",
- "pgrefill_high",
- "pgrefill_normal",
- "pgrefill_dma32",
"pgrefill_dma",
+ "pgrefill_dma32",
+ "pgrefill_normal",
+ "pgrefill_high",
- "pgsteal_high",
- "pgsteal_normal",
- "pgsteal_dma32",
"pgsteal_dma",
+ "pgsteal_dma32",
+ "pgsteal_normal",
+ "pgsteal_high",
- "pgscan_kswapd_high",
- "pgscan_kswapd_normal",
- "pgscan_kswapd_dma32",
"pgscan_kswapd_dma",
+ "pgscan_kswapd_dma32",
+ "pgscan_kswapd_normal",
+ "pgscan_kswapd_high",
- "pgscan_direct_high",
- "pgscan_direct_normal",
- "pgscan_direct_dma32",
"pgscan_direct_dma",
+ "pgscan_direct_dma32",
+ "pgscan_direct_normal",
+ "pgscan_direct_high",
"pginodesteal",
"slabs_scanned",
@@ -451,6 +448,7 @@ static char *vmstat_text[] = {
"allocstall",
"pgrotated",
+#endif
};
/*
@@ -553,23 +551,32 @@ struct seq_operations zoneinfo_op = {
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
unsigned long *v;
- struct page_state *ps;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ unsigned long *e;
+#endif
int i;
if (*pos >= ARRAY_SIZE(vmstat_text))
return NULL;
+#ifdef CONFIG_VM_EVENT_COUNTERS
v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long)
- + sizeof(*ps), GFP_KERNEL);
+ + sizeof(struct vm_event_state), GFP_KERNEL);
+#else
+ v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long),
+ GFP_KERNEL);
+#endif
m->private = v;
if (!v)
return ERR_PTR(-ENOMEM);
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
v[i] = global_page_state(i);
- ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS);
- get_full_page_state(ps);
- ps->pgpgin /= 2; /* sectors -> kbytes */
- ps->pgpgout /= 2;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ e = v + NR_VM_ZONE_STAT_ITEMS;
+ all_vm_events(e);
+ e[PGPGIN] /= 2; /* sectors -> kbytes */
+ e[PGPGOUT] /= 2;
+#endif
return v + *pos;
}