diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 2 | ||||
-rw-r--r-- | drivers/xen/Kconfig | 24 | ||||
-rw-r--r-- | drivers/xen/Makefile | 3 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 170 | ||||
-rw-r--r-- | drivers/xen/xen-balloon.c | 2 | ||||
-rw-r--r-- | drivers/xen/xen-selfballoon.c | 485 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 44 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.h | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_backend.c | 9 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 6 |
10 files changed, 695 insertions, 52 deletions
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6cc0db1bf52..3f129b45451 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -684,7 +684,7 @@ again: err = xenbus_switch_state(dev, XenbusStateConnected); if (err) - xenbus_dev_fatal(dev, err, "switching to Connected state", + xenbus_dev_fatal(dev, err, "%s: switching to Connected state", dev->nodename); return; diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 9b700b4a987..03bc471c3ee 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,23 @@ config XEN_BALLOON the system to expand the domain's memory allocation, or alternatively return unneeded memory to the system. +config XEN_SELFBALLOONING + bool "Dynamically self-balloon kernel memory to target" + depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP + default n + help + Self-ballooning dynamically balloons available kernel memory driven + by the current usage of anonymous memory ("committed AS") and + controlled by various sysfs-settable parameters. Configuring + FRONTSWAP is highly recommended; if it is not configured, self- + ballooning is disabled by default but can be enabled with the + 'selfballooning' kernel boot parameter. If FRONTSWAP is configured, + frontswap-selfshrinking is enabled by default but can be disabled + with the 'noselfshrink' kernel boot parameter; and self-ballooning + is enabled by default but can be disabled with the 'noselfballooning' + kernel boot parameter. Note that systems without a sufficiently + large swap device should not enable self-ballooning. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON @@ -105,6 +122,13 @@ config SWIOTLB_XEN depends on PCI select SWIOTLB +config XEN_TMEM + bool + default y if (CLEANCACHE || FRONTSWAP) + help + Shim to interface in-kernel Transcendent Memory hooks + (e.g. cleancache and frontswap) to Xen tmem hypercalls. 
+ config XEN_PCIDEV_BACKEND tristate "Xen PCI-device backend driver" depends on PCI && X86 && XEN diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 35a72ef3afa..72bbb27d7a6 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,5 @@ obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ -obj-y += tmem.o nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_features.o := $(nostackp) @@ -9,12 +8,14 @@ obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o +obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 816a44959ef..d369965e8f8 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -1,7 +1,7 @@ /* * Xen implementation for transcendent memory (tmem) * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. + * Copyright (C) 2009-2011 Oracle Corp. All rights reserved. 
* Author: Dan Magenheimer */ @@ -9,8 +9,14 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/pagemap.h> +#include <linux/module.h> #include <linux/cleancache.h> +/* temporary ifdef until include/linux/frontswap.h is upstream */ +#ifdef CONFIG_FRONTSWAP +#include <linux/frontswap.h> +#endif + #include <xen/xen.h> #include <xen/interface/xen.h> #include <asm/xen/hypercall.h> @@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid) return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0); } -static int xen_tmem_destroy_pool(u32 pool_id) -{ - struct tmem_oid oid = { { 0 } }; - - return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); -} - -int tmem_enabled; +int tmem_enabled __read_mostly; +EXPORT_SYMBOL(tmem_enabled); static int __init enable_tmem(char *s) { @@ -139,6 +139,14 @@ static int __init enable_tmem(char *s) __setup("tmem", enable_tmem); +#ifdef CONFIG_CLEANCACHE +static int xen_tmem_destroy_pool(u32 pool_id) +{ + struct tmem_oid oid = { { 0 } }; + + return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0); +} + /* cleancache ops */ static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key, @@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = { .init_shared_fs = tmem_cleancache_init_shared_fs, .init_fs = tmem_cleancache_init_fs }; +#endif -static int __init xen_tmem_init(void) +#ifdef CONFIG_FRONTSWAP +/* frontswap tmem operations */ + +/* a single tmem poolid is used for all frontswap "types" (swapfiles) */ +static int tmem_frontswap_poolid; + +/* + * Swizzling increases objects per swaptype, increasing tmem concurrency + * for heavy swaploads. 
Later, larger nr_cpus -> larger SWIZ_BITS + */ +#define SWIZ_BITS 4 +#define SWIZ_MASK ((1 << SWIZ_BITS) - 1) +#define _oswiz(_type, _ind) ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK)) +#define iswiz(_ind) (_ind >> SWIZ_BITS) + +static inline struct tmem_oid oswiz(unsigned type, u32 ind) { - struct cleancache_ops old_ops; + struct tmem_oid oid = { .oid = { 0 } }; + oid.oid[0] = _oswiz(type, ind); + return oid; +} +/* returns 0 if the page was successfully put into frontswap, -1 if not */ +static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + mb(); /* ensure page is quiescent; tmem may address it with an alias */ + ret = xen_tmem_put_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* + * returns 0 if the page was successfully gotten from frontswap, -1 if + * was not present (should never happen!) 
+ */ +static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, + struct page *page) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + unsigned long pfn = page_to_pfn(page); + int pool = tmem_frontswap_poolid; + int ret; + + if (pool < 0) + return -1; + if (ind64 != ind) + return -1; + ret = xen_tmem_get_page(pool, oswiz(type, ind), iswiz(ind), pfn); + /* translate Xen tmem return values to linux semantics */ + if (ret == 1) + return 0; + else + return -1; +} + +/* flush a single page from frontswap */ +static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset) +{ + u64 ind64 = (u64)offset; + u32 ind = (u32)offset; + int pool = tmem_frontswap_poolid; + + if (pool < 0) + return; + if (ind64 != ind) + return; + (void) xen_tmem_flush_page(pool, oswiz(type, ind), iswiz(ind)); +} + +/* flush all pages from the passed swaptype */ +static void tmem_frontswap_flush_area(unsigned type) +{ + int pool = tmem_frontswap_poolid; + int ind; + + if (pool < 0) + return; + for (ind = SWIZ_MASK; ind >= 0; ind--) + (void)xen_tmem_flush_object(pool, oswiz(type, ind)); +} + +static void tmem_frontswap_init(unsigned ignored) +{ + struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID; + + /* a single tmem poolid is used for all frontswap "types" (swapfiles) */ + if (tmem_frontswap_poolid < 0) + tmem_frontswap_poolid = + xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE); +} + +static int __initdata use_frontswap = 1; + +static int __init no_frontswap(char *s) +{ + use_frontswap = 0; + return 1; +} + +__setup("nofrontswap", no_frontswap); + +static struct frontswap_ops tmem_frontswap_ops = { + .put_page = tmem_frontswap_put_page, + .get_page = tmem_frontswap_get_page, + .flush_page = tmem_frontswap_flush_page, + .flush_area = tmem_frontswap_flush_area, + .init = tmem_frontswap_init +}; +#endif + +static int __init xen_tmem_init(void) +{ if (!xen_domain()) return 0; +#ifdef CONFIG_FRONTSWAP + if (tmem_enabled && use_frontswap) { + char *s = ""; + struct 
frontswap_ops old_ops = + frontswap_register_ops(&tmem_frontswap_ops); + + tmem_frontswap_poolid = -1; + if (old_ops.init != NULL) + s = " (WARNING: frontswap_ops overridden)"; + printk(KERN_INFO "frontswap enabled, RAM provided by " + "Xen Transcendent Memory\n"); + } +#endif #ifdef CONFIG_CLEANCACHE BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid)); if (tmem_enabled && use_cleancache) { char *s = ""; - old_ops = cleancache_register_ops(&tmem_cleancache_ops); + struct cleancache_ops old_ops = + cleancache_register_ops(&tmem_cleancache_ops); if (old_ops.init_fs != NULL) s = " (WARNING: cleancache_ops overridden)"; printk(KERN_INFO "cleancache enabled, RAM provided by " diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index a4ff225ee86..5c9dc43c1e9 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -98,6 +98,8 @@ static int __init balloon_init(void) register_balloon(&balloon_sysdev); + register_xen_selfballooning(&balloon_sysdev); + target_watch.callback = watch_target; xenstore_notifier.notifier_call = balloon_init_watcher; diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c new file mode 100644 index 00000000000..010937b5a7c --- /dev/null +++ b/drivers/xen/xen-selfballoon.c @@ -0,0 +1,485 @@ +/****************************************************************************** + * Xen selfballoon driver (and optional frontswap self-shrinking driver) + * + * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. + * + * This code complements the cleancache and frontswap patchsets to optimize + * support for Xen Transcendent Memory ("tmem"). The policy it implements + * is rudimentary and will likely improve over time, but it does work well + * enough today. + * + * Two functionalities are implemented here which both use "control theory" + * (feedback) to optimize memory utilization. 
In a virtualized environment + * such as Xen, RAM is often a scarce resource and we would like to ensure + * that each of a possibly large number of virtual machines is using RAM + * efficiently, i.e. using as little as possible when under light load + * and obtaining as much as possible when memory demands are high. + * Since RAM needs vary highly dynamically and sometimes dramatically, + * "hysteresis" is used, that is, memory target is determined not just + * on current data but also on past data stored in the system. + * + * "Selfballooning" creates memory pressure by managing the Xen balloon + * driver to decrease and increase available kernel memory, driven + * largely by the target value of "Committed_AS" (see /proc/meminfo). + * Since Committed_AS does not account for clean mapped pages (i.e. pages + * in RAM that are identical to pages on disk), selfballooning has the + * effect of pushing less frequently used clean pagecache pages out of + * kernel RAM and, presumably using cleancache, into Xen tmem where + * Xen can more efficiently optimize RAM utilization for such pages. + * + * When kernel memory demand unexpectedly increases faster than Xen, via + * the selfballoon driver, is able to (or chooses to) provide usable RAM, + * the kernel may invoke swapping. In most cases, frontswap is able + * to absorb this swapping into Xen tmem. However, due to the fact + * that the kernel swap subsystem assumes swapping occurs to a disk, + * swapped pages may sit on the disk for a very long time, even if + * the kernel knows the page will never be used again. This is because + * the disk space costs very little and can be overwritten when + * necessary. When such stale pages are in frontswap, however, they + * are taking up valuable real estate.
"Frontswap selfshrinking" works + * to resolve this: When frontswap activity is otherwise stable + * and the guest kernel is not under memory pressure, the "frontswap + * selfshrinking" accounts for this by providing pressure to remove some + * pages from frontswap and return them to kernel memory. + * + * For both "selfballooning" and "frontswap-selfshrinking", a worker + * thread is used and sysfs tunables are provided to adjust the frequency + * and rate of adjustments to achieve the goal, as well as to disable one + * or both functions independently. + * + * While some argue that this functionality can and should be implemented + * in userspace, it has been observed that bad things happen (e.g. OOMs). + * + * System configuration note: Selfballooning should not be enabled on + * systems without a sufficiently large swap device configured; for best + * results, it is recommended that total swap be increased by the size + * of the guest memory. Also, while technically not required to be + * configured, it is highly recommended that frontswap also be configured + * and enabled when selfballooning is running. So, selfballooning + * is disabled by default if frontswap is not configured and can only + * be enabled with the "selfballooning" kernel boot option; similarly + * selfballooning is enabled by default if frontswap is configured and + * can be disabled with the "noselfballooning" kernel boot option. Finally, + * when frontswap is configured, frontswap-selfshrinking can be disabled + * with the "noselfshrink" kernel boot option. + * + * Selfballooning is disallowed in domain0 and force-disabled. + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mman.h> + +#include <xen/balloon.h> + +#include <xen/tmem.h> + +/* Enable/disable with sysfs. 
*/ +static int xen_selfballooning_enabled __read_mostly; + +/* + * Controls rate at which memory target (this iteration) approaches + * ultimate goal when memory need is increasing (up-hysteresis) or + * decreasing (down-hysteresis). Higher values of hysteresis cause + * slower increases/decreases. The default values for the various + * parameters were deemed reasonable by experimentation, may be + * workload-dependent, and can all be adjusted via sysfs. + */ +static unsigned int selfballoon_downhysteresis __read_mostly = 8; +static unsigned int selfballoon_uphysteresis __read_mostly = 1; + +/* In HZ, controls frequency of worker invocation. */ +static unsigned int selfballoon_interval __read_mostly = 5; + +static void selfballoon_process(struct work_struct *work); +static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process); + +#ifdef CONFIG_FRONTSWAP +#include <linux/frontswap.h> + +/* Enable/disable with sysfs. */ +static bool frontswap_selfshrinking __read_mostly; + +/* Enable/disable with kernel boot option. */ +static bool use_frontswap_selfshrink __initdata = true; + +/* + * The default values for the following parameters were deemed reasonable + * by experimentation, may be workload-dependent, and can all be + * adjusted via sysfs. + */ + +/* Control rate for frontswap shrinking. Higher hysteresis is slower. */ +static unsigned int frontswap_hysteresis __read_mostly = 20; + +/* + * Number of selfballoon worker invocations to wait before observing that + * frontswap selfshrinking should commence. Note that selfshrinking does + * not use a separate worker thread. 
+ */ +static unsigned int frontswap_inertia __read_mostly = 3; + +/* Countdown to next invocation of frontswap_shrink() */ +static unsigned long frontswap_inertia_counter; + +/* + * Invoked by the selfballoon worker thread, uses current number of pages + * in frontswap (frontswap_curr_pages()), previous status, and control + * values (hysteresis and inertia) to determine if frontswap should be + * shrunk and what the new frontswap size should be. Note that + * frontswap_shrink is essentially a partial swapoff that immediately + * transfers pages from the "swap device" (frontswap) back into kernel + * RAM; despite the name, frontswap "shrinking" is very different from + * the "shrinker" interface used by the kernel MM subsystem to reclaim + * memory. + */ +static void frontswap_selfshrink(void) +{ + static unsigned long cur_frontswap_pages; + static unsigned long last_frontswap_pages; + static unsigned long tgt_frontswap_pages; + + last_frontswap_pages = cur_frontswap_pages; + cur_frontswap_pages = frontswap_curr_pages(); + if (!cur_frontswap_pages || + (cur_frontswap_pages > last_frontswap_pages)) { + frontswap_inertia_counter = frontswap_inertia; + return; + } + if (frontswap_inertia_counter && --frontswap_inertia_counter) + return; + if (cur_frontswap_pages <= frontswap_hysteresis) + tgt_frontswap_pages = 0; + else + tgt_frontswap_pages = cur_frontswap_pages - + (cur_frontswap_pages / frontswap_hysteresis); + frontswap_shrink(tgt_frontswap_pages); +} + +static int __init xen_nofrontswap_selfshrink_setup(char *s) +{ + use_frontswap_selfshrink = false; + return 1; +} + +__setup("noselfshrink", xen_nofrontswap_selfshrink_setup); + +/* Disable with kernel boot option. */ +static bool use_selfballooning __initdata = true; + +static int __init xen_noselfballooning_setup(char *s) +{ + use_selfballooning = false; + return 1; +} + +__setup("noselfballooning", xen_noselfballooning_setup); +#else /* !CONFIG_FRONTSWAP */ +/* Enable with kernel boot option. 
*/ +static bool use_selfballooning __initdata = false; + +static int __init xen_selfballooning_setup(char *s) +{ + use_selfballooning = true; + return 1; +} + +__setup("selfballooning", xen_selfballooning_setup); +#endif /* CONFIG_FRONTSWAP */ + +/* + * Use current balloon size, the goal (vm_committed_as), and hysteresis + * parameters to set a new target balloon size + */ +static void selfballoon_process(struct work_struct *work) +{ + unsigned long cur_pages, goal_pages, tgt_pages; + bool reset_timer = false; + + if (xen_selfballooning_enabled) { + cur_pages = balloon_stats.current_pages; + tgt_pages = cur_pages; /* default is no change */ + goal_pages = percpu_counter_read_positive(&vm_committed_as) + + balloon_stats.current_pages - totalram_pages; +#ifdef CONFIG_FRONTSWAP + /* allow space for frontswap pages to be repatriated */ + if (frontswap_selfshrinking && frontswap_enabled) + goal_pages += frontswap_curr_pages(); +#endif + if (cur_pages > goal_pages) + tgt_pages = cur_pages - + ((cur_pages - goal_pages) / + selfballoon_downhysteresis); + else if (cur_pages < goal_pages) + tgt_pages = cur_pages + + ((goal_pages - cur_pages) / + selfballoon_uphysteresis); + /* else if cur_pages == goal_pages, no change */ + balloon_set_new_target(tgt_pages); + reset_timer = true; + } +#ifdef CONFIG_FRONTSWAP + if (frontswap_selfshrinking && frontswap_enabled) { + frontswap_selfshrink(); + reset_timer = true; + } +#endif + if (reset_timer) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); +} + +#ifdef CONFIG_SYSFS + +#include <linux/sysdev.h> +#include <linux/capability.h> + +#define SELFBALLOON_SHOW(name, format, args...) 
\ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } + +SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled); + +static ssize_t store_selfballooning(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = xen_selfballooning_enabled; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + + xen_selfballooning_enabled = !!tmp; + if (!was_enabled && xen_selfballooning_enabled) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR, + show_selfballooning, store_selfballooning); + +SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval); + +static ssize_t store_selfballoon_interval(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_interval = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR, + show_selfballoon_interval, store_selfballoon_interval); + +SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis); + +static ssize_t store_selfballoon_downhys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_downhysteresis = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_downhys, store_selfballoon_downhys); + + 
+SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis); + +static ssize_t store_selfballoon_uphys(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + selfballoon_uphysteresis = val; + return count; +} + +static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR, + show_selfballoon_uphys, store_selfballoon_uphys); + +#ifdef CONFIG_FRONTSWAP +SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking); + +static ssize_t store_frontswap_selfshrinking(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + bool was_enabled = frontswap_selfshrinking; + unsigned long tmp; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &tmp); + if (err || ((tmp != 0) && (tmp != 1))) + return -EINVAL; + frontswap_selfshrinking = !!tmp; + if (!was_enabled && !xen_selfballooning_enabled && + frontswap_selfshrinking) + schedule_delayed_work(&selfballoon_worker, + selfballoon_interval * HZ); + + return count; +} + +static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR, + show_frontswap_selfshrinking, store_frontswap_selfshrinking); + +SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia); + +static ssize_t store_frontswap_inertia(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_inertia = val; + frontswap_inertia_counter = val; + return count; +} + +static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR, + show_frontswap_inertia, store_frontswap_inertia); + +SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis); + +static 
ssize_t store_frontswap_hysteresis(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, + size_t count) +{ + unsigned long val; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + err = strict_strtoul(buf, 10, &val); + if (err || val == 0) + return -EINVAL; + frontswap_hysteresis = val; + return count; +} + +static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR, + show_frontswap_hysteresis, store_frontswap_hysteresis); + +#endif /* CONFIG_FRONTSWAP */ + +static struct attribute *selfballoon_attrs[] = { + &attr_selfballooning.attr, + &attr_selfballoon_interval.attr, + &attr_selfballoon_downhysteresis.attr, + &attr_selfballoon_uphysteresis.attr, +#ifdef CONFIG_FRONTSWAP + &attr_frontswap_selfshrinking.attr, + &attr_frontswap_hysteresis.attr, + &attr_frontswap_inertia.attr, +#endif + NULL +}; + +static struct attribute_group selfballoon_group = { + .name = "selfballoon", + .attrs = selfballoon_attrs +}; +#endif + +int register_xen_selfballooning(struct sys_device *sysdev) +{ + int error = -1; + +#ifdef CONFIG_SYSFS + error = sysfs_create_group(&sysdev->kobj, &selfballoon_group); +#endif + return error; +} +EXPORT_SYMBOL(register_xen_selfballooning); + +static int __init xen_selfballoon_init(void) +{ + bool enable = false; + + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain()) { + pr_info("xen/balloon: Xen selfballooning driver " + "disabled for domain0.\n"); + return -ENODEV; + } + + xen_selfballooning_enabled = tmem_enabled && use_selfballooning; + if (xen_selfballooning_enabled) { + pr_info("xen/balloon: Initializing Xen " + "selfballooning driver.\n"); + enable = true; + } +#ifdef CONFIG_FRONTSWAP + frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink; + if (frontswap_selfshrinking) { + pr_info("xen/balloon: Initializing frontswap " + "selfshrinking driver.\n"); + enable = true; + } +#endif + if (!enable) + return -ENODEV; + + schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ); 
+ + return 0; +} + +subsys_initcall(xen_selfballoon_init); + +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e3..bd2f90c9ac8 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev) kfree(to_xenbus_device(dev)); } -static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t nodename_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } -static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); -static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t devtype_show(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t modalias_show(struct device *dev, + struct device_attribute *attr, char *buf) { - return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); + return sprintf(buf, "%s:%s\n", dev->bus->name, + to_xenbus_device(dev)->devicetype); } -static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); + +struct device_attribute xenbus_dev_attrs[] = { + __ATTR_RO(nodename), + __ATTR_RO(devtype), + __ATTR_RO(modalias), + __ATTR_NULL +}; +EXPORT_SYMBOL_GPL(xenbus_dev_attrs); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, @@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, if (err) goto fail; - err = device_create_file(&xendev->dev, &dev_attr_nodename); - if (err) - goto fail_unregister; - - err = device_create_file(&xendev->dev, 
&dev_attr_devtype); - if (err) - goto fail_remove_nodename; - - err = device_create_file(&xendev->dev, &dev_attr_modalias); - if (err) - goto fail_remove_devtype; - return 0; -fail_remove_devtype: - device_remove_file(&xendev->dev, &dev_attr_devtype); -fail_remove_nodename: - device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: - device_unregister(&xendev->dev); fail: kfree(xendev); return err; diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 888b9900ca0..b814935378c 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -48,6 +48,8 @@ struct xen_bus_type struct bus_type bus; }; +extern struct device_attribute xenbus_dev_attrs[]; + extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 6cf467bf63e..60adf919d78 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev, if (xdev == NULL) return -ENODEV; + if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype)) + return -ENOMEM; + /* stuff we want to pass to /sbin/hotplug */ if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) return -ENOMEM; @@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 0); } -static struct device_attribute xenbus_backend_dev_attrs[] = { - __ATTR_NULL -}; - static struct xen_bus_type xenbus_backend = { .root = "backend", .levels = 3, /* backend/type/<frontend>/<id> */ @@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_backend_dev_attrs, + .dev_attrs = 
xenbus_dev_attrs, }, }; diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index b6a2690c9d4..ed2ba474a56 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch, xenbus_otherend_changed(watch, vec, len, 1); } -static struct device_attribute xenbus_frontend_dev_attrs[] = { - __ATTR_NULL -}; - static const struct dev_pm_ops xenbus_pm_ops = { .suspend = xenbus_dev_suspend, .resume = xenbus_dev_resume, @@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = { .probe = xenbus_dev_probe, .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_frontend_dev_attrs, + .dev_attrs = xenbus_dev_attrs, .pm = &xenbus_pm_ops, }, |