diff options
24 files changed, 5028 insertions, 337 deletions
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index ac2a261c5f7..6ef8cf3bc9a 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -457,6 +457,9 @@ ChangeLog Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. +2.1.30: + - Fix writev() (it kept writing the first segment over and over again + instead of moving onto subsequent segments). 2.1.29: - Fix a deadlock when mounting read-write. 2.1.28: diff --git a/MAINTAINERS b/MAINTAINERS index 42f991e5a85..64d7621ab35 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4383,11 +4383,11 @@ F: Documentation/scsi/NinjaSCSI.txt F: drivers/scsi/nsp32* NTFS FILESYSTEM -M: Anton Altaparmakov <aia21@cantab.net> +M: Anton Altaparmakov <anton@tuxera.com> L: linux-ntfs-dev@lists.sourceforge.net -W: http://www.linux-ntfs.org/ +W: http://www.tuxera.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs-2.6.git -S: Maintained +S: Supported F: Documentation/filesystems/ntfs.txt F: fs/ntfs/ diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4b9359a6f6c..83c32cb7258 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -464,6 +464,7 @@ config XEN_BLKDEV_FRONTEND tristate "Xen virtual block device support" depends on XEN default y + select XEN_XENBUS_FRONTEND help This driver implements the front-end of the Xen virtual block device driver. 
It communicates with a back-end driver diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ff652c77a0a..4c8bfc97fb4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2963,6 +2963,7 @@ config TILE_NET config XEN_NETDEV_FRONTEND tristate "Xen network device frontend driver" depends on XEN + select XEN_XENBUS_FRONTEND default y help The network device frontend driver allows the kernel to diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 5b1630e4e9e..a9523fdc691 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -45,6 +45,7 @@ config XEN_PCIDEV_FRONTEND depends on PCI && X86 && XEN select HOTPLUG select PCI_XEN + select XEN_XENBUS_FRONTEND default y help The PCI device frontend driver allows the kernel to import arbitrary diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 6e6180ccd72..5a48ce996de 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -29,6 +29,14 @@ config XEN_DEV_EVTCHN firing. If in doubt, say yes. +config XEN_BACKEND + bool "Backend driver support" + depends on XEN_DOM0 + default y + help + Support for backend device drivers that provide I/O services + to other virtual machines. + config XENFS tristate "Xen filesystem" default y @@ -62,6 +70,9 @@ config XEN_SYS_HYPERVISOR virtual environment, /sys/hypervisor will still be present, but will have no xen contents. 
+config XEN_XENBUS_FRONTEND + tristate + config XEN_PLATFORM_PCI tristate "xen platform pci device driver" depends on XEN_PVHVM diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 5571f5b8422..8dca685358b 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -5,3 +5,8 @@ xenbus-objs += xenbus_client.o xenbus-objs += xenbus_comms.o xenbus-objs += xenbus_xs.o xenbus-objs += xenbus_probe.o + +xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +xenbus-objs += $(xenbus-be-objs-y) + +obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index deb9c4ba3a9..baa65e7fbbc 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -56,7 +56,6 @@ #include <xen/events.h> #include <xen/page.h> -#include <xen/platform_pci.h> #include <xen/hvm.h> #include "xenbus_comms.h" @@ -73,15 +72,6 @@ static unsigned long xen_store_mfn; static BLOCKING_NOTIFIER_HEAD(xenstore_chain); -static void wait_for_devices(struct xenbus_driver *xendrv); - -static int xenbus_probe_frontend(const char *type, const char *name); - -static void xenbus_dev_shutdown(struct device *_dev); - -static int xenbus_dev_suspend(struct device *dev, pm_message_t state); -static int xenbus_dev_resume(struct device *dev); - /* If something in array of ids matches this device, return it. 
*/ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -102,34 +92,7 @@ int xenbus_match(struct device *_dev, struct device_driver *_drv) return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } - -static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) -{ - struct xenbus_device *dev = to_xenbus_device(_dev); - - if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) - return -ENOMEM; - - return 0; -} - -/* device/<type>/<id> => <type>-<id> */ -static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) -{ - nodename = strchr(nodename, '/'); - if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { - printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); - return -EINVAL; - } - - strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); - if (!strchr(bus_id, '/')) { - printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); - return -EINVAL; - } - *strchr(bus_id, '/') = '-'; - return 0; -} +EXPORT_SYMBOL_GPL(xenbus_match); static void free_otherend_details(struct xenbus_device *dev) @@ -149,7 +112,30 @@ static void free_otherend_watch(struct xenbus_device *dev) } -int read_otherend_details(struct xenbus_device *xendev, +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + + +static int watch_otherend(struct xenbus_device *dev) +{ + struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node) { int err = xenbus_gather(XBT_NIL, xendev->nodename, @@ -174,39 +160,11 @@ int read_otherend_details(struct xenbus_device *xendev, return 
0; } +EXPORT_SYMBOL_GPL(xenbus_read_otherend_details); - -static int read_backend_details(struct xenbus_device *xendev) -{ - return read_otherend_details(xendev, "backend-id", "backend"); -} - -static struct device_attribute xenbus_dev_attrs[] = { - __ATTR_NULL -}; - -/* Bus type for frontend drivers. */ -static struct xen_bus_type xenbus_frontend = { - .root = "device", - .levels = 2, /* device/type/<id> */ - .get_bus_id = frontend_bus_id, - .probe = xenbus_probe_frontend, - .bus = { - .name = "xen", - .match = xenbus_match, - .uevent = xenbus_uevent, - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, - .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_dev_attrs, - - .suspend = xenbus_dev_suspend, - .resume = xenbus_dev_resume, - }, -}; - -static void otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown) { struct xenbus_device *dev = container_of(watch, struct xenbus_device, otherend_watch); @@ -234,11 +192,7 @@ static void otherend_changed(struct xenbus_watch *watch, * work that can fail e.g., when the rootfs is gone. */ if (system_state > SYSTEM_RUNNING) { - struct xen_bus_type *bus = bus; - bus = container_of(dev->dev.bus, struct xen_bus_type, bus); - /* If we're frontend, drive the state machine to Closed. */ - /* This should cause the backend to release our resources. 
*/ - if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) + if (ignore_on_shutdown && (state == XenbusStateClosing)) xenbus_frontend_closed(dev); return; } @@ -246,25 +200,7 @@ static void otherend_changed(struct xenbus_watch *watch, if (drv->otherend_changed) drv->otherend_changed(dev, state); } - - -static int talk_to_otherend(struct xenbus_device *dev) -{ - struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); - - free_otherend_watch(dev); - free_otherend_details(dev); - - return drv->read_otherend_details(dev); -} - - -static int watch_otherend(struct xenbus_device *dev) -{ - return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, - "%s/%s", dev->otherend, "state"); -} - +EXPORT_SYMBOL_GPL(xenbus_otherend_changed); int xenbus_dev_probe(struct device *_dev) { @@ -308,8 +244,9 @@ int xenbus_dev_probe(struct device *_dev) fail: xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); xenbus_switch_state(dev, XenbusStateClosed); - return -ENODEV; + return err; } +EXPORT_SYMBOL_GPL(xenbus_dev_probe); int xenbus_dev_remove(struct device *_dev) { @@ -327,8 +264,9 @@ int xenbus_dev_remove(struct device *_dev) xenbus_switch_state(dev, XenbusStateClosed); return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_remove); -static void xenbus_dev_shutdown(struct device *_dev) +void xenbus_dev_shutdown(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); unsigned long timeout = 5*HZ; @@ -349,6 +287,7 @@ static void xenbus_dev_shutdown(struct device *_dev) out: put_device(&dev->dev); } +EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, struct xen_bus_type *bus, @@ -362,25 +301,7 @@ int xenbus_register_driver_common(struct xenbus_driver *drv, return driver_register(&drv->driver); } - -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) -{ - int ret; - - drv->read_otherend_details = read_backend_details; - - ret = 
xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); - if (ret) - return ret; - - /* If this driver is loaded as a module wait for devices to attach. */ - wait_for_devices(drv); - - return 0; -} -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +EXPORT_SYMBOL_GPL(xenbus_register_driver_common); void xenbus_unregister_driver(struct xenbus_driver *drv) { @@ -551,24 +472,7 @@ fail: kfree(xendev); return err; } - -/* device/<typename>/<name> */ -static int xenbus_probe_frontend(const char *type, const char *name) -{ - char *nodename; - int err; - - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", - xenbus_frontend.root, type, name); - if (!nodename) - return -ENOMEM; - - DPRINTK("%s", nodename); - - err = xenbus_probe_node(&xenbus_frontend, type, nodename); - kfree(nodename); - return err; -} +EXPORT_SYMBOL_GPL(xenbus_probe_node); static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) { @@ -582,10 +486,11 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) return PTR_ERR(dir); for (i = 0; i < dir_n; i++) { - err = bus->probe(type, dir[i]); + err = bus->probe(bus, type, dir[i]); if (err) break; } + kfree(dir); return err; } @@ -605,9 +510,11 @@ int xenbus_probe_devices(struct xen_bus_type *bus) if (err) break; } + kfree(dir); return err; } +EXPORT_SYMBOL_GPL(xenbus_probe_devices); static unsigned int char_count(const char *str, char c) { @@ -670,32 +577,18 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) } EXPORT_SYMBOL_GPL(xenbus_dev_changed); -static void frontend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - DPRINTK(""); - - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); -} - -/* We watch for devices appearing and vanishing. 
*/ -static struct xenbus_watch fe_watch = { - .node = "device", - .callback = frontend_changed, -}; - -static int xenbus_dev_suspend(struct device *dev, pm_message_t state) +int xenbus_dev_suspend(struct device *dev, pm_message_t state) { int err = 0; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) err = drv->suspend(xdev, state); if (err) @@ -703,21 +596,20 @@ static int xenbus_dev_suspend(struct device *dev, pm_message_t state) "xenbus: suspend %s failed: %i\n", dev_name(dev), err); return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_suspend); -static int xenbus_dev_resume(struct device *dev) +int xenbus_dev_resume(struct device *dev) { int err; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - err = talk_to_otherend(xdev); if (err) { printk(KERN_WARNING @@ -748,6 +640,7 @@ static int xenbus_dev_resume(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(xenbus_dev_resume); /* A flag to determine if xenstored is 'ready' (i.e. has started) */ int xenstored_ready = 0; @@ -776,11 +669,6 @@ void xenbus_probe(struct work_struct *unused) { xenstored_ready = 1; - /* Enumerate devices in xenstore and watch for changes. 
*/ - xenbus_probe_devices(&xenbus_frontend); - register_xenbus_watch(&fe_watch); - xenbus_backend_probe_and_watch(); - /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } @@ -809,16 +697,7 @@ static int __init xenbus_init(void) err = -ENODEV; if (!xen_domain()) - goto out_error; - - /* Register ourselves with the kernel bus subsystem */ - err = bus_register(&xenbus_frontend.bus); - if (err) - goto out_error; - - err = xenbus_backend_bus_register(); - if (err) - goto out_unreg_front; + return err; /* * Domain0 doesn't have a store_evtchn or store_mfn yet. @@ -874,7 +753,7 @@ static int __init xenbus_init(void) if (err) { printk(KERN_WARNING "XENBUS: Error initializing xenstore comms: %i\n", err); - goto out_unreg_back; + goto out_error; } #ifdef CONFIG_XEN_COMPAT_XENFS @@ -887,133 +766,13 @@ static int __init xenbus_init(void) return 0; - out_unreg_back: - xenbus_backend_bus_unregister(); - - out_unreg_front: - bus_unregister(&xenbus_frontend.bus); - out_error: if (page != 0) free_page(page); + return err; } postcore_initcall(xenbus_init); MODULE_LICENSE("GPL"); - -static int is_device_connecting(struct device *dev, void *data) -{ - struct xenbus_device *xendev = to_xenbus_device(dev); - struct device_driver *drv = data; - struct xenbus_driver *xendrv; - - /* - * A device with no driver will never connect. We care only about - * devices which should currently be in the process of connecting. - */ - if (!dev->driver) - return 0; - - /* Is this search limited to a particular driver? 
*/ - if (drv && (dev->driver != drv)) - return 0; - - xendrv = to_xenbus_driver(dev->driver); - return (xendev->state < XenbusStateConnected || - (xendev->state == XenbusStateConnected && - xendrv->is_ready && !xendrv->is_ready(xendev))); -} - -static int exists_connecting_device(struct device_driver *drv) -{ - return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - is_device_connecting); -} - -static int print_device_status(struct device *dev, void *data) -{ - struct xenbus_device *xendev = to_xenbus_device(dev); - struct device_driver *drv = data; - - /* Is this operation limited to a particular driver? */ - if (drv && (dev->driver != drv)) - return 0; - - if (!dev->driver) { - /* Information only: is this too noisy? */ - printk(KERN_INFO "XENBUS: Device with no driver: %s\n", - xendev->nodename); - } else if (xendev->state < XenbusStateConnected) { - enum xenbus_state rstate = XenbusStateUnknown; - if (xendev->otherend) - rstate = xenbus_read_driver_state(xendev->otherend); - printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (local state %d, remote state %d)\n", - xendev->nodename, xendev->state, rstate); - } - - return 0; -} - -/* We only wait for device setup after most initcalls have run. */ -static int ready_to_wait_for_devices; - -/* - * On a 5-minute timeout, wait for all devices currently configured. We need - * to do this to guarantee that the filesystems and / or network devices - * needed for boot are available, before we can allow the boot to proceed. - * - * This needs to be on a late_initcall, to happen after the frontend device - * drivers have been initialised, but before the root fs is mounted. - * - * A possible improvement here would be to have the tools add a per-device - * flag to the store entry, indicating whether it is needed at boot time. - * This would allow people who knew what they were doing to accelerate their - * boot slightly, but of course needs tools or manual intervention to set up - * those flags correctly. 
- */ -static void wait_for_devices(struct xenbus_driver *xendrv) -{ - unsigned long start = jiffies; - struct device_driver *drv = xendrv ? &xendrv->driver : NULL; - unsigned int seconds_waited = 0; - - if (!ready_to_wait_for_devices || !xen_domain()) - return; - - while (exists_connecting_device(drv)) { - if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { - if (!seconds_waited) - printk(KERN_WARNING "XENBUS: Waiting for " - "devices to initialise: "); - seconds_waited += 5; - printk("%us...", 300 - seconds_waited); - if (seconds_waited == 300) - break; - } - - schedule_timeout_interruptible(HZ/10); - } - - if (seconds_waited) - printk("\n"); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, - print_device_status); -} - -#ifndef MODULE -static int __init boot_wait_for_devices(void) -{ - if (xen_hvm_domain() && !xen_platform_pci_unplug) - return -ENODEV; - - ready_to_wait_for_devices = 1; - wait_for_devices(NULL); - return 0; -} - -late_initcall(boot_wait_for_devices); -#endif diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 6c5e3185a6a..24665812316 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -36,26 +36,15 @@ #define XEN_BUS_ID_SIZE 20 -#ifdef CONFIG_XEN_BACKEND -extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); -extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); -extern void xenbus_backend_probe_and_watch(void); -extern int xenbus_backend_bus_register(void); -extern void xenbus_backend_bus_unregister(void); -#else -static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} -static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} -static inline void xenbus_backend_probe_and_watch(void) {} -static inline int xenbus_backend_bus_register(void) { return 0; } -static inline void xenbus_backend_bus_unregister(void) {} -#endif - struct xen_bus_type { char *root; unsigned int levels; int 
(*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); - int (*probe)(const char *type, const char *dir); + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); + void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, + unsigned int len); struct bus_type bus; }; @@ -73,4 +62,16 @@ extern int xenbus_probe_devices(struct xen_bus_type *bus); extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); +extern void xenbus_dev_shutdown(struct device *_dev); + +extern int xenbus_dev_suspend(struct device *dev, pm_message_t state); +extern int xenbus_dev_resume(struct device *dev); + +extern void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown); + +extern int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + #endif diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c new file mode 100644 index 00000000000..6cf467bf63e --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -0,0 +1,276 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define DPRINTK(fmt, args...) 
\ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/notifier.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <asm/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/features.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */ +static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s", + typelen, type, domid, devid) >= XEN_BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static int xenbus_uevent_backend(struct device *dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + struct xen_bus_type *bus; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) + return -ENOMEM; + + if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) 
+ return -ENOMEM; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, env); + } + + return 0; +} + +/* backend/<typename>/<frontend-uuid>/<name> */ +static int xenbus_probe_backend_unit(struct xen_bus_type *bus, + const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +/* backend/<typename>/<frontend-domid> */ +static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, + const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 0); +} + +static struct device_attribute xenbus_backend_dev_attrs[] = { + __ATTR_NULL +}; + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type/<frontend>/<id> */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .otherend_changed = frontend_changed, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .uevent = xenbus_uevent_backend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_attrs = xenbus_backend_dev_attrs, + }, +}; + +static void backend_changed(struct xenbus_watch *watch, + const char 
**vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); +} + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +int __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + +static int backend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* Enumerate devices in xenstore and watch for changes. */ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); + + return NOTIFY_DONE; +} + +static int __init xenbus_probe_backend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = backend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_backend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_backend_init); diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c new file mode 100644 index 00000000000..5bcc2d6cf12 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -0,0 +1,294 @@ +#define DPRINTK(fmt, args...) 
\ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> + +#include <xen/platform_pci.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { + printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(bus, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_uevent_frontend(struct device *_dev, + struct kobj_uevent_env *env) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + + if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) + return -ENOMEM; + + return 0; +} + + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 1); +} + +static struct device_attribute xenbus_frontend_dev_attrs[] = { + 
__ATTR_NULL +}; + +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .otherend_changed = backend_changed, + .bus = { + .name = "xen", + .match = xenbus_match, + .uevent = xenbus_uevent_frontend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .dev_attrs = xenbus_frontend_dev_attrs, + + .suspend = xenbus_dev_suspend, + .resume = xenbus_dev_resume, + }, +}; + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + + +/* We watch for devices appearing and vanishing. */ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static int read_backend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "backend-id", "backend"); +} + +static int is_device_connecting(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + struct xenbus_driver *xendrv; + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + /* Is this search limited to a particular driver? 
*/ + if (drv && (dev->driver != drv)) + return 0; + + xendrv = to_xenbus_driver(dev->driver); + return (xendev->state < XenbusStateConnected || + (xendev->state == XenbusStateConnected && + xendrv->is_ready && !xendrv->is_ready(xendev))); +} + +static int exists_connecting_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + is_device_connecting); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (!dev->driver) { + /* Information only: is this too noisy? */ + printk(KERN_INFO "XENBUS: Device with no driver: %s\n", + xendev->nodename); + } else if (xendev->state < XenbusStateConnected) { + enum xenbus_state rstate = XenbusStateUnknown; + if (xendev->otherend) + rstate = xenbus_read_driver_state(xendev->otherend); + printk(KERN_WARNING "XENBUS: Timeout connecting " + "to device: %s (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +/* + * On a 5-minute timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. 
+ */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long start = jiffies; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; + + if (!ready_to_wait_for_devices || !xen_domain()) + return; + + while (exists_connecting_device(drv)) { + if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { + if (!seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + seconds_waited += 5; + printk("%us...", 300 - seconds_waited); + if (seconds_waited == 300) + break; + } + + schedule_timeout_interruptible(HZ/10); + } + + if (seconds_waited) + printk("\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +int __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. */ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +static int frontend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + /* Enumerate devices in xenstore and watch for changes. 
*/ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + + return NOTIFY_DONE; +} + + +static int __init xenbus_probe_frontend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = frontend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_frontend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_frontend_init); + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + if (xen_hvm_domain() && !xen_platform_pci_unplug) + return -ENODEV; + + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif + +MODULE_LICENSE("GPL"); diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 58b6be99254..4ff028fcfd6 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 113ebd9f25a..f4b1057abdd 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -1380,15 +1380,14 @@ static inline void ntfs_set_next_iovec(const struct iovec **iovp, * pages (out to offset + bytes), to emulate ntfs_copy_from_user()'s * single-segment behaviour. 
* - * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both - * when atomic and when not atomic. This is ok because - * __ntfs_copy_from_user_iovec_inatomic() calls __copy_from_user_inatomic() - * and it is ok to call this when non-atomic. - * Infact, the only difference between __copy_from_user_inatomic() and + * We call the same helper (__ntfs_copy_from_user_iovec_inatomic()) both when + * atomic and when not atomic. This is ok because it calls + * __copy_from_user_inatomic() and it is ok to call this when non-atomic. In + * fact, the only difference between __copy_from_user_inatomic() and * __copy_from_user() is that the latter calls might_sleep() and the former - * should not zero the tail of the buffer on error. And on many - * architectures __copy_from_user_inatomic() is just defined to - * __copy_from_user() so it makes no difference at all on those architectures. + * should not zero the tail of the buffer on error. And on many architectures + * __copy_from_user_inatomic() is just defined to __copy_from_user() so it + * makes no difference at all on those architectures. */ static inline size_t ntfs_copy_from_user_iovec(struct page **pages, unsigned nr_pages, unsigned ofs, const struct iovec **iov, @@ -1409,28 +1408,28 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages, if (unlikely(copied != len)) { /* Do it the slow way. */ addr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, - *iov, *iov_ofs, len); - /* - * Zero the rest of the target like __copy_from_user(). 
- */ - memset(addr + ofs + copied, 0, len - copied); - kunmap(*pages); + copied = __ntfs_copy_from_user_iovec_inatomic(addr + + ofs, *iov, *iov_ofs, len); if (unlikely(copied != len)) goto err_out; + kunmap(*pages); } total += len; + ntfs_set_next_iovec(iov, iov_ofs, len); bytes -= len; if (!bytes) break; - ntfs_set_next_iovec(iov, iov_ofs, len); ofs = 0; } while (++pages < last_page); out: return total; err_out: - total += copied; + BUG_ON(copied > len); /* Zero the rest of the target like __copy_from_user(). */ + memset(addr + ofs + copied, 0, len - copied); + kunmap(*pages); + total += copied; + ntfs_set_next_iovec(iov, iov_ofs, copied); while (++pages < last_page) { bytes -= len; if (!bytes) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index a30ecacc01f..29099a07b9f 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc. 
* Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -3193,8 +3193,8 @@ static void __exit exit_ntfs_fs(void) ntfs_sysctl(0); } -MODULE_AUTHOR("Anton Altaparmakov <aia21@cantab.net>"); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2007 Anton Altaparmakov"); +MODULE_AUTHOR("Anton Altaparmakov <anton@tuxera.com>"); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc."); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 43e2d7d3397..7a1d15ff19b 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -94,7 +94,7 @@ struct xenbus_driver { int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); - int (*uevent)(struct xenbus_device *, char **, int, char *, int); + int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev); diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile new file mode 100644 index 00000000000..fd8e1f1297a --- /dev/null +++ b/tools/power/x86/turbostat/Makefile @@ -0,0 +1,8 @@ +turbostat : turbostat.c + +clean : + rm -f turbostat + +install : + install turbostat /usr/bin/turbostat + install turbostat.8 /usr/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 new file mode 100644 index 00000000000..ff75125deed --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.8 @@ -0,0 +1,172 @@ +.TH TURBOSTAT 8 +.SH NAME +turbostat \- Report processor frequency and idle statistics +.SH SYNOPSIS +.ft B +.B turbostat +.RB [ "\-v" ] +.RB [ "\-M MSR#" ] +.RB command +.br +.B turbostat +.RB [ "\-v" ] +.RB [ "\-M MSR#" ] +.RB [ "\-i interval_sec" 
] +.SH DESCRIPTION +\fBturbostat \fP reports processor topology, frequency +and idle power state statistics on modern X86 processors. +Either \fBcommand\fP is forked and statistics are printed +upon its completion, or statistics are printed periodically. + +\fBturbostat \fP +requires that the processor +supports an "invariant" TSC, plus the APERF and MPERF MSRs. +\fBturbostat \fP will report idle cpu power state residency +on processors that additionally support C-state residency counters. + +.SS Options +The \fB-v\fP option increases verbosity. +.PP +The \fB-M MSR#\fP option dumps the specified MSR, +in addition to the usual frequency and idle statistics. +.PP +The \fB-i interval_sec\fP option prints statistics every \fIinterval_sec\fP seconds. +The default is 5 seconds. +.PP +The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit, +displays the statistics gathered since it was forked. +.PP +.SH FIELD DESCRIPTIONS +.nf +\fBpkg\fP processor package number. +\fBcore\fP processor core number. +\fBCPU\fP Linux CPU (logical processor) number. +\fB%c0\fP percent of the interval that the CPU retired instructions. +\fBGHz\fP average clock rate while the CPU was in c0 state. +\fBTSC\fP average GHz that the TSC ran during the entire interval. +\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. +\fB%pc3, %pc6\fP percentage residency in hardware package idle states. +.fi +.PP +.SH EXAMPLE +Without any parameters, turbostat prints out counters every 5 seconds. +(override interval with "-i sec" option, or specify a command +for turbostat to fork). + +The first row of statistics reflects the average for the entire system. +Subsequent rows show per-CPU statistics.
+ +.nf +[root@x980]# ./turbostat +core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 + 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 + 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 + 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 + 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 + 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 + 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 + 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 + 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 + 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 + 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 + 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 + 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 +.fi +.SH VERBOSE EXAMPLE +The "-v" option adds verbosity to the output: + +.nf +GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) +12 * 133 = 1600 MHz max efficiency +25 * 133 = 3333 MHz TSC frequency +26 * 133 = 3467 MHz max turbo 4 active cores +26 * 133 = 3467 MHz max turbo 3 active cores +27 * 133 = 3600 MHz max turbo 2 active cores +27 * 133 = 3600 MHz max turbo 1 active cores + +.fi +The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency +available at the minimum package voltage. The \fBTSC frequency\fP is the nominal +maximum frequency of the processor if turbo-mode were not available. This frequency +should be sustainable on all CPUs indefinitely, given nominal power and cooling. +The remaining rows show what maximum turbo frequency is possible +depending on the number of idle cores. Note that this information is +not available on all processors. +.SH FORK EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered when the command exits. +eg. 
Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds +until ^C while the other CPUs are mostly idle: + +.nf +[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null + +^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 + 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 + 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 + 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 + 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 + 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 + 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 + 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 + 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 + 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 + 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 + 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 + 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 +6.950866 sec + +.fi +Above the cycle soaker drives cpu9 up to its 3.6 GHz turbo limit +while the other processors are generally in various states of idle. + +Note that cpu3 is an HT sibling sharing core9 +with cpu9, and thus it is unable to get to an idle state +deeper than c1 while cpu9 is busy. + +Note that turbostat reports average GHz of 3.63, while +the arithmetic average of the GHz column above is 3.24. +This is a weighted average, where the weight is %c0. i.e. it is the total number of +un-halted cycles elapsed per time divided by the number of CPUs. +.SH NOTES + +.B "turbostat " +must be run as root. + +.B "turbostat " +reads hardware counters, but doesn't write them. +So it will not interfere with the OS or other programs, including +multiple invocations of itself. + +\fBturbostat \fP +may work poorly on Linux-2.6.20 through 2.6.29, +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF +in those kernels. + +The APERF, MPERF MSRs are defined to count non-halted cycles. +Although it is not guaranteed by the architecture, turbostat assumes +that they count at TSC rate, which is true on all processors tested to date.
+ +.SH REFERENCES +"Intel® Turbo Boost Technology +in Intel® Core™ Microarchitecture (Nehalem) Based Processors" +http://download.intel.com/design/processor/applnots/320354.pdf + +"Intel® 64 and IA-32 Architectures Software Developer's Manual +Volume 3B: System Programming Guide" +http://www.intel.com/products/processor/manuals/ + +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4), vmstat(8) +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c new file mode 100644 index 00000000000..4c6983de6fd --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.c @@ -0,0 +1,1048 @@ +/* + * turbostat -- show CPU frequency and C-state residency + * on modern Intel turbo-capable processors. + * + * Copyright (c) 2010, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <dirent.h> +#include <string.h> +#include <ctype.h> + +#define MSR_TSC 0x10 +#define MSR_NEHALEM_PLATFORM_INFO 0xCE +#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD +#define MSR_APERF 0xE8 +#define MSR_MPERF 0xE7 +#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ +#define MSR_PKG_C3_RESIDENCY 0x3F8 +#define MSR_PKG_C6_RESIDENCY 0x3F9 +#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ +#define MSR_CORE_C3_RESIDENCY 0x3FC +#define MSR_CORE_C6_RESIDENCY 0x3FD +#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ + +char *proc_stat = "/proc/stat"; +unsigned int interval_sec = 5; /* set with -i interval_sec */ +unsigned int verbose; /* set with -v */ +unsigned int skip_c0; +unsigned int skip_c1; +unsigned int do_nhm_cstates; +unsigned int do_snb_cstates; +unsigned int has_aperf; +unsigned int units = 1000000000; /* Ghz etc */ +unsigned int genuine_intel; +unsigned int has_invariant_tsc; +unsigned int do_nehalem_platform_info; +unsigned int do_nehalem_turbo_ratio_limit; +unsigned int extra_msr_offset; +double bclk; +unsigned int show_pkg; +unsigned int show_core; +unsigned int show_cpu; + +int aperf_mperf_unstable; +int backwards_count; +char *progname; +int need_reinitialize; + +int num_cpus; + +typedef struct per_cpu_counters { + unsigned long long tsc; /* per thread */ + unsigned long long aperf; /* per thread */ + unsigned long long mperf; /* per thread */ + unsigned long long c1; /* per thread (calculated) */ + unsigned long long c3; /* per core */ + unsigned long long c6; /* per core */ + unsigned long long c7; /* per core */ + unsigned long long pc2; /* per package */ + unsigned long long pc3; /* per package */ + unsigned long long pc6; /* per package */ + unsigned long long pc7; /* per package */ + unsigned long long extra_msr; /* 
per thread */ + int pkg; + int core; + int cpu; + struct per_cpu_counters *next; +} PCC; + +PCC *pcc_even; +PCC *pcc_odd; +PCC *pcc_delta; +PCC *pcc_average; +struct timeval tv_even; +struct timeval tv_odd; +struct timeval tv_delta; + +unsigned long long get_msr(int cpu, off_t offset) +{ + ssize_t retval; + unsigned long long msr; + char pathname[32]; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) { + perror(pathname); + need_reinitialize = 1; + return 0; + } + + retval = pread(fd, &msr, sizeof msr, offset); + if (retval != sizeof msr) { + fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", + cpu, offset, retval); + exit(-2); + } + + close(fd); + return msr; +} + +void print_header() +{ + if (show_pkg) + fprintf(stderr, "pkg "); + if (show_core) + fprintf(stderr, "core"); + if (show_cpu) + fprintf(stderr, " CPU"); + if (do_nhm_cstates) + fprintf(stderr, " %%c0 "); + if (has_aperf) + fprintf(stderr, " GHz"); + fprintf(stderr, " TSC"); + if (do_nhm_cstates) + fprintf(stderr, " %%c1 "); + if (do_nhm_cstates) + fprintf(stderr, " %%c3 "); + if (do_nhm_cstates) + fprintf(stderr, " %%c6 "); + if (do_snb_cstates) + fprintf(stderr, " %%c7 "); + if (do_snb_cstates) + fprintf(stderr, " %%pc2 "); + if (do_nhm_cstates) + fprintf(stderr, " %%pc3 "); + if (do_nhm_cstates) + fprintf(stderr, " %%pc6 "); + if (do_snb_cstates) + fprintf(stderr, " %%pc7 "); + if (extra_msr_offset) + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + + putc('\n', stderr); +} + +void dump_pcc(PCC *pcc) +{ + fprintf(stderr, "package: %d ", pcc->pkg); + fprintf(stderr, "core:: %d ", pcc->core); + fprintf(stderr, "CPU: %d ", pcc->cpu); + fprintf(stderr, "TSC: %016llX\n", pcc->tsc); + fprintf(stderr, "c3: %016llX\n", pcc->c3); + fprintf(stderr, "c6: %016llX\n", pcc->c6); + fprintf(stderr, "c7: %016llX\n", pcc->c7); + fprintf(stderr, "aperf: %016llX\n", pcc->aperf); + fprintf(stderr, "pc2: %016llX\n", pcc->pc2); + fprintf(stderr, "pc3: %016llX\n", 
pcc->pc3); + fprintf(stderr, "pc6: %016llX\n", pcc->pc6); + fprintf(stderr, "pc7: %016llX\n", pcc->pc7); + fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr); +} + +void dump_list(PCC *pcc) +{ + printf("dump_list 0x%p\n", pcc); + + for (; pcc; pcc = pcc->next) + dump_pcc(pcc); +} + +void print_pcc(PCC *p) +{ + double interval_float; + + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + + /* topology columns, print blanks on 1st (average) line */ + if (p == pcc_average) { + if (show_pkg) + fprintf(stderr, " "); + if (show_core) + fprintf(stderr, " "); + if (show_cpu) + fprintf(stderr, " "); + } else { + if (show_pkg) + fprintf(stderr, "%4d", p->pkg); + if (show_core) + fprintf(stderr, "%4d", p->core); + if (show_cpu) + fprintf(stderr, "%4d", p->cpu); + } + + /* %c0 */ + if (do_nhm_cstates) { + if (!skip_c0) + fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); + else + fprintf(stderr, " ****"); + } + + /* GHz */ + if (has_aperf) { + if (!aperf_mperf_unstable) { + fprintf(stderr, "%5.2f", + 1.0 * p->tsc / units * p->aperf / + p->mperf / interval_float); + } else { + if (p->aperf > p->tsc || p->mperf > p->tsc) { + fprintf(stderr, " ****"); + } else { + fprintf(stderr, "%4.1f*", + 1.0 * p->tsc / + units * p->aperf / + p->mperf / interval_float); + } + } + } + + /* TSC */ + fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); + + if (do_nhm_cstates) { + if (!skip_c1) + fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); + else + fprintf(stderr, " ****"); + } + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); + if (do_snb_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); + if (do_snb_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); + if (do_snb_cstates) + fprintf(stderr, 
"%7.2f", 100.0 * p->pc7/p->tsc); + if (extra_msr_offset) + fprintf(stderr, " 0x%016llx", p->extra_msr); + putc('\n', stderr); +} + +void print_counters(PCC *cnt) +{ + PCC *pcc; + + print_header(); + + if (num_cpus > 1) + print_pcc(pcc_average); + + for (pcc = cnt; pcc != NULL; pcc = pcc->next) + print_pcc(pcc); + +} + +#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) + + +int compute_delta(PCC *after, PCC *before, PCC *delta) +{ + int errors = 0; + int perf_err = 0; + + skip_c0 = skip_c1 = 0; + + for ( ; after && before && delta; + after = after->next, before = before->next, delta = delta->next) { + if (before->cpu != after->cpu) { + printf("cpu configuration changed: %d != %d\n", + before->cpu, after->cpu); + return -1; + } + + if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { + fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", + before->cpu, before->tsc, after->tsc); + errors++; + } + /* check for TSC < 1 Mcycles over interval */ + if (delta->tsc < (1000 * 1000)) { + fprintf(stderr, "Insanely slow TSC rate," + " TSC stops in idle?\n"); + fprintf(stderr, "You can disable all c-states" + " by booting with \"idle=poll\"\n"); + fprintf(stderr, "or just the deep ones with" + " \"processor.max_cstate=1\"\n"); + exit(-3); + } + if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { + fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", + before->cpu, before->c3, after->c3); + errors++; + } + if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { + fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", + before->cpu, before->c6, after->c6); + errors++; + } + if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { + fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", + before->cpu, before->c7, after->c7); + errors++; + } + if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { + fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", + before->cpu, 
before->pc2, after->pc2); + errors++; + } + if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { + fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", + before->cpu, before->pc3, after->pc3); + errors++; + } + if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { + fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", + before->cpu, before->pc6, after->pc6); + errors++; + } + if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { + fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", + before->cpu, before->pc7, after->pc7); + errors++; + } + + perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); + if (perf_err) { + fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", + before->cpu, before->aperf, after->aperf); + } + perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); + if (perf_err) { + fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", + before->cpu, before->mperf, after->mperf); + } + if (perf_err) { + if (!aperf_mperf_unstable) { + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(stderr, "* Frequency results do not cover entire interval *\n"); + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + + aperf_mperf_unstable = 1; + } + /* + * mperf delta is likely a huge "positive" number + * can not use it for calculating c0 time + */ + skip_c0 = 1; + skip_c1 = 1; + } + + /* + * As mperf and tsc collection are not atomic, + * it is possible for mperf's non-halted cycles + * to exceed TSC's all cycles: show c1 = 0% in that case. 
+ */ + if (delta->mperf > delta->tsc) + delta->c1 = 0; + else /* normal case, derive c1 */ + delta->c1 = delta->tsc - delta->mperf + - delta->c3 - delta->c6 - delta->c7; + + if (delta->mperf == 0) + delta->mperf = 1; /* divide by 0 protection */ + + /* + * for "extra msr", just copy the latest w/o subtracting + */ + delta->extra_msr = after->extra_msr; + if (errors) { + fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); + dump_pcc(before); + fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); + dump_pcc(after); + errors = 0; + } + } + return 0; +} + +void compute_average(PCC *delta, PCC *avg) +{ + PCC *sum; + + sum = calloc(1, sizeof(PCC)); + if (sum == NULL) { + perror("calloc sum"); + exit(1); + } + + for (; delta; delta = delta->next) { + sum->tsc += delta->tsc; + sum->c1 += delta->c1; + sum->c3 += delta->c3; + sum->c6 += delta->c6; + sum->c7 += delta->c7; + sum->aperf += delta->aperf; + sum->mperf += delta->mperf; + sum->pc2 += delta->pc2; + sum->pc3 += delta->pc3; + sum->pc6 += delta->pc6; + sum->pc7 += delta->pc7; + } + avg->tsc = sum->tsc/num_cpus; + avg->c1 = sum->c1/num_cpus; + avg->c3 = sum->c3/num_cpus; + avg->c6 = sum->c6/num_cpus; + avg->c7 = sum->c7/num_cpus; + avg->aperf = sum->aperf/num_cpus; + avg->mperf = sum->mperf/num_cpus; + avg->pc2 = sum->pc2/num_cpus; + avg->pc3 = sum->pc3/num_cpus; + avg->pc6 = sum->pc6/num_cpus; + avg->pc7 = sum->pc7/num_cpus; + + free(sum); +} + +void get_counters(PCC *pcc) +{ + for ( ; pcc; pcc = pcc->next) { + pcc->tsc = get_msr(pcc->cpu, MSR_TSC); + if (do_nhm_cstates) + pcc->c3 = get_msr(pcc->cpu, MSR_CORE_C3_RESIDENCY); + if (do_nhm_cstates) + pcc->c6 = get_msr(pcc->cpu, MSR_CORE_C6_RESIDENCY); + if (do_snb_cstates) + pcc->c7 = get_msr(pcc->cpu, MSR_CORE_C7_RESIDENCY); + if (has_aperf) + pcc->aperf = get_msr(pcc->cpu, MSR_APERF); + if (has_aperf) + pcc->mperf = get_msr(pcc->cpu, MSR_MPERF); + if (do_snb_cstates) + pcc->pc2 = get_msr(pcc->cpu, MSR_PKG_C2_RESIDENCY); + if (do_nhm_cstates) + pcc->pc3 = 
get_msr(pcc->cpu, MSR_PKG_C3_RESIDENCY); + if (do_nhm_cstates) + pcc->pc6 = get_msr(pcc->cpu, MSR_PKG_C6_RESIDENCY); + if (do_snb_cstates) + pcc->pc7 = get_msr(pcc->cpu, MSR_PKG_C7_RESIDENCY); + if (extra_msr_offset) + pcc->extra_msr = get_msr(pcc->cpu, extra_msr_offset); + } +} + + +void print_nehalem_info() +{ + unsigned long long msr; + unsigned int ratio; + + if (!do_nehalem_platform_info) + return; + + msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); + + ratio = (msr >> 40) & 0xFF; + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", + ratio, bclk, ratio * bclk); + + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + + if (!do_nehalem_turbo_ratio_limit) + return; + + msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + ratio, bclk, ratio * bclk); + +} + +void free_counter_list(PCC *list) +{ + PCC *p; + + for (p = list; p; ) { + PCC *free_me; + + free_me = p; + p = p->next; + free(free_me); + } + return; +} + +void free_all_counters(void) +{ + free_counter_list(pcc_even); + pcc_even = NULL; + + free_counter_list(pcc_odd); + pcc_odd = NULL; + + free_counter_list(pcc_delta); + pcc_delta = NULL; + + free_counter_list(pcc_average); + pcc_average = NULL; +} + +void insert_cpu_counters(PCC **list, PCC *new) +{ + PCC *prev; + + /* + * list was empty + */ + if (*list == NULL) { + 
new->next = *list; + *list = new; + return; + } + + show_cpu = 1; /* there is more than one CPU */ + + /* + * insert on front of list. + * It is sorted by ascending package#, core#, cpu# + */ + if (((*list)->pkg > new->pkg) || + (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || + (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { + new->next = *list; + *list = new; + return; + } + + prev = *list; + + while (prev->next && (prev->next->pkg < new->pkg)) { + prev = prev->next; + show_pkg = 1; /* there is more than 1 package */ + } + + while (prev->next && (prev->next->pkg == new->pkg) + && (prev->next->core < new->core)) { + prev = prev->next; + show_core = 1; /* there is more than 1 core */ + } + + while (prev->next && (prev->next->pkg == new->pkg) + && (prev->next->core == new->core) + && (prev->next->cpu < new->cpu)) { + prev = prev->next; + } + + /* + * insert after "prev" + */ + new->next = prev->next; + prev->next = new; + + return; +} + +void alloc_new_cpu_counters(int pkg, int core, int cpu) +{ + PCC *new; + + if (verbose > 1) + printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_odd, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_even, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_delta, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + pcc_average = new; +} + +int get_physical_package_id(int cpu) +{ + char path[64]; + FILE *filep; + int pkg; + + 
sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + fscanf(filep, "%d", &pkg); + fclose(filep); + return pkg; +} + +int get_core_id(int cpu) +{ + char path[64]; + FILE *filep; + int core; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + fscanf(filep, "%d", &core); + fclose(filep); + return core; +} + +/* + * run func(index, cpu) on every cpu in /proc/stat + */ + +int for_all_cpus(void (func)(int, int, int)) +{ + FILE *fp; + int cpu_count; + int retval; + + fp = fopen(proc_stat, "r"); + if (fp == NULL) { + perror(proc_stat); + exit(1); + } + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) { + perror("/proc/stat format"); + exit(1); + } + + for (cpu_count = 0; ; cpu_count++) { + int cpu; + + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); + if (retval != 1) + break; + + func(get_physical_package_id(cpu), get_core_id(cpu), cpu); + } + fclose(fp); + return cpu_count; +} + +void re_initialize(void) +{ + printf("turbostat: topology changed, re-initializing.\n"); + free_all_counters(); + num_cpus = for_all_cpus(alloc_new_cpu_counters); + need_reinitialize = 0; + printf("num_cpus is now %d\n", num_cpus); +} + +void dummy(int pkg, int core, int cpu) { return; } +/* + * check to see if a cpu came on-line + */ +void verify_num_cpus() +{ + int new_num_cpus; + + new_num_cpus = for_all_cpus(dummy); + + if (new_num_cpus != num_cpus) { + if (verbose) + printf("num_cpus was %d, is now %d\n", + num_cpus, new_num_cpus); + need_reinitialize = 1; + } + + return; +} + +void turbostat_loop() +{ +restart: + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + + while (1) { + verify_num_cpus(); + if (need_reinitialize) { + re_initialize(); + goto restart; + } + 
sleep(interval_sec); + get_counters(pcc_odd); + gettimeofday(&tv_odd, (struct timezone *)NULL); + + compute_delta(pcc_odd, pcc_even, pcc_delta); + timersub(&tv_odd, &tv_even, &tv_delta); + compute_average(pcc_delta, pcc_average); + print_counters(pcc_delta); + if (need_reinitialize) { + re_initialize(); + goto restart; + } + sleep(interval_sec); + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + compute_delta(pcc_even, pcc_odd, pcc_delta); + timersub(&tv_even, &tv_odd, &tv_delta); + compute_average(pcc_delta, pcc_average); + print_counters(pcc_delta); + } +} + +void check_dev_msr() +{ + struct stat sb; + + if (stat("/dev/cpu/0/msr", &sb)) { + fprintf(stderr, "no /dev/cpu/0/msr\n"); + fprintf(stderr, "Try \"# modprobe msr\"\n"); + exit(-5); + } +} + +void check_super_user() +{ + if (getuid() != 0) { + fprintf(stderr, "must be root\n"); + exit(-6); + } +} + +int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ + case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case 0x25: /* Westmere Client - Clarkdale, Arrandale */ + case 0x2C: /* Westmere EP - Gulftown */ + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + return 1; + case 0x2E: /* Nehalem-EX Xeon - Beckton */ + case 0x2F: /* Westmere-EX Xeon - Eagleton */ + default: + return 0; + } +} + +int is_snb(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case 0x2A: + case 0x2D: + return 1; + } + return 0; +} + +double discover_bclk(unsigned int family, unsigned int model) +{ + if (is_snb(family, model)) + return 100.00; + else + return 133.33; +} + +void check_cpuid() +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model, stepping; + + eax = 
ebx = ecx = edx = 0; + + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); + + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) + genuine_intel = 1; + + if (verbose) + fprintf(stderr, "%.4s%.4s%.4s ", + (char *)&ebx, (char *)&edx, (char *)&ecx); + + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (verbose) + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + max_level, family, model, stepping, family, model, stepping); + + if (!(edx & (1 << 5))) { + fprintf(stderr, "CPUID: no MSR\n"); + exit(1); + } + + /* + * check max extended function levels of CPUID. + * This is needed to check for invariant TSC. + * This check is valid for both Intel and AMD. + */ + ebx = ecx = edx = 0; + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); + + if (max_level < 0x80000007) { + fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); + exit(1); + } + + /* + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + * this check is valid for both Intel and AMD + */ + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); + has_invariant_tsc = edx && (1 << 8); + + if (!has_invariant_tsc) { + fprintf(stderr, "No invariant TSC\n"); + exit(1); + } + + /* + * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 + * this check is valid for both Intel and AMD + */ + + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); + has_aperf = ecx && (1 << 0); + if (!has_aperf) { + fprintf(stderr, "No APERF MSR\n"); + exit(1); + } + + do_nehalem_platform_info = genuine_intel && has_invariant_tsc; + do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ + do_snb_cstates = is_snb(family, model); + bclk = 
discover_bclk(family, model); + + do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); +} + + +void usage() +{ + fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n", + progname); + exit(1); +} + + +/* + * in /dev/cpu/ return success for names that are numbers + * ie. filter out ".", "..", "microcode". + */ +int dir_filter(const struct dirent *dirp) +{ + if (isdigit(dirp->d_name[0])) + return 1; + else + return 0; +} + +int open_dev_cpu_msr(int dummy1) +{ + return 0; +} + +void turbostat_init() +{ + check_cpuid(); + + check_dev_msr(); + check_super_user(); + + num_cpus = for_all_cpus(alloc_new_cpu_counters); + + if (verbose) + print_nehalem_info(); +} + +int fork_it(char **argv) +{ + int retval; + pid_t child_pid; + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + + child_pid = fork(); + if (!child_pid) { + /* child */ + execvp(argv[0], argv); + } else { + int status; + + /* parent */ + if (child_pid == -1) { + perror("fork"); + exit(1); + } + + signal(SIGINT, SIG_IGN); + signal(SIGQUIT, SIG_IGN); + if (waitpid(child_pid, &status, 0) == -1) { + perror("wait"); + exit(1); + } + } + get_counters(pcc_odd); + gettimeofday(&tv_odd, (struct timezone *)NULL); + retval = compute_delta(pcc_odd, pcc_even, pcc_delta); + + timersub(&tv_odd, &tv_even, &tv_delta); + compute_average(pcc_delta, pcc_average); + if (!retval) + print_counters(pcc_delta); + + fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);; + + return 0; +} + +void cmdline(int argc, char **argv) +{ + int opt; + + progname = argv[0]; + + while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { + switch (opt) { + case 'v': + verbose++; + break; + case 'i': + interval_sec = atoi(optarg); + break; + case 'M': + sscanf(optarg, "%x", &extra_msr_offset); + if (verbose > 1) + fprintf(stderr, "MSR 0x%X\n", extra_msr_offset); + break; + default: + usage(); + } + } +} + +int main(int argc, char **argv) +{ + cmdline(argc, argv); + + if 
(verbose > 1) + fprintf(stderr, "turbostat Dec 6, 2010" + " - Len Brown <lenb@kernel.org>\n"); + if (verbose > 1) + fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); + + turbostat_init(); + + /* + * if any params left, it must be a command to fork + */ + if (argc - optind) + return fork_it(argv + optind); + else + turbostat_loop(); + + return 0; +} diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile new file mode 100644 index 00000000000..f458237fdd7 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -0,0 +1,8 @@ +x86_energy_perf_policy : x86_energy_perf_policy.c + +clean : + rm -f x86_energy_perf_policy + +install : + install x86_energy_perf_policy /usr/bin/ + install x86_energy_perf_policy.8 /usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 new file mode 100644 index 00000000000..8eaaad648cd --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -0,0 +1,104 @@ +.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> +.\" Distributed under the GPL, Copyleft 1994. +.TH X86_ENERGY_PERF_POLICY 8 +.SH NAME +x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS +.SH SYNOPSIS +.ft B +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB "\-r" +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'performance' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'normal' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'powersave' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB n +.br +.SH DESCRIPTION +\fBx86_energy_perf_policy\fP +allows software to convey +its policy for the relative importance of performance +versus energy savings to the processor. 
+ +The processor uses this information in model-specific ways +when it must select trade-offs between performance and +energy efficiency. + +This policy hint does not supersede Processor Performance states +(P-states) or CPU Idle power states (C-states), but allows +software to have influence where it would otherwise be unable +to express a preference. + +For example, this setting may tell the hardware how +aggressively or conservatively to control frequency +in the "turbo range" above the explicitly OS-controlled +P-state frequency range. It may also tell the hardware +how aggressively it should enter the OS requested C-states. + +Support for this feature is indicated by CPUID.06H.ECX.bit3 +per the Intel Architectures Software Developer's Manual. + +.SS Options +\fB-c\fP limits operation to a single CPU. +The default is to operate on all CPUs. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per +logical processor, but that the initial implementations +of the MSR were shared among all processors in each package. +.PP +\fB-v\fP increases verbosity. By default +x86_energy_perf_policy is silent. +.PP +\fB-r\fP is for "read-only" mode - the unchanged state +is read and displayed. +.PP +.I performance +Set a policy where performance is paramount. +The processor will be unwilling to sacrifice any performance +for the sake of energy saving. This is the hardware default. +.PP +.I normal +Set a policy with a normal balance between performance and energy efficiency. +The processor will tolerate minor performance compromise +for potentially significant energy savings. +This is a reasonable default for most desktops and servers. +.PP +.I powersave +Set a policy where the processor can accept +a measurable performance hit to maximize energy efficiency. +.PP +.I n +Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. +The range of valid numbers is 0-15, where 0 is maximum +performance and 15 is maximum energy efficiency. 
+ +.SH NOTES +.B "x86_energy_perf_policy " +runs only as root. +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4) +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c new file mode 100644 index 00000000000..d9678a34dd7 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -0,0 +1,325 @@ +/* + * x86_energy_perf_policy -- set the energy versus performance + * policy preference bias on recent X86 processors. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <string.h> + +unsigned int verbose; /* set with -v */ +unsigned int read_only; /* set with -r */ +char *progname; +unsigned long long new_bias; +int cpu = -1; + +/* + * Usage: + * + * -c cpu: limit action to a single CPU (default is all CPUs) + * -v: verbose output (can invoke more than once) + * -r: read-only, don't change any settings + * + * performance + * Performance is paramount. 
+ * Unwilling to sacrafice any performance + * for the sake of energy saving. (hardware default) + * + * normal + * Can tolerate minor performance compromise + * for potentially significant energy savings. + * (reasonable default for most desktops and servers) + * + * powersave + * Can tolerate significant performance hit + * to maximize energy savings. + * + * n + * a numerical value to write to the underlying MSR. + */ +void usage(void) +{ + printf("%s: [-c cpu] [-v] " + "(-r | 'performance' | 'normal' | 'powersave' | n)\n", + progname); + exit(1); +} + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 + +#define BIAS_PERFORMANCE 0 +#define BIAS_BALANCE 6 +#define BIAS_POWERSAVE 15 + +void cmdline(int argc, char **argv) +{ + int opt; + + progname = argv[0]; + + while ((opt = getopt(argc, argv, "+rvc:")) != -1) { + switch (opt) { + case 'c': + cpu = atoi(optarg); + break; + case 'r': + read_only = 1; + break; + case 'v': + verbose++; + break; + default: + usage(); + } + } + /* if -r, then should be no additional optind */ + if (read_only && (argc > optind)) + usage(); + + /* + * if no -r , then must be one additional optind + */ + if (!read_only) { + + if (argc != optind + 1) { + printf("must supply -r or policy param\n"); + usage(); + } + + if (!strcmp("performance", argv[optind])) { + new_bias = BIAS_PERFORMANCE; + } else if (!strcmp("normal", argv[optind])) { + new_bias = BIAS_BALANCE; + } else if (!strcmp("powersave", argv[optind])) { + new_bias = BIAS_POWERSAVE; + } else { + char *endptr; + + new_bias = strtoull(argv[optind], &endptr, 0); + if (endptr == argv[optind] || + new_bias > BIAS_POWERSAVE) { + fprintf(stderr, "invalid value: %s\n", + argv[optind]); + usage(); + } + } + } +} + +/* + * validate_cpuid() + * returns on success, quietly exits on failure (make verbose with -v) + */ +void validate_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + char brand[16]; + unsigned int fms, family, model, stepping; + + eax = ebx = ecx = edx = 0; + + 
asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), + "=d" (edx) : "a" (0)); + + if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { + if (verbose) + fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", + (char *)&ebx, (char *)&edx, (char *)&ecx); + exit(1); + } + + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (verbose > 1) + printf("CPUID %s %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, + family, model, stepping, family, model, stepping); + + if (!(edx & (1 << 5))) { + if (verbose) + printf("CPUID: no MSR\n"); + exit(1); + } + + /* + * Support for MSR_IA32_ENERGY_PERF_BIAS + * is indicated by CPUID.06H.ECX.bit3 + */ + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); + if (verbose) + printf("CPUID.06H.ECX: 0x%x\n", ecx); + if (!(ecx & (1 << 3))) { + if (verbose) + printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); + exit(1); + } + return; /* success */ +} + +unsigned long long get_msr(int cpu, int offset) +{ + unsigned long long msr; + char msr_path[32]; + int retval; + int fd; + + sprintf(msr_path, "/dev/cpu/%d/msr", cpu); + fd = open(msr_path, O_RDONLY); + if (fd < 0) { + printf("Try \"# modprobe msr\"\n"); + perror(msr_path); + exit(1); + } + + retval = pread(fd, &msr, sizeof msr, offset); + + if (retval != sizeof msr) { + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + close(fd); + return msr; +} + +unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset) +{ + unsigned long long old_msr; + char msr_path[32]; + int retval; + int fd; + + sprintf(msr_path, "/dev/cpu/%d/msr", cpu); + fd = open(msr_path, O_RDWR); + if (fd < 0) { + perror(msr_path); + exit(1); + } + + retval = pread(fd, &old_msr, sizeof old_msr, offset); + if (retval != sizeof old_msr) { + 
perror("pwrite"); + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + + retval = pwrite(fd, &new_msr, sizeof new_msr, offset); + if (retval != sizeof new_msr) { + perror("pwrite"); + printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + + close(fd); + + return old_msr; +} + +void print_msr(int cpu) +{ + printf("cpu%d: 0x%016llx\n", + cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); +} + +void update_msr(int cpu) +{ + unsigned long long previous_msr; + + previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); + + if (verbose) + printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n", + cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); + + return; +} + +char *proc_stat = "/proc/stat"; +/* + * run func() on every cpu in /dev/cpu + */ +void for_every_cpu(void (func)(int)) +{ + FILE *fp; + int retval; + + fp = fopen(proc_stat, "r"); + if (fp == NULL) { + perror(proc_stat); + exit(1); + } + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) { + perror("/proc/stat format"); + exit(1); + } + + while (1) { + int cpu; + + retval = fscanf(fp, + "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", + &cpu); + if (retval != 1) + return; + + func(cpu); + } + fclose(fp); +} + +int main(int argc, char **argv) +{ + cmdline(argc, argv); + + if (verbose > 1) + printf("x86_energy_perf_policy Nov 24, 2010" + " - Len Brown <lenb@kernel.org>\n"); + if (verbose > 1 && !read_only) + printf("new_bias %lld\n", new_bias); + + validate_cpuid(); + + if (cpu != -1) { + if (read_only) + print_msr(cpu); + else + update_msr(cpu); + } else { + if (read_only) + for_every_cpu(print_msr); + else + for_every_cpu(update_msr); + } + + return 0; +} diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl new file mode 100755 index 00000000000..9a571e71683 --- /dev/null +++ b/tools/testing/ktest/compare-ktest-sample.pl @@ -0,0 +1,30 @@ +#!/usr/bin/perl + +open 
(IN,"ktest.pl"); +while (<IN>) { + if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ || + /set_test_option\("(.*?)"/) { + $opt{$1} = 1; + } +} +close IN; + +open (IN, "sample.conf"); +while (<IN>) { + if (/^\s*#?\s*(\S+)\s*=/) { + $samp{$1} = 1; + } +} +close IN; + +foreach $opt (keys %opt) { + if (!defined($samp{$opt})) { + print "opt = $opt\n"; + } +} + +foreach $samp (keys %samp) { + if (!defined($opt{$samp})) { + print "samp = $samp\n"; + } +} diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl new file mode 100755 index 00000000000..e1c62eeb88f --- /dev/null +++ b/tools/testing/ktest/ktest.pl @@ -0,0 +1,2023 @@ +#!/usr/bin/perl -w +# +# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. +# Licensed under the terms of the GNU GPL License version 2 +# + +use strict; +use IPC::Open2; +use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); +use File::Path qw(mkpath); +use File::Copy qw(cp); +use FileHandle; + +my $VERSION = "0.2"; + +$| = 1; + +my %opt; +my %repeat_tests; +my %repeats; +my %default; + +#default opts +$default{"NUM_TESTS"} = 1; +$default{"REBOOT_TYPE"} = "grub"; +$default{"TEST_TYPE"} = "test"; +$default{"BUILD_TYPE"} = "randconfig"; +$default{"MAKE_CMD"} = "make"; +$default{"TIMEOUT"} = 120; +$default{"TMP_DIR"} = "/tmp/ktest"; +$default{"SLEEP_TIME"} = 60; # sleep time between tests +$default{"BUILD_NOCLEAN"} = 0; +$default{"REBOOT_ON_ERROR"} = 0; +$default{"POWEROFF_ON_ERROR"} = 0; +$default{"REBOOT_ON_SUCCESS"} = 1; +$default{"POWEROFF_ON_SUCCESS"} = 0; +$default{"BUILD_OPTIONS"} = ""; +$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects +$default{"CLEAR_LOG"} = 0; +$default{"SUCCESS_LINE"} = "login:"; +$default{"BOOTED_TIMEOUT"} = 1; +$default{"DIE_ON_FAILURE"} = 1; +$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND"; +$default{"SCP_TO_TARGET"} = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE"; +$default{"REBOOT"} = "ssh \$SSH_USER\@\$MACHINE reboot"; +$default{"STOP_AFTER_SUCCESS"} = 10; 
+$default{"STOP_AFTER_FAILURE"} = 60; +$default{"LOCALVERSION"} = "-test"; + +my $ktest_config; +my $version; +my $machine; +my $ssh_user; +my $tmpdir; +my $builddir; +my $outputdir; +my $output_config; +my $test_type; +my $build_type; +my $build_options; +my $reboot_type; +my $reboot_script; +my $power_cycle; +my $reboot; +my $reboot_on_error; +my $poweroff_on_error; +my $die_on_failure; +my $powercycle_after_reboot; +my $poweroff_after_halt; +my $ssh_exec; +my $scp_to_target; +my $power_off; +my $grub_menu; +my $grub_number; +my $target; +my $make; +my $post_install; +my $noclean; +my $minconfig; +my $addconfig; +my $in_bisect = 0; +my $bisect_bad = ""; +my $reverse_bisect; +my $in_patchcheck = 0; +my $run_test; +my $redirect; +my $buildlog; +my $dmesg; +my $monitor_fp; +my $monitor_pid; +my $monitor_cnt = 0; +my $sleep_time; +my $bisect_sleep_time; +my $store_failures; +my $timeout; +my $booted_timeout; +my $console; +my $success_line; +my $stop_after_success; +my $stop_after_failure; +my $build_target; +my $target_image; +my $localversion; +my $iteration = 0; +my $successes = 0; + +my %entered_configs; +my %config_help; + +$config_help{"MACHINE"} = << "EOF" + The machine hostname that you will test. +EOF + ; +$config_help{"SSH_USER"} = << "EOF" + The box is expected to have ssh on normal bootup, provide the user + (most likely root, since you need privileged operations) +EOF + ; +$config_help{"BUILD_DIR"} = << "EOF" + The directory that contains the Linux source code (full path). +EOF + ; +$config_help{"OUTPUT_DIR"} = << "EOF" + The directory that the objects will be built (full path). + (can not be same as BUILD_DIR) +EOF + ; +$config_help{"BUILD_TARGET"} = << "EOF" + The location of the compiled file to copy to the target. + (relative to OUTPUT_DIR) +EOF + ; +$config_help{"TARGET_IMAGE"} = << "EOF" + The place to put your image on the test machine. +EOF + ; +$config_help{"POWER_CYCLE"} = << "EOF" + A script or command to reboot the box. 
+ + Here is a digital loggers power switch example + POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL' + + Here is an example to reboot a virtual box on the current host + with the name "Guest". + POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest +EOF + ; +$config_help{"CONSOLE"} = << "EOF" + The script or command that reads the console + + If you use ttywatch server, something like the following would work. +CONSOLE = nc -d localhost 3001 + + For a virtual machine with guest name "Guest". +CONSOLE = virsh console Guest +EOF + ; +$config_help{"LOCALVERSION"} = << "EOF" + Required version ending to differentiate the test + from other linux builds on the system. +EOF + ; +$config_help{"REBOOT_TYPE"} = << "EOF" + Way to reboot the box to the test kernel. + Only valid options so far are "grub" and "script". + + If you specify grub, it will assume grub version 1 + and will search in /boot/grub/menu.lst for the title \$GRUB_MENU + and select that target to reboot to the kernel. If this is not + your setup, then specify "script" and have a command or script + specified in REBOOT_SCRIPT to boot to the target. + + The entry in /boot/grub/menu.lst must be entered in manually. + The test will not modify that file. +EOF + ; +$config_help{"GRUB_MENU"} = << "EOF" + The grub title name for the test kernel to boot + (Only mandatory if REBOOT_TYPE = grub) + + Note, ktest.pl will not update the grub menu.lst, you need to + manually add an option for the test. ktest.pl will search + the grub menu.lst for this option to find what kernel to + reboot into. 
+ + For example, if in the /boot/grub/menu.lst the test kernel title has: + title Test Kernel + kernel vmlinuz-test + GRUB_MENU = Test Kernel +EOF + ; +$config_help{"REBOOT_SCRIPT"} = << "EOF" + A script to reboot the target into the test kernel + (Only mandatory if REBOOT_TYPE = script) +EOF + ; + + +sub get_ktest_config { + my ($config) = @_; + + return if (defined($opt{$config})); + + if (defined($config_help{$config})) { + print "\n"; + print $config_help{$config}; + } + + for (;;) { + print "$config = "; + if (defined($default{$config})) { + print "\[$default{$config}\] "; + } + $entered_configs{$config} = <STDIN>; + $entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/; + if ($entered_configs{$config} =~ /^\s*$/) { + if ($default{$config}) { + $entered_configs{$config} = $default{$config}; + } else { + print "Your answer can not be blank\n"; + next; + } + } + last; + } +} + +sub get_ktest_configs { + get_ktest_config("MACHINE"); + get_ktest_config("SSH_USER"); + get_ktest_config("BUILD_DIR"); + get_ktest_config("OUTPUT_DIR"); + get_ktest_config("BUILD_TARGET"); + get_ktest_config("TARGET_IMAGE"); + get_ktest_config("POWER_CYCLE"); + get_ktest_config("CONSOLE"); + get_ktest_config("LOCALVERSION"); + + my $rtype = $opt{"REBOOT_TYPE"}; + + if (!defined($rtype)) { + if (!defined($opt{"GRUB_MENU"})) { + get_ktest_config("REBOOT_TYPE"); + $rtype = $entered_configs{"REBOOT_TYPE"}; + } else { + $rtype = "grub"; + } + } + + if ($rtype eq "grub") { + get_ktest_config("GRUB_MENU"); + } else { + get_ktest_config("REBOOT_SCRIPT"); + } +} + +sub set_value { + my ($lvalue, $rvalue) = @_; + + if (defined($opt{$lvalue})) { + die "Error: Option $lvalue defined more than once!\n"; + } + if ($rvalue =~ /^\s*$/) { + delete $opt{$lvalue}; + } else { + $opt{$lvalue} = $rvalue; + } +} + +sub read_config { + my ($config) = @_; + + open(IN, $config) || die "can't read file $config"; + + my $name = $config; + $name =~ s,.*/(.*),$1,; + + my $test_num = 0; + my $default = 1; + my $repeat = 
1; + my $num_tests_set = 0; + my $skip = 0; + my $rest; + + while (<IN>) { + + # ignore blank lines and comments + next if (/^\s*$/ || /\s*\#/); + + if (/^\s*TEST_START(.*)/) { + + $rest = $1; + + if ($num_tests_set) { + die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; + } + + my $old_test_num = $test_num; + my $old_repeat = $repeat; + + $test_num += $repeat; + $default = 0; + $repeat = 1; + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } else { + $skip = 0; + } + + if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) { + $repeat = $1; + $rest = $2; + $repeat_tests{"$test_num"} = $repeat; + } + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } + + if ($rest !~ /^\s*$/) { + die "$name: $.: Gargbage found after TEST_START\n$_"; + } + + if ($skip) { + $test_num = $old_test_num; + $repeat = $old_repeat; + } + + } elsif (/^\s*DEFAULTS(.*)$/) { + $default = 1; + + $rest = $1; + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } else { + $skip = 0; + } + + if ($rest !~ /^\s*$/) { + die "$name: $.: Gargbage found after DEFAULTS\n$_"; + } + + } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { + + next if ($skip); + + my $lvalue = $1; + my $rvalue = $2; + + if (!$default && + ($lvalue eq "NUM_TESTS" || + $lvalue eq "LOG_FILE" || + $lvalue eq "CLEAR_LOG")) { + die "$name: $.: $lvalue must be set in DEFAULTS section\n"; + } + + if ($lvalue eq "NUM_TESTS") { + if ($test_num) { + die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; + } + if (!$default) { + die "$name: $.: NUM_TESTS must be set in default section\n"; + } + $num_tests_set = 1; + } + + if ($default || $lvalue =~ /\[\d+\]$/) { + set_value($lvalue, $rvalue); + } else { + my $val = "$lvalue\[$test_num\]"; + set_value($val, $rvalue); + + if ($repeat > 1) { + $repeats{$val} = $repeat; + } + } + } else { + die "$name: $.: Garbage found in config\n$_"; + } + } + + close(IN); + + if ($test_num) { + $test_num += $repeat - 1; + $opt{"NUM_TESTS"} = $test_num; + } + + # 
make sure we have all mandatory configs + get_ktest_configs; + + # set any defaults + + foreach my $default (keys %default) { + if (!defined($opt{$default})) { + $opt{$default} = $default{$default}; + } + } +} + +sub _logit { + if (defined($opt{"LOG_FILE"})) { + open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + print OUT @_; + close(OUT); + } +} + +sub logit { + if (defined($opt{"LOG_FILE"})) { + _logit @_; + } else { + print @_; + } +} + +sub doprint { + print @_; + _logit @_; +} + +sub run_command; + +sub reboot { + # try to reboot normally + if (run_command $reboot) { + if (defined($powercycle_after_reboot)) { + sleep $powercycle_after_reboot; + run_command "$power_cycle"; + } + } else { + # nope? power cycle it. + run_command "$power_cycle"; + } +} + +sub do_not_reboot { + my $i = $iteration; + + return $test_type eq "build" || + ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || + ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); +} + +sub dodie { + doprint "CRITICAL FAILURE... 
", @_, "\n"; + + my $i = $iteration; + + if ($reboot_on_error && !do_not_reboot) { + + doprint "REBOOTING\n"; + reboot; + + } elsif ($poweroff_on_error && defined($power_off)) { + doprint "POWERING OFF\n"; + `$power_off`; + } + + die @_, "\n"; +} + +sub open_console { + my ($fp) = @_; + + my $flags; + + my $pid = open($fp, "$console|") or + dodie "Can't open console $console"; + + $flags = fcntl($fp, F_GETFL, 0) or + dodie "Can't get flags for the socket: $!"; + $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or + dodie "Can't set flags for the socket: $!"; + + return $pid; +} + +sub close_console { + my ($fp, $pid) = @_; + + doprint "kill child process $pid\n"; + kill 2, $pid; + + print "closing!\n"; + close($fp); +} + +sub start_monitor { + if ($monitor_cnt++) { + return; + } + $monitor_fp = \*MONFD; + $monitor_pid = open_console $monitor_fp; + + return; + + open(MONFD, "Stop perl from warning about single use of MONFD"); +} + +sub end_monitor { + if (--$monitor_cnt) { + return; + } + close_console($monitor_fp, $monitor_pid); +} + +sub wait_for_monitor { + my ($time) = @_; + my $line; + + doprint "** Wait for monitor to settle down **\n"; + + # read the monitor and wait for the system to calm down + do { + $line = wait_for_input($monitor_fp, $time); + print "$line" if (defined($line)); + } while (defined($line)); + print "** Monitor flushed **\n"; +} + +sub fail { + + if ($die_on_failure) { + dodie @_; + } + + doprint "FAILED\n"; + + my $i = $iteration; + + # no need to reboot for just building. 
+ if (!do_not_reboot) { + doprint "REBOOTING\n"; + reboot; + start_monitor; + wait_for_monitor $sleep_time; + end_monitor; + } + + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "KTEST RESULT: TEST $i Failed: ", @_, "\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + + return 1 if (!defined($store_failures)); + + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } + + my $dir = "$machine-$test_type-$type-fail-$date"; + my $faildir = "$store_failures/$dir"; + + if (!-d $faildir) { + mkpath($faildir) or + die "can't create $faildir"; + } + if (-f "$output_config") { + cp "$output_config", "$faildir/config" or + die "failed to copy .config"; + } + if (-f $buildlog) { + cp $buildlog, "$faildir/buildlog" or + die "failed to move $buildlog"; + } + if (-f $dmesg) { + cp $dmesg, "$faildir/dmesg" or + die "failed to move $dmesg"; + } + + doprint "*** Saved info to $faildir ***\n"; + + return 1; +} + +sub run_command { + my ($command) = @_; + my $dolog = 0; + my $dord = 0; + my $pid; + + $command =~ s/\$SSH_USER/$ssh_user/g; + $command =~ s/\$MACHINE/$machine/g; + + doprint("$command ... 
"); + + $pid = open(CMD, "$command 2>&1 |") or + (fail "unable to exec $command" and return 0); + + if (defined($opt{"LOG_FILE"})) { + open(LOG, ">>$opt{LOG_FILE}") or + dodie "failed to write to log"; + $dolog = 1; + } + + if (defined($redirect)) { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } + + while (<CMD>) { + print LOG if ($dolog); + print RD if ($dord); + } + + waitpid($pid, 0); + my $failed = $?; + + close(CMD); + close(LOG) if ($dolog); + close(RD) if ($dord); + + if ($failed) { + doprint "FAILED!\n"; + } else { + doprint "SUCCESS\n"; + } + + return !$failed; +} + +sub run_ssh { + my ($cmd) = @_; + my $cp_exec = $ssh_exec; + + $cp_exec =~ s/\$SSH_COMMAND/$cmd/g; + return run_command "$cp_exec"; +} + +sub run_scp { + my ($src, $dst) = @_; + my $cp_scp = $scp_to_target; + + $cp_scp =~ s/\$SRC_FILE/$src/g; + $cp_scp =~ s/\$DST_FILE/$dst/g; + + return run_command "$cp_scp"; +} + +sub get_grub_index { + + if ($reboot_type ne "grub") { + return; + } + return if (defined($grub_number)); + + doprint "Find grub menu ... 
"; + $grub_number = -1; + + my $ssh_grub = $ssh_exec; + $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g; + + open(IN, "$ssh_grub |") + or die "unable to get menu.lst"; + + while (<IN>) { + if (/^\s*title\s+$grub_menu\s*$/) { + $grub_number++; + last; + } elsif (/^\s*title\s/) { + $grub_number++; + } + } + close(IN); + + die "Could not find '$grub_menu' in /boot/grub/menu on $machine" + if ($grub_number < 0); + doprint "$grub_number\n"; +} + +sub wait_for_input +{ + my ($fp, $time) = @_; + my $rin; + my $ready; + my $line; + my $ch; + + if (!defined($time)) { + $time = $timeout; + } + + $rin = ''; + vec($rin, fileno($fp), 1) = 1; + $ready = select($rin, undef, undef, $time); + + $line = ""; + + # try to read one char at a time + while (sysread $fp, $ch, 1) { + $line .= $ch; + last if ($ch eq "\n"); + } + + if (!length($line)) { + return undef; + } + + return $line; +} + +sub reboot_to { + if ($reboot_type eq "grub") { + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'"; + return; + } + + run_command "$reboot_script"; +} + +sub get_sha1 { + my ($commit) = @_; + + doprint "git rev-list --max-count=1 $commit ... 
"; + my $sha1 = `git rev-list --max-count=1 $commit`; + my $ret = $?; + + logit $sha1; + + if ($ret) { + doprint "FAILED\n"; + dodie "Failed to get git $commit"; + } + + print "SUCCESS\n"; + + chomp $sha1; + + return $sha1; +} + +sub monitor { + my $booted = 0; + my $bug = 0; + my $skip_call_trace = 0; + my $loops; + + wait_for_monitor 5; + + my $line; + my $full_line = ""; + + open(DMESG, "> $dmesg") or + die "unable to write to $dmesg"; + + reboot_to; + + my $success_start; + my $failure_start; + + for (;;) { + + if ($booted) { + $line = wait_for_input($monitor_fp, $booted_timeout); + } else { + $line = wait_for_input($monitor_fp); + } + + last if (!defined($line)); + + doprint $line; + print DMESG $line; + + # we are not guaranteed to get a full line + $full_line .= $line; + + if ($full_line =~ /$success_line/) { + $booted = 1; + $success_start = time; + } + + if ($booted && defined($stop_after_success) && + $stop_after_success >= 0) { + my $now = time; + if ($now - $success_start >= $stop_after_success) { + doprint "Test forced to stop after $stop_after_success seconds after success\n"; + last; + } + } + + if ($full_line =~ /\[ backtrace testing \]/) { + $skip_call_trace = 1; + } + + if ($full_line =~ /call trace:/i) { + if (!$skip_call_trace) { + $bug = 1; + $failure_start = time; + } + } + + if ($bug && defined($stop_after_failure) && + $stop_after_failure >= 0) { + my $now = time; + if ($now - $failure_start >= $stop_after_failure) { + doprint "Test forced to stop after $stop_after_failure seconds after failure\n"; + last; + } + } + + if ($full_line =~ /\[ end of backtrace testing \]/) { + $skip_call_trace = 0; + } + + if ($full_line =~ /Kernel panic -/) { + $bug = 1; + } + + if ($line =~ /\n/) { + $full_line = ""; + } + } + + close(DMESG); + + if ($bug) { + return 0 if ($in_bisect); + fail "failed - got a bug report" and return 0; + } + + if (!$booted) { + return 0 if ($in_bisect); + fail "failed - never got a boot prompt." 
and return 0; + } + + return 1; +} + +sub install { + + run_scp "$outputdir/$build_target", "$target_image" or + dodie "failed to copy image"; + + my $install_mods = 0; + + # should we process modules? + $install_mods = 0; + open(IN, "$output_config") or dodie("Can't read config file"); + while (<IN>) { + if (/CONFIG_MODULES(=y)?/) { + $install_mods = 1 if (defined($1)); + last; + } + } + close(IN); + + if (!$install_mods) { + doprint "No modules needed\n"; + return; + } + + run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or + dodie "Failed to install modules"; + + my $modlib = "/lib/modules/$version"; + my $modtar = "ktest-mods.tar.bz2"; + + run_ssh "rm -rf $modlib" or + dodie "failed to remove old mods: $modlib"; + + # would be nice if scp -r did not follow symbolic links + run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or + dodie "making tarball"; + + run_scp "$tmpdir/$modtar", "/tmp" or + dodie "failed to copy modules"; + + unlink "$tmpdir/$modtar"; + + run_ssh "'(cd / && tar xf /tmp/$modtar)'" or + dodie "failed to tar modules"; + + run_ssh "rm -f /tmp/$modtar"; + + return if (!defined($post_install)); + + my $cp_post_install = $post_install; + $cp_post_install = s/\$KERNEL_VERSION/$version/g; + run_command "$cp_post_install" or + dodie "Failed to run post install"; +} + +sub check_buildlog { + my ($patch) = @_; + + my @files = `git show $patch | diffstat -l`; + + open(IN, "git show $patch |") or + dodie "failed to show $patch"; + while (<IN>) { + if (m,^--- a/(.*),) { + chomp $1; + $files[$#files] = $1; + } + } + close(IN); + + open(IN, $buildlog) or dodie "Can't open $buildlog"; + while (<IN>) { + if (/^\s*(.*?):.*(warning|error)/) { + my $err = $1; + foreach my $file (@files) { + my $fullpath = "$builddir/$file"; + if ($file eq $err || $fullpath eq $err) { + fail "$file built with warnings" and return 0; + } + } + } + } + close(IN); + + return 1; +} + +sub build { + my ($type) = @_; + my $defconfig = ""; + + unlink $buildlog; 
+ + if ($type =~ /^useconfig:(.*)/) { + run_command "cp $1 $output_config" or + dodie "could not copy $1 to .config"; + + $type = "oldconfig"; + } + + # old config can ask questions + if ($type eq "oldconfig") { + $type = "oldnoconfig"; + + # allow for empty configs + run_command "touch $output_config"; + + run_command "mv $output_config $outputdir/config_temp" or + dodie "moving .config"; + + if (!$noclean && !run_command "$make mrproper") { + dodie "make mrproper"; + } + + run_command "mv $outputdir/config_temp $output_config" or + dodie "moving config_temp"; + + } elsif (!$noclean) { + unlink "$output_config"; + run_command "$make mrproper" or + dodie "make mrproper"; + } + + # add something to distinguish this build + open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file"); + print OUT "$localversion\n"; + close(OUT); + + if (defined($minconfig)) { + $defconfig = "KCONFIG_ALLCONFIG=$minconfig"; + } + + run_command "$defconfig $make $type" or + dodie "failed make config"; + + $redirect = "$buildlog"; + if (!run_command "$make $build_options") { + undef $redirect; + # bisect may need this to pass + return 0 if ($in_bisect); + fail "failed build" and return 0; + } + undef $redirect; + + return 1; +} + +sub halt { + if (!run_ssh "halt" or defined($power_off)) { + if (defined($poweroff_after_halt)) { + sleep $poweroff_after_halt; + run_command "$power_off"; + } + } else { + # nope? the zap it! + run_command "$power_off"; + } +} + +sub success { + my ($i) = @_; + + $successes++; + + doprint "\n\n*******************************************\n"; + doprint "*******************************************\n"; + doprint "KTEST RESULT: TEST $i SUCCESS!!!! 
**\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; + + if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { + doprint "Reboot and wait $sleep_time seconds\n"; + reboot; + start_monitor; + wait_for_monitor $sleep_time; + end_monitor; + } +} + +sub get_version { + # get the release name + doprint "$make kernelrelease ... "; + $version = `$make kernelrelease | tail -1`; + chomp($version); + doprint "$version\n"; +} + +sub child_run_test { + my $failed = 0; + + # child should have no power + $reboot_on_error = 0; + $poweroff_on_error = 0; + $die_on_failure = 1; + + run_command $run_test or $failed = 1; + exit $failed; +} + +my $child_done; + +sub child_finished { + $child_done = 1; +} + +sub do_run_test { + my $child_pid; + my $child_exit; + my $line; + my $full_line; + my $bug = 0; + + wait_for_monitor 1; + + doprint "run test $run_test\n"; + + $child_done = 0; + + $SIG{CHLD} = qw(child_finished); + + $child_pid = fork; + + child_run_test if (!$child_pid); + + $full_line = ""; + + do { + $line = wait_for_input($monitor_fp, 1); + if (defined($line)) { + + # we are not guaranteed to get a full line + $full_line .= $line; + + if ($full_line =~ /call trace:/i) { + $bug = 1; + } + + if ($full_line =~ /Kernel panic -/) { + $bug = 1; + } + + if ($line =~ /\n/) { + $full_line = ""; + } + } + } while (!$child_done && !$bug); + + if ($bug) { + doprint "Detected kernel crash!\n"; + # kill the child with extreme prejudice + kill 9, $child_pid; + } + + waitpid $child_pid, 0; + $child_exit = $?; + + if ($bug || $child_exit) { + return 0 if $in_bisect; + fail "test failed" and return 0; + } + return 1; +} + +sub run_git_bisect { + my ($command) = @_; + + doprint "$command ... 
"; + + my $output = `$command 2>&1`; + my $ret = $?; + + logit $output; + + if ($ret) { + doprint "FAILED\n"; + dodie "Failed to git bisect"; + } + + doprint "SUCCESS\n"; + if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) { + doprint "$1 [$2]\n"; + } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) { + $bisect_bad = $1; + doprint "Found bad commit... $1\n"; + return 0; + } else { + # we already logged it, just print it now. + print $output; + } + + return 1; +} + +# returns 1 on success, 0 on failure +sub run_bisect_test { + my ($type, $buildtype) = @_; + + my $failed = 0; + my $result; + my $output; + my $ret; + + $in_bisect = 1; + + build $buildtype or $failed = 1; + + if ($type ne "build") { + dodie "Failed on build" if $failed; + + # Now boot the box + get_grub_index; + get_version; + install; + + start_monitor; + monitor or $failed = 1; + + if ($type ne "boot") { + dodie "Failed on boot" if $failed; + + do_run_test or $failed = 1; + } + end_monitor; + } + + if ($failed) { + $result = 0; + + # reboot the box to a good kernel + if ($type ne "build") { + doprint "Reboot and sleep $bisect_sleep_time seconds\n"; + reboot; + start_monitor; + wait_for_monitor $bisect_sleep_time; + end_monitor; + } + } else { + $result = 1; + } + $in_bisect = 0; + + return $result; +} + +sub run_bisect { + my ($type) = @_; + my $buildtype = "oldconfig"; + + # We should have a minconfig to use? + if (defined($minconfig)) { + $buildtype = "useconfig:$minconfig"; + } + + my $ret = run_bisect_test $type, $buildtype; + + + # Are we looking for where it worked, not failed? 
+ if ($reverse_bisect) { + $ret = !$ret; + } + + if ($ret) { + return "good"; + } else { + return "bad"; + } +} + +sub bisect { + my ($i) = @_; + + my $result; + + die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"})); + die "BISECT_BAD[$i] not defined\n" if (!defined($opt{"BISECT_BAD[$i]"})); + die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"})); + + my $good = $opt{"BISECT_GOOD[$i]"}; + my $bad = $opt{"BISECT_BAD[$i]"}; + my $type = $opt{"BISECT_TYPE[$i]"}; + my $start = $opt{"BISECT_START[$i]"}; + my $replay = $opt{"BISECT_REPLAY[$i]"}; + + # convert to true sha1's + $good = get_sha1($good); + $bad = get_sha1($bad); + + if (defined($opt{"BISECT_REVERSE[$i]"}) && + $opt{"BISECT_REVERSE[$i]"} == 1) { + doprint "Performing a reverse bisect (bad is good, good is bad!)\n"; + $reverse_bisect = 1; + } else { + $reverse_bisect = 0; + } + + # Can't have a test without having a test to run + if ($type eq "test" && !defined($run_test)) { + $type = "boot"; + } + + my $check = $opt{"BISECT_CHECK[$i]"}; + if (defined($check) && $check ne "0") { + + # get current HEAD + my $head = get_sha1("HEAD"); + + if ($check ne "good") { + doprint "TESTING BISECT BAD [$bad]\n"; + run_command "git checkout $bad" or + die "Failed to checkout $bad"; + + $result = run_bisect $type; + + if ($result ne "bad") { + fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0; + } + } + + if ($check ne "bad") { + doprint "TESTING BISECT GOOD [$good]\n"; + run_command "git checkout $good" or + die "Failed to checkout $good"; + + $result = run_bisect $type; + + if ($result ne "good") { + fail "Tested BISECT_GOOD [$good] and it failed" and return 0; + } + } + + # checkout where we started + run_command "git checkout $head" or + die "Failed to checkout $head"; + } + + run_command "git bisect start" or + dodie "could not start bisect"; + + run_command "git bisect good $good" or + dodie "could not set bisect good to $good"; + + run_git_bisect "git bisect bad 
$bad" or + dodie "could not set bisect bad to $bad"; + + if (defined($replay)) { + run_command "git bisect replay $replay" or + dodie "failed to run replay"; + } + + if (defined($start)) { + run_command "git checkout $start" or + dodie "failed to checkout $start"; + } + + my $test; + do { + $result = run_bisect $type; + $test = run_git_bisect "git bisect $result"; + } while ($test); + + run_command "git bisect log" or + dodie "could not capture git bisect log"; + + run_command "git bisect reset" or + dodie "could not reset git bisect"; + + doprint "Bad commit was [$bisect_bad]\n"; + + success $i; +} + +my %config_ignore; +my %config_set; + +my %config_list; +my %null_config; + +my %dependency; + +sub process_config_ignore { + my ($config) = @_; + + open (IN, $config) + or dodie "Failed to read $config"; + + while (<IN>) { + if (/^(.*?(CONFIG\S*)(=.*| is not set))/) { + $config_ignore{$2} = $1; + } + } + + close(IN); +} + +sub read_current_config { + my ($config_ref) = @_; + + %{$config_ref} = (); + undef %{$config_ref}; + + my @key = keys %{$config_ref}; + if ($#key >= 0) { + print "did not delete!\n"; + exit; + } + open (IN, "$output_config"); + + while (<IN>) { + if (/^(CONFIG\S+)=(.*)/) { + ${$config_ref}{$1} = $2; + } + } + close(IN); +} + +sub get_dependencies { + my ($config) = @_; + + my $arr = $dependency{$config}; + if (!defined($arr)) { + return (); + } + + my @deps = @{$arr}; + + foreach my $dep (@{$arr}) { + print "ADD DEP $dep\n"; + @deps = (@deps, get_dependencies $dep); + } + + return @deps; +} + +sub create_config { + my @configs = @_; + + open(OUT, ">$output_config") or dodie "Can not write to $output_config"; + + foreach my $config (@configs) { + print OUT "$config_set{$config}\n"; + my @deps = get_dependencies $config; + foreach my $dep (@deps) { + print OUT "$config_set{$dep}\n"; + } + } + + foreach my $config (keys %config_ignore) { + print OUT "$config_ignore{$config}\n"; + } + close(OUT); + +# exit; + run_command "$make oldnoconfig" or + 
dodie "failed make config oldconfig"; + +} + +sub compare_configs { + my (%a, %b) = @_; + + foreach my $item (keys %a) { + if (!defined($b{$item})) { + print "diff $item\n"; + return 1; + } + delete $b{$item}; + } + + my @keys = keys %b; + if ($#keys) { + print "diff2 $keys[0]\n"; + } + return -1 if ($#keys >= 0); + + return 0; +} + +sub run_config_bisect_test { + my ($type) = @_; + + return run_bisect_test $type, "oldconfig"; +} + +sub process_passed { + my (%configs) = @_; + + doprint "These configs had no failure: (Enabling them for further compiles)\n"; + # Passed! All these configs are part of a good compile. + # Add them to the min options. + foreach my $config (keys %configs) { + if (defined($config_list{$config})) { + doprint " removing $config\n"; + $config_ignore{$config} = $config_list{$config}; + delete $config_list{$config}; + } + } + doprint "config copied to $outputdir/config_good\n"; + run_command "cp -f $output_config $outputdir/config_good"; +} + +sub process_failed { + my ($config) = @_; + + doprint "\n\n***************************************\n"; + doprint "Found bad config: $config\n"; + doprint "***************************************\n\n"; +} + +sub run_config_bisect { + + my @start_list = keys %config_list; + + if ($#start_list < 0) { + doprint "No more configs to test!!!\n"; + return -1; + } + + doprint "***** RUN TEST ***\n"; + my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"}; + my $ret; + my %current_config; + + my $count = $#start_list + 1; + doprint " $count configs to test\n"; + + my $half = int($#start_list / 2); + + do { + my @tophalf = @start_list[0 .. 
$half]; + + create_config @tophalf; + read_current_config \%current_config; + + $count = $#tophalf + 1; + doprint "Testing $count configs\n"; + my $found = 0; + # make sure we test something + foreach my $config (@tophalf) { + if (defined($current_config{$config})) { + logit " $config\n"; + $found = 1; + } + } + if (!$found) { + # try the other half + doprint "Top half produced no set configs, trying bottom half\n"; + @tophalf = @start_list[$half .. $#start_list]; + create_config @tophalf; + read_current_config \%current_config; + foreach my $config (@tophalf) { + if (defined($current_config{$config})) { + logit " $config\n"; + $found = 1; + } + } + if (!$found) { + doprint "Failed: Can't make new config with current configs\n"; + foreach my $config (@start_list) { + doprint " CONFIG: $config\n"; + } + return -1; + } + $count = $#tophalf + 1; + doprint "Testing $count configs\n"; + } + + $ret = run_config_bisect_test $type; + + if ($ret) { + process_passed %current_config; + return 0; + } + + doprint "This config had a failure.\n"; + doprint "Removing these configs that were not set in this config:\n"; + doprint "config copied to $outputdir/config_bad\n"; + run_command "cp -f $output_config $outputdir/config_bad"; + + # A config exists in this group that was bad. + foreach my $config (keys %config_list) { + if (!defined($current_config{$config})) { + doprint " removing $config\n"; + delete $config_list{$config}; + } + } + + @start_list = @tophalf; + + if ($#start_list == 0) { + process_failed $start_list[0]; + return 1; + } + + # remove half the configs we are looking at and see if + # they are good. + $half = int($#start_list / 2); + } while ($half > 0); + + # we found a single config, try it again + my @tophalf = @start_list[0 .. 
0]; + + $ret = run_config_bisect_test $type; + if ($ret) { + process_passed %current_config; + return 0; + } + + process_failed $start_list[0]; + return 1; +} + +sub config_bisect { + my ($i) = @_; + + my $start_config = $opt{"CONFIG_BISECT[$i]"}; + + my $tmpconfig = "$tmpdir/use_config"; + + # Make the file with the bad config and the min config + if (defined($minconfig)) { + # read the min config for things to ignore + run_command "cp $minconfig $tmpconfig" or + dodie "failed to copy $minconfig to $tmpconfig"; + } else { + unlink $tmpconfig; + } + + # Add other configs + if (defined($addconfig)) { + run_command "cat $addconfig >> $tmpconfig" or + dodie "failed to append $addconfig"; + } + + my $defconfig = ""; + if (-f $tmpconfig) { + $defconfig = "KCONFIG_ALLCONFIG=$tmpconfig"; + process_config_ignore $tmpconfig; + } + + # now process the start config + run_command "cp $start_config $output_config" or + dodie "failed to copy $start_config to $output_config"; + + # read directly what we want to check + my %config_check; + open (IN, $output_config) + or dodie "faied to open $output_config"; + + while (<IN>) { + if (/^((CONFIG\S*)=.*)/) { + $config_check{$2} = $1; + } + } + close(IN); + + # Now run oldconfig with the minconfig (and addconfigs) + run_command "$defconfig $make oldnoconfig" or + dodie "failed make config oldconfig"; + + # check to see what we lost (or gained) + open (IN, $output_config) + or dodie "Failed to read $start_config"; + + my %removed_configs; + my %added_configs; + + while (<IN>) { + if (/^((CONFIG\S*)=.*)/) { + # save off all options + $config_set{$2} = $1; + if (defined($config_check{$2})) { + if (defined($config_ignore{$2})) { + $removed_configs{$2} = $1; + } else { + $config_list{$2} = $1; + } + } elsif (!defined($config_ignore{$2})) { + $added_configs{$2} = $1; + $config_list{$2} = $1; + } + } + } + close(IN); + + my @confs = keys %removed_configs; + if ($#confs >= 0) { + doprint "Configs overridden by default configs and removed from 
check:\n"; + foreach my $config (@confs) { + doprint " $config\n"; + } + } + @confs = keys %added_configs; + if ($#confs >= 0) { + doprint "Configs appearing in make oldconfig and added:\n"; + foreach my $config (@confs) { + doprint " $config\n"; + } + } + + my %config_test; + my $once = 0; + + # Sometimes kconfig does weird things. We must make sure + # that the config we autocreate has everything we need + # to test, otherwise we may miss testing configs, or + # may not be able to create a new config. + # Here we create a config with everything set. + create_config (keys %config_list); + read_current_config \%config_test; + foreach my $config (keys %config_list) { + if (!defined($config_test{$config})) { + if (!$once) { + $once = 1; + doprint "Configs not produced by kconfig (will not be checked):\n"; + } + doprint " $config\n"; + delete $config_list{$config}; + } + } + my $ret; + do { + $ret = run_config_bisect; + } while (!$ret); + + return $ret if ($ret < 0); + + success $i; +} + +sub patchcheck { + my ($i) = @_; + + die "PATCHCHECK_START[$i] not defined\n" + if (!defined($opt{"PATCHCHECK_START[$i]"})); + die "PATCHCHECK_TYPE[$i] not defined\n" + if (!defined($opt{"PATCHCHECK_TYPE[$i]"})); + + my $start = $opt{"PATCHCHECK_START[$i]"}; + + my $end = "HEAD"; + if (defined($opt{"PATCHCHECK_END[$i]"})) { + $end = $opt{"PATCHCHECK_END[$i]"}; + } + + # Get the true sha1's since we can use things like HEAD~3 + $start = get_sha1($start); + $end = get_sha1($end); + + my $type = $opt{"PATCHCHECK_TYPE[$i]"}; + + # Can't have a test without having a test to run + if ($type eq "test" && !defined($run_test)) { + $type = "boot"; + } + + open (IN, "git log --pretty=oneline $end|") or + dodie "could not get git list"; + + my @list; + + while (<IN>) { + chomp; + $list[$#list+1] = $_; + last if (/^$start/); + } + close(IN); + + if ($list[$#list] !~ /^$start/) { + fail "SHA1 $start not found"; + } + + # go backwards in the list + @list = reverse @list; + + my $save_clean = 
$noclean; + + $in_patchcheck = 1; + foreach my $item (@list) { + my $sha1 = $item; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + + doprint "\nProcessing commit $item\n\n"; + + run_command "git checkout $sha1" or + die "Failed to checkout $sha1"; + + # only clean on the first and last patch + if ($item eq $list[0] || + $item eq $list[$#list]) { + $noclean = $save_clean; + } else { + $noclean = 1; + } + + if (defined($minconfig)) { + build "useconfig:$minconfig" or return 0; + } else { + # ?? no config to use? + build "oldconfig" or return 0; + } + + check_buildlog $sha1 or return 0; + + next if ($type eq "build"); + + get_grub_index; + get_version; + install; + + my $failed = 0; + + start_monitor; + monitor or $failed = 1; + + if (!$failed && $type ne "boot"){ + do_run_test or $failed = 1; + } + end_monitor; + return 0 if ($failed); + + } + $in_patchcheck = 0; + success $i; + + return 1; +} + +$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl config-file\n"; + +if ($#ARGV == 0) { + $ktest_config = $ARGV[0]; + if (! -f $ktest_config) { + print "$ktest_config does not exist.\n"; + my $ans; + for (;;) { + print "Create it? [Y/n] "; + $ans = <STDIN>; + chomp $ans; + if ($ans =~ /^\s*$/) { + $ans = "y"; + } + last if ($ans =~ /^y$/i || $ans =~ /^n$/i); + print "Please answer either 'y' or 'n'.\n"; + } + if ($ans !~ /^y$/i) { + exit 0; + } + } +} else { + $ktest_config = "ktest.conf"; +} + +if (! -f $ktest_config) { + open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; + print OUT << "EOF" +# Generated by ktest.pl +# +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START + +DEFAULTS +EOF +; + close(OUT); +} +read_config $ktest_config; + +# Append any configs entered in manually to the config file. 
+my @new_configs = keys %entered_configs; +if ($#new_configs >= 0) { + print "\nAppending entered in configs to $ktest_config\n"; + open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; + foreach my $config (@new_configs) { + print OUT "$config = $entered_configs{$config}\n"; + $opt{$config} = $entered_configs{$config}; + } +} + +if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) { + unlink $opt{"LOG_FILE"}; +} + +doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; + +for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { + + if (!$i) { + doprint "DEFAULT OPTIONS:\n"; + } else { + doprint "\nTEST $i OPTIONS"; + if (defined($repeat_tests{$i})) { + $repeat = $repeat_tests{$i}; + doprint " ITERATE $repeat"; + } + doprint "\n"; + } + + foreach my $option (sort keys %opt) { + + if ($option =~ /\[(\d+)\]$/) { + next if ($i != $1); + } else { + next if ($i); + } + + doprint "$option = $opt{$option}\n"; + } +} + +sub set_test_option { + my ($name, $i) = @_; + + my $option = "$name\[$i\]"; + + if (defined($opt{$option})) { + return $opt{$option}; + } + + foreach my $test (keys %repeat_tests) { + if ($i >= $test && + $i < $test + $repeat_tests{$test}) { + $option = "$name\[$test\]"; + if (defined($opt{$option})) { + return $opt{$option}; + } + } + } + + if (defined($opt{$name})) { + return $opt{$name}; + } + + return undef; +} + +# First we need to do is the builds +for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { + + $iteration = $i; + + my $makecmd = set_test_option("MAKE_CMD", $i); + + $machine = set_test_option("MACHINE", $i); + $ssh_user = set_test_option("SSH_USER", $i); + $tmpdir = set_test_option("TMP_DIR", $i); + $outputdir = set_test_option("OUTPUT_DIR", $i); + $builddir = set_test_option("BUILD_DIR", $i); + $test_type = set_test_option("TEST_TYPE", $i); + $build_type = set_test_option("BUILD_TYPE", $i); + $build_options = set_test_option("BUILD_OPTIONS", $i); + $power_cycle = set_test_option("POWER_CYCLE", $i); + $reboot = 
set_test_option("REBOOT", $i); + $noclean = set_test_option("BUILD_NOCLEAN", $i); + $minconfig = set_test_option("MIN_CONFIG", $i); + $run_test = set_test_option("TEST", $i); + $addconfig = set_test_option("ADD_CONFIG", $i); + $reboot_type = set_test_option("REBOOT_TYPE", $i); + $grub_menu = set_test_option("GRUB_MENU", $i); + $post_install = set_test_option("POST_INSTALL", $i); + $reboot_script = set_test_option("REBOOT_SCRIPT", $i); + $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i); + $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i); + $die_on_failure = set_test_option("DIE_ON_FAILURE", $i); + $power_off = set_test_option("POWER_OFF", $i); + $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i); + $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i); + $sleep_time = set_test_option("SLEEP_TIME", $i); + $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i); + $store_failures = set_test_option("STORE_FAILURES", $i); + $timeout = set_test_option("TIMEOUT", $i); + $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i); + $console = set_test_option("CONSOLE", $i); + $success_line = set_test_option("SUCCESS_LINE", $i); + $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i); + $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i); + $build_target = set_test_option("BUILD_TARGET", $i); + $ssh_exec = set_test_option("SSH_EXEC", $i); + $scp_to_target = set_test_option("SCP_TO_TARGET", $i); + $target_image = set_test_option("TARGET_IMAGE", $i); + $localversion = set_test_option("LOCALVERSION", $i); + + chdir $builddir || die "can't change directory to $builddir"; + + if (!-d $tmpdir) { + mkpath($tmpdir) or + die "can't create $tmpdir"; + } + + $ENV{"SSH_USER"} = $ssh_user; + $ENV{"MACHINE"} = $machine; + + $target = "$ssh_user\@$machine"; + + $buildlog = "$tmpdir/buildlog-$machine"; + $dmesg = "$tmpdir/dmesg-$machine"; + $make = "$makecmd O=$outputdir"; + $output_config = 
"$outputdir/.config"; + + if ($reboot_type eq "grub") { + dodie "GRUB_MENU not defined" if (!defined($grub_menu)); + } elsif (!defined($reboot_script)) { + dodie "REBOOT_SCRIPT not defined" + } + + my $run_type = $build_type; + if ($test_type eq "patchcheck") { + $run_type = $opt{"PATCHCHECK_TYPE[$i]"}; + } elsif ($test_type eq "bisect") { + $run_type = $opt{"BISECT_TYPE[$i]"}; + } elsif ($test_type eq "config_bisect") { + $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"}; + } + + # mistake in config file? + if (!defined($run_type)) { + $run_type = "ERROR"; + } + + doprint "\n\n"; + doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n"; + + unlink $dmesg; + unlink $buildlog; + + if (!defined($minconfig)) { + $minconfig = $addconfig; + + } elsif (defined($addconfig)) { + run_command "cat $addconfig $minconfig > $tmpdir/add_config" or + dodie "Failed to create temp config"; + $minconfig = "$tmpdir/add_config"; + } + + my $checkout = $opt{"CHECKOUT[$i]"}; + if (defined($checkout)) { + run_command "git checkout $checkout" or + die "failed to checkout $checkout"; + } + + if ($test_type eq "bisect") { + bisect $i; + next; + } elsif ($test_type eq "config_bisect") { + config_bisect $i; + next; + } elsif ($test_type eq "patchcheck") { + patchcheck $i; + next; + } + + if ($build_type ne "nobuild") { + build $build_type or next; + } + + if ($test_type ne "build") { + get_grub_index; + get_version; + install; + + my $failed = 0; + start_monitor; + monitor or $failed = 1;; + + if (!$failed && $test_type ne "boot" && defined($run_test)) { + do_run_test or $failed = 1; + } + end_monitor; + next if ($failed); + } + + success $i; +} + +if ($opt{"POWEROFF_ON_SUCCESS"}) { + halt; +} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) { + reboot; +} + +doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; + +exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf new file mode 100644 index 00000000000..3408c594b2d 
--- /dev/null +++ b/tools/testing/ktest/sample.conf @@ -0,0 +1,622 @@ +# +# Config file for ktest.pl +# +# Note, all paths must be absolute +# + +# Options set in the beginning of the file are considered to be +# default options. These options can be overridden by test specific +# options, with the following exceptions: +# +# LOG_FILE +# CLEAR_LOG +# POWEROFF_ON_SUCCESS +# REBOOT_ON_SUCCESS +# +# Test specific options are set after the label: +# +# TEST_START +# +# The options after a TEST_START label are specific to that test. +# Each TEST_START label will set up a new test. If you want to +# perform a test more than once, you can add the ITERATE label +# to it followed by the number of times you want that test +# to iterate. If the ITERATE is left off, the test will only +# be performed once. +# +# TEST_START ITERATE 10 +# +# You can skip a test by adding SKIP (before or after the ITERATE +# and number) +# +# TEST_START SKIP +# +# TEST_START SKIP ITERATE 10 +# +# TEST_START ITERATE 10 SKIP +# +# The SKIP label causes the options and the test itself to be ignored. +# This is useful to set up several different tests in one config file, and +# only enabling the ones you want to use for a current test run. +# +# You can add default options anywhere in the file as well +# with the DEFAULTS tag. This allows you to have default options +# after the test options to keep the test options at the top +# of the file. You can even place the DEFAULTS tag between +# test cases (but not in the middle of a single test case) +# +# TEST_START +# MIN_CONFIG = /home/test/config-test1 +# +# DEFAULTS +# MIN_CONFIG = /home/test/config-default +# +# TEST_START ITERATE 10 +# +# The above will run the first test with MIN_CONFIG set to +# /home/test/config-test1. Then 10 tests will be executed +# with MIN_CONFIG set to /home/test/config-default.
+# +# You can also disable defaults with the SKIP option +# +# DEFAULTS SKIP +# MIN_CONFIG = /home/test/config-use-sometimes +# +# DEFAULTS +# MIN_CONFIG = /home/test/config-most-times +# +# The above will ignore the first MIN_CONFIG. If you want to +# use the first MIN_CONFIG, remove the SKIP from the first +# DEFAULTS tag and add it to the second. Be careful, options +# may only be declared once per test or default. If you declare +# the same option name twice under the same test or in the defaults, +# ktest will fail to execute, and no tests will run. +# + + +#### Mandatory Default Options #### + +# These options must be in the default section, although most +# may be overridden by test options. + +# The machine hostname that you will test +#MACHINE = target + +# The box is expected to have ssh on normal bootup; provide the user +# (most likely root, since you need privileged operations) +#SSH_USER = root + +# The directory that contains the Linux source code +#BUILD_DIR = /home/test/linux.git + +# The directory in which the objects will be built +# (cannot be the same as BUILD_DIR) +#OUTPUT_DIR = /home/test/build/target + +# The location of the compiled file to copy to the target +# (relative to OUTPUT_DIR) +#BUILD_TARGET = arch/x86/boot/bzImage + +# The place to put your image on the test machine +#TARGET_IMAGE = /boot/vmlinuz-test + +# A script or command to reboot the box +# +# Here is a digital loggers power switch example +#POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL' +# +# Here is an example to reboot a virtual box on the current host +# with the name "Guest". +#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest + +# The script or command that reads the console +# +# If you use ttywatch server, something like the following would work. +#CONSOLE = nc -d localhost 3001 +# +# For a virtual machine with guest name "Guest".
+#CONSOLE = virsh console Guest + +# Required version ending to differentiate the test +# from other linux builds on the system. +#LOCALVERSION = -test + +# The grub title name for the test kernel to boot +# (Only mandatory if REBOOT_TYPE = grub) +# +# Note, ktest.pl will not update the grub menu.lst, you need to +# manually add an option for the test. ktest.pl will search +# the grub menu.lst for this option to find what kernel to +# reboot into. +# +# For example, if in the /boot/grub/menu.lst the test kernel title has: +# title Test Kernel +# kernel vmlinuz-test +#GRUB_MENU = Test Kernel + +# A script to reboot the target into the test kernel +# (Only mandatory if REBOOT_TYPE = script) +#REBOOT_SCRIPT = + +#### Optional Config Options (all have defaults) #### + +# Start a test setup. If you leave this off, all options +# will be default and the test will run once. +# This is a label and not really an option (it takes no value). +# You can append ITERATE and a number after it to iterate the +# test a number of times, or SKIP to ignore this test. +# +#TEST_START +#TEST_START ITERATE 5 +#TEST_START SKIP + +# Have the following options as default again. Used after tests +# have already been defined by TEST_START. Optionally, you can +# just define all default options before the first TEST_START +# and you do not need this option. +# +# This is a label and not really an option (it takes no value). +# You can append SKIP to this label and the options within this +# section will be ignored. 
+# +# DEFAULTS +# DEFAULTS SKIP + +# The default test type (default test) +# The test types may be: +# build - only build the kernel, do nothing else +# boot - build and boot the kernel +# test - build, boot and if TEST is set, run the test script +# (If TEST is not set, it defaults back to boot) +# bisect - Perform a bisect on the kernel (see BISECT_TYPE below) +# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below) +#TEST_TYPE = test + +# Test to run if there is a successful boot and TEST_TYPE is test. +# Must exit with 0 on success and non zero on error +# default (undefined) +#TEST = ssh user@machine /root/run_test + +# The build type is any make config type or special command +# (default randconfig) +# nobuild - skip the clean and build step +# useconfig:/path/to/config - use the given config and run +# oldconfig on it. +# This option is ignored if TEST_TYPE is patchcheck or bisect +#BUILD_TYPE = randconfig + +# The make command (default make) +# If you are building a 32bit x86 on a 64 bit host +#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386 + +# Any build options for the make of the kernel (not for other makes, like configs) +# (default "") +#BUILD_OPTIONS = -j20 + +# If you need an initrd, you can add a script or code here to install +# it. The environment variable KERNEL_VERSION will be set to the +# kernel version that is used. Remember to add the initrd line +# to your grub menu.lst file. +# +# Here's a couple of examples to use: +#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION +# +# or on some systems: +#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION + +# Way to reboot the box to the test kernel. 
+# Only valid options so far are "grub" and "script" +# (default grub) +# If you specify grub, it will assume grub version 1 +# and will search in /boot/grub/menu.lst for the title $GRUB_MENU +# and select that target to reboot to the kernel. If this is not +# your setup, then specify "script" and have a command or script +# specified in REBOOT_SCRIPT to boot to the target. +# +# The entry in /boot/grub/menu.lst must be entered in manually. +# The test will not modify that file. +#REBOOT_TYPE = grub + +# The min config that is needed to build for the machine +# A nice way to create this is with the following: +# +# $ ssh target +# $ lsmod > mymods +# $ scp mymods host:/tmp +# $ exit +# $ cd linux.git +# $ rm .config +# $ make LSMOD=mymods localyesconfig +# $ grep '^CONFIG' .config > /home/test/config-min +# +# If you want even less configs: +# +# log in directly to target (do not ssh) +# +# $ su +# # lsmod | cut -d' ' -f1 | xargs rmmod +# +# repeat the above several times +# +# # lsmod > mymods +# # reboot +# +# May need to reboot to get your network back to copy the mymods +# to the host, and then remove the previous .config and run the +# localyesconfig again. The CONFIG_MIN generated like this will +# not guarantee network activity to the box so the TEST_TYPE of +# test may fail. +# +# You might also want to set: +# CONFIG_CMDLINE="<your options here>" +# randconfig may set the above and override your real command +# line options. +# (default undefined) +#MIN_CONFIG = /home/test/config-min + +# Sometimes there's options that just break the boot and +# you do not care about. Here are a few: +# # CONFIG_STAGING is not set +# Staging drivers are horrible, and can break the build. +# # CONFIG_SCSI_DEBUG is not set +# SCSI_DEBUG may change your root partition +# # CONFIG_KGDB_SERIAL_CONSOLE is not set +# KGDB may cause oops waiting for a connection that's not there. 
+# This option points to the file containing config options that will be prepended +# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set) +# +# Note, config options in MIN_CONFIG will override these options. +# +# (default undefined) +#ADD_CONFIG = /home/test/config-broken + +# The location on the host where to write temp files +# (default /tmp/ktest) +#TMP_DIR = /tmp/ktest + +# Optional log file to write the status (recommended) +# Note, this is a DEFAULT section only option. +# (default undefined) +#LOG_FILE = /home/test/logfiles/target.log + +# Remove old logfile if it exists before starting all tests. +# Note, this is a DEFAULT section only option. +# (default 0) +#CLEAR_LOG = 0 + +# Line to define a successful boot up in console output. +# This is what the line contains, not the entire line. If you need +# the entire line to match, then use regular expression syntax like: +# (do not add any quotes around it) +# +# SUCCESS_LINE = ^MyBox Login:$ +# +# (default "login:") +#SUCCESS_LINE = login: + +# In case the console constantly fills the screen, having +# a specified time to stop the test after success is recommended. +# (in seconds) +# (default 10) +#STOP_AFTER_SUCCESS = 10 + +# In case the console constantly fills the screen, having +# a specified time to stop the test after failure is recommended. +# (in seconds) +# (default 60) +#STOP_AFTER_FAILURE = 60 + +# Stop testing if a build fails. If set, the script will end if +# a failure is detected, otherwise it will save off the .config, +# dmesg and bootlog in a directory called +# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss +# if the STORE_FAILURES directory is set. +# (default 1) +# Note, even if this is set to zero, there are some errors that still +# stop the tests. +#DIE_ON_FAILURE = 1 + +# Directory to store failure directories on failure. If this is not +# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and +# bootlog. This option is ignored if DIE_ON_FAILURE is not set. 
+# (default undefined) +#STORE_FAILURES = /home/test/failures + +# Build without doing a make mrproper, or removing .config +# (default 0) +#BUILD_NOCLEAN = 0 + +# As the test reads the console, after it hits the SUCCESS_LINE +# the time it waits for the monitor to settle down between reads +# can usually be lowered. +# (in seconds) (default 1) +#BOOTED_TIMEOUT = 1 + +# The timeout in seconds when we consider the box hung after +# the console stops producing output. Be sure to leave enough +# time here to get past a reboot. Some machines may not produce +# any console output for a long time during a reboot. You do +# not want the test to fail just because the system was in +# the process of rebooting to the test kernel. +# (default 120) +#TIMEOUT = 120 + +# In between tests, a reboot of the box may occur, and this +# is the time to wait for the console after it stops producing +# output. Some machines may produce a large lag on reboot +# so this should accommodate it. +# The difference between this and TIMEOUT, is that TIMEOUT happens +# when rebooting to the test kernel. This sleep time happens +# after a test has completed and we are about to start running +# another test. If a reboot to the reliable kernel happens, +# we wait SLEEP_TIME for the console to stop producing output +# before starting the next test. +# (default 60) +#SLEEP_TIME = 60 + +# The time in between bisects to sleep (in seconds) +# (default 60) +#BISECT_SLEEP_TIME = 60 + +# Reboot the target box on error (default 0) +#REBOOT_ON_ERROR = 0 + +# Power off the target on error (ignored if REBOOT_ON_ERROR is set) +# Note, this is a DEFAULT section only option. +# (default 0) +#POWEROFF_ON_ERROR = 0 + +# Power off the target after all tests have completed successfully +# Note, this is a DEFAULT section only option. 
+# (default 0) +#POWEROFF_ON_SUCCESS = 0 + +# Reboot the target after all tests have completed successfully (default 1) +# (ignored if POWEROFF_ON_SUCCESS is set) +#REBOOT_ON_SUCCESS = 1 + +# In case there are issues with rebooting, you can specify this +# to always powercycle after this amount of time after calling +# reboot. +# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just +# makes it powercycle immediately after rebooting. Do not define +# it if you do not want it. +# (default undefined) +#POWERCYCLE_AFTER_REBOOT = 5 + +# In case there are issues with halting, you can specify this +# to always poweroff after this amount of time after calling +# halt. +# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just +# makes it poweroff immediately after halting. Do not define +# it if you do not want it. +# (default undefined) +#POWEROFF_AFTER_HALT = 20 + +# A script or command to power off the box (default undefined) +# Needed for POWEROFF_ON_ERROR and SUCCESS +# +# Example for digital loggers power switch: +#POWER_OFF = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF' +# +# Example for a virtual guest called "Guest". +#POWER_OFF = virsh destroy Guest + +# The way to execute a command on the target +# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND) +# The variables SSH_USER, MACHINE and SSH_COMMAND are defined. +#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND + +# The way to copy a file to the target +# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE) +# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined. +#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE + +# The nice way to reboot the target +# (default ssh $SSH_USER@$MACHINE reboot) +# The variables SSH_USER and MACHINE are defined. +#REBOOT = ssh $SSH_USER@$MACHINE reboot + +#### Per test run options #### +# The following options are only allowed in TEST_START sections. +# They are ignored in the DEFAULTS sections. 
+# +# All of these are optional and undefined by default, although +# some of these options are required for TEST_TYPE of patchcheck +# and bisect. +# +# +# CHECKOUT = branch +# +# If the BUILD_DIR is a git repository, then you can set this option +# to checkout the given branch before running the TEST. If you +# specify this for the first run, that branch will be used for +# all following tests until a new CHECKOUT is set. +# +# +# +# For TEST_TYPE = patchcheck +# +# This expects the BUILD_DIR to be a git repository, and +# will checkout the PATCHCHECK_START commit. +# +# The option BUILD_TYPE will be ignored. +# +# The MIN_CONFIG will be used for all builds of the patchcheck. The build type +# used for patchcheck is oldconfig. +# +# PATCHCHECK_START is required and is the first patch to +# test (the SHA1 of the commit). You may also specify anything +# that git checkout allows (branch name, tag, HEAD~3). +# +# PATCHCHECK_END is the last patch to check (default HEAD) +# +# PATCHCHECK_TYPE is required and is the type of test to run: +# build, boot, test. +# +# Note, the build test will look for warnings, if a warning occurred +# in a file that a commit touches, the build will fail. +# +# If BUILD_NOCLEAN is set, then make mrproper will not be run on +# any of the builds, just like all other TEST_TYPE tests. But +# what makes patchcheck different from the other tests, is if +# BUILD_NOCLEAN is not set, only the first and last patch run +# make mrproper. This helps speed up the test. +# +# Example: +# TEST_START +# TEST_TYPE = patchcheck +# CHECKOUT = mybranch +# PATCHCHECK_TYPE = boot +# PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7 +# PATCHCHECK_END = HEAD~2 +# +# +# +# For TEST_TYPE = bisect +# +# You can specify a git bisect if the BUILD_DIR is a git repository. +# The MIN_CONFIG will be used for all builds of the bisect. The build type +# used for bisecting is oldconfig. +# +# The option BUILD_TYPE will be ignored. 
+# +# BISECT_TYPE is the type of test to perform: +# build - bad fails to build +# boot - bad builds but fails to boot +# test - bad boots but fails a test +# +# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types) +# BISECT_BAD is the commit to label as bad (accepts all git bad commit types) +# +# The above three options are required for a bisect operation. +# +# BISECT_REPLAY = /path/to/replay/file (optional, default undefined) +# +# If an operation failed in the bisect that was not expected to +# fail, then the test ends. The state of the BUILD_DIR will be +# left off at where the failure occurred. You can examine the +# reason for the failure, and perhaps even find a git commit +# that would work to continue with. You can run: +# +# git bisect log > /path/to/replay/file +# +# Then add: +# +# BISECT_REPLAY = /path/to/replay/file +# +# And run the test again. The test will perform the initial +# git bisect start, git bisect good, and git bisect bad, and +# then it will run git bisect replay on this file, before +# continuing with the bisect. +# +# BISECT_START = commit (optional, default undefined) +# +# As with BISECT_REPLAY, this is for when the test failed on a commit that +# just happened to be a bad commit in the middle of the bisect, +# and you need to skip it. If BISECT_START is defined, it +# will checkout that commit after doing the initial git bisect start, +# git bisect good, git bisect bad, and running the git bisect replay +# if the BISECT_REPLAY is set. +# +# BISECT_REVERSE = 1 (optional, default 0) +# +# In those strange instances where it was broken forever +# and you are trying to find where it started to work! +# Set BISECT_GOOD to the commit that was last known to fail. +# Set BISECT_BAD to the commit that is known to start working. +# With BISECT_REVERSE = 1, the test will consider failures as +# good, and success as bad. 
+# +# BISECT_CHECK = 1 (optional, default 0) +# +# Just to be sure the good is good and bad is bad, setting +# BISECT_CHECK to 1 will start the bisect by first checking +# out BISECT_BAD and make sure it fails, then it will check +# out BISECT_GOOD and make sure it succeeds before starting +# the bisect (it works for BISECT_REVERSE too). +# +# You can limit the test to just check BISECT_GOOD or +# BISECT_BAD with BISECT_CHECK = good or +# BISECT_CHECK = bad, respectively. +# +# Example: +# TEST_START +# TEST_TYPE = bisect +# BISECT_GOOD = v2.6.36 +# BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e +# BISECT_TYPE = build +# MIN_CONFIG = /home/test/config-bisect +# +# +# +# For TEST_TYPE = config_bisect +# +# In those cases where you have two different configs, one of them +# works, the other does not, and you do not know what config causes +# the problem. +# The TEST_TYPE config_bisect will bisect the bad config looking for +# what config causes the failure. +# +# The way it works is this: +# +# First it finds a config to work with. Since a different version, or +# MIN_CONFIG may cause different dependencies, it must run through this +# preparation. +# +# Overwrites any config set in the bad config with a config set in +# either the MIN_CONFIG or ADD_CONFIG. Thus, make sure these configs +# are minimal and do not disable configs you want to test: +# (i.e. # CONFIG_FOO is not set). +# +# An oldconfig is run on the bad config and any new config that +# appears will be added to the configs to test. +# +# Finally, it generates a config with the above result and runs it +# again through make oldconfig to produce a config that should be +# satisfied by kconfig. +# +# Then it starts the bisect. +# +# The configs to test are cut in half. If all the configs in this +# half depend on a config in the other half, then the other half +# is tested instead. If no configs are enabled by either half, then +# this means a circular dependency exists and the test fails. 
+# +# A config is created with the test half, and the bisect test is run. +# +# If the bisect succeeds, then all configs in the generated config +# are removed from the configs to test and added to the configs that +# will be enabled for all builds (they will be enabled, but not be part +# of the configs to examine). +# +# If the bisect fails, then all test configs that were not enabled by +# the config file are removed from the test. These configs will not +# be enabled in future tests. Since the current config failed, we consider +# this to be a subset of the config that we started with. +# +# When we are down to one config, it is considered the bad config. +# +# Note, the config chosen may not be the true bad config. Due to +# dependencies and selections of the kbuild system, multiple +# configs may be needed to cause a failure. If you disable the +# config that was found and restart the test, and the test fails +# again, it is recommended to rerun the config_bisect with a new +# bad config without the found config enabled. +# +# The option BUILD_TYPE will be ignored. +# +# CONFIG_BISECT_TYPE is the type of test to perform: +# build - bad fails to build +# boot - bad builds but fails to boot +# test - bad boots but fails a test +# +# CONFIG_BISECT is the config that failed to boot +# +# Example: +# TEST_START +# TEST_TYPE = config_bisect +# CONFIG_BISECT_TYPE = build +# CONFIG_BISECT = /home/test/config-bad +# MIN_CONFIG = /home/test/config-min +# |