summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/lguest/.gitignore1
-rw-r--r--tools/lguest/Makefile8
-rw-r--r--tools/lguest/extract58
-rw-r--r--tools/lguest/lguest.c2065
-rw-r--r--tools/lguest/lguest.txt129
-rw-r--r--tools/perf/Documentation/examples.txt34
-rw-r--r--tools/perf/Documentation/perf-annotate.txt4
-rw-r--r--tools/perf/Documentation/perf-buildid-list.txt2
-rw-r--r--tools/perf/Documentation/perf-evlist.txt2
-rw-r--r--tools/perf/Documentation/perf-kmem.txt2
-rw-r--r--tools/perf/Documentation/perf-lock.txt2
-rw-r--r--tools/perf/Documentation/perf-record.txt2
-rw-r--r--tools/perf/Documentation/perf-report.txt11
-rw-r--r--tools/perf/Documentation/perf-sched.txt2
-rw-r--r--tools/perf/Documentation/perf-script.txt9
-rw-r--r--tools/perf/Documentation/perf-test.txt8
-rw-r--r--tools/perf/Documentation/perf-timechart.txt2
-rw-r--r--tools/perf/Makefile1
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c3
-rw-r--r--tools/perf/builtin-annotate.c132
-rw-r--r--tools/perf/builtin-buildid-list.c53
-rw-r--r--tools/perf/builtin-diff.c21
-rw-r--r--tools/perf/builtin-evlist.c2
-rw-r--r--tools/perf/builtin-inject.c118
-rw-r--r--tools/perf/builtin-kmem.c16
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-lock.c12
-rw-r--r--tools/perf/builtin-probe.c1
-rw-r--r--tools/perf/builtin-record.c603
-rw-r--r--tools/perf/builtin-report.c236
-rw-r--r--tools/perf/builtin-sched.c200
-rw-r--r--tools/perf/builtin-script.c130
-rw-r--r--tools/perf/builtin-stat.c137
-rw-r--r--tools/perf/builtin-test.c545
-rw-r--r--tools/perf/builtin-timechart.c38
-rw-r--r--tools/perf/builtin-top.c558
-rw-r--r--tools/perf/perf.c33
-rw-r--r--tools/perf/perf.h24
-rw-r--r--tools/perf/util/annotate.c8
-rw-r--r--tools/perf/util/annotate.h5
-rw-r--r--tools/perf/util/build-id.c26
-rw-r--r--tools/perf/util/build-id.h2
-rw-r--r--tools/perf/util/callchain.h3
-rw-r--r--tools/perf/util/cgroup.c15
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/debugfs.c35
-rw-r--r--tools/perf/util/debugfs.h31
-rw-r--r--tools/perf/util/event.c360
-rw-r--r--tools/perf/util/event.h68
-rw-r--r--tools/perf/util/evlist.c299
-rw-r--r--tools/perf/util/evlist.h43
-rw-r--r--tools/perf/util/evsel.c164
-rw-r--r--tools/perf/util/evsel.h8
-rw-r--r--tools/perf/util/header.c743
-rw-r--r--tools/perf/util/header.h51
-rw-r--r--tools/perf/util/hist.c10
-rw-r--r--tools/perf/util/hist.h5
-rw-r--r--tools/perf/util/include/linux/bitops.h118
-rw-r--r--tools/perf/util/map.c4
-rw-r--r--tools/perf/util/map.h19
-rw-r--r--tools/perf/util/parse-events.c30
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/probe-finder.h1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c75
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c4
-rw-r--r--tools/perf/util/session.c346
-rw-r--r--tools/perf/util/session.h72
-rw-r--r--tools/perf/util/setup.py3
-rw-r--r--tools/perf/util/symbol.c11
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/thread.c6
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/tool.h50
-rw-r--r--tools/perf/util/top.h20
-rw-r--r--tools/perf/util/trace-event-info.c28
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/trace-event-scripting.c2
-rw-r--r--tools/perf/util/trace-event.h8
-rw-r--r--tools/perf/util/ui/browsers/annotate.c16
-rw-r--r--tools/perf/util/ui/browsers/hists.c2
-rw-r--r--tools/perf/util/ui/progress.c3
-rw-r--r--tools/perf/util/usage.c5
-rw-r--r--tools/perf/util/util.h11
-rw-r--r--tools/perf/util/values.c1
-rw-r--r--tools/power/x86/turbostat/turbostat.88
-rwxr-xr-xtools/testing/ktest/ktest.pl531
-rw-r--r--tools/testing/ktest/sample.conf148
-rw-r--r--tools/testing/selftests/Makefile11
-rw-r--r--tools/testing/selftests/breakpoints/Makefile20
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c394
-rw-r--r--tools/testing/selftests/run_tests8
-rw-r--r--tools/virtio/linux/virtio.h22
-rw-r--r--tools/virtio/virtio_test.c6
93 files changed, 6764 insertions, 2324 deletions
diff --git a/tools/lguest/.gitignore b/tools/lguest/.gitignore
new file mode 100644
index 00000000000..115587fd5f6
--- /dev/null
+++ b/tools/lguest/.gitignore
@@ -0,0 +1 @@
+lguest
diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile
new file mode 100644
index 00000000000..0ac34206f7a
--- /dev/null
+++ b/tools/lguest/Makefile
@@ -0,0 +1,8 @@
+# This creates the demonstration utility "lguest" which runs a Linux guest.
+# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
+CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
+
+all: lguest
+
+clean:
+ rm -f lguest
diff --git a/tools/lguest/extract b/tools/lguest/extract
new file mode 100644
index 00000000000..7730bb6e4b9
--- /dev/null
+++ b/tools/lguest/extract
@@ -0,0 +1,58 @@
+#! /bin/sh
+
+# Usage: extract PREFIX file...
+# Pulls the numbered "PREFIX:NNN" comment blocks out of the given files and
+# prints them in numeric order, announcing each source file as it changes.
+
+set -e
+
+PREFIX=$1
+shift
+
+# Create the scratch directory first: arming the trap before TMPDIR is set
+# would run "rm -r" with no operand if mktemp failed.
+TMPDIR=`mktemp -d`
+trap 'rm -r "$TMPDIR"' 0
+
+# fd 3 is "the current output block"; outside a block it is /dev/null.
+exec 3>/dev/null
+for f; do
+	while IFS="
+" read -r LINE; do
+		case "$LINE" in
+		    # A block which opens and closes on the same line.
+		    *$PREFIX:[0-9]*:\**)
+			NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+			if [ -f "$TMPDIR/$NUM" ]; then
+			    echo "$TMPDIR/$NUM already exists prior to $f"
+			    exit 1
+			fi
+			exec 3>>"$TMPDIR/$NUM"
+			echo "$f" | sed 's,\.\./,,g' > "$TMPDIR/.$NUM"
+			/bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
+			;;
+		    # Start of a numbered block: switch fd 3 to its file.
+		    *$PREFIX:[0-9]*)
+			NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+			if [ -f "$TMPDIR/$NUM" ]; then
+			    echo "$TMPDIR/$NUM already exists prior to $f"
+			    exit 1
+			fi
+			exec 3>>"$TMPDIR/$NUM"
+			echo "$f" | sed 's,\.\./,,g' > "$TMPDIR/.$NUM"
+			/bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
+			;;
+		    # End-of-block marker: emit it, then stop collecting.
+		    *:\**)
+			/bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
+			echo >&3
+			exec 3>/dev/null
+			;;
+		    *)
+			/bin/echo "$LINE" >&3
+			;;
+		esac
+	done < "$f"
+	echo >&3
+	exec 3>/dev/null
+done
+
+# Print the collected blocks, with a "[ file ]" header whenever the source
+# file (recorded in the matching dot-file) changes.
+LASTFILE=""
+for f in "$TMPDIR"/*; do
+	if [ "$LASTFILE" != "$(cat "$TMPDIR/.$(basename "$f")")" ]; then
+		LASTFILE=$(cat "$TMPDIR/.$(basename "$f")")
+		echo "[ $LASTFILE ]"
+	fi
+	cat "$f"
+done
+
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
new file mode 100644
index 00000000000..f759f4f097c
--- /dev/null
+++ b/tools/lguest/lguest.c
@@ -0,0 +1,2065 @@
+/*P:100
+ * This is the Launcher code, a simple program which lays out the "physical"
+ * memory for the new Guest by mapping the kernel image and the virtual
+ * devices, then opens /dev/lguest to tell the kernel about the Guest and
+ * control it.
+:*/
+#define _LARGEFILE64_SOURCE
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <err.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <elf.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/eventfd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <ctype.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <linux/if_tun.h>
+#include <sys/uio.h>
+#include <termios.h>
+#include <getopt.h>
+#include <assert.h>
+#include <sched.h>
+#include <limits.h>
+#include <stddef.h>
+#include <signal.h>
+#include <pwd.h>
+#include <grp.h>
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_blk.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_rng.h>
+#include <linux/virtio_ring.h>
+#include <asm/bootparam.h>
+#include "../../include/linux/lguest_launcher.h"
+/*L:110
+ * We can ignore the 43 include files we need for this program, but I do want
+ * to draw attention to the use of kernel-style types.
+ *
+ * As Linus said, "C is a Spartan language, and so should your naming be." I
+ * like these abbreviations, so we define them here. Note that u64 is always
+ * unsigned long long, which works on all Linux systems: this means that we can
+ * use %llu in printf for any u64.
+ */
+typedef unsigned long long u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+/*:*/
+
+#define BRIDGE_PFX "bridge:"
+#ifndef SIOCBRADDIF
+#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
+#endif
+/* We can have up to 256 pages for devices. */
+#define DEVICE_PAGES 256
+/* This will occupy 3 pages: it must be a power of 2. */
+#define VIRTQUEUE_NUM 256
+
+/*L:120
+ * verbose is both a global flag and a macro. The C preprocessor allows
+ * this, and although I wouldn't recommend it, it works quite nicely here.
+ */
+static bool verbose;
+#define verbose(args...) \
+ do { if (verbose) printf(args); } while(0)
+/*:*/
+
+/* The pointer to the start of guest memory. */
+static void *guest_base;
+/* The maximum guest physical address allowed, and maximum possible. */
+static unsigned long guest_limit, guest_max;
+/* The /dev/lguest file descriptor. */
+static int lguest_fd;
+
+/* a per-cpu variable indicating whose vcpu is currently running */
+static unsigned int __thread cpu_id;
+
+/* This is our list of devices. */
+struct device_list {
+ /* Counter to assign interrupt numbers. */
+ unsigned int next_irq;
+
+ /* Counter to print out convenient device numbers. */
+ unsigned int device_num;
+
+ /* The descriptor page for the devices. */
+ u8 *descpage;
+
+ /* A single linked list of devices. */
+ struct device *dev;
+ /* And a pointer to the last device for easy append. */
+ struct device *lastdev;
+};
+
+/* The list of Guest devices, based on command line arguments. */
+static struct device_list devices;
+
+/* The device structure describes a single device. */
+struct device {
+ /* The linked-list pointer. */
+ struct device *next;
+
+ /* The device's descriptor, as mapped into the Guest. */
+ struct lguest_device_desc *desc;
+
+ /* We can't trust desc values once Guest has booted: we use these. */
+ unsigned int feature_len;
+ unsigned int num_vq;
+
+ /* The name of this device, for --verbose. */
+ const char *name;
+
+ /* Any queues attached to this device */
+ struct virtqueue *vq;
+
+ /* Is it operational */
+ bool running;
+
+ /* Device-specific data. */
+ void *priv;
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct virtqueue {
+ /* Linked-list pointer (presumably the owning device's next queue: confirm). */
+ struct virtqueue *next;
+
+ /* Which device owns me. */
+ struct device *dev;
+
+ /* The configuration for this queue. */
+ struct lguest_vqconfig config;
+
+ /* The actual ring of buffers. */
+ struct vring vring;
+
+ /* Last available index we saw. */
+ u16 last_avail_idx;
+
+ /* How many are used since we sent last irq? */
+ unsigned int pending_used;
+
+ /* Eventfd where Guest notifications arrive. */
+ int eventfd;
+
+ /* Function for the thread which is servicing this virtqueue. */
+ void (*service)(struct virtqueue *vq);
+ /* Presumably the id of that service thread; set elsewhere (TODO confirm). */
+ pid_t thread;
+};
+
+/* Remember the arguments to the program so we can "reboot" */
+static char **main_args;
+
+/* The original tty settings to restore on exit. */
+static struct termios orig_term;
+
+/*
+ * We have to be careful with barriers: our devices are all run in separate
+ * threads and so we need to make sure that changes visible to the Guest happen
+ * in precise order.
+ */
+#define wmb() __asm__ __volatile__("" : : : "memory")
+#define mb() __asm__ __volatile__("" : : : "memory")
+
+/*
+ * Convert an iovec element to the given type.
+ *
+ * This is a fairly ugly trick: we need to know the size of the type and
+ * alignment requirement to check the pointer is kosher. It's also nice to
+ * have the name of the type in case we report failure.
+ *
+ * Typing those three things all the time is cumbersome and error prone, so we
+ * have a macro which sets them all up and passes to the real function.
+ */
+#define convert(iov, type) \
+ ((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
+
+/*
+ * The real function behind convert(): returns iov->iov_base once it has
+ * checked that the element is exactly "size" bytes long and that its base is
+ * a multiple of "align". Either mismatch is fatal (errx), and "name" is the
+ * type name used in the error message.
+ */
+static void *_convert(struct iovec *iov, size_t size, size_t align,
+ const char *name)
+{
+ if (iov->iov_len != size)
+ errx(1, "Bad iovec size %zu for %s", iov->iov_len, name);
+ if ((unsigned long)iov->iov_base % align != 0)
+ errx(1, "Bad alignment %p for %s", iov->iov_base, name);
+ return iov->iov_base;
+}
+
+/* Wrapper for the last available index. Makes it easier to change. */
+#define lg_last_avail(vq) ((vq)->last_avail_idx)
+
+/*
+ * The virtio configuration space is defined to be little-endian. x86 is
+ * little-endian too, but it's nice to be explicit so we have these helpers.
+ */
+#define cpu_to_le16(v16) (v16)
+#define cpu_to_le32(v32) (v32)
+#define cpu_to_le64(v64) (v64)
+#define le16_to_cpu(v16) (v16)
+#define le32_to_cpu(v32) (v32)
+#define le64_to_cpu(v64) (v64)
+
+/*
+ * Is this iovec empty? Returns true only if every one of the num_iov
+ * elements has a zero iov_len (so num_iov == 0 counts as empty too).
+ */
+static bool iov_empty(const struct iovec iov[], unsigned int num_iov)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_iov; i++)
+ if (iov[i].iov_len)
+ return false;
+ return true;
+}
+
+/*
+ * Take len bytes from the front of this iovec.
+ *
+ * Each element in turn has its iov_base moved up and its iov_len reduced
+ * until len bytes are accounted for. Asking for more than the whole iovec
+ * holds trips the assert() at the end.
+ */
+static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_iov; i++) {
+ unsigned int used;
+
+ /* Use up this element entirely, or just what we still need. */
+ used = iov[i].iov_len < len ? iov[i].iov_len : len;
+ iov[i].iov_base += used;
+ iov[i].iov_len -= used;
+ len -= used;
+ }
+ assert(len == 0);
+}
+
+/*
+ * The device virtqueue descriptors are followed by feature bitmasks.
+ *
+ * Returns a pointer just past the dev->num_vq struct lguest_vqconfig entries
+ * which themselves start right after the device descriptor.
+ */
+static u8 *get_feature_bits(struct device *dev)
+{
+ return (u8 *)(dev->desc + 1)
+ + dev->num_vq * sizeof(struct lguest_vqconfig);
+}
+
+/*L:100
+ * The Launcher code itself takes us out into userspace, that scary place where
+ * pointers run wild and free! Unfortunately, like most userspace programs,
+ * it's quite boring (which is why everyone likes to hack on the kernel!).
+ * Perhaps if you make up an Lguest Drinking Game at this point, it will get
+ * you through this section. Or, maybe not.
+ *
+ * The Launcher sets up a big chunk of memory to be the Guest's "physical"
+ * memory and stores it in "guest_base". In other words, Guest physical ==
+ * Launcher virtual with an offset.
+ *
+ * This can be tough to get your head around, but usually it just means that we
+ * use these trivial conversion functions when the Guest gives us its
+ * "physical" addresses:
+ */
+static void *from_guest_phys(unsigned long addr)
+{
+ /* Guest physical -> Launcher pointer. No bounds checking is done here. */
+ return guest_base + addr;
+}
+
+static unsigned long to_guest_phys(const void *addr)
+{
+ /* Launcher pointer -> Guest physical: the offset from guest_base. */
+ return (addr - guest_base);
+}
+
+/*L:130
+ * Loading the Kernel.
+ *
+ * We start with a couple of simple helper routines. open_or_die() avoids
+ * error-checking code cluttering the callers: it returns the new file
+ * descriptor, or exits via err() if the open() fails.
+ */
+static int open_or_die(const char *name, int flags)
+{
+ int fd = open(name, flags);
+ if (fd < 0)
+ err(1, "Failed to open %s", name);
+ return fd;
+}
+
+/*
+ * map_zeroed_pages() takes a number of pages, and returns the address of the
+ * first of "num" readable, writable pages mapped from /dev/zero. One
+ * inaccessible page sits on each side of them. Any failure is fatal (err).
+ */
+static void *map_zeroed_pages(unsigned int num)
+{
+ int fd = open_or_die("/dev/zero", O_RDONLY);
+ void *addr;
+
+ /*
+ * We use a private mapping (ie. if we write to the page, it will be
+ * copied). We allocate an extra two pages PROT_NONE to act as guard
+ * pages against read/write attempts that exceed allocated space.
+ */
+ addr = mmap(NULL, getpagesize() * (num+2),
+ PROT_NONE, MAP_PRIVATE, fd, 0);
+
+ if (addr == MAP_FAILED)
+ err(1, "Mmapping %u pages of /dev/zero", num);
+
+ /* Open up the middle "num" pages, leaving the first and last PROT_NONE. */
+ if (mprotect(addr + getpagesize(), getpagesize() * num,
+ PROT_READ|PROT_WRITE) == -1)
+ err(1, "mprotect rw %u pages failed", num);
+
+ /*
+ * One neat mmap feature is that you can close the fd, and it
+ * stays mapped.
+ */
+ close(fd);
+
+ /* Return address after PROT_NONE page */
+ return addr + getpagesize();
+}
+
+/*
+ * Get some more pages for a device.
+ *
+ * Returns the Launcher address of what was the end of Guest memory
+ * (guest_limit), and moves guest_limit up by "num" pages. Going beyond
+ * guest_max is fatal (errx).
+ */
+static void *get_pages(unsigned int num)
+{
+ void *addr = from_guest_phys(guest_limit);
+
+ guest_limit += num * getpagesize();
+ if (guest_limit > guest_max)
+ errx(1, "Not enough memory for devices");
+ return addr;
+}
+
+/*
+ * This routine is used to load the kernel or initrd. It tries mmap, but if
+ * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
+ * it falls back to reading the memory in.
+ *
+ * fd is the open file, addr the Launcher address to place the data at, and
+ * offset/len the byte range of the file wanted. Failing to get all of it is
+ * fatal.
+ */
+static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
+{
+	ssize_t r;
+
+	/*
+	 * We map writable even though some segments are marked read-only.
+	 * The kernel really wants to be writable: it patches its own
+	 * instructions.
+	 *
+	 * MAP_PRIVATE means that the page won't be copied until a write is
+	 * done to it. This allows us to share untouched memory between
+	 * Guests.
+	 */
+	if (mmap(addr, len, PROT_READ|PROT_WRITE,
+		 MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
+		return;
+
+	/* pread does a seek and a read in one shot: saves a few lines. */
+	r = pread(fd, addr, len, offset);
+
+	/*
+	 * Only a failed pread sets errno. A short read leaves behind whatever
+	 * the failed mmap put there, so it must not be reported with err().
+	 */
+	if (r < 0)
+		err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
+	if ((unsigned long)r != len)
+		errx(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
+}
+
+/*
+ * This routine takes an open vmlinux image, which is in ELF, and maps it into
+ * the Guest memory. ELF = Executable and Linkable Format, which is the format
+ * used by all modern binaries on Linux including the kernel.
+ *
+ * The ELF headers give *two* addresses: a physical address, and a virtual
+ * address. We use the physical address; the Guest will map itself to the
+ * virtual address.
+ *
+ * elf_fd is the open image and ehdr its ELF header, already read in by the
+ * caller. We return the starting address (e_entry). A malformed header or
+ * unreadable program headers are fatal.
+ */
+static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
+{
+	unsigned int i;
+
+	/*
+	 * Sanity checks on the main ELF header: an x86 executable with a
+	 * reasonable number of correctly-sized program headers.
+	 */
+	if (ehdr->e_type != ET_EXEC
+	    || ehdr->e_machine != EM_386
+	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
+	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
+		errx(1, "Malformed elf header");
+
+	/*
+	 * An ELF executable contains an ELF header and a number of "program"
+	 * headers which indicate which parts ("segments") of the program to
+	 * load where.
+	 *
+	 * The array lives on the stack and is sized by the file, so it is only
+	 * declared here, after e_phnum has been checked to be non-zero and
+	 * small.
+	 */
+	Elf32_Phdr phdr[ehdr->e_phnum];
+
+	/* We read in all the program headers at once: */
+	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
+		err(1, "Seeking to program headers");
+	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
+		err(1, "Reading program headers");
+
+	/*
+	 * Try all the headers: there are usually only three. A read-only one,
+	 * a read-write one, and a "note" section which we don't load.
+	 */
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		/* If this isn't a loadable segment, we ignore it */
+		if (phdr[i].p_type != PT_LOAD)
+			continue;
+
+		verbose("Section %i: size %i addr %p\n",
+			i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);
+
+		/* We map this section of the file at its physical address. */
+		map_at(elf_fd, from_guest_phys(phdr[i].p_paddr),
+		       phdr[i].p_offset, phdr[i].p_filesz);
+	}
+
+	/* The entry point is given in the ELF header. */
+	return ehdr->e_entry;
+}
+
+/*L:150
+ * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed
+ * to jump into it and it will unpack itself. We used to have to perform some
+ * hairy magic because the unpacking code scared me.
+ *
+ * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
+ * a small patch to jump over the tricky bits in the Guest, so now we just read
+ * the funky header so we know where in the file to load, and away we go!
+ *
+ * fd is the open kernel image; we return the address to enter the kernel at
+ * (code32_start). Any seek or read failure is fatal.
+ */
+static unsigned long load_bzimage(int fd)
+{
+	struct boot_params boot;
+	int r;
+	/* Modern bzImages get loaded at 1M. */
+	void *p = from_guest_phys(0x100000);
+
+	/*
+	 * Go back to the start of the file and read the header. It should be
+	 * a Linux boot header (see Documentation/x86/boot.txt)
+	 *
+	 * A short read would leave part of "boot" uninitialized, so we insist
+	 * on getting all of it before looking inside.
+	 */
+	if (lseek(fd, 0, SEEK_SET) < 0)
+		err(1, "Seeking to bzImage header");
+	if (read(fd, &boot, sizeof(boot)) != sizeof(boot))
+		err(1, "Reading bzImage header");
+
+	/* Inside the setup_hdr, we expect the magic "HdrS" */
+	if (memcmp(&boot.hdr.header, "HdrS", 4) != 0)
+		errx(1, "This doesn't look like a bzImage to me");
+
+	/* Skip over the extra sectors of the header. */
+	if (lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET) < 0)
+		err(1, "Seeking past bzImage setup sectors");
+
+	/* Now read everything into memory, in nice big chunks. */
+	while ((r = read(fd, p, 65536)) > 0)
+		p += r;
+	/* A read error must not be mistaken for end of file. */
+	if (r < 0)
+		err(1, "Reading bzImage");
+
+	/* Finally, code32_start tells us where to enter the kernel. */
+	return boot.hdr.code32_start;
+}
+
+/*L:140
+ * Loading the kernel is easy when it's a "vmlinux", but most kernels
+ * come wrapped up in the self-decompressing "bzImage" format. With a little
+ * work, we can load those, too.
+ *
+ * fd is the open kernel image. We return whatever address the chosen
+ * loader (map_elf() or load_bzimage()) hands back.
+ */
+static unsigned long load_kernel(int fd)
+{
+ Elf32_Ehdr hdr;
+
+ /* Read in the first few bytes. */
+ if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+ err(1, "Reading kernel");
+
+ /* If it's an ELF file, it starts with "\177ELF" */
+ if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
+ return map_elf(fd, &hdr);
+
+ /* Otherwise we assume it's a bzImage, and try to load it. */
+ return load_bzimage(fd);
+}
+
+/*
+ * This is a trivial little helper to align pages. Andi Kleen hated it because
+ * it calls getpagesize() twice: "it's dumb code."
+ *
+ * Kernel guys get really het up about optimization, even when it's not
+ * necessary. I leave this code as a reaction against that.
+ *
+ * Returns addr rounded up to the next multiple of the page size. The mask
+ * trick is only right when the page size is a power of two.
+ */
+static inline unsigned long page_align(unsigned long addr)
+{
+ /* Add upwards and truncate downwards. */
+ return ((addr + getpagesize()-1) & ~(getpagesize()-1));
+}
+
+/*L:180
+ * An "initial ram disk" is a disk image loaded into memory along with the
+ * kernel which the kernel can use to boot from without needing any drivers.
+ * Most distributions now use this as standard: the initrd contains the code to
+ * load the appropriate driver modules for the current machine.
+ *
+ * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
+ * kernels. He sent me this (and tells me when I break it).
+ *
+ * name is the initrd file and mem the top of Guest memory, as a Guest physical
+ * address. We return the page-rounded size of the initrd. Failures are fatal.
+ */
+static unsigned long load_initrd(const char *name, unsigned long mem)
+{
+	int ifd;
+	struct stat st;
+	unsigned long len;
+
+	ifd = open_or_die(name, O_RDONLY);
+	/* fstat() is needed to get the file size. */
+	if (fstat(ifd, &st) < 0)
+		err(1, "fstat() on initrd '%s'", name);
+
+	/*
+	 * We map the initrd at the top of memory, but mmap wants it to be
+	 * page-aligned, so we round the size up for that.
+	 */
+	len = page_align(st.st_size);
+
+	/*
+	 * If it doesn't fit, "mem - len" would wrap around and we'd map the
+	 * file over some unrelated address. Refuse instead.
+	 */
+	if (len > mem)
+		errx(1, "initrd '%s' is too big for Guest memory", name);
+
+	map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
+	/*
+	 * Once a file is mapped, you can close the file descriptor. It's a
+	 * little odd, but quite useful.
+	 */
+	close(ifd);
+	verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
+
+	/* We return the initrd size. */
+	return len;
+}
+/*:*/
+
+/*
+ * Simple routine to roll all the commandline arguments together with spaces
+ * between them.
+ *
+ * args is a NULL-terminated array of strings. Nothing here checks the size
+ * of dst: the caller must supply a buffer big enough for the result.
+ */
+static void concat(char *dst, char *args[])
+{
+ unsigned int i, len = 0;
+
+ for (i = 0; args[i]; i++) {
+ /* Every argument but the first is preceded by a space. */
+ if (i) {
+ strcat(dst+len, " ");
+ len++;
+ }
+ strcpy(dst+len, args[i]);
+ len += strlen(args[i]);
+ }
+ /* In case it's empty. */
+ dst[len] = '\0';
+}
+
+/*L:185
+ * This is where we actually tell the kernel to initialize the Guest. We
+ * saw the arguments it expects when we looked at initialize() in lguest_user.c:
+ * the base of Guest "physical" memory, the top physical page to allow and the
+ * entry point for the Guest.
+ *
+ * start is that entry point. This also opens /dev/lguest and leaves the
+ * descriptor in lguest_fd. Failure to open or write is fatal.
+ */
+static void tell_kernel(unsigned long start)
+{
+ /* The limit is passed as a page count, not a byte address. */
+ unsigned long args[] = { LHREQ_INITIALIZE,
+ (unsigned long)guest_base,
+ guest_limit / getpagesize(), start };
+ verbose("Guest: %p - %p (%#lx)\n",
+ guest_base, guest_base + guest_limit, guest_limit);
+ lguest_fd = open_or_die("/dev/lguest", O_RDWR);
+ if (write(lguest_fd, args, sizeof(args)) < 0)
+ err(1, "Writing to /dev/lguest");
+}
+/*:*/
+
+/*L:200
+ * Device Handling.
+ *
+ * When the Guest gives us a buffer, it sends an array of addresses and sizes.
+ * We need to make sure it's not trying to reach into the Launcher itself, so
+ * we have a convenient routine which checks it and exits with an error message
+ * if something funny is going on:
+ *
+ * addr is a Guest physical address, size the length in bytes of the region
+ * there, and line the caller's source line for the error message.
+ */
+static void *_check_pointer(unsigned long addr, unsigned int size,
+ unsigned int line)
+{
+ /*
+ * Check if the requested address and size exceeds the allocated memory,
+ * or addr + size wraps around.
+ */
+ if ((addr + size) > guest_limit || (addr + size) < addr)
+ errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
+ /*
+ * We return a pointer for the caller's convenience, now we know it's
+ * safe to use.
+ */
+ return from_guest_phys(addr);
+}
+/* A macro which transparently hands the line number to the real function. */
+#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
+
+/*
+ * Each buffer in the virtqueues is actually a chain of descriptors. This
+ * function returns the next descriptor in the chain, or max if we're at the
+ * end.
+ *
+ * desc is the descriptor table, i the current index into it and max the
+ * number of entries in the table. A "next" index outside the table is fatal.
+ */
+static unsigned next_desc(struct vring_desc *desc,
+ unsigned int i, unsigned int max)
+{
+ unsigned int next;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc[i].flags & VRING_DESC_F_NEXT))
+ return max;
+
+ /* Check they're not leading us off end of descriptors. */
+ next = desc[i].next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ wmb();
+
+ if (next >= max)
+ errx(1, "Desc next is %u", next);
+
+ return next;
+}
+
+/*
+ * This actually sends the interrupt for this virtqueue, if we've used a
+ * buffer. It also resets the queue's pending_used count to zero.
+ */
+static void trigger_irq(struct virtqueue *vq)
+{
+ unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
+
+ /* Don't inform them if nothing used. */
+ if (!vq->pending_used)
+ return;
+ vq->pending_used = 0;
+
+ /* If they don't want an interrupt, don't send one... */
+ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
+ return;
+ }
+
+ /* Send the Guest an interrupt to tell them we used something up. */
+ if (write(lguest_fd, buf, sizeof(buf)) != 0)
+ err(1, "Triggering irq %i", vq->config.irq);
+}
+
+/*
+ * This looks in the virtqueue for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * iov[] must have room for vq->vring.num entries: we never store more than
+ * that, whatever the Guest's descriptors claim. *out_num and *in_num are set
+ * to the number of output and input entries stored.
+ *
+ * This function waits if necessary, and returns the descriptor number found.
+ * Anything malformed in the ring is fatal (errx).
+ */
+static unsigned wait_for_vq_desc(struct virtqueue *vq,
+				 struct iovec iov[],
+				 unsigned int *out_num, unsigned int *in_num)
+{
+	unsigned int i, head, max;
+	struct vring_desc *desc;
+	u16 last_avail = lg_last_avail(vq);
+
+	/* There's nothing available? */
+	while (last_avail == vq->vring.avail->idx) {
+		u64 event;
+
+		/*
+		 * Since we're about to sleep, now is a good time to tell the
+		 * Guest about what we've used up to now.
+		 */
+		trigger_irq(vq);
+
+		/* OK, now we need to know about added descriptors. */
+		vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+
+		/*
+		 * They could have slipped one in as we were doing that: make
+		 * sure it's written, then check again.
+		 */
+		mb();
+		if (last_avail != vq->vring.avail->idx) {
+			vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+			break;
+		}
+
+		/* Nothing new? Wait for eventfd to tell us they refilled. */
+		if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
+			errx(1, "Event read failed?");
+
+		/* We don't need to be notified again. */
+		vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+	}
+
+	/* Check it isn't doing very strange things with descriptor numbers. */
+	if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
+		errx(1, "Guest moved avail index from %u to %u",
+		     last_avail, vq->vring.avail->idx);
+
+	/*
+	 * Grab the next descriptor number they're advertising, and increment
+	 * the index we've seen.
+	 */
+	head = vq->vring.avail->ring[last_avail % vq->vring.num];
+	lg_last_avail(vq)++;
+
+	/* If their number is silly, that's a fatal mistake. */
+	if (head >= vq->vring.num)
+		errx(1, "Guest says index %u is available", head);
+
+	/* When we start there are none of either input nor output. */
+	*out_num = *in_num = 0;
+
+	max = vq->vring.num;
+	desc = vq->vring.desc;
+	i = head;
+
+	/*
+	 * If this is an indirect entry, then this buffer contains a descriptor
+	 * table which we handle as if it's any normal descriptor chain.
+	 */
+	if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+		if (desc[i].len % sizeof(struct vring_desc))
+			errx(1, "Invalid size for indirect buffer table");
+
+		max = desc[i].len / sizeof(struct vring_desc);
+		/* We start at entry 0 below, so there has to be one. */
+		if (max == 0)
+			errx(1, "Empty indirect buffer table");
+		desc = check_pointer(desc[i].addr, desc[i].len);
+		i = 0;
+	}
+
+	do {
+		/*
+		 * The caller's iov[] holds vring.num entries, however big an
+		 * indirect table claims to be. Check *before* storing: running
+		 * out of room implies a descriptor loop (or a silly chain).
+		 */
+		if (*out_num + *in_num >= vq->vring.num
+		    || *out_num + *in_num >= max)
+			errx(1, "Looped descriptor");
+
+		/* Grab the descriptor, and check it's OK. */
+		iov[*out_num + *in_num].iov_len = desc[i].len;
+		iov[*out_num + *in_num].iov_base
+			= check_pointer(desc[i].addr, desc[i].len);
+		/* If this is an input descriptor, increment that count. */
+		if (desc[i].flags & VRING_DESC_F_WRITE)
+			(*in_num)++;
+		else {
+			/*
+			 * If it's an output descriptor, they're all supposed
+			 * to come before any input descriptors.
+			 */
+			if (*in_num)
+				errx(1, "Descriptor has out after in");
+			(*out_num)++;
+		}
+	} while ((i = next_desc(desc, i, max)) != max);
+
+	return head;
+}
+
+/*
+ * After we've used one of their buffers, we tell the Guest about it. Sometime
+ * later we'll want to send them an interrupt using trigger_irq(); note that
+ * wait_for_vq_desc() does that for us if it has to wait.
+ *
+ * head is the descriptor number being handed back, and len the value to
+ * store in the used entry's len field.
+ */
+static void add_used(struct virtqueue *vq, unsigned int head, int len)
+{
+ struct vring_used_elem *used;
+
+ /*
+ * The virtqueue contains a ring of used buffers. Get a pointer to the
+ * next entry in that used ring.
+ */
+ used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
+ used->id = head;
+ used->len = len;
+ /* Make sure buffer is written before we update index. */
+ wmb();
+ vq->vring.used->idx++;
+ /* Remember there's something for trigger_irq() to report. */
+ vq->pending_used++;
+}
+
+/*
+ * And here's the combo meal deal. Supersize me! This hands the buffer back
+ * with add_used() and then calls trigger_irq() straight away.
+ */
+static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
+{
+ add_used(vq, head, len);
+ trigger_irq(vq);
+}
+
+/*
+ * The Console
+ *
+ * We associate some data with the console for our exit hack.
+ */
+struct console_abort {
+ /* How many times have they hit ^C? */
+ int count;
+ /* When did they start? */
+ struct timeval start;
+};
+
+/*
+ * This is the routine which handles console input (ie. stdin). Each call
+ * moves one read's worth of stdin into the next Guest buffer, and keeps
+ * count of consecutive ^C presses in the device's struct console_abort.
+ */
+static void console_input(struct virtqueue *vq)
+{
+ int len;
+ unsigned int head, in_num, out_num;
+ struct console_abort *abort = vq->dev->priv;
+ struct iovec iov[vq->vring.num];
+
+ /* Make sure there's a descriptor available. */
+ head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+ if (out_num)
+ errx(1, "Output buffers in console in queue?");
+
+ /* Read into it. This is where we usually wait. */
+ len = readv(STDIN_FILENO, iov, in_num);
+ if (len <= 0) {
+ /* Ran out of input? */
+ warnx("Failed to get console input, ignoring console.");
+ /*
+ * For simplicity, dying threads kill the whole Launcher. So
+ * just nap here.
+ */
+ for (;;)
+ pause();
+ }
+
+ /* Tell the Guest we used a buffer. */
+ add_used_and_trigger(vq, head, len);
+
+ /*
+ * Three ^C within one second? Exit.
+ *
+ * This is such a hack, but works surprisingly well. Each ^C has to
+ * be in a buffer by itself, so they can't be too fast. But we check
+ * that we get three within about a second, so they can't be too
+ * slow.
+ */
+ /* 3 is the character code a ^C produces. */
+ if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
+ abort->count = 0;
+ return;
+ }
+
+ abort->count++;
+ if (abort->count == 1)
+ gettimeofday(&abort->start, NULL);
+ else if (abort->count == 3) {
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ /* Kill all Launcher processes with SIGINT, like normal ^C */
+ if (now.tv_sec <= abort->start.tv_sec+1)
+ kill(0, SIGINT);
+ abort->count = 0;
+ }
+}
+
+/*
+ * This is the routine which handles console output (ie. stdout). Each call
+ * writes one Guest buffer to stdout and hands the buffer back. A failed
+ * write only earns a warning: whatever is left of the buffer is dropped.
+ */
+static void console_output(struct virtqueue *vq)
+{
+ unsigned int head, out, in;
+ struct iovec iov[vq->vring.num];
+
+ /* We usually wait in here, for the Guest to give us something. */
+ head = wait_for_vq_desc(vq, iov, &out, &in);
+ if (in)
+ errx(1, "Input buffers in console output queue?");
+
+ /* writev can return a partial write, so we loop here. */
+ while (!iov_empty(iov, out)) {
+ int len = writev(STDOUT_FILENO, iov, out);
+ if (len <= 0) {
+ warn("Write to stdout gave %i (%d)", len, errno);
+ break;
+ }
+ iov_consume(iov, out, len);
+ }
+
+ /*
+ * We're finished with that buffer: if we're going to sleep,
+ * wait_for_vq_desc() will prod the Guest with an interrupt.
+ */
+ add_used(vq, head, 0);
+}
+
+/*
+ * The Network
+ *
+ * Handling output for network is also simple: we get all the output buffers
+ * and write them to /dev/net/tun.
+ */
+struct net_info {
+ int tunfd; /* fd from get_tun_device(); net_input() reads it, net_output() writes it */
+};
+
+/*
+ * Service routine for the network output virtqueue: take one chain of Guest
+ * output buffers and pass it to the tun fd with a single writev().
+ */
+static void net_output(struct virtqueue *vq)
+{
+	struct iovec iov[vq->vring.num];
+	unsigned int head, num_out, num_in;
+	struct net_info *info = vq->dev->priv;
+
+	/* Normally we sleep here until the Guest has a packet for us. */
+	head = wait_for_vq_desc(vq, iov, &num_out, &num_in);
+	if (num_in != 0)
+		errx(1, "Input buffers in net output queue?");
+
+	/*
+	 * The tun device takes the buffers exactly as the Guest laid them
+	 * out.  A failed write is only reported; the buffer is still returned.
+	 */
+	if (writev(info->tunfd, iov, num_out) < 0)
+		warnx("Write to tun failed (%d)?", errno);
+
+	/*
+	 * Return the buffer without interrupting: wait_for_vq_desc() sends the
+	 * interrupt once all packets are processed.
+	 */
+	add_used(vq, head, 0);
+}
+
+/*
+ * Handling network input is a bit trickier, because I've tried to optimize it.
+ *
+ * First we have a helper routine which tells us if reading from this file
+ * descriptor (ie. the /dev/net/tun device) will block:
+ */
+/*
+ * Poll @fd for readability without waiting.  Returns true unless select()
+ * reports exactly one ready descriptor (so a select() error also counts as
+ * "will block").
+ */
+static bool will_block(int fd)
+{
+	struct timeval no_wait = { 0, 0 };
+	fd_set readable;
+	int ready;
+
+	FD_ZERO(&readable);
+	FD_SET(fd, &readable);
+
+	/* A zero timeout makes select() return immediately. */
+	ready = select(fd + 1, &readable, NULL, NULL, &no_wait);
+	return ready != 1;
+}
+
+/*
+ * This handles packets coming in from the tun device to our Guest.  Like all
+ * service routines, it gets called again as soon as it returns, so you don't
+ * see a while(1) loop here.
+ *
+ * Each call takes one input descriptor chain from the virtqueue, reads one
+ * packet from the tun fd into it and returns the chain with the number of
+ * bytes read.  A failed or empty read is reported with warn() and the chain
+ * is returned with a length of zero.
+ */
+static void net_input(struct virtqueue *vq)
+{
+	int len;
+	unsigned int head, out, in;
+	struct iovec iov[vq->vring.num];
+	struct net_info *net_info = vq->dev->priv;
+
+	/*
+	 * Get a descriptor to write an incoming packet into.  This will also
+	 * send an interrupt if they're out of descriptors.
+	 */
+	head = wait_for_vq_desc(vq, iov, &out, &in);
+	if (out)
+		errx(1, "Output buffers in net input queue?");
+
+	/*
+	 * If it looks like we'll block reading from the tun device, send them
+	 * an interrupt.
+	 */
+	if (vq->pending_used && will_block(net_info->tunfd))
+		trigger_irq(vq);
+
+	/*
+	 * Read in the packet.  This is where we normally wait (when there's no
+	 * incoming network traffic).
+	 */
+	len = readv(net_info->tunfd, iov, in);
+	if (len <= 0) {
+		warn("Failed to read from tun (%d).", errno);
+		/*
+		 * readv() returns -1 on error; never hand a negative byte
+		 * count to add_used() as the length of the packet.
+		 */
+		len = 0;
+	}
+
+	/*
+	 * Mark that packet buffer as used, but don't interrupt here.  We want
+	 * to wait until we've done as much work as we can.
+	 */
+	add_used(vq, head, len);
+}
+/*:*/
+
+/*
+ * Entry point handed to clone() for each virtqueue thread: call the
+ * virtqueue's service routine over and over.
+ */
+static int do_thread(void *_vq)
+{
+	struct virtqueue *queue = _vq;
+
+	while (true)
+		queue->service(queue);
+
+	/* Never reached; clone() wants a function returning int. */
+	return 0;
+}
+
+/*
+ * SIGCHLD handler: when a child dies, send SIGTERM to our entire process
+ * group.  As a side effect the shell restores the console for us.
+ */
+static void kill_launcher(int signal)
+{
+	/* Which signal arrived is irrelevant. */
+	(void)signal;
+
+	kill(0, SIGTERM);
+}
+
+/*
+ * Put a device back into its initial state: forget the features the Guest
+ * acknowledged, stop every virtqueue thread, wipe the rings and mark the
+ * device as not running.
+ */
+static void reset_device(struct device *dev)
+{
+	struct virtqueue *vq;
+	u8 *acked;
+
+	verbose("Resetting device %s\n", dev->name);
+
+	/* The second half of the feature bits holds what the Guest acked. */
+	acked = get_feature_bits(dev) + dev->feature_len;
+	memset(acked, 0, dev->feature_len);
+
+	/* We kill the threads ourselves below, so their deaths are expected. */
+	signal(SIGCHLD, SIG_IGN);
+
+	vq = dev->vq;
+	while (vq) {
+		/* Stop and reap the service thread, if one was started. */
+		if (vq->thread != (pid_t)-1) {
+			kill(vq->thread, SIGTERM);
+			waitpid(vq->thread, NULL, 0);
+			vq->thread = (pid_t)-1;
+		}
+
+		/* Clear the whole ring and rewind our position in it. */
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->config.num, LGUEST_VRING_ALIGN));
+		lg_last_avail(vq) = 0;
+
+		vq = vq->next;
+	}
+	dev->running = false;
+
+	/* From here on an unexpected thread death matters again. */
+	signal(SIGCHLD, (void *)kill_launcher);
+}
+
+/*L:216
+ * This actually creates the thread which services the virtqueue for a device.
+ *
+ * An eventfd is created and attached (LHREQ_EVENTFD) to the virtqueue's page
+ * address via the lguest fd, then a clone()d child sharing our address space
+ * runs do_thread() on the virtqueue.  Any failure is fatal.
+ */
+static void create_thread(struct virtqueue *vq)
+{
+	/*
+	 * Create stack for thread.  The stack grows downwards, so clone() must
+	 * be given the highest address of the region: we point the stack
+	 * pointer to the end of it.
+	 */
+	char *stack = malloc(32768);
+	unsigned long args[] = { LHREQ_EVENTFD,
+				 vq->config.pfn*getpagesize(), 0 };
+
+	/* Without this check a failed malloc would give clone() a bogus stack */
+	if (!stack)
+		err(1, "Allocating thread stack");
+
+	/* Create a zero-initialized eventfd. */
+	vq->eventfd = eventfd(0, 0);
+	if (vq->eventfd < 0)
+		err(1, "Creating eventfd");
+	args[2] = vq->eventfd;
+
+	/*
+	 * Attach an eventfd to this virtqueue: it will go off when the Guest
+	 * does an LHCALL_NOTIFY for this vq.
+	 */
+	if (write(lguest_fd, &args, sizeof(args)) != 0)
+		err(1, "Attaching eventfd");
+
+	/*
+	 * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so
+	 * we get a signal if it dies.
+	 */
+	vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
+	if (vq->thread == (pid_t)-1)
+		err(1, "Creating clone");
+
+	/* We close our local copy now the child has it. */
+	close(vq->eventfd);
+}
+
+/*
+ * Bring a device to life: log the offered and accepted feature bytes, start a
+ * service thread for every virtqueue that has a service routine, and mark the
+ * device as running.
+ */
+static void start_device(struct device *dev)
+{
+	struct virtqueue *vq;
+	unsigned int n;
+
+	/* First half of the feature bits: what we offered. */
+	verbose("Device %s OK: offered", dev->name);
+	for (n = 0; n < dev->feature_len; n++)
+		verbose(" %02x", get_feature_bits(dev)[n]);
+
+	/* Second half: what the Guest accepted. */
+	verbose(", accepted");
+	for (n = 0; n < dev->feature_len; n++)
+		verbose(" %02x", get_feature_bits(dev)[dev->feature_len + n]);
+
+	for (vq = dev->vq; vq; vq = vq->next) {
+		if (!vq->service)
+			continue;
+		create_thread(vq);
+	}
+	dev->running = true;
+}
+
+/*
+ * Reset every device on the list, then put the terminal back the way we
+ * found it if we saved its settings.
+ */
+static void cleanup_devices(void)
+{
+	struct device *dev = devices.dev;
+
+	while (dev) {
+		reset_device(dev);
+		dev = dev->next;
+	}
+
+	/* Any of these flags being set means orig_term holds saved settings. */
+	if ((orig_term.c_lflag & (ISIG|ICANON|ECHO)) != 0)
+		tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
+}
+
+/*
+ * When the Guest tells us they updated the status field, we handle it:
+ * a zero status resets the device, a FAILED status is reported (and a running
+ * device reset), and anything else starts the device.  Starting a device that
+ * is already running is a fatal Guest error.
+ */
+static void update_device_status(struct device *dev)
+{
+	/* A zero status is a reset, otherwise it's a set of flags. */
+	if (dev->desc->status == 0)
+		reset_device(dev);
+	else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+		warnx("Device %s configuration FAILED", dev->name);
+		if (dev->running)
+			reset_device(dev);
+	} else {
+		/*
+		 * This is a Guest protocol error, not a failed system call, so
+		 * use errx(): err() would tack an unrelated errno message on.
+		 */
+		if (dev->running)
+			errx(1, "Device %s features finalized twice", dev->name);
+		start_device(dev);
+	}
+}
+
+/*L:215
+ * This is the generic routine we call when the Guest uses LHCALL_NOTIFY.  In
+ * particular, it's used to notify us of device status changes during boot.
+ *
+ * @addr is the Guest-physical address the Guest notified on.
+ */
+static void handle_output(unsigned long addr)
+{
+	struct device *dev;
+	struct virtqueue *vq;
+
+	for (dev = devices.dev; dev; dev = dev->next) {
+		/* A notify on a device descriptor means its status changed. */
+		if (from_guest_phys(addr) == dev->desc) {
+			update_device_status(dev);
+			return;
+		}
+
+		/*
+		 * A notify on a virtqueue page arriving here means the Guest
+		 * used the device before features were finalized.
+		 */
+		for (vq = dev->vq; vq; vq = vq->next) {
+			if (addr == vq->config.pfn*getpagesize())
+				errx(1, "Notification on %s before setup!",
+				     dev->name);
+		}
+	}
+
+	/*
+	 * Anything else is an early console write: a nul-terminated string in
+	 * Guest memory, which is also handy for hacking debug messages into a
+	 * Guest.  It has to lie below the Guest memory limit.
+	 */
+	if (addr >= guest_limit)
+		errx(1, "Bad NOTIFY %#lx", addr);
+
+	/* strnlen stops us running off the end of Guest memory. */
+	write(STDOUT_FILENO, from_guest_phys(addr),
+	      strnlen(from_guest_phys(addr), guest_limit - addr));
+}
+
+/*L:190
+ * Device Setup
+ *
+ * All devices need a descriptor so the Guest knows it exists, and a "struct
+ * device" so the Launcher can keep track of it. We have common helper
+ * routines to allocate and manage them.
+ */
+
+/*
+ * A device's entry in the device page is laid out as: the "struct
+ * lguest_device_desc", one "struct lguest_vqconfig" per virtqueue, two sets
+ * of feature bits, and finally the configuration bytes.  This returns a
+ * pointer to the start of those configuration bytes.
+ */
+static u8 *device_config(const struct device *dev)
+{
+	u8 *cursor = (u8 *)(dev->desc + 1);
+
+	/* Step over the virtqueue descriptors... */
+	cursor += dev->num_vq * sizeof(struct lguest_vqconfig);
+	/* ...and over both halves of the feature bitmask. */
+	cursor += dev->feature_len * 2;
+
+	return cursor;
+}
+
+/*
+ * Carve a fresh "struct lguest_device_desc" of the given type out of the
+ * descriptor page and return a pointer to it.  The new descriptor goes
+ * directly after the previous device's configuration bytes, or at the start
+ * of the page if this is the first device.  Running out of page is fatal.
+ */
+static struct lguest_device_desc *new_dev_desc(u16 type)
+{
+	struct lguest_device_desc d = { .type = type };
+	void *page_end = (void *)devices.descpage + getpagesize();
+	void *where = devices.descpage;
+
+	if (devices.lastdev)
+		where = device_config(devices.lastdev)
+			+ devices.lastdev->desc->config_len;
+
+	/* All descriptors have to fit into the one page. */
+	if (where + sizeof(d) > page_end)
+		errx(1, "Too many devices");
+
+	/* The slot may be unaligned, hence memcpy rather than assignment. */
+	return memcpy(where, &d, sizeof(d));
+}
+
+/*
+ * Each device descriptor is followed by the description of its virtqueues.  We
+ * specify how many descriptors the virtqueue is to have.
+ *
+ * @dev: device to extend; it must not have feature bits or config bytes yet.
+ * @num_descs: number of descriptors in the new ring.
+ * @service: routine the virtqueue's thread will run (may be NULL, in which
+ *	case start_device() creates no thread for it).
+ */
+static void add_virtqueue(struct device *dev, unsigned int num_descs,
+			  void (*service)(struct virtqueue *))
+{
+	unsigned int pages;
+	struct virtqueue **i, *vq = malloc(sizeof(*vq));
+	void *p;
+
+	/* Don't dereference a failed allocation below. */
+	if (!vq)
+		err(1, "Allocating virtqueue");
+
+	/* First we need some memory for this virtqueue. */
+	pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
+		/ getpagesize();
+	p = get_pages(pages);
+
+	/* Initialize the virtqueue */
+	vq->next = NULL;
+	vq->last_avail_idx = 0;
+	vq->dev = dev;
+
+	/*
+	 * This is the routine the service thread will run, and its Process ID
+	 * once it's running.
+	 */
+	vq->service = service;
+	vq->thread = (pid_t)-1;
+
+	/* Initialize the configuration. */
+	vq->config.num = num_descs;
+	vq->config.irq = devices.next_irq++;
+	vq->config.pfn = to_guest_phys(p) / getpagesize();
+
+	/* Initialize the vring. */
+	vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
+
+	/*
+	 * Append virtqueue to this device's descriptor.  We use
+	 * device_config() to get the end of the device's current virtqueues;
+	 * we check that we haven't added any config or feature information
+	 * yet, otherwise we'd be overwriting them.
+	 */
+	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
+	memcpy(device_config(dev), &vq->config, sizeof(vq->config));
+	dev->num_vq++;
+	dev->desc->num_vq++;
+
+	verbose("Virtqueue page %#lx\n", to_guest_phys(p));
+
+	/*
+	 * Add to tail of list, so dev->vq is first vq, dev->vq->next is
+	 * second.
+	 */
+	for (i = &dev->vq; *i; i = &(*i)->next)
+		;
+	*i = vq;
+}
+
+/*
+ * Advertise feature @bit for @dev.  The feature bitmask comes in two halves:
+ * the first is where we advertise, the second is where the Guest accepts.
+ * The bitmask grows as needed, which is only possible while no config bytes
+ * have been added.
+ */
+static void add_feature(struct device *dev, unsigned bit)
+{
+	const unsigned byte = bit / CHAR_BIT;
+	u8 *features = get_feature_bits(dev);
+
+	/* Growing the bitmask would move the config bytes: check first. */
+	if (byte >= dev->desc->feature_len) {
+		assert(dev->desc->config_len == 0);
+		dev->desc->feature_len = byte + 1;
+		dev->feature_len = dev->desc->feature_len;
+	}
+
+	features[byte] |= (1 << (bit % CHAR_BIT));
+}
+
+/*
+ * Store @len bytes of configuration from @conf in @dev's slot of the device
+ * page and record the length in its descriptor.  This only works for the last
+ * device, which is how we use it.
+ */
+static void set_config(struct device *dev, unsigned len, const void *conf)
+{
+	u8 *dst = device_config(dev);
+
+	/* The config must not run past the end of our single page. */
+	if (dst + len > devices.descpage + getpagesize())
+		errx(1, "Too many devices");
+
+	memcpy(dst, conf, len);
+	dev->desc->config_len = len;
+
+	/* config_len is only 8 bits wide: make sure nothing was truncated. */
+	assert(dev->desc->config_len == len);
+}
+
+/*
+ * This routine does all the creation and setup of a new device, including
+ * calling new_dev_desc() to allocate the descriptor and device memory.  We
+ * don't actually start the service threads until later.
+ *
+ * @name is stored by pointer, not copied.  The new device is appended to the
+ * global device list and returned; dev->priv is left for the caller to set.
+ *
+ * See what I mean about userspace being boring?
+ */
+static struct device *new_device(const char *name, u16 type)
+{
+	struct device *dev = malloc(sizeof(*dev));
+
+	if (!dev)
+		err(1, "Allocating device %s", name);
+
+	/* Now we populate the fields one at a time. */
+	dev->desc = new_dev_desc(type);
+	dev->name = name;
+	dev->vq = NULL;
+	dev->feature_len = 0;
+	dev->num_vq = 0;
+	dev->running = false;
+	/*
+	 * We are the new tail: the list walkers stop on a NULL next pointer,
+	 * and malloc() does not zero memory for us.
+	 */
+	dev->next = NULL;
+
+	/*
+	 * Append to device list.  Prepending to a single-linked list is
+	 * easier, but the user expects the devices to be arranged on the bus
+	 * in command-line order.  The first network device on the command line
+	 * is eth0, the first block device /dev/vda, etc.
+	 */
+	if (devices.lastdev)
+		devices.lastdev->next = dev;
+	else
+		devices.dev = dev;
+	devices.lastdev = dev;
+
+	return dev;
+}
+
+/*
+ * The console is the first device we set up.  The device itself is simple;
+ * it is UNIX tty handling that makes this uglier than it could be.
+ */
+static void setup_console(void)
+{
+	struct console_abort *abort_state;
+	struct device *dev;
+
+	/* Only touch the terminal if we managed to save its settings. */
+	if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
+		struct termios raw = orig_term;
+
+		/*
+		 * No echo, no line buffering, no ^C handling: the Guest gets
+		 * a raw input stream.
+		 */
+		raw.c_lflag &= ~(ISIG|ICANON|ECHO);
+		tcsetattr(STDIN_FILENO, TCSANOW, &raw);
+	}
+
+	dev = new_device("console", VIRTIO_ID_CONSOLE);
+
+	/* The console state lives in dev->priv and starts with a zero count. */
+	abort_state = malloc(sizeof(struct console_abort));
+	dev->priv = abort_state;
+	abort_state->count = 0;
+
+	/*
+	 * The console needs two virtqueues: the input then the output.  When
+	 * they put something in the input queue, we make sure we're listening
+	 * to stdin.  When they put something in the output queue, we write it
+	 * to stdout.
+	 */
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
+
+	verbose("device %u: console\n", ++devices.device_num);
+}
+/*:*/
+
+/*M:010
+ * Inter-guest networking is an interesting area. Simplest is to have a
+ * --sharenet=<name> option which opens or creates a named pipe. This can be
+ * used to send packets to another guest in a 1:1 manner.
+ *
+ * More sophisticated is to use one of the tools developed for projects like
+ * UML to do networking.
+ *
+ * Faster is to do virtio bonding in kernel. Doing this 1:1 would be
+ * completely generic ("here's my vring, attach to your vring") and would work
+ * for any traffic. Of course, namespace and permissions issues need to be
+ * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide
+ * multiple inter-guest channels behind one interface, although it would
+ * require some manner of hotplugging new virtio channels.
+ *
+ * Finally, we could use a virtio network switch in the kernel, ie. vhost.
+:*/
+
+/*
+ * Parse a dotted-quad IPv4 address ("a.b.c.d") and return it as a 32-bit
+ * value in host byte order, with the first octet in the top byte.  Exits with
+ * an error if the string is not four decimal numbers or if any of them does
+ * not fit in an octet.
+ */
+static u32 str2ip(const char *ipaddr)
+{
+	unsigned int b[4];
+	unsigned int i;
+
+	if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4)
+		errx(1, "Failed to parse IP address '%s'", ipaddr);
+
+	/* An octet above 255 would spill into its neighbour when shifted. */
+	for (i = 0; i < 4; i++) {
+		if (b[i] > 255)
+			errx(1, "Failed to parse IP address '%s'", ipaddr);
+	}
+
+	return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+}
+
+/*
+ * Parse a MAC address written as six colon-separated hex bytes into @mac.
+ * Exits with an error if six fields cannot be read.
+ */
+static void str2mac(const char *macaddr, unsigned char mac[6])
+{
+	unsigned int octet[6];
+	int matched, i;
+
+	matched = sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x",
+			 &octet[0], &octet[1], &octet[2],
+			 &octet[3], &octet[4], &octet[5]);
+	if (matched != 6)
+		errx(1, "Failed to parse mac address '%s'", macaddr);
+
+	/* sscanf needs unsigned ints; narrow each one into the result. */
+	for (i = 0; i < 6; i++)
+		mac[i] = octet[i];
+}
+
+/*
+ * Attach the Host end of the network device (@if_name) to the bridge device
+ * named on the command line (@br_name), using the socket @fd for the ioctl.
+ * This code is "adapted" from libbridge.
+ *
+ * This is yet another James Morris contribution (I'm an IP-level guy, so I
+ * dislike bridging), and I just try not to break it.
+ */
+static void add_to_bridge(int fd, const char *if_name, const char *br_name)
+{
+	struct ifreq req;
+	int index;
+
+	if (br_name[0] == '\0')
+		errx(1, "must specify bridge name");
+
+	index = if_nametoindex(if_name);
+	if (index == 0)
+		errx(1, "interface %s does not exist!", if_name);
+
+	/* The request names the bridge and carries the interface's index. */
+	strncpy(req.ifr_name, br_name, IFNAMSIZ);
+	req.ifr_name[IFNAMSIZ-1] = '\0';
+	req.ifr_ifindex = index;
+
+	if (ioctl(fd, SIOCBRADDIF, &req) < 0)
+		err(1, "can't add %s to bridge %s", if_name, br_name);
+}
+
+/*
+ * This sets up the Host end of the network device with an IP address, then
+ * brings it up so packets will flow.
+ *
+ * @fd: any socket on which the interface ioctls can be issued.
+ * @tapif: name of the tap interface to configure.
+ * @ipaddr: IPv4 address in host byte order.
+ */
+static void configure_device(int fd, const char *tapif, u32 ipaddr)
+{
+	struct ifreq ifr;
+	struct sockaddr_in sin;
+
+	memset(&ifr, 0, sizeof(ifr));
+	strcpy(ifr.ifr_name, tapif);
+
+	/*
+	 * Start from a zeroed address: the whole struct is copied into the
+	 * request below, so don't pass uninitialized stack to the kernel.
+	 */
+	memset(&sin, 0, sizeof(sin));
+
+	/* Don't read these incantations.  Just cut & paste them like I did! */
+	sin.sin_family = AF_INET;
+	sin.sin_addr.s_addr = htonl(ipaddr);
+	memcpy(&ifr.ifr_addr, &sin, sizeof(sin));
+	if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
+		err(1, "Setting %s interface address", tapif);
+	ifr.ifr_flags = IFF_UP;
+	if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
+		err(1, "Bringing interface %s up", tapif);
+}
+
+/*
+ * Open /dev/net/tun, configure it as a tap device with checksum and TSO
+ * offloads, copy the interface name from the request into @tapif and return
+ * the open fd.  Failure of either the TUNSETIFF or TUNSETOFFLOAD ioctl is
+ * fatal.
+ */
+static int get_tun_device(char tapif[IFNAMSIZ])
+{
+	const int offloads = TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN;
+	struct ifreq req;
+	int tun;
+
+	/* Zero the whole request first.  Messy but sure. */
+	memset(&req, 0, sizeof(req));
+
+	/*
+	 * We ask for a tap device, with no packet-info header but with a
+	 * virtio-net header, named from the "tap%d" template.
+	 */
+	tun = open_or_die("/dev/net/tun", O_RDWR);
+	req.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+	strcpy(req.ifr_name, "tap%d");
+	if (ioctl(tun, TUNSETIFF, &req) != 0)
+		err(1, "configuring /dev/net/tun");
+
+	if (ioctl(tun, TUNSETOFFLOAD, offloads) != 0)
+		err(1, "Could not set features for tun device");
+
+	/*
+	 * Packets coming in this device need no checksums calculated: trust
+	 * us!  The result of this ioctl is deliberately not checked.
+	 */
+	ioctl(tun, TUNSETNOCSUM, 1);
+
+	/* Hand back whatever name the request holds now. */
+	memcpy(tapif, req.ifr_name, IFNAMSIZ);
+	return tun;
+}
+
+/*L:195
+ * Our network is a Host<->Guest network.  This can either use bridging or
+ * routing, but the principle is the same: it uses the "tun" device to inject
+ * packets into the Host as if they came in from a normal network card.  We
+ * just shunt packets between the Guest and the tun device.
+ *
+ * @arg is the --tunnet option value: "<ipaddr>" or "bridge:<bridgename>",
+ * either optionally followed by ":<macaddr>".  It is modified in place (the
+ * ':' before the MAC address is overwritten with a nul).
+ */
+static void setup_tun_net(char *arg)
+{
+	struct device *dev;
+	struct net_info *net_info = malloc(sizeof(*net_info));
+	int ipfd;
+	u32 ip = INADDR_ANY;
+	bool bridging = false;
+	char tapif[IFNAMSIZ], *p;
+	struct virtio_net_config conf;
+
+	if (!net_info)
+		err(1, "Allocating net device info");
+
+	/*
+	 * The whole of conf is copied into the Guest-visible device page
+	 * below, but the MAC is only filled in when one is given: start from
+	 * zeroes rather than whatever was on our stack.
+	 */
+	memset(&conf, 0, sizeof(conf));
+
+	net_info->tunfd = get_tun_device(tapif);
+
+	/* First we create a new network device. */
+	dev = new_device("net", VIRTIO_ID_NET);
+	dev->priv = net_info;
+
+	/* Network devices need a recv and a send queue, just like console. */
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
+	add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
+
+	/*
+	 * We need a socket to perform the magic network ioctls to bring up the
+	 * tap interface, connect to the bridge etc.  Any socket will do!
+	 */
+	ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+	if (ipfd < 0)
+		err(1, "opening IP socket");
+
+	/* If the command line was --tunnet=bridge:<name> do bridging. */
+	if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
+		arg += strlen(BRIDGE_PFX);
+		bridging = true;
+	}
+
+	/* A mac address may follow the bridge name or IP address */
+	p = strchr(arg, ':');
+	if (p) {
+		str2mac(p+1, conf.mac);
+		add_feature(dev, VIRTIO_NET_F_MAC);
+		*p = '\0';
+	}
+
+	/* arg is now either an IP address or a bridge name */
+	if (bridging)
+		add_to_bridge(ipfd, tapif, arg);
+	else
+		ip = str2ip(arg);
+
+	/* Set up the tun device. */
+	configure_device(ipfd, tapif, ip);
+
+	/* Expect Guest to handle everything except UFO */
+	add_feature(dev, VIRTIO_NET_F_CSUM);
+	add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
+	add_feature(dev, VIRTIO_NET_F_GUEST_TSO4);
+	add_feature(dev, VIRTIO_NET_F_GUEST_TSO6);
+	add_feature(dev, VIRTIO_NET_F_GUEST_ECN);
+	add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
+	add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
+	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
+	/* We handle indirect ring entries */
+	add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
+	set_config(dev, sizeof(conf), &conf);
+
+	/* We don't need the socket any more; setup is done. */
+	close(ipfd);
+
+	devices.device_num++;
+
+	if (bridging)
+		verbose("device %u: tun %s attached to bridge: %s\n",
+			devices.device_num, tapif, arg);
+	else
+		verbose("device %u: tun %s: %s\n",
+			devices.device_num, tapif, arg);
+}
+/*:*/
+
+/* This hangs off device->priv for a block device (set up in setup_block_file()). */
+struct vblk_info {
+ /* The size of the backing file, as returned by lseek64(SEEK_END) at setup. */
+ off64_t len;
+
+ /* The file descriptor for the backing file, opened O_RDWR|O_LARGEFILE. */
+ int fd;
+
+};
+
+/*L:210
+ * The Disk
+ *
+ * The disk only has one virtqueue, so it only has one thread. It is really
+ * simple: the Guest asks for a block number and we read or write that position
+ * in the file.
+ *
+ * Before we serviced each virtqueue in a separate thread, that was unacceptably
+ * slow: the Guest waits until the read is finished before running anything
+ * else, even if it could have been doing useful work.
+ *
+ * We could have used async I/O, except it's reputed to suck so hard that
+ * characters actually go missing from your code when you try to use it.
+ *
+ * Each call handles one request: a descriptor chain is taken from the
+ * virtqueue, its first buffer is read as a struct virtio_blk_outhdr, its last
+ * buffer receives the one-byte status, and the chain is handed back with
+ * add_used(). The request type selects SCSI (rejected), write, flush or read.
+ */
+static void blk_request(struct virtqueue *vq)
+{
+ struct vblk_info *vblk = vq->dev->priv;
+ unsigned int head, out_num, in_num, wlen;
+ int ret;
+ u8 *in;
+ struct virtio_blk_outhdr *out;
+ struct iovec iov[vq->vring.num];
+ off64_t off;
+
+ /*
+ * Get the next request, where we normally wait. It triggers the
+ * interrupt to acknowledge previously serviced requests (if any).
+ */
+ head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+
+ /*
+ * Every block request should contain at least one output buffer
+ * (detailing the location on disk and the type of request) and one
+ * input buffer (to hold the result). Anything else is fatal.
+ */
+ if (out_num == 0 || in_num == 0)
+ errx(1, "Bad virtblk cmd %u out=%u in=%u",
+ head, out_num, in_num);
+
+ out = convert(&iov[0], struct virtio_blk_outhdr);
+ in = convert(&iov[out_num+in_num-1], u8);
+ /*
+ * For historical reasons, block operations are expressed in 512 byte
+ * "sectors", so the byte offset in the file is the sector times 512.
+ */
+ off = out->sector * 512;
+
+ /*
+ * In general the virtio block driver is allowed to try SCSI commands.
+ * It'd be nice if we supported eject, for example, but we don't: the
+ * status byte is set to UNSUPP and nothing else is written.
+ */
+ if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
+ fprintf(stderr, "Scsi commands unsupported\n");
+ *in = VIRTIO_BLK_S_UNSUPP;
+ wlen = sizeof(*in);
+ } else if (out->type & VIRTIO_BLK_T_OUT) {
+ /*
+ * Write
+ *
+ * Move to the right location in the block file. This can fail
+ * if they try to write past end.
+ */
+ if (lseek64(vblk->fd, off, SEEK_SET) != off)
+ err(1, "Bad seek to sector %llu", out->sector);
+
+ ret = writev(vblk->fd, iov+1, out_num-1);
+ verbose("WRITE to sector %llu: %i\n", out->sector, ret);
+
+ /*
+ * Grr... Now we know how long the descriptor they sent was, we
+ * make sure they didn't try to write over the end of the block
+ * file (possibly extending it). Note the check happens after
+ * the write has already been done.
+ */
+ if (ret > 0 && off + ret > vblk->len) {
+ /* Trim it back to the correct length */
+ ftruncate64(vblk->fd, vblk->len);
+ /* Die, bad Guest, die. */
+ errx(1, "Write past end %llu+%u", off, ret);
+ }
+
+ wlen = sizeof(*in);
+ *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+ } else if (out->type & VIRTIO_BLK_T_FLUSH) {
+ /* Flush: push written data out to the file with fdatasync(). */
+ ret = fdatasync(vblk->fd);
+ verbose("FLUSH fdatasync: %i\n", ret);
+ wlen = sizeof(*in);
+ *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+ } else {
+ /*
+ * Read
+ *
+ * Move to the right location in the block file. This can fail
+ * if they try to read past end.
+ *
+ * NOTE(review): iov+1 with in_num-1 below appears to assume
+ * that the header is the only output buffer (out_num == 1) -
+ * confirm against the Guest driver.
+ */
+ if (lseek64(vblk->fd, off, SEEK_SET) != off)
+ err(1, "Bad seek to sector %llu", out->sector);
+
+ ret = readv(vblk->fd, iov+1, in_num-1);
+ verbose("READ from sector %llu: %i\n", out->sector, ret);
+ if (ret >= 0) {
+ wlen = sizeof(*in) + ret;
+ *in = VIRTIO_BLK_S_OK;
+ } else {
+ wlen = sizeof(*in);
+ *in = VIRTIO_BLK_S_IOERR;
+ }
+ }
+
+ /* Finished that request: wlen is the status byte plus any data read. */
+ add_used(vq, head, wlen);
+}
+
+/*L:198
+ * This actually sets up a virtual block device backed by @filename.
+ *
+ * The file is opened read-write and its current length becomes the device
+ * capacity (in 512-byte sectors).
+ */
+static void setup_block_file(const char *filename)
+{
+	struct device *dev;
+	struct vblk_info *vblk;
+	struct virtio_blk_config conf;
+
+	/*
+	 * Everything in conf up to "geometry" is copied into the Guest-visible
+	 * device page, but we only fill in capacity and seg_max: zero the rest
+	 * instead of exposing whatever was on our stack.
+	 */
+	memset(&conf, 0, sizeof(conf));
+
+	/* Create the device. */
+	dev = new_device("block", VIRTIO_ID_BLOCK);
+
+	/* The device has one virtqueue, where the Guest places requests. */
+	add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
+
+	/* Allocate the room for our own bookkeeping */
+	vblk = dev->priv = malloc(sizeof(*vblk));
+	if (!vblk)
+		err(1, "Allocating block device info");
+
+	/* First we open the file and store the length. */
+	vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
+	vblk->len = lseek64(vblk->fd, 0, SEEK_END);
+
+	/* We support FLUSH. */
+	add_feature(dev, VIRTIO_BLK_F_FLUSH);
+
+	/* Tell Guest how many sectors this device has. */
+	conf.capacity = cpu_to_le64(vblk->len / 512);
+
+	/*
+	 * Tell Guest not to put in too many descriptors at once: two are used
+	 * for the in and out elements.
+	 */
+	add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
+	conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2);
+
+	/* Don't try to put whole struct: we have 8 bit limit. */
+	set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf);
+
+	verbose("device %u: virtblock %llu sectors\n",
+		++devices.device_num, le64_to_cpu(conf.capacity));
+}
+
+/*L:211
+ * Our random number generator device reads from /dev/random into the Guest's
+ * input buffers. The usual case is that the Guest doesn't want random numbers
+ * and so has no buffers although /dev/random is still readable, whereas
+ * console is the reverse.
+ *
+ * The same logic applies, however.
+ */
+struct rng_info {
+ int rfd; /* fd opened on /dev/random by setup_rng(); rng_input() reads from it */
+};
+
+/*
+ * Service routine for the rng virtqueue: fill one chain of Guest input
+ * buffers completely from the random fd, then hand it back with the number
+ * of bytes stored.
+ */
+static void rng_input(struct virtqueue *vq)
+{
+	struct rng_info *info = vq->dev->priv;
+	struct iovec iov[vq->vring.num];
+	unsigned int head, num_in, num_out;
+	unsigned int filled = 0;
+
+	/* We can do nothing until the Guest gives us a buffer. */
+	head = wait_for_vq_desc(vq, iov, &num_out, &num_in);
+	if (num_out != 0)
+		errx(1, "Output buffers in rng?");
+
+	/*
+	 * As with console output we loop until the whole iovec is covered;
+	 * here short reads really do happen quite a bit.
+	 */
+	for (;;) {
+		int got;
+
+		if (iov_empty(iov, num_in))
+			break;
+
+		got = readv(info->rfd, iov, num_in);
+		if (got <= 0)
+			err(1, "Read from /dev/random gave %i", got);
+		iov_consume(iov, num_in, got);
+		filled += got;
+	}
+
+	/* Let the Guest know how much input it has. */
+	add_used(vq, head, filled);
+}
+
+/*L:199
+ * This creates a "hardware" random number device for the Guest.
+ *
+ * The device has a single virtqueue serviced by rng_input(), which fills the
+ * Guest's buffers from /dev/random.
+ */
+static void setup_rng(void)
+{
+	struct device *dev;
+	struct rng_info *rng_info = malloc(sizeof(*rng_info));
+
+	if (!rng_info)
+		err(1, "Allocating rng device info");
+
+	/* Our device's private info simply contains the /dev/random fd. */
+	rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
+
+	/* Create the new device. */
+	dev = new_device("rng", VIRTIO_ID_RNG);
+	dev->priv = rng_info;
+
+	/* The device has one virtqueue, where the Guest places inbufs. */
+	add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
+
+	/*
+	 * Count this device before printing, as every other device does, so
+	 * the number logged is this device's own and not the previous one's.
+	 */
+	verbose("device %u: rng\n", ++devices.device_num);
+}
+/* That's the end of device setup. */
+
+/*L:230
+ * Reboot is pretty easy: clean up and exec() the Launcher afresh, using the
+ * argument vector saved in main_args.  This never returns.
+ */
+static void __attribute__((noreturn)) restart_guest(void)
+{
+	unsigned int fd = 3;
+
+	/*
+	 * We keep no record of which fds are open, so everything above stderr
+	 * (up to FD_SETSIZE) simply gets closed.
+	 */
+	while (fd < FD_SETSIZE) {
+		close(fd);
+		fd++;
+	}
+
+	/* Resetting the devices kills all their threads. */
+	cleanup_devices();
+
+	execv(main_args[0], main_args);
+	/* execv only comes back on failure. */
+	err(1, "Could not exec %s", main_args[0]);
+}
+
+/*L:220
+ * Finally we reach the core of the Launcher which runs the Guest, serves
+ * its input and output, and finally, lays it to rest.
+ *
+ * Each pread() on the lguest fd runs the Guest until something needs our
+ * attention.  This function never returns.
+ */
+static void __attribute__((noreturn)) run_guest(void)
+{
+	for (;;) {
+		unsigned long notify_addr;
+		int readval;
+
+		/* Reading from the /dev/lguest device is what runs the Guest. */
+		readval = pread(lguest_fd, &notify_addr,
+				sizeof(notify_addr), cpu_id);
+
+		/* A whole unsigned long back: the Guest did HCALL_NOTIFY. */
+		if (readval == sizeof(notify_addr)) {
+			verbose("Notify on address %#lx\n", notify_addr);
+			handle_output(notify_addr);
+			continue;
+		}
+
+		switch (errno) {
+		case ENOENT: {
+			/* The Guest died.  Reading again tells us why. */
+			char reason[1024] = { 0 };
+
+			pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
+			errx(1, "%s", reason);
+			break;
+		}
+		case ERESTART:
+			/* The Guest asked to be rebooted. */
+			restart_guest();
+			break;
+		default:
+			/* Anything else: a bug or an incompatible change. */
+			err(1, "Running guest failed");
+		}
+	}
+}
+/*L:240
+ * This is the end of the Launcher. The good news: we are over halfway
+ * through! The bad news: the most fiendish part of the code still lies ahead
+ * of us.
+ *
+ * Are you ready? Take a deep breath and join me in the core of the Host, in
+ * "make Host".
+:*/
+
+/*
+ * Long options understood by the Launcher, for getopt_long().  Each one maps
+ * to the single character main() switches on.
+ */
+static struct option opts[] = {
+	{ "verbose",  no_argument,       NULL, 'v' },
+	{ "tunnet",   required_argument, NULL, 't' },
+	{ "block",    required_argument, NULL, 'b' },
+	{ "rng",      no_argument,       NULL, 'r' },
+	{ "initrd",   required_argument, NULL, 'i' },
+	{ "username", required_argument, NULL, 'u' },
+	{ "chroot",   required_argument, NULL, 'c' },
+	/* All-zero entry terminates the table. */
+	{ NULL, 0, NULL, 0 },
+};
+/*
+ * Print the command-line synopsis and exit with status 1.  Every option in
+ * the opts table is listed here.
+ */
+static void usage(void)
+{
+	errx(1, "Usage: lguest [--verbose] "
+	     "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n"
+	     "|--block=<filename>|--initrd=<filename>|--rng\n"
+	     "|--username=<username>|--chroot=<directory>]...\n"
+	     "<mem-in-mb> vmlinux [args...]");
+}
+
+/*L:105
+ * The main routine is where the real work begins:
+ *
+ * it maps the Guest's memory, parses the options (creating devices as it
+ * goes), sets up the console, loads the kernel and optional initrd, fills in
+ * the boot header, tells the kernel about the Guest, optionally chroots and
+ * drops privileges, and finally enters run_guest(), which never returns.
+ */
+int main(int argc, char *argv[])
+{
+ /* Memory, code startpoint and size of the (optional) initrd. */
+ unsigned long mem = 0, start, initrd_size = 0;
+ /* Two temporaries. */
+ int i, c;
+ /* The boot information for the Guest. */
+ struct boot_params *boot;
+ /* If they specify an initrd file to load. */
+ const char *initrd_name = NULL;
+
+ /* Password structure for initgroups/setres[gu]id */
+ struct passwd *user_details = NULL;
+
+ /* Directory to chroot to */
+ char *chroot_path = NULL;
+
+ /* Save the args: we "reboot" by execing ourselves again. */
+ main_args = argv;
+
+ /*
+ * First we initialize the device list. We keep a pointer to the last
+ * device, and the next interrupt number to use for devices (1:
+ * remember that 0 is used by the timer).
+ */
+ devices.lastdev = NULL;
+ devices.next_irq = 1;
+
+ /* We're CPU 0. In fact, that's the only CPU possible right now. */
+ cpu_id = 0;
+
+ /*
+ * We need to know how much memory so we can set up the device
+ * descriptor and memory pages for the devices as we parse the command
+ * line. So we quickly look through the arguments to find the amount
+ * of memory now: it is taken from the first argument that does not
+ * start with '-'.
+ *
+ * NOTE(review): atoi() gives no error indication, so a non-numeric
+ * argument here yields mem == 0 - confirm that is acceptable.
+ */
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] != '-') {
+ mem = atoi(argv[i]) * 1024 * 1024;
+ /*
+ * We start by mapping anonymous pages over all of
+ * guest-physical memory range. This fills it with 0,
+ * and ensures that the Guest won't be killed when it
+ * tries to access it.
+ */
+ guest_base = map_zeroed_pages(mem / getpagesize()
+ + DEVICE_PAGES);
+ guest_limit = mem;
+ guest_max = mem + DEVICE_PAGES*getpagesize();
+ devices.descpage = get_pages(1);
+ break;
+ }
+ }
+
+ /* The options are fairly straight-forward */
+ while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
+ switch (c) {
+ case 'v':
+ verbose = true;
+ break;
+ case 't':
+ setup_tun_net(optarg);
+ break;
+ case 'b':
+ setup_block_file(optarg);
+ break;
+ case 'r':
+ setup_rng();
+ break;
+ case 'i':
+ initrd_name = optarg;
+ break;
+ case 'u':
+ user_details = getpwnam(optarg);
+ if (!user_details)
+ err(1, "getpwnam failed, incorrect username?");
+ break;
+ case 'c':
+ chroot_path = optarg;
+ break;
+ default:
+ warnx("Unknown argument %s", argv[optind]);
+ usage();
+ }
+ }
+ /*
+ * After the other arguments we expect memory and kernel image name,
+ * followed by command line arguments for the kernel.
+ */
+ if (optind + 2 > argc)
+ usage();
+
+ verbose("Guest base is at %p\n", guest_base);
+
+ /* We always have a console device */
+ setup_console();
+
+ /* Now we load the kernel */
+ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
+
+ /* Boot information is stashed at physical address 0 */
+ boot = from_guest_phys(0);
+
+ /* Map the initrd image if requested (at top of physical memory) */
+ if (initrd_name) {
+ initrd_size = load_initrd(initrd_name, mem);
+ /*
+ * These are the location in the Linux boot header where the
+ * start and size of the initrd are expected to be found.
+ */
+ boot->hdr.ramdisk_image = mem - initrd_size;
+ boot->hdr.ramdisk_size = initrd_size;
+ /* The bootloader type 0xFF means "unknown"; that's OK. */
+ boot->hdr.type_of_loader = 0xFF;
+ }
+
+ /*
+ * The Linux boot header contains an "E820" memory map: ours is a
+ * simple, single region.
+ */
+ boot->e820_entries = 1;
+ boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM });
+ /*
+ * The boot header contains a command line pointer: we put the command
+ * line after the boot header.
+ */
+ boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);
+ /* We use a simple helper to copy the arguments separated by spaces. */
+ concat((char *)(boot + 1), argv+optind+2);
+
+ /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
+ boot->hdr.kernel_alignment = 0x1000000;
+
+ /* Boot protocol version: 2.07 supports the fields for lguest. */
+ boot->hdr.version = 0x207;
+
+ /* The hardware_subarch value of "1" tells the Guest it's an lguest. */
+ boot->hdr.hardware_subarch = 1;
+
+ /* Tell the entry path not to try to reload segment registers. */
+ boot->hdr.loadflags |= KEEP_SEGMENTS;
+
+ /* We tell the kernel to initialize the Guest. */
+ tell_kernel(start);
+
+ /* Ensure that we terminate if a device-servicing child dies. */
+ signal(SIGCHLD, kill_launcher);
+
+ /* If we exit via err(), this kills all the threads, restores tty. */
+ atexit(cleanup_devices);
+
+ /* If requested, chroot to a directory */
+ if (chroot_path) {
+ if (chroot(chroot_path) != 0)
+ err(1, "chroot(\"%s\") failed", chroot_path);
+
+ if (chdir("/") != 0)
+ err(1, "chdir(\"/\") failed");
+
+ verbose("chroot done\n");
+ }
+
+ /*
+ * If requested, drop privileges: supplementary groups first, then the
+ * group IDs, and the user IDs last.
+ */
+ if (user_details) {
+ uid_t u;
+ gid_t g;
+
+ u = user_details->pw_uid;
+ g = user_details->pw_gid;
+
+ if (initgroups(user_details->pw_name, g) != 0)
+ err(1, "initgroups failed");
+
+ if (setresgid(g, g, g) != 0)
+ err(1, "setresgid failed");
+
+ if (setresuid(u, u, u) != 0)
+ err(1, "setresuid failed");
+
+ verbose("Dropping privileges completed\n");
+ }
+
+ /* Finally, run the Guest. This doesn't return. */
+ run_guest();
+}
+/*:*/
+
+/*M:999
+ * Mastery is done: you now know everything I do.
+ *
+ * But surely you have seen code, features and bugs in your wanderings which
+ * you now yearn to attack? That is the real game, and I look forward to you
+ * patching and forking lguest into the Your-Name-Here-visor.
+ *
+ * Farewell, and good coding!
+ * Rusty Russell.
+ */
diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt
new file mode 100644
index 00000000000..bff0c554485
--- /dev/null
+++ b/tools/lguest/lguest.txt
@@ -0,0 +1,129 @@
+ __
+ (___()'`; Rusty's Remarkably Unreliable Guide to Lguest
+ /, /` - or, A Young Coder's Illustrated Hypervisor
+ \\"--\\ http://lguest.ozlabs.org
+
+Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel,
+for Linux developers and users to experiment with virtualization with the
+minimum of complexity. Nonetheless, it should have sufficient features to
+make it useful for specific tasks, and, of course, you are encouraged to fork
+and enhance it (see drivers/lguest/README).
+
+Features:
+
+- Kernel module which runs in a normal kernel.
+- Simple I/O model for communication.
+- Simple program to create new guests.
+- Logo contains cute puppies: http://lguest.ozlabs.org
+
+Developer features:
+
+- Fun to hack on.
+- No ABI: being tied to a specific kernel anyway, you can change anything.
+- Many opportunities for improvement or feature implementation.
+
+Running Lguest:
+
+- The easiest way to run lguest is to use the same kernel as guest and host.
+ You can configure them differently, but usually it's easiest not to.
+
+ You will need to configure your kernel with the following options:
+
+ "General setup":
+ "Prompt for development and/or incomplete code/drivers" = Y
+ (CONFIG_EXPERIMENTAL=y)
+
+ "Processor type and features":
+ "Paravirtualized guest support" = Y
+ "Lguest guest support" = Y
+ "High Memory Support" = off/4GB
+ "Alignment value to which kernel should be aligned" = 0x100000
+ (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
+ CONFIG_PHYSICAL_ALIGN=0x100000)
+
+ "Device Drivers":
+ "Block devices"
+ "Virtio block driver (EXPERIMENTAL)" = M/Y
+ "Network device support"
+ "Universal TUN/TAP device driver support" = M/Y
+ "Virtio network driver (EXPERIMENTAL)" = M/Y
+ (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m)
+
+ "Virtualization"
+ "Linux hypervisor example code" = M/Y
+ (CONFIG_LGUEST=m)
+
+- A tool called "lguest" is available in this directory: type "make"
+ to build it. If you didn't build your kernel in-tree, use "make
+ O=<builddir>".
+
+- Create or find a root disk image. There are several useful ones
+ around, such as the xm-test tiny root image at
+ http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
+
+ For more serious work, I usually use a distribution ISO image and
+ install it under qemu, then make multiple copies:
+
+ dd if=/dev/zero of=rootfile bs=1M count=2048
+ qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
+
+ Make sure that you install a getty on /dev/hvc0 if you want to log in on the
+ console!
+
+- "modprobe lg" if you built it as a module.
+
+- Run an lguest as root:
+
+ tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
+ --block=rootfile root=/dev/vda
+
+ Explanation:
+ 64: the amount of memory to use, in MB.
+
+ vmlinux: the kernel image found in the top of your build directory. You
+ can also use a standard bzImage.
+
+ --tunnet=192.168.19.1: configures a "tap" device for networking with this
+ IP address.
+
+ --block=rootfile: a file or block device which becomes /dev/vda
+ inside the guest.
+
+ root=/dev/vda: this (and anything else on the command line) are
+ kernel boot parameters.
+
+- Configuring networking. I usually have the host masquerade, using
+ "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
+ /proc/sys/net/ipv4/ip_forward". In this example, I would configure
+ eth0 inside the guest at 192.168.19.2.
+
+ Another method is to bridge the tap device to an external interface
+ using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
+ to obtain an IP address. The bridge needs to be configured first:
+ this option simply adds the tap interface to it.
+
+ A simple example on my system:
+
+ ifconfig eth0 0.0.0.0
+ brctl addbr lg0
+ ifconfig lg0 up
+ brctl addif lg0 eth0
+ dhclient lg0
+
+ Then use --tunnet=bridge:lg0 when launching the guest.
+
+ See:
+
+ http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
+
+ for general information on how to get bridging to work.
+
+- Random number generation. Using the --rng option will provide a
+ /dev/hwrng in the guest that will read from the host's /dev/random.
+ Use this option in conjunction with rng-tools (see Documentation/hw_random.txt)
+ to provide entropy to the guest kernel's /dev/random.
+
+There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest
+
+Good luck!
+Rusty Russell rusty@rustcorp.com.au.
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
index 8eb6c489fb1..77f95276242 100644
--- a/tools/perf/Documentation/examples.txt
+++ b/tools/perf/Documentation/examples.txt
@@ -17,8 +17,8 @@ titan:~> perf list
kmem:kmem_cache_alloc_node [Tracepoint event]
kmem:kfree [Tracepoint event]
kmem:kmem_cache_free [Tracepoint event]
- kmem:mm_page_free_direct [Tracepoint event]
- kmem:mm_pagevec_free [Tracepoint event]
+ kmem:mm_page_free [Tracepoint event]
+ kmem:mm_page_free_batched [Tracepoint event]
kmem:mm_page_alloc [Tracepoint event]
kmem:mm_page_alloc_zone_locked [Tracepoint event]
kmem:mm_page_pcpu_drain [Tracepoint event]
@@ -29,15 +29,15 @@ measured. For example the page alloc/free properties of a 'hackbench
run' are:
titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
- -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
+ -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
Time: 0.575
Performance counter stats for './hackbench 10':
13857 kmem:mm_page_pcpu_drain
27576 kmem:mm_page_alloc
- 6025 kmem:mm_pagevec_free
- 20934 kmem:mm_page_free_direct
+ 6025 kmem:mm_page_free_batched
+ 20934 kmem:mm_page_free
0.613972165 seconds time elapsed
@@ -45,8 +45,8 @@ You can observe the statistical properties as well, by using the
'repeat the workload N times' feature of perf stat:
titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
- kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
- kmem:mm_page_free_direct ./hackbench 10
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free ./hackbench 10
Time: 0.627
Time: 0.644
Time: 0.564
@@ -57,8 +57,8 @@ You can observe the statistical properties as well, by using the
12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
25035 kmem:mm_page_alloc ( +- 3.783% )
- 6104 kmem:mm_pagevec_free ( +- 0.934% )
- 18376 kmem:mm_page_free_direct ( +- 4.941% )
+ 6104 kmem:mm_page_free_batched ( +- 0.934% )
+ 18376 kmem:mm_page_free ( +- 4.941% )
0.643954516 seconds time elapsed ( +- 2.363% )
@@ -158,15 +158,15 @@ Or you can observe the whole system's page allocations for 10
seconds:
titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
-kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
-kmem:mm_page_free_direct sleep 10
+kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+kmem:mm_page_free sleep 10
Performance counter stats for 'sleep 10':
171585 kmem:mm_page_pcpu_drain
322114 kmem:mm_page_alloc
- 73623 kmem:mm_pagevec_free
- 254115 kmem:mm_page_free_direct
+ 73623 kmem:mm_page_free_batched
+ 254115 kmem:mm_page_free
10.000591410 seconds time elapsed
@@ -174,15 +174,15 @@ Or observe how fluctuating the page allocations are, via statistical
analysis done over ten 1-second intervals:
titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
- kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
- kmem:mm_page_free_direct sleep 1
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free sleep 1
Performance counter stats for 'sleep 1' (10 runs):
17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
34394 kmem:mm_page_alloc ( +- 4.617% )
- 7509 kmem:mm_pagevec_free ( +- 4.820% )
- 25653 kmem:mm_page_free_direct ( +- 3.672% )
+ 7509 kmem:mm_page_free_batched ( +- 4.820% )
+ 25653 kmem:mm_page_free ( +- 3.672% )
1.058135029 seconds time elapsed ( +- 3.089% )
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index fe6762ed56b..c89f9e1453f 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -22,7 +22,7 @@ OPTIONS
-------
-i::
--input=::
- Input file name. (default: perf.data)
+ Input file name. (default: perf.data unless stdin is a fifo)
-d::
--dsos=<dso[,dso...]>::
@@ -66,7 +66,7 @@ OPTIONS
used. This interfaces starts by centering on the line with more
samples, TAB/UNTAB cycles through the lines with more samples.
--c::
+-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
index cc22325ffd1..25c52efcc7f 100644
--- a/tools/perf/Documentation/perf-buildid-list.txt
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -26,7 +26,7 @@ OPTIONS
Show only DSOs with hits.
-i::
--input=::
- Input file name. (default: perf.data)
+ Input file name. (default: perf.data unless stdin is a fifo)
-f::
--force::
Don't do ownership validation.
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 0cada9e053d..0507ec7bad7 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -18,7 +18,7 @@ OPTIONS
-------
-i::
--input=::
- Input file name. (default: perf.data)
+ Input file name. (default: perf.data unless stdin is a fifo)
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index a52fcde894c..7c8fbbf3f61 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -23,7 +23,7 @@ OPTIONS
-------
-i <file>::
--input=<file>::
- Select the input file (default: perf.data)
+ Select the input file (default: perf.data unless stdin is a fifo)
--caller::
Show per-callsite statistics
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 4a26a2f3a6a..d6b2a4f2108 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -29,7 +29,7 @@ COMMON OPTIONS
-i::
--input=<file>::
- Input file name.
+ Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5a520f82529..2937f7e14bb 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -89,7 +89,7 @@ OPTIONS
-m::
--mmap-pages=::
- Number of mmap data pages.
+ Number of mmap data pages. Must be a power of two.
-g::
--call-graph::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 212f24d672e..9b430e98712 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -19,7 +19,7 @@ OPTIONS
-------
-i::
--input=::
- Input file name. (default: perf.data)
+ Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
@@ -39,7 +39,7 @@ OPTIONS
-T::
--threads::
Show per-thread event counters
--C::
+-c::
--comms=::
Only consider symbols in these comms. CSV that understands
file://filename entries.
@@ -80,9 +80,10 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
--g [type,min,order]::
+-g [type,min[,limit],order]::
--call-graph::
- Display call chains using type, min percent threshold and order.
+ Display call chains using type, min percent threshold, optional print
+ limit and order.
type can be either:
- flat: single column, linear exposure of call chains.
- graph: use a graph tree, displaying absolute overhead rates.
@@ -128,7 +129,7 @@ OPTIONS
--symfs=<directory>::
Look for files with symbols relative to this directory.
--c::
+-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 5b212b57f70..8ff4df95695 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -40,7 +40,7 @@ OPTIONS
-------
-i::
--input=<file>::
- Input file name. (default: perf.data)
+ Input file name. (default: perf.data unless stdin is a fifo)
-v::
--verbose::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index dec87ecb530..2f6cef43da2 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -106,7 +106,7 @@ OPTIONS
-i::
--input=::
- Input file name.
+ Input file name. (default: perf.data unless stdin is a fifo)
-d::
--debug-mode::
@@ -182,12 +182,17 @@ OPTIONS
--hide-call-graph::
When printing symbols do not display call chain.
--c::
+-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
CPUs are specified with -: 0-2. Default is to report samples on all
CPUs.
+-c::
+--comms=::
+ Only display events for these comms. CSV that understands
+ file://filename entries.
+
-I::
--show-info::
Display extended information about the perf.data file. This adds
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index 2c3b462f64b..b24ac40fcd5 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -8,13 +8,19 @@ perf-test - Runs sanity tests.
SYNOPSIS
--------
[verse]
-'perf test <options>'
+'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
DESCRIPTION
-----------
This command does assorted sanity tests, initially through linked routines but
also will look for a directory with more tests in the form of scripts.
+To get a list of available tests use 'perf test list'; specifying a test name
+fragment will show all tests that contain it.
+
+To run just specific tests, specify test name fragments or the numbers obtained
+from 'perf test list'.
+
OPTIONS
-------
-v::
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index d7b79e2ba2a..1632b0efc75 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -27,7 +27,7 @@ OPTIONS
Select the output file (default: output.svg)
-i::
--input=::
- Select the input file (default: perf.data)
+ Select the input file (default: perf.data unless stdin is a fifo)
-w::
--width=::
Select the width of the SVG file (default: 1000)
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index b98e3075646..ac86d67b636 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -278,6 +278,7 @@ LIB_H += util/strbuf.h
LIB_H += util/strlist.h
LIB_H += util/strfilter.h
LIB_H += util/svghelper.h
+LIB_H += util/tool.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
LIB_H += util/symbol.h
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 48ae0c5e3f7..7cdd61d0e27 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -9,7 +9,10 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <stdlib.h>
+#ifndef __UCLIBC__
#include <libio.h>
+#endif
#include <dwarf-regs.h>
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 46b4c24f338..214ba7f9f57 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -27,32 +27,32 @@
#include "util/sort.h"
#include "util/hist.h"
#include "util/session.h"
+#include "util/tool.h"
#include <linux/bitmap.h>
-static char const *input_name = "perf.data";
-
-static bool force, use_tui, use_stdio;
-
-static bool full_paths;
-
-static bool print_line;
-
-static const char *sym_hist_filter;
-
-static const char *cpu_list;
-static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+struct perf_annotate {
+ struct perf_tool tool;
+ char const *input_name;
+ bool force, use_tui, use_stdio;
+ bool full_paths;
+ bool print_line;
+ const char *sym_hist_filter;
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
-static int perf_evlist__add_sample(struct perf_evlist *evlist,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct addr_location *al)
+static int perf_evsel__add_sample(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct addr_location *al,
+ struct perf_annotate *ann)
{
struct hist_entry *he;
int ret;
- if (sym_hist_filter != NULL &&
- (al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
+ if (ann->sym_hist_filter != NULL &&
+ (al->sym == NULL ||
+ strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
/* We're only interested in a symbol named sym_hist_filter */
if (al->sym != NULL) {
rb_erase(&al->sym->rb_node,
@@ -69,8 +69,7 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
ret = 0;
if (he->ms.sym != NULL) {
struct annotation *notes = symbol__annotation(he->ms.sym);
- if (notes->src == NULL &&
- symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
+ if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
@@ -81,25 +80,26 @@ static int perf_evlist__add_sample(struct perf_evlist *evlist,
return ret;
}
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
- struct perf_session *session)
+ struct machine *machine)
{
+ struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
struct addr_location al;
- if (perf_event__preprocess_sample(event, session, &al, sample,
+ if (perf_event__preprocess_sample(event, machine, &al, sample,
symbol__annotate_init) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
- if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+ if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
return 0;
- if (!al.filtered &&
- perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
+ if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
return -1;
@@ -108,14 +108,15 @@ static int process_sample_event(union perf_event *event,
return 0;
}
-static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
+static int hist_entry__tty_annotate(struct hist_entry *he, int evidx,
+ struct perf_annotate *ann)
{
return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
- print_line, full_paths, 0, 0);
+ ann->print_line, ann->full_paths, 0, 0);
}
static void hists__find_annotations(struct hists *self, int evidx,
- int nr_events)
+ struct perf_annotate *ann)
{
struct rb_node *nd = rb_first(&self->entries), *next;
int key = K_RIGHT;
@@ -138,8 +139,7 @@ find_next:
}
if (use_browser > 0) {
- key = hist_entry__tui_annotate(he, evidx, nr_events,
- NULL, NULL, 0);
+ key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0);
switch (key) {
case K_RIGHT:
next = rb_next(nd);
@@ -154,7 +154,7 @@ find_next:
if (next != NULL)
nd = next;
} else {
- hist_entry__tty_annotate(he, evidx);
+ hist_entry__tty_annotate(he, evidx, ann);
nd = rb_next(nd);
/*
* Since we have a hist_entry per IP for the same
@@ -167,33 +167,26 @@ find_next:
}
}
-static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .comm = perf_event__process_comm,
- .fork = perf_event__process_task,
- .ordered_samples = true,
- .ordering_requires_timestamps = true,
-};
-
-static int __cmd_annotate(void)
+static int __cmd_annotate(struct perf_annotate *ann)
{
int ret;
struct perf_session *session;
struct perf_evsel *pos;
u64 total_nr_samples;
- session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
+ session = perf_session__new(ann->input_name, O_RDONLY,
+ ann->force, false, &ann->tool);
if (session == NULL)
return -ENOMEM;
- if (cpu_list) {
- ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+ if (ann->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, ann->cpu_list,
+ ann->cpu_bitmap);
if (ret)
goto out_delete;
}
- ret = perf_session__process_events(session, &event_ops);
+ ret = perf_session__process_events(session, &ann->tool);
if (ret)
goto out_delete;
@@ -217,13 +210,12 @@ static int __cmd_annotate(void)
total_nr_samples += nr_samples;
hists__collapse_resort(hists);
hists__output_resort(hists);
- hists__find_annotations(hists, pos->idx,
- session->evlist->nr_entries);
+ hists__find_annotations(hists, pos->idx, ann);
}
}
if (total_nr_samples == 0) {
- ui__warning("The %s file has no samples!\n", input_name);
+ ui__warning("The %s file has no samples!\n", session->filename);
goto out_delete;
}
out_delete:
@@ -247,29 +239,41 @@ static const char * const annotate_usage[] = {
NULL
};
-static const struct option options[] = {
- OPT_STRING('i', "input", &input_name, "file",
+int cmd_annotate(int argc, const char **argv, const char *prefix __used)
+{
+ struct perf_annotate annotate = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .fork = perf_event__process_task,
+ .ordered_samples = true,
+ .ordering_requires_timestamps = true,
+ },
+ };
+ const struct option options[] = {
+ OPT_STRING('i', "input", &annotate.input_name, "file",
"input file name"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
- OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
+ OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
"symbol to annotate"),
- OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_BOOLEAN('f', "force", &annotate.force, "don't complain, do it"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
- OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
- OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
- OPT_BOOLEAN('l', "print-line", &print_line,
+ OPT_BOOLEAN('l', "print-line", &annotate.print_line,
"print matching source lines (may be slow)"),
- OPT_BOOLEAN('P', "full-paths", &full_paths,
+ OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
"Don't shorten the displayed pathnames"),
- OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+ OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
@@ -279,15 +283,13 @@ static const struct option options[] = {
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_END()
-};
+ };
-int cmd_annotate(int argc, const char **argv, const char *prefix __used)
-{
argc = parse_options(argc, argv, options, annotate_usage, 0);
- if (use_stdio)
+ if (annotate.use_stdio)
use_browser = 0;
- else if (use_tui)
+ else if (annotate.use_tui)
use_browser = 1;
setup_browser(true);
@@ -308,7 +310,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
if (argc > 1)
usage_with_options(annotate_usage, options);
- sym_hist_filter = argv[0];
+ annotate.sym_hist_filter = argv[0];
}
if (field_sep && *field_sep == '.') {
@@ -316,5 +318,5 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
return -1;
}
- return __cmd_annotate();
+ return __cmd_annotate(&annotate);
}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index cb690a65bf0..52480467e9f 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -18,7 +18,7 @@
#include <libelf.h>
-static char const *input_name = "perf.data";
+static const char *input_name;
static bool force;
static bool show_kernel;
static bool with_hits;
@@ -39,24 +39,6 @@ static const struct option options[] = {
OPT_END()
};
-static int perf_session__list_build_ids(void)
-{
- struct perf_session *session;
-
- session = perf_session__new(input_name, O_RDONLY, force, false,
- &build_id__mark_dso_hit_ops);
- if (session == NULL)
- return -1;
-
- if (with_hits)
- perf_session__process_events(session, &build_id__mark_dso_hit_ops);
-
- perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
-
- perf_session__delete(session);
- return 0;
-}
-
static int sysfs__fprintf_build_id(FILE *fp)
{
u8 kallsyms_build_id[BUILD_ID_SIZE];
@@ -85,17 +67,36 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
return fprintf(fp, "%s\n", sbuild_id);
}
-static int __cmd_buildid_list(void)
+static int perf_session__list_build_ids(void)
{
- if (show_kernel)
- return sysfs__fprintf_build_id(stdout);
+ struct perf_session *session;
elf_version(EV_CURRENT);
+
+ session = perf_session__new(input_name, O_RDONLY, force, false,
+ &build_id__mark_dso_hit_ops);
+ if (session == NULL)
+ return -1;
+
/*
- * See if this is an ELF file first:
- */
- if (filename__fprintf_build_id(input_name, stdout))
- return 0;
+ * See if this is an ELF file first:
+ */
+ if (filename__fprintf_build_id(session->filename, stdout))
+ goto out;
+
+ if (with_hits)
+ perf_session__process_events(session, &build_id__mark_dso_hit_ops);
+
+ perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
+out:
+ perf_session__delete(session);
+ return 0;
+}
+
+static int __cmd_buildid_list(void)
+{
+ if (show_kernel)
+ return sysfs__fprintf_build_id(stdout);
return perf_session__list_build_ids();
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index b39f3a1ee7d..4f19513d7dd 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -9,7 +9,9 @@
#include "util/debug.h"
#include "util/event.h"
#include "util/hist.h"
+#include "util/evsel.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/sort.h"
#include "util/symbol.h"
#include "util/util.h"
@@ -30,14 +32,15 @@ static int hists__add_entry(struct hists *self,
return -ENOMEM;
}
-static int diff__process_sample_event(union perf_event *event,
+static int diff__process_sample_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct machine *machine)
{
struct addr_location al;
- if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -46,16 +49,16 @@ static int diff__process_sample_event(union perf_event *event,
if (al.filtered || al.sym == NULL)
return 0;
- if (hists__add_entry(&session->hists, &al, sample->period)) {
+ if (hists__add_entry(&evsel->hists, &al, sample->period)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
- session->hists.stats.total_period += sample->period;
+ evsel->hists.stats.total_period += sample->period;
return 0;
}
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_diff = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
@@ -145,13 +148,13 @@ static int __cmd_diff(void)
int ret, i;
struct perf_session *session[2];
- session[0] = perf_session__new(input_old, O_RDONLY, force, false, &event_ops);
- session[1] = perf_session__new(input_new, O_RDONLY, force, false, &event_ops);
+ session[0] = perf_session__new(input_old, O_RDONLY, force, false, &perf_diff);
+ session[1] = perf_session__new(input_new, O_RDONLY, force, false, &perf_diff);
if (session[0] == NULL || session[1] == NULL)
return -ENOMEM;
for (i = 0; i < 2; ++i) {
- ret = perf_session__process_events(session[i], &event_ops);
+ ret = perf_session__process_events(session[i], &perf_diff);
if (ret)
goto out_delete;
}
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 4c5e9e04a41..26760322c4f 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -15,7 +15,7 @@
#include "util/parse-options.h"
#include "util/session.h"
-static char const *input_name = "perf.data";
+static const char *input_name;
static int __cmd_evlist(void)
{
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8dfc12bb119..09c106193e6 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -9,6 +9,7 @@
#include "perf.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/debug.h"
#include "util/parse-options.h"
@@ -16,8 +17,9 @@
static char const *input_name = "-";
static bool inject_build_ids;
-static int perf_event__repipe_synth(union perf_event *event,
- struct perf_session *session __used)
+static int perf_event__repipe_synth(struct perf_tool *tool __used,
+ union perf_event *event,
+ struct machine *machine __used)
{
uint32_t size;
void *buf = event;
@@ -36,41 +38,70 @@ static int perf_event__repipe_synth(union perf_event *event,
return 0;
}
-static int perf_event__repipe(union perf_event *event,
+static int perf_event__repipe_op2_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __used)
+{
+ return perf_event__repipe_synth(tool, event, NULL);
+}
+
+static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
+ union perf_event *event)
+{
+ return perf_event__repipe_synth(tool, event, NULL);
+}
+
+static int perf_event__repipe_tracing_data_synth(union perf_event *event,
+ struct perf_session *session __used)
+{
+ return perf_event__repipe_synth(NULL, event, NULL);
+}
+
+static int perf_event__repipe_attr(union perf_event *event,
+ struct perf_evlist **pevlist __used)
+{
+ return perf_event__repipe_synth(NULL, event, NULL);
+}
+
+static int perf_event__repipe(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine)
{
- return perf_event__repipe_synth(event, session);
+ return perf_event__repipe_synth(tool, event, machine);
}
-static int perf_event__repipe_sample(union perf_event *event,
+static int perf_event__repipe_sample(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample __used,
struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct machine *machine)
{
- return perf_event__repipe_synth(event, session);
+ return perf_event__repipe_synth(tool, event, machine);
}
-static int perf_event__repipe_mmap(union perf_event *event,
+static int perf_event__repipe_mmap(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
- struct perf_session *session)
+ struct machine *machine)
{
int err;
- err = perf_event__process_mmap(event, sample, session);
- perf_event__repipe(event, sample, session);
+ err = perf_event__process_mmap(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
return err;
}
-static int perf_event__repipe_task(union perf_event *event,
+static int perf_event__repipe_task(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
- struct perf_session *session)
+ struct machine *machine)
{
int err;
- err = perf_event__process_task(event, sample, session);
- perf_event__repipe(event, sample, session);
+ err = perf_event__process_task(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
return err;
}
@@ -80,7 +111,7 @@ static int perf_event__repipe_tracing_data(union perf_event *event,
{
int err;
- perf_event__repipe_synth(event, session);
+ perf_event__repipe_synth(NULL, event, NULL);
err = perf_event__process_tracing_data(event, session);
return err;
@@ -100,10 +131,10 @@ static int dso__read_build_id(struct dso *self)
return -1;
}
-static int dso__inject_build_id(struct dso *self, struct perf_session *session)
+static int dso__inject_build_id(struct dso *self, struct perf_tool *tool,
+ struct machine *machine)
{
u16 misc = PERF_RECORD_MISC_USER;
- struct machine *machine;
int err;
if (dso__read_build_id(self) < 0) {
@@ -111,17 +142,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return -1;
}
- machine = perf_session__find_host_machine(session);
- if (machine == NULL) {
- pr_err("Can't find machine for session\n");
- return -1;
- }
-
if (self->kernel)
misc = PERF_RECORD_MISC_KERNEL;
- err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
- machine, session);
+ err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe,
+ machine);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", self->long_name);
return -1;
@@ -130,10 +155,11 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return 0;
}
-static int perf_event__inject_buildid(union perf_event *event,
+static int perf_event__inject_buildid(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct machine *machine)
{
struct addr_location al;
struct thread *thread;
@@ -141,21 +167,21 @@ static int perf_event__inject_buildid(union perf_event *event,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- thread = perf_session__findnew(session, event->ip.pid);
+ thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
goto repipe;
}
- thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- event->ip.pid, event->ip.ip, &al);
+ thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
+ event->ip.ip, &al);
if (al.map != NULL) {
if (!al.map->dso->hit) {
al.map->dso->hit = 1;
if (map__load(al.map, NULL) >= 0) {
- dso__inject_build_id(al.map->dso, session);
+ dso__inject_build_id(al.map->dso, tool, machine);
/*
* If this fails, too bad, let the other side
* account this as unresolved.
@@ -168,24 +194,24 @@ static int perf_event__inject_buildid(union perf_event *event,
}
repipe:
- perf_event__repipe(event, sample, session);
+ perf_event__repipe(tool, event, sample, machine);
return 0;
}
-struct perf_event_ops inject_ops = {
+struct perf_tool perf_inject = {
.sample = perf_event__repipe_sample,
.mmap = perf_event__repipe,
.comm = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
.lost = perf_event__repipe,
- .read = perf_event__repipe,
+ .read = perf_event__repipe_sample,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
- .attr = perf_event__repipe_synth,
- .event_type = perf_event__repipe_synth,
- .tracing_data = perf_event__repipe_synth,
- .build_id = perf_event__repipe_synth,
+ .attr = perf_event__repipe_attr,
+ .event_type = perf_event__repipe_event_type_synth,
+ .tracing_data = perf_event__repipe_tracing_data_synth,
+ .build_id = perf_event__repipe_op2_synth,
};
extern volatile int session_done;
@@ -203,17 +229,17 @@ static int __cmd_inject(void)
signal(SIGINT, sig_handler);
if (inject_build_ids) {
- inject_ops.sample = perf_event__inject_buildid;
- inject_ops.mmap = perf_event__repipe_mmap;
- inject_ops.fork = perf_event__repipe_task;
- inject_ops.tracing_data = perf_event__repipe_tracing_data;
+ perf_inject.sample = perf_event__inject_buildid;
+ perf_inject.mmap = perf_event__repipe_mmap;
+ perf_inject.fork = perf_event__repipe_task;
+ perf_inject.tracing_data = perf_event__repipe_tracing_data;
}
- session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
+ session = perf_session__new(input_name, O_RDONLY, false, true, &perf_inject);
if (session == NULL)
return -ENOMEM;
- ret = perf_session__process_events(session, &inject_ops);
+ ret = perf_session__process_events(session, &perf_inject);
perf_session__delete(session);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 225e963df10..fe1ad8f2196 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -7,6 +7,7 @@
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
@@ -18,7 +19,7 @@
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
-static char const *input_name = "perf.data";
+static const char *input_name;
static int alloc_flag;
static int caller_flag;
@@ -303,12 +304,13 @@ static void process_raw_event(union perf_event *raw_event __used, void *data,
}
}
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(session, event->ip.pid);
+ struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -324,7 +326,7 @@ static int process_sample_event(union perf_event *event,
return 0;
}
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_kmem = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
@@ -483,7 +485,7 @@ static int __cmd_kmem(void)
{
int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
- 0, false, &event_ops);
+ 0, false, &perf_kmem);
if (session == NULL)
return -ENOMEM;
@@ -494,7 +496,7 @@ static int __cmd_kmem(void)
goto out_delete;
setup_pager();
- err = perf_session__process_events(session, &event_ops);
+ err = perf_session__process_events(session, &perf_kmem);
if (err != 0)
goto out_delete;
sort_result();
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 34d1e853829..032324a76b8 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -38,7 +38,7 @@ static const struct option kvm_options[] = {
OPT_BOOLEAN(0, "guest", &perf_guest,
"Collect guest os data"),
OPT_BOOLEAN(0, "host", &perf_host,
- "Collect guest os data"),
+ "Collect host os data"),
OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
"guest mount directory under which every guest os"
" instance has a subdir"),
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 899080ace26..2296c391d0f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -12,6 +12,7 @@
#include "util/debug.h"
#include "util/session.h"
+#include "util/tool.h"
#include <sys/types.h>
#include <sys/prctl.h>
@@ -325,7 +326,7 @@ alloc_failed:
die("memory allocation failed\n");
}
-static char const *input_name = "perf.data";
+static const char *input_name;
struct raw_event_sample {
u32 size;
@@ -845,12 +846,13 @@ static void dump_info(void)
die("Unknown type of information\n");
}
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel __used,
- struct perf_session *s)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(s, sample->tid);
+ struct thread *thread = machine__findnew_thread(machine, sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -863,7 +865,7 @@ static int process_sample_event(union perf_event *event,
return 0;
}
-static struct perf_event_ops eops = {
+static struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 710ae3d0a48..59d43abfbfe 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -46,7 +46,6 @@
#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
#define DEFAULT_FUNC_FILTER "!_*"
-#define MAX_PATH_LEN 256
/* Session management structure */
static struct {
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6ab58cc99d5..0abfb18b911 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"
@@ -35,55 +36,36 @@ enum write_mode_t {
WRITE_APPEND
};
-static u64 user_interval = ULLONG_MAX;
-static u64 default_interval = 0;
-
-static unsigned int page_size;
-static unsigned int mmap_pages = UINT_MAX;
-static unsigned int user_freq = UINT_MAX;
-static int freq = 1000;
-static int output;
-static int pipe_output = 0;
-static const char *output_name = NULL;
-static bool group = false;
-static int realtime_prio = 0;
-static bool nodelay = false;
-static bool raw_samples = false;
-static bool sample_id_all_avail = true;
-static bool system_wide = false;
-static pid_t target_pid = -1;
-static pid_t target_tid = -1;
-static pid_t child_pid = -1;
-static bool no_inherit = false;
-static enum write_mode_t write_mode = WRITE_FORCE;
-static bool call_graph = false;
-static bool inherit_stat = false;
-static bool no_samples = false;
-static bool sample_address = false;
-static bool sample_time = false;
-static bool no_buildid = false;
-static bool no_buildid_cache = false;
-static struct perf_evlist *evsel_list;
-
-static long samples = 0;
-static u64 bytes_written = 0;
-
-static int file_new = 1;
-static off_t post_processing_offset;
-
-static struct perf_session *session;
-static const char *cpu_list;
-static const char *progname;
-
-static void advance_output(size_t size)
+struct perf_record {
+ struct perf_tool tool;
+ struct perf_record_opts opts;
+ u64 bytes_written;
+ const char *output_name;
+ struct perf_evlist *evlist;
+ struct perf_session *session;
+ const char *progname;
+ int output;
+ unsigned int page_size;
+ int realtime_prio;
+ enum write_mode_t write_mode;
+ bool no_buildid;
+ bool no_buildid_cache;
+ bool force;
+ bool file_new;
+ bool append_file;
+ long samples;
+ off_t post_processing_offset;
+};
+
+static void advance_output(struct perf_record *rec, size_t size)
{
- bytes_written += size;
+ rec->bytes_written += size;
}
-static void write_output(void *buf, size_t size)
+static void write_output(struct perf_record *rec, void *buf, size_t size)
{
while (size) {
- int ret = write(output, buf, size);
+ int ret = write(rec->output, buf, size);
if (ret < 0)
die("failed to write");
@@ -91,30 +73,33 @@ static void write_output(void *buf, size_t size)
size -= ret;
buf += ret;
- bytes_written += ret;
+ rec->bytes_written += ret;
}
}
-static int process_synthesized_event(union perf_event *event,
+static int process_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *self __used)
+ struct machine *machine __used)
{
- write_output(event, event->header.size);
+ struct perf_record *rec = container_of(tool, struct perf_record, tool);
+ write_output(rec, event, event->header.size);
return 0;
}
-static void mmap_read(struct perf_mmap *md)
+static void perf_record__mmap_read(struct perf_record *rec,
+ struct perf_mmap *md)
{
unsigned int head = perf_mmap__read_head(md);
unsigned int old = md->prev;
- unsigned char *data = md->base + page_size;
+ unsigned char *data = md->base + rec->page_size;
unsigned long size;
void *buf;
if (old == head)
return;
- samples++;
+ rec->samples++;
size = head - old;
@@ -123,14 +108,14 @@ static void mmap_read(struct perf_mmap *md)
size = md->mask + 1 - (old & md->mask);
old += size;
- write_output(buf, size);
+ write_output(rec, buf, size);
}
buf = &data[old & md->mask];
size = head - old;
old += size;
- write_output(buf, size);
+ write_output(rec, buf, size);
md->prev = old;
perf_mmap__write_tail(md, old);
@@ -149,17 +134,18 @@ static void sig_handler(int sig)
signr = sig;
}
-static void sig_atexit(void)
+static void perf_record__sig_exit(int exit_status __used, void *arg)
{
+ struct perf_record *rec = arg;
int status;
- if (child_pid > 0) {
+ if (rec->evlist->workload.pid > 0) {
if (!child_finished)
- kill(child_pid, SIGTERM);
+ kill(rec->evlist->workload.pid, SIGTERM);
wait(&status);
if (WIFSIGNALED(status))
- psignal(WTERMSIG(status), progname);
+ psignal(WTERMSIG(status), rec->progname);
}
if (signr == -1 || signr == SIGUSR1)
@@ -169,78 +155,6 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
-{
- struct perf_event_attr *attr = &evsel->attr;
- int track = !evsel->idx; /* only the first counter needs these */
-
- attr->disabled = 1;
- attr->inherit = !no_inherit;
- attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
- PERF_FORMAT_TOTAL_TIME_RUNNING |
- PERF_FORMAT_ID;
-
- attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
-
- if (evlist->nr_entries > 1)
- attr->sample_type |= PERF_SAMPLE_ID;
-
- /*
- * We default some events to a 1 default interval. But keep
- * it a weak assumption overridable by the user.
- */
- if (!attr->sample_period || (user_freq != UINT_MAX &&
- user_interval != ULLONG_MAX)) {
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
- } else {
- attr->sample_period = default_interval;
- }
- }
-
- if (no_samples)
- attr->sample_freq = 0;
-
- if (inherit_stat)
- attr->inherit_stat = 1;
-
- if (sample_address) {
- attr->sample_type |= PERF_SAMPLE_ADDR;
- attr->mmap_data = track;
- }
-
- if (call_graph)
- attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
-
- if (system_wide)
- attr->sample_type |= PERF_SAMPLE_CPU;
-
- if (sample_id_all_avail &&
- (sample_time || system_wide || !no_inherit || cpu_list))
- attr->sample_type |= PERF_SAMPLE_TIME;
-
- if (raw_samples) {
- attr->sample_type |= PERF_SAMPLE_TIME;
- attr->sample_type |= PERF_SAMPLE_RAW;
- attr->sample_type |= PERF_SAMPLE_CPU;
- }
-
- if (nodelay) {
- attr->watermark = 0;
- attr->wakeup_events = 1;
- }
-
- attr->mmap = track;
- attr->comm = track;
-
- if (target_pid == -1 && target_tid == -1 && !system_wide) {
- attr->disabled = 1;
- attr->enable_on_exec = 1;
- }
-}
-
static bool perf_evlist__equal(struct perf_evlist *evlist,
struct perf_evlist *other)
{
@@ -260,15 +174,17 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
return true;
}
-static void open_counters(struct perf_evlist *evlist)
+static void perf_record__open(struct perf_record *rec)
{
struct perf_evsel *pos, *first;
-
- if (evlist->cpus->map[0] < 0)
- no_inherit = true;
+ struct perf_evlist *evlist = rec->evlist;
+ struct perf_session *session = rec->session;
+ struct perf_record_opts *opts = &rec->opts;
first = list_entry(evlist->entries.next, struct perf_evsel, node);
+ perf_evlist__config_attrs(evlist, opts);
+
list_for_each_entry(pos, &evlist->entries, node) {
struct perf_event_attr *attr = &pos->attr;
struct xyarray *group_fd = NULL;
@@ -286,29 +202,27 @@ static void open_counters(struct perf_evlist *evlist)
*/
bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
- if (group && pos != first)
+ if (opts->group && pos != first)
group_fd = first->fd;
-
- config_attr(pos, evlist);
retry_sample_id:
- attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+ attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
try_again:
- if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
- group_fd) < 0) {
+ if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
+ opts->group, group_fd) < 0) {
int err = errno;
if (err == EPERM || err == EACCES) {
ui__error_paranoid();
exit(EXIT_FAILURE);
- } else if (err == ENODEV && cpu_list) {
+ } else if (err == ENODEV && opts->cpu_list) {
die("No such device - did you specify"
" an out-of-range profile CPU?\n");
- } else if (err == EINVAL && sample_id_all_avail) {
+ } else if (err == EINVAL && opts->sample_id_all_avail) {
/*
* Old kernel, no attr->sample_id_type_all field
*/
- sample_id_all_avail = false;
- if (!sample_time && !raw_samples && !time_needed)
+ opts->sample_id_all_avail = false;
+ if (!opts->sample_time && !opts->raw_samples && !time_needed)
attr->sample_type &= ~PERF_SAMPLE_TIME;
goto retry_sample_id;
@@ -358,10 +272,20 @@ try_again:
exit(-1);
}
- if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+ if (errno == EPERM)
+ die("Permission error mapping pages.\n"
+ "Consider increasing "
+ "/proc/sys/kernel/perf_event_mlock_kb,\n"
+ "or try again with a smaller value of -m/--mmap_pages.\n"
+ "(current value: %d)\n", opts->mmap_pages);
+ else if (!is_power_of_2(opts->mmap_pages))
+ die("--mmap_pages/-m value must be a power of two.");
+
die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ }
- if (file_new)
+ if (rec->file_new)
session->evlist = evlist;
else {
if (!perf_evlist__equal(session->evlist, evlist)) {
@@ -373,29 +297,32 @@ try_again:
perf_session__update_sample_type(session);
}
-static int process_buildids(void)
+static int process_buildids(struct perf_record *rec)
{
- u64 size = lseek(output, 0, SEEK_CUR);
+ u64 size = lseek(rec->output, 0, SEEK_CUR);
if (size == 0)
return 0;
- session->fd = output;
- return __perf_session__process_events(session, post_processing_offset,
- size - post_processing_offset,
+ rec->session->fd = rec->output;
+ return __perf_session__process_events(rec->session, rec->post_processing_offset,
+ size - rec->post_processing_offset,
size, &build_id__mark_dso_hit_ops);
}
-static void atexit_header(void)
+static void perf_record__exit(int status __used, void *arg)
{
- if (!pipe_output) {
- session->header.data_size += bytes_written;
-
- if (!no_buildid)
- process_buildids();
- perf_session__write_header(session, evsel_list, output, true);
- perf_session__delete(session);
- perf_evlist__delete(evsel_list);
+ struct perf_record *rec = arg;
+
+ if (!rec->opts.pipe_output) {
+ rec->session->header.data_size += rec->bytes_written;
+
+ if (!rec->no_buildid)
+ process_buildids(rec);
+ perf_session__write_header(rec->session, rec->evlist,
+ rec->output, true);
+ perf_session__delete(rec->session);
+ perf_evlist__delete(rec->evlist);
symbol__exit();
}
}
@@ -403,7 +330,7 @@ static void atexit_header(void)
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
- struct perf_session *psession = data;
+ struct perf_tool *tool = data;
if (machine__is_host(machine))
return;
@@ -416,8 +343,8 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
*method is used to avoid symbol missing when the first addr is
*in module instead of in guest kernel.
*/
- err = perf_event__synthesize_modules(process_synthesized_event,
- psession, machine);
+ err = perf_event__synthesize_modules(tool, process_synthesized_event,
+ machine);
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -426,12 +353,11 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
* We use _stext for guest kernel because guest kernel's /proc/kallsyms
* have no _text sometimes.
*/
- err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine, "_text");
if (err < 0)
- err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine,
- "_stext");
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine, "_stext");
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -442,73 +368,71 @@ static struct perf_event_header finished_round_event = {
.type = PERF_RECORD_FINISHED_ROUND,
};
-static void mmap_read_all(void)
+static void perf_record__mmap_read_all(struct perf_record *rec)
{
int i;
- for (i = 0; i < evsel_list->nr_mmaps; i++) {
- if (evsel_list->mmap[i].base)
- mmap_read(&evsel_list->mmap[i]);
+ for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+ if (rec->evlist->mmap[i].base)
+ perf_record__mmap_read(rec, &rec->evlist->mmap[i]);
}
- if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
- write_output(&finished_round_event, sizeof(finished_round_event));
+ if (perf_header__has_feat(&rec->session->header, HEADER_TRACE_INFO))
+ write_output(rec, &finished_round_event, sizeof(finished_round_event));
}
-static int __cmd_record(int argc, const char **argv)
+static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
struct stat st;
int flags;
- int err;
+ int err, output;
unsigned long waking = 0;
- int child_ready_pipe[2], go_pipe[2];
const bool forks = argc > 0;
- char buf;
struct machine *machine;
+ struct perf_tool *tool = &rec->tool;
+ struct perf_record_opts *opts = &rec->opts;
+ struct perf_evlist *evsel_list = rec->evlist;
+ const char *output_name = rec->output_name;
+ struct perf_session *session;
- progname = argv[0];
+ rec->progname = argv[0];
- page_size = sysconf(_SC_PAGE_SIZE);
+ rec->page_size = sysconf(_SC_PAGE_SIZE);
- atexit(sig_atexit);
+ on_exit(perf_record__sig_exit, rec);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGUSR1, sig_handler);
- if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
- perror("failed to create pipes");
- exit(-1);
- }
-
if (!output_name) {
if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
- pipe_output = 1;
+ opts->pipe_output = true;
else
- output_name = "perf.data";
+ rec->output_name = output_name = "perf.data";
}
if (output_name) {
if (!strcmp(output_name, "-"))
- pipe_output = 1;
+ opts->pipe_output = true;
else if (!stat(output_name, &st) && st.st_size) {
- if (write_mode == WRITE_FORCE) {
+ if (rec->write_mode == WRITE_FORCE) {
char oldname[PATH_MAX];
snprintf(oldname, sizeof(oldname), "%s.old",
output_name);
unlink(oldname);
rename(output_name, oldname);
}
- } else if (write_mode == WRITE_APPEND) {
- write_mode = WRITE_FORCE;
+ } else if (rec->write_mode == WRITE_APPEND) {
+ rec->write_mode = WRITE_FORCE;
}
}
flags = O_CREAT|O_RDWR;
- if (write_mode == WRITE_APPEND)
- file_new = 0;
+ if (rec->write_mode == WRITE_APPEND)
+ rec->file_new = 0;
else
flags |= O_TRUNC;
- if (pipe_output)
+ if (opts->pipe_output)
output = STDOUT_FILENO;
else
output = open(output_name, flags, S_IRUSR | S_IWUSR);
@@ -517,17 +441,21 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
+ rec->output = output;
+
session = perf_session__new(output_name, O_WRONLY,
- write_mode == WRITE_FORCE, false, NULL);
+ rec->write_mode == WRITE_FORCE, false, NULL);
if (session == NULL) {
pr_err("Not enough memory for reading perf file header\n");
return -1;
}
- if (!no_buildid)
+ rec->session = session;
+
+ if (!rec->no_buildid)
perf_header__set_feat(&session->header, HEADER_BUILD_ID);
- if (!file_new) {
+ if (!rec->file_new) {
err = perf_session__read_header(session, output);
if (err < 0)
goto out_delete_session;
@@ -549,94 +477,57 @@ static int __cmd_record(int argc, const char **argv)
perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
perf_header__set_feat(&session->header, HEADER_CPUID);
- /* 512 kiB: default amount of unprivileged mlocked memory */
- if (mmap_pages == UINT_MAX)
- mmap_pages = (512 * 1024) / page_size;
-
if (forks) {
- child_pid = fork();
- if (child_pid < 0) {
- perror("failed to fork");
- exit(-1);
- }
-
- if (!child_pid) {
- if (pipe_output)
- dup2(2, 1);
- close(child_ready_pipe[0]);
- close(go_pipe[1]);
- fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
-
- /*
- * Do a dummy execvp to get the PLT entry resolved,
- * so we avoid the resolver overhead on the real
- * execvp call.
- */
- execvp("", (char **)argv);
-
- /*
- * Tell the parent we're ready to go
- */
- close(child_ready_pipe[1]);
-
- /*
- * Wait until the parent tells us to go.
- */
- if (read(go_pipe[0], &buf, 1) == -1)
- perror("unable to read pipe");
-
- execvp(argv[0], (char **)argv);
-
- perror(argv[0]);
- kill(getppid(), SIGUSR1);
- exit(-1);
- }
-
- if (!system_wide && target_tid == -1 && target_pid == -1)
- evsel_list->threads->map[0] = child_pid;
-
- close(child_ready_pipe[1]);
- close(go_pipe[0]);
- /*
- * wait for child to settle
- */
- if (read(child_ready_pipe[0], &buf, 1) == -1) {
- perror("unable to read pipe");
- exit(-1);
+ err = perf_evlist__prepare_workload(evsel_list, opts, argv);
+ if (err < 0) {
+ pr_err("Couldn't run the workload!\n");
+ goto out_delete_session;
}
- close(child_ready_pipe[0]);
}
- open_counters(evsel_list);
+ perf_record__open(rec);
/*
- * perf_session__delete(session) will be called at atexit_header()
+ * perf_session__delete(session) will be called at perf_record__exit()
*/
- atexit(atexit_header);
+ on_exit(perf_record__exit, rec);
- if (pipe_output) {
+ if (opts->pipe_output) {
err = perf_header__write_pipe(output);
if (err < 0)
return err;
- } else if (file_new) {
+ } else if (rec->file_new) {
err = perf_session__write_header(session, evsel_list,
output, false);
if (err < 0)
return err;
}
- post_processing_offset = lseek(output, 0, SEEK_CUR);
+ if (!!rec->no_buildid
+ && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
+ pr_err("Couldn't generating buildids. "
+ "Use --no-buildid to profile anyway.\n");
+ return -1;
+ }
- if (pipe_output) {
- err = perf_session__synthesize_attrs(session,
- process_synthesized_event);
+ rec->post_processing_offset = lseek(output, 0, SEEK_CUR);
+
+ machine = perf_session__find_host_machine(session);
+ if (!machine) {
+ pr_err("Couldn't find native kernel information.\n");
+ return -1;
+ }
+
+ if (opts->pipe_output) {
+ err = perf_event__synthesize_attrs(tool, session,
+ process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
return err;
}
- err = perf_event__synthesize_event_types(process_synthesized_event,
- session);
+ err = perf_event__synthesize_event_types(tool, process_synthesized_event,
+ machine);
if (err < 0) {
pr_err("Couldn't synthesize event_types.\n");
return err;
@@ -651,56 +542,49 @@ static int __cmd_record(int argc, const char **argv)
* return this more properly and also
* propagate errors that now are calling die()
*/
- err = perf_event__synthesize_tracing_data(output, evsel_list,
- process_synthesized_event,
- session);
+ err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
+ process_synthesized_event);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
return err;
}
- advance_output(err);
+ advance_output(rec, err);
}
}
- machine = perf_session__find_host_machine(session);
- if (!machine) {
- pr_err("Couldn't find native kernel information.\n");
- return -1;
- }
-
- err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine, "_text");
if (err < 0)
- err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_stext");
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine, "_stext");
if (err < 0)
pr_err("Couldn't record kernel reference relocation symbol\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/kallsyms permission or run as root.\n");
- err = perf_event__synthesize_modules(process_synthesized_event,
- session, machine);
+ err = perf_event__synthesize_modules(tool, process_synthesized_event,
+ machine);
if (err < 0)
pr_err("Couldn't record kernel module information.\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/modules permission or run as root.\n");
if (perf_guest)
- perf_session__process_machines(session,
+ perf_session__process_machines(session, tool,
perf_event__synthesize_guest_os);
- if (!system_wide)
- perf_event__synthesize_thread_map(evsel_list->threads,
+ if (!opts->system_wide)
+ perf_event__synthesize_thread_map(tool, evsel_list->threads,
process_synthesized_event,
- session);
+ machine);
else
- perf_event__synthesize_threads(process_synthesized_event,
- session);
+ perf_event__synthesize_threads(tool, process_synthesized_event,
+ machine);
- if (realtime_prio) {
+ if (rec->realtime_prio) {
struct sched_param param;
- param.sched_priority = realtime_prio;
+ param.sched_priority = rec->realtime_prio;
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
pr_err("Could not set realtime priority.\n");
exit(-1);
@@ -713,14 +597,14 @@ static int __cmd_record(int argc, const char **argv)
* Let the child rip
*/
if (forks)
- close(go_pipe[1]);
+ perf_evlist__start_workload(evsel_list);
for (;;) {
- int hits = samples;
+ int hits = rec->samples;
- mmap_read_all();
+ perf_record__mmap_read_all(rec);
- if (hits == samples) {
+ if (hits == rec->samples) {
if (done)
break;
err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
@@ -741,9 +625,9 @@ static int __cmd_record(int argc, const char **argv)
*/
fprintf(stderr,
"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
- (double)bytes_written / 1024.0 / 1024.0,
+ (double)rec->bytes_written / 1024.0 / 1024.0,
output_name,
- bytes_written / 24);
+ rec->bytes_written / 24);
return 0;
@@ -758,58 +642,89 @@ static const char * const record_usage[] = {
NULL
};
-static bool force, append_file;
+/*
+ * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
+ * because we need to have access to it in perf_record__exit, that is called
+ * after cmd_record() exits, but since record_options need to be accessible to
+ * builtin-script, leave it here.
+ *
+ * At least we don't touch it in all the other functions here directly.
+ *
+ * Just say no to tons of global variables, sigh.
+ */
+static struct perf_record record = {
+ .opts = {
+ .target_pid = -1,
+ .target_tid = -1,
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 1000,
+ .sample_id_all_avail = true,
+ },
+ .write_mode = WRITE_FORCE,
+ .file_new = true,
+};
+/*
+ * XXX Will stay a global variable till we fix builtin-script.c to stop messing
+ * with it and switch to use the library functions in perf_evlist that came
+ * from builtin-record.c, i.e. use perf_record_opts,
+ * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
+ * using pipes, etc.
+ */
const struct option record_options[] = {
- OPT_CALLBACK('e', "event", &evsel_list, "event",
+ OPT_CALLBACK('e', "event", &record.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
- OPT_CALLBACK(0, "filter", &evsel_list, "filter",
+ OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
- OPT_INTEGER('p', "pid", &target_pid,
+ OPT_INTEGER('p', "pid", &record.opts.target_pid,
"record events on existing process id"),
- OPT_INTEGER('t', "tid", &target_tid,
+ OPT_INTEGER('t', "tid", &record.opts.target_tid,
"record events on existing thread id"),
- OPT_INTEGER('r', "realtime", &realtime_prio,
+ OPT_INTEGER('r', "realtime", &record.realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
- OPT_BOOLEAN('D', "no-delay", &nodelay,
+ OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
"collect data without buffering"),
- OPT_BOOLEAN('R', "raw-samples", &raw_samples,
+ OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
"collect raw sample records from all opened counters"),
- OPT_BOOLEAN('a', "all-cpus", &system_wide,
+ OPT_BOOLEAN('a', "all-cpus", &record.opts.system_wide,
"system-wide collection from all CPUs"),
- OPT_BOOLEAN('A', "append", &append_file,
+ OPT_BOOLEAN('A', "append", &record.append_file,
"append to the output file to do incremental profiling"),
- OPT_STRING('C', "cpu", &cpu_list, "cpu",
+ OPT_STRING('C', "cpu", &record.opts.cpu_list, "cpu",
"list of cpus to monitor"),
- OPT_BOOLEAN('f', "force", &force,
+ OPT_BOOLEAN('f', "force", &record.force,
"overwrite existing data file (deprecated)"),
- OPT_U64('c', "count", &user_interval, "event period to sample"),
- OPT_STRING('o', "output", &output_name, "file",
+ OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
+ OPT_STRING('o', "output", &record.output_name, "file",
"output file name"),
- OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+ OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
"child tasks do not inherit counters"),
- OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"),
- OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
- OPT_BOOLEAN(0, "group", &group,
+ OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
+ OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
+ "number of mmap data pages"),
+ OPT_BOOLEAN(0, "group", &record.opts.group,
"put the counters into a counter group"),
- OPT_BOOLEAN('g', "call-graph", &call_graph,
+ OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph,
"do call-graph (stack chain/backtrace) recording"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
- OPT_BOOLEAN('s', "stat", &inherit_stat,
+ OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"),
- OPT_BOOLEAN('d', "data", &sample_address,
+ OPT_BOOLEAN('d', "data", &record.opts.sample_address,
"Sample addresses"),
- OPT_BOOLEAN('T', "timestamp", &sample_time, "Sample timestamps"),
- OPT_BOOLEAN('n', "no-samples", &no_samples,
+ OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
+ OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
+ OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
"don't sample"),
- OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
+ OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
"do not update the buildid cache"),
- OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+ OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
"do not collect buildids in perf.data"),
- OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+ OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
OPT_END()
@@ -819,6 +734,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
{
int err = -ENOMEM;
struct perf_evsel *pos;
+ struct perf_evlist *evsel_list;
+ struct perf_record *rec = &record;
perf_header__set_cmdline(argc, argv);
@@ -826,23 +743,25 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (evsel_list == NULL)
return -ENOMEM;
+ rec->evlist = evsel_list;
+
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && target_pid == -1 && target_tid == -1 &&
- !system_wide && !cpu_list)
+ if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
+ !rec->opts.system_wide && !rec->opts.cpu_list)
usage_with_options(record_usage, record_options);
- if (force && append_file) {
+ if (rec->force && rec->append_file) {
fprintf(stderr, "Can't overwrite and append at the same time."
" You need to choose between -f and -A");
usage_with_options(record_usage, record_options);
- } else if (append_file) {
- write_mode = WRITE_APPEND;
+ } else if (rec->append_file) {
+ rec->write_mode = WRITE_APPEND;
} else {
- write_mode = WRITE_FORCE;
+ rec->write_mode = WRITE_FORCE;
}
- if (nr_cgroups && !system_wide) {
+ if (nr_cgroups && !rec->opts.system_wide) {
fprintf(stderr, "cgroup monitoring only available in"
" system-wide mode\n");
usage_with_options(record_usage, record_options);
@@ -860,7 +779,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");
- if (no_buildid_cache || no_buildid)
+ if (rec->no_buildid_cache || rec->no_buildid)
disable_buildid_cache();
if (evsel_list->nr_entries == 0 &&
@@ -869,43 +788,37 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
goto out_symbol_exit;
}
- if (target_pid != -1)
- target_tid = target_pid;
+ if (rec->opts.target_pid != -1)
+ rec->opts.target_tid = rec->opts.target_pid;
- if (perf_evlist__create_maps(evsel_list, target_pid,
- target_tid, cpu_list) < 0)
+ if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
+ rec->opts.target_tid, rec->opts.cpu_list) < 0)
usage_with_options(record_usage, record_options);
list_for_each_entry(pos, &evsel_list->entries, node) {
- if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
- evsel_list->threads->nr) < 0)
- goto out_free_fd;
if (perf_header__push_event(pos->attr.config, event_name(pos)))
goto out_free_fd;
}
- if (perf_evlist__alloc_pollfd(evsel_list) < 0)
- goto out_free_fd;
-
- if (user_interval != ULLONG_MAX)
- default_interval = user_interval;
- if (user_freq != UINT_MAX)
- freq = user_freq;
+ if (rec->opts.user_interval != ULLONG_MAX)
+ rec->opts.default_interval = rec->opts.user_interval;
+ if (rec->opts.user_freq != UINT_MAX)
+ rec->opts.freq = rec->opts.user_freq;
/*
* User specified count overrides default frequency.
*/
- if (default_interval)
- freq = 0;
- else if (freq) {
- default_interval = freq;
+ if (rec->opts.default_interval)
+ rec->opts.freq = 0;
+ else if (rec->opts.freq) {
+ rec->opts.default_interval = rec->opts.freq;
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
err = -EINVAL;
goto out_free_fd;
}
- err = __cmd_record(argc, argv);
+ err = __cmd_record(&record, argc, argv);
out_free_fd:
perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4d7c8340c32..25d34d483e4 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -25,6 +25,7 @@
#include "util/evsel.h"
#include "util/header.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -35,38 +36,35 @@
#include <linux/bitmap.h>
-static char const *input_name = "perf.data";
-
-static bool force, use_tui, use_stdio;
-static bool hide_unresolved;
-static bool dont_use_callchains;
-static bool show_full_info;
-
-static bool show_threads;
-static struct perf_read_values show_threads_values;
-
-static const char default_pretty_printing_style[] = "normal";
-static const char *pretty_printing_style = default_pretty_printing_style;
-
-static char callchain_default_opt[] = "fractal,0.5,callee";
-static bool inverted_callchain;
-static symbol_filter_t annotate_init;
-
-static const char *cpu_list;
-static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+struct perf_report {
+ struct perf_tool tool;
+ struct perf_session *session;
+ char const *input_name;
+ bool force, use_tui, use_stdio;
+ bool hide_unresolved;
+ bool dont_use_callchains;
+ bool show_full_info;
+ bool show_threads;
+ bool inverted_callchain;
+ struct perf_read_values show_threads_values;
+ const char *pretty_printing_style;
+ symbol_filter_t annotate_init;
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
-static int perf_session__add_hist_entry(struct perf_session *session,
- struct addr_location *al,
- struct perf_sample *sample,
- struct perf_evsel *evsel)
+static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ struct machine *machine)
{
struct symbol *parent = NULL;
int err = 0;
struct hist_entry *he;
if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
- err = perf_session__resolve_callchain(session, al->thread,
- sample->callchain, &parent);
+ err = machine__resolve_callchain(machine, evsel, al->thread,
+ sample->callchain, &parent);
if (err)
return err;
}
@@ -76,7 +74,8 @@ static int perf_session__add_hist_entry(struct perf_session *session,
return -ENOMEM;
if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain, &session->callchain_cursor,
+ err = callchain_append(he->callchain,
+ &evsel->hists.callchain_cursor,
sample->period);
if (err)
return err;
@@ -92,8 +91,7 @@ static int perf_session__add_hist_entry(struct perf_session *session,
assert(evsel != NULL);
err = -ENOMEM;
- if (notes->src == NULL &&
- symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0)
+ if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
goto out;
err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
@@ -106,30 +104,32 @@ out:
}
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
- struct perf_session *session)
+ struct machine *machine)
{
+ struct perf_report *rep = container_of(tool, struct perf_report, tool);
struct addr_location al;
- if (perf_event__preprocess_sample(event, session, &al, sample,
- annotate_init) < 0) {
+ if (perf_event__preprocess_sample(event, machine, &al, sample,
+ rep->annotate_init) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
- if (al.filtered || (hide_unresolved && al.sym == NULL))
+ if (al.filtered || (rep->hide_unresolved && al.sym == NULL))
return 0;
- if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+ if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
if (al.map != NULL)
al.map->dso->hit = 1;
- if (perf_session__add_hist_entry(session, &al, sample, evsel)) {
+ if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
pr_debug("problem incrementing symbol period, skipping event\n");
return -1;
}
@@ -137,15 +137,17 @@ static int process_sample_event(union perf_event *event,
return 0;
}
-static int process_read_event(union perf_event *event,
+static int process_read_event(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct perf_evsel *evsel,
+ struct machine *machine __used)
{
- struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist,
- event->read.id);
- if (show_threads) {
+ struct perf_report *rep = container_of(tool, struct perf_report, tool);
+
+ if (rep->show_threads) {
const char *name = evsel ? event_name(evsel) : "unknown";
- perf_read_values_add_value(&show_threads_values,
+ perf_read_values_add_value(&rep->show_threads_values,
event->read.pid, event->read.tid,
event->read.id,
name,
@@ -159,8 +161,10 @@ static int process_read_event(union perf_event *event,
return 0;
}
-static int perf_session__setup_sample_type(struct perf_session *self)
+static int perf_report__setup_sample_type(struct perf_report *rep)
{
+ struct perf_session *self = rep->session;
+
if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
ui__warning("Selected --sort parent, but no "
@@ -173,7 +177,8 @@ static int perf_session__setup_sample_type(struct perf_session *self)
"you call 'perf record' without -g?\n");
return -1;
}
- } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+ } else if (!rep->dont_use_callchains &&
+ callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
@@ -186,22 +191,6 @@ static int perf_session__setup_sample_type(struct perf_session *self)
return 0;
}
-static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .mmap = perf_event__process_mmap,
- .comm = perf_event__process_comm,
- .exit = perf_event__process_task,
- .fork = perf_event__process_task,
- .lost = perf_event__process_lost,
- .read = process_read_event,
- .attr = perf_event__process_attr,
- .event_type = perf_event__process_event_type,
- .tracing_data = perf_event__process_tracing_data,
- .build_id = perf_event__process_build_id,
- .ordered_samples = true,
- .ordering_requires_timestamps = true,
-};
-
extern volatile int session_done;
static void sig_handler(int sig __used)
@@ -224,6 +213,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
}
static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
+ struct perf_report *rep,
const char *help)
{
struct perf_evsel *pos;
@@ -241,18 +231,18 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
parent_pattern == default_parent_pattern) {
fprintf(stdout, "#\n# (%s)\n#\n", help);
- if (show_threads) {
- bool style = !strcmp(pretty_printing_style, "raw");
- perf_read_values_display(stdout, &show_threads_values,
+ if (rep->show_threads) {
+ bool style = !strcmp(rep->pretty_printing_style, "raw");
+ perf_read_values_display(stdout, &rep->show_threads_values,
style);
- perf_read_values_destroy(&show_threads_values);
+ perf_read_values_destroy(&rep->show_threads_values);
}
}
return 0;
}
-static int __cmd_report(void)
+static int __cmd_report(struct perf_report *rep)
{
int ret = -EINVAL;
u64 nr_samples;
@@ -264,27 +254,31 @@ static int __cmd_report(void)
signal(SIGINT, sig_handler);
- session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
+ session = perf_session__new(rep->input_name, O_RDONLY,
+ rep->force, false, &rep->tool);
if (session == NULL)
return -ENOMEM;
- if (cpu_list) {
- ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+ rep->session = session;
+
+ if (rep->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, rep->cpu_list,
+ rep->cpu_bitmap);
if (ret)
goto out_delete;
}
if (use_browser <= 0)
- perf_session__fprintf_info(session, stdout, show_full_info);
+ perf_session__fprintf_info(session, stdout, rep->show_full_info);
- if (show_threads)
- perf_read_values_init(&show_threads_values);
+ if (rep->show_threads)
+ perf_read_values_init(&rep->show_threads_values);
- ret = perf_session__setup_sample_type(session);
+ ret = perf_report__setup_sample_type(rep);
if (ret)
goto out_delete;
- ret = perf_session__process_events(session, &event_ops);
+ ret = perf_session__process_events(session, &rep->tool);
if (ret)
goto out_delete;
@@ -327,7 +321,7 @@ static int __cmd_report(void)
}
if (nr_samples == 0) {
- ui__warning("The %s file has no samples!\n", input_name);
+ ui__warning("The %s file has no samples!\n", session->filename);
goto out_delete;
}
@@ -335,7 +329,7 @@ static int __cmd_report(void)
perf_evlist__tui_browse_hists(session->evlist, help,
NULL, NULL, 0);
} else
- perf_evlist__tty_browse_hists(session->evlist, help);
+ perf_evlist__tty_browse_hists(session->evlist, rep, help);
out_delete:
/*
@@ -354,9 +348,9 @@ out_delete:
}
static int
-parse_callchain_opt(const struct option *opt __used, const char *arg,
- int unset)
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
+ struct perf_report *rep = (struct perf_report *)opt->value;
char *tok, *tok2;
char *endptr;
@@ -364,7 +358,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
* --no-call-graph
*/
if (unset) {
- dont_use_callchains = true;
+ rep->dont_use_callchains = true;
return 0;
}
@@ -412,7 +406,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
goto setup;
if (tok2[0] != 'c') {
- callchain_param.print_limit = strtod(tok2, &endptr);
+ callchain_param.print_limit = strtoul(tok2, &endptr, 0);
tok2 = strtok(NULL, ",");
if (!tok2)
goto setup;
@@ -433,13 +427,34 @@ setup:
return 0;
}
-static const char * const report_usage[] = {
- "perf report [<options>] <command>",
- NULL
-};
-
-static const struct option options[] = {
- OPT_STRING('i', "input", &input_name, "file",
+int cmd_report(int argc, const char **argv, const char *prefix __used)
+{
+ struct stat st;
+ char callchain_default_opt[] = "fractal,0.5,callee";
+ const char * const report_usage[] = {
+ "perf report [<options>]",
+ NULL
+ };
+ struct perf_report report = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_task,
+ .fork = perf_event__process_task,
+ .lost = perf_event__process_lost,
+ .read = process_read_event,
+ .attr = perf_event__process_attr,
+ .event_type = perf_event__process_event_type,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .ordered_samples = true,
+ .ordering_requires_timestamps = true,
+ },
+ .pretty_printing_style = "normal",
+ };
+ const struct option options[] = {
+ OPT_STRING('i', "input", &report.input_name, "file",
"input file name"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
@@ -449,17 +464,18 @@ static const struct option options[] = {
"file", "vmlinux pathname"),
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
"file", "kallsyms pathname"),
- OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
- OPT_BOOLEAN('T', "threads", &show_threads,
+ OPT_BOOLEAN('T', "threads", &report.show_threads,
"Show per-thread event counters"),
- OPT_STRING(0, "pretty", &pretty_printing_style, "key",
+ OPT_STRING(0, "pretty", &report.pretty_printing_style, "key",
"pretty printing style key: normal raw"),
- OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
- OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &report.use_stdio,
+ "Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
@@ -468,13 +484,14 @@ static const struct option options[] = {
"regex filter to identify parent, see: '--sort parent'"),
OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
"Only display entries with parent-match"),
- OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent, call_order",
- "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold and callchain order. "
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
+ "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit and callchain order. "
"Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
- OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"),
+ OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
+ "alias for inverted call graph"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
- OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
"only consider symbols in these comms"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
@@ -484,12 +501,13 @@ static const struct option options[] = {
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
- OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+ OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved,
"Only display entries resolved to a symbol"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
- OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
- OPT_BOOLEAN('I', "show-info", &show_full_info,
+ OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_BOOLEAN('I', "show-info", &report.show_full_info,
"Display extended information about perf.data file"),
OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
"Interleave source code with assembly code (default)"),
@@ -500,24 +518,30 @@ static const struct option options[] = {
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
OPT_END()
-};
+ };
-int cmd_report(int argc, const char **argv, const char *prefix __used)
-{
argc = parse_options(argc, argv, options, report_usage, 0);
- if (use_stdio)
+ if (report.use_stdio)
use_browser = 0;
- else if (use_tui)
+ else if (report.use_tui)
use_browser = 1;
- if (inverted_callchain)
+ if (report.inverted_callchain)
callchain_param.order = ORDER_CALLER;
- if (strcmp(input_name, "-") != 0)
+ if (!report.input_name || !strlen(report.input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ report.input_name = "-";
+ else
+ report.input_name = "perf.data";
+ }
+
+ if (strcmp(report.input_name, "-") != 0)
setup_browser(true);
else
use_browser = 0;
+
/*
* Only in the newt browser we are doing integrated annotation,
* so don't allocate extra space that won't be used in the stdio
@@ -525,7 +549,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
*/
if (use_browser > 0) {
symbol_conf.priv_size = sizeof(struct annotation);
- annotate_init = symbol__annotate_init;
+ report.annotate_init = symbol__annotate_init;
/*
* For searching by name on the "Browse map details".
* providing it only in verbose mode not to bloat too
@@ -572,5 +596,5 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
- return __cmd_report();
+ return __cmd_report(&report);
}
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5177964943e..fb8b5f83b4a 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2,11 +2,14 @@
#include "perf.h"
#include "util/util.h"
+#include "util/evlist.h"
#include "util/cache.h"
+#include "util/evsel.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
@@ -19,7 +22,7 @@
#include <pthread.h>
#include <math.h>
-static char const *input_name = "perf.data";
+static const char *input_name;
static char default_sort_order[] = "avg, max, switch, runtime";
static const char *sort_order = default_sort_order;
@@ -723,21 +726,21 @@ struct trace_migrate_task_event {
struct trace_sched_handler {
void (*switch_event)(struct trace_switch_event *,
- struct perf_session *,
+ struct machine *,
struct event *,
int cpu,
u64 timestamp,
struct thread *thread);
void (*runtime_event)(struct trace_runtime_event *,
- struct perf_session *,
+ struct machine *,
struct event *,
int cpu,
u64 timestamp,
struct thread *thread);
void (*wakeup_event)(struct trace_wakeup_event *,
- struct perf_session *,
+ struct machine *,
struct event *,
int cpu,
u64 timestamp,
@@ -750,7 +753,7 @@ struct trace_sched_handler {
struct thread *thread);
void (*migrate_task_event)(struct trace_migrate_task_event *,
- struct perf_session *session,
+ struct machine *machine,
struct event *,
int cpu,
u64 timestamp,
@@ -760,7 +763,7 @@ struct trace_sched_handler {
static void
replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
- struct perf_session *session __used,
+ struct machine *machine __used,
struct event *event,
int cpu __used,
u64 timestamp __used,
@@ -787,7 +790,7 @@ static u64 cpu_last_switched[MAX_CPUS];
static void
replay_switch_event(struct trace_switch_event *switch_event,
- struct perf_session *session __used,
+ struct machine *machine __used,
struct event *event,
int cpu,
u64 timestamp,
@@ -1021,7 +1024,7 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
static void
latency_switch_event(struct trace_switch_event *switch_event,
- struct perf_session *session,
+ struct machine *machine,
struct event *event __used,
int cpu,
u64 timestamp,
@@ -1045,8 +1048,8 @@ latency_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
- sched_out = perf_session__findnew(session, switch_event->prev_pid);
- sched_in = perf_session__findnew(session, switch_event->next_pid);
+ sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
+ sched_in = machine__findnew_thread(machine, switch_event->next_pid);
out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_events) {
@@ -1074,13 +1077,13 @@ latency_switch_event(struct trace_switch_event *switch_event,
static void
latency_runtime_event(struct trace_runtime_event *runtime_event,
- struct perf_session *session,
+ struct machine *machine,
struct event *event __used,
int cpu,
u64 timestamp,
struct thread *this_thread __used)
{
- struct thread *thread = perf_session__findnew(session, runtime_event->pid);
+ struct thread *thread = machine__findnew_thread(machine, runtime_event->pid);
struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid);
BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -1097,7 +1100,7 @@ latency_runtime_event(struct trace_runtime_event *runtime_event,
static void
latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
- struct perf_session *session,
+ struct machine *machine,
struct event *__event __used,
int cpu __used,
u64 timestamp,
@@ -1111,7 +1114,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (!wakeup_event->success)
return;
- wakee = perf_session__findnew(session, wakeup_event->pid);
+ wakee = machine__findnew_thread(machine, wakeup_event->pid);
atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);
@@ -1145,7 +1148,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
static void
latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
- struct perf_session *session,
+ struct machine *machine,
struct event *__event __used,
int cpu __used,
u64 timestamp,
@@ -1161,7 +1164,7 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event,
if (profile_cpu == -1)
return;
- migrant = perf_session__findnew(session, migrate_task_event->pid);
+ migrant = machine__findnew_thread(machine, migrate_task_event->pid);
atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid);
if (!atoms) {
thread_atoms_insert(migrant);
@@ -1356,12 +1359,13 @@ static void sort_lat(void)
static struct trace_sched_handler *trace_handler;
static void
-process_sched_wakeup_event(void *data, struct perf_session *session,
+process_sched_wakeup_event(struct perf_tool *tool __used,
struct event *event,
- int cpu __used,
- u64 timestamp __used,
- struct thread *thread __used)
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread)
{
+ void *data = sample->raw_data;
struct trace_wakeup_event wakeup_event;
FILL_COMMON_FIELDS(wakeup_event, event, data);
@@ -1373,8 +1377,8 @@ process_sched_wakeup_event(void *data, struct perf_session *session,
FILL_FIELD(wakeup_event, cpu, event, data);
if (trace_handler->wakeup_event)
- trace_handler->wakeup_event(&wakeup_event, session, event,
- cpu, timestamp, thread);
+ trace_handler->wakeup_event(&wakeup_event, machine, event,
+ sample->cpu, sample->time, thread);
}
/*
@@ -1392,7 +1396,7 @@ static char next_shortname2 = '0';
static void
map_switch_event(struct trace_switch_event *switch_event,
- struct perf_session *session,
+ struct machine *machine,
struct event *event __used,
int this_cpu,
u64 timestamp,
@@ -1420,8 +1424,8 @@ map_switch_event(struct trace_switch_event *switch_event,
die("hm, delta: %" PRIu64 " < 0 ?\n", delta);
- sched_out = perf_session__findnew(session, switch_event->prev_pid);
- sched_in = perf_session__findnew(session, switch_event->next_pid);
+ sched_out = machine__findnew_thread(machine, switch_event->prev_pid);
+ sched_in = machine__findnew_thread(machine, switch_event->next_pid);
curr_thread[this_cpu] = sched_in;
@@ -1469,14 +1473,15 @@ map_switch_event(struct trace_switch_event *switch_event,
}
}
-
static void
-process_sched_switch_event(void *data, struct perf_session *session,
+process_sched_switch_event(struct perf_tool *tool __used,
struct event *event,
- int this_cpu,
- u64 timestamp __used,
- struct thread *thread __used)
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread)
{
+ int this_cpu = sample->cpu;
+ void *data = sample->raw_data;
struct trace_switch_event switch_event;
FILL_COMMON_FIELDS(switch_event, event, data);
@@ -1498,19 +1503,20 @@ process_sched_switch_event(void *data, struct perf_session *session,
nr_context_switch_bugs++;
}
if (trace_handler->switch_event)
- trace_handler->switch_event(&switch_event, session, event,
- this_cpu, timestamp, thread);
+ trace_handler->switch_event(&switch_event, machine, event,
+ this_cpu, sample->time, thread);
curr_pid[this_cpu] = switch_event.next_pid;
}
static void
-process_sched_runtime_event(void *data, struct perf_session *session,
- struct event *event,
- int cpu __used,
- u64 timestamp __used,
- struct thread *thread __used)
+process_sched_runtime_event(struct perf_tool *tool __used,
+ struct event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread)
{
+ void *data = sample->raw_data;
struct trace_runtime_event runtime_event;
FILL_ARRAY(runtime_event, comm, event, data);
@@ -1519,16 +1525,18 @@ process_sched_runtime_event(void *data, struct perf_session *session,
FILL_FIELD(runtime_event, vruntime, event, data);
if (trace_handler->runtime_event)
- trace_handler->runtime_event(&runtime_event, session, event, cpu, timestamp, thread);
+ trace_handler->runtime_event(&runtime_event, machine, event,
+ sample->cpu, sample->time, thread);
}
static void
-process_sched_fork_event(void *data,
+process_sched_fork_event(struct perf_tool *tool __used,
struct event *event,
- int cpu __used,
- u64 timestamp __used,
- struct thread *thread __used)
+ struct perf_sample *sample,
+ struct machine *machine __used,
+ struct thread *thread)
{
+ void *data = sample->raw_data;
struct trace_fork_event fork_event;
FILL_COMMON_FIELDS(fork_event, event, data);
@@ -1540,13 +1548,14 @@ process_sched_fork_event(void *data,
if (trace_handler->fork_event)
trace_handler->fork_event(&fork_event, event,
- cpu, timestamp, thread);
+ sample->cpu, sample->time, thread);
}
static void
-process_sched_exit_event(struct event *event,
- int cpu __used,
- u64 timestamp __used,
+process_sched_exit_event(struct perf_tool *tool __used,
+ struct event *event,
+ struct perf_sample *sample __used,
+ struct machine *machine __used,
struct thread *thread __used)
{
if (verbose)
@@ -1554,12 +1563,13 @@ process_sched_exit_event(struct event *event,
}
static void
-process_sched_migrate_task_event(void *data, struct perf_session *session,
- struct event *event,
- int cpu __used,
- u64 timestamp __used,
- struct thread *thread __used)
+process_sched_migrate_task_event(struct perf_tool *tool __used,
+ struct event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread)
{
+ void *data = sample->raw_data;
struct trace_migrate_task_event migrate_task_event;
FILL_COMMON_FIELDS(migrate_task_event, event, data);
@@ -1570,67 +1580,47 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
FILL_FIELD(migrate_task_event, cpu, event, data);
if (trace_handler->migrate_task_event)
- trace_handler->migrate_task_event(&migrate_task_event, session,
- event, cpu, timestamp, thread);
+ trace_handler->migrate_task_event(&migrate_task_event, machine,
+ event, sample->cpu,
+ sample->time, thread);
}
-static void process_raw_event(union perf_event *raw_event __used,
- struct perf_session *session, void *data, int cpu,
- u64 timestamp, struct thread *thread)
-{
- struct event *event;
- int type;
-
-
- type = trace_parse_common_type(data);
- event = trace_find_event(type);
-
- if (!strcmp(event->name, "sched_switch"))
- process_sched_switch_event(data, session, event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_stat_runtime"))
- process_sched_runtime_event(data, session, event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_wakeup"))
- process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_wakeup_new"))
- process_sched_wakeup_event(data, session, event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_process_fork"))
- process_sched_fork_event(data, event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_process_exit"))
- process_sched_exit_event(event, cpu, timestamp, thread);
- if (!strcmp(event->name, "sched_migrate_task"))
- process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
-}
+typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *thread);
-static int process_sample_event(union perf_event *event,
- struct perf_sample *sample,
- struct perf_evsel *evsel __used,
- struct perf_session *session)
+static int perf_sched__process_tracepoint_sample(struct perf_tool *tool,
+ union perf_event *event __used,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
{
- struct thread *thread;
-
- if (!(session->sample_type & PERF_SAMPLE_RAW))
- return 0;
+ struct thread *thread = machine__findnew_thread(machine, sample->pid);
- thread = perf_session__findnew(session, sample->pid);
if (thread == NULL) {
- pr_debug("problem processing %d event, skipping it.\n",
- event->header.type);
+ pr_debug("problem processing %s event, skipping it.\n",
+ evsel->name);
return -1;
}
- dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+ evsel->hists.stats.total_period += sample->period;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
- if (profile_cpu != -1 && profile_cpu != (int)sample->cpu)
- return 0;
+ if (evsel->handler.func != NULL) {
+ tracepoint_handler f = evsel->handler.func;
- process_raw_event(event, session, sample->raw_data, sample->cpu,
- sample->time, thread);
+ if (evsel->handler.data == NULL)
+ evsel->handler.data = trace_find_event(evsel->attr.config);
+
+ f(tool, evsel->handler.data, sample, machine, thread);
+ }
return 0;
}
-static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
+static struct perf_tool perf_sched = {
+ .sample = perf_sched__process_tracepoint_sample,
.comm = perf_event__process_comm,
.lost = perf_event__process_lost,
.fork = perf_event__process_task,
@@ -1640,13 +1630,25 @@ static struct perf_event_ops event_ops = {
static void read_events(bool destroy, struct perf_session **psession)
{
int err = -EINVAL;
+ const struct perf_evsel_str_handler handlers[] = {
+ { "sched:sched_switch", process_sched_switch_event, },
+ { "sched:sched_stat_runtime", process_sched_runtime_event, },
+ { "sched:sched_wakeup", process_sched_wakeup_event, },
+ { "sched:sched_wakeup_new", process_sched_wakeup_event, },
+ { "sched:sched_process_fork", process_sched_fork_event, },
+ { "sched:sched_process_exit", process_sched_exit_event, },
+ { "sched:sched_migrate_task", process_sched_migrate_task_event, },
+ };
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
- 0, false, &event_ops);
+ 0, false, &perf_sched);
if (session == NULL)
die("No Memory");
+ err = perf_evlist__set_tracepoints_handlers_array(session->evlist, handlers);
+ assert(err == 0);
+
if (perf_session__has_traces(session, "record -R")) {
- err = perf_session__process_events(session, &event_ops);
+ err = perf_session__process_events(session, &perf_sched);
if (err)
die("Failed to process events, error %d", err);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 2f62a295226..fd1909afcfd 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -7,6 +7,7 @@
#include "util/header.h"
#include "util/parse-options.h"
#include "util/session.h"
+#include "util/tool.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
@@ -23,6 +24,7 @@ static u64 nr_unordered;
extern const struct option record_options[];
static bool no_callchain;
static bool show_full_info;
+static bool system_wide;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -315,7 +317,7 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
static void print_sample_addr(union perf_event *event,
struct perf_sample *sample,
- struct perf_session *session,
+ struct machine *machine,
struct thread *thread,
struct perf_event_attr *attr)
{
@@ -328,11 +330,11 @@ static void print_sample_addr(union perf_event *event,
if (!sample_addr_correlates_sym(attr))
return;
- thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- event->ip.pid, sample->addr, &al);
+ thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
+ sample->addr, &al);
if (!al.map)
- thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE,
- event->ip.pid, sample->addr, &al);
+ thread__find_addr_map(thread, machine, cpumode, MAP__VARIABLE,
+ sample->addr, &al);
al.cpu = sample->cpu;
al.sym = NULL;
@@ -362,7 +364,7 @@ static void print_sample_addr(union perf_event *event,
static void process_event(union perf_event *event __unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
- struct perf_session *session,
+ struct machine *machine,
struct thread *thread)
{
struct perf_event_attr *attr = &evsel->attr;
@@ -377,15 +379,15 @@ static void process_event(union perf_event *event __unused,
sample->raw_size);
if (PRINT_FIELD(ADDR))
- print_sample_addr(event, sample, session, thread, attr);
+ print_sample_addr(event, sample, machine, thread, attr);
if (PRINT_FIELD(IP)) {
if (!symbol_conf.use_callchain)
printf(" ");
else
printf("\n");
- perf_session__print_ip(event, sample, session,
- PRINT_FIELD(SYM), PRINT_FIELD(DSO));
+ perf_event__print_ip(event, sample, machine, evsel,
+ PRINT_FIELD(SYM), PRINT_FIELD(DSO));
}
printf("\n");
@@ -432,14 +434,16 @@ static int cleanup_scripting(void)
return scripting_ops->stop_script();
}
-static char const *input_name = "perf.data";
+static const char *input_name;
-static int process_sample_event(union perf_event *event,
+static int process_sample_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
- struct perf_session *session)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(session, event->ip.pid);
+ struct addr_location al;
+ struct thread *thread = machine__findnew_thread(machine, event->ip.tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -458,16 +462,25 @@ static int process_sample_event(union perf_event *event,
return 0;
}
+ if (perf_event__preprocess_sample(event, machine, &al, sample, 0) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (al.filtered)
+ return 0;
+
if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
return 0;
- scripting_ops->process_event(event, sample, evsel, session, thread);
+ scripting_ops->process_event(event, sample, evsel, machine, thread);
- session->hists.stats.total_period += sample->period;
+ evsel->hists.stats.total_period += sample->period;
return 0;
}
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_script = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
@@ -494,7 +507,7 @@ static int __cmd_script(struct perf_session *session)
signal(SIGINT, sig_handler);
- ret = perf_session__process_events(session, &event_ops);
+ ret = perf_session__process_events(session, &perf_script);
if (debug_mode)
pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
@@ -523,12 +536,6 @@ static struct script_spec *script_spec__new(const char *spec,
return s;
}
-static void script_spec__delete(struct script_spec *s)
-{
- free(s->spec);
- free(s);
-}
-
static void script_spec__add(struct script_spec *s)
{
list_add_tail(&s->node, &script_specs);
@@ -554,16 +561,11 @@ static struct script_spec *script_spec__findnew(const char *spec,
s = script_spec__new(spec, ops);
if (!s)
- goto out_delete_spec;
+ return NULL;
script_spec__add(s);
return s;
-
-out_delete_spec:
- script_spec__delete(s);
-
- return NULL;
}
int script_spec_register(const char *spec, struct scripting_ops *ops)
@@ -681,7 +683,8 @@ static int parse_output_fields(const struct option *opt __used,
type = PERF_TYPE_RAW;
else {
fprintf(stderr, "Invalid event type in field string.\n");
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
if (output[type].user_set)
@@ -923,6 +926,24 @@ static int read_script_info(struct script_desc *desc, const char *filename)
return 0;
}
+static char *get_script_root(struct dirent *script_dirent, const char *suffix)
+{
+ char *script_root, *str;
+
+ script_root = strdup(script_dirent->d_name);
+ if (!script_root)
+ return NULL;
+
+ str = (char *)ends_with(script_root, suffix);
+ if (!str) {
+ free(script_root);
+ return NULL;
+ }
+
+ *str = '\0';
+ return script_root;
+}
+
static int list_available_scripts(const struct option *opt __used,
const char *s __used, int unset __used)
{
@@ -934,7 +955,6 @@ static int list_available_scripts(const struct option *opt __used,
struct script_desc *desc;
char first_half[BUFSIZ];
char *script_root;
- char *str;
snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
@@ -950,16 +970,14 @@ static int list_available_scripts(const struct option *opt __used,
continue;
for_each_script(lang_path, lang_dir, script_dirent, script_next) {
- script_root = strdup(script_dirent.d_name);
- str = (char *)ends_with(script_root, REPORT_SUFFIX);
- if (str) {
- *str = '\0';
+ script_root = get_script_root(&script_dirent, REPORT_SUFFIX);
+ if (script_root) {
desc = script_desc__findnew(script_root);
snprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent.d_name);
read_script_info(desc, script_path);
+ free(script_root);
}
- free(script_root);
}
}
@@ -981,8 +999,7 @@ static char *get_script_path(const char *script_root, const char *suffix)
char script_path[MAXPATHLEN];
DIR *scripts_dir, *lang_dir;
char lang_path[MAXPATHLEN];
- char *str, *__script_root;
- char *path = NULL;
+ char *__script_root;
snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path());
@@ -998,23 +1015,18 @@ static char *get_script_path(const char *script_root, const char *suffix)
continue;
for_each_script(lang_path, lang_dir, script_dirent, script_next) {
- __script_root = strdup(script_dirent.d_name);
- str = (char *)ends_with(__script_root, suffix);
- if (str) {
- *str = '\0';
- if (strcmp(__script_root, script_root))
- continue;
+ __script_root = get_script_root(&script_dirent, suffix);
+ if (__script_root && !strcmp(script_root, __script_root)) {
+ free(__script_root);
snprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent.d_name);
- path = strdup(script_path);
- free(__script_root);
- break;
+ return strdup(script_path);
}
free(__script_root);
}
}
- return path;
+ return NULL;
}
static bool is_top_script(const char *script_path)
@@ -1083,7 +1095,11 @@ static const struct option options[] = {
OPT_CALLBACK('f', "fields", NULL, "str",
"comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
parse_output_fields),
- OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+ OPT_BOOLEAN('a', "all-cpus", &system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+ OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ "only display events for these comms"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_END()
@@ -1110,7 +1126,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
struct perf_session *session;
char *script_path = NULL;
const char **__argv;
- bool system_wide;
int i, j, err;
setup_scripting();
@@ -1178,15 +1193,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
}
if (!pid) {
- system_wide = true;
j = 0;
dup2(live_pipe[1], 1);
close(live_pipe[0]);
- if (!is_top_script(argv[0]))
+ if (is_top_script(argv[0])) {
+ system_wide = true;
+ } else if (!system_wide) {
system_wide = !have_cmd(argc - rep_args,
&argv[rep_args]);
+ }
__argv = malloc((argc + 6) * sizeof(const char *));
if (!__argv)
@@ -1234,10 +1251,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
script_path = rep_script_path;
if (script_path) {
- system_wide = false;
j = 0;
- if (rec_script_path)
+ if (!rec_script_path)
+ system_wide = false;
+ else if (!system_wide)
system_wide = !have_cmd(argc - 1, &argv[1]);
__argv = malloc((argc + 2) * sizeof(const char *));
@@ -1261,7 +1279,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
if (!script_name)
setup_pager();
- session = perf_session__new(input_name, O_RDONLY, 0, false, &event_ops);
+ session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_script);
if (session == NULL)
return -ENOMEM;
@@ -1287,7 +1305,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
return -1;
}
- input = open(input_name, O_RDONLY);
+ input = open(session->filename, O_RDONLY); /* input_name */
if (input < 0) {
perror("failed to open file");
exit(-1);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7d98676808d..f5d2a63eba6 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -463,7 +463,8 @@ static int run_perf_stat(int argc __used, const char **argv)
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter, first) < 0) {
- if (errno == EINVAL || errno == ENOSYS || errno == ENOENT) {
+ if (errno == EINVAL || errno == ENOSYS ||
+ errno == ENOENT || errno == EOPNOTSUPP) {
if (verbose)
ui__warning("%s event is not supported by the kernel.\n",
event_name(counter));
@@ -577,6 +578,33 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
avg / avg_stats(&walltime_nsecs_stats));
}
+/* used for get_ratio_color() */
+enum grc_type {
+ GRC_STALLED_CYCLES_FE,
+ GRC_STALLED_CYCLES_BE,
+ GRC_CACHE_MISSES,
+ GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+ static const double grc_table[GRC_MAX_NR][3] = {
+ [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+ [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+ [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
+ };
+ const char *color = PERF_COLOR_NORMAL;
+
+ if (ratio > grc_table[type][0])
+ color = PERF_COLOR_RED;
+ else if (ratio > grc_table[type][1])
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > grc_table[type][2])
+ color = PERF_COLOR_YELLOW;
+
+ return color;
+}
+
static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
@@ -587,13 +615,7 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 50.0)
- color = PERF_COLOR_RED;
- else if (ratio > 30.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 10.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -610,13 +632,7 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 75.0)
- color = PERF_COLOR_RED;
- else if (ratio > 50.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 20.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -633,13 +649,7 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -656,13 +666,7 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -679,13 +683,7 @@ static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, dou
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -702,13 +700,7 @@ static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -725,13 +717,7 @@ static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, do
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -748,13 +734,7 @@ static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, doub
if (total)
ratio = avg / total * 100.0;
- color = PERF_COLOR_NORMAL;
- if (ratio > 20.0)
- color = PERF_COLOR_RED;
- else if (ratio > 10.0)
- color = PERF_COLOR_MAGENTA;
- else if (ratio > 5.0)
- color = PERF_COLOR_YELLOW;
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
fprintf(output, " # ");
color_fprintf(output, color, "%6.2f%%", ratio);
@@ -1107,22 +1087,13 @@ static const struct option options[] = {
*/
static int add_default_attributes(void)
{
- struct perf_evsel *pos;
- size_t attr_nr = 0;
- size_t c;
-
/* Set attrs if no event is selected and !null_run: */
if (null_run)
return 0;
if (!evsel_list->nr_entries) {
- for (c = 0; c < ARRAY_SIZE(default_attrs); c++) {
- pos = perf_evsel__new(default_attrs + c, c + attr_nr);
- if (pos == NULL)
- return -1;
- perf_evlist__add(evsel_list, pos);
- }
- attr_nr += c;
+ if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0)
+ return -1;
}
/* Detailed events get appended to the event list: */
@@ -1131,38 +1102,21 @@ static int add_default_attributes(void)
return 0;
/* Append detailed run extra attributes: */
- for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) {
- pos = perf_evsel__new(detailed_attrs + c, c + attr_nr);
- if (pos == NULL)
- return -1;
- perf_evlist__add(evsel_list, pos);
- }
- attr_nr += c;
+ if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0)
+ return -1;
if (detailed_run < 2)
return 0;
/* Append very detailed run extra attributes: */
- for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) {
- pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr);
- if (pos == NULL)
- return -1;
- perf_evlist__add(evsel_list, pos);
- }
+ if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0)
+ return -1;
if (detailed_run < 3)
return 0;
/* Append very, very detailed run extra attributes: */
- for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) {
- pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr);
- if (pos == NULL)
- return -1;
- perf_evlist__add(evsel_list, pos);
- }
-
-
- return 0;
+ return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs);
}
int cmd_stat(int argc, const char **argv, const char *prefix __used)
@@ -1266,8 +1220,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
- perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
- perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
+ perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0)
goto out_free_fd;
}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 831d1baeac3..2b9a7f497a2 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,6 +7,7 @@
#include "util/cache.h"
#include "util/debug.h"
+#include "util/debugfs.h"
#include "util/evlist.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -14,8 +15,6 @@
#include "util/thread_map.h"
#include "../../include/linux/hw_breakpoint.h"
-static long page_size;
-
static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
{
bool *visited = symbol__priv(sym);
@@ -31,6 +30,7 @@ static int test__vmlinux_matches_kallsyms(void)
struct map *kallsyms_map, *vmlinux_map;
struct machine kallsyms, vmlinux;
enum map_type type = MAP__FUNCTION;
+ long page_size = sysconf(_SC_PAGE_SIZE);
struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", };
/*
@@ -247,7 +247,7 @@ static int trace_event__id(const char *evname)
if (asprintf(&filename,
"%s/syscalls/%s/id",
- debugfs_path, evname) < 0)
+ tracing_events_path, evname) < 0)
return -1;
fd = open(filename, O_RDONLY);
@@ -603,7 +603,7 @@ out_free_threads:
#define TEST_ASSERT_VAL(text, cond) \
do { \
- if (!cond) { \
+ if (!(cond)) { \
pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
return -1; \
} \
@@ -759,6 +759,103 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
return 0;
}
+static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_tracepoint(evlist);
+}
+
+static int
+test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ }
+
+ return test__checkevent_tracepoint_multi(evlist);
+}
+
+static int test__checkevent_raw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_raw(evlist);
+}
+
+static int test__checkevent_numeric_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_numeric(evlist);
+}
+
+static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_alias(evlist);
+}
+
+static int test__checkevent_genhw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_genhw(evlist);
+}
+
static struct test__event_st {
const char *name;
__u32 type;
@@ -808,6 +905,34 @@ static struct test__event_st {
.name = "mem:0:w",
.check = test__checkevent_breakpoint_w,
},
+ {
+ .name = "syscalls:sys_enter_open:k",
+ .check = test__checkevent_tracepoint_modifier,
+ },
+ {
+ .name = "syscalls:*:u",
+ .check = test__checkevent_tracepoint_multi_modifier,
+ },
+ {
+ .name = "r1:kp",
+ .check = test__checkevent_raw_modifier,
+ },
+ {
+ .name = "1:1:hp",
+ .check = test__checkevent_numeric_modifier,
+ },
+ {
+ .name = "instructions:h",
+ .check = test__checkevent_symbolic_name_modifier,
+ },
+ {
+ .name = "faults:u",
+ .check = test__checkevent_symbolic_alias_modifier,
+ },
+ {
+ .name = "L1-dcache-load-miss:kp",
+ .check = test__checkevent_genhw_modifier,
+ },
};
#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
@@ -841,6 +966,336 @@ static int test__parse_events(void)
return ret;
}
+
+/*
+ * Find the first CPU that @pid is allowed to run on.
+ *
+ * The affinity mask of @pid is fetched into a dynamically sized cpu set,
+ * starting at 1024 CPUs.  When sched_getaffinity() fails with EINVAL the set
+ * is enlarged by a factor of four and the call is retried, until the
+ * (1024 << 8) limit is reached (presumably EINVAL means the set was too
+ * small for the kernel's mask -- see sched_getaffinity(2)).
+ *
+ * On success the lowest set CPU number is returned and every other bit in
+ * the mask is cleared, so that *@maskp (with *@sizep as its size in bytes)
+ * can be handed straight to sched_setaffinity() to pin @pid to that CPU.
+ * The caller owns the mask and must release it with CPU_FREE().
+ *
+ * Returns -1 when the mask cannot be allocated, when sched_getaffinity()
+ * fails, or when no CPU is set in the mask; in those cases nothing is
+ * stored through @maskp/@sizep and no memory is left allocated.
+ */
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t **maskp,
+					 size_t *sizep)
+{
+	cpu_set_t *mask;
+	size_t size;
+	int i, cpu = -1, nrcpus = 1024;
+realloc:
+	mask = CPU_ALLOC(nrcpus);
+	if (mask == NULL) {
+		/* CPU_ZERO_S() below would dereference the NULL set. */
+		perror("CPU_ALLOC");
+		return -1;
+	}
+	size = CPU_ALLOC_SIZE(nrcpus);
+	CPU_ZERO_S(size, mask);
+
+	if (sched_getaffinity(pid, size, mask) == -1) {
+		/* Latch errno before CPU_FREE() gets a chance to change it. */
+		int saved_errno = errno;
+
+		CPU_FREE(mask);
+		if (saved_errno == EINVAL && nrcpus < (1024 << 8)) {
+			nrcpus = nrcpus << 2;
+			goto realloc;
+		}
+		errno = saved_errno;
+		perror("sched_getaffinity");
+		return -1;
+	}
+
+	/* Keep only the first allowed CPU in the mask, clear all others. */
+	for (i = 0; i < nrcpus; i++) {
+		if (CPU_ISSET_S(i, size, mask)) {
+			if (cpu == -1) {
+				cpu = i;
+				*maskp = mask;
+				*sizep = size;
+			} else
+				CPU_CLR_S(i, size, mask);
+		}
+	}
+
+	/* Empty mask: ownership was never handed to the caller. */
+	if (cpu == -1)
+		CPU_FREE(mask);
+
+	return cpu;
+}
+
+/*
+ * Run "sleep 1" as a workload and validate the PERF_RECORD_* events and the
+ * perf_sample fields read back from the evlist's mmap ring buffers.
+ *
+ * The workload is pinned to the first CPU it is allowed to run on, so that
+ * perf_sample.cpu can be checked on every event.  For each event the sample
+ * time must not go backwards and cpu/pid/tid must match the workload.  At
+ * the end exactly one PERF_RECORD_COMM is expected, together with
+ * PERF_RECORD_MMAP events for the command itself, libc, ld and [vdso].
+ *
+ * Returns 0 when the setup succeeded and no check failed, -1 otherwise.
+ */
+static int test__PERF_RECORD(void)
+{
+	struct perf_record_opts opts = {
+		.target_pid = -1,
+		.target_tid = -1,
+		.no_delay = true,
+		.freq = 10,
+		.mmap_pages = 256,
+		.sample_id_all_avail = true,
+	};
+	cpu_set_t *cpu_mask = NULL;
+	size_t cpu_mask_size = 0;
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+	struct perf_evsel *evsel;
+	struct perf_sample sample;
+	const char *cmd = "sleep";
+	const char *argv[] = { cmd, "1", NULL, };
+	char *bname;
+	u64 sample_type, prev_time = 0;
+	bool found_cmd_mmap = false,
+	     found_libc_mmap = false,
+	     found_vdso_mmap = false,
+	     found_ld_mmap = false;
+	int err = -1, errs = 0, i, wakeups = 0, sample_size;
+	u32 cpu;
+	int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
+
+	/* argv is a local array and can never be NULL, only evlist can. */
+	if (evlist == NULL) {
+		pr_debug("Not enough memory to create evlist\n");
+		goto out;
+	}
+
+	/*
+	 * We need at least one evsel in the evlist, use the default
+	 * one: "cycles".
+	 */
+	err = perf_evlist__add_default(evlist);
+	if (err < 0) {
+		pr_debug("Not enough memory to create evsel\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Create maps of threads and cpus to monitor. In this case
+	 * we start with all threads and cpus (-1, -1) but then in
+	 * perf_evlist__prepare_workload we'll fill in the only thread
+	 * we're monitoring, the one forked there.
+	 */
+	err = perf_evlist__create_maps(evlist, opts.target_pid,
+				       opts.target_tid, opts.cpu_list);
+	if (err < 0) {
+		pr_debug("Not enough memory to create thread/cpu maps\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Prepare the workload in argv[] to run, it'll fork it, and then wait
+	 * for perf_evlist__start_workload() to exec it. This is done this way
+	 * so that we have time to open the evlist (calling sys_perf_event_open
+	 * on all the fds) and then mmap them.
+	 */
+	err = perf_evlist__prepare_workload(evlist, &opts, argv);
+	if (err < 0) {
+		pr_debug("Couldn't run the workload!\n");
+		goto out_delete_evlist;
+	}
+
+	/*
+	 * Config the evsels, setting attr->comm on the first one, etc.
+	 */
+	evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
+	evsel->attr.sample_type |= PERF_SAMPLE_CPU;
+	evsel->attr.sample_type |= PERF_SAMPLE_TID;
+	evsel->attr.sample_type |= PERF_SAMPLE_TIME;
+	perf_evlist__config_attrs(evlist, &opts);
+
+	/* On failure the helper has not handed us a mask, nothing to free. */
+	err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask,
+					    &cpu_mask_size);
+	if (err < 0) {
+		pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	cpu = err;
+
+	/*
+	 * So that we can check perf_sample.cpu on all the samples.
+	 */
+	if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, cpu_mask) < 0) {
+		pr_debug("sched_setaffinity: %s\n", strerror(errno));
+		/* err still holds the CPU number (>= 0): flag the failure. */
+		err = -1;
+		goto out_free_cpu_mask;
+	}
+
+	/*
+	 * Call sys_perf_event_open on all the fds on all the evsels,
+	 * grouping them if asked to.
+	 */
+	err = perf_evlist__open(evlist, opts.group);
+	if (err < 0) {
+		pr_debug("perf_evlist__open: %s\n", strerror(errno));
+		/* cpu_mask is allocated by now, don't leak it. */
+		goto out_free_cpu_mask;
+	}
+
+	/*
+	 * mmap the first fd on a given CPU and ask for events for the other
+	 * fds in the same CPU to be injected in the same mmap ring buffer
+	 * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
+	 */
+	err = perf_evlist__mmap(evlist, opts.mmap_pages, false);
+	if (err < 0) {
+		pr_debug("perf_evlist__mmap: %s\n", strerror(errno));
+		/* Nothing was mapped, but cpu_mask still has to be freed. */
+		goto out_free_cpu_mask;
+	}
+
+	/*
+	 * We'll need these two to parse the PERF_SAMPLE_* fields in each
+	 * event.
+	 */
+	sample_type = perf_evlist__sample_type(evlist);
+	sample_size = __perf_evsel__sample_size(sample_type);
+
+	/*
+	 * Now that all is properly set up, enable the events, they will
+	 * count just on workload.pid, which will start...
+	 */
+	perf_evlist__enable(evlist);
+
+	/*
+	 * Now!
+	 */
+	perf_evlist__start_workload(evlist);
+
+	while (1) {
+		int before = total_events;
+
+		for (i = 0; i < evlist->nr_mmaps; i++) {
+			union perf_event *event;
+
+			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+				const u32 type = event->header.type;
+				const char *name = perf_event__name(type);
+
+				++total_events;
+				if (type < PERF_RECORD_MAX)
+					nr_events[type]++;
+
+				err = perf_event__parse_sample(event, sample_type,
+							       sample_size, true,
+							       &sample, false);
+				if (err < 0) {
+					if (verbose)
+						perf_event__fprintf(event, stderr);
+					pr_debug("Couldn't parse sample\n");
+					goto out_err;
+				}
+
+				if (verbose) {
+					pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
+					perf_event__fprintf(event, stderr);
+				}
+
+				if (prev_time > sample.time) {
+					pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
+						 name, prev_time, sample.time);
+					++errs;
+				}
+
+				prev_time = sample.time;
+
+				if (sample.cpu != cpu) {
+					pr_debug("%s with unexpected cpu, expected %d, got %d\n",
+						 name, cpu, sample.cpu);
+					++errs;
+				}
+
+				if ((pid_t)sample.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.pid);
+					++errs;
+				}
+
+				if ((pid_t)sample.tid != evlist->workload.pid) {
+					pr_debug("%s with unexpected tid, expected %d, got %d\n",
+						 name, evlist->workload.pid, sample.tid);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP ||
+				     type == PERF_RECORD_FORK ||
+				     type == PERF_RECORD_EXIT) &&
+				     (pid_t)event->comm.pid != evlist->workload.pid) {
+					pr_debug("%s with unexpected pid/tid\n", name);
+					++errs;
+				}
+
+				if ((type == PERF_RECORD_COMM ||
+				     type == PERF_RECORD_MMAP) &&
+				     event->comm.pid != event->comm.tid) {
+					pr_debug("%s with different pid/tid!\n", name);
+					++errs;
+				}
+
+				switch (type) {
+				case PERF_RECORD_COMM:
+					if (strcmp(event->comm.comm, cmd)) {
+						pr_debug("%s with unexpected comm!\n", name);
+						++errs;
+					}
+					break;
+				case PERF_RECORD_EXIT:
+					goto found_exit;
+				case PERF_RECORD_MMAP:
+					/* Match on the basename when there is a path. */
+					bname = strrchr(event->mmap.filename, '/');
+					if (bname != NULL) {
+						if (!found_cmd_mmap)
+							found_cmd_mmap = !strcmp(bname + 1, cmd);
+						if (!found_libc_mmap)
+							found_libc_mmap = !strncmp(bname + 1, "libc", 4);
+						if (!found_ld_mmap)
+							found_ld_mmap = !strncmp(bname + 1, "ld", 2);
+					} else if (!found_vdso_mmap)
+						found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
+					break;
+
+				case PERF_RECORD_SAMPLE:
+					/* Just ignore samples for now */
+					break;
+				default:
+					pr_debug("Unexpected perf_event->header.type %d!\n",
+						 type);
+					++errs;
+				}
+			}
+		}
+
+		/*
+		 * We don't use poll here because at least at 3.1 times the
+		 * PERF_RECORD_{!SAMPLE} events don't honour
+		 * perf_event_attr.wakeup_events, just PERF_EVENT_SAMPLE does.
+		 */
+		if (total_events == before && false)
+			poll(evlist->pollfd, evlist->nr_fds, -1);
+
+		/* Poll by sleeping instead; give up after a few rounds. */
+		sleep(1);
+		if (++wakeups > 5) {
+			pr_debug("No PERF_RECORD_EXIT event!\n");
+			break;
+		}
+	}
+
+found_exit:
+	if (nr_events[PERF_RECORD_COMM] > 1) {
+		pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
+		++errs;
+	}
+
+	if (nr_events[PERF_RECORD_COMM] == 0) {
+		pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
+		++errs;
+	}
+
+	if (!found_cmd_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
+		++errs;
+	}
+
+	if (!found_libc_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
+		++errs;
+	}
+
+	if (!found_ld_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
+		++errs;
+	}
+
+	if (!found_vdso_mmap) {
+		pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
+		++errs;
+	}
+out_err:
+	perf_evlist__munmap(evlist);
+out_free_cpu_mask:
+	CPU_FREE(cpu_mask);
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return (err < 0 || errs > 0) ? -1 : 0;
+}
+
static struct test {
const char *desc;
int (*func)(void);
@@ -866,45 +1321,89 @@ static struct test {
.func = test__parse_events,
},
{
+ .desc = "Validate PERF_RECORD_* events & perf_sample fields",
+ .func = test__PERF_RECORD,
+ },
+ {
.func = NULL,
},
};
-static int __cmd_test(void)
+/*
+ * Decide whether test number @curr (0-based index into tests[]) was
+ * selected by the command line arguments in @argv.
+ *
+ * With no arguments every test matches.  Otherwise each argument is tried
+ * in turn: one that parses completely as a base-10 number selects the test
+ * whose 1-based number equals it, anything else selects tests whose
+ * description contains the argument as a substring.
+ *
+ * Returns true if any argument selects the test, false otherwise.
+ */
+static bool perf_test__matches(int curr, int argc, const char *argv[])
{
- int i = 0;
+ int i;
+
+ if (argc == 0)
+ return true;
- page_size = sysconf(_SC_PAGE_SIZE);
+ for (i = 0; i < argc; ++i) {
+ char *end;
+ /* NOTE(review): strtoul() result is stored in a signed long. */
+ long nr = strtoul(argv[i], &end, 10);
+
+ /*
+ * The whole argument was consumed: treat it purely as a test
+ * number and never fall back to substring matching.  An empty
+ * argument also lands here.
+ */
+ if (*end == '\0') {
+ if (nr == curr + 1)
+ return true;
+ continue;
+ }
+
+ if (strstr(tests[curr].desc, argv[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static int __cmd_test(int argc, const char *argv[])
+{
+ int i = 0;
while (tests[i].func) {
- int err;
- pr_info("%2d: %s:", i + 1, tests[i].desc);
+ int curr = i++, err;
+
+ if (!perf_test__matches(curr, argc, argv))
+ continue;
+
+ pr_info("%2d: %s:", i, tests[curr].desc);
pr_debug("\n--- start ---\n");
- err = tests[i].func();
- pr_debug("---- end ----\n%s:", tests[i].desc);
+ err = tests[curr].func();
+ pr_debug("---- end ----\n%s:", tests[curr].desc);
pr_info(" %s\n", err ? "FAILED!\n" : "Ok");
- ++i;
}
return 0;
}
-static const char * const test_usage[] = {
- "perf test [<options>]",
- NULL,
-};
+/*
+ * Print the number and description of each entry in tests[], walking the
+ * table until the entry whose .func is NULL.
+ *
+ * When a second argument is present (argv[1]; cmd_test() calls this when
+ * argv[0] is "list") only tests whose description contains it as a
+ * substring are printed.
+ *
+ * Always returns 0.
+ */
+static int perf_test__list(int argc, const char **argv)
+{
+ int i = 0;
+
+ while (tests[i].func) {
+ /* After the post-increment, i is the 1-based test number. */
+ int curr = i++;
-static const struct option test_options[] = {
+ if (argc > 1 && !strstr(tests[curr].desc, argv[1]))
+ continue;
+
+ pr_info("%2d: %s\n", i, tests[curr].desc);
+ }
+
+ return 0;
+}
+
+int cmd_test(int argc, const char **argv, const char *prefix __used)
+{
+ const char * const test_usage[] = {
+ "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
+ NULL,
+ };
+ const struct option test_options[] = {
OPT_INTEGER('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_END()
-};
+ };
-int cmd_test(int argc, const char **argv, const char *prefix __used)
-{
argc = parse_options(argc, argv, test_options, test_usage, 0);
- if (argc)
- usage_with_options(test_usage, test_options);
+ if (argc >= 1 && !strcmp(argv[0], "list"))
+ return perf_test__list(argc, argv);
symbol_conf.priv_size = sizeof(int);
symbol_conf.sort_by_name = true;
@@ -915,5 +1414,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
setup_pager();
- return __cmd_test();
+ return __cmd_test(argc, argv);
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index aa26f4d66d1..3b75b2e21ea 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -19,6 +19,7 @@
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
+#include "util/evsel.h"
#include <linux/rbtree.h>
#include "util/symbol.h"
#include "util/callchain.h"
@@ -31,13 +32,14 @@
#include "util/event.h"
#include "util/session.h"
#include "util/svghelper.h"
+#include "util/tool.h"
#define SUPPORT_OLD_POWER_EVENTS 1
#define PWR_EVENT_EXIT -1
-static char const *input_name = "perf.data";
-static char const *output_name = "output.svg";
+static const char *input_name;
+static const char *output_name = "output.svg";
static unsigned int numcpus;
static u64 min_freq; /* Lowest CPU frequency seen */
@@ -273,25 +275,28 @@ static int cpus_cstate_state[MAX_CPUS];
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];
-static int process_comm_event(union perf_event *event,
+static int process_comm_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session __used)
+ struct machine *machine __used)
{
pid_set_comm(event->comm.tid, event->comm.comm);
return 0;
}
-static int process_fork_event(union perf_event *event,
+static int process_fork_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session __used)
+ struct machine *machine __used)
{
pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
return 0;
}
-static int process_exit_event(union perf_event *event,
+static int process_exit_event(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session __used)
+ struct machine *machine __used)
{
pid_exit(event->fork.pid, event->fork.time);
return 0;
@@ -486,14 +491,15 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
}
-static int process_sample_event(union perf_event *event __used,
+static int process_sample_event(struct perf_tool *tool __used,
+ union perf_event *event __used,
struct perf_sample *sample,
- struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct perf_evsel *evsel,
+ struct machine *machine __used)
{
struct trace_entry *te;
- if (session->sample_type & PERF_SAMPLE_TIME) {
+ if (evsel->attr.sample_type & PERF_SAMPLE_TIME) {
if (!first_time || first_time > sample->time)
first_time = sample->time;
if (last_time < sample->time)
@@ -501,7 +507,7 @@ static int process_sample_event(union perf_event *event __used,
}
te = (void *)sample->raw_data;
- if (session->sample_type & PERF_SAMPLE_RAW && sample->raw_size > 0) {
+ if ((evsel->attr.sample_type & PERF_SAMPLE_RAW) && sample->raw_size > 0) {
char *event_str;
#ifdef SUPPORT_OLD_POWER_EVENTS
struct power_entry_old *peo;
@@ -974,7 +980,7 @@ static void write_svg_file(const char *filename)
svg_close();
}
-static struct perf_event_ops event_ops = {
+static struct perf_tool perf_timechart = {
.comm = process_comm_event,
.fork = process_fork_event,
.exit = process_exit_event,
@@ -985,7 +991,7 @@ static struct perf_event_ops event_ops = {
static int __cmd_timechart(void)
{
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
- 0, false, &event_ops);
+ 0, false, &perf_timechart);
int ret = -EINVAL;
if (session == NULL)
@@ -994,7 +1000,7 @@ static int __cmd_timechart(void)
if (!perf_session__has_traces(session, "timechart record"))
goto out_delete;
- ret = perf_session__process_events(session, &event_ops);
+ ret = perf_session__process_events(session, &perf_timechart);
if (ret)
goto out_delete;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c9cdedb5813..4f81eeb9987 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,44 +64,6 @@
#include <linux/unistd.h>
#include <linux/types.h>
-static struct perf_top top = {
- .count_filter = 5,
- .delay_secs = 2,
- .target_pid = -1,
- .target_tid = -1,
- .freq = 1000, /* 1 KHz */
-};
-
-static bool system_wide = false;
-
-static bool use_tui, use_stdio;
-
-static bool sort_has_symbols;
-
-static bool dont_use_callchains;
-static char callchain_default_opt[] = "fractal,0.5,callee";
-
-
-static int default_interval = 0;
-
-static bool kptr_restrict_warned;
-static bool vmlinux_warned;
-static bool inherit = false;
-static int realtime_prio = 0;
-static bool group = false;
-static bool sample_id_all_avail = true;
-static unsigned int mmap_pages = 128;
-
-static bool dump_symtab = false;
-
-static struct winsize winsize;
-
-static const char *sym_filter = NULL;
-static int sym_pcnt_filter = 5;
-
-/*
- * Source functions
- */
void get_term_dimensions(struct winsize *ws)
{
@@ -125,21 +87,23 @@ void get_term_dimensions(struct winsize *ws)
ws->ws_col = 80;
}
-static void update_print_entries(struct winsize *ws)
+static void perf_top__update_print_entries(struct perf_top *top)
{
- top.print_entries = ws->ws_row;
+ top->print_entries = top->winsize.ws_row;
- if (top.print_entries > 9)
- top.print_entries -= 9;
+ if (top->print_entries > 9)
+ top->print_entries -= 9;
}
-static void sig_winch_handler(int sig __used)
+static void perf_top__sig_winch(int sig __used, siginfo_t *info __used, void *arg)
{
- get_term_dimensions(&winsize);
- update_print_entries(&winsize);
+ struct perf_top *top = arg;
+
+ get_term_dimensions(&top->winsize);
+ perf_top__update_print_entries(top);
}
-static int parse_source(struct hist_entry *he)
+static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
{
struct symbol *sym;
struct annotation *notes;
@@ -170,7 +134,7 @@ static int parse_source(struct hist_entry *he)
pthread_mutex_lock(&notes->lock);
- if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
+ if (symbol__alloc_hist(sym) < 0) {
pthread_mutex_unlock(&notes->lock);
pr_err("Not enough memory for annotating '%s' symbol!\n",
sym->name);
@@ -181,7 +145,7 @@ static int parse_source(struct hist_entry *he)
err = symbol__annotate(sym, map, 0);
if (err == 0) {
out_assign:
- top.sym_filter_entry = he;
+ top->sym_filter_entry = he;
}
pthread_mutex_unlock(&notes->lock);
@@ -194,14 +158,16 @@ static void __zero_source_counters(struct hist_entry *he)
symbol__annotate_zero_histograms(sym);
}
-static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
+static void perf_top__record_precise_ip(struct perf_top *top,
+ struct hist_entry *he,
+ int counter, u64 ip)
{
struct annotation *notes;
struct symbol *sym;
if (he == NULL || he->ms.sym == NULL ||
- ((top.sym_filter_entry == NULL ||
- top.sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
+ ((top->sym_filter_entry == NULL ||
+ top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
return;
sym = he->ms.sym;
@@ -210,8 +176,7 @@ static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
if (pthread_mutex_trylock(&notes->lock))
return;
- if (notes->src == NULL &&
- symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
+ if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
pthread_mutex_unlock(&notes->lock);
pr_err("Not enough memory for annotating '%s' symbol!\n",
sym->name);
@@ -225,8 +190,9 @@ static void record_precise_ip(struct hist_entry *he, int counter, u64 ip)
pthread_mutex_unlock(&notes->lock);
}
-static void show_details(struct hist_entry *he)
+static void perf_top__show_details(struct perf_top *top)
{
+ struct hist_entry *he = top->sym_filter_entry;
struct annotation *notes;
struct symbol *symbol;
int more;
@@ -242,15 +208,15 @@ static void show_details(struct hist_entry *he)
if (notes->src == NULL)
goto out_unlock;
- printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
- printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
+ printf("Showing %s for %s\n", event_name(top->sym_evsel), symbol->name);
+ printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
- more = symbol__annotate_printf(symbol, he->ms.map, top.sym_evsel->idx,
- 0, sym_pcnt_filter, top.print_entries, 4);
- if (top.zero)
- symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
+ more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
+ 0, top->sym_pcnt_filter, top->print_entries, 4);
+ if (top->zero)
+ symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
else
- symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
+ symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
if (more != 0)
printf("%d lines not displayed, maybe increase display entries [e]\n", more);
out_unlock:
@@ -259,11 +225,9 @@ out_unlock:
static const char CONSOLE_CLEAR[] = "";
-static struct hist_entry *
- perf_session__add_hist_entry(struct perf_session *session,
- struct addr_location *al,
- struct perf_sample *sample,
- struct perf_evsel *evsel)
+static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
+ struct addr_location *al,
+ struct perf_sample *sample)
{
struct hist_entry *he;
@@ -271,50 +235,51 @@ static struct hist_entry *
if (he == NULL)
return NULL;
- session->hists.stats.total_period += sample->period;
+ evsel->hists.stats.total_period += sample->period;
hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
return he;
}
-static void print_sym_table(void)
+static void perf_top__print_sym_table(struct perf_top *top)
{
char bf[160];
int printed = 0;
- const int win_width = winsize.ws_col - 1;
+ const int win_width = top->winsize.ws_col - 1;
puts(CONSOLE_CLEAR);
- perf_top__header_snprintf(&top, bf, sizeof(bf));
+ perf_top__header_snprintf(top, bf, sizeof(bf));
printf("%s\n", bf);
- perf_top__reset_sample_counters(&top);
+ perf_top__reset_sample_counters(top);
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (top.sym_evsel->hists.stats.nr_lost_warned !=
- top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
- top.sym_evsel->hists.stats.nr_lost_warned =
- top.sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
+ if (top->sym_evsel->hists.stats.nr_lost_warned !=
+ top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
+ top->sym_evsel->hists.stats.nr_lost_warned =
+ top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
color_fprintf(stdout, PERF_COLOR_RED,
"WARNING: LOST %d chunks, Check IO/CPU overload",
- top.sym_evsel->hists.stats.nr_lost_warned);
+ top->sym_evsel->hists.stats.nr_lost_warned);
++printed;
}
- if (top.sym_filter_entry) {
- show_details(top.sym_filter_entry);
+ if (top->sym_filter_entry) {
+ perf_top__show_details(top);
return;
}
- hists__collapse_resort_threaded(&top.sym_evsel->hists);
- hists__output_resort_threaded(&top.sym_evsel->hists);
- hists__decay_entries_threaded(&top.sym_evsel->hists,
- top.hide_user_symbols,
- top.hide_kernel_symbols);
- hists__output_recalc_col_len(&top.sym_evsel->hists, winsize.ws_row - 3);
+ hists__collapse_resort_threaded(&top->sym_evsel->hists);
+ hists__output_resort_threaded(&top->sym_evsel->hists);
+ hists__decay_entries_threaded(&top->sym_evsel->hists,
+ top->hide_user_symbols,
+ top->hide_kernel_symbols);
+ hists__output_recalc_col_len(&top->sym_evsel->hists,
+ top->winsize.ws_row - 3);
putchar('\n');
- hists__fprintf(&top.sym_evsel->hists, NULL, false, false,
- winsize.ws_row - 4 - printed, win_width, stdout);
+ hists__fprintf(&top->sym_evsel->hists, NULL, false, false,
+ top->winsize.ws_row - 4 - printed, win_width, stdout);
}
static void prompt_integer(int *target, const char *msg)
@@ -352,17 +317,17 @@ static void prompt_percent(int *target, const char *msg)
*target = tmp;
}
-static void prompt_symbol(struct hist_entry **target, const char *msg)
+static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
{
char *buf = malloc(0), *p;
- struct hist_entry *syme = *target, *n, *found = NULL;
+ struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
struct rb_node *next;
size_t dummy = 0;
/* zero counters of active symbol */
if (syme) {
__zero_source_counters(syme);
- *target = NULL;
+ top->sym_filter_entry = NULL;
}
fprintf(stdout, "\n%s: ", msg);
@@ -373,7 +338,7 @@ static void prompt_symbol(struct hist_entry **target, const char *msg)
if (p)
*p = 0;
- next = rb_first(&top.sym_evsel->hists.entries);
+ next = rb_first(&top->sym_evsel->hists.entries);
while (next) {
n = rb_entry(next, struct hist_entry, rb_node);
if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
@@ -386,47 +351,46 @@ static void prompt_symbol(struct hist_entry **target, const char *msg)
if (!found) {
fprintf(stderr, "Sorry, %s is not active.\n", buf);
sleep(1);
- return;
} else
- parse_source(found);
+ perf_top__parse_source(top, found);
out_free:
free(buf);
}
-static void print_mapped_keys(void)
+static void perf_top__print_mapped_keys(struct perf_top *top)
{
char *name = NULL;
- if (top.sym_filter_entry) {
- struct symbol *sym = top.sym_filter_entry->ms.sym;
+ if (top->sym_filter_entry) {
+ struct symbol *sym = top->sym_filter_entry->ms.sym;
name = sym->name;
}
fprintf(stdout, "\nMapped keys:\n");
- fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs);
- fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries);
+ fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs);
+ fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
- if (top.evlist->nr_entries > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel));
+ if (top->evlist->nr_entries > 1)
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top->sym_evsel));
- fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter);
+ fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
- fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
+ fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
fprintf(stdout, "\t[S] stop annotation.\n");
fprintf(stdout,
"\t[K] hide kernel_symbols symbols. \t(%s)\n",
- top.hide_kernel_symbols ? "yes" : "no");
+ top->hide_kernel_symbols ? "yes" : "no");
fprintf(stdout,
"\t[U] hide user symbols. \t(%s)\n",
- top.hide_user_symbols ? "yes" : "no");
- fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0);
+ top->hide_user_symbols ? "yes" : "no");
+ fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0);
fprintf(stdout, "\t[qQ] quit.\n");
}
-static int key_mapped(int c)
+static int perf_top__key_mapped(struct perf_top *top, int c)
{
switch (c) {
case 'd':
@@ -442,7 +406,7 @@ static int key_mapped(int c)
case 'S':
return 1;
case 'E':
- return top.evlist->nr_entries > 1 ? 1 : 0;
+ return top->evlist->nr_entries > 1 ? 1 : 0;
default:
break;
}
@@ -450,13 +414,13 @@ static int key_mapped(int c)
return 0;
}
-static void handle_keypress(int c)
+static void perf_top__handle_keypress(struct perf_top *top, int c)
{
- if (!key_mapped(c)) {
+ if (!perf_top__key_mapped(top, c)) {
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
struct termios tc, save;
- print_mapped_keys();
+ perf_top__print_mapped_keys(top);
fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
fflush(stdout);
@@ -471,81 +435,86 @@ static void handle_keypress(int c)
c = getc(stdin);
tcsetattr(0, TCSAFLUSH, &save);
- if (!key_mapped(c))
+ if (!perf_top__key_mapped(top, c))
return;
}
switch (c) {
case 'd':
- prompt_integer(&top.delay_secs, "Enter display delay");
- if (top.delay_secs < 1)
- top.delay_secs = 1;
+ prompt_integer(&top->delay_secs, "Enter display delay");
+ if (top->delay_secs < 1)
+ top->delay_secs = 1;
break;
case 'e':
- prompt_integer(&top.print_entries, "Enter display entries (lines)");
- if (top.print_entries == 0) {
- sig_winch_handler(SIGWINCH);
- signal(SIGWINCH, sig_winch_handler);
+ prompt_integer(&top->print_entries, "Enter display entries (lines)");
+ if (top->print_entries == 0) {
+ struct sigaction act = {
+ .sa_sigaction = perf_top__sig_winch,
+ .sa_flags = SA_SIGINFO,
+ };
+ perf_top__sig_winch(SIGWINCH, NULL, top);
+ sigaction(SIGWINCH, &act, NULL);
} else
signal(SIGWINCH, SIG_DFL);
break;
case 'E':
- if (top.evlist->nr_entries > 1) {
+ if (top->evlist->nr_entries > 1) {
/* Select 0 as the default event: */
int counter = 0;
fprintf(stderr, "\nAvailable events:");
- list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
- fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
+ list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
+ fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, event_name(top->sym_evsel));
prompt_integer(&counter, "Enter details event counter");
- if (counter >= top.evlist->nr_entries) {
- top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
- fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
+ if (counter >= top->evlist->nr_entries) {
+ top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
+ fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top->sym_evsel));
sleep(1);
break;
}
- list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
- if (top.sym_evsel->idx == counter)
+ list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
+ if (top->sym_evsel->idx == counter)
break;
} else
- top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
+ top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node);
break;
case 'f':
- prompt_integer(&top.count_filter, "Enter display event count filter");
+ prompt_integer(&top->count_filter, "Enter display event count filter");
break;
case 'F':
- prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
+ prompt_percent(&top->sym_pcnt_filter,
+ "Enter details display event filter (percent)");
break;
case 'K':
- top.hide_kernel_symbols = !top.hide_kernel_symbols;
+ top->hide_kernel_symbols = !top->hide_kernel_symbols;
break;
case 'q':
case 'Q':
printf("exiting.\n");
- if (dump_symtab)
- perf_session__fprintf_dsos(top.session, stderr);
+ if (top->dump_symtab)
+ perf_session__fprintf_dsos(top->session, stderr);
exit(0);
case 's':
- prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
+ perf_top__prompt_symbol(top, "Enter details symbol");
break;
case 'S':
- if (!top.sym_filter_entry)
+ if (!top->sym_filter_entry)
break;
else {
- struct hist_entry *syme = top.sym_filter_entry;
+ struct hist_entry *syme = top->sym_filter_entry;
- top.sym_filter_entry = NULL;
+ top->sym_filter_entry = NULL;
__zero_source_counters(syme);
}
break;
case 'U':
- top.hide_user_symbols = !top.hide_user_symbols;
+ top->hide_user_symbols = !top->hide_user_symbols;
break;
case 'z':
- top.zero = !top.zero;
+ top->zero = !top->zero;
break;
default:
break;
@@ -563,28 +532,30 @@ static void perf_top__sort_new_samples(void *arg)
hists__collapse_resort_threaded(&t->sym_evsel->hists);
hists__output_resort_threaded(&t->sym_evsel->hists);
hists__decay_entries_threaded(&t->sym_evsel->hists,
- top.hide_user_symbols,
- top.hide_kernel_symbols);
+ t->hide_user_symbols,
+ t->hide_kernel_symbols);
}
-static void *display_thread_tui(void *arg __used)
+static void *display_thread_tui(void *arg)
{
+ struct perf_top *top = arg;
const char *help = "For a higher level overview, try: perf top --sort comm,dso";
- perf_top__sort_new_samples(&top);
- perf_evlist__tui_browse_hists(top.evlist, help,
+ perf_top__sort_new_samples(top);
+ perf_evlist__tui_browse_hists(top->evlist, help,
perf_top__sort_new_samples,
- &top, top.delay_secs);
+ top, top->delay_secs);
exit_browser(0);
exit(0);
return NULL;
}
-static void *display_thread(void *arg __used)
+static void *display_thread(void *arg)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
struct termios tc, save;
+ struct perf_top *top = arg;
int delay_msecs, c;
tcgetattr(0, &save);
@@ -595,13 +566,13 @@ static void *display_thread(void *arg __used)
pthread__unblock_sigwinch();
repeat:
- delay_msecs = top.delay_secs * 1000;
+ delay_msecs = top->delay_secs * 1000;
tcsetattr(0, TCSANOW, &tc);
/* trash return*/
getc(stdin);
while (1) {
- print_sym_table();
+ perf_top__print_sym_table(top);
/*
* Either timeout expired or we got an EINTR due to SIGWINCH,
* refresh screen in both cases.
@@ -621,7 +592,7 @@ process_hotkey:
c = getc(stdin);
tcsetattr(0, TCSAFLUSH, &save);
- handle_keypress(c);
+ perf_top__handle_keypress(top, c);
goto repeat;
return NULL;
@@ -673,47 +644,17 @@ static int symbol_filter(struct map *map __used, struct symbol *sym)
return 0;
}
-static void perf_event__process_sample(const union perf_event *event,
+static void perf_event__process_sample(struct perf_tool *tool,
+ const union perf_event *event,
struct perf_evsel *evsel,
struct perf_sample *sample,
- struct perf_session *session)
+ struct machine *machine)
{
+ struct perf_top *top = container_of(tool, struct perf_top, tool);
struct symbol *parent = NULL;
u64 ip = event->ip.ip;
struct addr_location al;
- struct machine *machine;
int err;
- u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
- ++top.samples;
-
- switch (origin) {
- case PERF_RECORD_MISC_USER:
- ++top.us_samples;
- if (top.hide_user_symbols)
- return;
- machine = perf_session__find_host_machine(session);
- break;
- case PERF_RECORD_MISC_KERNEL:
- ++top.kernel_samples;
- if (top.hide_kernel_symbols)
- return;
- machine = perf_session__find_host_machine(session);
- break;
- case PERF_RECORD_MISC_GUEST_KERNEL:
- ++top.guest_kernel_samples;
- machine = perf_session__find_machine(session, event->ip.pid);
- break;
- case PERF_RECORD_MISC_GUEST_USER:
- ++top.guest_us_samples;
- /*
- * TODO: we don't process guest user from host side
- * except simple counting.
- */
- return;
- default:
- return;
- }
if (!machine && perf_guest) {
pr_err("Can't find guest [%d]'s kernel information\n",
@@ -722,14 +663,14 @@ static void perf_event__process_sample(const union perf_event *event,
}
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
- top.exact_samples++;
+ top->exact_samples++;
- if (perf_event__preprocess_sample(event, session, &al, sample,
+ if (perf_event__preprocess_sample(event, machine, &al, sample,
symbol_filter) < 0 ||
al.filtered)
return;
- if (!kptr_restrict_warned &&
+ if (!top->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
al.cpumode == PERF_RECORD_MISC_KERNEL) {
ui__warning(
@@ -740,7 +681,7 @@ static void perf_event__process_sample(const union perf_event *event,
" modules" : "");
if (use_browser <= 0)
sleep(5);
- kptr_restrict_warned = true;
+ top->kptr_restrict_warned = true;
}
if (al.sym == NULL) {
@@ -756,7 +697,7 @@ static void perf_event__process_sample(const union perf_event *event,
* --hide-kernel-symbols, even if the user specifies an
* invalid --vmlinux ;-)
*/
- if (!kptr_restrict_warned && !vmlinux_warned &&
+ if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
if (symbol_conf.vmlinux_name) {
@@ -769,7 +710,7 @@ static void perf_event__process_sample(const union perf_event *event,
if (use_browser <= 0)
sleep(5);
- vmlinux_warned = true;
+ top->vmlinux_warned = true;
}
}
@@ -778,70 +719,109 @@ static void perf_event__process_sample(const union perf_event *event,
if ((sort__has_parent || symbol_conf.use_callchain) &&
sample->callchain) {
- err = perf_session__resolve_callchain(session, al.thread,
- sample->callchain, &parent);
+ err = machine__resolve_callchain(machine, evsel, al.thread,
+ sample->callchain, &parent);
if (err)
return;
}
- he = perf_session__add_hist_entry(session, &al, sample, evsel);
+ he = perf_evsel__add_hist_entry(evsel, &al, sample);
if (he == NULL) {
pr_err("Problem incrementing symbol period, skipping event\n");
return;
}
if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain, &session->callchain_cursor,
+ err = callchain_append(he->callchain, &evsel->hists.callchain_cursor,
sample->period);
if (err)
return;
}
- if (sort_has_symbols)
- record_precise_ip(he, evsel->idx, ip);
+ if (top->sort_has_symbols)
+ perf_top__record_precise_ip(top, he, evsel->idx, ip);
}
return;
}
-static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
+static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
{
struct perf_sample sample;
struct perf_evsel *evsel;
+ struct perf_session *session = top->session;
union perf_event *event;
+ struct machine *machine;
+ u8 origin;
int ret;
- while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
- ret = perf_session__parse_sample(self, event, &sample);
+ while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
+ ret = perf_session__parse_sample(session, event, &sample);
if (ret) {
pr_err("Can't parse sample, err = %d\n", ret);
continue;
}
- evsel = perf_evlist__id2evsel(self->evlist, sample.id);
+ evsel = perf_evlist__id2evsel(session->evlist, sample.id);
assert(evsel != NULL);
+ origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
if (event->header.type == PERF_RECORD_SAMPLE)
- perf_event__process_sample(event, evsel, &sample, self);
- else if (event->header.type < PERF_RECORD_MAX) {
+ ++top->samples;
+
+ switch (origin) {
+ case PERF_RECORD_MISC_USER:
+ ++top->us_samples;
+ if (top->hide_user_symbols)
+ continue;
+ machine = perf_session__find_host_machine(session);
+ break;
+ case PERF_RECORD_MISC_KERNEL:
+ ++top->kernel_samples;
+ if (top->hide_kernel_symbols)
+ continue;
+ machine = perf_session__find_host_machine(session);
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ ++top->guest_kernel_samples;
+ machine = perf_session__find_machine(session, event->ip.pid);
+ break;
+ case PERF_RECORD_MISC_GUEST_USER:
+ ++top->guest_us_samples;
+ /*
+ * TODO: we don't process guest user from host side
+ * except simple counting.
+ */
+ /* Fall thru */
+ default:
+ continue;
+ }
+
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ perf_event__process_sample(&top->tool, event, evsel,
+ &sample, machine);
+ } else if (event->header.type < PERF_RECORD_MAX) {
hists__inc_nr_events(&evsel->hists, event->header.type);
- perf_event__process(event, &sample, self);
+ perf_event__process(&top->tool, event, &sample, machine);
} else
- ++self->hists.stats.nr_unknown_events;
+ ++session->hists.stats.nr_unknown_events;
}
}
-static void perf_session__mmap_read(struct perf_session *self)
+static void perf_top__mmap_read(struct perf_top *top)
{
int i;
- for (i = 0; i < top.evlist->nr_mmaps; i++)
- perf_session__mmap_read_idx(self, i);
+ for (i = 0; i < top->evlist->nr_mmaps; i++)
+ perf_top__mmap_read_idx(top, i);
}
-static void start_counters(struct perf_evlist *evlist)
+static void perf_top__start_counters(struct perf_top *top)
{
struct perf_evsel *counter, *first;
+ struct perf_evlist *evlist = top->evlist;
first = list_entry(evlist->entries.next, struct perf_evsel, node);
@@ -849,15 +829,15 @@ static void start_counters(struct perf_evlist *evlist)
struct perf_event_attr *attr = &counter->attr;
struct xyarray *group_fd = NULL;
- if (group && counter != first)
+ if (top->group && counter != first)
group_fd = first->fd;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (top.freq) {
+ if (top->freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1;
- attr->sample_freq = top.freq;
+ attr->sample_freq = top->freq;
}
if (evlist->nr_entries > 1) {
@@ -870,23 +850,23 @@ static void start_counters(struct perf_evlist *evlist)
attr->mmap = 1;
attr->comm = 1;
- attr->inherit = inherit;
+ attr->inherit = top->inherit;
retry_sample_id:
- attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+ attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
try_again:
- if (perf_evsel__open(counter, top.evlist->cpus,
- top.evlist->threads, group,
+ if (perf_evsel__open(counter, top->evlist->cpus,
+ top->evlist->threads, top->group,
group_fd) < 0) {
int err = errno;
if (err == EPERM || err == EACCES) {
ui__error_paranoid();
goto out_err;
- } else if (err == EINVAL && sample_id_all_avail) {
+ } else if (err == EINVAL && top->sample_id_all_avail) {
/*
* Old kernel, no attr->sample_id_type_all field
*/
- sample_id_all_avail = false;
+ top->sample_id_all_avail = false;
goto retry_sample_id;
}
/*
@@ -920,7 +900,7 @@ try_again:
}
}
- if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) {
+ if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
ui__warning("Failed to mmap with %d (%s)\n",
errno, strerror(errno));
goto out_err;
@@ -933,14 +913,14 @@ out_err:
exit(0);
}
-static int setup_sample_type(void)
+static int perf_top__setup_sample_type(struct perf_top *top)
{
- if (!sort_has_symbols) {
+ if (!top->sort_has_symbols) {
if (symbol_conf.use_callchain) {
ui__warning("Selected -g but \"sym\" not present in --sort/-s.");
return -EINVAL;
}
- } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
+ } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
if (callchain_register_param(&callchain_param) < 0) {
ui__warning("Can't register callchain params.\n");
return -EINVAL;
@@ -950,7 +930,7 @@ static int setup_sample_type(void)
return 0;
}
-static int __cmd_top(void)
+static int __cmd_top(struct perf_top *top)
{
pthread_t thread;
int ret;
@@ -958,39 +938,40 @@ static int __cmd_top(void)
* FIXME: perf_session__new should allow passing a O_MMAP, so that all this
* mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
*/
- top.session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
- if (top.session == NULL)
+ top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
+ if (top->session == NULL)
return -ENOMEM;
- ret = setup_sample_type();
+ ret = perf_top__setup_sample_type(top);
if (ret)
goto out_delete;
- if (top.target_tid != -1)
- perf_event__synthesize_thread_map(top.evlist->threads,
- perf_event__process, top.session);
+ if (top->target_tid != -1)
+ perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
+ perf_event__process,
+ &top->session->host_machine);
else
- perf_event__synthesize_threads(perf_event__process, top.session);
-
- start_counters(top.evlist);
- top.session->evlist = top.evlist;
- perf_session__update_sample_type(top.session);
+ perf_event__synthesize_threads(&top->tool, perf_event__process,
+ &top->session->host_machine);
+ perf_top__start_counters(top);
+ top->session->evlist = top->evlist;
+ perf_session__update_sample_type(top->session);
/* Wait for a minimal set of events before starting the snapshot */
- poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
+ poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
- perf_session__mmap_read(top.session);
+ perf_top__mmap_read(top);
if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
- display_thread), NULL)) {
+ display_thread), top)) {
printf("Could not create display thread.\n");
exit(-1);
}
- if (realtime_prio) {
+ if (top->realtime_prio) {
struct sched_param param;
- param.sched_priority = realtime_prio;
+ param.sched_priority = top->realtime_prio;
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
printf("Could not set realtime priority.\n");
exit(-1);
@@ -998,25 +979,25 @@ static int __cmd_top(void)
}
while (1) {
- u64 hits = top.samples;
+ u64 hits = top->samples;
- perf_session__mmap_read(top.session);
+ perf_top__mmap_read(top);
- if (hits == top.samples)
- ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
+ if (hits == top->samples)
+ ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
}
out_delete:
- perf_session__delete(top.session);
- top.session = NULL;
+ perf_session__delete(top->session);
+ top->session = NULL;
return 0;
}
static int
-parse_callchain_opt(const struct option *opt __used, const char *arg,
- int unset)
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
+ struct perf_top *top = (struct perf_top *)opt->value;
char *tok, *tok2;
char *endptr;
@@ -1024,7 +1005,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
* --no-call-graph
*/
if (unset) {
- dont_use_callchains = true;
+ top->dont_use_callchains = true;
return 0;
}
@@ -1052,9 +1033,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
symbol_conf.use_callchain = false;
return 0;
- }
-
- else
+ } else
return -1;
/* get the min percentage */
@@ -1098,17 +1077,32 @@ static const char * const top_usage[] = {
NULL
};
-static const struct option options[] = {
+int cmd_top(int argc, const char **argv, const char *prefix __used)
+{
+ struct perf_evsel *pos;
+ int status = -ENOMEM;
+ struct perf_top top = {
+ .count_filter = 5,
+ .delay_secs = 2,
+ .target_pid = -1,
+ .target_tid = -1,
+ .freq = 1000, /* 1 KHz */
+ .sample_id_all_avail = true,
+ .mmap_pages = 128,
+ .sym_pcnt_filter = 5,
+ };
+ char callchain_default_opt[] = "fractal,0.5,callee";
+ const struct option options[] = {
OPT_CALLBACK('e', "event", &top.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events_option),
- OPT_INTEGER('c', "count", &default_interval,
+ OPT_INTEGER('c', "count", &top.default_interval,
"event period to sample"),
OPT_INTEGER('p', "pid", &top.target_pid,
"profile events on existing process id"),
OPT_INTEGER('t', "tid", &top.target_tid,
"profile events on existing thread id"),
- OPT_BOOLEAN('a', "all-cpus", &system_wide,
+ OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
"list of cpus to monitor"),
@@ -1116,20 +1110,20 @@ static const struct option options[] = {
"file", "vmlinux pathname"),
OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
- OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
- OPT_INTEGER('r', "realtime", &realtime_prio,
+ OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
+ OPT_INTEGER('r', "realtime", &top.realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
OPT_INTEGER('d', "delay", &top.delay_secs,
"number of seconds to delay between refreshes"),
- OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
+ OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
"dump the symbol table used for profiling"),
OPT_INTEGER('f', "count-filter", &top.count_filter,
"only display functions with more events than this"),
- OPT_BOOLEAN('g', "group", &group,
+ OPT_BOOLEAN('g', "group", &top.group,
"put the counters into a counter group"),
- OPT_BOOLEAN('i', "inherit", &inherit,
+ OPT_BOOLEAN('i', "inherit", &top.inherit,
"child tasks inherit counters"),
- OPT_STRING(0, "sym-annotate", &sym_filter, "symbol name",
+ OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
"symbol to annotate"),
OPT_BOOLEAN('z', "zero", &top.zero,
"zero history across updates"),
@@ -1139,15 +1133,15 @@ static const struct option options[] = {
"display this many functions"),
OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
"hide user symbols"),
- OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
- OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
- OPT_CALLBACK_DEFAULT('G', "call-graph", NULL, "output_type,min_percent, call_order",
+ OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
"Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
"Default: fractal,0.5,callee", &parse_callchain_opt,
callchain_default_opt),
@@ -1166,12 +1160,7 @@ static const struct option options[] = {
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_END()
-};
-
-int cmd_top(int argc, const char **argv, const char *prefix __used)
-{
- struct perf_evsel *pos;
- int status = -ENOMEM;
+ };
top.evlist = perf_evlist__new(NULL, NULL);
if (top.evlist == NULL)
@@ -1188,9 +1177,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
setup_sorting(top_usage, options);
- if (use_stdio)
+ if (top.use_stdio)
use_browser = 0;
- else if (use_tui)
+ else if (top.use_tui)
use_browser = 1;
setup_browser(false);
@@ -1215,38 +1204,31 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
return -ENOMEM;
}
+ symbol_conf.nr_events = top.evlist->nr_entries;
+
if (top.delay_secs < 1)
top.delay_secs = 1;
/*
* User specified count overrides default frequency.
*/
- if (default_interval)
+ if (top.default_interval)
top.freq = 0;
else if (top.freq) {
- default_interval = top.freq;
+ top.default_interval = top.freq;
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
exit(EXIT_FAILURE);
}
list_for_each_entry(pos, &top.evlist->entries, node) {
- if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
- top.evlist->threads->nr) < 0)
- goto out_free_fd;
/*
* Fill in the ones not specifically initialized via -c:
*/
- if (pos->attr.sample_period)
- continue;
-
- pos->attr.sample_period = default_interval;
+ if (!pos->attr.sample_period)
+ pos->attr.sample_period = top.default_interval;
}
- if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
- perf_evlist__alloc_mmap(top.evlist) < 0)
- goto out_free_fd;
-
top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
symbol_conf.priv_size = sizeof(struct annotation);
@@ -1263,16 +1245,20 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
* Avoid annotation data structures overhead when symbols aren't on the
* sort list.
*/
- sort_has_symbols = sort_sym.list.next != NULL;
+ top.sort_has_symbols = sort_sym.list.next != NULL;
- get_term_dimensions(&winsize);
+ get_term_dimensions(&top.winsize);
if (top.print_entries == 0) {
- update_print_entries(&winsize);
- signal(SIGWINCH, sig_winch_handler);
+ struct sigaction act = {
+ .sa_sigaction = perf_top__sig_winch,
+ .sa_flags = SA_SIGINFO,
+ };
+ perf_top__update_print_entries(&top);
+ sigaction(SIGWINCH, &act, NULL);
}
- status = __cmd_top();
-out_free_fd:
+ status = __cmd_top(&top);
+
perf_evlist__delete(top.evlist);
return status;
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 73d0cac8b67..2b2e225a4d4 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -29,8 +29,6 @@ struct pager_config {
int val;
};
-static char debugfs_mntpt[MAXPATHLEN];
-
static int pager_command_config(const char *var, const char *value, void *data)
{
struct pager_config *c = data;
@@ -81,15 +79,6 @@ static void commit_pager_choice(void)
}
}
-static void set_debugfs_path(void)
-{
- char *path;
-
- path = getenv(PERF_DEBUGFS_ENVIRONMENT);
- snprintf(debugfs_path, MAXPATHLEN, "%s/%s", path ?: debugfs_mntpt,
- "tracing/events");
-}
-
static int handle_options(const char ***argv, int *argc, int *envchanged)
{
int handled = 0;
@@ -161,15 +150,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
fprintf(stderr, "No directory given for --debugfs-dir.\n");
usage(perf_usage_string);
}
- strncpy(debugfs_mntpt, (*argv)[1], MAXPATHLEN);
- debugfs_mntpt[MAXPATHLEN - 1] = '\0';
+ debugfs_set_path((*argv)[1]);
if (envchanged)
*envchanged = 1;
(*argv)++;
(*argc)--;
} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
- strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN);
- debugfs_mntpt[MAXPATHLEN - 1] = '\0';
+ debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
+ fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
if (envchanged)
*envchanged = 1;
} else {
@@ -281,7 +269,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
if (use_pager == -1 && p->option & USE_PAGER)
use_pager = 1;
commit_pager_choice();
- set_debugfs_path();
status = p->fn(argc, argv, prefix);
exit_browser(status);
@@ -416,17 +403,6 @@ static int run_argv(int *argcp, const char ***argv)
return done_alias;
}
-/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
-static void get_debugfs_mntpt(void)
-{
- const char *path = debugfs_mount(NULL);
-
- if (path)
- strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
- else
- debugfs_mntpt[0] = '\0';
-}
-
static void pthread__block_sigwinch(void)
{
sigset_t set;
@@ -453,7 +429,7 @@ int main(int argc, const char **argv)
if (!cmd)
cmd = "perf-help";
/* get debugfs mount point from /proc/mounts */
- get_debugfs_mntpt();
+ debugfs_mount(NULL);
/*
* "perf-xxxx" is the same as "perf xxxx", but we obviously:
*
@@ -476,7 +452,6 @@ int main(int argc, const char **argv)
argc--;
handle_options(&argv, &argc, NULL);
commit_pager_choice();
- set_debugfs_path();
set_buildid_dir();
if (argc > 0) {
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 914c895510f..64f8bee31ce 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -185,4 +185,28 @@ extern const char perf_version_string[];
void pthread__unblock_sigwinch(void);
+struct perf_record_opts {
+ pid_t target_pid;
+ pid_t target_tid;
+ bool call_graph;
+ bool group;
+ bool inherit_stat;
+ bool no_delay;
+ bool no_inherit;
+ bool no_samples;
+ bool pipe_output;
+ bool raw_samples;
+ bool sample_address;
+ bool sample_time;
+ bool sample_id_all_avail;
+ bool system_wide;
+ bool period;
+ unsigned int freq;
+ unsigned int mmap_pages;
+ unsigned int user_freq;
+ u64 default_interval;
+ u64 user_interval;
+ const char *cpu_list;
+};
+
#endif
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 119e996035c..011ed267660 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -25,17 +25,17 @@ int symbol__annotate_init(struct map *map __used, struct symbol *sym)
return 0;
}
-int symbol__alloc_hist(struct symbol *sym, int nevents)
+int symbol__alloc_hist(struct symbol *sym)
{
struct annotation *notes = symbol__annotation(sym);
size_t sizeof_sym_hist = (sizeof(struct sym_hist) +
(sym->end - sym->start) * sizeof(u64));
- notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist);
+ notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist);
if (notes->src == NULL)
return -1;
notes->src->sizeof_sym_hist = sizeof_sym_hist;
- notes->src->nr_histograms = nevents;
+ notes->src->nr_histograms = symbol_conf.nr_events;
INIT_LIST_HEAD(&notes->src->source);
return 0;
}
@@ -334,7 +334,7 @@ fallback:
disassembler_style ? "-M " : "",
disassembler_style ? disassembler_style : "",
map__rip_2objdump(map, sym->start),
- map__rip_2objdump(map, sym->end),
+ map__rip_2objdump(map, sym->end+1),
symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
symbol_conf.annotate_src ? "-S" : "",
symfs_filename, filename);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d9072523d34..efa5dc82bfa 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -72,7 +72,7 @@ static inline struct annotation *symbol__annotation(struct symbol *sym)
int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
int evidx, u64 addr);
-int symbol__alloc_hist(struct symbol *sym, int nevents);
+int symbol__alloc_hist(struct symbol *sym);
void symbol__annotate_zero_histograms(struct symbol *sym);
int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
@@ -99,8 +99,7 @@ static inline int symbol__tui_annotate(struct symbol *sym __used,
}
#else
int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
- int nr_events, void(*timer)(void *arg), void *arg,
- int delay_secs);
+ void(*timer)(void *arg), void *arg, int delay_secs);
#endif
extern const char *disassembler_style;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index a91cd99f26e..dff9c7a725f 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -13,15 +13,18 @@
#include "symbol.h"
#include <linux/kernel.h>
#include "debug.h"
+#include "session.h"
+#include "tool.h"
-static int build_id__mark_dso_hit(union perf_event *event,
+static int build_id__mark_dso_hit(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
struct perf_evsel *evsel __used,
- struct perf_session *session)
+ struct machine *machine)
{
struct addr_location al;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct thread *thread = perf_session__findnew(session, event->ip.pid);
+ struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
@@ -29,8 +32,8 @@ static int build_id__mark_dso_hit(union perf_event *event,
return -1;
}
- thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- event->ip.pid, event->ip.ip, &al);
+ thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
+ event->ip.ip, &al);
if (al.map != NULL)
al.map->dso->hit = 1;
@@ -38,25 +41,26 @@ static int build_id__mark_dso_hit(union perf_event *event,
return 0;
}
-static int perf_event__exit_del_thread(union perf_event *event,
+static int perf_event__exit_del_thread(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(session, event->fork.tid);
+ struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
event->fork.ppid, event->fork.ptid);
if (thread) {
- rb_erase(&thread->rb_node, &session->threads);
- session->last_match = NULL;
+ rb_erase(&thread->rb_node, &machine->threads);
+ machine->last_match = NULL;
thread__delete(thread);
}
return 0;
}
-struct perf_event_ops build_id__mark_dso_hit_ops = {
+struct perf_tool build_id__mark_dso_hit_ops = {
.sample = build_id__mark_dso_hit,
.mmap = perf_event__process_mmap,
.fork = perf_event__process_task,
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index 5dafb00eaa0..a993ba87d99 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -3,7 +3,7 @@
#include "session.h"
-extern struct perf_event_ops build_id__mark_dso_hit_ops;
+extern struct perf_tool build_id__mark_dso_hit_ops;
char *dso__build_id_filename(struct dso *self, char *bf, size_t size);
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 9b4ff16cac9..7f9c0f1ae3a 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -101,6 +101,9 @@ int callchain_append(struct callchain_root *root,
int callchain_merge(struct callchain_cursor *cursor,
struct callchain_root *dst, struct callchain_root *src);
+struct ip_callchain;
+union perf_event;
+
bool ip_callchain__valid(struct ip_callchain *chain,
const union perf_event *event);
/*
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 96bee5c4600..dbe2f16b1a1 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,7 +3,6 @@
#include "parse-options.h"
#include "evsel.h"
#include "cgroup.h"
-#include "debugfs.h" /* MAX_PATH, STR() */
#include "evlist.h"
int nr_cgroups;
@@ -12,7 +11,7 @@ static int
cgroupfs_find_mountpoint(char *buf, size_t maxlen)
{
FILE *fp;
- char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
+ char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
char *token, *saved_ptr = NULL;
int found = 0;
@@ -25,8 +24,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
* and inspect every cgroupfs mount point to find one that has
* perf_event subsystem
*/
- while (fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
- STR(MAX_PATH)"s %*d %*d\n",
+ while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %"
+ STR(PATH_MAX)"s %*d %*d\n",
mountpoint, type, tokens) == 3) {
if (!strcmp(type, "cgroup")) {
@@ -57,15 +56,15 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
static int open_cgroup(char *name)
{
- char path[MAX_PATH+1];
- char mnt[MAX_PATH+1];
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
int fd;
- if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1))
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
return -1;
- snprintf(path, MAX_PATH, "%s/%s", mnt, name);
+ snprintf(path, PATH_MAX, "%s/%s", mnt, name);
fd = open(path, O_RDONLY);
if (fd == -1)
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 80d9598db31..0deac6a14b6 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -1,5 +1,8 @@
/*
- * GIT - The information manager from hell
+ * config.c
+ *
+ * Helper functions for parsing config items.
+ * Originally copied from GIT source.
*
* Copyright (C) Linus Torvalds, 2005
* Copyright (C) Johannes Schindelin, 2005
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index a88fefc0cc0..ffc35e748e8 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -2,8 +2,12 @@
#include "debugfs.h"
#include "cache.h"
+#include <linux/kernel.h>
+#include <sys/mount.h>
+
static int debugfs_premounted;
-static char debugfs_mountpoint[MAX_PATH+1];
+char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug";
+char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events";
static const char *debugfs_known_mountpoints[] = {
"/sys/kernel/debug/",
@@ -62,11 +66,9 @@ const char *debugfs_find_mountpoint(void)
/* give up and parse /proc/mounts */
fp = fopen("/proc/mounts", "r");
if (fp == NULL)
- die("Can't open /proc/mounts for read");
+ return NULL;
- while (fscanf(fp, "%*s %"
- STR(MAX_PATH)
- "s %99s %*s %*d %*d\n",
+ while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
debugfs_mountpoint, type) == 2) {
if (strcmp(type, "debugfs") == 0)
break;
@@ -106,6 +108,12 @@ int debugfs_valid_entry(const char *path)
return 0;
}
+static void debugfs_set_tracing_events_path(const char *mountpoint)
+{
+ snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
+ mountpoint, "tracing/events");
+}
+
/* mount the debugfs somewhere if it's not mounted */
char *debugfs_mount(const char *mountpoint)
@@ -113,7 +121,7 @@ char *debugfs_mount(const char *mountpoint)
/* see if it's already mounted */
if (debugfs_find_mountpoint()) {
debugfs_premounted = 1;
- return debugfs_mountpoint;
+ goto out;
}
/* if not mounted and no argument */
@@ -129,12 +137,19 @@ char *debugfs_mount(const char *mountpoint)
return NULL;
/* save the mountpoint */
- strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
debugfs_found = 1;
-
+ strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+out:
+ debugfs_set_tracing_events_path(debugfs_mountpoint);
return debugfs_mountpoint;
}
+void debugfs_set_path(const char *mountpoint)
+{
+ snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
+ debugfs_set_tracing_events_path(mountpoint);
+}
+
/* umount the debugfs */
int debugfs_umount(void)
@@ -158,7 +173,7 @@ int debugfs_umount(void)
int debugfs_write(const char *entry, const char *value)
{
- char path[MAX_PATH+1];
+ char path[PATH_MAX + 1];
int ret, count;
int fd;
@@ -203,7 +218,7 @@ int debugfs_write(const char *entry, const char *value)
*/
int debugfs_read(const char *entry, char *buffer, size_t size)
{
- char path[MAX_PATH+1];
+ char path[PATH_MAX + 1];
int ret;
int fd;
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 83a02879745..4a878f735eb 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -1,25 +1,18 @@
#ifndef __DEBUGFS_H__
#define __DEBUGFS_H__
-#include <sys/mount.h>
+const char *debugfs_find_mountpoint(void);
+int debugfs_valid_mountpoint(const char *debugfs);
+int debugfs_valid_entry(const char *path);
+char *debugfs_mount(const char *mountpoint);
+int debugfs_umount(void);
+void debugfs_set_path(const char *mountpoint);
+int debugfs_write(const char *entry, const char *value);
+int debugfs_read(const char *entry, char *buffer, size_t size);
+void debugfs_force_cleanup(void);
+int debugfs_make_path(const char *element, char *buffer, int size);
-#ifndef MAX_PATH
-# define MAX_PATH 256
-#endif
-
-#ifndef STR
-# define _STR(x) #x
-# define STR(x) _STR(x)
-#endif
-
-extern const char *debugfs_find_mountpoint(void);
-extern int debugfs_valid_mountpoint(const char *debugfs);
-extern int debugfs_valid_entry(const char *path);
-extern char *debugfs_mount(const char *mountpoint);
-extern int debugfs_umount(void);
-extern int debugfs_write(const char *entry, const char *value);
-extern int debugfs_read(const char *entry, char *buffer, size_t size);
-extern void debugfs_force_cleanup(void);
-extern int debugfs_make_path(const char *element, char *buffer, int size);
+extern char debugfs_mountpoint[];
+extern char tracing_events_path[];
#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 437f8ca679a..73ddaf06b8e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,7 +1,6 @@
#include <linux/types.h>
#include "event.h"
#include "debug.h"
-#include "session.h"
#include "sort.h"
#include "string.h"
#include "strlist.h"
@@ -44,36 +43,27 @@ static struct perf_sample synth_sample = {
.period = 1,
};
-static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid,
- int full, perf_event__handler_t process,
- struct perf_session *session)
+static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len)
{
char filename[PATH_MAX];
char bf[BUFSIZ];
FILE *fp;
size_t size = 0;
- DIR *tasks;
- struct dirent dirent, *next;
- pid_t tgid = 0;
+ pid_t tgid = -1;
snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
fp = fopen(filename, "r");
if (fp == NULL) {
-out_race:
- /*
- * We raced with a task exiting - just return:
- */
pr_debug("couldn't open %s\n", filename);
return 0;
}
- memset(&event->comm, 0, sizeof(event->comm));
-
- while (!event->comm.comm[0] || !event->comm.pid) {
+ while (!comm[0] || (tgid < 0)) {
if (fgets(bf, sizeof(bf), fp) == NULL) {
- pr_warning("couldn't get COMM and pgid, malformed %s\n", filename);
- goto out;
+ pr_warning("couldn't get COMM and pgid, malformed %s\n",
+ filename);
+ break;
}
if (memcmp(bf, "Name:", 5) == 0) {
@@ -81,33 +71,65 @@ out_race:
while (*name && isspace(*name))
++name;
size = strlen(name) - 1;
- memcpy(event->comm.comm, name, size++);
+ if (size >= len)
+ size = len - 1;
+ memcpy(comm, name, size);
+
} else if (memcmp(bf, "Tgid:", 5) == 0) {
char *tgids = bf + 5;
while (*tgids && isspace(*tgids))
++tgids;
- tgid = event->comm.pid = atoi(tgids);
+ tgid = atoi(tgids);
}
}
+ fclose(fp);
+
+ return tgid;
+}
+
+static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+ union perf_event *event, pid_t pid,
+ int full,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ char filename[PATH_MAX];
+ size_t size;
+ DIR *tasks;
+ struct dirent dirent, *next;
+ pid_t tgid;
+
+ memset(&event->comm, 0, sizeof(event->comm));
+
+ tgid = perf_event__get_comm_tgid(pid, event->comm.comm,
+ sizeof(event->comm.comm));
+ if (tgid < 0)
+ goto out;
+
+ event->comm.pid = tgid;
event->comm.header.type = PERF_RECORD_COMM;
+
+ size = strlen(event->comm.comm) + 1;
size = ALIGN(size, sizeof(u64));
- memset(event->comm.comm + size, 0, session->id_hdr_size);
+ memset(event->comm.comm + size, 0, machine->id_hdr_size);
event->comm.header.size = (sizeof(event->comm) -
(sizeof(event->comm.comm) - size) +
- session->id_hdr_size);
+ machine->id_hdr_size);
if (!full) {
event->comm.tid = pid;
- process(event, &synth_sample, session);
+ process(tool, event, &synth_sample, machine);
goto out;
}
snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
tasks = opendir(filename);
- if (tasks == NULL)
- goto out_race;
+ if (tasks == NULL) {
+ pr_debug("couldn't open %s\n", filename);
+ return 0;
+ }
while (!readdir_r(tasks, &dirent, &next) && next) {
char *end;
@@ -115,22 +137,32 @@ out_race:
if (*end)
continue;
+ /* already have tgid; just want to update the comm */
+ (void) perf_event__get_comm_tgid(pid, event->comm.comm,
+ sizeof(event->comm.comm));
+
+ size = strlen(event->comm.comm) + 1;
+ size = ALIGN(size, sizeof(u64));
+ memset(event->comm.comm + size, 0, machine->id_hdr_size);
+ event->comm.header.size = (sizeof(event->comm) -
+ (sizeof(event->comm.comm) - size) +
+ machine->id_hdr_size);
+
event->comm.tid = pid;
- process(event, &synth_sample, session);
+ process(tool, event, &synth_sample, machine);
}
closedir(tasks);
out:
- fclose(fp);
-
return tgid;
}
-static int perf_event__synthesize_mmap_events(union perf_event *event,
+static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+ union perf_event *event,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
- struct perf_session *session)
+ struct machine *machine)
{
char filename[PATH_MAX];
FILE *fp;
@@ -193,12 +225,12 @@ static int perf_event__synthesize_mmap_events(union perf_event *event,
event->mmap.len -= event->mmap.start;
event->mmap.header.size = (sizeof(event->mmap) -
(sizeof(event->mmap.filename) - size));
- memset(event->mmap.filename + size, 0, session->id_hdr_size);
- event->mmap.header.size += session->id_hdr_size;
+ memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+ event->mmap.header.size += machine->id_hdr_size;
event->mmap.pid = tgid;
event->mmap.tid = pid;
- process(event, &synth_sample, session);
+ process(tool, event, &synth_sample, machine);
}
}
@@ -206,14 +238,14 @@ static int perf_event__synthesize_mmap_events(union perf_event *event,
return 0;
}
-int perf_event__synthesize_modules(perf_event__handler_t process,
- struct perf_session *session,
+int perf_event__synthesize_modules(struct perf_tool *tool,
+ perf_event__handler_t process,
struct machine *machine)
{
struct rb_node *nd;
struct map_groups *kmaps = &machine->kmaps;
union perf_event *event = zalloc((sizeof(event->mmap) +
- session->id_hdr_size));
+ machine->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -243,15 +275,15 @@ int perf_event__synthesize_modules(perf_event__handler_t process,
event->mmap.header.type = PERF_RECORD_MMAP;
event->mmap.header.size = (sizeof(event->mmap) -
(sizeof(event->mmap.filename) - size));
- memset(event->mmap.filename + size, 0, session->id_hdr_size);
- event->mmap.header.size += session->id_hdr_size;
+ memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+ event->mmap.header.size += machine->id_hdr_size;
event->mmap.start = pos->start;
event->mmap.len = pos->end - pos->start;
event->mmap.pid = machine->pid;
memcpy(event->mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- process(event, &synth_sample, session);
+ process(tool, event, &synth_sample, machine);
}
free(event);
@@ -260,40 +292,69 @@ int perf_event__synthesize_modules(perf_event__handler_t process,
static int __event__synthesize_thread(union perf_event *comm_event,
union perf_event *mmap_event,
- pid_t pid, perf_event__handler_t process,
- struct perf_session *session)
+ pid_t pid, int full,
+ perf_event__handler_t process,
+ struct perf_tool *tool,
+ struct machine *machine)
{
- pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process,
- session);
+ pid_t tgid = perf_event__synthesize_comm(tool, comm_event, pid, full,
+ process, machine);
if (tgid == -1)
return -1;
- return perf_event__synthesize_mmap_events(mmap_event, pid, tgid,
- process, session);
+ return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+ process, machine);
}
-int perf_event__synthesize_thread_map(struct thread_map *threads,
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+ struct thread_map *threads,
perf_event__handler_t process,
- struct perf_session *session)
+ struct machine *machine)
{
union perf_event *comm_event, *mmap_event;
- int err = -1, thread;
+ int err = -1, thread, j;
- comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
if (comm_event == NULL)
goto out;
- mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+ mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size);
if (mmap_event == NULL)
goto out_free_comm;
err = 0;
for (thread = 0; thread < threads->nr; ++thread) {
if (__event__synthesize_thread(comm_event, mmap_event,
- threads->map[thread],
- process, session)) {
+ threads->map[thread], 0,
+ process, tool, machine)) {
err = -1;
break;
}
+
+ /*
+ * comm.pid is set to thread group id by
+ * perf_event__synthesize_comm
+ */
+ if ((int) comm_event->comm.pid != threads->map[thread]) {
+ bool need_leader = true;
+
+ /* is thread group leader in thread_map? */
+ for (j = 0; j < threads->nr; ++j) {
+ if ((int) comm_event->comm.pid == threads->map[j]) {
+ need_leader = false;
+ break;
+ }
+ }
+
+ /* if not, generate events for it */
+ if (need_leader &&
+ __event__synthesize_thread(comm_event,
+ mmap_event,
+ comm_event->comm.pid, 0,
+ process, tool, machine)) {
+ err = -1;
+ break;
+ }
+ }
}
free(mmap_event);
out_free_comm:
@@ -302,19 +363,20 @@ out:
return err;
}
-int perf_event__synthesize_threads(perf_event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
{
DIR *proc;
struct dirent dirent, *next;
union perf_event *comm_event, *mmap_event;
int err = -1;
- comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
if (comm_event == NULL)
goto out;
- mmap_event = malloc(sizeof(mmap_event->mmap) + session->id_hdr_size);
+ mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size);
if (mmap_event == NULL)
goto out_free_comm;
@@ -329,8 +391,8 @@ int perf_event__synthesize_threads(perf_event__handler_t process,
if (*end) /* only interested in proper numerical dirents */
continue;
- __event__synthesize_thread(comm_event, mmap_event, pid,
- process, session);
+ __event__synthesize_thread(comm_event, mmap_event, pid, 1,
+ process, tool, machine);
}
closedir(proc);
@@ -365,8 +427,8 @@ static int find_symbol_cb(void *arg, const char *name, char type,
return 1;
}
-int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
- struct perf_session *session,
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
struct machine *machine,
const char *symbol_name)
{
@@ -383,7 +445,7 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
*/
struct process_symbol_args args = { .name = symbol_name, };
union perf_event *event = zalloc((sizeof(event->mmap) +
- session->id_hdr_size));
+ machine->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -417,25 +479,32 @@ int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
size = ALIGN(size, sizeof(u64));
event->mmap.header.type = PERF_RECORD_MMAP;
event->mmap.header.size = (sizeof(event->mmap) -
- (sizeof(event->mmap.filename) - size) + session->id_hdr_size);
+ (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
event->mmap.pgoff = args.start;
event->mmap.start = map->start;
event->mmap.len = map->end - event->mmap.start;
event->mmap.pid = machine->pid;
- err = process(event, &synth_sample, session);
+ err = process(tool, event, &synth_sample, machine);
free(event);
return err;
}
-int perf_event__process_comm(union perf_event *event,
+/*
+ * Print the COMM payload of @event to @fp in the form ": <comm>:<tid>\n".
+ *
+ * Returns fprintf()'s result converted to size_t.
+ */
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, ": %s:%d\n", event->comm.comm, event->comm.tid);
+}
+
+int perf_event__process_comm(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(session, event->comm.tid);
+ struct thread *thread = machine__findnew_thread(machine, event->comm.tid);
- dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid);
+ if (dump_trace)
+ perf_event__fprintf_comm(event, stdout);
if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
@@ -445,13 +514,13 @@ int perf_event__process_comm(union perf_event *event,
return 0;
}
-int perf_event__process_lost(union perf_event *event,
+int perf_event__process_lost(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine __used)
{
dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
event->lost.id, event->lost.lost);
- session->hists.stats.total_lost += event->lost.lost;
return 0;
}
@@ -468,21 +537,15 @@ static void perf_event__set_kernel_mmap_len(union perf_event *event,
maps[MAP__FUNCTION]->end = ~0ULL;
}
-static int perf_event__process_kernel_mmap(union perf_event *event,
- struct perf_session *session)
+static int perf_event__process_kernel_mmap(struct perf_tool *tool __used,
+ union perf_event *event,
+ struct machine *machine)
{
struct map *map;
char kmmap_prefix[PATH_MAX];
- struct machine *machine;
enum dso_kernel_type kernel_type;
bool is_kernel_mmap;
- machine = perf_session__findnew_machine(session, event->mmap.pid);
- if (!machine) {
- pr_err("Can't find id %d's machine\n", event->mmap.pid);
- goto out_problem;
- }
-
machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
if (machine__is_host(machine))
kernel_type = DSO_TYPE_KERNEL;
@@ -549,9 +612,9 @@ static int perf_event__process_kernel_mmap(union perf_event *event,
* time /proc/sys/kernel/kptr_restrict was non zero.
*/
if (event->mmap.pgoff != 0) {
- perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
- symbol_name,
- event->mmap.pgoff);
+ maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
+ symbol_name,
+ event->mmap.pgoff);
}
if (machine__is_default_guest(machine)) {
@@ -567,32 +630,35 @@ out_problem:
return -1;
}
-int perf_event__process_mmap(union perf_event *event,
+/*
+ * Print the MMAP payload of @event to @fp on one line:
+ * " <pid>/<tid>: [<start>(<len>) @ <pgoff>]: <filename>\n", with start, len
+ * and pgoff printed in hex.
+ *
+ * Returns fprintf()'s result converted to size_t.
+ */
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
+ event->mmap.pid, event->mmap.tid, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff, event->mmap.filename);
+}
+
+int perf_event__process_mmap(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine)
{
- struct machine *machine;
struct thread *thread;
struct map *map;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
int ret = 0;
- dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
- event->mmap.pid, event->mmap.tid, event->mmap.start,
- event->mmap.len, event->mmap.pgoff, event->mmap.filename);
+ if (dump_trace)
+ perf_event__fprintf_mmap(event, stdout);
if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
cpumode == PERF_RECORD_MISC_KERNEL) {
- ret = perf_event__process_kernel_mmap(event, session);
+ ret = perf_event__process_kernel_mmap(tool, event, machine);
if (ret < 0)
goto out_problem;
return 0;
}
- machine = perf_session__find_host_machine(session);
- if (machine == NULL)
- goto out_problem;
- thread = perf_session__findnew(session, event->mmap.pid);
+ thread = machine__findnew_thread(machine, event->mmap.pid);
if (thread == NULL)
goto out_problem;
map = map__new(&machine->user_dsos, event->mmap.start,
@@ -610,18 +676,26 @@ out_problem:
return 0;
}
-int perf_event__process_task(union perf_event *event,
+/*
+ * Print the task (fork member) payload of @event to @fp as
+ * "(<pid>:<tid>):(<ppid>:<ptid>)\n".
+ *
+ * Returns fprintf()'s result converted to size_t.
+ */
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, "(%d:%d):(%d:%d)\n",
+ event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+}
+
+int perf_event__process_task(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_sample *sample __used,
- struct perf_session *session)
+ struct machine *machine)
{
- struct thread *thread = perf_session__findnew(session, event->fork.tid);
- struct thread *parent = perf_session__findnew(session, event->fork.ptid);
+ struct thread *thread = machine__findnew_thread(machine, event->fork.tid);
+ struct thread *parent = machine__findnew_thread(machine, event->fork.ptid);
- dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
- event->fork.ppid, event->fork.ptid);
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
if (event->header.type == PERF_RECORD_EXIT) {
- perf_session__remove_thread(session, thread);
+ machine__remove_thread(machine, thread);
return 0;
}
@@ -634,22 +708,45 @@ int perf_event__process_task(union perf_event *event,
return 0;
}
-int perf_event__process(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session)
+/*
+ * Print @event to @fp: first "PERF_RECORD_<name>", then the type-specific
+ * payload for COMM, FORK/EXIT and MMAP records.  Any other record type only
+ * gets the terminating newline.
+ *
+ * Returns the sum of the values returned by the individual print calls.
+ */
+size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+{
+ size_t ret = fprintf(fp, "PERF_RECORD_%s",
+ perf_event__name(event->header.type));
+
+ switch (event->header.type) {
+ case PERF_RECORD_COMM:
+ ret += perf_event__fprintf_comm(event, fp);
+ break;
+ case PERF_RECORD_FORK:
+ case PERF_RECORD_EXIT:
+ /* FORK and EXIT are both printed from the fork member */
+ ret += perf_event__fprintf_task(event, fp);
+ break;
+ case PERF_RECORD_MMAP:
+ ret += perf_event__fprintf_mmap(event, fp);
+ break;
+ default:
+ /* no payload printer: just end the line started above */
+ ret += fprintf(fp, "\n");
+ }
+
+ return ret;
+}
+
+int perf_event__process(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine)
{
switch (event->header.type) {
case PERF_RECORD_COMM:
- perf_event__process_comm(event, sample, session);
+ perf_event__process_comm(tool, event, sample, machine);
break;
case PERF_RECORD_MMAP:
- perf_event__process_mmap(event, sample, session);
+ perf_event__process_mmap(tool, event, sample, machine);
break;
case PERF_RECORD_FORK:
case PERF_RECORD_EXIT:
- perf_event__process_task(event, sample, session);
+ perf_event__process_task(tool, event, sample, machine);
break;
case PERF_RECORD_LOST:
- perf_event__process_lost(event, sample, session);
+ perf_event__process_lost(tool, event, sample, machine);
default:
break;
}
@@ -658,36 +755,29 @@ int perf_event__process(union perf_event *event, struct perf_sample *sample,
}
void thread__find_addr_map(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, pid_t pid, u64 addr,
+ struct machine *machine, u8 cpumode,
+ enum map_type type, u64 addr,
struct addr_location *al)
{
struct map_groups *mg = &self->mg;
- struct machine *machine = NULL;
al->thread = self;
al->addr = addr;
al->cpumode = cpumode;
al->filtered = false;
+ if (machine == NULL) {
+ al->map = NULL;
+ return;
+ }
+
if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
al->level = 'k';
- machine = perf_session__find_host_machine(session);
- if (machine == NULL) {
- al->map = NULL;
- return;
- }
mg = &machine->kmaps;
} else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
al->level = '.';
- machine = perf_session__find_host_machine(session);
} else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
al->level = 'g';
- machine = perf_session__find_machine(session, pid);
- if (machine == NULL) {
- al->map = NULL;
- return;
- }
mg = &machine->kmaps;
} else {
/*
@@ -733,13 +823,12 @@ try_again:
al->addr = al->map->map_ip(al->map, al->addr);
}
-void thread__find_addr_location(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, pid_t pid, u64 addr,
+void thread__find_addr_location(struct thread *thread, struct machine *machine,
+ u8 cpumode, enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter)
{
- thread__find_addr_map(self, session, cpumode, type, pid, addr, al);
+ thread__find_addr_map(thread, machine, cpumode, type, addr, al);
if (al->map != NULL)
al->sym = map__find_symbol(al->map, al->addr, filter);
else
@@ -747,13 +836,13 @@ void thread__find_addr_location(struct thread *self,
}
int perf_event__preprocess_sample(const union perf_event *event,
- struct perf_session *session,
+ struct machine *machine,
struct addr_location *al,
struct perf_sample *sample,
symbol_filter_t filter)
{
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct thread *thread = perf_session__findnew(session, event->ip.pid);
+ struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
if (thread == NULL)
return -1;
@@ -764,18 +853,18 @@ int perf_event__preprocess_sample(const union perf_event *event,
dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
/*
- * Have we already created the kernel maps for the host machine?
+ * Have we already created the kernel maps for this machine?
*
* This should have happened earlier, when we processed the kernel MMAP
* events, but for older perf.data files there was no such thing, so do
* it now.
*/
if (cpumode == PERF_RECORD_MISC_KERNEL &&
- session->host_machine.vmlinux_maps[MAP__FUNCTION] == NULL)
- machine__create_kernel_maps(&session->host_machine);
+ machine->vmlinux_maps[MAP__FUNCTION] == NULL)
+ machine__create_kernel_maps(machine);
- thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- event->ip.pid, event->ip.ip, al);
+ thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
+ event->ip.ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -783,13 +872,14 @@ int perf_event__preprocess_sample(const union perf_event *event,
al->cpu = sample->cpu;
if (al->map) {
+ struct dso *dso = al->map->dso;
+
if (symbol_conf.dso_list &&
- (!al->map || !al->map->dso ||
- !(strlist__has_entry(symbol_conf.dso_list,
- al->map->dso->short_name) ||
- (al->map->dso->short_name != al->map->dso->long_name &&
- strlist__has_entry(symbol_conf.dso_list,
- al->map->dso->long_name)))))
+ (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+ dso->short_name) ||
+ (dso->short_name != dso->long_name &&
+ strlist__has_entry(symbol_conf.dso_list,
+ dso->long_name)))))
goto out_filtered;
al->sym = map__find_symbol(al->map, al->addr, filter);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 357a85b8524..cbdeaad9c5e 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -2,6 +2,7 @@
#define __PERF_RECORD_H
#include <limits.h>
+#include <stdio.h>
#include "../perf.h"
#include "map.h"
@@ -141,43 +142,54 @@ union perf_event {
void perf_event__print_totals(void);
-struct perf_session;
+struct perf_tool;
struct thread_map;
-typedef int (*perf_event__handler_synth_t)(union perf_event *event,
- struct perf_session *session);
-typedef int (*perf_event__handler_t)(union perf_event *event,
+typedef int (*perf_event__handler_t)(struct perf_tool *tool,
+ union perf_event *event,
struct perf_sample *sample,
- struct perf_session *session);
+ struct machine *machine);
-int perf_event__synthesize_thread_map(struct thread_map *threads,
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+ struct thread_map *threads,
perf_event__handler_t process,
- struct perf_session *session);
-int perf_event__synthesize_threads(perf_event__handler_t process,
- struct perf_session *session);
-int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
- struct perf_session *session,
+ struct machine *machine);
+int perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
struct machine *machine,
const char *symbol_name);
-int perf_event__synthesize_modules(perf_event__handler_t process,
- struct perf_session *session,
+int perf_event__synthesize_modules(struct perf_tool *tool,
+ perf_event__handler_t process,
struct machine *machine);
-int perf_event__process_comm(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session);
-int perf_event__process_lost(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session);
-int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session);
-int perf_event__process_task(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session);
-int perf_event__process(union perf_event *event, struct perf_sample *sample,
- struct perf_session *session);
+int perf_event__process_comm(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_lost(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_task(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
struct addr_location;
int perf_event__preprocess_sample(const union perf_event *self,
- struct perf_session *session,
+ struct machine *machine,
struct addr_location *al,
struct perf_sample *sample,
symbol_filter_t filter);
@@ -187,5 +199,13 @@ const char *perf_event__name(unsigned int id);
int perf_event__parse_sample(const union perf_event *event, u64 type,
int sample_size, bool sample_id_all,
struct perf_sample *sample, bool swapped);
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+ const struct perf_sample *sample,
+ bool swapped);
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, FILE *fp);
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fbb4b4ab9cc..fa1837088ca 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -6,12 +6,16 @@
*
* Released under the GPL v2. (and only v2, not any later version)
*/
+#include "util.h"
+#include "debugfs.h"
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "evlist.h"
#include "evsel.h"
-#include "util.h"
+#include <unistd.h>
+
+#include "parse-events.h"
#include <sys/mman.h>
@@ -30,6 +34,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
perf_evlist__set_maps(evlist, cpus, threads);
+ evlist->workload.pid = -1;
}
struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
@@ -43,6 +48,22 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
return evlist;
}
+/*
+ * Configure every evsel on @evlist from @opts via perf_evsel__config().
+ *
+ * Note that @opts is also written: no_inherit is forced to true when the
+ * first entry of the cpu map is negative.
+ */
+void perf_evlist__config_attrs(struct perf_evlist *evlist,
+ struct perf_record_opts *opts)
+{
+ struct perf_evsel *evsel;
+
+ /* NOTE(review): a negative first cpu presumably means "no cpu", i.e. per-thread monitoring - confirm */
+ if (evlist->cpus->map[0] < 0)
+ opts->no_inherit = true;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ perf_evsel__config(evsel, opts);
+
+ /* with more than one event, request PERF_SAMPLE_ID on each of them */
+ if (evlist->nr_entries > 1)
+ evsel->attr.sample_type |= PERF_SAMPLE_ID;
+ }
+}
+
static void perf_evlist__purge(struct perf_evlist *evlist)
{
struct perf_evsel *pos, *n;
@@ -76,6 +97,14 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
++evlist->nr_entries;
}
+/*
+ * Append all nodes of @list to the tail of @evlist->entries and add
+ * @nr_entries to the evlist's entry count.  The count is taken on trust from
+ * the caller; @list is not walked to verify it.
+ */
+static void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+ struct list_head *list,
+ int nr_entries)
+{
+ list_splice_tail(list, &evlist->entries);
+ evlist->nr_entries += nr_entries;
+}
+
int perf_evlist__add_default(struct perf_evlist *evlist)
{
struct perf_event_attr attr = {
@@ -100,6 +129,126 @@ error:
return -ENOMEM;
}
+/*
+ * Create one evsel per entry of @attrs (@nr_attrs entries) and append them
+ * all to @evlist.
+ *
+ * The new evsels are collected on a private list first and spliced in only
+ * once every allocation has succeeded, so on failure @evlist is left
+ * untouched.
+ *
+ * Returns 0 on success, -1 if any perf_evsel__new() call failed.
+ */
+int perf_evlist__add_attrs(struct perf_evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs)
+{
+ struct perf_evsel *evsel, *n;
+ LIST_HEAD(head);
+ size_t i;
+
+ for (i = 0; i < nr_attrs; i++) {
+ /* second argument continues numbering after the entries already on the evlist */
+ evsel = perf_evsel__new(attrs + i, evlist->nr_entries + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ list_add_tail(&evsel->node, &head);
+ }
+
+ perf_evlist__splice_list_tail(evlist, &head, nr_attrs);
+
+ return 0;
+
+out_delete_partial_list:
+ /* drop the evsels created so far; none of them reached the evlist */
+ list_for_each_entry_safe(evsel, n, &head, node)
+ perf_evsel__delete(evsel);
+ return -1;
+}
+
+/*
+ * Read the numeric id of a tracepoint from
+ * "<tracing_events_path>/<evname>/id".
+ *
+ * The last ':' in the constructed path is replaced by '/', which presumably
+ * lets callers pass names of the form "subsys:event".
+ *
+ * Returns the id as parsed by atoi(), or -1 if the path could not be
+ * allocated, the file could not be opened, or nothing could be read from it.
+ */
+static int trace_event__id(const char *evname)
+{
+	char *filename, *colon;
+	int err = -1, fd;
+
+	if (asprintf(&filename, "%s/%s/id", tracing_events_path, evname) < 0)
+		return -1;
+
+	colon = strrchr(filename, ':');
+	if (colon != NULL)
+		*colon = '/';
+
+	fd = open(filename, O_RDONLY);
+	if (fd >= 0) {
+		char id[16];
+		/*
+		 * read() does not NUL-terminate: keep one byte spare and
+		 * terminate explicitly before handing the buffer to atoi().
+		 */
+		ssize_t len = read(fd, id, sizeof(id) - 1);
+
+		if (len > 0) {
+			id[len] = '\0';
+			err = atoi(id);
+		}
+		close(fd);
+	}
+
+	free(filename);
+	return err;
+}
+
+/*
+ * Add one tracepoint event to @evlist for each of the @nr_tracepoints names
+ * in @tracepoints.
+ *
+ * Each name is resolved with trace_event__id(); the resulting attrs sample
+ * RAW, TIME and CPU with a sample period of 1.
+ *
+ * Returns -1 if the attr array cannot be allocated, the negative value from
+ * trace_event__id() if a name cannot be resolved, and otherwise the result of
+ * perf_evlist__add_attrs().  Nothing is added to @evlist unless every name
+ * resolved.
+ */
+int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
+ const char *tracepoints[],
+ size_t nr_tracepoints)
+{
+ int err;
+ size_t i;
+ struct perf_event_attr *attrs = zalloc(nr_tracepoints * sizeof(*attrs));
+
+ if (attrs == NULL)
+ return -1;
+
+ for (i = 0; i < nr_tracepoints; i++) {
+ /* err doubles as the tracepoint id when it is non-negative */
+ err = trace_event__id(tracepoints[i]);
+
+ if (err < 0)
+ goto out_free_attrs;
+
+ attrs[i].type = PERF_TYPE_TRACEPOINT;
+ attrs[i].config = err;
+ attrs[i].sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU);
+ attrs[i].sample_period = 1;
+ }
+
+ err = perf_evlist__add_attrs(evlist, attrs, nr_tracepoints);
+out_free_attrs:
+ /* the temporary attr array is freed on both the success and the error path */
+ free(attrs);
+ return err;
+}
+
+/*
+ * Return the first evsel on @evlist whose attr is of type
+ * PERF_TYPE_TRACEPOINT and whose attr.config, cast to int, equals @id.
+ * Returns NULL when there is no such entry.
+ */
+static struct perf_evsel *
+ perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
+{
+ struct perf_evsel *evsel;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+ (int)evsel->attr.config == id)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+/*
+ * Attach handlers to tracepoint evsels by name.
+ *
+ * For each of the @nr_assocs entries in @assocs the tracepoint name is
+ * resolved to an id and the matching evsel on @evlist gets its handler.func
+ * set.  Names whose tracepoint is not on @evlist are skipped silently.
+ *
+ * Returns 0 on success, the negative value from trace_event__id() if a name
+ * cannot be resolved, or -EEXIST if a matching evsel already has a handler.
+ * Processing stops at the first error; handlers installed before it stay in
+ * place.
+ */
+int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
+ const struct perf_evsel_str_handler *assocs,
+ size_t nr_assocs)
+{
+ struct perf_evsel *evsel;
+ int err;
+ size_t i;
+
+ for (i = 0; i < nr_assocs; i++) {
+ /* err holds the tracepoint id when it is non-negative */
+ err = trace_event__id(assocs[i].name);
+ if (err < 0)
+ goto out;
+
+ evsel = perf_evlist__find_tracepoint_by_id(evlist, err);
+ if (evsel == NULL)
+ continue;
+
+ /* refuse to overwrite a handler that is already installed */
+ err = -EEXIST;
+ if (evsel->handler.func != NULL)
+ goto out;
+ evsel->handler.func = assocs[i].handler;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
void perf_evlist__disable(struct perf_evlist *evlist)
{
int cpu, thread;
@@ -126,7 +275,7 @@ void perf_evlist__enable(struct perf_evlist *evlist)
}
}
-int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+static int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
@@ -282,7 +431,7 @@ void perf_evlist__munmap(struct perf_evlist *evlist)
evlist->mmap = NULL;
}
-int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
evlist->nr_mmaps = evlist->cpus->nr;
if (evlist->cpus->map[0] == -1)
@@ -298,8 +447,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist,
evlist->mmap[idx].mask = mask;
evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, prot,
MAP_SHARED, fd, 0);
- if (evlist->mmap[idx].base == MAP_FAILED)
+ if (evlist->mmap[idx].base == MAP_FAILED) {
+ evlist->mmap[idx].base = NULL;
return -1;
+ }
perf_evlist__add_pollfd(evlist, fd);
return 0;
@@ -400,14 +551,22 @@ out_unmap:
*
* Using perf_evlist__read_on_cpu does this automatically.
*/
-int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
+ bool overwrite)
{
unsigned int page_size = sysconf(_SC_PAGE_SIZE);
- int mask = pages * page_size - 1;
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
const struct thread_map *threads = evlist->threads;
- int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
+ int prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), mask;
+
+ /* 512 kiB: default amount of unprivileged mlocked memory */
+ if (pages == UINT_MAX)
+ pages = (512 * 1024) / page_size;
+ else if (!is_power_of_2(pages))
+ return -EINVAL;
+
+ mask = pages * page_size - 1;
if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
return -ENOMEM;
@@ -512,6 +671,38 @@ u64 perf_evlist__sample_type(const struct perf_evlist *evlist)
return first->attr.sample_type;
}
+/*
+ * Compute a byte count from the first evsel's sample_type.
+ *
+ * Only the first entry of @evlist is consulted.  Returns 0 when that entry
+ * does not have attr.sample_id_all set; otherwise the sum of the sizes of the
+ * perf_sample fields selected by the TID, TIME, ID, STREAM_ID and CPU bits.
+ *
+ * NOTE(review): judging by the name this is the size of the sample-id block
+ * appended to non-sample records - confirm against the callers.
+ */
+u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist)
+{
+ struct perf_evsel *first;
+ /* only ever used as a sizeof() operand, never dereferenced */
+ struct perf_sample *data;
+ u64 sample_type;
+ u16 size = 0;
+
+ /* NOTE(review): no check for an empty list here - presumably callers guarantee at least one entry */
+ first = list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ if (!first->attr.sample_id_all)
+ goto out;
+
+ sample_type = first->attr.sample_type;
+
+ /* counted twice - presumably pid and tid are both stored; TODO confirm */
+ if (sample_type & PERF_SAMPLE_TID)
+ size += sizeof(data->tid) * 2;
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ size += sizeof(data->time);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ size += sizeof(data->id);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ size += sizeof(data->stream_id);
+
+ /* counted twice - presumably cpu plus a second same-sized word; TODO confirm */
+ if (sample_type & PERF_SAMPLE_CPU)
+ size += sizeof(data->cpu) * 2;
+out:
+ return size;
+}
+
bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist)
{
struct perf_evsel *pos, *first;
@@ -569,3 +760,97 @@ out_err:
return err;
}
+
+int perf_evlist__prepare_workload(struct perf_evlist *evlist,
+ struct perf_record_opts *opts,
+ const char *argv[])
+{
+ int child_ready_pipe[2], go_pipe[2];
+ char bf;
+
+ if (pipe(child_ready_pipe) < 0) {
+ perror("failed to create 'ready' pipe");
+ return -1;
+ }
+
+ if (pipe(go_pipe) < 0) {
+ perror("failed to create 'go' pipe");
+ goto out_close_ready_pipe;
+ }
+
+ evlist->workload.pid = fork();
+ if (evlist->workload.pid < 0) {
+ perror("failed to fork");
+ goto out_close_pipes;
+ }
+
+ if (!evlist->workload.pid) {
+ if (opts->pipe_output)
+ dup2(2, 1);
+
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Do a dummy execvp to get the PLT entry resolved,
+ * so we avoid the resolver overhead on the real
+ * execvp call.
+ */
+ execvp("", (char **)argv);
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ if (read(go_pipe[0], &bf, 1) == -1)
+ perror("unable to read pipe");
+
+ execvp(argv[0], (char **)argv);
+
+ perror(argv[0]);
+ kill(getppid(), SIGUSR1);
+ exit(-1);
+ }
+
+ if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1)
+ evlist->threads->map[0] = evlist->workload.pid;
+
+ close(child_ready_pipe[1]);
+ close(go_pipe[0]);
+ /*
+ * wait for child to settle
+ */
+ if (read(child_ready_pipe[0], &bf, 1) == -1) {
+ perror("unable to read pipe");
+ goto out_close_pipes;
+ }
+
+ evlist->workload.cork_fd = go_pipe[1];
+ close(child_ready_pipe[0]);
+ return 0;
+
+out_close_pipes:
+ close(go_pipe[0]);
+ close(go_pipe[1]);
+out_close_ready_pipe:
+ close(child_ready_pipe[0]);
+ close(child_ready_pipe[1]);
+ return -1;
+}
+
+int perf_evlist__start_workload(struct perf_evlist *evlist)
+{
+ if (evlist->workload.cork_fd > 0) {
+ /*
+ * Remove the cork, let it rip!
+ */
+ return close(evlist->workload.cork_fd);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 1779ffef782..8922aeed046 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -2,12 +2,16 @@
#define __PERF_EVLIST_H 1
#include <linux/list.h>
+#include <stdio.h>
#include "../perf.h"
#include "event.h"
+#include "util.h"
+#include <unistd.h>
struct pollfd;
struct thread_map;
struct cpu_map;
+struct perf_record_opts;
#define PERF_EVLIST__HLIST_BITS 8
#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
@@ -19,6 +23,10 @@ struct perf_evlist {
int nr_fds;
int nr_mmaps;
int mmap_len;
+ struct {
+ int cork_fd;
+ pid_t pid;
+ } workload;
bool overwrite;
union perf_event event_copy;
struct perf_mmap *mmap;
@@ -28,6 +36,11 @@ struct perf_evlist {
struct perf_evsel *selected;
};
+struct perf_evsel_str_handler {
+ const char *name;
+ void *handler;
+};
+
struct perf_evsel;
struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
@@ -39,11 +52,26 @@ void perf_evlist__delete(struct perf_evlist *evlist);
void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
int perf_evlist__add_default(struct perf_evlist *evlist);
+int perf_evlist__add_attrs(struct perf_evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs);
+int perf_evlist__add_tracepoints(struct perf_evlist *evlist,
+ const char *tracepoints[], size_t nr_tracepoints);
+int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist,
+ const struct perf_evsel_str_handler *assocs,
+ size_t nr_assocs);
+
+#define perf_evlist__add_attrs_array(evlist, array) \
+ perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array))
+
+#define perf_evlist__add_tracepoints_array(evlist, array) \
+ perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array))
+
+#define perf_evlist__set_tracepoints_handlers_array(evlist, array) \
+ perf_evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array))
void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
int cpu, int thread, u64 id);
-int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
@@ -52,8 +80,16 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx);
int perf_evlist__open(struct perf_evlist *evlist, bool group);
-int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
-int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
+void perf_evlist__config_attrs(struct perf_evlist *evlist,
+ struct perf_record_opts *opts);
+
+int perf_evlist__prepare_workload(struct perf_evlist *evlist,
+ struct perf_record_opts *opts,
+ const char *argv[]);
+int perf_evlist__start_workload(struct perf_evlist *evlist);
+
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
+ bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
void perf_evlist__disable(struct perf_evlist *evlist);
@@ -77,6 +113,7 @@ int perf_evlist__set_filters(struct perf_evlist *evlist);
u64 perf_evlist__sample_type(const struct perf_evlist *evlist);
bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist);
+u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index e4262642258..667f3b78bb2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -34,6 +34,16 @@ int __perf_evsel__sample_size(u64 sample_type)
return size;
}
+static void hists__init(struct hists *hists)
+{
+ memset(hists, 0, sizeof(*hists));
+ hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
+ hists->entries_in = &hists->entries_in_array[0];
+ hists->entries_collapsed = RB_ROOT;
+ hists->entries = RB_ROOT;
+ pthread_mutex_init(&hists->lock, NULL);
+}
+
void perf_evsel__init(struct perf_evsel *evsel,
struct perf_event_attr *attr, int idx)
{
@@ -53,6 +63,79 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
return evsel;
}
+void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ int track = !evsel->idx; /* only the first counter needs these */
+
+ attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+ attr->inherit = !opts->no_inherit;
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING |
+ PERF_FORMAT_ID;
+
+ attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+
+ /*
+ * We default some events to a 1 default interval. But keep
+ * it a weak assumption overridable by the user.
+ */
+ if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
+ opts->user_interval != ULLONG_MAX)) {
+ if (opts->freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+ }
+
+ if (opts->no_samples)
+ attr->sample_freq = 0;
+
+ if (opts->inherit_stat)
+ attr->inherit_stat = 1;
+
+ if (opts->sample_address) {
+ attr->sample_type |= PERF_SAMPLE_ADDR;
+ attr->mmap_data = track;
+ }
+
+ if (opts->call_graph)
+ attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+ if (opts->system_wide)
+ attr->sample_type |= PERF_SAMPLE_CPU;
+
+ if (opts->period)
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+
+ if (opts->sample_id_all_avail &&
+ (opts->sample_time || opts->system_wide ||
+ !opts->no_inherit || opts->cpu_list))
+ attr->sample_type |= PERF_SAMPLE_TIME;
+
+ if (opts->raw_samples) {
+ attr->sample_type |= PERF_SAMPLE_TIME;
+ attr->sample_type |= PERF_SAMPLE_RAW;
+ attr->sample_type |= PERF_SAMPLE_CPU;
+ }
+
+ if (opts->no_delay) {
+ attr->watermark = 0;
+ attr->wakeup_events = 1;
+ }
+
+ attr->mmap = track;
+ attr->comm = track;
+
+ if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
+ attr->disabled = 1;
+ attr->enable_on_exec = 1;
+ }
+}
+
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -377,7 +460,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
u32 val32[2];
} u;
-
+ memset(data, 0, sizeof(*data));
data->cpu = data->pid = data->tid = -1;
data->stream_id = data->id = data->time = -1ULL;
@@ -494,3 +577,82 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
return 0;
}
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+ const struct perf_sample *sample,
+ bool swapped)
+{
+ u64 *array;
+
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union {
+ u64 val64;
+ u32 val32[2];
+ } u;
+
+ array = event->sample.array;
+
+ if (type & PERF_SAMPLE_IP) {
+ event->ip.ip = sample->ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val32[0] = sample->pid;
+ u.val32[1] = sample->tid;
+ if (swapped) {
+ /*
+ * Inverse of what is done in perf_event__parse_sample
+ */
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ u.val64 = bswap_64(u.val64);
+ }
+
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ *array = sample->time;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ *array = sample->addr;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ *array = sample->stream_id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val32[0] = sample->cpu;
+ if (swapped) {
+ /*
+ * Inverse of what is done in perf_event__parse_sample
+ */
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val64 = bswap_64(u.val64);
+ }
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ *array = sample->period;
+ array++;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b1d15e6f7ae..326b8e4d503 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -61,12 +61,17 @@ struct perf_evsel {
off_t id_offset;
};
struct cgroup_sel *cgrp;
+ struct {
+ void *func;
+ void *data;
+ } handler;
bool supported;
};
struct cpu_map;
struct thread_map;
struct perf_evlist;
+struct perf_record_opts;
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
void perf_evsel__init(struct perf_evsel *evsel,
@@ -74,6 +79,9 @@ void perf_evsel__init(struct perf_evsel *evsel,
void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
+void perf_evsel__config(struct perf_evsel *evsel,
+ struct perf_record_opts *opts);
+
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index bcd05d05b4f..3e7e0b09c12 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <linux/list.h>
#include <linux/kernel.h>
+#include <linux/bitops.h>
#include <sys/utsname.h>
#include "evlist.h"
@@ -28,9 +29,6 @@ static struct perf_trace_event_type *events;
static u32 header_argc;
static const char **header_argv;
-static int dsos__write_buildid_table(struct perf_header *header, int fd);
-static int perf_session__cache_build_ids(struct perf_session *session);
-
int perf_header__push_event(u64 id, const char *name)
{
if (strlen(name) > MAX_EVENT_NAME)
@@ -187,6 +185,252 @@ perf_header__set_cmdline(int argc, const char **argv)
return 0;
}
+#define dsos__for_each_with_build_id(pos, head) \
+ list_for_each_entry(pos, head, node) \
+ if (!pos->has_build_id) \
+ continue; \
+ else
+
+static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
+ u16 misc, int fd)
+{
+ struct dso *pos;
+
+ dsos__for_each_with_build_id(pos, head) {
+ int err;
+ struct build_id_event b;
+ size_t len;
+
+ if (!pos->hit)
+ continue;
+ len = pos->long_name_len + 1;
+ len = ALIGN(len, NAME_ALIGN);
+ memset(&b, 0, sizeof(b));
+ memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+ b.pid = pid;
+ b.header.misc = misc;
+ b.header.size = sizeof(b) + len;
+ err = do_write(fd, &b, sizeof(b));
+ if (err < 0)
+ return err;
+ err = write_padded(fd, pos->long_name,
+ pos->long_name_len + 1, len);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static int machine__write_buildid_table(struct machine *machine, int fd)
+{
+ int err;
+ u16 kmisc = PERF_RECORD_MISC_KERNEL,
+ umisc = PERF_RECORD_MISC_USER;
+
+ if (!machine__is_host(machine)) {
+ kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+ umisc = PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
+ kmisc, fd);
+ if (err == 0)
+ err = __dsos__write_buildid_table(&machine->user_dsos,
+ machine->pid, umisc, fd);
+ return err;
+}
+
+static int dsos__write_buildid_table(struct perf_header *header, int fd)
+{
+ struct perf_session *session = container_of(header,
+ struct perf_session, header);
+ struct rb_node *nd;
+ int err = machine__write_buildid_table(&session->host_machine, fd);
+
+ if (err)
+ return err;
+
+ for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ err = machine__write_buildid_table(pos, fd);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
+ const char *name, bool is_kallsyms)
+{
+ const size_t size = PATH_MAX;
+ char *realname, *filename = zalloc(size),
+ *linkname = zalloc(size), *targetname;
+ int len, err = -1;
+
+ if (is_kallsyms) {
+ if (symbol_conf.kptr_restrict) {
+ pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
+ return 0;
+ }
+ realname = (char *)name;
+ } else
+ realname = realpath(name, NULL);
+
+ if (realname == NULL || filename == NULL || linkname == NULL)
+ goto out_free;
+
+ len = snprintf(filename, size, "%s%s%s",
+ debugdir, is_kallsyms ? "/" : "", realname);
+ if (mkdir_p(filename, 0755))
+ goto out_free;
+
+ snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id);
+
+ if (access(filename, F_OK)) {
+ if (is_kallsyms) {
+ if (copyfile("/proc/kallsyms", filename))
+ goto out_free;
+ } else if (link(realname, filename) && copyfile(name, filename))
+ goto out_free;
+ }
+
+ len = snprintf(linkname, size, "%s/.build-id/%.2s",
+ debugdir, sbuild_id);
+
+ if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+ goto out_free;
+
+ snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
+ targetname = filename + strlen(debugdir) - 5;
+ memcpy(targetname, "../..", 5);
+
+ if (symlink(targetname, linkname) == 0)
+ err = 0;
+out_free:
+ if (!is_kallsyms)
+ free(realname);
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+ const char *name, const char *debugdir,
+ bool is_kallsyms)
+{
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ build_id__sprintf(build_id, build_id_size, sbuild_id);
+
+ return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
+}
+
+int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
+{
+ const size_t size = PATH_MAX;
+ char *filename = zalloc(size),
+ *linkname = zalloc(size);
+ int err = -1;
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+ debugdir, sbuild_id, sbuild_id + 2);
+
+ if (access(linkname, F_OK))
+ goto out_free;
+
+ if (readlink(linkname, filename, size - 1) < 0)
+ goto out_free;
+
+ if (unlink(linkname))
+ goto out_free;
+
+ /*
+ * Since the link is relative, we must make it absolute:
+ */
+ snprintf(linkname, size, "%s/.build-id/%.2s/%s",
+ debugdir, sbuild_id, filename);
+
+ if (unlink(linkname))
+ goto out_free;
+
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int dso__cache_build_id(struct dso *dso, const char *debugdir)
+{
+ bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
+
+ return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
+ dso->long_name, debugdir, is_kallsyms);
+}
+
+static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+{
+ struct dso *pos;
+ int err = 0;
+
+ dsos__for_each_with_build_id(pos, head)
+ if (dso__cache_build_id(pos, debugdir))
+ err = -1;
+
+ return err;
+}
+
+static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
+{
+ int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
+ ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
+ return ret;
+}
+
+static int perf_session__cache_build_ids(struct perf_session *session)
+{
+ struct rb_node *nd;
+ int ret;
+ char debugdir[PATH_MAX];
+
+ snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
+
+ if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+ return -1;
+
+ ret = machine__cache_build_ids(&session->host_machine, debugdir);
+
+ for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret |= machine__cache_build_ids(pos, debugdir);
+ }
+ return ret ? -1 : 0;
+}
+
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
+{
+ bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
+ ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
+ return ret;
+}
+
+static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
+{
+ struct rb_node *nd;
+ bool ret = machine__read_build_ids(&session->host_machine, with_hits);
+
+ for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret |= machine__read_build_ids(pos, with_hits);
+ }
+
+ return ret;
+}
+
static int write_trace_info(int fd, struct perf_header *h __used,
struct perf_evlist *evlist)
{
@@ -202,6 +446,9 @@ static int write_build_id(int fd, struct perf_header *h,
session = container_of(h, struct perf_session, header);
+ if (!perf_session__read_build_ids(session, true))
+ return -1;
+
err = dsos__write_buildid_table(h, fd);
if (err < 0) {
pr_debug("failed to write buildid table\n");
@@ -388,7 +635,7 @@ static int write_event_desc(int fd, struct perf_header *h __used,
/*
* write event string as passed on cmdline
*/
- ret = do_write_string(fd, attr->name);
+ ret = do_write_string(fd, event_name(attr));
if (ret < 0)
return ret;
/*
@@ -1065,26 +1312,30 @@ struct feature_ops {
bool full_only;
};
-#define FEAT_OPA(n, w, p) \
- [n] = { .name = #n, .write = w, .print = p }
-#define FEAT_OPF(n, w, p) \
- [n] = { .name = #n, .write = w, .print = p, .full_only = true }
+#define FEAT_OPA(n, func) \
+ [n] = { .name = #n, .write = write_##func, .print = print_##func }
+#define FEAT_OPF(n, func) \
+ [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true }
+
+/* feature_ops not implemented: */
+#define print_trace_info NULL
+#define print_build_id NULL
static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
- FEAT_OPA(HEADER_TRACE_INFO, write_trace_info, NULL),
- FEAT_OPA(HEADER_BUILD_ID, write_build_id, NULL),
- FEAT_OPA(HEADER_HOSTNAME, write_hostname, print_hostname),
- FEAT_OPA(HEADER_OSRELEASE, write_osrelease, print_osrelease),
- FEAT_OPA(HEADER_VERSION, write_version, print_version),
- FEAT_OPA(HEADER_ARCH, write_arch, print_arch),
- FEAT_OPA(HEADER_NRCPUS, write_nrcpus, print_nrcpus),
- FEAT_OPA(HEADER_CPUDESC, write_cpudesc, print_cpudesc),
- FEAT_OPA(HEADER_CPUID, write_cpuid, print_cpuid),
- FEAT_OPA(HEADER_TOTAL_MEM, write_total_mem, print_total_mem),
- FEAT_OPA(HEADER_EVENT_DESC, write_event_desc, print_event_desc),
- FEAT_OPA(HEADER_CMDLINE, write_cmdline, print_cmdline),
- FEAT_OPF(HEADER_CPU_TOPOLOGY, write_cpu_topology, print_cpu_topology),
- FEAT_OPF(HEADER_NUMA_TOPOLOGY, write_numa_topology, print_numa_topology),
+ FEAT_OPA(HEADER_TRACE_INFO, trace_info),
+ FEAT_OPA(HEADER_BUILD_ID, build_id),
+ FEAT_OPA(HEADER_HOSTNAME, hostname),
+ FEAT_OPA(HEADER_OSRELEASE, osrelease),
+ FEAT_OPA(HEADER_VERSION, version),
+ FEAT_OPA(HEADER_ARCH, arch),
+ FEAT_OPA(HEADER_NRCPUS, nrcpus),
+ FEAT_OPA(HEADER_CPUDESC, cpudesc),
+ FEAT_OPA(HEADER_CPUID, cpuid),
+ FEAT_OPA(HEADER_TOTAL_MEM, total_mem),
+ FEAT_OPA(HEADER_EVENT_DESC, event_desc),
+ FEAT_OPA(HEADER_CMDLINE, cmdline),
+ FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
+ FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
};
struct header_print_data {
@@ -1103,9 +1354,9 @@ static int perf_file_section__fprintf_info(struct perf_file_section *section,
"%d, continuing...\n", section->offset, feat);
return 0;
}
- if (feat < HEADER_TRACE_INFO || feat >= HEADER_LAST_FEATURE) {
+ if (feat >= HEADER_LAST_FEATURE) {
pr_warning("unknown feature %d\n", feat);
- return -1;
+ return 0;
}
if (!feat_ops[feat].print)
return 0;
@@ -1132,252 +1383,6 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
return 0;
}
-#define dsos__for_each_with_build_id(pos, head) \
- list_for_each_entry(pos, head, node) \
- if (!pos->has_build_id) \
- continue; \
- else
-
-static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
- u16 misc, int fd)
-{
- struct dso *pos;
-
- dsos__for_each_with_build_id(pos, head) {
- int err;
- struct build_id_event b;
- size_t len;
-
- if (!pos->hit)
- continue;
- len = pos->long_name_len + 1;
- len = ALIGN(len, NAME_ALIGN);
- memset(&b, 0, sizeof(b));
- memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
- b.pid = pid;
- b.header.misc = misc;
- b.header.size = sizeof(b) + len;
- err = do_write(fd, &b, sizeof(b));
- if (err < 0)
- return err;
- err = write_padded(fd, pos->long_name,
- pos->long_name_len + 1, len);
- if (err < 0)
- return err;
- }
-
- return 0;
-}
-
-static int machine__write_buildid_table(struct machine *machine, int fd)
-{
- int err;
- u16 kmisc = PERF_RECORD_MISC_KERNEL,
- umisc = PERF_RECORD_MISC_USER;
-
- if (!machine__is_host(machine)) {
- kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
- umisc = PERF_RECORD_MISC_GUEST_USER;
- }
-
- err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
- kmisc, fd);
- if (err == 0)
- err = __dsos__write_buildid_table(&machine->user_dsos,
- machine->pid, umisc, fd);
- return err;
-}
-
-static int dsos__write_buildid_table(struct perf_header *header, int fd)
-{
- struct perf_session *session = container_of(header,
- struct perf_session, header);
- struct rb_node *nd;
- int err = machine__write_buildid_table(&session->host_machine, fd);
-
- if (err)
- return err;
-
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
- struct machine *pos = rb_entry(nd, struct machine, rb_node);
- err = machine__write_buildid_table(pos, fd);
- if (err)
- break;
- }
- return err;
-}
-
-int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
- const char *name, bool is_kallsyms)
-{
- const size_t size = PATH_MAX;
- char *realname, *filename = zalloc(size),
- *linkname = zalloc(size), *targetname;
- int len, err = -1;
-
- if (is_kallsyms) {
- if (symbol_conf.kptr_restrict) {
- pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n");
- return 0;
- }
- realname = (char *)name;
- } else
- realname = realpath(name, NULL);
-
- if (realname == NULL || filename == NULL || linkname == NULL)
- goto out_free;
-
- len = snprintf(filename, size, "%s%s%s",
- debugdir, is_kallsyms ? "/" : "", realname);
- if (mkdir_p(filename, 0755))
- goto out_free;
-
- snprintf(filename + len, sizeof(filename) - len, "/%s", sbuild_id);
-
- if (access(filename, F_OK)) {
- if (is_kallsyms) {
- if (copyfile("/proc/kallsyms", filename))
- goto out_free;
- } else if (link(realname, filename) && copyfile(name, filename))
- goto out_free;
- }
-
- len = snprintf(linkname, size, "%s/.build-id/%.2s",
- debugdir, sbuild_id);
-
- if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
- goto out_free;
-
- snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
- targetname = filename + strlen(debugdir) - 5;
- memcpy(targetname, "../..", 5);
-
- if (symlink(targetname, linkname) == 0)
- err = 0;
-out_free:
- if (!is_kallsyms)
- free(realname);
- free(filename);
- free(linkname);
- return err;
-}
-
-static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
- const char *name, const char *debugdir,
- bool is_kallsyms)
-{
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
- build_id__sprintf(build_id, build_id_size, sbuild_id);
-
- return build_id_cache__add_s(sbuild_id, debugdir, name, is_kallsyms);
-}
-
-int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
-{
- const size_t size = PATH_MAX;
- char *filename = zalloc(size),
- *linkname = zalloc(size);
- int err = -1;
-
- if (filename == NULL || linkname == NULL)
- goto out_free;
-
- snprintf(linkname, size, "%s/.build-id/%.2s/%s",
- debugdir, sbuild_id, sbuild_id + 2);
-
- if (access(linkname, F_OK))
- goto out_free;
-
- if (readlink(linkname, filename, size - 1) < 0)
- goto out_free;
-
- if (unlink(linkname))
- goto out_free;
-
- /*
- * Since the link is relative, we must make it absolute:
- */
- snprintf(linkname, size, "%s/.build-id/%.2s/%s",
- debugdir, sbuild_id, filename);
-
- if (unlink(linkname))
- goto out_free;
-
- err = 0;
-out_free:
- free(filename);
- free(linkname);
- return err;
-}
-
-static int dso__cache_build_id(struct dso *dso, const char *debugdir)
-{
- bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
-
- return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
- dso->long_name, debugdir, is_kallsyms);
-}
-
-static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
-{
- struct dso *pos;
- int err = 0;
-
- dsos__for_each_with_build_id(pos, head)
- if (dso__cache_build_id(pos, debugdir))
- err = -1;
-
- return err;
-}
-
-static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
-{
- int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
- ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
- return ret;
-}
-
-static int perf_session__cache_build_ids(struct perf_session *session)
-{
- struct rb_node *nd;
- int ret;
- char debugdir[PATH_MAX];
-
- snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
-
- if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
- return -1;
-
- ret = machine__cache_build_ids(&session->host_machine, debugdir);
-
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
- struct machine *pos = rb_entry(nd, struct machine, rb_node);
- ret |= machine__cache_build_ids(pos, debugdir);
- }
- return ret ? -1 : 0;
-}
-
-static bool machine__read_build_ids(struct machine *machine, bool with_hits)
-{
- bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
- ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
- return ret;
-}
-
-static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
-{
- struct rb_node *nd;
- bool ret = machine__read_build_ids(&session->host_machine, with_hits);
-
- for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
- struct machine *pos = rb_entry(nd, struct machine, rb_node);
- ret |= machine__read_build_ids(pos, with_hits);
- }
-
- return ret;
-}
-
static int do_write_feat(int fd, struct perf_header *h, int type,
struct perf_file_section **p,
struct perf_evlist *evlist)
@@ -1386,6 +1391,8 @@ static int do_write_feat(int fd, struct perf_header *h, int type,
int ret = 0;
if (perf_header__has_feat(h, type)) {
+ if (!feat_ops[type].write)
+ return -1;
(*p)->offset = lseek(fd, 0, SEEK_CUR);
@@ -1408,18 +1415,12 @@ static int perf_header__adds_write(struct perf_header *header,
struct perf_evlist *evlist, int fd)
{
int nr_sections;
- struct perf_session *session;
struct perf_file_section *feat_sec, *p;
int sec_size;
u64 sec_start;
+ int feat;
int err;
- session = container_of(header, struct perf_session, header);
-
- if (perf_header__has_feat(header, HEADER_BUILD_ID &&
- !perf_session__read_build_ids(session, true)))
- perf_header__clear_feat(header, HEADER_BUILD_ID);
-
nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
return 0;
@@ -1433,64 +1434,11 @@ static int perf_header__adds_write(struct perf_header *header,
sec_start = header->data_offset + header->data_size;
lseek(fd, sec_start + sec_size, SEEK_SET);
- err = do_write_feat(fd, header, HEADER_TRACE_INFO, &p, evlist);
- if (err)
- goto out_free;
-
- err = do_write_feat(fd, header, HEADER_BUILD_ID, &p, evlist);
- if (err) {
- perf_header__clear_feat(header, HEADER_BUILD_ID);
- goto out_free;
+ for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+ if (do_write_feat(fd, header, feat, &p, evlist))
+ perf_header__clear_feat(header, feat);
}
- err = do_write_feat(fd, header, HEADER_HOSTNAME, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_HOSTNAME);
-
- err = do_write_feat(fd, header, HEADER_OSRELEASE, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_OSRELEASE);
-
- err = do_write_feat(fd, header, HEADER_VERSION, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_VERSION);
-
- err = do_write_feat(fd, header, HEADER_ARCH, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_ARCH);
-
- err = do_write_feat(fd, header, HEADER_NRCPUS, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_NRCPUS);
-
- err = do_write_feat(fd, header, HEADER_CPUDESC, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_CPUDESC);
-
- err = do_write_feat(fd, header, HEADER_CPUID, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_CPUID);
-
- err = do_write_feat(fd, header, HEADER_TOTAL_MEM, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_TOTAL_MEM);
-
- err = do_write_feat(fd, header, HEADER_CMDLINE, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_CMDLINE);
-
- err = do_write_feat(fd, header, HEADER_EVENT_DESC, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_EVENT_DESC);
-
- err = do_write_feat(fd, header, HEADER_CPU_TOPOLOGY, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_CPU_TOPOLOGY);
-
- err = do_write_feat(fd, header, HEADER_NUMA_TOPOLOGY, &p, evlist);
- if (err)
- perf_header__clear_feat(header, HEADER_NUMA_TOPOLOGY);
-
lseek(fd, sec_start, SEEK_SET);
/*
* may write more than needed due to dropped feature, but
@@ -1499,7 +1447,6 @@ static int perf_header__adds_write(struct perf_header *header,
err = do_write(fd, feat_sec, sec_size);
if (err < 0)
pr_debug("failed to write feature section\n");
-out_free:
free(feat_sec);
return err;
}
@@ -1637,20 +1584,20 @@ static int perf_header__getbuffer64(struct perf_header *header,
int perf_header__process_sections(struct perf_header *header, int fd,
void *data,
int (*process)(struct perf_file_section *section,
- struct perf_header *ph,
- int feat, int fd, void *data))
+ struct perf_header *ph,
+ int feat, int fd, void *data))
{
- struct perf_file_section *feat_sec;
+ struct perf_file_section *feat_sec, *sec;
int nr_sections;
int sec_size;
- int idx = 0;
- int err = -1, feat = 1;
+ int feat;
+ int err;
nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
return 0;
- feat_sec = calloc(sizeof(*feat_sec), nr_sections);
+ feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections);
if (!feat_sec)
return -1;
@@ -1658,20 +1605,16 @@ int perf_header__process_sections(struct perf_header *header, int fd,
lseek(fd, header->data_offset + header->data_size, SEEK_SET);
- if (perf_header__getbuffer64(header, fd, feat_sec, sec_size))
+ err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
+ if (err < 0)
goto out_free;
- err = 0;
- while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
- if (perf_header__has_feat(header, feat)) {
- struct perf_file_section *sec = &feat_sec[idx++];
-
- err = process(sec, header, feat, fd, data);
- if (err < 0)
- break;
- }
- ++feat;
+ for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
+ err = process(sec++, header, feat, fd, data);
+ if (err < 0)
+ goto out_free;
}
+ err = 0;
out_free:
free(feat_sec);
return err;
@@ -1906,32 +1849,21 @@ static int perf_file_section__process(struct perf_file_section *section,
return 0;
}
+ if (feat >= HEADER_LAST_FEATURE) {
+ pr_debug("unknown feature %d, continuing...\n", feat);
+ return 0;
+ }
+
switch (feat) {
case HEADER_TRACE_INFO:
trace_report(fd, false);
break;
-
case HEADER_BUILD_ID:
if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
pr_debug("Failed to read buildids, continuing...\n");
break;
-
- case HEADER_HOSTNAME:
- case HEADER_OSRELEASE:
- case HEADER_VERSION:
- case HEADER_ARCH:
- case HEADER_NRCPUS:
- case HEADER_CPUDESC:
- case HEADER_CPUID:
- case HEADER_TOTAL_MEM:
- case HEADER_CMDLINE:
- case HEADER_EVENT_DESC:
- case HEADER_CPU_TOPOLOGY:
- case HEADER_NUMA_TOPOLOGY:
- break;
-
default:
- pr_debug("unknown feature %d, continuing...\n", feat);
+ break;
}
return 0;
@@ -2041,6 +1973,8 @@ int perf_session__read_header(struct perf_session *session, int fd)
lseek(fd, tmp, SEEK_SET);
}
+ symbol_conf.nr_events = nr_attrs;
+
if (f_header.event_types.size) {
lseek(fd, f_header.event_types.offset, SEEK_SET);
events = malloc(f_header.event_types.size);
@@ -2068,9 +2002,9 @@ out_delete_evlist:
return -ENOMEM;
}
-int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- perf_event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_attr(struct perf_tool *tool,
+ struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process)
{
union perf_event *ev;
size_t size;
@@ -2092,22 +2026,23 @@ int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
ev->attr.header.size = size;
- err = process(ev, NULL, session);
+ err = process(tool, ev, NULL, NULL);
free(ev);
return err;
}
-int perf_session__synthesize_attrs(struct perf_session *session,
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+ struct perf_session *session,
perf_event__handler_t process)
{
struct perf_evsel *attr;
int err = 0;
list_for_each_entry(attr, &session->evlist->entries, node) {
- err = perf_event__synthesize_attr(&attr->attr, attr->ids,
- attr->id, process, session);
+ err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids,
+ attr->id, process);
if (err) {
pr_debug("failed to create perf header attribute\n");
return err;
@@ -2118,23 +2053,23 @@ int perf_session__synthesize_attrs(struct perf_session *session,
}
int perf_event__process_attr(union perf_event *event,
- struct perf_session *session)
+ struct perf_evlist **pevlist)
{
unsigned int i, ids, n_ids;
struct perf_evsel *evsel;
+ struct perf_evlist *evlist = *pevlist;
- if (session->evlist == NULL) {
- session->evlist = perf_evlist__new(NULL, NULL);
- if (session->evlist == NULL)
+ if (evlist == NULL) {
+ *pevlist = evlist = perf_evlist__new(NULL, NULL);
+ if (evlist == NULL)
return -ENOMEM;
}
- evsel = perf_evsel__new(&event->attr.attr,
- session->evlist->nr_entries);
+ evsel = perf_evsel__new(&event->attr.attr, evlist->nr_entries);
if (evsel == NULL)
return -ENOMEM;
- perf_evlist__add(session->evlist, evsel);
+ perf_evlist__add(evlist, evsel);
ids = event->header.size;
ids -= (void *)&event->attr.id - (void *)event;
@@ -2148,18 +2083,16 @@ int perf_event__process_attr(union perf_event *event,
return -ENOMEM;
for (i = 0; i < n_ids; i++) {
- perf_evlist__id_add(session->evlist, evsel, 0, i,
- event->attr.id[i]);
+ perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
}
- perf_session__update_sample_type(session);
-
return 0;
}
-int perf_event__synthesize_event_type(u64 event_id, char *name,
+int perf_event__synthesize_event_type(struct perf_tool *tool,
+ u64 event_id, char *name,
perf_event__handler_t process,
- struct perf_session *session)
+ struct machine *machine)
{
union perf_event ev;
size_t size = 0;
@@ -2177,13 +2110,14 @@ int perf_event__synthesize_event_type(u64 event_id, char *name,
ev.event_type.header.size = sizeof(ev.event_type) -
(sizeof(ev.event_type.event_type.name) - size);
- err = process(&ev, NULL, session);
+ err = process(tool, &ev, NULL, machine);
return err;
}
-int perf_event__synthesize_event_types(perf_event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_event_types(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
{
struct perf_trace_event_type *type;
int i, err = 0;
@@ -2191,9 +2125,9 @@ int perf_event__synthesize_event_types(perf_event__handler_t process,
for (i = 0; i < event_count; i++) {
type = &events[i];
- err = perf_event__synthesize_event_type(type->event_id,
+ err = perf_event__synthesize_event_type(tool, type->event_id,
type->name, process,
- session);
+ machine);
if (err) {
pr_debug("failed to create perf header event type\n");
return err;
@@ -2203,8 +2137,8 @@ int perf_event__synthesize_event_types(perf_event__handler_t process,
return err;
}
-int perf_event__process_event_type(union perf_event *event,
- struct perf_session *session __unused)
+int perf_event__process_event_type(struct perf_tool *tool __unused,
+ union perf_event *event)
{
if (perf_header__push_event(event->event_type.event_type.event_id,
event->event_type.event_type.name) < 0)
@@ -2213,9 +2147,9 @@ int perf_event__process_event_type(union perf_event *event,
return 0;
}
-int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
- perf_event__handler_t process,
- struct perf_session *session __unused)
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
+ struct perf_evlist *evlist,
+ perf_event__handler_t process)
{
union perf_event ev;
struct tracing_data *tdata;
@@ -2246,7 +2180,7 @@ int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
ev.tracing_data.header.size = sizeof(ev.tracing_data);
ev.tracing_data.size = aligned_size;
- process(&ev, NULL, session);
+ process(tool, &ev, NULL, NULL);
/*
* The put function will copy all the tracing data
@@ -2288,10 +2222,10 @@ int perf_event__process_tracing_data(union perf_event *event,
return size_read + padding;
}
-int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+ struct dso *pos, u16 misc,
perf_event__handler_t process,
- struct machine *machine,
- struct perf_session *session)
+ struct machine *machine)
{
union perf_event ev;
size_t len;
@@ -2311,12 +2245,13 @@ int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
ev.build_id.header.size = sizeof(ev.build_id) + len;
memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
- err = process(&ev, NULL, session);
+ err = process(tool, &ev, NULL, machine);
return err;
}
-int perf_event__process_build_id(union perf_event *event,
+int perf_event__process_build_id(struct perf_tool *tool __used,
+ union perf_event *event,
struct perf_session *session)
{
__event_process_build_id(&event->build_id,
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3d5a742f4a2..ac4ec956024 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -10,7 +10,8 @@
#include <linux/bitmap.h>
enum {
- HEADER_TRACE_INFO = 1,
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_TRACE_INFO = 1,
HEADER_BUILD_ID,
HEADER_HOSTNAME,
@@ -27,10 +28,9 @@ enum {
HEADER_NUMA_TOPOLOGY,
HEADER_LAST_FEATURE,
+ HEADER_FEAT_BITS = 256,
};
-#define HEADER_FEAT_BITS 256
-
struct perf_file_section {
u64 offset;
u64 size;
@@ -68,6 +68,7 @@ struct perf_header {
};
struct perf_evlist;
+struct perf_session;
int perf_session__read_header(struct perf_session *session, int fd);
int perf_session__write_header(struct perf_session *session,
@@ -96,32 +97,36 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms);
int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
-int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- perf_event__handler_t process,
- struct perf_session *session);
-int perf_session__synthesize_attrs(struct perf_session *session,
- perf_event__handler_t process);
-int perf_event__process_attr(union perf_event *event, struct perf_session *session);
+int perf_event__synthesize_attr(struct perf_tool *tool,
+ struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process);
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process);
+int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist);
-int perf_event__synthesize_event_type(u64 event_id, char *name,
+int perf_event__synthesize_event_type(struct perf_tool *tool,
+ u64 event_id, char *name,
perf_event__handler_t process,
- struct perf_session *session);
-int perf_event__synthesize_event_types(perf_event__handler_t process,
- struct perf_session *session);
-int perf_event__process_event_type(union perf_event *event,
- struct perf_session *session);
-
-int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
- perf_event__handler_t process,
- struct perf_session *session);
+ struct machine *machine);
+int perf_event__synthesize_event_types(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__process_event_type(struct perf_tool *tool,
+ union perf_event *event);
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool,
+ int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process);
int perf_event__process_tracing_data(union perf_event *event,
struct perf_session *session);
-int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+ struct dso *pos, u16 misc,
perf_event__handler_t process,
- struct machine *machine,
- struct perf_session *session);
-int perf_event__process_build_id(union perf_event *event,
+ struct machine *machine);
+int perf_event__process_build_id(struct perf_tool *tool,
+ union perf_event *event,
struct perf_session *session);
/*
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index a36a3fa81ff..abef2703cd2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1211,13 +1211,3 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp)
return ret;
}
-
-void hists__init(struct hists *hists)
-{
- memset(hists, 0, sizeof(*hists));
- hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
- hists->entries_in = &hists->entries_in_array[0];
- hists->entries_collapsed = RB_ROOT;
- hists->entries = RB_ROOT;
- pthread_mutex_init(&hists->lock, NULL);
-}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index c86c1d27bd1..ff6f9d56ea4 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -63,8 +63,6 @@ struct hists {
struct callchain_cursor callchain_cursor;
};
-void hists__init(struct hists *hists);
-
struct hist_entry *__hists__add_entry(struct hists *self,
struct addr_location *al,
struct symbol *parent, u64 period);
@@ -119,7 +117,6 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
int evidx __used,
- int nr_events __used,
void(*timer)(void *arg) __used,
void *arg __used,
int delay_secs __used)
@@ -130,7 +127,7 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
#define K_RIGHT -2
#else
#include "ui/keysyms.h"
-int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events,
+int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
void(*timer)(void *arg), void *arg, int delay_secs);
int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index 305c8484f20..62cdee78db7 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -9,6 +9,17 @@
#define BITS_PER_BYTE 8
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_cont(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
static inline void set_bit(int nr, unsigned long *addr)
{
addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
@@ -30,4 +41,111 @@ static inline unsigned long hweight_long(unsigned long w)
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+ int num = 0;
+
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf) == 0) {
+ num += 4;
+ word >>= 4;
+ }
+ if ((word & 0x3) == 0) {
+ num += 2;
+ word >>= 2;
+ }
+ if ((word & 0x1) == 0)
+ num += 1;
+ return num;
+}
+
+/*
+ * Find the first set bit in a memory region.
+ */
+static inline unsigned long
+find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + __ffs(tmp);
+}
+
+/*
+ * Find the next set bit in a memory region.
+ */
+static inline unsigned long
+find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
+
#endif
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 78284b13e80..316aa0ab712 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -562,6 +562,10 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid)
INIT_LIST_HEAD(&self->user_dsos);
INIT_LIST_HEAD(&self->kernel_dsos);
+ self->threads = RB_ROOT;
+ INIT_LIST_HEAD(&self->dead_threads);
+ self->last_match = NULL;
+
self->kmaps.machine = self;
self->pid = pid;
self->root_dir = strdup(root_dir);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 890d85545d0..2b8017f8a93 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -18,9 +18,11 @@ enum map_type {
extern const char *map_type__name[MAP__NR_TYPES];
struct dso;
+struct ip_callchain;
struct ref_reloc_sym;
struct map_groups;
struct machine;
+struct perf_evsel;
struct map {
union {
@@ -61,7 +63,11 @@ struct map_groups {
struct machine {
struct rb_node rb_node;
pid_t pid;
+ u16 id_hdr_size;
char *root_dir;
+ struct rb_root threads;
+ struct list_head dead_threads;
+ struct thread *last_match;
struct list_head user_dsos;
struct list_head kernel_dsos;
struct map_groups kmaps;
@@ -148,6 +154,13 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid);
void machine__exit(struct machine *self);
void machine__delete(struct machine *self);
+int machine__resolve_callchain(struct machine *machine,
+ struct perf_evsel *evsel, struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
+int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
+ u64 addr);
+
/*
* Default guest kernel is defined by parameter --guestkallsyms
* and --guestmodules
@@ -190,6 +203,12 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
struct map **mapp,
symbol_filter_t filter);
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
static inline
struct symbol *machine__find_kernel_symbol(struct machine *self,
enum map_type type, u64 addr,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 928918b796b..531c283fc0c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -25,8 +25,6 @@ enum event_result {
EVT_HANDLED_ALL
};
-char debugfs_path[MAXPATHLEN];
-
#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
@@ -40,6 +38,7 @@ static struct event_symbol event_symbols[] = {
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
{ CHW(BRANCH_MISSES), "branch-misses", "" },
{ CHW(BUS_CYCLES), "bus-cycles", "" },
+ { CHW(REF_CPU_CYCLES), "ref-cycles", "" },
{ CSW(CPU_CLOCK), "cpu-clock", "" },
{ CSW(TASK_CLOCK), "task-clock", "" },
@@ -70,6 +69,7 @@ static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
"bus-cycles",
"stalled-cycles-frontend",
"stalled-cycles-backend",
+ "ref-cycles",
};
static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
@@ -140,7 +140,7 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
char evt_path[MAXPATHLEN];
int fd;
- snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
+ snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
sys_dir->d_name, evt_dir->d_name);
fd = open(evt_path, O_RDONLY);
if (fd < 0)
@@ -171,16 +171,16 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
char evt_path[MAXPATHLEN];
char dir_path[MAXPATHLEN];
- if (debugfs_valid_mountpoint(debugfs_path))
+ if (debugfs_valid_mountpoint(tracing_events_path))
return NULL;
- sys_dir = opendir(debugfs_path);
+ sys_dir = opendir(tracing_events_path);
if (!sys_dir)
return NULL;
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
- snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
+ snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
sys_dirent.d_name);
evt_dir = opendir(dir_path);
if (!evt_dir)
@@ -447,7 +447,7 @@ parse_single_tracepoint_event(char *sys_name,
u64 id;
int fd;
- snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
+ snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
sys_name, evt_name);
fd = open(evt_path, O_RDONLY);
@@ -485,7 +485,7 @@ parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,
struct dirent *evt_ent;
DIR *evt_dir;
- snprintf(evt_path, MAXPATHLEN, "%s/%s", debugfs_path, sys_name);
+ snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
evt_dir = opendir(evt_path);
if (!evt_dir) {
@@ -528,7 +528,7 @@ parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,
char sys_name[MAX_EVENT_LENGTH];
unsigned int sys_length, evt_length;
- if (debugfs_valid_mountpoint(debugfs_path))
+ if (debugfs_valid_mountpoint(tracing_events_path))
return 0;
evt_name = strchr(*strp, ':');
@@ -920,10 +920,10 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
char evt_path[MAXPATHLEN];
char dir_path[MAXPATHLEN];
- if (debugfs_valid_mountpoint(debugfs_path))
+ if (debugfs_valid_mountpoint(tracing_events_path))
return;
- sys_dir = opendir(debugfs_path);
+ sys_dir = opendir(tracing_events_path);
if (!sys_dir)
return;
@@ -932,7 +932,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
!strglobmatch(sys_dirent.d_name, subsys_glob))
continue;
- snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
+ snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
sys_dirent.d_name);
evt_dir = opendir(dir_path);
if (!evt_dir)
@@ -964,16 +964,16 @@ int is_valid_tracepoint(const char *event_string)
char evt_path[MAXPATHLEN];
char dir_path[MAXPATHLEN];
- if (debugfs_valid_mountpoint(debugfs_path))
+ if (debugfs_valid_mountpoint(tracing_events_path))
return 0;
- sys_dir = opendir(debugfs_path);
+ sys_dir = opendir(tracing_events_path);
if (!sys_dir)
return 0;
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
- snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
+ snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
sys_dirent.d_name);
evt_dir = opendir(dir_path);
if (!evt_dir)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 2f8e375e038..7e0cbe75d5f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -39,7 +39,6 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob);
int print_hwcache_events(const char *event_glob);
extern int is_valid_tracepoint(const char *event_string);
-extern char debugfs_path[];
extern int valid_debugfs_mount(const char *debugfs);
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 1132c8f0ce8..17e94d0c36f 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -5,7 +5,6 @@
#include "util.h"
#include "probe-event.h"
-#define MAX_PATH_LEN 256
#define MAX_PROBE_BUFFER 1024
#define MAX_PROBES 128
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 74350ffb57f..e30749e38a9 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -27,7 +27,10 @@
#include "../../perf.h"
#include "../util.h"
+#include "../thread.h"
+#include "../event.h"
#include "../trace-event.h"
+#include "../evsel.h"
#include <EXTERN.h>
#include <perl.h>
@@ -245,11 +248,11 @@ static inline struct event *find_cache_event(int type)
return event;
}
-static void perl_process_event(union perf_event *pevent __unused,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct perf_session *session __unused,
- struct thread *thread)
+static void perl_process_tracepoint(union perf_event *pevent __unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine __unused,
+ struct thread *thread)
{
struct format_field *field;
static char handler[256];
@@ -265,6 +268,9 @@ static void perl_process_event(union perf_event *pevent __unused,
dSP;
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return;
+
type = trace_parse_common_type(data);
event = find_cache_event(type);
@@ -332,6 +338,42 @@ static void perl_process_event(union perf_event *pevent __unused,
LEAVE;
}
+static void perl_process_event_generic(union perf_event *pevent __unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __unused,
+ struct machine *machine __unused,
+ struct thread *thread __unused)
+{
+ dSP;
+
+ if (!get_cv("process_event", 0))
+ return;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size)));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
+ PUTBACK;
+ call_pv("process_event", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void perl_process_event(union perf_event *pevent,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct thread *thread)
+{
+ perl_process_tracepoint(pevent, sample, evsel, machine, thread);
+ perl_process_event_generic(pevent, sample, evsel, machine, thread);
+}
+
static void run_start_sub(void)
{
dSP; /* access to Perl stack */
@@ -553,7 +595,28 @@ static int perl_generate_script(const char *outfile)
fprintf(ofp, "sub print_header\n{\n"
"\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
"\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t "
- "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}");
+ "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
+
+ fprintf(ofp,
+ "\n# Packed byte string args of process_event():\n"
+ "#\n"
+ "# $event:\tunion perf_event\tutil/event.h\n"
+ "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
+ "# $sample:\tstruct perf_sample\tutil/event.h\n"
+ "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
+ "\n"
+ "sub process_event\n"
+ "{\n"
+ "\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
+ "\n"
+ "\tmy @event\t= unpack(\"LSS\", $event);\n"
+ "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
+ "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
+ "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
+ "\n"
+ "\tuse Data::Dumper;\n"
+ "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
+ "}\n");
fclose(ofp);
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 6ccf70e8d8f..0b2a4878317 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -29,6 +29,8 @@
#include "../../perf.h"
#include "../util.h"
+#include "../event.h"
+#include "../thread.h"
#include "../trace-event.h"
PyMODINIT_FUNC initperf_trace_context(void);
@@ -207,7 +209,7 @@ static inline struct event *find_cache_event(int type)
static void python_process_event(union perf_event *pevent __unused,
struct perf_sample *sample,
struct perf_evsel *evsel __unused,
- struct perf_session *session __unused,
+ struct machine *machine __unused,
struct thread *thread)
{
PyObject *handler, *retval, *context, *t, *obj, *dict = NULL;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 85c1e6b76f0..b5ca2558c7b 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -10,6 +10,7 @@
#include "evlist.h"
#include "evsel.h"
#include "session.h"
+#include "tool.h"
#include "sort.h"
#include "util.h"
#include "cpumap.h"
@@ -78,39 +79,13 @@ out_close:
return -1;
}
-static void perf_session__id_header_size(struct perf_session *session)
-{
- struct perf_sample *data;
- u64 sample_type = session->sample_type;
- u16 size = 0;
-
- if (!session->sample_id_all)
- goto out;
-
- if (sample_type & PERF_SAMPLE_TID)
- size += sizeof(data->tid) * 2;
-
- if (sample_type & PERF_SAMPLE_TIME)
- size += sizeof(data->time);
-
- if (sample_type & PERF_SAMPLE_ID)
- size += sizeof(data->id);
-
- if (sample_type & PERF_SAMPLE_STREAM_ID)
- size += sizeof(data->stream_id);
-
- if (sample_type & PERF_SAMPLE_CPU)
- size += sizeof(data->cpu) * 2;
-out:
- session->id_hdr_size = size;
-}
-
void perf_session__update_sample_type(struct perf_session *self)
{
self->sample_type = perf_evlist__sample_type(self->evlist);
self->sample_size = __perf_evsel__sample_size(self->sample_type);
self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
- perf_session__id_header_size(self);
+ self->id_hdr_size = perf_evlist__id_hdr_size(self->evlist);
+ self->host_machine.id_hdr_size = self->id_hdr_size;
}
int perf_session__create_kernel_maps(struct perf_session *self)
@@ -130,18 +105,26 @@ static void perf_session__destroy_kernel_maps(struct perf_session *self)
struct perf_session *perf_session__new(const char *filename, int mode,
bool force, bool repipe,
- struct perf_event_ops *ops)
+ struct perf_tool *tool)
{
- size_t len = filename ? strlen(filename) + 1 : 0;
- struct perf_session *self = zalloc(sizeof(*self) + len);
+ struct perf_session *self;
+ struct stat st;
+ size_t len;
+
+ if (!filename || !strlen(filename)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ filename = "-";
+ else
+ filename = "perf.data";
+ }
+
+ len = strlen(filename);
+ self = zalloc(sizeof(*self) + len);
if (self == NULL)
goto out;
memcpy(self->filename, filename, len);
- self->threads = RB_ROOT;
- INIT_LIST_HEAD(&self->dead_threads);
- self->last_match = NULL;
/*
* On 64bit we can mmap the data file in one go. No need for tiny mmap
* slices. On 32bit we use 32MB.
@@ -171,10 +154,10 @@ struct perf_session *perf_session__new(const char *filename, int mode,
goto out_delete;
}
- if (ops && ops->ordering_requires_timestamps &&
- ops->ordered_samples && !self->sample_id_all) {
+ if (tool && tool->ordering_requires_timestamps &&
+ tool->ordered_samples && !self->sample_id_all) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
- ops->ordered_samples = false;
+ tool->ordered_samples = false;
}
out:
@@ -184,17 +167,22 @@ out_delete:
return NULL;
}
-static void perf_session__delete_dead_threads(struct perf_session *self)
+static void machine__delete_dead_threads(struct machine *machine)
{
struct thread *n, *t;
- list_for_each_entry_safe(t, n, &self->dead_threads, node) {
+ list_for_each_entry_safe(t, n, &machine->dead_threads, node) {
list_del(&t->node);
thread__delete(t);
}
}
-static void perf_session__delete_threads(struct perf_session *self)
+static void perf_session__delete_dead_threads(struct perf_session *session)
+{
+ machine__delete_dead_threads(&session->host_machine);
+}
+
+static void machine__delete_threads(struct machine *self)
{
struct rb_node *nd = rb_first(&self->threads);
@@ -207,6 +195,11 @@ static void perf_session__delete_threads(struct perf_session *self)
}
}
+static void perf_session__delete_threads(struct perf_session *session)
+{
+ machine__delete_threads(&session->host_machine);
+}
+
void perf_session__delete(struct perf_session *self)
{
perf_session__destroy_kernel_maps(self);
@@ -217,7 +210,7 @@ void perf_session__delete(struct perf_session *self)
free(self);
}
-void perf_session__remove_thread(struct perf_session *self, struct thread *th)
+void machine__remove_thread(struct machine *self, struct thread *th)
{
self->last_match = NULL;
rb_erase(&th->rb_node, &self->threads);
@@ -236,16 +229,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0;
}
-int perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent)
+int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent)
{
u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
int err;
- callchain_cursor_reset(&self->callchain_cursor);
+ callchain_cursor_reset(&evsel->hists.callchain_cursor);
for (i = 0; i < chain->nr; i++) {
u64 ip;
@@ -272,7 +265,7 @@ int perf_session__resolve_callchain(struct perf_session *self,
al.filtered = false;
thread__find_addr_location(thread, self, cpumode,
- MAP__FUNCTION, thread->pid, ip, &al, NULL);
+ MAP__FUNCTION, ip, &al, NULL);
if (al.sym != NULL) {
if (sort__has_parent && !*parent &&
symbol__match_parent_regex(al.sym))
@@ -281,7 +274,7 @@ int perf_session__resolve_callchain(struct perf_session *self,
break;
}
- err = callchain_cursor_append(&self->callchain_cursor,
+ err = callchain_cursor_append(&evsel->hists.callchain_cursor,
ip, al.map, al.sym);
if (err)
return err;
@@ -290,75 +283,91 @@ int perf_session__resolve_callchain(struct perf_session *self,
return 0;
}
-static int process_event_synth_stub(union perf_event *event __used,
- struct perf_session *session __used)
+static int process_event_synth_tracing_data_stub(union perf_event *event __used,
+ struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_event_sample_stub(union perf_event *event __used,
+static int process_event_synth_attr_stub(union perf_event *event __used,
+ struct perf_evlist **pevlist __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_sample_stub(struct perf_tool *tool __used,
+ union perf_event *event __used,
struct perf_sample *sample __used,
struct perf_evsel *evsel __used,
- struct perf_session *session __used)
+ struct machine *machine __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_event_stub(union perf_event *event __used,
+static int process_event_stub(struct perf_tool *tool __used,
+ union perf_event *event __used,
struct perf_sample *sample __used,
- struct perf_session *session __used)
+ struct machine *machine __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round_stub(struct perf_tool *tool __used,
+ union perf_event *event __used,
+ struct perf_session *perf_session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_finished_round_stub(union perf_event *event __used,
- struct perf_session *session __used,
- struct perf_event_ops *ops __used)
+static int process_event_type_stub(struct perf_tool *tool __used,
+ union perf_event *event __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_finished_round(union perf_event *event,
- struct perf_session *session,
- struct perf_event_ops *ops);
+static int process_finished_round(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
-static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
+static void perf_tool__fill_defaults(struct perf_tool *tool)
{
- if (handler->sample == NULL)
- handler->sample = process_event_sample_stub;
- if (handler->mmap == NULL)
- handler->mmap = process_event_stub;
- if (handler->comm == NULL)
- handler->comm = process_event_stub;
- if (handler->fork == NULL)
- handler->fork = process_event_stub;
- if (handler->exit == NULL)
- handler->exit = process_event_stub;
- if (handler->lost == NULL)
- handler->lost = perf_event__process_lost;
- if (handler->read == NULL)
- handler->read = process_event_stub;
- if (handler->throttle == NULL)
- handler->throttle = process_event_stub;
- if (handler->unthrottle == NULL)
- handler->unthrottle = process_event_stub;
- if (handler->attr == NULL)
- handler->attr = process_event_synth_stub;
- if (handler->event_type == NULL)
- handler->event_type = process_event_synth_stub;
- if (handler->tracing_data == NULL)
- handler->tracing_data = process_event_synth_stub;
- if (handler->build_id == NULL)
- handler->build_id = process_event_synth_stub;
- if (handler->finished_round == NULL) {
- if (handler->ordered_samples)
- handler->finished_round = process_finished_round;
+ if (tool->sample == NULL)
+ tool->sample = process_event_sample_stub;
+ if (tool->mmap == NULL)
+ tool->mmap = process_event_stub;
+ if (tool->comm == NULL)
+ tool->comm = process_event_stub;
+ if (tool->fork == NULL)
+ tool->fork = process_event_stub;
+ if (tool->exit == NULL)
+ tool->exit = process_event_stub;
+ if (tool->lost == NULL)
+ tool->lost = perf_event__process_lost;
+ if (tool->read == NULL)
+ tool->read = process_event_sample_stub;
+ if (tool->throttle == NULL)
+ tool->throttle = process_event_stub;
+ if (tool->unthrottle == NULL)
+ tool->unthrottle = process_event_stub;
+ if (tool->attr == NULL)
+ tool->attr = process_event_synth_attr_stub;
+ if (tool->event_type == NULL)
+ tool->event_type = process_event_type_stub;
+ if (tool->tracing_data == NULL)
+ tool->tracing_data = process_event_synth_tracing_data_stub;
+ if (tool->build_id == NULL)
+ tool->build_id = process_finished_round_stub;
+ if (tool->finished_round == NULL) {
+ if (tool->ordered_samples)
+ tool->finished_round = process_finished_round;
else
- handler->finished_round = process_finished_round_stub;
+ tool->finished_round = process_finished_round_stub;
}
}
@@ -490,11 +499,11 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
static int perf_session_deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_event_ops *ops,
+ struct perf_tool *tool,
u64 file_offset);
static void flush_sample_queue(struct perf_session *s,
- struct perf_event_ops *ops)
+ struct perf_tool *tool)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *head = &os->samples;
@@ -505,7 +514,7 @@ static void flush_sample_queue(struct perf_session *s,
unsigned idx = 0, progress_next = os->nr_samples / 16;
int ret;
- if (!ops->ordered_samples || !limit)
+ if (!tool->ordered_samples || !limit)
return;
list_for_each_entry_safe(iter, tmp, head, list) {
@@ -516,7 +525,7 @@ static void flush_sample_queue(struct perf_session *s,
if (ret)
pr_err("Can't parse sample, err = %d\n", ret);
else
- perf_session_deliver_event(s, iter->event, &sample, ops,
+ perf_session_deliver_event(s, iter->event, &sample, tool,
iter->file_offset);
os->last_flush = iter->timestamp;
@@ -578,11 +587,11 @@ static void flush_sample_queue(struct perf_session *s,
* Flush every events below timestamp 7
* etc...
*/
-static int process_finished_round(union perf_event *event __used,
- struct perf_session *session,
- struct perf_event_ops *ops)
+static int process_finished_round(struct perf_tool *tool,
+ union perf_event *event __used,
+ struct perf_session *session)
{
- flush_sample_queue(session, ops);
+ flush_sample_queue(session, tool);
session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;
return 0;
@@ -737,13 +746,26 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
callchain__printf(sample);
}
+static struct machine *
+ perf_session__find_machine_for_cpumode(struct perf_session *session,
+ union perf_event *event)
+{
+ const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest)
+ return perf_session__find_machine(session, event->ip.pid);
+
+ return perf_session__find_host_machine(session);
+}
+
static int perf_session_deliver_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
- struct perf_event_ops *ops,
+ struct perf_tool *tool,
u64 file_offset)
{
struct perf_evsel *evsel;
+ struct machine *machine;
dump_event(session, event, file_offset, sample);
@@ -765,6 +787,8 @@ static int perf_session_deliver_event(struct perf_session *session,
hists__inc_nr_events(&evsel->hists, event->header.type);
}
+ machine = perf_session__find_machine_for_cpumode(session, event);
+
switch (event->header.type) {
case PERF_RECORD_SAMPLE:
dump_sample(session, event, sample);
@@ -772,23 +796,25 @@ static int perf_session_deliver_event(struct perf_session *session,
++session->hists.stats.nr_unknown_id;
return -1;
}
- return ops->sample(event, sample, evsel, session);
+ return tool->sample(tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
- return ops->mmap(event, sample, session);
+ return tool->mmap(tool, event, sample, machine);
case PERF_RECORD_COMM:
- return ops->comm(event, sample, session);
+ return tool->comm(tool, event, sample, machine);
case PERF_RECORD_FORK:
- return ops->fork(event, sample, session);
+ return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
- return ops->exit(event, sample, session);
+ return tool->exit(tool, event, sample, machine);
case PERF_RECORD_LOST:
- return ops->lost(event, sample, session);
+ if (tool->lost == perf_event__process_lost)
+ session->hists.stats.total_lost += event->lost.lost;
+ return tool->lost(tool, event, sample, machine);
case PERF_RECORD_READ:
- return ops->read(event, sample, session);
+ return tool->read(tool, event, sample, evsel, machine);
case PERF_RECORD_THROTTLE:
- return ops->throttle(event, sample, session);
+ return tool->throttle(tool, event, sample, machine);
case PERF_RECORD_UNTHROTTLE:
- return ops->unthrottle(event, sample, session);
+ return tool->unthrottle(tool, event, sample, machine);
default:
++session->hists.stats.nr_unknown_events;
return -1;
@@ -812,24 +838,29 @@ static int perf_session__preprocess_sample(struct perf_session *session,
}
static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
- struct perf_event_ops *ops, u64 file_offset)
+ struct perf_tool *tool, u64 file_offset)
{
+ int err;
+
dump_event(session, event, file_offset, NULL);
/* These events are processed right away */
switch (event->header.type) {
case PERF_RECORD_HEADER_ATTR:
- return ops->attr(event, session);
+ err = tool->attr(event, &session->evlist);
+ if (err == 0)
+ perf_session__update_sample_type(session);
+ return err;
case PERF_RECORD_HEADER_EVENT_TYPE:
- return ops->event_type(event, session);
+ return tool->event_type(tool, event);
case PERF_RECORD_HEADER_TRACING_DATA:
/* setup for reading amidst mmap */
lseek(session->fd, file_offset, SEEK_SET);
- return ops->tracing_data(event, session);
+ return tool->tracing_data(event, session);
case PERF_RECORD_HEADER_BUILD_ID:
- return ops->build_id(event, session);
+ return tool->build_id(tool, event, session);
case PERF_RECORD_FINISHED_ROUND:
- return ops->finished_round(event, session, ops);
+ return tool->finished_round(tool, event, session);
default:
return -EINVAL;
}
@@ -837,7 +868,7 @@ static int perf_session__process_user_event(struct perf_session *session, union
static int perf_session__process_event(struct perf_session *session,
union perf_event *event,
- struct perf_event_ops *ops,
+ struct perf_tool *tool,
u64 file_offset)
{
struct perf_sample sample;
@@ -853,7 +884,7 @@ static int perf_session__process_event(struct perf_session *session,
hists__inc_nr_events(&session->hists, event->header.type);
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
- return perf_session__process_user_event(session, event, ops, file_offset);
+ return perf_session__process_user_event(session, event, tool, file_offset);
/*
* For all kernel events we get the sample data
@@ -866,14 +897,14 @@ static int perf_session__process_event(struct perf_session *session,
if (perf_session__preprocess_sample(session, event, &sample))
return 0;
- if (ops->ordered_samples) {
+ if (tool->ordered_samples) {
ret = perf_session_queue_event(session, event, &sample,
file_offset);
if (ret != -ETIME)
return ret;
}
- return perf_session_deliver_event(session, event, &sample, ops,
+ return perf_session_deliver_event(session, event, &sample, tool,
file_offset);
}
@@ -884,6 +915,11 @@ void perf_event_header__bswap(struct perf_event_header *self)
self->size = bswap_16(self->size);
}
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
+{
+ return machine__findnew_thread(&session->host_machine, pid);
+}
+
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
struct thread *thread = perf_session__findnew(self, 0);
@@ -897,9 +933,9 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
}
static void perf_session__warn_about_errors(const struct perf_session *session,
- const struct perf_event_ops *ops)
+ const struct perf_tool *tool)
{
- if (ops->lost == perf_event__process_lost &&
+ if (tool->lost == perf_event__process_lost &&
session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) {
ui__warning("Processed %d events and lost %d chunks!\n\n"
"Check IO/CPU overload!\n\n",
@@ -934,7 +970,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
volatile int session_done;
static int __perf_session__process_pipe_events(struct perf_session *self,
- struct perf_event_ops *ops)
+ struct perf_tool *tool)
{
union perf_event event;
uint32_t size;
@@ -943,7 +979,7 @@ static int __perf_session__process_pipe_events(struct perf_session *self,
int err;
void *p;
- perf_event_ops__fill_defaults(ops);
+ perf_tool__fill_defaults(tool);
head = 0;
more:
@@ -979,8 +1015,7 @@ more:
}
}
- if (size == 0 ||
- (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
+ if ((skip = perf_session__process_event(self, &event, tool, head)) < 0) {
dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
head, event.header.size, event.header.type);
/*
@@ -1003,7 +1038,7 @@ more:
done:
err = 0;
out_err:
- perf_session__warn_about_errors(self, ops);
+ perf_session__warn_about_errors(self, tool);
perf_session_free_sample_buffers(self);
return err;
}
@@ -1034,7 +1069,7 @@ fetch_mmaped_event(struct perf_session *session,
int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
- u64 file_size, struct perf_event_ops *ops)
+ u64 file_size, struct perf_tool *tool)
{
u64 head, page_offset, file_offset, file_pos, progress_next;
int err, mmap_prot, mmap_flags, map_idx = 0;
@@ -1043,7 +1078,7 @@ int __perf_session__process_events(struct perf_session *session,
union perf_event *event;
uint32_t size;
- perf_event_ops__fill_defaults(ops);
+ perf_tool__fill_defaults(tool);
page_size = sysconf(_SC_PAGESIZE);
@@ -1098,7 +1133,7 @@ more:
size = event->header.size;
if (size == 0 ||
- perf_session__process_event(session, event, ops, file_pos) < 0) {
+ perf_session__process_event(session, event, tool, file_pos) < 0) {
dump_printf("%#" PRIx64 " [%#x]: skipping unknown header type: %d\n",
file_offset + head, event->header.size,
event->header.type);
@@ -1127,15 +1162,15 @@ more:
err = 0;
/* do the final flush for ordered samples */
session->ordered_samples.next_flush = ULLONG_MAX;
- flush_sample_queue(session, ops);
+ flush_sample_queue(session, tool);
out_err:
- perf_session__warn_about_errors(session, ops);
+ perf_session__warn_about_errors(session, tool);
perf_session_free_sample_buffers(session);
return err;
}
int perf_session__process_events(struct perf_session *self,
- struct perf_event_ops *ops)
+ struct perf_tool *tool)
{
int err;
@@ -1146,9 +1181,9 @@ int perf_session__process_events(struct perf_session *self,
err = __perf_session__process_events(self,
self->header.data_offset,
self->header.data_size,
- self->size, ops);
+ self->size, tool);
else
- err = __perf_session__process_pipe_events(self, ops);
+ err = __perf_session__process_pipe_events(self, tool);
return err;
}
@@ -1163,9 +1198,8 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg)
return true;
}
-int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
- const char *symbol_name,
- u64 addr)
+int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
+ const char *symbol_name, u64 addr)
{
char *bracket;
enum map_type i;
@@ -1224,6 +1258,27 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
return ret;
}
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
+{
+ /*
+ * FIXME: Here we have to actually print all the machines in this
+ * session, not just the host...
+ */
+ return machine__fprintf(&session->host_machine, fp);
+}
+
+void perf_session__remove_thread(struct perf_session *session,
+ struct thread *th)
+{
+ /*
+ * FIXME: This one makes no sense, we need to remove the thread from
+ * the machine it belongs to, perf_session can have many machines, so
+ * doing it always on ->host_machine is wrong. Fix when auditing all
+ * the 'perf kvm' code.
+ */
+ machine__remove_thread(&session->host_machine, th);
+}
+
struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
unsigned int type)
{
@@ -1236,17 +1291,16 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
return NULL;
}
-void perf_session__print_ip(union perf_event *event,
- struct perf_sample *sample,
- struct perf_session *session,
- int print_sym, int print_dso)
+void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
+ struct machine *machine, struct perf_evsel *evsel,
+ int print_sym, int print_dso)
{
struct addr_location al;
const char *symname, *dsoname;
- struct callchain_cursor *cursor = &session->callchain_cursor;
+ struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
struct callchain_cursor_node *node;
- if (perf_event__preprocess_sample(event, session, &al, sample,
+ if (perf_event__preprocess_sample(event, machine, &al, sample,
NULL) < 0) {
error("problem processing %d event, skipping it.\n",
event->header.type);
@@ -1255,7 +1309,7 @@ void perf_session__print_ip(union perf_event *event,
if (symbol_conf.use_callchain && sample->callchain) {
- if (perf_session__resolve_callchain(session, al.thread,
+ if (machine__resolve_callchain(machine, evsel, al.thread,
sample->callchain, NULL) != 0) {
if (verbose)
error("Failed to resolve callchain. Skipping\n");
@@ -1333,6 +1387,10 @@ int perf_session__cpu_bitmap(struct perf_session *session,
}
map = cpu_map__new(cpu_list);
+ if (map == NULL) {
+ pr_err("Invalid cpu_list\n");
+ return -1;
+ }
for (i = 0; i < map->nr; i++) {
int cpu = map->map[i];
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 6e393c98eb3..37bc38381fb 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -30,9 +30,6 @@ struct perf_session {
struct perf_header header;
unsigned long size;
unsigned long mmap_window;
- struct rb_root threads;
- struct list_head dead_threads;
- struct thread *last_match;
struct machine host_machine;
struct rb_root machines;
struct perf_evlist *evlist;
@@ -53,65 +50,31 @@ struct perf_session {
int cwdlen;
char *cwd;
struct ordered_samples ordered_samples;
- struct callchain_cursor callchain_cursor;
- char filename[0];
+ char filename[1];
};
-struct perf_evsel;
-struct perf_event_ops;
-
-typedef int (*event_sample)(union perf_event *event, struct perf_sample *sample,
- struct perf_evsel *evsel, struct perf_session *session);
-typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
- struct perf_session *session);
-typedef int (*event_synth_op)(union perf_event *self,
- struct perf_session *session);
-typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
- struct perf_event_ops *ops);
-
-struct perf_event_ops {
- event_sample sample;
- event_op mmap,
- comm,
- fork,
- exit,
- lost,
- read,
- throttle,
- unthrottle;
- event_synth_op attr,
- event_type,
- tracing_data,
- build_id;
- event_op2 finished_round;
- bool ordered_samples;
- bool ordering_requires_timestamps;
-};
+struct perf_tool;
struct perf_session *perf_session__new(const char *filename, int mode,
bool force, bool repipe,
- struct perf_event_ops *ops);
+ struct perf_tool *tool);
void perf_session__delete(struct perf_session *self);
void perf_event_header__bswap(struct perf_event_header *self);
int __perf_session__process_events(struct perf_session *self,
u64 data_offset, u64 data_size, u64 size,
- struct perf_event_ops *ops);
+ struct perf_tool *tool);
int perf_session__process_events(struct perf_session *self,
- struct perf_event_ops *event_ops);
+ struct perf_tool *tool);
-int perf_session__resolve_callchain(struct perf_session *self,
+int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel *evsel,
struct thread *thread,
struct ip_callchain *chain,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
-int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
- const char *symbol_name,
- u64 addr);
-
void mem_bswap_64(void *src, int byte_size);
void perf_event__attr_swap(struct perf_event_attr *attr);
@@ -144,12 +107,16 @@ struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t p
static inline
void perf_session__process_machines(struct perf_session *self,
+ struct perf_tool *tool,
machine__process_t process)
{
- process(&self->host_machine, self);
- return machines__process(&self->machines, process, self);
+ process(&self->host_machine, tool);
+ return machines__process(&self->machines, process, tool);
}
+struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
+size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
+
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
@@ -167,13 +134,20 @@ static inline int perf_session__parse_sample(struct perf_session *session,
session->header.needs_swap);
}
+static inline int perf_session__synthesize_sample(struct perf_session *session,
+ union perf_event *event,
+ const struct perf_sample *sample)
+{
+ return perf_event__synthesize_sample(event, session->sample_type,
+ sample, session->header.needs_swap);
+}
+
struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
unsigned int type);
-void perf_session__print_ip(union perf_event *event,
- struct perf_sample *sample,
- struct perf_session *session,
- int print_sym, int print_dso);
+void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
+ struct machine *machine, struct perf_evsel *evsel,
+ int print_sym, int print_dso);
int perf_session__cpu_bitmap(struct perf_session *session,
const char *cpu_list, unsigned long *cpu_bitmap);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 95d37007492..36d4c561957 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -27,7 +27,8 @@ build_tmp = getenv('PYTHON_EXTBUILD_TMP')
perf = Extension('perf',
sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
- 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'],
+ 'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
+ 'util/debugfs.c'],
include_dirs = ['util/include'],
extra_compile_args = cflags,
)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 632b50c7bc2..215d50f2042 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1757,7 +1757,7 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
- sprintf(path, "%s/%s", dir_name, dent->d_name);
+ snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
if (stat(path, &st))
continue;
@@ -1766,8 +1766,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
!strcmp(dent->d_name, ".."))
continue;
- snprintf(path, sizeof(path), "%s/%s",
- dir_name, dent->d_name);
ret = map_groups__set_modules_path_dir(mg, path);
if (ret < 0)
goto out;
@@ -1788,9 +1786,6 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg,
if (map == NULL)
continue;
- snprintf(path, sizeof(path), "%s/%s",
- dir_name, dent->d_name);
-
long_name = strdup(path);
if (long_name == NULL) {
ret = -1;
@@ -2609,10 +2604,10 @@ int symbol__init(void)
symbol_conf.initialized = true;
return 0;
-out_free_dso_list:
- strlist__delete(symbol_conf.dso_list);
out_free_comm_list:
strlist__delete(symbol_conf.comm_list);
+out_free_dso_list:
+ strlist__delete(symbol_conf.dso_list);
return -1;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 29f8d742e92..123c2e14353 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -68,6 +68,7 @@ struct strlist;
struct symbol_conf {
unsigned short priv_size;
+ unsigned short nr_events;
bool try_vmlinux_path,
use_modules,
sort_by_name,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index d5d3b22250f..fb4b7ea6752 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -61,7 +61,7 @@ static size_t thread__fprintf(struct thread *self, FILE *fp)
map_groups__fprintf(&self->mg, verbose, fp);
}
-struct thread *perf_session__findnew(struct perf_session *self, pid_t pid)
+struct thread *machine__findnew_thread(struct machine *self, pid_t pid)
{
struct rb_node **p = &self->threads.rb_node;
struct rb_node *parent = NULL;
@@ -125,12 +125,12 @@ int thread__fork(struct thread *self, struct thread *parent)
return 0;
}
-size_t perf_session__fprintf(struct perf_session *self, FILE *fp)
+size_t machine__fprintf(struct machine *machine, FILE *fp)
{
size_t ret = 0;
struct rb_node *nd;
- for (nd = rb_first(&self->threads); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
ret += thread__fprintf(pos, fp);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index e5f2401c1b5..70c2c13ff67 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,16 +18,14 @@ struct thread {
int comm_len;
};
-struct perf_session;
+struct machine;
void thread__delete(struct thread *self);
int thread__set_comm(struct thread *self, const char *comm);
int thread__comm_len(struct thread *self);
-struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
void thread__insert_map(struct thread *self, struct map *map);
int thread__fork(struct thread *self, struct thread *parent);
-size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
static inline struct map *thread__find_map(struct thread *self,
enum map_type type, u64 addr)
@@ -35,14 +33,12 @@ static inline struct map *thread__find_map(struct thread *self,
return self ? map_groups__find(&self->mg, type, addr) : NULL;
}
-void thread__find_addr_map(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, pid_t pid, u64 addr,
+void thread__find_addr_map(struct thread *thread, struct machine *machine,
+ u8 cpumode, enum map_type type, u64 addr,
struct addr_location *al);
-void thread__find_addr_location(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, pid_t pid, u64 addr,
+void thread__find_addr_location(struct thread *thread, struct machine *machine,
+ u8 cpumode, enum map_type type, u64 addr,
struct addr_location *al,
symbol_filter_t filter);
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
new file mode 100644
index 00000000000..b0e1aadba8d
--- /dev/null
+++ b/tools/perf/util/tool.h
@@ -0,0 +1,50 @@
+#ifndef __PERF_TOOL_H
+#define __PERF_TOOL_H
+
+#include <stdbool.h>
+
+struct perf_session;
+union perf_event;
+struct perf_evlist;
+struct perf_evsel;
+struct perf_sample;
+struct perf_tool;
+struct machine;
+
+typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel, struct machine *machine);
+
+typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+
+typedef int (*event_attr_op)(union perf_event *event,
+ struct perf_evlist **pevlist);
+typedef int (*event_simple_op)(struct perf_tool *tool, union perf_event *event);
+
+typedef int (*event_synth_op)(union perf_event *event,
+ struct perf_session *session);
+
+typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
+ struct perf_session *session);
+
+struct perf_tool {
+ event_sample sample,
+ read;
+ event_op mmap,
+ comm,
+ fork,
+ exit,
+ lost,
+ throttle,
+ unthrottle;
+ event_attr_op attr;
+ event_synth_op tracing_data;
+ event_simple_op event_type;
+ event_op2 finished_round,
+ build_id;
+ bool ordered_samples;
+ bool ordering_requires_timestamps;
+};
+
+#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 39965096795..a248f3c2c60 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -1,15 +1,17 @@
#ifndef __PERF_TOP_H
#define __PERF_TOP_H 1
+#include "tool.h"
#include "types.h"
-#include "../perf.h"
#include <stddef.h>
+#include <stdbool.h>
struct perf_evlist;
struct perf_evsel;
struct perf_session;
struct perf_top {
+ struct perf_tool tool;
struct perf_evlist *evlist;
/*
* Symbols will be added here in perf_event__process_sample and will
@@ -23,10 +25,26 @@ struct perf_top {
int freq;
pid_t target_pid, target_tid;
bool hide_kernel_symbols, hide_user_symbols, zero;
+ bool system_wide;
+ bool use_tui, use_stdio;
+ bool sort_has_symbols;
+ bool dont_use_callchains;
+ bool kptr_restrict_warned;
+ bool vmlinux_warned;
+ bool inherit;
+ bool group;
+ bool sample_id_all_avail;
+ bool dump_symtab;
const char *cpu_list;
struct hist_entry *sym_filter_entry;
struct perf_evsel *sym_evsel;
struct perf_session *session;
+ struct winsize winsize;
+ unsigned int mmap_pages;
+ int default_interval;
+ int realtime_prio;
+ int sym_pcnt_filter;
+ const char *sym_filter;
};
size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d2655f08bcc..ac6830d8292 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -18,7 +18,8 @@
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-#define _GNU_SOURCE
+#include <ctype.h>
+#include "util.h"
#include <dirent.h>
#include <mntent.h>
#include <stdio.h>
@@ -31,7 +32,6 @@
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
-#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <linux/list.h>
@@ -44,10 +44,6 @@
#define VERSION "0.5"
-#define _STR(x) #x
-#define STR(x) _STR(x)
-#define MAX_PATH 256
-
#define TRACE_CTRL "tracing_on"
#define TRACE "trace"
#define AVAILABLE "available_tracers"
@@ -73,26 +69,6 @@ struct events {
};
-
-static void die(const char *fmt, ...)
-{
- va_list ap;
- int ret = errno;
-
- if (errno)
- perror("perf");
- else
- ret = -1;
-
- va_start(ap, fmt);
- fprintf(stderr, " ");
- vfprintf(stderr, fmt, ap);
- va_end(ap);
-
- fprintf(stderr, "\n");
- exit(ret);
-}
-
void *malloc_or_die(unsigned int size)
{
void *data;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0a7ed5b5e28..6c164dc9ee9 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -1537,6 +1537,8 @@ process_flags(struct event *event, struct print_arg *arg, char **tok)
field = malloc_or_die(sizeof(*field));
type = process_arg(event, field, &token);
+ while (type == EVENT_OP)
+ type = process_op(event, field, &token);
if (test_type_token(type, token, EVENT_DELIM, ","))
goto out_free;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index c9dcbec7d80..a3fdf55f317 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -39,7 +39,7 @@ static int stop_script_unsupported(void)
static void process_event_unsupported(union perf_event *event __unused,
struct perf_sample *sample __unused,
struct perf_evsel *evsel __unused,
- struct perf_session *session __unused,
+ struct machine *machine __unused,
struct thread *thread __unused)
{
}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index a8410081764..58ae14c5baa 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -3,7 +3,11 @@
#include <stdbool.h>
#include "parse-events.h"
-#include "session.h"
+
+struct machine;
+struct perf_sample;
+union perf_event;
+struct thread;
#define __unused __attribute__((unused))
@@ -292,7 +296,7 @@ struct scripting_ops {
void (*process_event) (union perf_event *event,
struct perf_sample *sample,
struct perf_evsel *evsel,
- struct perf_session *session,
+ struct machine *machine,
struct thread *thread);
int (*generate_script) (const char *outfile);
};
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 0575905d120..295a9c93f94 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -224,7 +224,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser)
}
static int annotate_browser__run(struct annotate_browser *self, int evidx,
- int nr_events, void(*timer)(void *arg),
+ void(*timer)(void *arg),
void *arg, int delay_secs)
{
struct rb_node *nd = NULL;
@@ -328,8 +328,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
notes = symbol__annotation(target);
pthread_mutex_lock(&notes->lock);
- if (notes->src == NULL &&
- symbol__alloc_hist(target, nr_events) < 0) {
+ if (notes->src == NULL && symbol__alloc_hist(target) < 0) {
pthread_mutex_unlock(&notes->lock);
ui__warning("Not enough memory for annotating '%s' symbol!\n",
target->name);
@@ -337,7 +336,7 @@ static int annotate_browser__run(struct annotate_browser *self, int evidx,
}
pthread_mutex_unlock(&notes->lock);
- symbol__tui_annotate(target, ms->map, evidx, nr_events,
+ symbol__tui_annotate(target, ms->map, evidx,
timer, arg, delay_secs);
}
continue;
@@ -358,15 +357,15 @@ out:
return key;
}
-int hist_entry__tui_annotate(struct hist_entry *he, int evidx, int nr_events,
+int hist_entry__tui_annotate(struct hist_entry *he, int evidx,
void(*timer)(void *arg), void *arg, int delay_secs)
{
- return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, nr_events,
+ return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx,
timer, arg, delay_secs);
}
int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
- int nr_events, void(*timer)(void *arg), void *arg,
+ void(*timer)(void *arg), void *arg,
int delay_secs)
{
struct objdump_line *pos, *n;
@@ -419,8 +418,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
browser.b.nr_entries = browser.nr_entries;
browser.b.entries = &notes->src->source,
browser.b.width += 18; /* Percentage */
- ret = annotate_browser__run(&browser, evidx, nr_events,
- timer, arg, delay_secs);
+ ret = annotate_browser__run(&browser, evidx, timer, arg, delay_secs);
list_for_each_entry_safe(pos, n, &notes->src->source, node) {
list_del(&pos->node);
objdump_line__free(pos);
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index d0c94b45968..1212a386a03 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -1020,7 +1020,7 @@ do_annotate:
* Don't let this be freed, say, by hists__decay_entry.
*/
he->used = true;
- err = hist_entry__tui_annotate(he, evsel->idx, nr_events,
+ err = hist_entry__tui_annotate(he, evsel->idx,
timer, arg, delay_secs);
he->used = false;
ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
diff --git a/tools/perf/util/ui/progress.c b/tools/perf/util/ui/progress.c
index 295e366b631..13aa64e50e1 100644
--- a/tools/perf/util/ui/progress.c
+++ b/tools/perf/util/ui/progress.c
@@ -14,6 +14,9 @@ void ui_progress__update(u64 curr, u64 total, const char *title)
if (use_browser <= 0)
return;
+ if (total == 0)
+ return;
+
ui__refresh_dimensions(true);
pthread_mutex_lock(&ui__lock);
y = SLtt_Screen_Rows / 2 - 2;
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index e16bf9a707e..d76d1c0ff98 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -1,5 +1,8 @@
/*
- * GIT - The information manager from hell
+ * usage.c
+ *
+ * Various reporting routines.
+ * Originally copied from GIT source.
*
* Copyright (C) Linus Torvalds, 2005
*/
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 0128906bac8..37be34dff79 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -245,4 +245,15 @@ int readn(int fd, void *buf, size_t size);
#define _STR(x) #x
#define STR(x) _STR(x)
+/*
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ */
+
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+	return n && !(n & (n - 1)); /* n & (n - 1) drops the lowest set bit */
+}
+
#endif
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index bdd33470b23..697c8b4e59c 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -32,6 +32,7 @@ void perf_read_values_destroy(struct perf_read_values *values)
for (i = 0; i < values->threads; i++)
free(values->value[i]);
+ free(values->value);
free(values->pid);
free(values->tid);
free(values->counterrawid);
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ff75125deed..555c69a5592 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -38,8 +38,8 @@ displays the statistics gathered since it was forked.
.PP
.SH FIELD DESCRIPTIONS
.nf
-\fBpkg\fP processor package number.
-\fBcore\fP processor core number.
+\fBpk\fP processor package number.
+\fBcr\fP processor core number.
\fBCPU\fP Linux CPU (logical processor) number.
\fB%c0\fP percent of the interval that the CPU retired instructions.
\fBGHz\fP average clock rate while the CPU was in c0 state.
@@ -58,7 +58,7 @@ Subsequent rows show per-CPU statistics.
.nf
[root@x980]# ./turbostat
-core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
+cr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07
0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07
0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07
@@ -102,7 +102,7 @@ until ^C while the other CPUs are mostly idle:
.nf
[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
-^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
+^Ccr CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00
0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00
0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8d02ccb10c5..8b4c2535b26 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -42,6 +42,7 @@ $default{"BISECT_MANUAL"} = 0;
$default{"BISECT_SKIP"} = 1;
$default{"SUCCESS_LINE"} = "login:";
$default{"DETECT_TRIPLE_FAULT"} = 1;
+$default{"NO_INSTALL"} = 0;
$default{"BOOTED_TIMEOUT"} = 1;
$default{"DIE_ON_FAILURE"} = 1;
$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND";
@@ -84,6 +85,7 @@ my $grub_number;
my $target;
my $make;
my $post_install;
+my $no_install;
my $noclean;
my $minconfig;
my $start_minconfig;
@@ -115,6 +117,7 @@ my $timeout;
my $booted_timeout;
my $detect_triplefault;
my $console;
+my $reboot_success_line;
my $success_line;
my $stop_after_success;
my $stop_after_failure;
@@ -130,6 +133,12 @@ my %config_help;
my %variable;
my %force_config;
+# do not force reboots on config problems
+my $no_reboot = 1;
+
+# default variables that can be used
+chomp ($variable{"PWD"} = `pwd`);
+
$config_help{"MACHINE"} = << "EOF"
The machine hostname that you will test.
EOF
@@ -241,6 +250,7 @@ sub read_yn {
sub get_ktest_config {
my ($config) = @_;
+ my $ans;
return if (defined($opt{$config}));
@@ -254,16 +264,17 @@ sub get_ktest_config {
if (defined($default{$config})) {
print "\[$default{$config}\] ";
}
- $entered_configs{$config} = <STDIN>;
- $entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/;
- if ($entered_configs{$config} =~ /^\s*$/) {
+ $ans = <STDIN>;
+ $ans =~ s/^\s*(.*\S)\s*$/$1/;
+ if ($ans =~ /^\s*$/) {
if ($default{$config}) {
- $entered_configs{$config} = $default{$config};
+ $ans = $default{$config};
} else {
print "Your answer can not be blank\n";
next;
}
}
+ $entered_configs{$config} = process_variables($ans);
last;
}
}
@@ -298,7 +309,7 @@ sub get_ktest_configs {
}
sub process_variables {
- my ($value) = @_;
+ my ($value, $remove_undef) = @_;
my $retval = "";
# We want to check for '\', and it is just easier
@@ -316,6 +327,10 @@ sub process_variables {
$retval = "$retval$begin";
if (defined($variable{$var})) {
$retval = "$retval$variable{$var}";
+ } elsif (defined($remove_undef) && $remove_undef) {
+ # for if statements, any variable that is not defined,
+ # we simple convert to 0
+ $retval = "${retval}0";
} else {
# put back the origin piece.
$retval = "$retval\$\{$var\}";
@@ -331,10 +346,17 @@ sub process_variables {
}
sub set_value {
- my ($lvalue, $rvalue) = @_;
+ my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
if (defined($opt{$lvalue})) {
- die "Error: Option $lvalue defined more than once!\n";
+ if (!$override || defined(${$overrides}{$lvalue})) {
+ my $extra = "";
+ if ($override) {
+ $extra = "In the same override section!\n";
+ }
+ die "$name: $.: Option $lvalue defined more than once!\n$extra";
+ }
+ ${$overrides}{$lvalue} = $rvalue;
}
if ($rvalue =~ /^\s*$/) {
delete $opt{$lvalue};
@@ -355,86 +377,274 @@ sub set_variable {
}
}
-sub read_config {
- my ($config) = @_;
+sub process_compare {
+    # Compare $lval with $rval using the operator $cmp.
+    # "==" and "!=" compare the operands as strings; any other
+    # operator is evaluated by Perl. Returns the result of the
+    # comparison, or -1 when the evaluation itself failed.
+    my ($lval, $cmp, $rval) = @_;
+
+    # trim leading and trailing whitespace from both operands
+    foreach my $operand ($lval, $rval) {
+        $operand =~ s/^\s*//;
+        $operand =~ s/\s*$//;
+    }
+
+    # string (in)equality needs no evaluation
+    return $lval eq $rval if ($cmp eq "==");
+    return $lval ne $rval if ($cmp eq "!=");
+
+    # everything else is built into an expression and evaluated
+    my $statement = "$lval $cmp $rval";
+    my $ret = eval $statement;
+
+    # a non empty $@ means the eval failed
+    return -1 if ($@);
+
+    return $ret;
+}
+
+
+sub value_defined {
+    # True when $val names a defined config variable or a defined option.
+    my ($val) = @_;
+    # Look up the argument itself, not the caller's leftover $2 capture.
+    return defined($variable{$val}) || defined($opt{$val});
+}
+
+my $d = 0;
+sub process_expression {
+ my ($name, $val) = @_;
+ # Evaluate the IF expression $val; $name (with $.) only labels the die messages.
+ my $c = $d++;
+ # Evaluate each (...) group that holds no further '(' and splice in " 1 " or " 0 ".
+ while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) {
+ my $express = $1;
+
+ if (process_expression($name, $express)) {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /;
+ } else {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /;
+ }
+ }
+ # NOTE(review): $c is never read in this sub and $d is only bumped and restored - confirm it is leftover.
+ $d--;
+ my $OR = "\\|\\|";
+ my $AND = "\\&\\&";
+ # Peel off the text before the first || or &&; a true left side of || or a false left side of && decides the result.
+ while ($val =~ s/^(.*?)($OR|$AND)//) {
+ my $express = $1;
+ my $op = $2;
+
+ if (process_expression($name, $express)) {
+ if ($op eq "||") {
+ return 1;
+ }
+ } else {
+ if ($op eq "&&") {
+ return 0;
+ }
+ }
+ }
+ # What is left is a single term: first try it as a comparison.
+ if ($val =~ /(.*)(==|\!=|>=|<=|>|<)(.*)/) {
+ my $ret = process_compare($1, $2, $3);
+ if ($ret < 0) {
+ die "$name: $.: Unable to process comparison\n";
+ }
+ return $ret;
+ }
+ # Then as "DEFINED name" or "NOT DEFINED name".
+ if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) {
+ if (defined $1) {
+ return !value_defined($2);
+ } else {
+ return value_defined($2);
+ }
+ }
+ # Then as a number: a lone 0 is false, any other all-digit value is true.
+ if ($val =~ /^\s*0\s*$/) {
+ return 0;
+ } elsif ($val =~ /^\s*\d+\s*$/) {
+ return 1;
+ }
+ # Nothing matched, so the expression can not be understood.
+ die ("$name: $.: Undefined content $val in if statement\n");
+}
+
+sub process_if {
+ my ($name, $value) = @_;
- open(IN, $config) || die "can't read file $config";
+ # Convert variables and replace undefined ones with 0
+ my $val = process_variables($value, 1);
+ my $ret = process_expression $name, $val;
+
+ return $ret;
+}
+
+sub __read_config {
+ my ($config, $current_test_num) = @_;
+
+ my $in;
+ open($in, $config) || die "can't read file $config";
my $name = $config;
$name =~ s,.*/(.*),$1,;
- my $test_num = 0;
+ my $test_num = $$current_test_num;
my $default = 1;
my $repeat = 1;
my $num_tests_set = 0;
my $skip = 0;
my $rest;
+ my $line;
my $test_case = 0;
+ my $if = 0;
+ my $if_set = 0;
+ my $override = 0;
- while (<IN>) {
+ my %overrides;
+
+ while (<$in>) {
# ignore blank lines and comments
next if (/^\s*$/ || /\s*\#/);
- if (/^\s*TEST_START(.*)/) {
+ if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) {
- $rest = $1;
+ my $type = $1;
+ $rest = $2;
+ $line = $2;
- if ($num_tests_set) {
- die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
- }
+ my $old_test_num;
+ my $old_repeat;
+ $override = 0;
+
+ if ($type eq "TEST_START") {
- my $old_test_num = $test_num;
- my $old_repeat = $repeat;
+ if ($num_tests_set) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
- $test_num += $repeat;
- $default = 0;
- $repeat = 1;
+ $old_test_num = $test_num;
+ $old_repeat = $repeat;
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
+ $test_num += $repeat;
+ $default = 0;
+ $repeat = 1;
+ } else {
+ $default = 1;
+ }
+
+ # If SKIP is anywhere in the line, the command will be skipped
+ if ($rest =~ s/\s+SKIP\b//) {
$skip = 1;
} else {
$test_case = 1;
$skip = 0;
}
- if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) {
- $repeat = $1;
- $rest = $2;
- $repeat_tests{"$test_num"} = $repeat;
+ if ($rest =~ s/\sELSE\b//) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
+ $if = 0;
+
+ if ($if_set) {
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
}
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
- $skip = 1;
+ if ($rest =~ s/\sIF\s+(.*)//) {
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $if = 1;
+ } else {
+ $if = 0;
+ $if_set = 0;
}
- if ($rest !~ /^\s*$/) {
- die "$name: $.: Gargbage found after TEST_START\n$_";
+ if (!$skip) {
+ if ($type eq "TEST_START") {
+ if ($rest =~ s/\s+ITERATE\s+(\d+)//) {
+ $repeat = $1;
+ $repeat_tests{"$test_num"} = $repeat;
+ }
+ } elsif ($rest =~ s/\sOVERRIDE\b//) {
+ # DEFAULT only
+ $override = 1;
+ # Clear previous overrides
+ %overrides = ();
+ }
+ }
+
+ if (!$skip && $rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after $type\n$_";
}
- if ($skip) {
+ if ($skip && $type eq "TEST_START") {
$test_num = $old_test_num;
$repeat = $old_repeat;
}
- } elsif (/^\s*DEFAULTS(.*)$/) {
- $default = 1;
-
+ } elsif (/^\s*ELSE\b(.*)$/) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
$rest = $1;
-
- if ($rest =~ /\s+SKIP(.*)/) {
- $rest = $1;
+ if ($if_set) {
$skip = 1;
+ $rest = "";
} else {
$skip = 0;
+
+ if ($rest =~ /\sIF\s+(.*)/) {
+ # May be a ELSE IF section.
+ if (!process_if($name, $1)) {
+ $skip = 1;
+ }
+ $rest = "";
+ } else {
+ $if = 0;
+ }
}
if ($rest !~ /^\s*$/) {
die "$name: $.: Gargbage found after DEFAULTS\n$_";
}
+ } elsif (/^\s*INCLUDE\s+(\S+)/) {
+
+ next if ($skip);
+
+ if (!$default) {
+ die "$name: $.: INCLUDE can only be done in default sections\n$_";
+ }
+
+ my $file = process_variables($1);
+
+ if ($file !~ m,^/,) {
+ # check the path of the config file first
+ if ($config =~ m,(.*)/,) {
+ if (-f "$1/$file") {
+ $file = "$1/$file";
+ }
+ }
+ }
+
+ if ( ! -r $file ) {
+ die "$name: $.: Can't read file $file\n$_";
+ }
+
+ if (__read_config($file, \$test_num)) {
+ $test_case = 1;
+ }
+
} elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
next if ($skip);
@@ -460,10 +670,10 @@ sub read_config {
}
if ($default || $lvalue =~ /\[\d+\]$/) {
- set_value($lvalue, $rvalue);
+ set_value($lvalue, $rvalue, $override, \%overrides, $name);
} else {
my $val = "$lvalue\[$test_num\]";
- set_value($val, $rvalue);
+ set_value($val, $rvalue, $override, \%overrides, $name);
if ($repeat > 1) {
$repeats{$val} = $repeat;
@@ -490,13 +700,26 @@ sub read_config {
}
}
- close(IN);
-
if ($test_num) {
$test_num += $repeat - 1;
$opt{"NUM_TESTS"} = $test_num;
}
+ close($in);
+
+ $$current_test_num = $test_num;
+
+ return $test_case;
+}
+
+sub read_config {
+ my ($config) = @_;
+
+ my $test_case;
+ my $test_num = 0;
+
+ $test_case = __read_config $config, \$test_num;
+
# make sure we have all mandatory configs
get_ktest_configs;
@@ -524,6 +747,18 @@ sub __eval_option {
# Add space to evaluate the character before $
$option = " $option";
my $retval = "";
+ my $repeated = 0;
+ my $parent = 0;
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+
+ $repeated = 1;
+ $parent = $test;
+ last;
+ }
+ }
while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
my $start = $1;
@@ -537,10 +772,14 @@ sub __eval_option {
# otherwise see if the default OPT (without [$i]) exists.
my $o = "$var\[$i\]";
+ my $parento = "$var\[$parent\]";
if (defined($opt{$o})) {
$o = $opt{$o};
$retval = "$retval$o";
+ } elsif ($repeated && defined($opt{$parento})) {
+ $o = $opt{$parento};
+ $retval = "$retval$o";
} elsif (defined($opt{$var})) {
$o = $opt{$var};
$retval = "$retval$o";
@@ -603,8 +842,20 @@ sub doprint {
}
sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
sub reboot {
+ my ($time) = @_;
+
+ if (defined($time)) {
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+ }
+
# try to reboot normally
if (run_command $reboot) {
if (defined($powercycle_after_reboot)) {
@@ -615,12 +866,17 @@ sub reboot {
# nope? power cycle it.
run_command "$power_cycle";
}
+
+ if (defined($time)) {
+ wait_for_monitor($time, $reboot_success_line);
+ end_monitor;
+ }
}
sub do_not_reboot {
my $i = $iteration;
- return $test_type eq "build" ||
+ return $test_type eq "build" || $no_reboot ||
($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
}
@@ -693,16 +949,29 @@ sub end_monitor {
}
sub wait_for_monitor {
- my ($time) = @_;
+ my ($time, $stop) = @_;
+ my $full_line = "";
my $line;
+ my $booted = 0;
doprint "** Wait for monitor to settle down **\n";
# read the monitor and wait for the system to calm down
- do {
+ while (!$booted) {
$line = wait_for_input($monitor_fp, $time);
- print "$line" if (defined($line));
- } while (defined($line));
+ last if (!defined($line));
+ print "$line";
+ $full_line .= $line;
+
+ if (defined($stop) && $full_line =~ /$stop/) {
+ doprint "wait for monitor detected $stop\n";
+ $booted = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
print "** Monitor flushed **\n";
}
@@ -719,10 +988,7 @@ sub fail {
# no need to reboot for just building.
if (!do_not_reboot) {
doprint "REBOOTING\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
my $name = "";
@@ -854,9 +1120,12 @@ sub get_grub_index {
open(IN, "$ssh_grub |")
or die "unable to get menu.lst";
+ my $found = 0;
+
while (<IN>) {
if (/^\s*title\s+$grub_menu\s*$/) {
$grub_number++;
+ $found = 1;
last;
} elsif (/^\s*title\s/) {
$grub_number++;
@@ -865,7 +1134,7 @@ sub get_grub_index {
close(IN);
die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
- if ($grub_number < 0);
+ if (!$found);
doprint "$grub_number\n";
}
@@ -902,7 +1171,8 @@ sub wait_for_input
sub reboot_to {
if ($reboot_type eq "grub") {
- run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'";
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+ reboot;
return;
}
@@ -1083,6 +1353,8 @@ sub do_post_install {
sub install {
+ return if ($no_install);
+
run_scp "$outputdir/$build_target", "$target_image" or
dodie "failed to copy image";
@@ -1140,6 +1412,11 @@ sub get_version {
}
sub start_monitor_and_boot {
+ # Make sure the stable kernel has finished booting
+ start_monitor;
+ wait_for_monitor 5;
+ end_monitor;
+
get_grub_index;
get_version;
install;
@@ -1250,6 +1527,10 @@ sub build {
unlink $buildlog;
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
if (defined($pre_build)) {
my $ret = run_command $pre_build;
if (!$ret && defined($pre_build_die) &&
@@ -1272,15 +1553,15 @@ sub build {
# allow for empty configs
run_command "touch $output_config";
- run_command "mv $output_config $outputdir/config_temp" or
- dodie "moving .config";
+ if (!$noclean) {
+ run_command "mv $output_config $outputdir/config_temp" or
+ dodie "moving .config";
- if (!$noclean && !run_command "$make mrproper") {
- dodie "make mrproper";
- }
+ run_command "$make mrproper" or dodie "make mrproper";
- run_command "mv $outputdir/config_temp $output_config" or
- dodie "moving config_temp";
+ run_command "mv $outputdir/config_temp $output_config" or
+ dodie "moving config_temp";
+ }
} elsif (!$noclean) {
unlink "$output_config";
@@ -1318,10 +1599,15 @@ sub build {
if (!$build_ret) {
# bisect may need this to pass
- return 0 if ($in_bisect);
+ if ($in_bisect) {
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
fail "failed build" and return 0;
}
+ $no_reboot = $save_no_reboot;
+
return 1;
}
@@ -1356,10 +1642,7 @@ sub success {
if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
doprint "Reboot and wait $sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
}
@@ -1500,10 +1783,7 @@ sub run_git_bisect {
sub bisect_reboot {
doprint "Reboot and sleep $bisect_sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $bisect_sleep_time;
- end_monitor;
+ reboot $bisect_sleep_time;
}
# returns 1 on success, 0 on failure, -1 on skip
@@ -2066,10 +2346,7 @@ sub config_bisect {
sub patchcheck_reboot {
doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $patchcheck_sleep_time;
- end_monitor;
+ reboot $patchcheck_sleep_time;
}
sub patchcheck {
@@ -2178,12 +2455,31 @@ sub patchcheck {
}
my %depends;
+my %depcount;
my $iflevel = 0;
my @ifdeps;
# prevent recursion
my %read_kconfigs;
+sub add_dep {
+    my ($config, $dep) = @_;
+
+    # $config depends on $dep: extend its space separated list
+    if (!defined($depends{$config})) {
+        $depends{$config} = $dep;
+    } else {
+        $depends{$config} = $depends{$config} . " " . $dep;
+    }
+
+    # keep track of how many configs depend on $dep
+    if (!defined $depcount{$dep}) {
+        $depcount{$dep} = 1;
+    } else {
+        $depcount{$dep}++;
+    }
+}
+
# taken from streamline_config.pl
sub read_kconfig {
my ($kconfig) = @_;
@@ -2230,30 +2526,19 @@ sub read_kconfig {
$config = $2;
for (my $i = 0; $i < $iflevel; $i++) {
- if ($i) {
- $depends{$config} .= " " . $ifdeps[$i];
- } else {
- $depends{$config} = $ifdeps[$i];
- }
- $state = "DEP";
+ add_dep $config, $ifdeps[$i];
}
# collect the depends for the config
} elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) {
- if (defined($depends{$1})) {
- $depends{$config} .= " " . $1;
- } else {
- $depends{$config} = $1;
- }
+ add_dep $config, $1;
# Get the configs that select this config
- } elsif ($state ne "NONE" && /^\s*select\s+(\S+)/) {
- if (defined($depends{$1})) {
- $depends{$1} .= " " . $config;
- } else {
- $depends{$1} = $config;
- }
+ } elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) {
+
+ # selected by depends on config
+ add_dep $1, $config;
# Check for if statements
} elsif (/^if\s+(.*\S)\s*$/) {
@@ -2365,11 +2650,18 @@ sub make_new_config {
close OUT;
}
+sub chomp_config {
+    my ($config) = @_;
+
+    # copy the name, then drop the first "CONFIG_" found in the copy
+    (my $short = $config) =~ s/CONFIG_//;
+
+    return $short;
+}
+
sub get_depends {
my ($dep) = @_;
- my $kconfig = $dep;
- $kconfig =~ s/CONFIG_//;
+ my $kconfig = chomp_config $dep;
$dep = $depends{"$kconfig"};
@@ -2419,8 +2711,7 @@ sub test_this_config {
return undef;
}
- my $kconfig = $config;
- $kconfig =~ s/CONFIG_//;
+ my $kconfig = chomp_config $config;
# Test dependencies first
if (defined($depends{"$kconfig"})) {
@@ -2510,6 +2801,14 @@ sub make_min_config {
my @config_keys = keys %min_configs;
+ # All configs need a depcount
+ foreach my $config (@config_keys) {
+ my $kconfig = chomp_config $config;
+ if (!defined $depcount{$kconfig}) {
+ $depcount{$kconfig} = 0;
+ }
+ }
+
# Remove anything that was set by the make allnoconfig
# we shouldn't need them as they get set for us anyway.
foreach my $config (@config_keys) {
@@ -2548,8 +2847,13 @@ sub make_min_config {
# Now disable each config one by one and do a make oldconfig
# till we find a config that changes our list.
- # Put configs that did not modify the config at the end.
my @test_configs = keys %min_configs;
+
+ # Sort keys by who is most dependent on
+ @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
+ @test_configs ;
+
+ # Put configs that did not modify the config at the end.
my $reset = 1;
for (my $i = 0; $i < $#test_configs; $i++) {
if (!defined($nochange_config{$test_configs[0]})) {
@@ -2659,10 +2963,7 @@ sub make_min_config {
}
doprint "Reboot and wait $sleep_time seconds\n";
- reboot;
- start_monitor;
- wait_for_monitor $sleep_time;
- end_monitor;
+ reboot $sleep_time;
}
success $i;
@@ -2783,6 +3084,9 @@ sub set_test_option {
# First we need to do is the builds
for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+ # Do not reboot on failing test options
+ $no_reboot = 1;
+
$iteration = $i;
my $makecmd = set_test_option("MAKE_CMD", $i);
@@ -2811,6 +3115,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$reboot_type = set_test_option("REBOOT_TYPE", $i);
$grub_menu = set_test_option("GRUB_MENU", $i);
$post_install = set_test_option("POST_INSTALL", $i);
+ $no_install = set_test_option("NO_INSTALL", $i);
$reboot_script = set_test_option("REBOOT_SCRIPT", $i);
$reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
$poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
@@ -2832,6 +3137,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$console = set_test_option("CONSOLE", $i);
$detect_triplefault = set_test_option("DETECT_TRIPLE_FAULT", $i);
$success_line = set_test_option("SUCCESS_LINE", $i);
+ $reboot_success_line = set_test_option("REBOOT_SUCCESS_LINE", $i);
$stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
$stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
$stop_test_after = set_test_option("STOP_TEST_AFTER", $i);
@@ -2850,9 +3156,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
chdir $builddir || die "can't change directory to $builddir";
- if (!-d $tmpdir) {
- mkpath($tmpdir) or
- die "can't create $tmpdir";
+ foreach my $dir ($tmpdir, $outputdir) {
+ if (!-d $dir) {
+ mkpath($dir) or
+ die "can't create $dir";
+ }
}
$ENV{"SSH_USER"} = $ssh_user;
@@ -2889,8 +3197,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
$run_type = "ERROR";
}
+ my $installme = "";
+ $installme = " no_install" if ($no_install);
+
doprint "\n\n";
- doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+ doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type$installme\n\n";
unlink $dmesg;
unlink $buildlog;
@@ -2911,6 +3222,9 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
die "failed to checkout $checkout";
}
+ $no_reboot = 0;
+
+
if ($test_type eq "bisect") {
bisect $i;
next;
@@ -2929,6 +3243,13 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
build $build_type or next;
}
+ if ($test_type eq "install") {
+ get_version;
+ install;
+ success $i;
+ next;
+ }
+
if ($test_type ne "build") {
my $failed = 0;
start_monitor_and_boot or $failed = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index b8bcd14b5a4..553c06b7d6f 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -72,6 +72,128 @@
# the same option name under the same test or as default
# ktest will fail to execute, and no tests will run.
#
+# DEFAULTS OVERRIDE
+#
+# Options defined in the DEFAULTS section can not be duplicated
+# even if they are defined in two different DEFAULT sections.
+# This is done to catch mistakes where an option is added but
+# the previous option was forgotten about and not commented.
+#
+# The OVERRIDE keyword can be added to a section to allow this
+# section to override other DEFAULT sections values that have
+# been defined previously. It will only override options that
+# have been defined before its use. Options defined later
+# in a non override section will still error. The same option
+# can not be defined in the same section even if that section
+# is marked OVERRIDE.
+#
+#
+#
+# Both TEST_START and DEFAULTS sections can also have the IF keyword
+# The value after the IF must evaluate into a 0 or non 0 positive
+# integer, and can use the config variables (explained below).
+#
+# DEFAULTS IF ${IS_X86_32}
+#
+# The above will process the DEFAULTS section if the config
+# variable IS_X86_32 evaluates to a non zero positive integer
+# otherwise if it evaluates to zero, it will act the same
+# as if the SKIP keyword was used.
+#
+# The ELSE keyword can be used directly after a section with
+# a IF statement.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE
+#
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal
+#
+#
+# The ELSE keyword can also contain an IF statement to allow multiple
+# if then else sections. But all the sections must be either
+# DEFAULT or TEST_START, they can not be a mixture.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE IF ${RUN_DISK_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests
+#
+# ELSE IF ${RUN_CPU_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu
+#
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# The if statement may also contain comparisons (==, !=, >=, <=, > and <).
+# For == and !=, strings may be used on both sides.
+#
+# BOX_TYPE := x86_32
+#
+# DEFAULTS IF ${BOX_TYPE} == x86_32
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64
+#
+# The DEFINED keyword can be used by the IF statements too.
+# It returns true if the given config variable or option has been defined
+# or false otherwise.
+#
+#
+# DEFAULTS IF DEFINED USE_CC
+# CC := ${USE_CC}
+# ELSE
+# CC := gcc
+#
+#
+# As well as NOT DEFINED.
+#
+# DEFAULTS IF NOT DEFINED MAKE_CMD
+# MAKE_CMD := make ARCH=x86
+#
+#
+# And/or ops (&&,||) may also be used to make complex conditionals.
+#
+# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf
+#
+# Notice the use of parentheses. Without any parentheses the above would be
+# processed the same as:
+#
+# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf)
+#
+#
+#
+# INCLUDE file
+#
+# The INCLUDE keyword may be used in DEFAULT sections. This will
+# read another config file and process that file as well. The included
+# file can include other files, add new test cases or default
+# statements. Config variables will be passed to these files and changes
+# to config variables will be seen by top level config files. Including
+# a file is processed just like the contents of the file was cut and pasted
+# into the top level file, except, that include files that end with
+# TEST_START sections will have that section ended at the end of
+# the include file. That is, an included file is included followed
+# by another DEFAULT keyword.
+#
+# Unlike other files referenced in this config, the file path does not need
+# to be absolute. If the file does not start with '/', then the directory
+# that the current config file was located in is used. If no config by the
+# given name is found there, then the current directory is searched.
+#
+# INCLUDE myfile
+# DEFAULTS
+#
+# is the same as:
+#
+# INCLUDE myfile
+#
+# Note, if the include file does not contain a full path, the file is
+# searched first by the location of the original include file, and then
+# by the location that ktest.pl was executed in.
+#
#### Config variables ####
#
@@ -253,9 +375,10 @@
# The default test type (default test)
# The test types may be:
-# build - only build the kernel, do nothing else
-# boot - build and boot the kernel
-# test - build, boot and if TEST is set, run the test script
+# build - only build the kernel, do nothing else
+# install - build and install, but do nothing else (does not reboot)
+# boot - build, install, and boot the kernel
+# test - build, boot and if TEST is set, run the test script
# (If TEST is not set, it defaults back to boot)
# bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
@@ -293,6 +416,13 @@
# or on some systems:
#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+# If for some reason you just want to boot the kernel and do not want
+# the test to install anything new, you can set this option to 1.
+# For example, you may want to boot test the same kernel over and over
+# without going through the hassle of installing anything each time.
+# (default 0)
+#NO_INSTALL = 1
+
# If there is a script that you require to run before the build is done
# you can specify it with PRE_BUILD.
#
@@ -415,6 +545,14 @@
# (default "login:")
#SUCCESS_LINE = login:
+# To speed up reboots, define a line that the default
+# kernel prints to show that it has successfully booted
+# and that a new test kernel can be passed to it.
+# Without this line, ktest.pl will wait for SLEEP_TIME
+# before continuing.
+# (default undefined)
+#REBOOT_SUCCESS_LINE = login:
+
# In case the console constantly fills the screen, having
# a specified time to stop the test after success is recommended.
# (in seconds)
@@ -480,6 +618,8 @@
# another test. If a reboot to the reliable kernel happens,
# we wait SLEEP_TIME for the console to stop producing output
# before starting the next test.
+#
+# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE.
# (default 60)
#SLEEP_TIME = 60
@@ -810,7 +950,7 @@
# TEST_START
# TEST_TYPE = config_bisect
# CONFIG_BISECT_TYPE = build
-# CONFIG_BISECT = /home/test/˘onfig-bad
+# CONFIG_BISECT = /home/test/config-bad
# MIN_CONFIG = /home/test/config-min
# BISECT_MANUAL = 1
#
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 00000000000..4ec84018cc1
--- /dev/null
+++ b/tools/testing/selftests/Makefile
@@ -0,0 +1,11 @@
+# Recurse into each directory named in TARGETS to build or clean its tests.
+TARGETS = breakpoints
+all:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET; \
+	done;
+
+clean:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 00000000000..f362722cdce
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -0,0 +1,20 @@
+# Taken from perf makefile
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ ARCH := x86
+endif
+ifeq ($(ARCH),x86_64)
+ ARCH := x86
+endif
+
+
+all:
+ifeq ($(ARCH),x86)
+ gcc breakpoint_test.c -o run_test
+else
+ echo "Not an x86 target, can't build breakpoints selftests"
+endif
+
+clean:
+ rm -fr run_test
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 00000000000..a0743f3b2b5
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+
+/* Breakpoint access modes */
+enum {
+ BP_X = 1,
+ BP_RW = 2,
+ BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+/*
+ * Store @addr in debug register @n of the traced child.
+ *
+ * The register is written through the child's USER area with
+ * PTRACE_POKEUSER. The process exits with a failure status if the
+ * ptrace request fails.
+ */
+static void set_breakpoint_addr(void *addr, int n)
+{
+	long ret;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[n]), addr);
+	if (ret) {
+		/* perror() appends ": <reason>\n" itself: no newline here */
+		perror("Can't set breakpoint addr");
+		exit(-1);
+	}
+}
+
+/*
+ * Set (@set != 0) or clear (@set == 0) the dr7 bits of breakpoint
+ * slot @n in the traced child.
+ *
+ * @type is one of BP_X, BP_W or BP_RW and @len is the access length in
+ * bytes (1, 2, 4 or 8). @local and @global select which enable bits are
+ * touched. The current dr7 value is read from the child, updated and
+ * written back.
+ *
+ * Exits with a failure status on an unsupported @type or @len, or if
+ * dr7 can't be written.
+ */
+static void toggle_breakpoint(int n, int type, int len,
+			      int local, int global, int set)
+{
+	long ret;
+	int xtype, xlen;
+	unsigned long vdr7, dr7;
+
+	/* Access type, as encoded in the low two bits of the slot's nibble */
+	switch (type) {
+	case BP_X:
+		xtype = 0;
+		break;
+	case BP_W:
+		xtype = 1;
+		break;
+	case BP_RW:
+		xtype = 3;
+		break;
+	default:
+		/* Don't build dr7 from an uninitialized value */
+		fprintf(stderr, "Unsupported breakpoint type: %d\n", type);
+		exit(-1);
+	}
+
+	/* Access length, pre-shifted into the high two bits of the nibble */
+	switch (len) {
+	case 1:
+		xlen = 0;
+		break;
+	case 2:
+		xlen = 4;
+		break;
+	case 4:
+		xlen = 0xc;
+		break;
+	case 8:
+		xlen = 8;
+		break;
+	default:
+		/* Don't build dr7 from an uninitialized value */
+		fprintf(stderr, "Unsupported breakpoint length: %d\n", len);
+		exit(-1);
+	}
+
+	dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), 0);
+
+	/* The type/len nibble of slot n starts at bit 16 + 4 * n */
+	vdr7 = (xlen | xtype) << 16;
+	vdr7 <<= 4 * n;
+
+	if (local) {
+		/* Per-slot local enable bit, plus bit 8 (local exact) */
+		vdr7 |= 1 << (2 * n);
+		vdr7 |= 1 << 8;
+	}
+	if (global) {
+		/* Per-slot global enable bit, plus bit 9 (global exact) */
+		vdr7 |= 2 << (2 * n);
+		vdr7 |= 1 << 9;
+	}
+
+	if (set)
+		dr7 |= vdr7;
+	else
+		dr7 &= ~vdr7;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), dr7);
+	if (ret) {
+		perror("Can't set dr7");
+		exit(-1);
+	}
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/* Dummy functions to test execution accesses */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+ dummy_func,
+ dummy_func1,
+ dummy_func2,
+ dummy_func3,
+};
+
+static int trapped;
+
+/*
+ * Called by the child after each access that is expected to trap.
+ *
+ * The "trapped" flag is written from the parent (see the
+ * PTRACE_POKEDATA in check_success()). If it is still zero here, the
+ * child signals itself with SIGUSR1. The flag is then cleared and the
+ * test sequence counter is advanced.
+ */
+static void check_trapped(void)
+{
+ /*
+ * If we haven't trapped, wake up the parent
+ * so that it notices the failure.
+ */
+ if (!trapped)
+ kill(getpid(), SIGUSR1);
+ trapped = 0;
+
+ nr_tests++;
+}
+
+/*
+ * Write @len bytes (1, 2, 4 or 8) to the start of each dummy_var[]
+ * entry, calling check_trapped() after every entry.
+ *
+ * The stores go through volatile-qualified pointers: nothing in this
+ * program reads dummy_var[] back, so a plain store could legally be
+ * dropped by an optimizing compiler and the watchpoint would never fire.
+ * Any other @len performs no store but still calls check_trapped().
+ */
+static void write_var(int len)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		switch (len) {
+		case 1:
+			*(volatile char *)&dummy_var[i] = 0xff;
+			break;
+		case 2:
+			*(volatile short *)&dummy_var[i] = 0xffff;
+			break;
+		case 4:
+			*(volatile int *)&dummy_var[i] = 0xffffffff;
+			break;
+		case 8:
+			*(volatile long long *)&dummy_var[i] =
+						0xffffffffffffffffLL;
+			break;
+		}
+		check_trapped();
+	}
+}
+
+/*
+ * Read @len bytes (1, 2, 4 or 8) from the start of each dummy_var[]
+ * entry, calling check_trapped() after every entry.
+ *
+ * The value read is not needed, only the memory access is. The loads
+ * go through volatile-qualified pointers so that the compiler has to
+ * emit them even though the result is discarded. Any other @len
+ * performs no load but still calls check_trapped().
+ */
+static void read_var(int len)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		switch (len) {
+		case 1:
+			(void)*(volatile char *)&dummy_var[i];
+			break;
+		case 2:
+			(void)*(volatile short *)&dummy_var[i];
+			break;
+		case 4:
+			(void)*(volatile int *)&dummy_var[i];
+			break;
+		case 8:
+			(void)*(volatile long long *)&dummy_var[i];
+			break;
+		}
+		check_trapped();
+	}
+}
+
+/*
+ * Do the r/w/x accesses to trigger the breakpoints. And run
+ * the usual traps.
+ *
+ * Runs in the child. The loops below have to walk the same
+ * combinations, in the same order, as launch_tests() does in the
+ * parent, since the parent arms one breakpoint per expected access.
+ */
+static void trigger_tests(void)
+{
+	int len, local, global, i;
+	long ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		/* perror() appends ": <reason>\n" itself: no newline here */
+		perror("Can't be traced?");
+		return;
+	}
+
+	/* Wake up the parent so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+
+			for (i = 0; i < 4; i++) {
+				dummy_funcs[i]();
+				check_trapped();
+			}
+		}
+	}
+
+	/* Test write watchpoints */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				write_var(len);
+			}
+		}
+	}
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				read_var(len);
+			}
+		}
+	}
+
+	/* Icebp trap */
+	asm(".byte 0xf1\n");
+	check_trapped();
+
+	/* Int 3 trap */
+	asm("int $3\n");
+	check_trapped();
+
+	kill(getpid(), SIGUSR1);
+}
+
+/*
+ * Wait for the child's next stop and print "<msg> [Ok]" or
+ * "<msg> [Failed]".
+ *
+ * The test passes when the child stopped on SIGTRAP and its nr_tests
+ * counter matches the parent's. In that case the child's "trapped"
+ * flag is set to 1 so that its check_trapped() call sees the trap.
+ * The parent's own counter is advanced in every case.
+ */
+static void check_success(const char *msg)
+{
+	const char *msg2 = "Failed";
+	int child_nr_tests;
+	int status;
+
+	/*
+	 * Wait for the child to SIGTRAP. Only look at the stop signal if
+	 * wait() succeeded and the child is really stopped: WSTOPSIG() is
+	 * meaningless for a child that exited or was killed.
+	 */
+	if (wait(&status) > 0 && WIFSTOPPED(status) &&
+	    WSTOPSIG(status) == SIGTRAP) {
+		/* The peeked word is truncated to the int counter */
+		child_nr_tests = (int)ptrace(PTRACE_PEEKDATA, child_pid,
+					     &nr_tests, 0);
+		if (child_nr_tests == nr_tests)
+			msg2 = "Ok";
+		if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) {
+			/* perror() appends ": <reason>\n" itself */
+			perror("Can't poke");
+			exit(-1);
+		}
+	}
+
+	nr_tests++;
+
+	printf("%s [%s]\n", msg, msg2);
+}
+
+/*
+ * For each of the four breakpoint slots: arm an execution breakpoint on
+ * dummy_funcs[i], resume the child, report the result through
+ * check_success() and disarm the breakpoint again.
+ *
+ * @buf is scratch space for the report message and must be large
+ * enough to hold it. @local and @global are passed on to
+ * toggle_breakpoint(). Exits with a failure status if the child can't
+ * be resumed.
+ */
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		set_breakpoint_addr(dummy_funcs[i], i);
+		toggle_breakpoint(i, BP_X, 1, local, global, 1);
+		if (ptrace(PTRACE_CONT, child_pid, NULL, 0)) {
+			perror("Can't resume child");
+			exit(-1);
+		}
+		sprintf(buf, "Test breakpoint %d with local: %d global: %d",
+			i, local, global);
+		check_success(buf);
+		toggle_breakpoint(i, BP_X, 1, local, global, 0);
+	}
+}
+
+/*
+ * For each of the four breakpoint slots: arm a data watchpoint of
+ * @len bytes on dummy_var[i], resume the child, report the result
+ * through check_success() and disarm the watchpoint again.
+ *
+ * @mode is BP_W or BP_RW; anything other than BP_W is reported as a
+ * "read" test. @buf is scratch space for the report message and must
+ * be large enough to hold it. Exits with a failure status if the child
+ * can't be resumed.
+ */
+static void launch_watchpoints(char *buf, int mode, int len,
+			       int local, int global)
+{
+	const char *mode_str;
+	int i;
+
+	if (mode == BP_W)
+		mode_str = "write";
+	else
+		mode_str = "read";
+
+	for (i = 0; i < 4; i++) {
+		set_breakpoint_addr(&dummy_var[i], i);
+		toggle_breakpoint(i, mode, len, local, global, 1);
+		if (ptrace(PTRACE_CONT, child_pid, NULL, 0)) {
+			perror("Can't resume child");
+			exit(-1);
+		}
+		sprintf(buf, "Test %s watchpoint %d with len: %d local: "
+			"%d global: %d", mode_str, i, len, local, global);
+		check_success(buf);
+		toggle_breakpoint(i, mode, len, local, global, 0);
+	}
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void launch_tests(void)
+{
+ char buf[1024];
+ int len, local, global, i;
+
+ /* Instruction breakpoints */
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_instruction_breakpoints(buf, local, global);
+ }
+ }
+
+ /* Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_W, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Read-Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_RW, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Icebp traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test icebp");
+
+ /* Int 3 traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test int 3 trap");
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+/*
+ * Fork a child that runs trigger_tests() under ptrace, then drive it
+ * from the parent with launch_tests().
+ *
+ * Returns 0 once the test sequence has been run (individual results
+ * are only printed), or a failure status if the child can't be created.
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0) {
+		/* Without a child there is nothing to trace */
+		perror("Can't fork");
+		return EXIT_FAILURE;
+	}
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	child_pid = pid;
+
+	/* Wait for the child's first SIGUSR1 stop */
+	wait(NULL);
+
+	launch_tests();
+
+	/* Wait for the child's final SIGUSR1 stop */
+	wait(NULL);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests
new file mode 100644
index 00000000000..320718a4e6b
--- /dev/null
+++ b/tools/testing/selftests/run_tests
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+TARGETS=breakpoints
+
+for TARGET in $TARGETS
+do
+ $TARGET/run_test
+done
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 669bcdd4580..b4fbc91c41b 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -186,21 +186,12 @@ struct virtqueue {
#endif
/* Interfaces exported by virtio_ring. */
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
- struct scatterlist sg[],
- unsigned int out_num,
- unsigned int in_num,
- void *data,
- gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
- struct scatterlist sg[],
- unsigned int out_num,
- unsigned int in_num,
- void *data)
-{
- return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+ struct scatterlist sg[],
+ unsigned int out_num,
+ unsigned int in_num,
+ void *data,
+ gfp_t gfp);
void virtqueue_kick(struct virtqueue *vq);
@@ -214,6 +205,7 @@ void *virtqueue_detach_unused_buf(struct virtqueue *vq);
struct virtqueue *vring_new_virtqueue(unsigned int num,
unsigned int vring_align,
struct virtio_device *vdev,
+ bool weak_barriers,
void *pages,
void (*notify)(struct virtqueue *vq),
void (*callback)(struct virtqueue *vq),
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 74d3331bdaf..6bf95f99536 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -92,7 +92,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
assert(r >= 0);
memset(info->ring, 0, vring_size(num, 4096));
vring_init(&info->vring, num, info->ring, 4096);
- info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+ info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+ true, info->ring,
vq_notify, vq_callback, "test");
assert(info->vq);
info->vq->priv = info;
@@ -160,7 +161,8 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
if (started < bufs) {
sg_init_one(&sl, dev->buf, dev->buf_size);
r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
- dev->buf + started);
+ dev->buf + started,
+ GFP_ATOMIC);
if (likely(r >= 0)) {
++started;
virtqueue_kick(vq->vq);