diff options
author | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-06-12 16:53:38 +1000 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2009-06-12 16:53:38 +1000 |
commit | bc47ab0241c7c86da4f5e5f82fbca7d45387c18d (patch) | |
tree | b9c33ae8b6de43e44cc5fcbaa3e4a15f18a5ed42 /include | |
parent | 37f9ef553bed630957e025504cdcbc76f5de49d5 (diff) | |
parent | 8ebf975608aaebd7feb33d77f07ba21a6380e086 (diff) |
Merge commit 'origin/master' into next
Manual merge of:
arch/powerpc/kernel/asm-offsets.c
Diffstat (limited to 'include')
128 files changed, 4523 insertions, 1114 deletions
diff --git a/include/Kbuild b/include/Kbuild index d8c3e3cbf41..fe36accd432 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -8,3 +8,4 @@ header-y += mtd/ header-y += rdma/ header-y += video/ header-y += drm/ +header-y += xen/ diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 3673a13b670..81d3be459ef 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(l), (new))) + (atomic64_xchg((atomic64_t *)(v), (new))) #else /* BITS_PER_LONG == 64 */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca70ab..e410f602cab 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 89853bcd27a..f1736ca7922 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -63,7 +63,7 @@ #define BRANCH_PROFILE() #endif -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 3c1924c010e..7300fb86676 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -471,6 +471,9 @@ struct drm_connector { u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY]; uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY]; + /* requested DPMS state */ + int dpms; + void *helper_private; uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index ec073d8288d..6769ff6c1bc 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); + extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, struct drm_mode_fb_cmd *mode_cmd); diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 3f0eaa397ef..b3afd2219ad 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -135,6 +135,7 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h +header-y += qnx4_fs.h header-y += radeonfb.h header-y += raw.h header-y += resource.h @@ -308,7 +309,6 @@ unifdef-y += poll.h unifdef-y += ppp_defs.h unifdef-y += ppp-comp.h unifdef-y += ptrace.h -unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h unifdef-y += irqnr.h diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890ee3c..51b4b0a5ce8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_realmode_flags; -int acpi_register_gsi (u32 gsi, int triggering, int polarity); +int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 48ee32a18ac..64a982ea5d5 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -159,6 +159,7 @@ #define UART01x_FR_MODEM_ANY (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS) #ifndef __ASSEMBLY__ +struct amba_device; /* in uncompress this is included but amba/bus.h is not */ struct amba_pl010_data { void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl); }; diff --git a/include/linux/auto_fs.h b/include/linux/auto_fs.h index 63265852b7d..7b09c8348fd 100644 --- a/include/linux/auto_fs.h +++ b/include/linux/auto_fs.h @@ -14,13 +14,12 @@ #ifndef _LINUX_AUTO_FS_H #define _LINUX_AUTO_FS_H +#include <linux/types.h> #ifdef __KERNEL__ #include <linux/fs.h> #include <linux/limits.h> -#include <linux/types.h> #include <linux/ioctl.h> #else -#include <asm/types.h> #include <sys/ioctl.h> #endif /* __KERNEL__ */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b214fd672a..12737be5860 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -218,12 +218,12 @@ struct bio { #define bio_sectors(bio) ((bio)->bi_size >> 9) #define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) -static inline unsigned int bio_cur_sectors(struct bio *bio) +static inline unsigned int bio_cur_bytes(struct bio *bio) { if (bio->bi_vcnt) - return bio_iovec(bio)->bv_len >> 9; + return bio_iovec(bio)->bv_len; else /* dataless requests such as discard */ - return bio->bi_size >> 9; + return bio->bi_size; } static inline void *bio_data(struct bio *bio) @@ -279,7 +279,7 @@ static inline int bio_has_allocated_vec(struct bio *bio) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ - __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) + __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q))) #define BIO_SEG_BOUNDARY(q, b1, b2) \ BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) @@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio) } /* - * BIO list managment for use by remapping drivers (e.g. DM or MD). + * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4f71f1a4af..ebdfde8fe55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -166,19 +166,9 @@ struct request { enum rq_cmd_type_bits cmd_type; unsigned long atomic_flags; - /* Maintain bio traversal state for part by part I/O submission. - * hard_* are block layer internals, no driver should touch them! - */ - - sector_t sector; /* next sector to submit */ - sector_t hard_sector; /* next sector to complete */ - unsigned long nr_sectors; /* no. of sectors left to submit */ - unsigned long hard_nr_sectors; /* no. of sectors left to complete */ - /* no. of sectors left to submit in the current segment */ - unsigned int current_nr_sectors; - - /* no. of sectors left to complete in the current segment */ - unsigned int hard_cur_sectors; + /* the following two fields are internal, NEVER access directly */ + sector_t __sector; /* sector cursor */ + unsigned int __data_len; /* total data len */ struct bio *bio; struct bio *biotail; @@ -211,8 +201,8 @@ struct request { unsigned short ioprio; - void *special; - char *buffer; + void *special; /* opaque pointer available for LLD use */ + char *buffer; /* kaddr of the current segment if available */ int tag; int errors; @@ -226,10 +216,9 @@ struct request { unsigned char __cmd[BLK_MAX_CDB]; unsigned char *cmd; - unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; - void *data; + unsigned int resid_len; /* residual count */ void *sense; unsigned long deadline; @@ -318,6 +307,26 @@ struct blk_cmd_filter { struct kobject kobj; }; +struct queue_limits { + unsigned long bounce_pfn; + unsigned long seg_boundary_mask; + + unsigned int max_hw_sectors; + unsigned int max_sectors; + unsigned int max_segment_size; + unsigned int physical_block_size; + unsigned int alignment_offset; + unsigned int io_min; + unsigned int io_opt; + + unsigned short logical_block_size; + unsigned short max_hw_segments; + unsigned short max_phys_segments; + + unsigned char misaligned; + unsigned char no_cluster; +}; + struct request_queue { /* @@ -369,7 +378,6 @@ struct request_queue /* * queue needs bounce pages for pages above this limit */ - unsigned long bounce_pfn; gfp_t bounce_gfp; /* @@ -398,14 +406,6 @@ struct request_queue unsigned int nr_congestion_off; unsigned int nr_batching; - unsigned int max_sectors; - unsigned int max_hw_sectors; - unsigned short max_phys_segments; - unsigned short max_hw_segments; - unsigned short hardsect_size; - unsigned int max_segment_size; - - unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; unsigned int dma_pad_mask; @@ -415,12 +415,14 @@ struct request_queue struct list_head tag_busy_list; unsigned int nr_sorted; - unsigned int in_flight; + unsigned int in_flight[2]; unsigned int rq_timeout; struct timer_list timeout; struct list_head timeout_list; + struct queue_limits limits; + /* * sg stuff */ @@ -522,6 +524,11 @@ static inline void queue_flag_clear_unlocked(unsigned int flag, __clear_bit(flag, &q->queue_flags); } +static inline int queue_in_flight(struct request_queue *q) +{ + return q->in_flight[0] + q->in_flight[1]; +} + static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); @@ -752,10 +759,17 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); +extern struct request *blk_make_request(struct request_queue *, struct bio *, + gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); +extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data); +extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_plug_device(struct request_queue *); @@ -768,12 +782,6 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); /* - * Temporary export, until SCSI gets fixed up. - */ -extern int blk_rq_append_bio(struct request_queue *q, struct request *rq, - struct bio *bio); - -/* * A queue has just exitted congestion. Note this in the global counter of * congested queues, and wake up anyone who was waiting for requests to be * put back. @@ -798,7 +806,6 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); -extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); @@ -831,41 +838,73 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } -extern void blkdev_dequeue_request(struct request *req); +/* + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + */ +static inline sector_t blk_rq_pos(const struct request *rq) +{ + return rq->__sector; +} + +static inline unsigned int blk_rq_bytes(const struct request *rq) +{ + return rq->__data_len; +} + +static inline int blk_rq_cur_bytes(const struct request *rq) +{ + return rq->bio ? bio_cur_bytes(rq->bio) : 0; +} + +static inline unsigned int blk_rq_sectors(const struct request *rq) +{ + return blk_rq_bytes(rq) >> 9; +} + +static inline unsigned int blk_rq_cur_sectors(const struct request *rq) +{ + return blk_rq_cur_bytes(rq) >> 9; +} + +/* + * Request issue related functions. + */ +extern struct request *blk_peek_request(struct request_queue *q); +extern void blk_start_request(struct request *rq); +extern struct request *blk_fetch_request(struct request_queue *q); /* - * blk_end_request() and friends. - * __blk_end_request() and end_request() must be called with - * the request queue spinlock acquired. + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + * + * blk_end_request() and friends. __blk_end_request() must be called + * with the request queue spinlock acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. */ -extern int blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes); -extern void end_request(struct request *, int); -extern int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)); +extern bool blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); +extern bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void blk_end_request_all(struct request *rq, int error); +extern bool blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes); +extern void __blk_end_request_all(struct request *rq, int error); +extern bool __blk_end_request_cur(struct request *rq, int error); + extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); -extern void blk_update_request(struct request *rq, int error, - unsigned int nr_bytes); - -/* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. - */ -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); /* * Access functions for manipulating queue properties @@ -877,10 +916,20 @@ extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_sectors(struct request_queue *, unsigned int); +extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short); extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_alignment_offset(struct request_queue *q, + unsigned int alignment); +extern void blk_queue_io_min(struct request_queue *q, unsigned int min); +extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, + sector_t offset); +extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, + sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); @@ -967,19 +1016,87 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) -static inline int queue_hardsect_size(struct request_queue *q) +static inline unsigned long queue_bounce_pfn(struct request_queue *q) +{ + return q->limits.bounce_pfn; +} + +static inline unsigned long queue_segment_boundary(struct request_queue *q) +{ + return q->limits.seg_boundary_mask; +} + +static inline unsigned int queue_max_sectors(struct request_queue *q) +{ + return q->limits.max_sectors; +} + +static inline unsigned int queue_max_hw_sectors(struct request_queue *q) +{ + return q->limits.max_hw_sectors; +} + +static inline unsigned short queue_max_hw_segments(struct request_queue *q) +{ + return q->limits.max_hw_segments; +} + +static inline unsigned short queue_max_phys_segments(struct request_queue *q) +{ + return q->limits.max_phys_segments; +} + +static inline unsigned int queue_max_segment_size(struct request_queue *q) +{ + return q->limits.max_segment_size; +} + +static inline unsigned short queue_logical_block_size(struct request_queue *q) { int retval = 512; - if (q && q->hardsect_size) - retval = q->hardsect_size; + if (q && q->limits.logical_block_size) + retval = q->limits.logical_block_size; return retval; } -static inline int bdev_hardsect_size(struct block_device *bdev) +static inline unsigned short bdev_logical_block_size(struct block_device *bdev) +{ + return queue_logical_block_size(bdev_get_queue(bdev)); +} + +static inline unsigned int queue_physical_block_size(struct request_queue *q) +{ + return q->limits.physical_block_size; +} + +static inline unsigned int queue_io_min(struct request_queue *q) +{ + return q->limits.io_min; +} + +static inline unsigned int queue_io_opt(struct request_queue *q) +{ + return q->limits.io_opt; +} + +static inline int queue_alignment_offset(struct request_queue *q) +{ + if (q && q->limits.misaligned) + return -1; + + if (q && q->limits.alignment_offset) + return q->limits.alignment_offset; + + return 0; +} + +static inline int queue_sector_alignment_offset(struct request_queue *q, + sector_t sector) { - return queue_hardsect_size(bdev_get_queue(bdev)); + return ((sector << 9) - q->limits.alignment_offset) + & (q->limits.io_min - 1); } static inline int queue_dma_alignment(struct request_queue *q) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889e92e..7e4350ece0f 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,9 +116,9 @@ struct blk_io_trace { * The remap event */ struct blk_io_trace_remap { - __be32 device; __be32 device_from; - __be64 sector; + __be32 device_to; + __be64 sector_from; }; enum { @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,22 +194,42 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} #endif /* CONFIG_BLK_DEV_IO_TRACE */ + +#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK) + +static inline int blk_cmd_buf_len(struct request *rq) +{ + return blk_pc_request(rq) ? rq->cmd_len * 3 : 1; +} + +extern void blk_dump_cmd(char *buf, struct request *rq); +extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq); + +#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ + #endif /* __KERNEL__ */ #endif diff --git a/include/linux/cdev.h b/include/linux/cdev.h index fb4591977b0..f389e319a45 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -28,6 +28,8 @@ int cdev_add(struct cdev *, dev_t, unsigned); void cdev_del(struct cdev *); +int cdev_index(struct inode *inode); + void cd_forget(struct inode *); extern struct backing_dev_info directly_mappable_cdev_bdi; diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 5a40d14daa9..c56457c8334 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -288,7 +288,15 @@ static inline cycle_t clocksource_read(struct clocksource *cs) */ static inline int clocksource_enable(struct clocksource *cs) { - return cs->enable ? cs->enable(cs) : 0; + int ret = 0; + + if (cs->enable) + ret = cs->enable(cs); + + /* save mult_orig on enable */ + cs->mult_orig = cs->mult; + + return ret; } /** diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21f9a3..af931ee43dd 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9f315382610..c5ac87ca7bc 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t; bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); @@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, return true; } +static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + cpumask_clear(*mask); + return true; +} + +static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, + int node) +{ + cpumask_clear(*mask); + return true; +} + static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) { } diff --git a/include/linux/cramfs_fs.h b/include/linux/cramfs_fs.h index 3be4e5a27d8..6fc2bed368b 100644 --- a/include/linux/cramfs_fs.h +++ b/include/linux/cramfs_fs.h @@ -2,9 +2,8 @@ #define __CRAMFS_H #include <linux/types.h> +#include <linux/magic.h> -#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ -#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define CRAMFS_SIGNATURE "Compressed ROMFS" /* diff --git a/include/linux/cred.h b/include/linux/cred.h index 3282ee4318e..4fa99969631 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -13,6 +13,7 @@ #define _LINUX_CRED_H #include <linux/capability.h> +#include <linux/init.h> #include <linux/key.h> #include <asm/atomic.h> diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 788850ba4e7..1fbdea4f08e 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL { #ifndef DP_WINDOW_SIZE -/* #include "cyclomz.h" */ -/****************** ****************** *******************/ -/* - * The data types defined below are used in all ZFIRM interface - * data structures. They accomodate differences between HW - * architectures and compilers. - */ - -typedef __u64 ucdouble; /* 64 bits, unsigned */ -typedef __u32 uclong; /* 32 bits, unsigned */ -typedef __u16 ucshort; /* 16 bits, unsigned */ -typedef __u8 ucchar; /* 8 bits, unsigned */ - /* * Memory Window Sizes */ @@ -507,16 +494,20 @@ struct ZFW_CTRL { /* Per card data structure */ struct cyclades_card { - void __iomem *base_addr; - void __iomem *ctl_addr; - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - spinlock_t card_lock; - struct cyclades_port *ports; + void __iomem *base_addr; + union { + void __iomem *p9050; + struct RUNTIME_9060 __iomem *p9060; + } ctl_addr; + int irq; + unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ + unsigned int first_line; /* minor number of first channel on card */ + unsigned int nports; /* Number of ports in the card */ + int bus_index; /* address shift - 0 for ISA, 1 for PCI */ + int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + u32 hw_ver; + spinlock_t card_lock; + struct cyclades_port *ports; }; /*************************************** diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 15156364d19..30b93b2a01a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -180,10 +180,12 @@ d_iput: no no no yes #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ #define DCACHE_UNHASHED 0x0010 -#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ +#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ #define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ +#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ + extern spinlock_t dcache_lock; extern seqlock_t rename_lock; @@ -351,6 +353,11 @@ static inline int d_unhashed(struct dentry *dentry) return (dentry->d_flags & DCACHE_UNHASHED); } +static inline int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + static inline struct dentry *dget_parent(struct dentry *dentry) { struct dentry *ret; @@ -368,7 +375,7 @@ static inline int d_mountpoint(struct dentry *dentry) return dentry->d_mounted; } -extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *); +extern struct vfsmount *lookup_mnt(struct path *); extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); extern int sysctl_vfs_cache_pressure; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ded2d7c4266..49c2362977f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -149,7 +149,7 @@ struct io_restrictions { unsigned max_hw_sectors; unsigned max_sectors; unsigned max_segment_size; - unsigned short hardsect_size; + unsigned short logical_block_size; unsigned short max_hw_segments; unsigned short max_phys_segments; unsigned char no_cluster; /* inverted so that 0 is default */ diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 28d53cb7b5a..171ad8aedc8 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern void dma_debug_init(u32 num_entries); +extern int dma_debug_resize_entries(u32 num_entries); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries) { } +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc342cd..10ff5c49882 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -108,6 +108,7 @@ struct irte { }; #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; +extern int intr_remapping_supported(void); extern int enable_intr_remapping(int); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); @@ -157,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) } #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) +#define disable_intr_remapping() (0) +#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) #endif diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index 102a902b439..ecc06286226 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -10,7 +10,7 @@ struct dnotify_struct { struct dnotify_struct * dn_next; - unsigned long dn_mask; + __u32 dn_mask; int dn_fd; struct file * dn_filp; fl_owner_t dn_owner; @@ -21,23 +21,18 @@ struct dnotify_struct { #ifdef CONFIG_DNOTIFY -extern void __inode_dir_notify(struct inode *, unsigned long); +#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\ + FS_MODIFY | FS_MODIFY_CHILD |\ + FS_ACCESS | FS_ACCESS_CHILD |\ + FS_ATTRIB | FS_ATTRIB_CHILD |\ + FS_CREATE | FS_DN_RENAME |\ + FS_MOVED_FROM | FS_MOVED_TO) + extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); -extern void dnotify_parent(struct dentry *, unsigned long); - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ - if (inode->i_dnotify_mask & (event)) - __inode_dir_notify(inode, event); -} #else -static inline void __inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - static inline void dnotify_flush(struct file *filp, fl_owner_t id) { } @@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) return -EINVAL; } -static inline void dnotify_parent(struct dentry *dentry, unsigned long event) -{ -} - -static inline void inode_dir_notify(struct inode *inode, unsigned long event) -{ -} - #endif /* CONFIG_DNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index c59b769f62b..1cb3372e65d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -103,10 +103,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); -extern void elv_dequeue_request(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *); extern int elv_queue_empty(struct request_queue *); -extern struct request *elv_next_request(struct request_queue *q); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); @@ -171,7 +169,7 @@ enum { ELV_MQUEUE_MUST, }; -#define rq_end_sector(rq) ((rq)->sector + (rq)->nr_sectors) +#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 3b534e527e0..ede84fa7da5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -729,8 +729,8 @@ struct inode { struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; blkcnt_t i_blocks; + unsigned int i_blkbits; unsigned short i_bytes; umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ @@ -751,13 +751,12 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; }; - int i_cindex; __u32 i_generation; -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */ #endif #ifdef CONFIG_INOTIFY @@ -1321,7 +1320,7 @@ struct super_block { struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_need_sync_fs; + int s_need_sync; atomic_t s_active; #ifdef CONFIG_SECURITY void *s_security; @@ -1372,11 +1371,6 @@ struct super_block { * generic_show_options() */ char *s_options; - - /* - * storage for asynchronous operations - */ - struct list_head s_async_list; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -1800,7 +1794,7 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *collect_mounts(struct vfsmount *, struct dentry *); +extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); @@ -1947,8 +1941,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -extern int fsync_super(struct super_block *); -extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} static inline int sync_blockdev(struct block_device *bdev) { return 0; } @@ -1964,6 +1956,7 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) return 0; } #endif +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; @@ -2082,12 +2075,8 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync(struct file *file, struct dentry *dentry, int datasync); extern void sync_supers(void); -extern void sync_filesystems(int wait); -extern void __fsync_super(struct super_block *sb); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); #ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); #endif @@ -2205,6 +2194,8 @@ extern int generic_segment_checks(const struct iovec *iov, /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t default_file_splice_read(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, @@ -2354,6 +2345,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); +extern int simple_fsync(struct file *, struct dentry *, int); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 00fbd5b245c..936f9aa8bb9 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -13,6 +13,7 @@ #include <linux/dnotify.h> #include <linux/inotify.h> +#include <linux/fsnotify_backend.h> #include <linux/audit.h> /* @@ -22,19 +23,45 @@ static inline void fsnotify_d_instantiate(struct dentry *entry, struct inode *inode) { + __fsnotify_d_instantiate(entry, inode); + inotify_d_instantiate(entry, inode); } +/* Notify this dentry's parent about a child's events. */ +static inline void fsnotify_parent(struct dentry *dentry, __u32 mask) +{ + __fsnotify_parent(dentry, mask); + + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); +} + /* * fsnotify_d_move - entry has been moved * Called with dcache_lock and entry->d_lock held. */ static inline void fsnotify_d_move(struct dentry *entry) { + /* + * On move we need to update entry->d_flags to indicate if the new parent + * cares about events from this entry. + */ + __fsnotify_update_dcache_flags(entry); + inotify_d_move(entry); } /* + * fsnotify_link_count - inode's link count changed + */ +static inline void fsnotify_link_count(struct inode *inode) +{ + inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0); +} + +/* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, @@ -42,42 +69,62 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, int isdir, struct inode *target, struct dentry *moved) { struct inode *source = moved->d_inode; - u32 cookie = inotify_get_cookie(); + u32 in_cookie = inotify_get_cookie(); + u32 fs_cookie = fsnotify_get_cookie(); + __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM); + __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO); if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); - else { - inode_dir_notify(old_dir, DN_DELETE); - inode_dir_notify(new_dir, DN_CREATE); - } + old_dir_mask |= FS_DN_RENAME; - if (isdir) + if (isdir) { isdir = IN_ISDIR; - inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, + old_dir_mask |= FS_IN_ISDIR; + new_dir_mask |= FS_IN_ISDIR; + } + + inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name, source); - inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, + inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name, source); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); + if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(target); + + /* this is really a link_count change not a removal */ + fsnotify_link_count(target); } if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0); } audit_inode_child(new_name, moved, new_dir); } /* + * fsnotify_inode_delete - and inode is being evicted from cache, clean up is needed + */ +static inline void fsnotify_inode_delete(struct inode *inode) +{ + __fsnotify_inode_delete(inode); +} + +/* * fsnotify_nameremove - a filename was removed from a directory */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + __u32 mask = FS_DELETE; + if (isdir) - isdir = IN_ISDIR; - dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); + mask |= FS_IN_ISDIR; + + fsnotify_parent(dentry, mask); } /* @@ -87,14 +134,9 @@ static inline void fsnotify_inoderemove(struct inode *inode) { inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); -} -/* - * fsnotify_link_count - inode's link count changed - */ -static inline void fsnotify_link_count(struct inode *inode) -{ - inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + __fsnotify_inode_delete(inode); } /* @@ -102,10 +144,11 @@ static inline void fsnotify_link_count(struct inode *inode) */ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) { - inode_dir_notify(inode, DN_CREATE); inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -115,11 +158,12 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { - inode_dir_notify(dir, DN_CREATE); inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name, inode); fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); + + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0); } /* @@ -127,10 +171,13 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { - inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, - dentry->d_name.name, dentry->d_inode); + __u32 mask = (FS_CREATE | FS_IN_ISDIR); + struct inode *d_inode = dentry->d_inode; + + inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0); } /* @@ -139,14 +186,15 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ACCESS; + __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_ACCESS); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -155,14 +203,15 @@ static inline void fsnotify_access(struct dentry *dentry) static inline void fsnotify_modify(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_MODIFY; + __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; - dnotify_parent(dentry, DN_MODIFY); - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -171,13 +220,15 @@ static inline void fsnotify_modify(struct dentry *dentry) static inline void fsnotify_open(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_OPEN; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -187,15 +238,16 @@ static inline void fsnotify_close(struct file *file) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; - u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; + __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0); } /* @@ -204,13 +256,15 @@ static inline void fsnotify_close(struct file *file) static inline void fsnotify_xattr(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ATTRIB; + __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; - inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } /* @@ -220,50 +274,37 @@ static inline void fsnotify_xattr(struct dentry *dentry) static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; - int dn_mask = 0; - u32 in_mask = 0; + __u32 mask = 0; + + if (ia_valid & ATTR_UID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_GID) + mask |= FS_ATTRIB; + if (ia_valid & ATTR_SIZE) + mask |= FS_MODIFY; - if (ia_valid & ATTR_UID) { - in_mask |= IN_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_GID) { - in_mask |= IN_ATTRIB; - dn_mask |= DN_ATTRIB; - } - if (ia_valid & ATTR_SIZE) { - in_mask |= IN_MODIFY; - dn_mask |= DN_MODIFY; - } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) - { - in_mask |= IN_ATTRIB; - dn_mask |= DN_ATTRIB; - } else if (ia_valid & ATTR_ATIME) { - in_mask |= IN_ACCESS; - dn_mask |= DN_ACCESS; - } else if (ia_valid & ATTR_MTIME) { - in_mask |= IN_MODIFY; - dn_mask |= DN_MODIFY; - } - if (ia_valid & ATTR_MODE) { - in_mask |= IN_ATTRIB; - dn_mask |= DN_ATTRIB; - } + mask |= FS_ATTRIB; + else if (ia_valid & ATTR_ATIME) + mask |= FS_ACCESS; + else if (ia_valid & ATTR_MTIME) + mask |= FS_MODIFY; + + if (ia_valid & ATTR_MODE) + mask |= FS_ATTRIB; - if (dn_mask) - dnotify_parent(dentry, dn_mask); - if (in_mask) { + if (mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= IN_ISDIR; - inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); - inotify_dentry_parent_queue_event(dentry, in_mask, 0, - dentry->d_name.name); + mask |= FS_IN_ISDIR; + inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify_parent(dentry, mask); + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0); } } -#ifdef CONFIG_INOTIFY /* inotify helpers */ +#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */ /* * fsnotify_oldname_init - save off the old filename before we change it @@ -281,7 +322,7 @@ static inline void fsnotify_oldname_free(const char *old_name) kfree(old_name); } -#else /* CONFIG_INOTIFY */ +#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */ static inline const char *fsnotify_oldname_init(const char *name) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h new file mode 100644 index 00000000000..44848aa830d --- /dev/null +++ b/include/linux/fsnotify_backend.h @@ -0,0 +1,387 @@ +/* + * Filesystem access notification for Linux + * + * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> + */ + +#ifndef __LINUX_FSNOTIFY_BACKEND_H +#define __LINUX_FSNOTIFY_BACKEND_H + +#ifdef __KERNEL__ + +#include <linux/idr.h> /* inotify uses this */ +#include <linux/fs.h> /* struct inode */ +#include <linux/list.h> +#include <linux/path.h> /* struct path */ +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/atomic.h> + +/* + * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily + * convert between them. dnotify only needs conversion at watch creation + * so no perf loss there. fanotify isn't defined yet, so it can use the + * wholes if it needs more events. + */ +#define FS_ACCESS 0x00000001 /* File was accessed */ +#define FS_MODIFY 0x00000002 /* File was modified */ +#define FS_ATTRIB 0x00000004 /* Metadata changed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writtable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwrittable file closed */ +#define FS_OPEN 0x00000020 /* File was opened */ +#define FS_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FS_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FS_CREATE 0x00000100 /* Subfile was created */ +#define FS_DELETE 0x00000200 /* Subfile was deleted */ +#define FS_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FS_MOVE_SELF 0x00000800 /* Self was moved */ + +#define FS_UNMOUNT 0x00002000 /* inode on umount fs */ +#define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FS_IN_IGNORED 0x00008000 /* last inotify event here */ + +#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ + +/* This inode cares about things that happen to its children. Always set for + * dnotify and inotify. */ +#define FS_EVENT_ON_CHILD 0x08000000 + +/* This is a list of all events that may get sent to a parernt based on fs event + * happening to inodes inside that directory */ +#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ + FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ + FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ + FS_DELETE) + +/* listeners that hard code group numbers near the top */ +#define DNOTIFY_GROUP_NUM UINT_MAX +#define INOTIFY_GROUP_NUM (DNOTIFY_GROUP_NUM-1) + +struct fsnotify_group; +struct fsnotify_event; +struct fsnotify_mark_entry; +struct fsnotify_event_private_data; + +/* + * Each group much define these ops. The fsnotify infrastructure will call + * these operations for each relevant group. + * + * should_send_event - given a group, inode, and mask this function determines + * if the group is interested in this event. + * handle_event - main call for a group to handle an fs event + * free_group_priv - called when a group refcnt hits 0 to clean up the private union + * freeing-mark - this means that a mark has been flagged to die when everything + * finishes using it. The function is supplied with what must be a + * valid group and inode to use to clean up. + */ +struct fsnotify_ops { + bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask); + int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); + void (*free_group_priv)(struct fsnotify_group *group); + void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group); + void (*free_event_priv)(struct fsnotify_event_private_data *priv); +}; + +/* + * A group is a "thing" that wants to receive notification about filesystem + * events. The mask holds the subset of event types this group cares about. + * refcnt on a group is up to the implementor and at any moment if it goes 0 + * everything will be cleaned up. + */ +struct fsnotify_group { + /* + * global list of all groups receiving events from fsnotify. + * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex + * or fsnotify_grp_srcu depending on write vs read. + */ + struct list_head group_list; + + /* + * Defines all of the event types in which this group is interested. + * This mask is a bitwise OR of the FS_* events from above. Each time + * this mask changes for a group (if it changes) the correct functions + * must be called to update the global structures which indicate global + * interest in event types. + */ + __u32 mask; + + /* + * How the refcnt is used is up to each group. When the refcnt hits 0 + * fsnotify will clean up all of the resources associated with this group. + * As an example, the dnotify group will always have a refcnt=1 and that + * will never change. Inotify, on the other hand, has a group per + * inotify_init() and the refcnt will hit 0 only when that fd has been + * closed. + */ + atomic_t refcnt; /* things with interest in this group */ + unsigned int group_num; /* simply prevents accidental group collision */ + + const struct fsnotify_ops *ops; /* how this group handles things */ + + /* needed to send notification to userspace */ + struct mutex notification_mutex; /* protect the notification_list */ + struct list_head notification_list; /* list of event_holder this group needs to send to userspace */ + wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */ + unsigned int q_len; /* events on the queue */ + unsigned int max_events; /* maximum events allowed on the list */ + + /* stores all fastapth entries assoc with this group so they can be cleaned on unregister */ + spinlock_t mark_lock; /* protect mark_entries list */ + atomic_t num_marks; /* 1 for each mark entry and 1 for not being + * past the point of no return when freeing + * a group */ + struct list_head mark_entries; /* all inode mark entries for this group */ + + /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */ + bool on_group_list; + + /* groups can define private fields here or use the void *private */ + union { + void *private; +#ifdef CONFIG_INOTIFY_USER + struct inotify_group_private_data { + spinlock_t idr_lock; + struct idr idr; + u32 last_wd; + struct fasync_struct *fa; /* async notification */ + struct user_struct *user; + } inotify_data; +#endif + }; +}; + +/* + * A single event can be queued in multiple group->notification_lists. + * + * each group->notification_list will point to an event_holder which in turns points + * to the actual event that needs to be sent to userspace. + * + * Seemed cheaper to create a refcnt'd event and a small holder for every group + * than create a different event for every group + * + */ +struct fsnotify_event_holder { + struct fsnotify_event *event; + struct list_head event_list; +}; + +/* + * Inotify needs to tack data onto an event. This struct lets us later find the + * correct private data of the correct group. + */ +struct fsnotify_event_private_data { + struct fsnotify_group *group; + struct list_head event_list; +}; + +/* + * all of the information about the original object we want to now send to + * a group. If you want to carry more info from the accessing task to the + * listener this structure is where you need to be adding fields. + */ +struct fsnotify_event { + /* + * If we create an event we are also likely going to need a holder + * to link to a group. So embed one holder in the event. Means only + * one allocation for the common case where we only have one group + */ + struct fsnotify_event_holder holder; + spinlock_t lock; /* protection for the associated event_holder and private_list */ + /* to_tell may ONLY be dereferenced during handle_event(). */ + struct inode *to_tell; /* either the inode the event happened to or its parent */ + /* + * depending on the event type we should have either a path or inode + * We hold a reference on path, but NOT on inode. Since we have the ref on + * the path, it may be dereferenced at any point during this object's + * lifetime. That reference is dropped when this object's refcnt hits + * 0. If this event contains an inode instead of a path, the inode may + * ONLY be used during handle_event(). + */ + union { + struct path path; + struct inode *inode; + }; +/* when calling fsnotify tell it if the data is a path or inode */ +#define FSNOTIFY_EVENT_NONE 0 +#define FSNOTIFY_EVENT_PATH 1 +#define FSNOTIFY_EVENT_INODE 2 +#define FSNOTIFY_EVENT_FILE 3 + int data_type; /* which of the above union we have */ + atomic_t refcnt; /* how many groups still are using/need to send this event */ + __u32 mask; /* the type of access, bitwise OR for FS_* event types */ + + u32 sync_cookie; /* used to corrolate events, namely inotify mv events */ + char *file_name; + size_t name_len; + + struct list_head private_data_list; /* groups can store private data here */ +}; + +/* + * a mark is simply an entry attached to an in core inode which allows an + * fsnotify listener to indicate they are either no longer interested in events + * of a type matching mask or only interested in those events. + * + * these are flushed when an inode is evicted from core and may be flushed + * when the inode is modified (as seen by fsnotify_access). Some fsnotify users + * (such as dnotify) will flush these when the open fd is closed and not at + * inode eviction or modification. + */ +struct fsnotify_mark_entry { + __u32 mask; /* mask this mark entry is for */ + /* we hold ref for each i_list and g_list. also one ref for each 'thing' + * in kernel that found and may be using this mark. */ + atomic_t refcnt; /* active things looking at this mark */ + struct inode *inode; /* inode this entry is associated with */ + struct fsnotify_group *group; /* group this mark entry is for */ + struct hlist_node i_list; /* list of mark_entries by inode->i_fsnotify_mark_entries */ + struct list_head g_list; /* list of mark_entries by group->i_fsnotify_mark_entries */ + spinlock_t lock; /* protect group, inode, and killme */ + struct list_head free_i_list; /* tmp list used when freeing this mark */ + struct list_head free_g_list; /* tmp list used when freeing this mark */ + void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */ +}; + +#ifdef CONFIG_FSNOTIFY + +/* called from the vfs helpers */ + +/* main fsnotify call to send events */ +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name, u32 cookie); +extern void __fsnotify_parent(struct dentry *dentry, __u32 mask); +extern void __fsnotify_inode_delete(struct inode *inode); +extern u32 fsnotify_get_cookie(void); + +static inline int fsnotify_inode_watches_children(struct inode *inode) +{ + /* FS_EVENT_ON_CHILD is set if the inode may care */ + if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) + return 0; + /* this inode might care about child events, does it care about the + * specific set of events that can happen on a child? */ + return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; +} + +/* + * Update the dentry with a flag indicating the interest of its parent to receive + * filesystem events when those events happens to this dentry->d_inode. + */ +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{ + struct dentry *parent; + + assert_spin_locked(&dcache_lock); + assert_spin_locked(&dentry->d_lock); + + parent = dentry->d_parent; + if (fsnotify_inode_watches_children(parent->d_inode)) + dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; + else + dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; +} + +/* + * fsnotify_d_instantiate - instantiate a dentry for inode + * Called with dcache_lock held. + */ +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{ + if (!inode) + return; + + assert_spin_locked(&dcache_lock); + + spin_lock(&dentry->d_lock); + __fsnotify_update_dcache_flags(dentry); + spin_unlock(&dentry->d_lock); +} + +/* called from fsnotify listeners, such as fanotify or dnotify */ + +/* must call when a group changes its ->mask */ +extern void fsnotify_recalc_global_mask(void); +/* get a reference to an existing or create a new group */ +extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, + __u32 mask, + const struct fsnotify_ops *ops); +/* run all marks associated with this group and update group->mask */ +extern void fsnotify_recalc_group_mask(struct fsnotify_group *group); +/* drop reference on a group from fsnotify_obtain_group */ +extern void fsnotify_put_group(struct fsnotify_group *group); + +/* take a reference to an event */ +extern void fsnotify_get_event(struct fsnotify_event *event); +extern void fsnotify_put_event(struct fsnotify_event *event); +/* find private data previously attached to an event and unlink it */ +extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); + +/* attach the event to the group notification queue */ +extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, + struct fsnotify_event_private_data *priv); +/* true if the group notification queue is empty */ +extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); +/* return, but do not dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group); +/* return AND dequeue the first event on the notification queue */ +extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group); + +/* functions used to manipulate the marks attached to inodes */ + +/* run all marks associated with an inode and update inode->i_fsnotify_mask */ +extern void fsnotify_recalc_inode_mask(struct inode *inode); +extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry)); +/* find (and take a reference) to a mark associated with group and inode */ +extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode); +/* attach the mark to both the group and the inode */ +extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode); +/* given a mark, flag it to be freed when all references are dropped */ +extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry); +/* run all the marks in a group, and flag them to be freed */ +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); +extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); +extern void fsnotify_unmount_inodes(struct list_head *list); + +/* put here because inotify does some weird stuff when destroying watches */ +extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_is, const char *name, + u32 cookie); + +#else + +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + const char *name, u32 cookie) +{} + +static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask) +{} + +static inline void __fsnotify_inode_delete(struct inode *inode) +{} + +static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) +{} + +static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) +{} + +static inline u32 fsnotify_get_cookie(void) +{ + return 0; +} + +static inline void fsnotify_unmount_inodes(struct list_head *list) +{} + +#endif /* CONFIG_FSNOTIFY */ + +#endif /* __KERNEL __ */ + +#endif /* __LINUX_FSNOTIFY_BACKEND_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a0c2f221e6..39b95c56587 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* @@ -368,6 +361,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -379,8 +373,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function @@ -496,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) extern int ftrace_dump_on_oops; +#ifdef CONFIG_PREEMPT +#define INIT_TRACE_RECURSION .trace_recursion = 0, +#endif + #endif /* CONFIG_TRACING */ +#ifndef INIT_TRACE_RECURSION +#define INIT_TRACE_RECURSION +#endif #ifdef CONFIG_HW_BRANCH_TRACER diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h new file mode 100644 index 00000000000..5c093ffc655 --- /dev/null +++ b/include/linux/ftrace_event.h @@ -0,0 +1,172 @@ +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include <linux/trace_seq.h> +#include <linux/ring_buffer.h> +#include <linux/percpu.h> + +struct trace_array; +struct tracer; +struct dentry; + +DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array); + +const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array); + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned short type; + unsigned char flags; + unsigned char preempt_count; + int pid; + int tgid; +}; + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + unsigned long iter_flags; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + int cpu; + u64 ts; + + loff_t pos; + long idx; + + cpumask_var_t started; +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags); +struct trace_event { + struct hlist_node node; + struct list_head list; + int type; + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + + +struct ring_buffer_event * +trace_current_buffer_lock_reserve(int type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +struct ftrace_event_call { + struct list_head list; + char *name; + char *system; + struct dentry *dir; + struct trace_event *event; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; + int filter_active; + void *filter; + void *mod; + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif +}; + +#define MAX_FILTER_PRED 32 +#define MAX_FILTER_STR_VAL 128 + +extern int init_preds(struct ftrace_event_call *call); +extern void destroy_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); + +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed); + +#define is_signed_type(type) (((type)(-1)) < 0) + +int trace_set_clr_event(const char *system, const char *event, int set); + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#define __common_field(type, item, is_signed) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item), is_signed); \ + if (ret) \ + return ret; + +#endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5a34f..34956c8fdeb 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,8 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +40,10 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a1a28caed23..149fda264c8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -90,6 +90,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; + sector_t alignment_offset; struct device __dev; struct kobject *holder_dir; int policy, partno; diff --git a/include/linux/i7300_idle.h b/include/linux/i7300_idle.h index 05a80c44513..1587b7dec50 100644 --- a/include/linux/i7300_idle.h +++ b/include/linux/i7300_idle.h @@ -16,35 +16,33 @@ struct fbd_ioat { unsigned int vendor; unsigned int ioat_dev; + unsigned int enabled; }; /* * The i5000 chip-set has the same hooks as the i7300 - * but support is disabled by default because this driver - * has not been validated on that platform. + * but it is not enabled by default and must be manually + * manually enabled with "forceload=1" because it is + * only lightly validated. */ -#define SUPPORT_I5000 0 static const struct fbd_ioat fbd_ioat_list[] = { - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB}, -#if SUPPORT_I5000 - {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT}, -#endif + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB, 1}, + {PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT, 0}, {0, 0} }; /* table of devices that work with this driver */ static const struct pci_device_id pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_FBD_CNB) }, -#if SUPPORT_I5000 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5000_ERR) }, -#endif { } /* Terminating entry */ }; /* Check for known platforms with I/O-AT */ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, - struct pci_dev **ioat_dev) + struct pci_dev **ioat_dev, + int enable_all) { int i; struct pci_dev *memdev, *dmadev; @@ -69,6 +67,8 @@ static inline int i7300_idle_platform_probe(struct pci_dev **fbd_dev, for (i = 0; fbd_ioat_list[i].vendor != 0; i++) { if (dmadev->vendor == fbd_ioat_list[i].vendor && dmadev->device == fbd_ioat_list[i].ioat_dev) { + if (!(fbd_ioat_list[i].enabled || enable_all)) + continue; if (fbd_dev) *fbd_dev = memdev; if (ioat_dev) diff --git a/include/linux/ide.h b/include/linux/ide.h index 9fed365a598..867cb68d846 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include <asm/io.h> #include <asm/mutex.h> +/* for request_sense */ +#include <linux/cdrom.h> + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -324,7 +327,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ @@ -360,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -377,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -593,16 +587,16 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1174,7 +1168,10 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); + +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e2aa45cb0c..b1b827d091a 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -13,14 +13,17 @@ #include <linux/fs.h> struct linux_binprm; +#define IMA_COUNT_UPDATE 1 +#define IMA_COUNT_LEAVE 0 + #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct path *path, int mask, int update_counts); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); -extern void ima_shm_check(struct file *file); +extern void ima_counts_get(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct path *path, int mask, int update_counts) { return 0; } @@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -static inline void ima_shm_check(struct file *file) +static inline void ima_counts_get(struct file *file) { return; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d87247d2641..28b1f30601b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -108,6 +108,15 @@ extern struct group_info init_groups; extern struct cred init_cred; +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ + .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -145,8 +154,8 @@ extern struct cred init_cred; .group_leader = &tsk, \ .real_cred = &init_cred, \ .cred = &init_cred, \ - .cred_exec_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ @@ -171,9 +180,11 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ + INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ } diff --git a/include/linux/input.h b/include/linux/input.h index 0e6ff5de358..6fed4f6a9c9 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -656,6 +656,7 @@ struct input_absinfo { #define ABS_MT_POSITION_Y 0x36 /* Center Y ellipse position */ #define ABS_MT_TOOL_TYPE 0x37 /* Type of touching device */ #define ABS_MT_BLOB_ID 0x38 /* Group a set of packets as a blob */ +#define ABS_MT_TRACKING_ID 0x39 /* Unique ID of initiated contact */ #define ABS_MAX 0x3f #define ABS_CNT (ABS_MAX+1) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 91bb76f44f1..ff374ceface 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,6 +566,6 @@ struct irq_desc; extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 08b987bccf8..dd05434fa45 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -64,7 +64,7 @@ struct cfq_io_context { * and kmalloc'ed. These could be shared between processes. */ struct io_context { - atomic_t refcount; + atomic_long_t refcount; atomic_t nr_tasks; /* all the fields below are protected by this lock */ @@ -91,8 +91,8 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) * if ref count is zero, don't allow sharing (ioc is going away, it's * a race). */ - if (ioc && atomic_inc_not_zero(&ioc->refcount)) { - atomic_inc(&ioc->nr_tasks); + if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) { + atomic_long_inc(&ioc->refcount); return ioc; } diff --git a/include/linux/irq.h b/include/linux/irq.h index b7cbeed972e..1e50c34f006 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -117,7 +117,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, + int (*set_affinity)(unsigned int irq, const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); @@ -187,7 +187,7 @@ struct irq_desc { spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; - unsigned int cpu; + unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -201,26 +201,23 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); + struct irq_desc *desc, int node); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; -#else /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); -#endif /* CONFIG_SPARSE_IRQ */ - -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +#endif -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); +#ifdef CONFIG_NUMA_IRQ_DESC +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); #else +static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) +{ return desc; -#endif } +#endif + +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* * Migration helpers for obsolete names, they will go away: @@ -386,7 +383,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ -extern unsigned int create_irq_nr(unsigned int irq_want); +extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); @@ -424,47 +421,44 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #ifdef CONFIG_SMP /** - * init_alloc_desc_masks - allocate cpumasks for irq_desc + * alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). - * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, - bool boot) +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) { - int node; - - if (boot) { - alloc_bootmem_cpumask_var(&desc->affinity); - cpumask_setall(desc->affinity); - -#ifdef CONFIG_GENERIC_PENDING_IRQ - alloc_bootmem_cpumask_var(&desc->pending_mask); - cpumask_clear(desc->pending_mask); -#endif - return true; - } + gfp_t gfp = GFP_ATOMIC; - node = cpu_to_node(cpu); + if (boot) + gfp = GFP_NOWAIT; - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) +#ifdef CONFIG_CPUMASK_OFFSTACK + if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) return false; - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { free_cpumask_var(desc->affinity); return false; } - cpumask_clear(desc->pending_mask); +#endif #endif return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +} + /** * init_copy_desc_masks - copy cpumasks for irq_desc * @old_desc: pointer to old irq_desc struct @@ -478,7 +472,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -#ifdef CONFIG_CPUMASKS_OFFSTACK +#ifdef CONFIG_CPUMASK_OFFSTACK cpumask_copy(new_desc->affinity, old_desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -499,12 +493,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ +} + static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a9..a77c6007dc9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } + +/* + * Lock/unlock the current runqueue - to extract task statistics: + */ extern unsigned long long task_delta_exec(struct task_struct *); + extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h new file mode 100644 index 00000000000..7796aed6cdd --- /dev/null +++ b/include/linux/kmemleak.h @@ -0,0 +1,96 @@ +/* + * include/linux/kmemleak.h + * + * Copyright (C) 2008 ARM Limited + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __KMEMLEAK_H +#define __KMEMLEAK_H + +#ifdef CONFIG_DEBUG_KMEMLEAK + +extern void kmemleak_init(void); +extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp); +extern void kmemleak_free(const void *ptr); +extern void kmemleak_padding(const void *ptr, unsigned long offset, + size_t size); +extern void kmemleak_not_leak(const void *ptr); +extern void kmemleak_ignore(const void *ptr); +extern void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp); +extern void kmemleak_no_scan(const void *ptr); + +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_alloc(ptr, size, min_count, gfp); +} + +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ + if (!(flags & SLAB_NOLEAKTRACE)) + kmemleak_free(ptr); +} + +static inline void kmemleak_erase(void **ptr) +{ + *ptr = NULL; +} + +#else + +static inline void kmemleak_init(void) +{ +} +static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) +{ +} +static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, + int min_count, unsigned long flags, + gfp_t gfp) +{ +} +static inline void kmemleak_free(const void *ptr) +{ +} +static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +{ +} +static inline void kmemleak_not_leak(const void *ptr) +{ +} +static inline void kmemleak_ignore(const void *ptr) +{ +} +static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp) +{ +} +static inline void kmemleak_erase(void **ptr) +{ +} +static inline void kmemleak_no_scan(const void *ptr) +{ +} + +#endif /* CONFIG_DEBUG_KMEMLEAK */ + +#endif /* __KMEMLEAK_H */ diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 00000000000..b616d3930c3 --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include <trace/events/kmem.h> + +#ifdef CONFIG_KMEMTRACE +extern void kmemtrace_init(void); +#else +static inline void kmemtrace_init(void) +{ +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8cc137911b3..3db5d8d3748 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -119,7 +119,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct kvm_io { + struct { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; @@ -224,10 +224,10 @@ struct kvm_interrupt { /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; - __u32 padding; + __u32 padding1; union { void __user *dirty_bitmap; /* one bit per page */ - __u64 padding; + __u64 padding2; }; }; @@ -409,6 +409,10 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +#ifdef __KVM_HAVE_MSIX +#define KVM_CAP_DEVICE_MSIX 28 +#endif +#define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -482,11 +486,18 @@ struct kvm_irq_routing { #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR \ + _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY \ + _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -577,6 +588,8 @@ struct kvm_debug_guest { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + struct kvm_assigned_pci_dev { __u32 assigned_dev_id; __u32 busnr; @@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev { }; }; +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + struct kvm_assigned_irq { __u32 assigned_dev_id; __u32 host_irq; @@ -602,9 +626,19 @@ struct kvm_assigned_irq { }; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 512 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 894a56e365e..aacc5449f58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_KVMCLOCK_UPDATE 8 +#define KVM_REQ_KICK 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 @@ -72,7 +73,6 @@ struct kvm_vcpu { struct mutex mutex; int cpu; struct kvm_run *run; - int guest_mode; unsigned long requests; unsigned long guest_debug; int fpu_active; @@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); @@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +#define KVM_ASSIGNED_MSIX_PENDING 0x1 +struct kvm_guest_msix_entry { + u32 vector; + u16 entry; + u16 flags; +}; + struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; struct work_struct interrupt_work; @@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel { int assigned_dev_id; int host_busnr; int host_devfn; + unsigned int entries_nr; int host_irq; bool host_irq_disabled; + struct msix_entry *host_msix_entries; int guest_irq; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) + struct kvm_guest_msix_entry *guest_msix_entries; unsigned long irq_requested_type; int irq_source_id; int flags; struct pci_dev *dev; struct kvm *kvm; + spinlock_t assigned_dev_lock; }; struct kvm_irq_mask_notifier { @@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); +/* For vcpu->arch.iommu_flags */ +#define KVM_IOMMU_CACHE_COHERENCY 0x1 + #ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2b8318c83e5..fb46efbeabe 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,4 +40,31 @@ typedef unsigned long hfn_t; typedef hfn_t pfn_t; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/loop.h b/include/linux/loop.h index 40725447f5e..66c194e2d9b 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -56,8 +56,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; + struct bio_list lo_bio_list; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h new file mode 100644 index 00000000000..e461b2c3d71 --- /dev/null +++ b/include/linux/lsm_audit.h @@ -0,0 +1,111 @@ +/* + * Common LSM logging functions + * Heavily borrowed from selinux/avc.h + * + * Author : Etienne BASSET <etienne.basset@ensta.org> + * + * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil> + * All BUGS to : Etienne BASSET <etienne.basset@ensta.org> + */ +#ifndef _LSM_COMMON_LOGGING_ +#define _LSM_COMMON_LOGGING_ + +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/audit.h> +#include <linux/in6.h> +#include <linux/path.h> +#include <linux/key.h> +#include <linux/skbuff.h> +#include <asm/system.h> + + +/* Auxiliary data to use in generating the audit record. */ +struct common_audit_data { + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 + struct task_struct *tsk; + union { + struct { + struct path path; + struct inode *inode; + } fs; + struct { + int netif; + struct sock *sk; + u16 family; + __be16 dport; + __be16 sport; + union { + struct { + __be32 daddr; + __be32 saddr; + } v4; + struct { + struct in6_addr daddr; + struct in6_addr saddr; + } v6; + } fam; + } net; + int cap; + int ipc_id; + struct task_struct *tsk; +#ifdef CONFIG_KEYS + struct { + key_serial_t key; + char *key_desc; + } key_struct; +#endif + } u; + const char *function; + /* this union contains LSM specific data */ + union { + /* SMACK data */ + struct smack_audit_data { + char *subject; + char *object; + char *request; + int result; + } smack_audit_data; + /* SELinux data */ + struct { + u32 ssid; + u32 tsid; + u16 tclass; + u32 requested; + u32 audited; + struct av_decision *avd; + int result; + } selinux_audit_data; + } lsm_priv; + /* these callback will be implemented by a specific LSM */ + void (*lsm_pre_audit)(struct audit_buffer *, void *); + void (*lsm_post_audit)(struct audit_buffer *, void *); +}; + +#define v4info fam.v4 +#define v6info fam.v6 + +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +/* Initialize an LSM audit data structure. */ +#define COMMON_AUDIT_DATA_INIT(_d, _t) \ + { memset((_d), 0, sizeof(struct common_audit_data)); \ + (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + +void common_lsm_audit(struct common_audit_data *a); + +#endif diff --git a/include/linux/magic.h b/include/linux/magic.h index 5b4e28bcb78..1923327b986 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -6,9 +6,12 @@ #define AFS_SUPER_MAGIC 0x5346414F #define AUTOFS_SUPER_MAGIC 0x0187 #define CODA_SUPER_MAGIC 0x73757245 +#define CRAMFS_MAGIC 0x28cd3d45 /* some random number */ +#define CRAMFS_MAGIC_WEND 0x453dcd28 /* magic number with the wrong endianess */ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 +#define SELINUX_MAGIC 0xf97cff8c #define TMPFS_MAGIC 0x01021994 #define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index 1f76b1ebf62..00000000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. - * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim <donari75@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -#include <linux/blkdev.h> -#include <linux/ata.h> - -/* name for block device */ -#define MG_DISK_NAME "mgd" -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_STORAGE_BUFFER_SIZE 0x200 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* "Drive Select/Head Register" bit values */ -#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ -#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) - - -/* "Device Control Register" bit values */ -#define MG_REG_CTRL_INTR_ENABLE 0x0 -#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) -#define MG_REG_CTRL_RESET (0x1<<2) -#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 -#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) -#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 -#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) -#define MG_REG_CTRL_DPD_DISABLE 0x0 -#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) - -/* Status register bit */ -/* error bit in status register */ -#define MG_REG_STATUS_BIT_ERROR 0x01 -/* corrected error in status register */ -#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 -/* data request bit in status register */ -#define MG_REG_STATUS_BIT_DATA_REQ 0x08 -/* DSC - Drive Seek Complete */ -#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 -/* DWF - Drive Write Fault */ -#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 -#define MG_REG_STATUS_BIT_READY 0x40 -#define MG_REG_STATUS_BIT_BUSY 0x80 - -/* handy status */ -#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ - (MG_REG_STATUS_BIT_BUSY | \ - MG_REG_STATUS_BIT_WRITE_FAULT | \ - MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) - -/* Error register */ -#define MG_REG_ERR_AMNF 0x01 -#define MG_REG_ERR_ABRT 0x04 -#define MG_REG_ERR_IDNF 0x10 -#define MG_REG_ERR_UNC 0x40 -#define MG_REG_ERR_BBK 0x80 - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - struct mg_host *host; -}; - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -#endif diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c..ad613ed66ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -19,6 +19,7 @@ struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; +struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -580,12 +581,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline unsigned long round_hint_to_min(unsigned long hint) { -#ifdef CONFIG_SECURITY hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); -#endif return hint; } @@ -1031,8 +1030,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, - unsigned long end_pfn); extern void remove_all_active_ranges(void); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); @@ -1319,8 +1316,8 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern void *alloc_locked_buffer(size_t size); -extern void free_locked_buffer(void *buffer, size_t size); -extern void release_locked_buffer(void *buffer, size_t size); +extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size); +extern void refund_locked_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 3d1b7bde128..97491f78b08 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -30,6 +30,8 @@ extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +extern int kmmio_init(void); +extern void kmmio_cleanup(void); #ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ diff --git a/include/linux/module.h b/include/linux/module.h index 627ac082e2a..a8f2c0aa4c3 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -337,6 +337,14 @@ struct module const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *trace_events; + unsigned int num_trace_events; +#endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/include/linux/mount.h b/include/linux/mount.h index 51f55f903af..5d527536486 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -30,7 +30,7 @@ struct mnt_namespace; #define MNT_STRICTATIME 0x80 #define MNT_SHRINKABLE 0x100 -#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */ +#define MNT_WRITE_HOLD 0x200 #define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ #define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ @@ -65,13 +65,22 @@ struct vfsmount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; - /* - * This value is not stable unless all of the mnt_writers[] spinlocks - * are held, and all mnt_writer[]s on this mount have 0 as their ->count - */ - atomic_t __mnt_writers; +#ifdef CONFIG_SMP + int *mnt_writers; +#else + int mnt_writers; +#endif }; +static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + return mnt->mnt_writers; +#else + return &mnt->mnt_writers; +#endif +} + static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) @@ -79,7 +88,11 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt) return mnt; } +struct file; /* forward dec */ + extern int mnt_want_write(struct vfsmount *mnt); +extern int mnt_want_write_file(struct file *file); +extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); extern void mntput_no_expire(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7e0ab..878cab4f5fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -150,5 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif diff --git a/include/linux/namei.h b/include/linux/namei.h index 518098fe63a..d870ae2faed 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -18,6 +18,7 @@ enum { MAX_NESTED_LINKS = 8 }; struct nameidata { struct path path; struct qstr last; + struct path root; unsigned int flags; int last_type; unsigned depth; @@ -77,8 +78,8 @@ extern void release_open_intent(struct nameidata *); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_noperm(const char *, struct dentry *); -extern int follow_down(struct vfsmount **, struct dentry **); -extern int follow_up(struct vfsmount **, struct dentry **); +extern int follow_down(struct path *); +extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 0217fb81a63..0e2e100c44a 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -1,6 +1,7 @@ #ifndef __NET_DROPMON_H #define __NET_DROPMON_H +#include <linux/types.h> #include <linux/netlink.h> struct net_dm_drop_point { diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index 3066789b972..b2f384d4261 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -35,6 +35,9 @@ enum tcp_conntrack { /* Has unacknowledged data */ #define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 +/* The field td_maxack has been set */ +#define IP_CT_TCP_FLAG_MAXACK_SET 0x20 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; @@ -46,6 +49,7 @@ struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ u_int32_t td_maxend; /* max of ack + max(win, 1) */ u_int32_t td_maxwin; /* max(win) */ + u_int32_t td_maxack; /* max of ack */ u_int8_t td_scale; /* window scale factor */ u_int8_t flags; /* per direction options */ }; diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcd0201589f..a6d9ef2bb34 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -125,11 +125,9 @@ void nfsd_export_flush(void); void exp_readlock(void); void exp_readunlock(void); struct svc_export * rqst_exp_get_by_name(struct svc_rqst *, - struct vfsmount *, - struct dentry *); + struct path *); struct svc_export * rqst_exp_parent(struct svc_rqst *, - struct vfsmount *mnt, - struct dentry *dentry); + struct path *); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); __be32 exp_pseudoroot(struct svc_rqst *, struct svc_fh *); diff --git a/include/linux/parport.h b/include/linux/parport.h index e1f83c5065c..38a423ed3c0 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -324,6 +324,10 @@ struct parport { int spintime; atomic_t ref_count; + unsigned long devflags; +#define PARPORT_DEVPROC_REGISTERED 0 + struct pardevice *proc_device; /* Currently register proc device */ + struct list_head full_list; struct parport *slaves[3]; }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b6ab381748c..d77c23779c6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1996,10 +1996,12 @@ #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_C950 0x950B #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001 #define PCI_VENDOR_ID_CHELSIO 0x1425 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1581ff235c7..26fd9d12f05 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -86,7 +86,12 @@ struct percpu_data { void *ptrs[1]; }; +/* pointer disguising messes up the kmemleak objects tracking */ +#ifndef CONFIG_DEBUG_KMEMLEAK #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +#else +#define __percpu_disguise(pdata) (struct percpu_data *)(pdata) +#endif #define per_cpu_ptr(ptr, cpu) \ ({ \ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 00000000000..6e133954e2e --- /dev/null +++ b/include/linux/perf_counter.h @@ -0,0 +1,697 @@ +/* + * Performance counters: + * + * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_COUNTER_H +#define _LINUX_PERF_COUNTER_H + +#include <linux/types.h> +#include <linux/ioctl.h> +#include <asm/byteorder.h> + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance counter event types, used by the + * attr.event_id parameter of the sys_perf_counter_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache counters: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" counters provided by the kernel, even if the hardware + * does not support performance counters. These counters measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. + */ +enum perf_counter_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, +}; + +/* + * Bits that can be set in attr.read_format to request that + * reads on the counter should return the indicated quantities, + * in increasing order of bit value, after the counter value. + */ +enum perf_counter_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, +}; + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_attr { + /* + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + __u32 __reserved_1; + + /* + * Type specific configuration information. + */ + __u64 config; + + union { + __u64 sample_period; + __u64 sample_freq; + }; + + __u64 sample_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + + __reserved_2 : 53; + + __u32 wakeup_events; /* wakeup every n events */ + __u32 __reserved_3; + + __u64 __reserved_4; +}; + +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) +#define PERF_COUNTER_IOC_RESET _IO ('$', 3) +#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) + +enum perf_counter_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_counter_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw counters in user-space. + * + * u32 seq; + * s64 count; + * + * do { + * seq = pc->lock; + * + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. + */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware counter identifier */ + __s64 offset; /* add to hardware counter value */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading this value should issue an rmb(), on SMP capable + * platforms, after reading this value -- see perf_counter_wakeup(). + */ + __u64 data_head; /* head in the data section */ +}; + +#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) +#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (2 << 0) +#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) + +struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; +}; + +enum perf_event_type { + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ + PERF_EVENT_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_EVENT_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 sample_period; + * }; + */ + PERF_EVENT_PERIOD = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * }; + */ + PERF_EVENT_THROTTLE = 5, + PERF_EVENT_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * }; + */ + PERF_EVENT_FORK = 7, + + /* + * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * will be PERF_RECORD_* + * + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR + * { u64 config; } && PERF_RECORD_CONFIG + * { u32 cpu, res; } && PERF_RECORD_CPU + * + * { u64 nr; + * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP + * + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN + * }; + */ +}; + +#ifdef __KERNEL__ +/* + * Kernel-internal data types and definitions: + */ + +#ifdef CONFIG_PERF_COUNTERS +# include <asm/perf_counter.h> +#endif + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <linux/hrtimer.h> +#include <linux/fs.h> +#include <linux/pid_namespace.h> +#include <asm/atomic.h> + +struct task_struct; + +/** + * struct hw_perf_counter - performance counter hardware details: + */ +struct hw_perf_counter { +#ifdef CONFIG_PERF_COUNTERS + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long counter_base; + int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; + atomic64_t prev_count; + u64 sample_period; + u64 last_period; + atomic64_t period_left; + u64 interrupts; + + u64 freq_count; + u64 freq_interrupts; + u64 freq_stamp; +#endif +}; + +struct perf_counter; + +/** + * struct pmu - generic performance monitoring unit + */ +struct pmu { + int (*enable) (struct perf_counter *counter); + void (*disable) (struct perf_counter *counter); + void (*read) (struct perf_counter *counter); + void (*unthrottle) (struct perf_counter *counter); +}; + +/** + * enum perf_counter_active_state - the states of a counter + */ +enum perf_counter_active_state { + PERF_COUNTER_STATE_ERROR = -2, + PERF_COUNTER_STATE_OFF = -1, + PERF_COUNTER_STATE_INACTIVE = 0, + PERF_COUNTER_STATE_ACTIVE = 1, +}; + +struct file; + +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; /* nr of data pages */ + int nr_locked; /* nr pages mlocked */ + + atomic_t poll; /* POLL_ for wakeups */ + atomic_t events; /* event limit */ + + atomic_long_t head; /* write position */ + atomic_long_t done_head; /* completed head */ + + atomic_t lock; /* concurrent writes */ + + atomic_t wakeup; /* needs a wakeup */ + + struct perf_counter_mmap_page *user_page; + void *data_pages[0]; +}; + +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); +}; + +/** + * struct perf_counter - performance counter kernel representation: + */ +struct perf_counter { +#ifdef CONFIG_PERF_COUNTERS + struct list_head list_entry; + struct list_head event_entry; + struct list_head sibling_list; + int nr_siblings; + struct perf_counter *group_leader; + const struct pmu *pmu; + + enum perf_counter_active_state state; + atomic64_t count; + + /* + * These are the total time in nanoseconds that the counter + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task counter) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the counter is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the counter was enabled + * tstamp_running: the notional time when the counter was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * counter was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + + struct perf_counter_attr attr; + struct hw_perf_counter hw; + + struct perf_counter_context *ctx; + struct file *filp; + + /* + * These accumulate total time (in nanoseconds) that children + * counters have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* + * Protect attach/detach and child_list: + */ + struct mutex child_mutex; + struct list_head child_list; + struct perf_counter *parent; + + int oncpu; + int cpu; + + struct list_head owner_entry; + struct task_struct *owner; + + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; + + /* poll related */ + wait_queue_head_t waitq; + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_kill; + int pending_disable; + struct perf_pending_entry pending; + + atomic_t event_limit; + + void (*destroy)(struct perf_counter *); + struct rcu_head rcu_head; + + struct pid_namespace *ns; + u64 id; +#endif +}; + +/** + * struct perf_counter_context - counter context structure + * + * Used as a container for task counters and CPU counters as well: + */ +struct perf_counter_context { + /* + * Protect the states of the counters in the list, + * nr_active, and the list: + */ + spinlock_t lock; + /* + * Protect the list of counters. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; + + struct list_head counter_list; + struct list_head event_list; + int nr_counters; + int nr_active; + int is_active; + atomic_t refcount; + struct task_struct *task; + + /* + * Context clock, runs when context enabled. + */ + u64 time; + u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. + */ + struct perf_counter_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; +}; + +/** + * struct perf_counter_cpu_context - per cpu counter context structure + */ +struct perf_cpu_context { + struct perf_counter_context ctx; + struct perf_counter_context *task_ctx; + int active_oncpu; + int max_pertask; + int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; +}; + +#ifdef CONFIG_PERF_COUNTERS + +/* + * Set by architecture code: + */ +extern int perf_max_counters; + +extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); + +extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); +extern void perf_counter_task_tick(struct task_struct *task, int cpu); +extern int perf_counter_init_task(struct task_struct *child); +extern void perf_counter_exit_task(struct task_struct *child); +extern void perf_counter_free_task(struct task_struct *task); +extern void perf_counter_do_pending(void); +extern void perf_counter_print_debug(void); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); +extern int perf_counter_task_disable(void); +extern int perf_counter_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu); +extern void perf_counter_update_userpage(struct perf_counter *counter); + +struct perf_sample_data { + struct pt_regs *regs; + u64 addr; + u64 period; +}; + +extern int perf_counter_overflow(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); + +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return (counter->attr.type != PERF_TYPE_RAW) && + (counter->attr.type != PERF_TYPE_HARDWARE); +} + +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); + +extern void __perf_counter_mmap(struct vm_area_struct *vma); + +static inline void perf_counter_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_counter_mmap(vma); +} + +extern void perf_counter_comm(struct task_struct *tsk); +extern void perf_counter_fork(struct task_struct *tsk); + +extern void perf_counter_task_migration(struct task_struct *task, int cpu); + +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + u16 nr; + u16 hv; + u16 kernel; + u16 user; + u64 ip[MAX_STACK_DEPTH]; +}; + +extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); + +extern int sysctl_perf_counter_paranoid; +extern int sysctl_perf_counter_mlock; +extern int sysctl_perf_counter_sample_rate; + +extern void perf_counter_init(void); + +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ + PERF_EVENT_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + +#else +static inline void +perf_counter_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } +static inline void +perf_counter_task_tick(struct task_struct *task, int cpu) { } +static inline int perf_counter_init_task(struct task_struct *child) { return 0; } +static inline void perf_counter_exit_task(struct task_struct *child) { } +static inline void perf_counter_free_task(struct task_struct *task) { } +static inline void perf_counter_do_pending(void) { } +static inline void perf_counter_print_debug(void) { } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } +static inline int perf_counter_task_disable(void) { return -EINVAL; } +static inline int perf_counter_task_enable(void) { return -EINVAL; } + +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } + +static inline void perf_counter_mmap(struct vm_area_struct *vma) { } +static inline void perf_counter_comm(struct task_struct *tsk) { } +static inline void perf_counter_fork(struct task_struct *tsk) { } +static inline void perf_counter_init(void) { } +static inline void perf_counter_task_migration(struct task_struct *task, + int cpu) { } +#endif + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index c8f038554e8..b43a9e03905 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -152,5 +152,6 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); #endif diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e..b00df4c79c6 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,4 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index fbfa3d44d33..e6e77d31c41 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -93,20 +93,9 @@ struct vmcore { #ifdef CONFIG_PROC_FS -extern spinlock_t proc_subdir_lock; - extern void proc_root_init(void); void proc_flush_task(struct task_struct *task); -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); -unsigned long task_vsize(struct mm_struct *); -int task_statm(struct mm_struct *, int *, int *, int *, int *); -void task_mem(struct seq_file *, struct mm_struct *); -void clear_refs_smap(struct mm_struct *mm); - -struct proc_dir_entry *de_get(struct proc_dir_entry *de); -void de_put(struct proc_dir_entry *de); extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent); @@ -116,20 +105,7 @@ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, void *data); extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); -extern struct vfsmount *proc_mnt; struct pid_namespace; -extern int proc_fill_super(struct super_block *); -extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); - -/* - * These are generic /proc routines that use the internal - * "struct proc_dir_entry" tree to traverse the filesystem. - * - * The /proc root directory has extended versions to take care - * of the /proc/<pid> subdirectories. - */ -extern int proc_readdir(struct file *, void *, filldir_t); -extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); extern int pid_ns_prepare_proc(struct pid_namespace *ns); extern void pid_ns_release_proc(struct pid_namespace *ns); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc2..59e133d39d5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). - */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/qnx4_fs.h b/include/linux/qnx4_fs.h index 787d19ea9f4..8b9aee1a9ce 100644 --- a/include/linux/qnx4_fs.h +++ b/include/linux/qnx4_fs.h @@ -85,65 +85,4 @@ struct qnx4_super_block { struct qnx4_inode_entry AltBoot; }; -#ifdef __KERNEL__ - -#define QNX4_DEBUG 0 - -#if QNX4_DEBUG -#define QNX4DEBUG(X) printk X -#else -#define QNX4DEBUG(X) (void) 0 -#endif - -struct qnx4_sb_info { - struct buffer_head *sb_buf; /* superblock buffer */ - struct qnx4_super_block *sb; /* our superblock */ - unsigned int Version; /* may be useful */ - struct qnx4_inode_entry *BitMap; /* useful */ -}; - -struct qnx4_inode_info { - struct qnx4_inode_entry raw; - loff_t mmu_private; - struct inode vfs_inode; -}; - -extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); -extern unsigned long qnx4_count_free_blocks(struct super_block *sb); -extern unsigned long qnx4_block_map(struct inode *inode, long iblock); - -extern struct buffer_head *qnx4_bread(struct inode *, int, int); - -extern const struct inode_operations qnx4_file_inode_operations; -extern const struct inode_operations qnx4_dir_inode_operations; -extern const struct file_operations qnx4_file_operations; -extern const struct file_operations qnx4_dir_operations; -extern int qnx4_is_free(struct super_block *sb, long block); -extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy); -extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd); -extern void qnx4_truncate(struct inode *inode); -extern void qnx4_free_inode(struct inode *inode); -extern int qnx4_unlink(struct inode *dir, struct dentry *dentry); -extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry); -extern int qnx4_sync_file(struct file *file, struct dentry *dentry, int); -extern int qnx4_sync_inode(struct inode *inode); - -static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct qnx4_inode_info *qnx4_i(struct inode *inode) -{ - return container_of(inode, struct qnx4_inode_info, vfs_inode); -} - -static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode) -{ - return &qnx4_i(inode)->raw; -} - -#endif /* __KERNEL__ */ - #endif diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 36353d95c8d..7bc45759368 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -20,7 +20,12 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb) /* * declaration of quota_function calls in kernel. */ -void sync_dquots(struct super_block *sb, int type); +void sync_quota_sb(struct super_block *sb, int type); +static inline void writeout_quota_sb(struct super_block *sb, int type) +{ + if (sb->s_qcop->quota_sync) + sb->s_qcop->quota_sync(sb, type); +} int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); @@ -253,12 +258,7 @@ static inline void vfs_dq_free_inode(struct inode *inode) inode->i_sb->dq_op->free_inode(inode, 1); } -/* The following two functions cannot be called inside a transaction */ -static inline void vfs_dq_sync(struct super_block *sb) -{ - sync_dquots(sb, -1); -} - +/* Cannot be called inside a transaction */ static inline int vfs_dq_off(struct super_block *sb, int remount) { int ret = -ENOSYS; @@ -334,7 +334,11 @@ static inline void vfs_dq_free_inode(struct inode *inode) { } -static inline void vfs_dq_sync(struct super_block *sb) +static inline void sync_quota_sb(struct super_block *sb, int type) +{ +} + +static inline void writeout_quota_sb(struct super_block *sb, int type) { } diff --git a/include/linux/rational.h b/include/linux/rational.h new file mode 100644 index 00000000000..4f532fcd9ee --- /dev/null +++ b/include/linux/rational.h @@ -0,0 +1,19 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers, + * e.g. when calculating optimum numerator/denominator pairs for + * pll configuration taking into account restricted register size + */ + +#ifndef _LINUX_RATIONAL_H +#define _LINUX_RATIONAL_H + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator); + +#endif /* _LINUX_RATIONAL_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c9..5710f43bbc9 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5312b..5a5153806c4 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h index 6473650c28f..dab68bbed67 100644 --- a/include/linux/reiserfs_fs_sb.h +++ b/include/linux/reiserfs_fs_sb.h @@ -453,6 +453,7 @@ enum reiserfs_mount_options { REISERFS_ATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, + REISERFS_EXPOSE_PRIVROOT, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, @@ -490,6 +491,7 @@ enum reiserfs_mount_options { #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) +#define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e1b7b217388..8670f1575fe 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -11,7 +11,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - u32 type:2, len:3, time_delta:27; + u32 type_len:5, time_delta:27; u32 array[]; }; @@ -24,7 +24,8 @@ struct ring_buffer_event { * size is variable depending on how much * padding is needed * If time_delta is non zero: - * everything else same as RINGBUF_TYPE_DATA + * array[0] holds the actual length + * size = 4 + length (bytes) * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -35,22 +36,23 @@ struct ring_buffer_event { * array[1..2] = tv_sec * size = 16 bytes * - * @RINGBUF_TYPE_DATA: Data record - * If len is zero: + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record + * If type_len is zero: * array[0] holds the actual length * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * size = 4 + length (bytes) * else - * length = len << 2 + * length = type_len << 2 * array[0..(length+3)/4-1] holds data * size = 4 + length (bytes) */ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, - RINGBUF_TYPE_DATA, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); @@ -68,13 +70,54 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +/* + * ring_buffer_event_discard can discard any event in the ring buffer. + * it is up to the caller to protect against a reader from + * consuming it or a writer from wrapping and replacing it. + * + * No external protection is needed if this is called before + * the event is commited. But in that case it would be better to + * use ring_buffer_discard_commit. + * + * Note, if an event that has not been committed is discarded + * with ring_buffer_event_discard, it must still be committed. + */ void ring_buffer_event_discard(struct ring_buffer_event *event); /* + * ring_buffer_discard_commit will remove an event that has not + * ben committed yet. If this is used, then ring_buffer_unlock_commit + * must not be called on the discarded event. This function + * will try to remove the event from the ring buffer completely + * if another event has not been written after it. + * + * Example use: + * + * if (some_condition) + * ring_buffer_discard_commit(buffer, event); + * else + * ring_buffer_unlock_commit(buffer, event); + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + +/* * size is in bytes for each per CPU buffer. */ struct ring_buffer * -ring_buffer_alloc(unsigned long size, unsigned flags); +__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key); + +/* + * Because the ring buffer is generic, if other users of the ring buffer get + * traced by ftrace, it can produce lockdep warnings. We need to keep each + * ring buffer's lock class separate. + */ +#define ring_buffer_alloc(size, flags) \ +({ \ + static struct lock_class_key __key; \ + __ring_buffer_alloc((size), (flags), &__key); \ +}) + void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); @@ -122,6 +165,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, @@ -137,6 +182,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); +struct trace_seq; + +int ring_buffer_print_entry_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_seq *s); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc8049..4896fdfec91 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include <linux/proportions.h> #include <linux/seccomp.h> #include <linux/rcupdate.h> +#include <linux/rculist.h> #include <linux/rtmutex.h> #include <linux/time.h> @@ -96,8 +97,9 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; struct fs_struct; +struct bts_context; +struct perf_counter_context; /* * List of flags we want to share for kernel threads, @@ -116,6 +118,7 @@ struct fs_struct; * 11 bit fractions. */ extern unsigned long avenrun[]; /* Load averages */ +extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); #define FSHIFT 11 /* nr of bits of precision */ #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */ @@ -135,8 +138,9 @@ DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); -extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern void calc_global_load(void); +extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); @@ -672,6 +676,10 @@ struct user_struct { struct work_struct work; #endif #endif + +#ifdef CONFIG_PERF_COUNTERS + atomic_long_t locked_vm; +#endif }; extern int uids_sysfs_init(void); @@ -838,7 +846,17 @@ struct sched_group { */ u32 reciprocal_cpu_power; - unsigned long cpumask[]; + /* + * The CPUs this group covers. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_group' in kernel/sched.c) + */ + unsigned long cpumask[0]; }; static inline struct cpumask *sched_group_cpus(struct sched_group *sg) @@ -924,8 +942,17 @@ struct sched_domain { char *name; #endif - /* span of all CPUs in this domain */ - unsigned long span[]; + /* + * Span of all CPUs in this domain. + * + * NOTE: this field is variable length. (Allocated dynamically + * by attaching extra space to the end of the structure, + * depending on how many CPUs the kernel has booted up with) + * + * It is also be embedded into static data structures at build + * time. (See 'struct static_sched_domain' in kernel/sched.c) + */ + unsigned long span[0]; }; static inline struct cpumask *sched_domain_span(struct sched_domain *sd) @@ -1052,9 +1079,10 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 nr_migrations; + u64 start_runtime; u64 avg_wakeup; - u64 nr_migrations; #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -1209,18 +1237,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; - size_t bts_size; -#endif /* CONFIG_X86_PTRACE_BTS */ + struct bts_context *bts; /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; @@ -1247,7 +1268,9 @@ struct task_struct { * credentials (COW) */ const struct cred *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1380,6 +1403,11 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif +#ifdef CONFIG_PERF_COUNTERS + struct perf_counter_context *perf_counter_ctxp; + struct mutex perf_counter_mutex; + struct list_head perf_counter_list; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -1428,7 +1456,9 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; -#endif + /* bitmask of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ @@ -1885,6 +1915,7 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); @@ -2001,8 +2032,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +extern void wait_task_context_switch(struct task_struct *p); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void wait_task_context_switch(struct task_struct *p) {} static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { @@ -2010,7 +2043,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2049,8 +2083,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) @@ -2388,6 +2422,13 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +/* + * Call the function if the target task is executing on a CPU right now: + */ +extern void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info); + + #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/include/linux/security.h b/include/linux/security.h index d5fd6163606..5eff459b383 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { + if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) + return -EACCES; return 0; } diff --git a/include/linux/serial.h b/include/linux/serial.h index 9136cc5608c..e5bb75a6380 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -96,54 +96,76 @@ struct serial_uart_config { /* * Definitions for async_struct (and serial_struct) flags field + * + * Define ASYNCB_* for convenient use with {test,set,clear}_bit. */ -#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ASYNC_SPD_MASK 0x1030 -#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ - -#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ -#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ - -#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ - -#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety - * checks. Note: can be dangerous! */ - -#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ - -#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ -#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ -#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ -#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards - --- no longer used */ -#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ - -#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ -#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ +#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes + * on the callout port */ +#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ +#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ +#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */ +#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ +#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ +#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during + * autoconfiguration */ +#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */ +#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */ +#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */ +#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */ +#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */ +#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */ +#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety + * checks. Note: can be dangerous! */ +#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */ +#define ASYNCB_LAST_USER 15 + +/* Internal flags used only by kernel */ +#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */ +#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */ +#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */ +#define ASYNCB_CLOSING 27 /* Serial port is closing */ +#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */ +#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */ +#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */ +#define ASYNCB_CONS_FLOW 23 /* flow control for console */ +#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */ +#define ASYNCB_FIRST_KERNEL 22 + +#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY) +#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT) +#define ASYNC_SAK (1U << ASYNCB_SAK) +#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS) +#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI) +#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI) +#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST) +#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ) +#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT) +#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT) +#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP) +#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD) +#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI) +#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY) +#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART) +#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE) + +#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1) +#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \ + ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY) +#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI) +#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI) +#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI) + +#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED) +#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE) +#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF) +#define ASYNC_CLOSING (1U << ASYNCB_CLOSING) +#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW) +#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD) +#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ) +#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW) +#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA) +#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1)) /* * Multiport serial configuration structure --- external structure diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 57a97e52e58..6fd80c4243f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -41,7 +41,8 @@ #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ -#define PORT_MAX_8250 17 /* max port ID */ +#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ +#define PORT_MAX_8250 18 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed @@ -167,6 +168,9 @@ /* MAX3100 */ #define PORT_MAX3100 86 +/* Timberdale UART */ +#define PORT_TIMBUART 87 + #ifdef __KERNEL__ #include <linux/compiler.h> diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 893cc53486b..1c297ddc9d5 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -25,8 +25,7 @@ struct plat_sci_port { unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ unsigned int type; /* SCI / SCIF / IRDA */ upf_t flags; /* UPF_* flags */ + char *clk; /* clock string */ }; -int early_sci_setup(struct uart_port *port); - #endif /* __LINUX_SERIAL_SCI_H */ diff --git a/include/linux/sh_cmt.h b/include/linux/sh_cmt.h deleted file mode 100644 index 68cacde5954..00000000000 --- a/include/linux/sh_cmt.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __SH_CMT_H__ -#define __SH_CMT_H__ - -struct sh_cmt_config { - char *name; - unsigned long channel_offset; - int timer_bit; - char *clk; - unsigned long clockevent_rating; - unsigned long clocksource_rating; -}; - -#endif /* __SH_CMT_H__ */ diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h new file mode 100644 index 00000000000..864bd56bd3b --- /dev/null +++ b/include/linux/sh_timer.h @@ -0,0 +1,13 @@ +#ifndef __SH_TIMER_H__ +#define __SH_TIMER_H__ + +struct sh_timer_config { + char *name; + long channel_offset; + int timer_bit; + char *clk; + unsigned long clockevent_rating; + unsigned long clocksource_rating; +}; + +#endif /* __SH_TIMER_H__ */ diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f8aa5..c7552836bd9 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/include/linux/slab.h b/include/linux/slab.h index 24c5602bee9..48803064ced 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -62,6 +62,8 @@ # define SLAB_DEBUG_OBJECTS 0x00000000UL #endif +#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ + /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5ac9b0bcaf9..713f841ecaa 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ #include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include <linux/compiler.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 5046f90c117..be5d40c43bd 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include <linux/gfp.h> #include <linux/workqueue.h> #include <linux/kobject.h> -#include <trace/kmemtrace.h> +#include <linux/kmemtrace.h> enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 938234c4a99..d4841ed8215 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) #define __raw_spin_is_locked(lock) ((void)(lock), 0) /* for sched.c and kernel_lock.c: */ # define __raw_spin_lock(lock) do { (void)(lock); } while (0) +# define __raw_spin_lock_flags(lock, flags) do { (void)(lock); } while (0) # define __raw_spin_unlock(lock) do { (void)(lock); } while (0) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ diff --git a/include/linux/splice.h b/include/linux/splice.h index 5f3faa9d15a..18e7c7c0cae 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -11,8 +11,7 @@ #include <linux/pipe_fs_i.h> /* - * splice is tied to pipes as a transport (at least for now), so we'll just - * add the splice flags here. + * Flags passed in from splice/tee/vmsplice */ #define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 62d81435347..d476aad3ff5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page, return 0; } +static inline void +mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) +{ +} + #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ac9ff54f7cb..cb1a6631b8f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, + dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 30520844b8d..c6c84ad8bd7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,6 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; +struct perf_counter_attr; #include <linux/types.h> #include <linux/aio_abi.h> @@ -755,4 +756,8 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); + +asmlinkage long sys_perf_counter_open( + const struct perf_counter_attr __user *attr_uptr, + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f8b56..a8cc4e13434 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { diff --git a/include/linux/time.h b/include/linux/time.h index 242f62499bb..ea16c1a01d5 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -113,6 +113,21 @@ struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) +/* Some architectures do not supply their own clocksource. + * This is mainly the case in architectures that get their + * inter-tick times by reading the counter on their interval + * timer. Since these timers wrap every tick, they're not really + * useful as clocksources. Wrapping them to act like one is possible + * but not very efficient. So we provide a callout these arches + * can implement for use with the jiffies clocksource to provide + * finer then tick granular time. + */ +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET +extern u32 arch_gettimeoffset(void); +#else +static inline u32 arch_gettimeoffset(void) { return 0; } +#endif + extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h new file mode 100644 index 00000000000..c68bccba207 --- /dev/null +++ b/include/linux/trace_seq.h @@ -0,0 +1,92 @@ +#ifndef _LINUX_TRACE_SEQ_H +#define _LINUX_TRACE_SEQ_H + +#include <linux/fs.h> + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE. + */ + +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; + unsigned int readpos; +}; + +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + +/* + * Currently only defined when tracing is enabled. + */ +#ifdef CONFIG_TRACING +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) + __attribute__ ((format (printf, 2, 0))); +extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); + +#else /* CONFIG_TRACING */ +static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) +{ + return 0; +} +static inline int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + return 0; +} + +static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ +} +static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt) +{ + return 0; +} +static inline int trace_seq_puts(struct trace_seq *s, const char *str) +{ + return 0; +} +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) +{ + return 0; +} +static inline int +trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + return 0; +} +static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len) +{ + return 0; +} +static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + return NULL; +} +static inline int trace_seq_path(struct trace_seq *s, struct path *path) +{ + return 0; +} +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_SEQ_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index c7aa154f4bf..eb96603d92d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started - * running. @trace is the value returned by tracehook_prepare_clone(). + * Called after a child is set up, but before it has been started running. * This is not a good place to block, because the child has not started * yet. Suspend the child here if desired, and then block in * tracehook_report_clone_complete(). This must prevent the child from @@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child, * * Called with no locks held, but the child cannot run until this returns. */ -static inline void tracehook_report_clone(int trace, struct pt_regs *regs, +static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { + if (unlikely(task_ptrace(child))) { /* - * The child starts up with an immediate SIGSTOP. + * It doesn't matter who attached/attaching to this + * task, the pending SIGSTOP is right in any case. */ sigaddset(&child->pending.signal, SIGSTOP); set_tsk_thread_flag(child, TIF_SIGPENDING); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d35a7ee7611..14df7e635d4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,6 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +#ifndef DECLARE_TRACE + #define TP_PROTO(args...) args #define TP_ARGS(args...) args @@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } #endif /* CONFIG_TRACEPOINTS */ +#endif /* DECLARE_TRACE */ /* * Connect a probe to a tracepoint. @@ -154,10 +157,8 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args -#define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) - +#ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * @@ -262,5 +263,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif #endif diff --git a/include/linux/tty.h b/include/linux/tty.h index fc39db95499..1488d8c81aa 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,7 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - void (*raise_dtr_rts)(struct tty_port *port); + void (*dtr_rts)(struct tty_port *port, int raise); }; struct tty_port { @@ -201,6 +201,9 @@ struct tty_port { unsigned char *xmit_buf; /* Optional buffer */ int close_delay; /* Close port delay */ int closing_wait; /* Delay for output */ + int drain_delay; /* Set to zero if no pure time + based drain is needed else + set to size of fifo */ }; /* @@ -223,8 +226,11 @@ struct tty_struct { struct tty_driver *driver; const struct tty_operations *ops; int index; - /* The ldisc objects are protected by tty_ldisc_lock at the moment */ - struct tty_ldisc ldisc; + + /* Protects ldisc changes: Lock tty not pty */ + struct mutex ldisc_mutex; + struct tty_ldisc *ldisc; + struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -311,6 +317,7 @@ struct tty_struct { #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ #define TTY_LDISC_CHANGING 10 /* Line discipline changing */ +#define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -403,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); +extern void tty_ldisc_hangup(struct tty_struct *tty); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); @@ -425,6 +433,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, extern void tty_release_dev(struct file *filp); extern int tty_init_termios(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); + extern struct mutex tty_mutex; extern void tty_write_unlock(struct tty_struct *tty); @@ -438,6 +449,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index bcba84ea2d8..3566129384a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -127,7 +127,8 @@ * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. * - * Optional: Always invoke via tty_throttle(); + * Optional: Always invoke via tty_throttle(), called under the + * termios lock. * * void (*unthrottle)(struct tty_struct * tty); * @@ -135,7 +136,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * - * Optional: Always invoke via tty_unthrottle(); + * Optional: Always invoke via tty_unthrottle(), called under the + * termios lock. * * void (*stop)(struct tty_struct *tty); * diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 625e9e4639c..8cdfed738fe 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -224,8 +224,7 @@ struct usb_serial_driver { /* Called by console with tty = NULL and by tty */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); - void (*close)(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); + void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ @@ -241,6 +240,10 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + /* Called by the tty layer for port level work. There may or may not + be an attached tty at this point */ + void (*dtr_rts)(struct usb_serial_port *port, int on); + int (*carrier_raised)(struct usb_serial_port *port); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); @@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); -extern void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +extern void usb_serial_generic_close(struct usb_serial_port *port); extern int usb_serial_generic_resume(struct usb_serial *serial); extern int usb_serial_generic_write_room(struct tty_struct *tty); extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 94c56d29869..be7d255fc7c 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -15,6 +15,10 @@ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_IDENTIFY 8 /* ATA IDENTIFY supported */ + +#define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ struct virtio_blk_config { @@ -32,6 +36,7 @@ struct virtio_blk_config } geometry; /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ __u32 blk_size; + __u8 identify[VIRTIO_BLK_ID_BYTES]; } __attribute__((packed)); /* These two define direction. */ @@ -55,6 +60,13 @@ struct virtio_blk_outhdr __u64 sector; }; +struct virtio_scsi_inhdr { + __u32 errors; + __u32 data_len; + __u32 sense_len; + __u32 residual; +}; + /* And this is the final byte of the write scatter-gather list. */ #define VIRTIO_BLK_S_OK 0 #define VIRTIO_BLK_S_IOERR 1 diff --git a/include/linux/wait.h b/include/linux/wait.h index bc024632f36..6788e1a4d4c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 93445477f86..3224820c851 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -79,7 +79,6 @@ struct writeback_control { void writeback_inodes(struct writeback_control *wbc); int inode_wait(void *); void sync_inodes_sb(struct super_block *, int wait); -void sync_inodes(int wait); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 43b50d36925..3878d1dc7f5 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -270,7 +270,7 @@ static inline unsigned char scsi_get_prot_type(struct scsi_cmnd *scmd) static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) { - return scmd->request->sector; + return blk_rq_pos(scmd->request); } static inline unsigned scsi_prot_sg_count(struct scsi_cmnd *cmd) diff --git a/include/trace/block.h b/include/trace/block.h deleted file mode 100644 index 25b7068b819..00000000000 --- a/include/trace/block.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _TRACE_BLOCK_H -#define _TRACE_BLOCK_H - -#include <linux/blkdev.h> -#include <linux/tracepoint.h> - -DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); - -DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); - -DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); - -DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); - -DECLARE_TRACE(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); - -DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); - -#endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 00000000000..f7a7ae1e8f9 --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include <linux/stringify.h> + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) <trace/events/system.h> +# define UNDEF_TRACE_INCLUDE_PATH +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#ifdef CONFIG_EVENT_TRACING +#include <trace/ftrace.h> +#endif + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_PATH +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h new file mode 100644 index 00000000000..d6b05f42dd4 --- /dev/null +++ b/include/trace/events/block.h @@ -0,0 +1,493 @@ +#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BLOCK_H + +#include <linux/blktrace_api.h> +#include <linux/blkdev.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + +TRACE_EVENT(block_rq_abort, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); + __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_insert, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); + __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_issue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( unsigned int, bytes ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); + __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __entry->bytes, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_rq_requeue, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); + __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); + +TRACE_EVENT(block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, 6 ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); + __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); + __entry->errors = rq->errors; + + blk_fill_rwbs_rq(__entry->rwbs, rq); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) +); +TRACE_EVENT(block_bio_bounce, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_complete, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __field( int, error ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->error) +); + +TRACE_EVENT(block_bio_backmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_frontmerge, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_bio_queue, + + TP_PROTO(struct request_queue *q, struct bio *bio), + + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_getrq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_sleeprq, + + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), + + TP_ARGS(q, bio, rw), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->sector = bio ? bio->bi_sector : 0; + __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; + blk_fill_rwbs(__entry->rwbs, + bio ? bio->bi_rw : 0, __entry->nr_sector); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) +); + +TRACE_EVENT(block_plug, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s]", __entry->comm) +); + +TRACE_EVENT(block_unplug_timer, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_unplug_io, + + TP_PROTO(struct request_queue *q), + + TP_ARGS(q), + + TP_STRUCT__entry( + __field( int, nr_rq ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) +); + +TRACE_EVENT(block_split, + + TP_PROTO(struct request_queue *q, struct bio *bio, + unsigned int new_sector), + + TP_ARGS(q, bio, new_sector), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( sector_t, new_sector ) + __array( char, rwbs, 6 ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->new_sector = new_sector; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu / %llu [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + (unsigned long long)__entry->new_sector, + __entry->comm) +); + +TRACE_EVENT(block_remap, + + TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from), + + TP_ARGS(q, bio, dev, from), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( dev_t, old_dev ) + __field( sector_t, old_sector ) + __array( char, rwbs, 6 ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + __entry->old_dev = dev; + __entry->old_sector = from; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + ), + + TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, + MAJOR(__entry->old_dev), MINOR(__entry->old_dev), + (unsigned long long)__entry->old_sector) +); + +#endif /* _TRACE_BLOCK_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 00000000000..b0c7ede55eb --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,145 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include <linux/tracepoint.h> +#include <linux/interrupt.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ + softirq_name(RCU)) + +/** + * irq_handler_entry - called immediately before the irq action handler + * @irq: irq number + * @action: pointer to struct irqaction + * + * The struct irqaction pointed to by @action contains various + * information about the handler, including the device name, + * @action->name, and the device id, @action->dev_id. When used in + * conjunction with the irq_handler_exit tracepoint, we can figure + * out irq handler latencies. + */ +TRACE_EVENT(irq_handler_entry, + + TP_PROTO(int irq, struct irqaction *action), + + TP_ARGS(irq, action), + + TP_STRUCT__entry( + __field( int, irq ) + __string( name, action->name ) + ), + + TP_fast_assign( + __entry->irq = irq; + __assign_str(name, action->name); + ), + + TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) +); + +/** + * irq_handler_exit - called immediately after the irq action handler returns + * @irq: irq number + * @action: pointer to struct irqaction + * @ret: return value + * + * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding + * @action->handler scuccessully handled this irq. Otherwise, the irq might be + * a shared irq line, or the irq was not handled successfully. Can be used in + * conjunction with the irq_handler_entry to understand irq handler latencies. + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +/** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +TRACE_EVENT(softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec), + + TP_STRUCT__entry( + __field( int, vec ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) +); + +/** + * softirq_exit - called immediately after the softirq handler returns + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the struct softirq_action + * that has handled the softirq. By subtracting the @vec pointer from + * the @h pointer, we can determine the softirq number. Also, when used in + * combination with the softirq_entry tracepoint we can determine the softirq + * latency. + */ +TRACE_EVENT(softirq_exit, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec), + + TP_STRUCT__entry( + __field( int, vec ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, + show_softirq_name(__entry->vec)) +); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 00000000000..9baba50d651 --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,231 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +/* + * The order of these masks is important. Matching masks will be seen + * first and the left over flags will end up showing by themselves. + * + * For example, if we have GFP_KERNEL before GFP_USER we wil get: + * + * GFP_KERNEL|GFP_HARDWALL + * + * Thus most bits set go first. + */ +#define show_gfp_flags(flags) \ + (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ + {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ + {(unsigned long)GFP_USER, "GFP_USER"}, \ + {(unsigned long)GFP_TEMPORARY, "GFP_TEMPORARY"}, \ + {(unsigned long)GFP_KERNEL, "GFP_KERNEL"}, \ + {(unsigned long)GFP_NOFS, "GFP_NOFS"}, \ + {(unsigned long)GFP_ATOMIC, "GFP_ATOMIC"}, \ + {(unsigned long)GFP_NOIO, "GFP_NOIO"}, \ + {(unsigned long)__GFP_HIGH, "GFP_HIGH"}, \ + {(unsigned long)__GFP_WAIT, "GFP_WAIT"}, \ + {(unsigned long)__GFP_IO, "GFP_IO"}, \ + {(unsigned long)__GFP_COLD, "GFP_COLD"}, \ + {(unsigned long)__GFP_NOWARN, "GFP_NOWARN"}, \ + {(unsigned long)__GFP_REPEAT, "GFP_REPEAT"}, \ + {(unsigned long)__GFP_NOFAIL, "GFP_NOFAIL"}, \ + {(unsigned long)__GFP_NORETRY, "GFP_NORETRY"}, \ + {(unsigned long)__GFP_COMP, "GFP_COMP"}, \ + {(unsigned long)__GFP_ZERO, "GFP_ZERO"}, \ + {(unsigned long)__GFP_NOMEMALLOC, "GFP_NOMEMALLOC"}, \ + {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ + {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ + {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + ) : "GFP_NOWAIT" + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags)) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags)) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags), + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + show_gfp_flags(__entry->gfp_flags), + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 00000000000..0e956c9dfd7 --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,96 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include <linux/lockdep.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/sched_event_types.h b/include/trace/events/sched.h index 63547dc1125..24ab5bcff7b 100644 --- a/include/trace/sched_event_types.h +++ b/include/trace/events/sched.h @@ -1,9 +1,8 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H -/* use <trace/sched.h> instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif +#include <linux/sched.h> +#include <linux/tracepoint.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM sched @@ -157,6 +156,7 @@ TRACE_EVENT(sched_switch, __array( char, prev_comm, TASK_COMM_LEN ) __field( pid_t, prev_pid ) __field( int, prev_prio ) + __field( long, prev_state ) __array( char, next_comm, TASK_COMM_LEN ) __field( pid_t, next_pid ) __field( int, next_prio ) @@ -166,13 +166,19 @@ TRACE_EVENT(sched_switch, memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; + __entry->prev_state = prev->state; memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->prev_state ? + __print_flags(__entry->prev_state, "|", + { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, + { 16, "Z" }, { 32, "X" }, { 64, "x" }, + { 128, "W" }) : "R", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); @@ -181,9 +187,9 @@ TRACE_EVENT(sched_switch, */ TRACE_EVENT(sched_migrate_task, - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TP_PROTO(struct task_struct *p, int dest_cpu), - TP_ARGS(p, orig_cpu, dest_cpu), + TP_ARGS(p, dest_cpu), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -197,7 +203,7 @@ TRACE_EVENT(sched_migrate_task, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; + __entry->orig_cpu = task_cpu(p); __entry->dest_cpu = dest_cpu; ), @@ -334,4 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig, __entry->comm, __entry->pid) ); -#undef TRACE_SYSTEM +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 00000000000..1e8fabb57c0 --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include <linux/skbuff.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h new file mode 100644 index 00000000000..035f1bff288 --- /dev/null +++ b/include/trace/events/workqueue.h @@ -0,0 +1,100 @@ +#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WORKQUEUE_H + +#include <linux/workqueue.h> +#include <linux/sched.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + +TRACE_EVENT(workqueue_insertion, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +TRACE_EVENT(workqueue_execution, + + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), + + TP_ARGS(wq_thread, work), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(work_func_t, func) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->func = work->func; + ), + + TP_printk("thread=%s:%d func=%pF", __entry->thread_comm, + __entry->thread_pid, __entry->func) +); + +/* Trace the creation of one workqueue thread on a cpu */ +TRACE_EVENT(workqueue_creation, + + TP_PROTO(struct task_struct *wq_thread, int cpu), + + TP_ARGS(wq_thread, cpu), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + __field(int, cpu) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + __entry->cpu = cpu; + ), + + TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm, + __entry->thread_pid, __entry->cpu) +); + +TRACE_EVENT(workqueue_destruction, + + TP_PROTO(struct task_struct *wq_thread), + + TP_ARGS(wq_thread), + + TP_STRUCT__entry( + __array(char, thread_comm, TASK_COMM_LEN) + __field(pid_t, thread_pid) + ), + + TP_fast_assign( + memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); + __entry->thread_pid = wq_thread->pid; + ), + + TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid) +); + +#endif /* _TRACE_WORKQUEUE_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h new file mode 100644 index 00000000000..1867553c61e --- /dev/null +++ b/include/trace/ftrace.h @@ -0,0 +1,591 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * struct ftrace_raw_<call> { + * struct trace_entry ent; + * <type> <item>; + * <type2> <item2>[<len>]; + * [...] + * }; + * + * The <type> <item> is created by the __field(type, item) macro or + * the __array(type2, item2, len) macro. + * We simply do "type item;", and that will create the fields + * in the structure. + */ + +#include <linux/ftrace_event.h> + +#undef __field +#define __field(type, item) type item; + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; \ + static struct ftrace_event_call event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + + +/* + * Stage 2 of the trace events. + * + * Include the following: + * + * struct ftrace_data_offsets_<call> { + * int <item1>; + * int <item2>; + * [...] + * }; + * + * The __dynamic_array() macro will create each int <item>, this is + * to keep the offset of each array from the beginning of the event. + */ + +#undef __field +#define __field(type, item); + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) int item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + struct ftrace_data_offsets_##call { \ + tstruct; \ + }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __data_loc_##item), \ + (unsigned int)sizeof(field.__data_loc_##item)); \ + if (!ret) \ + return 0; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef __entry +#define __entry REC + +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * enum print_line_t + * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_<call> *field; <-- defined in stage 1 + * struct trace_entry *entry; + * struct trace_seq *p; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_<call>.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * p = get_cpu_var(ftrace_event_seq); + * trace_seq_init(p); + * ret = trace_seq_printf(s, <TP_printk> "\n"); + * put_cpu(); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + __entry->__data_loc_##field) + +#undef __get_str +#define __get_str(field) (char *)__get_dynamic_array(field) + +#undef __print_flags +#define __print_flags(flag, delim, flag_array...) \ + ({ \ + static const struct trace_print_flags flags[] = \ + { flag_array, { -1, NULL }}; \ + ftrace_print_flags_seq(p, delim, flag, flags); \ + }) + +#undef __print_symbolic +#define __print_symbolic(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags symbols[] = \ + { symbol_array, { -1, NULL }}; \ + ftrace_print_symbols_seq(p, value, symbols); \ + }) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, #call ": " print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), 0); \ + if (ret) \ + return ret; + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(int, type, 1); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * remember the offset of each array from the beginning of the event. + */ + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __data_offsets->item = __data_size + \ + offsetof(typeof(*entry), __data); \ + __data_size += (len) * sizeof(type); + +#undef __string +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static inline int ftrace_get_offsets_##call( \ + struct ftrace_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + struct ftrace_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 4 of the trace events. + * + * Override the macros in <trace/trace_events.h> to include the following: + * + * static void ftrace_event_<call>(proto) + * { + * event_trace_printk(_RET_IP_, "<call>: " <fmt>); + * } + * + * static int ftrace_reg_event_<call>(void) + * { + * int ret; + * + * ret = register_trace_<call>(ftrace_event_<call>); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to <call>"); + * return ret; + * } + * + * static void ftrace_unreg_event_<call>(void) + * { + * unregister_trace_<call>(ftrace_event_<call>); + * } + * + * + * For those macros defined with TRACE_EVENT: + * + * static struct ftrace_event_call event_<call>; + * + * static void ftrace_raw_event_<call>(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_<call> *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_<call>.id, + * sizeof(struct ftrace_raw_<call>), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * <assign>; <-- Here we assign the entries by the __field and + * __array macros. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_<call>(void) + * { + * int ret; + * + * ret = register_trace_<call>(ftrace_raw_event_<call>); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to <call>"); + * return ret; + * } + * + * static void ftrace_unreg_event_<call>(void) + * { + * unregister_trace_<call>(ftrace_raw_event_<call>); + * } + * + * static struct trace_event ftrace_event_type_<call> = { + * .trace = ftrace_raw_output_<call>, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_<call>(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_<call>); + * if (!id) + * return -ENODEV; + * event_<call>.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_<call> = { + * .name = "<call>", + * .system = "<system>", + * .raw_init = ftrace_raw_init_event_<call>, + * .regfunc = ftrace_reg_event_<call>, + * .unregfunc = ftrace_unreg_event_<call>, + * .show_format = ftrace_format_<call>, + * } + * + */ + +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args + +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&event_call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + +#undef __entry +#define __entry entry + +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __dynamic_array +#define __dynamic_array(type, item, len) \ + __entry->__data_loc_##item = __data_offsets.item; + +#undef __string +#define __string(item, src) __dynamic_array(char, item, -1) \ + +#undef __assign_str +#define __assign_str(dst, src) \ + strcpy(__get_str(dst), src); + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(*entry) + __data_size, \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + \ + tstruct \ + \ + { assign; } \ + \ + if (!filter_current_check_discard(event_call, entry, event)) \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index ff5d4495dc3..00000000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_IRQ_H -#define _TRACE_IRQ_H - -#include <linux/interrupt.h> -#include <linux/tracepoint.h> - -#include <trace/irq_event_types.h> - -#endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h deleted file mode 100644 index 85964ebd47e..00000000000 --- a/include/trace/irq_event_types.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* use <trace/irq.h> instead */ -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#undef TRACE_SYSTEM diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h deleted file mode 100644 index 28ee69f9cd4..00000000000 --- a/include/trace/kmemtrace.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include <linux/tracepoint.h> -#include <linux/types.h> - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 5ca67df87f2..00000000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_LOCKDEP_H -#define _TRACE_LOCKDEP_H - -#include <linux/lockdep.h> -#include <linux/tracepoint.h> - -#include <trace/lockdep_event_types.h> - -#endif diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h deleted file mode 100644 index adccfcd2ec8..00000000000 --- a/include/trace/lockdep_event_types.h +++ /dev/null @@ -1,44 +0,0 @@ - -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lock - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -#endif -#endif - -#undef TRACE_SYSTEM diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index 4e372a1a29b..00000000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _TRACE_SCHED_H -#define _TRACE_SCHED_H - -#include <linux/sched.h> -#include <linux/tracepoint.h> - -#include <trace/sched_event_types.h> - -#endif diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index b66206d9be7..00000000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _TRACE_SKB_H_ -#define _TRACE_SKB_H_ - -#include <linux/skbuff.h> -#include <linux/tracepoint.h> - -DECLARE_TRACE(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location)); - -#endif diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h deleted file mode 100644 index df56f5694be..00000000000 --- a/include/trace/trace_event_types.h +++ /dev/null @@ -1,5 +0,0 @@ -/* trace/<type>_event_types.h here */ - -#include <trace/sched_event_types.h> -#include <trace/irq_event_types.h> -#include <trace/lockdep_event_types.h> diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h deleted file mode 100644 index fd13750ca4b..00000000000 --- a/include/trace/trace_events.h +++ /dev/null @@ -1,5 +0,0 @@ -/* trace/<type>.h here */ - -#include <trace/sched.h> -#include <trace/irq.h> -#include <trace/lockdep.h> diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h deleted file mode 100644 index 7626523deeb..00000000000 --- a/include/trace/workqueue.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __TRACE_WORKQUEUE_H -#define __TRACE_WORKQUEUE_H - -#include <linux/tracepoint.h> -#include <linux/workqueue.h> -#include <linux/sched.h> - -DECLARE_TRACE(workqueue_insertion, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -DECLARE_TRACE(workqueue_execution, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); - -/* Trace the creation of one workqueue thread on a cpu */ -DECLARE_TRACE(workqueue_creation, - TP_PROTO(struct task_struct *wq_thread, int cpu), - TP_ARGS(wq_thread, cpu)); - -DECLARE_TRACE(workqueue_destruction, - TP_PROTO(struct task_struct *wq_thread), - TP_ARGS(wq_thread)); - -#endif /* __TRACE_WORKQUEUE_H */ diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 00000000000..4e65c16a445 --- /dev/null +++ b/include/xen/Kbuild @@ -0,0 +1 @@ +header-y += evtchn.h diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1adc036..e68d59a90ca 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Determine the IRQ which is bound to an event channel */ +unsigned irq_from_evtchn(unsigned int evtchn); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 00000000000..14e833ee4e0 --- /dev/null +++ b/include/xen/evtchn.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e923f..e8b6519d47e 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -57,4 +57,7 @@ struct xen_feature_info { /* Declares the features reported by XENVER_get_features. */ #include "features.h" +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + #endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f9614844..b9763badbd7 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -91,8 +91,7 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; |