diff options
Diffstat (limited to 'include/linux')
40 files changed, 1292 insertions, 879 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index bc3ab707369..dd97fb8408a 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -132,9 +132,6 @@ static inline void *alloc_remap(int nid, unsigned long size) } #endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ -extern unsigned long __meminitdata nr_kernel_pages; -extern unsigned long __meminitdata nr_all_pages; - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, @@ -145,6 +142,8 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned long limit); #define HASH_EARLY 0x00000001 /* Allocating during early boot? */ +#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min + * shift passed via *_hash_shift */ /* Only NUMA needs hash distribution. 64bit NUMA architectures have * sufficient vmalloc space. diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h index 51c8d2d49e4..b6cb5425cde 100644 --- a/include/linux/dvb/frontend.h +++ b/include/linux/dvb/frontend.h @@ -173,7 +173,8 @@ typedef enum fe_modulation { typedef enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, - TRANSMISSION_MODE_AUTO + TRANSMISSION_MODE_AUTO, + TRANSMISSION_MODE_4K } fe_transmit_mode_t; typedef enum fe_bandwidth { @@ -268,15 +269,42 @@ struct dvb_frontend_event { #define DTV_FE_CAPABILITY 16 #define DTV_DELIVERY_SYSTEM 17 -#define DTV_API_VERSION 35 -#define DTV_API_VERSION 35 -#define DTV_CODE_RATE_HP 36 -#define DTV_CODE_RATE_LP 37 -#define DTV_GUARD_INTERVAL 38 -#define DTV_TRANSMISSION_MODE 39 -#define DTV_HIERARCHY 40 +/* ISDB-T and ISDB-Tsb */ +#define DTV_ISDBT_PARTIAL_RECEPTION 18 +#define DTV_ISDBT_SOUND_BROADCASTING 19 -#define DTV_MAX_COMMAND DTV_HIERARCHY +#define DTV_ISDBT_SB_SUBCHANNEL_ID 20 +#define DTV_ISDBT_SB_SEGMENT_IDX 21 +#define DTV_ISDBT_SB_SEGMENT_COUNT 22 + +#define DTV_ISDBT_LAYERA_FEC 23 +#define DTV_ISDBT_LAYERA_MODULATION 24 +#define DTV_ISDBT_LAYERA_SEGMENT_COUNT 25 +#define DTV_ISDBT_LAYERA_TIME_INTERLEAVING 26 + +#define DTV_ISDBT_LAYERB_FEC 27 +#define DTV_ISDBT_LAYERB_MODULATION 28 +#define DTV_ISDBT_LAYERB_SEGMENT_COUNT 29 +#define DTV_ISDBT_LAYERB_TIME_INTERLEAVING 30 + +#define DTV_ISDBT_LAYERC_FEC 31 +#define DTV_ISDBT_LAYERC_MODULATION 32 +#define DTV_ISDBT_LAYERC_SEGMENT_COUNT 33 +#define DTV_ISDBT_LAYERC_TIME_INTERLEAVING 34 + +#define DTV_API_VERSION 35 + +#define DTV_CODE_RATE_HP 36 +#define DTV_CODE_RATE_LP 37 +#define DTV_GUARD_INTERVAL 38 +#define DTV_TRANSMISSION_MODE 39 +#define DTV_HIERARCHY 40 + +#define DTV_ISDBT_LAYER_ENABLED 41 + +#define DTV_ISDBS_TS_ID 42 + +#define DTV_MAX_COMMAND DTV_ISDBS_TS_ID typedef enum fe_pilot { PILOT_ON, diff --git a/include/linux/dvb/version.h b/include/linux/dvb/version.h index 25b823b8173..540b0583d9f 100644 --- a/include/linux/dvb/version.h +++ b/include/linux/dvb/version.h @@ -24,6 +24,6 @@ #define _DVBVERSION_H_ #define DVB_API_VERSION 5 -#define DVB_API_VERSION_MINOR 0 +#define DVB_API_VERSION_MINOR 1 #endif /*_DVBVERSION_H_*/ diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 45ff1849151..1d747f72298 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -31,10 +31,32 @@ struct flex_array { }; }; -#define FLEX_ARRAY_INIT(size, total) { { {\ - .element_size = (size), \ - .total_nr_elements = (total), \ -} } } +/* Number of bytes left in base struct flex_array, excluding metadata */ +#define FLEX_ARRAY_BASE_BYTES_LEFT \ + (FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts)) + +/* Number of pointers in base to struct flex_array_part pages */ +#define FLEX_ARRAY_NR_BASE_PTRS \ + (FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *)) + +/* Number of elements of size that fit in struct flex_array_part */ +#define FLEX_ARRAY_ELEMENTS_PER_PART(size) \ + (FLEX_ARRAY_PART_SIZE / size) + +/* + * Defines a statically allocated flex array and ensures its parameters are + * valid. + */ +#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total) \ + struct flex_array __arrayname = { { { \ + .element_size = (__element_size), \ + .total_nr_elements = (__total), \ + } } }; \ + static inline void __arrayname##_invalid_parameter(void) \ + { \ + BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS * \ + FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ + } struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); @@ -44,6 +66,8 @@ void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); +int flex_array_clear(struct flex_array *fa, unsigned int element_nr); void *flex_array_get(struct flex_array *fa, unsigned int element_nr); +int flex_array_shrink(struct flex_array *fa); #endif /* _FLEX_ARRAY_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 90162fb3bf0..51803528b09 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1066,8 +1066,8 @@ struct file_lock { struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ - struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ - struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ + const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ + const struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { struct nfs_lock_info nfs_fl; struct nfs4_lock_info nfs4_fl; @@ -1318,8 +1318,8 @@ struct super_block { unsigned long long s_maxbytes; /* Max file size */ struct file_system_type *s_type; const struct super_operations *s_op; - struct dquot_operations *dq_op; - struct quotactl_ops *s_qcop; + const struct dquot_operations *dq_op; + const struct quotactl_ops *s_qcop; const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_magic; diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd099ba82cc..4ec5e67e18c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -4,6 +4,7 @@ #include <linux/ring_buffer.h> #include <linux/trace_seq.h> #include <linux/percpu.h> +#include <linux/hardirq.h> struct trace_array; struct tracer; @@ -130,10 +131,15 @@ struct ftrace_event_call { void *data; atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); + int (*profile_enable)(void); + void (*profile_disable)(void); }; +#define FTRACE_MAX_PROFILE_SIZE 2048 + +extern char *trace_profile_buf; +extern char *trace_profile_buf_nmi; + #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 109d179adb9..297df45ffd0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -151,7 +151,7 @@ struct gendisk { struct disk_part_tbl *part_tbl; struct hd_struct part0; - struct block_device_operations *fops; + const struct block_device_operations *fops; struct request_queue *queue; void *private_data; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7c777a0da17..f53e9b868c2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -326,7 +326,6 @@ void free_pages_exact(void *virt, size_t size); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_page(struct page *page); -extern void free_cold_page(struct page *page); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0) @@ -336,18 +335,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(void); void drain_local_pages(void *dummy); -extern bool oom_killer_disabled; - -static inline void oom_killer_disable(void) -{ - oom_killer_disabled = true; -} - -static inline void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} - extern gfp_t gfp_allowed_mask; static inline void set_gfp_allowed_mask(gfp_t mask) diff --git a/include/linux/hid.h b/include/linux/hid.h index a0ebdace7ba..10f62841674 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -494,6 +494,7 @@ struct hid_device { /* device report descriptor */ /* hiddev event handler */ int (*hiddev_connect)(struct hid_device *, unsigned int); + void (*hiddev_disconnect)(struct hid_device *); void (*hiddev_hid_event) (struct hid_device *, struct hid_field *field, struct hid_usage *, __s32); void (*hiddev_report_event) (struct hid_device *, struct hid_report *); @@ -691,6 +692,7 @@ struct hid_device *hid_allocate_device(void); int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size); int hid_check_keys_pressed(struct hid_device *hid); int hid_connect(struct hid_device *hid, unsigned int connect_mask); +void hid_disconnect(struct hid_device *hid); /** * hid_map_usage - map usage input bits @@ -800,6 +802,7 @@ static inline int __must_check hid_hw_start(struct hid_device *hdev, */ static inline void hid_hw_stop(struct hid_device *hdev) { + hid_disconnect(hdev); hdev->ll_driver->stop(hdev); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5cbc620bdfe..176e7ee73ef 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user * int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); -int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); +int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, + struct page **, struct vm_area_struct **, + unsigned long *, int *, int, unsigned int flags); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long, struct page *); void __unmap_hugepage_range(struct vm_area_struct *, @@ -110,6 +112,21 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) #endif /* !CONFIG_HUGETLB_PAGE */ +#define HUGETLB_ANON_FILE "anon_hugepage" + +enum { + /* + * The file will be used as an shm file so shmfs accounting rules + * apply + */ + HUGETLB_SHMFS_INODE = 1, + /* + * The file is being created on the internal vfs mount and shmfs + * accounting rules do not apply + */ + HUGETLB_ANONHUGE_INODE = 2, +}; + #ifdef CONFIG_HUGETLBFS struct hugetlbfs_config { uid_t uid; @@ -148,7 +165,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, int acct, - struct user_struct **user); + struct user_struct **user, int creat_flags); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); @@ -170,7 +187,7 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acct,user) ERR_PTR(-ENOSYS) +#define hugetlb_file_setup(name,size,acct,user,creat) ERR_PTR(-ENOSYS) #endif /* !CONFIG_HUGETLBFS */ @@ -185,7 +202,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, #define HSTATE_NAME_LEN 32 /* Defines one hugetlb page size */ struct hstate { - int hugetlb_next_nid; + int next_nid_to_alloc; + int next_nid_to_free; unsigned int order; unsigned long mask; unsigned long max_huge_pages; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9e7f2e8fc66..21a6f5d9af2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -106,13 +106,13 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_mutex = \ - __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ - .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), +#ifdef CONFIG_PERF_EVENTS +# define INIT_PERF_EVENTS(tsk) \ + .perf_event_mutex = \ + __MUTEX_INITIALIZER(tsk.perf_event_mutex), \ + .perf_event_list = LIST_HEAD_INIT(tsk.perf_event_list), #else -# define INIT_PERF_COUNTERS(tsk) +# define INIT_PERF_EVENTS(tsk) #endif /* @@ -178,7 +178,7 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ + INIT_PERF_EVENTS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/include/linux/ksm.h b/include/linux/ksm.h new file mode 100644 index 00000000000..a485c14ecd5 --- /dev/null +++ b/include/linux/ksm.h @@ -0,0 +1,79 @@ +#ifndef __LINUX_KSM_H +#define __LINUX_KSM_H +/* + * Memory merging support. + * + * This code enables dynamic sharing of identical pages found in different + * memory areas, even if they are not shared by fork(). + */ + +#include <linux/bitops.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/vmstat.h> + +#ifdef CONFIG_KSM +int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags); +int __ksm_enter(struct mm_struct *mm); +void __ksm_exit(struct mm_struct *mm); + +static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +{ + if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags)) + return __ksm_enter(mm); + return 0; +} + +static inline void ksm_exit(struct mm_struct *mm) +{ + if (test_bit(MMF_VM_MERGEABLE, &mm->flags)) + __ksm_exit(mm); +} + +/* + * A KSM page is one of those write-protected "shared pages" or "merged pages" + * which KSM maps into multiple mms, wherever identical anonymous page content + * is found in VM_MERGEABLE vmas. It's a PageAnon page, with NULL anon_vma. + */ +static inline int PageKsm(struct page *page) +{ + return ((unsigned long)page->mapping == PAGE_MAPPING_ANON); +} + +/* + * But we have to avoid the checking which page_add_anon_rmap() performs. + */ +static inline void page_add_ksm_rmap(struct page *page) +{ + if (atomic_inc_and_test(&page->_mapcount)) { + page->mapping = (void *) PAGE_MAPPING_ANON; + __inc_zone_page_state(page, NR_ANON_PAGES); + } +} +#else /* !CONFIG_KSM */ + +static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags) +{ + return 0; +} + +static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm) +{ + return 0; +} + +static inline void ksm_exit(struct mm_struct *mm) +{ +} + +static inline int PageKsm(struct page *page) +{ + return 0; +} + +/* No stub required for page_add_ksm_rmap(page) */ +#endif /* !CONFIG_KSM */ + +#endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6b..b7bbb5ddd7a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,7 +15,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/preempt.h> -#include <linux/marker.h> #include <linux/msi.h> #include <asm/signal.h> diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h index ad651f4e45a..3cc2f2c53e4 100644 --- a/include/linux/lis3lv02d.h +++ b/include/linux/lis3lv02d.h @@ -32,8 +32,17 @@ struct lis3lv02d_platform_data { #define LIS3_IRQ2_DATA_READY (4 << 3) #define LIS3_IRQ2_CLICK (7 << 3) #define LIS3_IRQ_OPEN_DRAIN (1 << 6) -#define LIS3_IRQ_ACTIVE_HIGH (1 << 7) +#define LIS3_IRQ_ACTIVE_LOW (1 << 7) unsigned char irq_cfg; + +#define LIS3_WAKEUP_X_LO (1 << 0) +#define LIS3_WAKEUP_X_HI (1 << 1) +#define LIS3_WAKEUP_Y_LO (1 << 2) +#define LIS3_WAKEUP_Y_HI (1 << 3) +#define LIS3_WAKEUP_Z_LO (1 << 4) +#define LIS3_WAKEUP_Z_HI (1 << 5) + unsigned char wakeup_flags; + unsigned char wakeup_thresh; }; #endif /* __LIS3LV02D_H_ */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966..ccf2e0dc077 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -395,7 +395,7 @@ static inline int nlm_compare_locks(const struct file_lock *fl1, &&(fl1->fl_type == fl2->fl_type || fl2->fl_type == F_UNLCK); } -extern struct lock_manager_operations nlmsvc_lock_operations; +extern const struct lock_manager_operations nlmsvc_lock_operations; #endif /* __KERNEL__ */ diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782..00000000000 --- a/include/linux/marker.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include <stdarg.h> -#include <linux/types.h> - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. */ - char ptype; /* probe type : 0 : single, 1 : multi */ - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ -} __attribute__((aligned(8))); - -#ifdef CONFIG_MARKERS - -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - -/* - * Note : the empty asm volatile with read constraint is used here instead of a - * "used" attribute to fix a gcc 4.1.x bug. - * Make sure the alignment of the structure in the __markers section will - * not add unwanted padding between the beginning of the section and the - * structure. Force alignment to the same alignment as the section start. - * - * The "generic" argument controls which marker enabling mechanism must be used. - * If generic is true, a variable read is used. - * If generic is false, immediate values are used. - */ -#define __trace_mark(generic, name, call_private, format, args...) \ - do { \ - DEFINE_MARKER(name, format); \ - __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, ## args);\ - } \ - } while (0) - -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - -extern void marker_update_probe_range(struct marker *begin, - struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - -#else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) -#define __trace_mark(generic, name, call_private, format, args...) \ - __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } -#define GET_MARKER(name) -#endif /* CONFIG_MARKERS */ - -/** - * trace_mark - Marker using code patching - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using optimized code patching technique (imv_read()) - * to be enabled when immediate values are present. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(0, name, NULL, format, ## args) - -/** - * _trace_mark - Marker using variable read - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using a standard memory read (_imv_read()) to be - * enabled. Should be used for markers in code paths where instruction - * modification based enabling is not welcome. (__init and __exit functions, - * lockdep, some traps, printk). - */ -#define _trace_mark(name, format, args...) \ - __trace_mark(1, name, NULL, format, ## args) - -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. - */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -/* - * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. - */ -#define marker_synchronize_unregister() synchronize_sched() - -#endif diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 9be484d1128..7c08052e332 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -47,22 +47,16 @@ mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) } /* - * 2 mempool_alloc_t's and a mempool_free_t to kmalloc/kzalloc and kfree - * the amount of memory specified by pool_data + * a mempool_alloc_t and a mempool_free_t to kmalloc and kfree the + * amount of memory specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data); -void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data); void mempool_kfree(void *element, void *pool_data); static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) { return mempool_create(min_nr, mempool_kmalloc, mempool_kfree, (void *) size); } -static inline mempool_t *mempool_create_kzalloc_pool(int min_nr, size_t size) -{ - return mempool_create(min_nr, mempool_kzalloc, mempool_kfree, - (void *) size); -} /* * A mempool_alloc_t and mempool_free_t for a simple page allocator that diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a72cc78e6b..5946e2ff9fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -25,6 +25,7 @@ extern unsigned long max_mapnr; #endif extern unsigned long num_physpages; +extern unsigned long totalram_pages; extern void * high_memory; extern int page_cluster; @@ -103,6 +104,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ +#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -700,17 +702,8 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(void); -#ifdef CONFIG_SHMEM -extern int shmem_lock(struct file *file, int lock, struct user_struct *user); -#else -static inline int shmem_lock(struct file *file, int lock, - struct user_struct *user) -{ - return 0; -} -#endif +int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); - int shmem_zero_setup(struct vm_area_struct *); #ifndef CONFIG_MMU @@ -815,6 +808,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +struct page *get_dump_page(unsigned long addr); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned long offset); @@ -1058,6 +1052,8 @@ extern void setup_per_cpu_pageset(void); static inline void setup_per_cpu_pageset(void) {} #endif +extern void zone_pcp_update(struct zone *zone); + /* nommu.c */ extern atomic_long_t mmap_pages_allocated; @@ -1226,7 +1222,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 7fbb9726755..8835b877b8d 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -5,7 +5,7 @@ * page_is_file_cache - should the page be on a file LRU or anon LRU? * @page: the page to test * - * Returns LRU_FILE if @page is page cache page backed by a regular filesystem, + * Returns 1 if @page is page cache page backed by a regular filesystem, * or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed. * Used by functions that manipulate the LRU lists, to sort a page * onto the right LRU list. @@ -16,11 +16,7 @@ */ static inline int page_is_file_cache(struct page *page) { - if (PageSwapBacked(page)) - return 0; - - /* The page is page cache backed by a normal filesystem. */ - return LRU_FILE; + return !PageSwapBacked(page); } static inline void @@ -39,21 +35,36 @@ del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) mem_cgroup_del_lru_list(page, l); } +/** + * page_lru_base_type - which LRU list type should a page be on? + * @page: the page to test + * + * Used for LRU list index arithmetic. + * + * Returns the base LRU type - file or anon - @page should be on. + */ +static inline enum lru_list page_lru_base_type(struct page *page) +{ + if (page_is_file_cache(page)) + return LRU_INACTIVE_FILE; + return LRU_INACTIVE_ANON; +} + static inline void del_page_from_lru(struct zone *zone, struct page *page) { - enum lru_list l = LRU_BASE; + enum lru_list l; list_del(&page->lru); if (PageUnevictable(page)) { __ClearPageUnevictable(page); l = LRU_UNEVICTABLE; } else { + l = page_lru_base_type(page); if (PageActive(page)) { __ClearPageActive(page); l += LRU_ACTIVE; } - l += page_is_file_cache(page); } __dec_zone_state(zone, NR_LRU_BASE + l); mem_cgroup_del_lru_list(page, l); @@ -68,14 +79,14 @@ del_page_from_lru(struct zone *zone, struct page *page) */ static inline enum lru_list page_lru(struct page *page) { - enum lru_list lru = LRU_BASE; + enum lru_list lru; if (PageUnevictable(page)) lru = LRU_UNEVICTABLE; else { + lru = page_lru_base_type(page); if (PageActive(page)) lru += LRU_ACTIVE; - lru += page_is_file_cache(page); } return lru; diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h new file mode 100644 index 00000000000..70fffeba749 --- /dev/null +++ b/include/linux/mmu_context.h @@ -0,0 +1,9 @@ +#ifndef _LINUX_MMU_CONTEXT_H +#define _LINUX_MMU_CONTEXT_H + +struct mm_struct; + +void use_mm(struct mm_struct *mm); +void unuse_mm(struct mm_struct *mm); + +#endif diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b77486d152c..4e02ee2b071 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -62,6 +62,15 @@ struct mmu_notifier_ops { unsigned long address); /* + * change_pte is called in cases that pte mapping to page is changed: + * for example, when ksm remaps pte to point to a new shared page. + */ + void (*change_pte)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address, + pte_t pte); + + /* * Before this is invoked any secondary MMU is still ok to * read/write to the page previously pointed to by the Linux * pte because the page hasn't been freed yet and it won't be @@ -154,6 +163,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long address); +extern void __mmu_notifier_change_pte(struct mm_struct *mm, + unsigned long address, pte_t pte); extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, unsigned long address); extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, @@ -175,6 +186,13 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline void mmu_notifier_change_pte(struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_change_pte(mm, address, pte); +} + static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, unsigned long address) { @@ -236,6 +254,16 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __young; \ }) +#define set_pte_at_notify(__mm, __address, __ptep, __pte) \ +({ \ + struct mm_struct *___mm = __mm; \ + unsigned long ___address = __address; \ + pte_t ___pte = __pte; \ + \ + set_pte_at(___mm, ___address, __ptep, ___pte); \ + mmu_notifier_change_pte(___mm, ___address, ___pte); \ +}) + #else /* CONFIG_MMU_NOTIFIER */ static inline void mmu_notifier_release(struct mm_struct *mm) @@ -248,6 +276,11 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline void mmu_notifier_change_pte(struct mm_struct *mm, + unsigned long address, pte_t pte) +{ +} + static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, unsigned long address) { @@ -273,6 +306,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define ptep_clear_flush_notify ptep_clear_flush +#define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 88959853737..652ef01be58 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -38,6 +38,7 @@ #define MIGRATE_UNMOVABLE 0 #define MIGRATE_RECLAIMABLE 1 #define MIGRATE_MOVABLE 2 +#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ #define MIGRATE_RESERVE 3 #define MIGRATE_ISOLATE 4 /* can't allocate from here */ #define MIGRATE_TYPES 5 @@ -94,11 +95,15 @@ enum zone_stat_item { NR_SLAB_RECLAIMABLE, NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ + NR_KERNEL_STACK, + /* Second 128 byte cacheline */ NR_UNSTABLE_NFS, /* NFS unstable pages */ NR_BOUNCE, NR_VMSCAN_WRITE, - /* Second 128 byte cacheline */ NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ + NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ + NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ + NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ #ifdef CONFIG_NUMA NUMA_HIT, /* allocated in intended node */ NUMA_MISS, /* allocated in non intended node */ @@ -165,7 +170,9 @@ struct per_cpu_pages { int count; /* number of pages in the list */ int high; /* high watermark, emptying needed */ int batch; /* chunk size for buddy add/remove */ - struct list_head list; /* the list of pages */ + + /* Lists of pages, one per migrate type stored on the pcp-lists */ + struct list_head lists[MIGRATE_PCPTYPES]; }; struct per_cpu_pageset { @@ -269,6 +276,11 @@ struct zone_reclaim_stat { */ unsigned long recent_rotated[2]; unsigned long recent_scanned[2]; + + /* + * accumulated for batching + */ + unsigned long nr_saved_scan[NR_LRU_LISTS]; }; struct zone { @@ -323,7 +335,6 @@ struct zone { spinlock_t lru_lock; struct zone_lru { struct list_head list; - unsigned long nr_saved_scan; /* accumulated for batching */ } lru[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015ef..1c755b2f937 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,6 @@ #include <linux/stringify.h> #include <linux/kobject.h> #include <linux/moduleparam.h> -#include <linux/marker.h> #include <linux/tracepoint.h> #include <asm/local.h> @@ -327,10 +326,6 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; -#ifdef CONFIG_MARKERS - struct marker *markers; - unsigned int num_markers; -#endif #ifdef CONFIG_TRACEPOINTS struct tracepoint *tracepoints; unsigned int num_tracepoints; @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(void); - extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); @@ -651,10 +644,6 @@ static inline void print_modules(void) { } -static inline void module_update_markers(void) -{ -} - static inline void module_update_tracepoints(void) { } diff --git a/include/linux/oom.h b/include/linux/oom.h index a7979baf1e3..6aac5fe4f6f 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -30,5 +30,16 @@ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); +extern bool oom_killer_disabled; + +static inline void oom_killer_disable(void) +{ + oom_killer_disabled = true; +} + +static inline void oom_killer_enable(void) +{ + oom_killer_disabled = false; +} #endif /* __KERNEL__*/ #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 2b87acfc5f8..13de789f0a5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -158,6 +158,9 @@ static inline int TestSetPage##uname(struct page *page) \ static inline int TestClearPage##uname(struct page *page) \ { return test_and_clear_bit(PG_##lname, &page->flags); } +#define __TESTCLEARFLAG(uname, lname) \ +static inline int __TestClearPage##uname(struct page *page) \ + { return __test_and_clear_bit(PG_##lname, &page->flags); } #define PAGEFLAG(uname, lname) TESTPAGEFLAG(uname, lname) \ SETPAGEFLAG(uname, lname) CLEARPAGEFLAG(uname, lname) @@ -184,6 +187,9 @@ static inline void __ClearPage##uname(struct page *page) { } #define TESTCLEARFLAG_FALSE(uname) \ static inline int TestClearPage##uname(struct page *page) { return 0; } +#define __TESTCLEARFLAG_FALSE(uname) \ +static inline int __TestClearPage##uname(struct page *page) { return 0; } + struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) @@ -250,11 +256,11 @@ PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable) #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT #define MLOCK_PAGES 1 PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked) - TESTSCFLAG(Mlocked, mlocked) + TESTSCFLAG(Mlocked, mlocked) __TESTCLEARFLAG(Mlocked, mlocked) #else #define MLOCK_PAGES 0 -PAGEFLAG_FALSE(Mlocked) - SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) +PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) + TESTCLEARFLAG_FALSE(Mlocked) __TESTCLEARFLAG_FALSE(Mlocked) #endif #ifdef CONFIG_ARCH_USES_PG_UNCACHED @@ -396,8 +402,8 @@ static inline void __ClearPageTail(struct page *page) */ #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) -#endif /* !__GENERATING_BOUNDS_H */ - +#define PAGE_FLAGS_PRIVATE \ + (1 << PG_private | 1 << PG_private_2) /** * page_has_private - Determine if page has private stuff * @page: The page to be checked @@ -405,8 +411,11 @@ static inline void __ClearPageTail(struct page *page) * Determine if a page has private stuff, indicating that release routines * should be invoked upon it. */ -#define page_has_private(page) \ - ((page)->flags & ((1 << PG_private) | \ - (1 << PG_private_2))) +static inline int page_has_private(struct page *page) +{ + return !!(page->flags & PAGE_FLAGS_PRIVATE); +} + +#endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bd341007c4f..368bd70f1d2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -1,5 +1,9 @@ /* - * Performance counters: + * NOTE: this file will be removed in a future kernel release, it is + * provided as a courtesy copy of user-space code that relies on the + * old (pre-rename) symbols and constants. + * + * Performance events: * * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar @@ -131,19 +135,19 @@ enum perf_counter_sample_format { * as specified by attr.read_format: * * struct read_format { - * { u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 id; } && PERF_FORMAT_ID - * } && !PERF_FORMAT_GROUP + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP * - * { u64 nr; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 value; - * { u64 id; } && PERF_FORMAT_ID - * } cntr[nr]; - * } && PERF_FORMAT_GROUP + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP * }; */ enum perf_counter_read_format { @@ -314,9 +318,9 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u64 id; - * u64 lost; + * struct perf_event_header header; + * u64 id; + * u64 lost; * }; */ PERF_EVENT_LOST = 2, @@ -364,10 +368,10 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, tid; + * struct perf_event_header header; + * u32 pid, tid; * - * struct read_format values; + * struct read_format values; * }; */ PERF_EVENT_READ = 8, @@ -383,23 +387,23 @@ enum perf_event_type { * { u64 id; } && PERF_SAMPLE_ID * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU - * { u64 period; } && PERF_SAMPLE_PERIOD + * { u64 period; } && PERF_SAMPLE_PERIOD * * { struct read_format values; } && PERF_SAMPLE_READ * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * - * # - * # The RAW record below is opaque data wrt the ABI - * # - * # That is, the ABI doesn't make any promises wrt to - * # the stability of its content, it may vary depending - * # on event, hardware, kernel version and phase of - * # the moon. - * # - * # In other words, PERF_SAMPLE_RAW contents are not an ABI. - * # + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # * * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW @@ -422,454 +426,16 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) -#ifdef __KERNEL__ /* - * Kernel-internal data types and definitions: - */ - -#ifdef CONFIG_PERF_COUNTERS -# include <asm/perf_counter.h> -#endif - -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/rculist.h> -#include <linux/rcupdate.h> -#include <linux/spinlock.h> -#include <linux/hrtimer.h> -#include <linux/fs.h> -#include <linux/pid_namespace.h> -#include <asm/atomic.h> - -#define PERF_MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - __u64 nr; - __u64 ip[PERF_MAX_STACK_DEPTH]; -}; - -struct perf_raw_record { - u32 size; - void *data; -}; - -struct task_struct; - -/** - * struct hw_perf_counter - performance counter hardware details: + * In case some app still references the old symbols: */ -struct hw_perf_counter { -#ifdef CONFIG_PERF_COUNTERS - union { - struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; - }; - union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; - }; - }; - atomic64_t prev_count; - u64 sample_period; - u64 last_period; - atomic64_t period_left; - u64 interrupts; - - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; -#endif -}; - -struct perf_counter; - -/** - * struct pmu - generic performance monitoring unit - */ -struct pmu { - int (*enable) (struct perf_counter *counter); - void (*disable) (struct perf_counter *counter); - void (*read) (struct perf_counter *counter); - void (*unthrottle) (struct perf_counter *counter); -}; - -/** - * enum perf_counter_active_state - the states of a counter - */ -enum perf_counter_active_state { - PERF_COUNTER_STATE_ERROR = -2, - PERF_COUNTER_STATE_OFF = -1, - PERF_COUNTER_STATE_INACTIVE = 0, - PERF_COUNTER_STATE_ACTIVE = 1, -}; - -struct file; -struct perf_mmap_data { - struct rcu_head rcu_head; - int nr_pages; /* nr of data pages */ - int writable; /* are we writable */ - int nr_locked; /* nr pages mlocked */ +#define __NR_perf_counter_open __NR_perf_event_open - atomic_t poll; /* POLL_ for wakeups */ - atomic_t events; /* event limit */ +#define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE +#define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE - atomic_long_t head; /* write position */ - atomic_long_t done_head; /* completed head */ - - atomic_t lock; /* concurrent writes */ - atomic_t wakeup; /* needs a wakeup */ - atomic_t lost; /* nr records lost */ - - long watermark; /* wakeup watermark */ - - struct perf_counter_mmap_page *user_page; - void *data_pages[0]; -}; - -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - -/** - * struct perf_counter - performance counter kernel representation: - */ -struct perf_counter { -#ifdef CONFIG_PERF_COUNTERS - struct list_head list_entry; - struct list_head event_entry; - struct list_head sibling_list; - int nr_siblings; - struct perf_counter *group_leader; - struct perf_counter *output; - const struct pmu *pmu; - - enum perf_counter_active_state state; - atomic64_t count; - - /* - * These are the total time in nanoseconds that the counter - * has been enabled (i.e. eligible to run, and the task has - * been scheduled in, if this is a per-task counter) - * and running (scheduled onto the CPU), respectively. - * - * They are computed from tstamp_enabled, tstamp_running and - * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. - */ - u64 total_time_enabled; - u64 total_time_running; - - /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the counter is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the counter was enabled - * tstamp_running: the notional time when the counter was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * counter was scheduled off. - */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; - - struct perf_counter_attr attr; - struct hw_perf_counter hw; - - struct perf_counter_context *ctx; - struct file *filp; - - /* - * These accumulate total time (in nanoseconds) that children - * counters have been enabled and running, respectively. - */ - atomic64_t child_total_time_enabled; - atomic64_t child_total_time_running; - - /* - * Protect attach/detach and child_list: - */ - struct mutex child_mutex; - struct list_head child_list; - struct perf_counter *parent; - - int oncpu; - int cpu; - - struct list_head owner_entry; - struct task_struct *owner; - - /* mmap bits */ - struct mutex mmap_mutex; - atomic_t mmap_count; - struct perf_mmap_data *data; - - /* poll related */ - wait_queue_head_t waitq; - struct fasync_struct *fasync; - - /* delayed work for NMIs and such */ - int pending_wakeup; - int pending_kill; - int pending_disable; - struct perf_pending_entry pending; - - atomic_t event_limit; - - void (*destroy)(struct perf_counter *); - struct rcu_head rcu_head; - - struct pid_namespace *ns; - u64 id; -#endif -}; - -/** - * struct perf_counter_context - counter context structure - * - * Used as a container for task counters and CPU counters as well: - */ -struct perf_counter_context { - /* - * Protect the states of the counters in the list, - * nr_active, and the list: - */ - spinlock_t lock; - /* - * Protect the list of counters. Locking either mutex or lock - * is sufficient to ensure the list doesn't change; to change - * the list you need to lock both the mutex and the spinlock. - */ - struct mutex mutex; - - struct list_head counter_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - int nr_stat; - atomic_t refcount; - struct task_struct *task; - - /* - * Context clock, runs when context enabled. - */ - u64 time; - u64 timestamp; - - /* - * These fields let us detect when two contexts have both - * been cloned (inherited) from a common ancestor. - */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; -}; - -/** - * struct perf_counter_cpu_context - per cpu counter context structure - */ -struct perf_cpu_context { - struct perf_counter_context ctx; - struct perf_counter_context *task_ctx; - int active_oncpu; - int max_pertask; - int exclusive; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; -}; - -struct perf_output_handle { - struct perf_counter *counter; - struct perf_mmap_data *data; - unsigned long head; - unsigned long offset; - int nmi; - int sample; - int locked; - unsigned long flags; -}; - -#ifdef CONFIG_PERF_COUNTERS - -/* - * Set by architecture code: - */ -extern int perf_max_counters; - -extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); - -extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern int perf_counter_init_task(struct task_struct *child); -extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_free_task(struct task_struct *task); -extern void set_perf_counter_pending(void); -extern void perf_counter_do_pending(void); -extern void perf_counter_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); -extern int perf_counter_task_disable(void); -extern int perf_counter_task_enable(void); -extern int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu); -extern void perf_counter_update_userpage(struct perf_counter *counter); - -struct perf_sample_data { - u64 type; - - u64 ip; - struct { - u32 pid; - u32 tid; - } tid_entry; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - struct { - u32 cpu; - u32 reserved; - } cpu_entry; - u64 period; - struct perf_callchain_entry *callchain; - struct perf_raw_record *raw; -}; - -extern void perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter); -extern void perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs); - -extern int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs); - -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return (counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE) && - (counter->attr.type != PERF_TYPE_HW_CACHE); -} - -extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; - -extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) -{ - if (atomic_read(&perf_swcounter_enabled[event])) - __perf_swcounter_event(event, nr, nmi, regs, addr); -} - -extern void __perf_counter_mmap(struct vm_area_struct *vma); - -static inline void perf_counter_mmap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_EXEC) - __perf_counter_mmap(vma); -} - -extern void perf_counter_comm(struct task_struct *tsk); -extern void perf_counter_fork(struct task_struct *tsk); - -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); - -extern int sysctl_perf_counter_paranoid; -extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_sample_rate; - -extern void perf_counter_init(void); -extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); - -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ - PERF_EVENT_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif - -extern int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int sample); -extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); -#else -static inline void -perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -static inline void -perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } -static inline void -perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { return 0; } -static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_free_task(struct task_struct *task) { } -static inline void perf_counter_do_pending(void) { } -static inline void perf_counter_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } -static inline int perf_counter_task_disable(void) { return -EINVAL; } -static inline int perf_counter_task_enable(void) { return -EINVAL; } - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) { } - -static inline void perf_counter_mmap(struct vm_area_struct *vma) { } -static inline void perf_counter_comm(struct task_struct *tsk) { } -static inline void perf_counter_fork(struct task_struct *tsk) { } -static inline void perf_counter_init(void) { } - -static inline int -perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c, - unsigned int size, int nmi, int sample) { } -static inline void perf_output_end(struct perf_output_handle *handle) { } -static inline void -perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) { } -static inline void -perf_output_sample(struct perf_output_handle *handle, - struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter) { } -static inline void -perf_prepare_sample(struct perf_event_header *header, - struct perf_sample_data *data, - struct perf_counter *counter, - struct pt_regs *regs) { } -#endif - -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) - -#endif /* __KERNEL__ */ #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h new file mode 100644 index 00000000000..acefaf71e6d --- /dev/null +++ b/include/linux/perf_event.h @@ -0,0 +1,858 @@ +/* + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_EVENT_H +#define _LINUX_PERF_EVENT_H + +#include <linux/types.h> +#include <linux/ioctl.h> +#include <asm/byteorder.h> + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance event event_id types, used by the + * attr.event_id parameter of the sys_perf_event_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache events: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" events provided by the kernel, even if the hardware + * does not support performance events. These events measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. + */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ +}; + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. + */ + __u32 type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + __u32 size; + + /* + * Type specific configuration information. + */ + __u64 config; + + union { + __u64 sample_period; + __u64 sample_freq; + }; + + __u64 sample_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + + __reserved_1 : 49; + + union { + __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_watermark; /* bytes before wakeup */ + }; + __u32 __reserved_2; + + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. + * + * u32 seq; + * s64 count; + * + * do { + * seq = pc->lock; + * + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. + */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware event identifier */ + __s64 offset; /* add to hardware event value */ + __u64 time_enabled; /* time event active */ + __u64 time_running; /* time event on cpu */ + + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[123]; /* align to 1k */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_event_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. + */ + __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) + +struct perf_event_header { + __u32 type; + __u16 misc; + __u16 size; +}; + +enum perf_event_type { + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * { u64 time; } && PERF_SAMPLE_TIME + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW + * }; + */ + PERF_RECORD_SAMPLE = 9, + + PERF_RECORD_MAX, /* non-ABI */ +}; + +enum perf_callchain_context { + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, + + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, + + PERF_CONTEXT_MAX = (__u64)-4095, +}; + +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + +#ifdef __KERNEL__ +/* + * Kernel-internal data types and definitions: + */ + +#ifdef CONFIG_PERF_EVENTS +# include <asm/perf_event.h> +#endif + +#include <linux/list.h> +#include <linux/mutex.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> +#include <linux/hrtimer.h> +#include <linux/fs.h> +#include <linux/pid_namespace.h> +#include <asm/atomic.h> + +#define PERF_MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u64 nr; + __u64 ip[PERF_MAX_STACK_DEPTH]; +}; + +struct perf_raw_record { + u32 size; + void *data; +}; + +struct task_struct; + +/** + * struct hw_perf_event - performance event hardware details: + */ +struct hw_perf_event { +#ifdef CONFIG_PERF_EVENTS + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long event_base; + int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; + atomic64_t prev_count; + u64 sample_period; + u64 last_period; + atomic64_t period_left; + u64 interrupts; + + u64 freq_count; + u64 freq_interrupts; + u64 freq_stamp; +#endif +}; + +struct perf_event; + +/** + * struct pmu - generic performance monitoring unit + */ +struct pmu { + int (*enable) (struct perf_event *event); + void (*disable) (struct perf_event *event); + void (*read) (struct perf_event *event); + void (*unthrottle) (struct perf_event *event); +}; + +/** + * enum perf_event_active_state - the states of a event + */ +enum perf_event_active_state { + PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_OFF = -1, + PERF_EVENT_STATE_INACTIVE = 0, + PERF_EVENT_STATE_ACTIVE = 1, +}; + +struct file; + +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; /* nr of data pages */ + int writable; /* are we writable */ + int nr_locked; /* nr pages mlocked */ + + atomic_t poll; /* POLL_ for wakeups */ + atomic_t events; /* event_id limit */ + + atomic_long_t head; /* write position */ + atomic_long_t done_head; /* completed head */ + + atomic_t lock; /* concurrent writes */ + atomic_t wakeup; /* needs a wakeup */ + atomic_t lost; /* nr records lost */ + + long watermark; /* wakeup watermark */ + + struct perf_event_mmap_page *user_page; + void *data_pages[0]; +}; + +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); +}; + +/** + * struct perf_event - performance event kernel representation: + */ +struct perf_event { +#ifdef CONFIG_PERF_EVENTS + struct list_head group_entry; + struct list_head event_entry; + struct list_head sibling_list; + int nr_siblings; + struct perf_event *group_leader; + struct perf_event *output; + const struct pmu *pmu; + + enum perf_event_active_state state; + atomic64_t count; + + /* + * These are the total time in nanoseconds that the event + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task event) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the event is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the event is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the event was enabled + * tstamp_running: the notional time when the event was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * event was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + + struct perf_event_attr attr; + struct hw_perf_event hw; + + struct perf_event_context *ctx; + struct file *filp; + + /* + * These accumulate total time (in nanoseconds) that children + * events have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* + * Protect attach/detach and child_list: + */ + struct mutex child_mutex; + struct list_head child_list; + struct perf_event *parent; + + int oncpu; + int cpu; + + struct list_head owner_entry; + struct task_struct *owner; + + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; + + /* poll related */ + wait_queue_head_t waitq; + struct fasync_struct *fasync; + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_kill; + int pending_disable; + struct perf_pending_entry pending; + + atomic_t event_limit; + + void (*destroy)(struct perf_event *); + struct rcu_head rcu_head; + + struct pid_namespace *ns; + u64 id; +#endif +}; + +/** + * struct perf_event_context - event context structure + * + * Used as a container for task events and CPU events as well: + */ +struct perf_event_context { + /* + * Protect the states of the events in the list, + * nr_active, and the list: + */ + spinlock_t lock; + /* + * Protect the list of events. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; + + struct list_head group_list; + struct list_head event_list; + int nr_events; + int nr_active; + int is_active; + int nr_stat; + atomic_t refcount; + struct task_struct *task; + + /* + * Context clock, runs when context enabled. + */ + u64 time; + u64 timestamp; + + /* + * These fields let us detect when two contexts have both + * been cloned (inherited) from a common ancestor. + */ + struct perf_event_context *parent_ctx; + u64 parent_gen; + u64 generation; + int pin_count; + struct rcu_head rcu_head; +}; + +/** + * struct perf_event_cpu_context - per cpu event context structure + */ +struct perf_cpu_context { + struct perf_event_context ctx; + struct perf_event_context *task_ctx; + int active_oncpu; + int max_pertask; + int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; +}; + +struct perf_output_handle { + struct perf_event *event; + struct perf_mmap_data *data; + unsigned long head; + unsigned long offset; + int nmi; + int sample; + int locked; + unsigned long flags; +}; + +#ifdef CONFIG_PERF_EVENTS + +/* + * Set by architecture code: + */ +extern int perf_max_events; + +extern const struct pmu *hw_perf_event_init(struct perf_event *event); + +extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu); +extern void perf_event_task_tick(struct task_struct *task, int cpu); +extern int perf_event_init_task(struct task_struct *child); +extern void perf_event_exit_task(struct task_struct *child); +extern void perf_event_free_task(struct task_struct *task); +extern void set_perf_event_pending(void); +extern void perf_event_do_pending(void); +extern void perf_event_print_debug(void); +extern void __perf_disable(void); +extern bool __perf_enable(void); +extern void perf_disable(void); +extern void perf_enable(void); +extern int perf_event_task_disable(void); +extern int perf_event_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu); +extern void perf_event_update_userpage(struct perf_event *event); + +struct perf_sample_data { + u64 type; + + u64 ip; + struct { + u32 pid; + u32 tid; + } tid_entry; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + struct { + u32 cpu; + u32 reserved; + } cpu_entry; + u64 period; + struct perf_callchain_entry *callchain; + struct perf_raw_record *raw; +}; + +extern void perf_output_sample(struct perf_output_handle *handle, + struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event); +extern void perf_prepare_sample(struct perf_event_header *header, + struct perf_sample_data *data, + struct perf_event *event, + struct pt_regs *regs); + +extern int perf_event_overflow(struct perf_event *event, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs); + +/* + * Return 1 for a software event, 0 for a hardware event + */ +static inline int is_software_event(struct perf_event *event) +{ + return (event->attr.type != PERF_TYPE_RAW) && + (event->attr.type != PERF_TYPE_HARDWARE) && + (event->attr.type != PERF_TYPE_HW_CACHE); +} + +extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swevent_enabled[event_id])) + __perf_sw_event(event_id, nr, nmi, regs, addr); +} + +extern void __perf_event_mmap(struct vm_area_struct *vma); + +static inline void perf_event_mmap(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + __perf_event_mmap(vma); +} + +extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_fork(struct task_struct *tsk); + +extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); + +extern int sysctl_perf_event_paranoid; +extern int sysctl_perf_event_mlock; +extern int sysctl_perf_event_sample_rate; + +extern void perf_event_init(void); +extern void perf_tp_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); + +#ifndef perf_misc_flags +#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ + PERF_RECORD_MISC_KERNEL) +#define perf_instruction_pointer(regs) instruction_pointer(regs) +#endif + +extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + int nmi, int sample); +extern void perf_output_end(struct perf_output_handle *handle); +extern void perf_output_copy(struct perf_output_handle *handle, + const void *buf, unsigned int len); +#else +static inline void +perf_event_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next, int cpu) { } +static inline void +perf_event_task_tick(struct task_struct *task, int cpu) { } +static inline int perf_event_init_task(struct task_struct *child) { return 0; } +static inline void perf_event_exit_task(struct task_struct *child) { } +static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_do_pending(void) { } +static inline void perf_event_print_debug(void) { } +static inline void perf_disable(void) { } +static inline void perf_enable(void) { } +static inline int perf_event_task_disable(void) { return -EINVAL; } +static inline int perf_event_task_enable(void) { return -EINVAL; } + +static inline void +perf_sw_event(u32 event_id, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } + +static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_fork(struct task_struct *tsk) { } +static inline void perf_event_init(void) { } + +#endif + +#define perf_output_put(handle, x) \ + perf_output_copy((handle), &(x), sizeof(x)) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_PERF_EVENT_H */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index b063c7328ba..fddfafaed02 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -360,6 +360,7 @@ struct pnp_driver { unsigned int flags; int (*probe) (struct pnp_dev *dev, const struct pnp_device_id *dev_id); void (*remove) (struct pnp_dev *dev); + void (*shutdown) (struct pnp_dev *dev); int (*suspend) (struct pnp_dev *dev, pm_message_t state); int (*resume) (struct pnp_dev *dev); struct device_driver driver; diff --git a/include/linux/poison.h b/include/linux/poison.h index 6729f7dcd60..7fc194aef8c 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -65,6 +65,9 @@ #define MUTEX_DEBUG_INIT 0x11 #define MUTEX_DEBUG_FREE 0x22 +/********** lib/flex_array.c **********/ +#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ + /********** security/ **********/ #define KEY_DESTROY 0xbd diff --git a/include/linux/prctl.h b/include/linux/prctl.h index b00df4c79c6..07bff666e65 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,7 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 26361c4c037..3ebb2315364 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -135,8 +135,8 @@ static inline int sb_any_quota_active(struct super_block *sb) /* * Operations supported for diskquotas. */ -extern struct dquot_operations dquot_operations; -extern struct quotactl_ops vfs_quotactl_ops; +extern const struct dquot_operations dquot_operations; +extern const struct quotactl_ops vfs_quotactl_ops; #define sb_dquot_ops (&dquot_operations) #define sb_quotactl_ops (&vfs_quotactl_ops) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index f9ddd03961a..589a40919f0 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -102,7 +102,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, */ #define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ for (pos = rcu_dereference((head)->first); \ - (!is_a_nulls(pos)) && \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 95e0615f4d7..6fe0363724e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1,5 +1,5 @@ /* - * Read-Copy Update mechanism for mutual exclusion + * Read-Copy Update mechanism for mutual exclusion * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ * Copyright IBM Corporation, 2001 * * Author: Dipankar Sarma <dipankar@in.ibm.com> - * + * * Based on the original work by Paul McKenney <paulmck@us.ibm.com> * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * Papers: @@ -26,7 +26,7 @@ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) * * For detailed explanation of Read-Copy Update mechanism see - - * http://lse.sourceforge.net/locking/rcupdate.html + * http://lse.sourceforge.net/locking/rcupdate.html * */ @@ -52,8 +52,13 @@ struct rcu_head { }; /* Exported common interfaces */ +#ifdef CONFIG_TREE_PREEMPT_RCU extern void synchronize_rcu(void); +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#define synchronize_rcu synchronize_sched +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); +extern void synchronize_sched(void); extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); @@ -262,24 +267,6 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * synchronize_sched - block until all CPUs have exited any non-preemptive - * kernel code sequences. - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -#define synchronize_sched() __synchronize_sched() - -/** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index a8930771782..37682770e9d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -24,7 +24,7 @@ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ #ifndef __LINUX_RCUTREE_H @@ -53,6 +53,8 @@ static inline void __rcu_read_unlock(void) preempt_enable(); } +#define __synchronize_sched() synchronize_rcu() + static inline void exit_rcu(void) { } @@ -68,8 +70,6 @@ static inline void __rcu_read_unlock_bh(void) local_bh_enable(); } -#define __synchronize_sched() synchronize_rcu() - extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf116d0dbf2..477841d29fc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -71,14 +71,10 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned lon void page_add_file_rmap(struct page *); void page_remove_rmap(struct page *); -#ifdef CONFIG_DEBUG_VM -void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address); -#else -static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) +static inline void page_dup_rmap(struct page *page) { atomic_inc(&page->_mapcount); } -#endif /* * Called from mm/vmscan.c to handle paging out diff --git a/include/linux/sched.h b/include/linux/sched.h index 8af3d249170..97b10da0a3e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -100,7 +100,7 @@ struct robust_list_head; struct bio; struct fs_struct; struct bts_context; -struct perf_counter_context; +struct perf_event_context; /* * List of flags we want to share for kernel threads, @@ -140,6 +140,10 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); +extern unsigned long nr_iowait_cpu(void); +extern unsigned long this_cpu_load(void); + + extern void calc_global_load(void); extern u64 cpu_nr_migrations(int cpu); @@ -257,7 +261,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -extern int runqueue_is_locked(void); +extern int runqueue_is_locked(int cpu); extern void task_rq_unlock_wait(struct task_struct *p); extern cpumask_var_t nohz_cpu_mask; @@ -434,7 +438,9 @@ extern int get_dumpable(struct mm_struct *mm); /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ #define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ + #define MMF_DUMPABLE_BITS 2 +#define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 @@ -444,6 +450,7 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 + #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS #define MMF_DUMP_FILTER_BITS 7 #define MMF_DUMP_FILTER_MASK \ @@ -457,6 +464,10 @@ extern int get_dumpable(struct mm_struct *mm); #else # define MMF_DUMP_MASK_DEFAULT_ELF 0 #endif + /* leave room for more dump flags */ +#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ + +#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { atomic_t count; @@ -632,6 +643,8 @@ struct signal_struct { unsigned audit_tty; struct tty_audit_buf *tty_audit_buf; #endif + + int oom_adj; /* OOM kill score adjustment (bit shift) */ }; /* Context switch must be unlocked if interrupts are to be enabled */ @@ -701,7 +714,7 @@ struct user_struct { #endif #endif -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS atomic_long_t locked_vm; #endif }; @@ -1075,6 +1088,8 @@ struct sched_class { void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio, int running); + unsigned int (*get_rr_interval) (struct task_struct *task); + #ifdef CONFIG_FAIR_GROUP_SCHED void (*moved_group) (struct task_struct *p); #endif @@ -1212,7 +1227,6 @@ struct task_struct { * a short time */ unsigned char fpu_counter; - s8 oomkilladj; /* OOM kill score adjustment (bit shift). */ #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif @@ -1449,10 +1463,10 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif -#ifdef CONFIG_PERF_COUNTERS - struct perf_counter_context *perf_counter_ctxp; - struct mutex perf_counter_mutex; - struct list_head perf_counter_list; +#ifdef CONFIG_PERF_EVENTS + struct perf_event_context *perf_event_ctxp; + struct mutex perf_event_mutex; + struct list_head perf_event_list; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ @@ -1711,7 +1725,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ #define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_SWAPOFF 0x00080000 /* I am in swapoff */ +#define PF_OOM_ORIGIN 0x00080000 /* Allocating much memory to others */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ @@ -1753,7 +1767,6 @@ extern cputime_t task_gtime(struct task_struct *p); #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ -#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ static inline void rcu_copy_process(struct task_struct *p) { diff --git a/include/linux/swap.h b/include/linux/swap.h index 7c15334f3ff..6c990e658f4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -419,10 +419,22 @@ static inline swp_entry_t get_swap_page(void) } /* linux/mm/thrash.c */ -#define put_swap_token(mm) do { } while (0) -#define grab_swap_token(mm) do { } while (0) -#define has_swap_token(mm) 0 -#define disable_swap_token() do { } while (0) +static inline void put_swap_token(struct mm_struct *mm) +{ +} + +static inline void grab_swap_token(struct mm_struct *mm) +{ +} + +static inline int has_swap_token(struct mm_struct *mm) +{ + return 0; +} + +static inline void disable_swap_token(void) +{ +} static inline void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a8e37821cc6..8d8285a10db 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -55,7 +55,7 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_attr; +struct perf_event_attr; #include <linux/types.h> #include <linux/aio_abi.h> @@ -100,33 +100,25 @@ struct perf_counter_attr; #ifdef CONFIG_EVENT_PROFILE #define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +static int prof_sysenter_enable_##sname(void) \ { \ - int ret = 0; \ - if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ - ret = reg_prof_syscall_enter("sys"#sname); \ - return ret; \ + return reg_prof_syscall_enter("sys"#sname); \ } \ \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +static void prof_sysenter_disable_##sname(void) \ { \ - if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ - unreg_prof_syscall_enter("sys"#sname); \ + unreg_prof_syscall_enter("sys"#sname); \ } #define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +static int prof_sysexit_enable_##sname(void) \ { \ - int ret = 0; \ - if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ - ret = reg_prof_syscall_exit("sys"#sname); \ - return ret; \ + return reg_prof_syscall_exit("sys"#sname); \ } \ \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +static void prof_sysexit_disable_##sname(void) \ { \ - if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ - unreg_prof_syscall_exit("sys"#sname); \ + unreg_prof_syscall_exit("sys"#sname); \ } #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ @@ -885,7 +877,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage long sys_perf_counter_open( - struct perf_counter_attr __user *attr_uptr, +asmlinkage long sys_perf_event_open( + struct perf_event_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 3689d7d81fe..b59e78c5716 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -910,9 +910,10 @@ enum v4l2_colorfx { V4L2_COLORFX_SEPIA = 2, }; #define V4L2_CID_AUTOBRIGHTNESS (V4L2_CID_BASE+32) +#define V4L2_CID_BAND_STOP_FILTER (V4L2_CID_BASE+33) /* last CID + 1 */ -#define V4L2_CID_LASTP1 (V4L2_CID_BASE+33) +#define V4L2_CID_LASTP1 (V4L2_CID_BASE+34) /* MPEG-class control IDs defined by V4L2 */ #define V4L2_CID_MPEG_BASE (V4L2_CTRL_CLASS_MPEG | 0x900) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 81a97cf8f0a..2d0f222388a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -166,15 +166,8 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } -extern unsigned long global_lru_pages(void); - -static inline unsigned long zone_lru_pages(struct zone *zone) -{ - return (zone_page_state(zone, NR_ACTIVE_ANON) - + zone_page_state(zone, NR_ACTIVE_FILE) - + zone_page_state(zone, NR_INACTIVE_ANON) - + zone_page_state(zone, NR_INACTIVE_FILE)); -} +extern unsigned long global_reclaimable_pages(void); +extern unsigned long zone_reclaimable_pages(struct zone *zone); #ifdef CONFIG_NUMA /* @@ -210,11 +203,6 @@ extern void zone_statistics(struct zone *, struct zone *); #endif /* CONFIG_NUMA */ -#define __add_zone_page_state(__z, __i, __d) \ - __mod_zone_page_state(__z, __i, __d) -#define __sub_zone_page_state(__z, __i, __d) \ - __mod_zone_page_state(__z, __i,-(__d)) - #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d) #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) |