diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 16:08:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 16:08:54 -0700 |
commit | c2d95729e3094ecdd8c54e856bbe971adbbd7f48 (patch) | |
tree | 76cc5b551227d3d55d68a93105c1fe8080dfb812 /include | |
parent | bbda1baeeb2f4aff3addac3d086a1e56c3f2503e (diff) | |
parent | b34081f1cd59585451efaa69e1dff1b9507e6c89 (diff) |
Merge branch 'akpm' (patches from Andrew Morton)
Merge first patch-bomb from Andrew Morton:
- Some pidns/fork/exec tweaks
- OCFS2 updates
- Most of MM - there remain quite a few memcg parts which depend on
pending core cgroups changes. Which might have been already merged -
I'll check tomorrow...
- Various misc stuff all over the place
- A few block bits which I never got around to sending to Jens -
relatively minor things.
- MAINTAINERS maintenance
- A small number of lib/ updates
- checkpatch updates
- epoll
- firmware/dmi-scan
- Some kprobes work for S390
- drivers/rtc updates
- hfsplus feature work
- vmcore feature work
- rbtree upgrades
- AOE updates
- pktcdvd cleanups
- PPS
- memstick
- w1
- New "inittmpfs" feature, which does the obvious
- More IPC work from Davidlohr.
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (303 commits)
lz4: fix compression/decompression signedness mismatch
ipc: drop ipc_lock_check
ipc, shm: drop shm_lock_check
ipc: drop ipc_lock_by_ptr
ipc, shm: guard against non-existant vma in shmdt(2)
ipc: document general ipc locking scheme
ipc,msg: drop msg_unlock
ipc: rename ids->rw_mutex
ipc,shm: shorten critical region for shmat
ipc,shm: cleanup do_shmat pasta
ipc,shm: shorten critical region for shmctl
ipc,shm: make shmctl_nolock lockless
ipc,shm: introduce shmctl_nolock
ipc: drop ipcctl_pre_down
ipc,shm: shorten critical region in shmctl_down
ipc,shm: introduce lockless functions to obtain the ipc object
initmpfs: use initramfs if rootfstype= or root= specified
initmpfs: make rootfs use tmpfs when CONFIG_TMPFS enabled
initmpfs: move rootfs code from fs/ramfs/ to init/
initmpfs: move bdi setup from init_rootfs to init_ramfs
...
Diffstat (limited to 'include')
28 files changed, 252 insertions, 120 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c3881553f7d..5f66d519a72 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -243,6 +243,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_EXEC_MAP: Can be mapped for execution * * BDI_CAP_SWAP_BACKED: Count shmem/tmpfs objects as swap-backed. + * + * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -254,6 +256,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_NO_ACCT_WB 0x00000080 #define BDI_CAP_SWAP_BACKED 0x00000100 #define BDI_CAP_STABLE_WRITES 0x00000200 +#define BDI_CAP_STRICTLIMIT 0x00000400 #define BDI_CAP_VMFLAGS \ (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 70cf138690e..e8112ae5053 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -31,7 +31,7 @@ struct linux_binprm { #ifdef __alpha__ unsigned int taso:1; #endif - unsigned int recursion_depth; + unsigned int recursion_depth; /* only for search_binary_handler() */ struct file * file; struct cred *cred; /* new credentials */ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h new file mode 100644 index 00000000000..98e892ef6d5 --- /dev/null +++ b/include/linux/cmdline-parser.h @@ -0,0 +1,43 @@ +/* + * Parsing command line, get the partitions information. + * + * Written by Cai Zhiyong <caizhiyong@huawei.com> + * + */ +#ifndef CMDLINEPARSEH +#define CMDLINEPARSEH + +#include <linux/blkdev.h> + +/* partition flags */ +#define PF_RDONLY 0x01 /* Device is read only */ +#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */ + +struct cmdline_subpart { + char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */ + sector_t from; + sector_t size; + int flags; + struct cmdline_subpart *next_subpart; +}; + +struct cmdline_parts { + char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */ + unsigned int nr_subparts; + struct cmdline_subpart *subpart; + struct cmdline_parts *next_parts; +}; + +void cmdline_parts_free(struct cmdline_parts **parts); + +int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline); + +struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, + const char *bdev); + +void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, + int slot, + int (*add_part)(int, struct cmdline_subpart *, void *), + void *param); + +#endif /* CMDLINEPARSEH */ diff --git a/include/linux/compat.h b/include/linux/compat.h index ec1aee4aec9..345da00a86e 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -43,6 +43,7 @@ #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ { \ return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 37e4f8da7cd..fe68a5a9858 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -12,6 +12,15 @@ extern unsigned long long elfcorehdr_addr; extern unsigned long long elfcorehdr_size; +extern int __weak elfcorehdr_alloc(unsigned long long *addr, + unsigned long long *size); +extern void __weak elfcorehdr_free(unsigned long long addr); +extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos); +extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos); +extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot); + extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 661d374aeb2..f8d41cb1cbe 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -66,8 +66,8 @@ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ atomic_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ - unsigned long start_addr; /* starting address of memory chunk */ - unsigned long end_addr; /* ending address of memory chunk */ + unsigned long start_addr; /* start address of memory chunk */ + unsigned long end_addr; /* end address of memory chunk (inclusive) */ unsigned long bits[0]; /* bitmap for allocating memory chunk */ }; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c2b1801a160..0393270466c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -66,6 +66,9 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); int dequeue_hwpoisoned_huge_page(struct page *page); +bool isolate_huge_page(struct page *page, struct list_head *list); +void putback_active_hugepage(struct page *page); +bool is_hugepage_active(struct page *page); void copy_huge_page(struct page *dst, struct page *src); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE @@ -134,6 +137,9 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) return 0; } +#define isolate_huge_page(p, l) false +#define putback_active_hugepage(p) do {} while (0) +#define is_hugepage_active(x) false static inline void copy_huge_page(struct page *dst, struct page *src) { } @@ -261,6 +267,8 @@ struct huge_bootmem_page { }; struct page *alloc_huge_page_node(struct hstate *h, int nid); +struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, + unsigned long addr, int avoid_reserve); /* arch callback */ int __init alloc_bootmem_huge_page(struct hstate *h); @@ -371,9 +379,23 @@ static inline pgoff_t basepage_index(struct page *page) return __basepage_index(page); } +extern void dissolve_free_huge_pages(unsigned long start_pfn, + unsigned long end_pfn); +int pmd_huge_support(void); +/* + * Currently hugepage migration is enabled only for pmd-based hugepage. + * This function will be updated when hugepage migration is more widely + * supported. + */ +static inline int hugepage_migration_support(struct hstate *h) +{ + return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page_node(h, nid) NULL +#define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL #define hstate_sizelog(s) NULL @@ -396,6 +418,9 @@ static inline pgoff_t basepage_index(struct page *page) { return page->index; } +#define dissolve_free_huge_pages(s, e) do {} while (0) +#define pmd_huge_support() 0 +#define hugepage_migration_support(h) 0 #endif /* CONFIG_HUGETLB_PAGE */ #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/init.h b/include/linux/init.h index e73f2b70852..f1c27a71d03 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -153,6 +153,7 @@ extern unsigned int reset_devices; void setup_arch(char **); void prepare_namespace(void); void __init load_default_modules(void); +int __init init_rootfs(void); extern void (*late_time_init)(void); diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index c4d870b0d5e..19c19a5eee2 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -22,7 +22,7 @@ struct ipc_ids { int in_use; unsigned short seq; unsigned short seq_max; - struct rw_semaphore rw_mutex; + struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; }; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ca1d27a0d6a..925eaf28fca 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -264,10 +264,36 @@ extern void arch_arm_kprobe(struct kprobe *p); extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); -extern kprobe_opcode_t *get_insn_slot(void); -extern void free_insn_slot(kprobe_opcode_t *slot, int dirty); extern void kprobes_inc_nmissed_count(struct kprobe *p); +struct kprobe_insn_cache { + struct mutex mutex; + void *(*alloc)(void); /* allocate insn page */ + void (*free)(void *); /* free insn page */ + struct list_head pages; /* list of kprobe_insn_page */ + size_t insn_size; /* size of instruction slot */ + int nr_garbage; +}; + +extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c); +extern void __free_insn_slot(struct kprobe_insn_cache *c, + kprobe_opcode_t *slot, int dirty); + +#define DEFINE_INSN_CACHE_OPS(__name) \ +extern struct kprobe_insn_cache kprobe_##__name##_slots; \ + \ +static inline kprobe_opcode_t *get_##__name##_slot(void) \ +{ \ + return __get_insn_slot(&kprobe_##__name##_slots); \ +} \ + \ +static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\ +{ \ + __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \ +} \ + +DEFINE_INSN_CACHE_OPS(insn); + #ifdef CONFIG_OPTPROBES /* * Internal structure for direct jump optimized probe @@ -287,13 +313,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist); extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); -extern kprobe_opcode_t *get_optinsn_slot(void); -extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty); extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); +DEFINE_INSN_CACHE_OPS(optinsn); + #ifdef CONFIG_SYSCTL extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, diff --git a/include/linux/lz4.h b/include/linux/lz4.h index d21c13f10a6..4356686b0a3 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, * note : Destination buffer must be already allocated. * slightly faster than lz4_decompress_unknownoutputsize() */ -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); /* * lz4_decompress_unknownoutputsize() @@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, * Error if return (< 0) * note : Destination buffer must be already allocated. */ -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len); +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); #endif diff --git a/include/linux/memblock.h b/include/linux/memblock.h index f388203db7e..31e95acddb4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -60,6 +60,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn, + unsigned long *end_pfn); void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, unsigned long *out_end_pfn, int *out_nid); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 0d7df39a588..da6716b9e3f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol) } #define vma_policy(vma) ((vma)->vm_policy) -#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol)) static inline void mpol_get(struct mempolicy *pol) { @@ -126,6 +125,7 @@ struct shared_policy { spinlock_t lock; }; +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, @@ -173,7 +173,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ static inline int vma_migratable(struct vm_area_struct *vma) { - if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP)) + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return 0; /* * Migration allocates pages in the highest zone. If we cannot @@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) } #define vma_policy(vma) NULL -#define vma_set_policy(vma, pol) do {} while(0) + +static inline int +vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) +{ + return 0; +} static inline void numa_policy_init(void) { diff --git a/include/linux/migrate.h b/include/linux/migrate.h index a405d3dc0f6..6fe52142063 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -41,8 +41,6 @@ extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, enum migrate_mode mode, int reason); -extern int migrate_huge_page(struct page *, new_page_t x, - unsigned long private, enum migrate_mode mode); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -62,9 +60,6 @@ static inline void putback_movable_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, unsigned long private, enum migrate_mode mode, int reason) { return -ENOSYS; } -static inline int migrate_huge_page(struct page *page, new_page_t x, - unsigned long private, enum migrate_mode mode) - { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index d2d59b4149d..caf543c7eaa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -115,6 +115,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ +#ifdef CONFIG_MEM_SOFT_DIRTY +# define VM_SOFTDIRTY 0x08000000 /* Not soft dirty clean area */ +#else +# define VM_SOFTDIRTY 0 +#endif + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_HUGEPAGE 0x20000000 /* MADV_HUGEPAGE marked this vma */ #define VM_NOHUGEPAGE 0x40000000 /* MADV_NOHUGEPAGE marked this vma */ @@ -489,20 +495,6 @@ static inline int compound_order(struct page *page) return (unsigned long)page[1].lru.prev; } -static inline int compound_trans_order(struct page *page) -{ - int order; - unsigned long flags; - - if (!PageHead(page)) - return 0; - - flags = compound_lock_irqsave(page); - order = compound_order(page); - compound_unlock_irqrestore(page, flags); - return order; -} - static inline void set_compound_order(struct page *page, unsigned long order) { page[1].lru.prev = (void *)order; @@ -637,12 +629,12 @@ static inline enum zone_type page_zonenum(const struct page *page) #endif /* - * The identification function is only used by the buddy allocator for - * determining if two pages could be buddies. We are not really - * identifying a zone since we could be using a the section number - * id if we have not node id available in page flags. - * We guarantee only that it will return the same value for two - * combinable pages in a zone. + * The identification function is mainly used by the buddy allocator for + * determining if two pages could be buddies. We are not really identifying + * the zone since we could be using the section number id if we do not have + * node id available in page flags. + * We only guarantee that it will return the same value for two combinable + * pages in a zone. */ static inline int page_zone_id(struct page *page) { diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 1397ccf81e9..cf55945c83f 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -2,6 +2,7 @@ #define LINUX_MM_INLINE_H #include <linux/huge_mm.h> +#include <linux/swap.h> /** * page_is_file_cache - should the page be on a file LRU or anon LRU? diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index af4a3b77a8d..bd791e452ad 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -105,6 +105,7 @@ struct zone_padding { enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, + NR_ALLOC_BATCH, NR_LRU_BASE, NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ NR_ACTIVE_ANON, /* " " " " " */ @@ -352,7 +353,6 @@ struct zone { * free areas of different sizes */ spinlock_t lock; - int all_unreclaimable; /* All pages pinned */ #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* Set to true when the PG_migrate_skip bits should be cleared */ bool compact_blockskip_flush; diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index ffc444c38b0..403940787be 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -231,6 +231,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root, unsigned long radix_tree_prev_hole(struct radix_tree_root *root, unsigned long index, unsigned long max_scan); int radix_tree_preload(gfp_t gfp_mask); +int radix_tree_maybe_preload(gfp_t gfp_mask); void radix_tree_init(void); void *radix_tree_tag_set(struct radix_tree_root *root, unsigned long index, unsigned int tag); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 69e37c2d1ea..753207c8ce2 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -25,7 +25,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); extern const struct file_operations ramfs_file_operations; extern const struct vm_operations_struct generic_file_vm_ops; -extern int __init init_rootfs(void); +extern int __init init_ramfs_fs(void); int ramfs_fill_super(struct super_block *sb, void *data, int silent); diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 0022c1bb1e2..aa870a4ddf5 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -68,6 +68,10 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); @@ -81,4 +85,22 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, *rb_link = node; } +/** + * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of + * given type safe against removal of rb_node entry + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. + */ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\ + n = rb_entry(rb_next_postorder(&pos->field), \ + typeof(*pos), field); \ + &pos->field; \ + pos = n, \ + n = rb_entry(rb_next_postorder(&pos->field), \ + typeof(*pos), field)) + #endif /* _LINUX_RBTREE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ce1e1c0aaa3..45f254dddaf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2169,15 +2169,15 @@ static inline bool thread_group_leader(struct task_struct *p) * all we care about is that we have a task with the appropriate * pid, we don't actually care if we have the right task. */ -static inline int has_group_leader_pid(struct task_struct *p) +static inline bool has_group_leader_pid(struct task_struct *p) { - return p->pid == p->tgid; + return task_pid(p) == p->signal->leader_pid; } static inline -int same_thread_group(struct task_struct *p1, struct task_struct *p2) +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { - return p1->tgid == p2->tgid; + return p1->signal == p2->signal; } static inline struct task_struct *next_thread(const struct task_struct *p) diff --git a/include/linux/smp.h b/include/linux/smp.h index c181399f2c2..cfb7ca094b3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -28,6 +28,27 @@ extern unsigned int total_cpus; int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait); +/* + * Call a function on all processors + */ +int on_each_cpu(smp_call_func_t func, void *info, int wait); + +/* + * Call a function on processors specified by mask, which might include + * the local one. + */ +void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, + void *info, bool wait); + +/* + * Call a function on each processor for which the supplied function + * cond_func returns a positive value. This may include the local + * processor. + */ +void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), + smp_call_func_t func, void *info, bool wait, + gfp_t gfp_flags); + #ifdef CONFIG_SMP #include <linux/preempt.h> @@ -95,27 +116,6 @@ static inline void call_function_init(void) { } #endif /* - * Call a function on all processors - */ -int on_each_cpu(smp_call_func_t func, void *info, int wait); - -/* - * Call a function on processors specified by mask, which might include - * the local one. - */ -void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, - void *info, bool wait); - -/* - * Call a function on each processor for which the supplied function - * cond_func returns a positive value. This may include the local - * processor. - */ -void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), - smp_call_func_t func, void *info, bool wait, - gfp_t gfp_flags); - -/* * Mark the boot cpu "online" so that it can call console drivers in * printk() and can access its per-cpu storage. */ @@ -139,43 +139,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func, info, wait) \ - ({ \ - unsigned long __flags; \ - local_irq_save(__flags); \ - func(info); \ - local_irq_restore(__flags); \ - 0; \ - }) -/* - * Note we still need to test the mask even for UP - * because we actually can get an empty mask from - * code that on SMP might call us without the local - * CPU in the mask. - */ -#define on_each_cpu_mask(mask, func, info, wait) \ - do { \ - if (cpumask_test_cpu(0, (mask))) { \ - local_irq_disable(); \ - (func)(info); \ - local_irq_enable(); \ - } \ - } while (0) -/* - * Preemption is disabled here to make sure the cond_func is called under the - * same condtions in UP and SMP. - */ -#define on_each_cpu_cond(cond_func, func, info, wait, gfp_flags)\ - do { \ - void *__info = (info); \ - preempt_disable(); \ - if ((cond_func)(0, __info)) { \ - local_irq_disable(); \ - (func)(__info); \ - local_irq_enable(); \ - } \ - preempt_enable(); \ - } while (0) static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) diff --git a/include/linux/swap.h b/include/linux/swap.h index d95cde5e257..c03c139219c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -182,6 +182,33 @@ enum { #define SWAP_MAP_SHMEM 0xbf /* Owned by shmem/tmpfs, in first swap_map */ /* + * We use this to track usage of a cluster. A cluster is a block of swap disk + * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All + * free clusters are organized into a list. We fetch an entry from the list to + * get a free cluster. + * + * The data field stores next cluster if the cluster is free or cluster usage + * counter otherwise. The flags field determines if a cluster is free. This is + * protected by swap_info_struct.lock. + */ +struct swap_cluster_info { + unsigned int data:24; + unsigned int flags:8; +}; +#define CLUSTER_FLAG_FREE 1 /* This cluster is free */ +#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */ + +/* + * We assign a cluster to each CPU, so each CPU can allocate swap entry from + * its own cluster and swapout sequentially. The purpose is to optimize swapout + * throughput. + */ +struct percpu_cluster { + struct swap_cluster_info index; /* Current cluster index */ + unsigned int next; /* Likely next allocation offset */ +}; + +/* * The in-memory structure used to track swap areas. */ struct swap_info_struct { @@ -191,14 +218,16 @@ struct swap_info_struct { signed char next; /* next type on the swap list */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ + struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ + struct swap_cluster_info free_cluster_head; /* free cluster list head */ + struct swap_cluster_info free_cluster_tail; /* free cluster list tail */ unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */ unsigned int pages; /* total of usable pages of swap */ unsigned int inuse_pages; /* number of those currently in use */ unsigned int cluster_next; /* likely index for next allocation */ unsigned int cluster_nr; /* countdown to next cluster search */ - unsigned int lowest_alloc; /* while preparing discard cluster */ - unsigned int highest_alloc; /* while preparing discard cluster */ + struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ struct swap_extent *curr_swap_extent; struct swap_extent first_swap_extent; struct block_device *bdev; /* swap device or bdev of swap file */ @@ -212,14 +241,18 @@ struct swap_info_struct { * protect map scan related fields like * swap_map, lowest_bit, highest_bit, * inuse_pages, cluster_next, - * cluster_nr, lowest_alloc and - * highest_alloc. other fields are only - * changed at swapon/swapoff, so are - * protected by swap_lock. changing - * flags need hold this lock and - * swap_lock. If both locks need hold, - * hold swap_lock first. + * cluster_nr, lowest_alloc, + * highest_alloc, free/discard cluster + * list. other fields are only changed + * at swapon/swapoff, so are protected + * by swap_lock. changing flags need + * hold this lock and swap_lock. If + * both locks need hold, hold swap_lock + * first. */ + struct work_struct discard_work; /* discard worker */ + struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */ + struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ }; struct swap_list_t { @@ -414,6 +447,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #else /* CONFIG_SWAP */ +#define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L #define total_swapcache_pages() 0UL diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 84662ecc7b5..7fac04e7ff6 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -186,6 +186,7 @@ extern struct trace_event_functions exit_syscall_print_funcs; #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index bd6cf61142b..1855f0a22ad 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -70,6 +70,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif +#ifdef CONFIG_SMP + NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ + NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ +#endif + NR_TLB_LOCAL_FLUSH_ALL, + NR_TLB_LOCAL_FLUSH_ONE, NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index c586679b6fe..e4b948080d2 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -143,7 +143,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, } extern unsigned long global_reclaimable_pages(void); -extern unsigned long zone_reclaimable_pages(struct zone *zone); #ifdef CONFIG_NUMA /* @@ -198,7 +197,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item); extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); -void refresh_cpu_vm_stats(int); +void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); @@ -255,6 +254,7 @@ static inline void __dec_zone_page_state(struct page *page, static inline void refresh_cpu_vm_stats(int cpu) { } static inline void refresh_zone_stat_thresholds(void) { } +static inline void cpu_vm_stats_fold(int cpu) { } static inline void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) { } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 4e198ca1f68..021b8a319b9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -98,8 +98,6 @@ int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void sync_inodes_sb(struct super_block *); -long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, - enum wb_reason reason); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 6bc943ecb84..d0c61347662 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -268,11 +268,13 @@ TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, - int alloc_migratetype, int fallback_migratetype), + int alloc_migratetype, int fallback_migratetype, + int change_ownership), TP_ARGS(page, alloc_order, fallback_order, - alloc_migratetype, fallback_migratetype), + alloc_migratetype, fallback_migratetype, + change_ownership), TP_STRUCT__entry( __field( struct page *, page ) @@ -280,6 +282,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, __field( int, fallback_order ) __field( int, alloc_migratetype ) __field( int, fallback_migratetype ) + __field( int, change_ownership ) ), TP_fast_assign( @@ -288,6 +291,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; + __entry->change_ownership = change_ownership; ), TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", @@ -299,7 +303,7 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->alloc_migratetype, __entry->fallback_migratetype, __entry->fallback_order < pageblock_order, - __entry->alloc_migratetype == __entry->fallback_migratetype) + __entry->change_ownership) ); #endif /* _TRACE_KMEM_H */ |