summaryrefslogtreecommitdiffstats
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h220
1 files changed, 192 insertions, 28 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451c302..6507dde38b1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
#include <linux/mm_types.h>
#include <linux/range.h>
#include <linux/pfn.h>
+#include <linux/bit_spinlock.h>
struct mempolicy;
struct anon_vma;
@@ -82,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSUP 0x00000200
#else
#define VM_GROWSUP 0x00000000
+#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
@@ -101,7 +103,11 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */
+#endif
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
@@ -131,7 +137,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ)
/*
- * special vmas that are non-mergable, non-mlock()able
+ * Special vmas that are non-mergable, non-mlock()able.
+ * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
@@ -145,6 +152,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
/*
* This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -242,6 +250,7 @@ struct inode;
* files which need it (119 of them)
*/
#include <linux/page-flags.h>
+#include <linux/huge_mm.h>
/*
* Methods to modify the page usage count.
@@ -305,6 +314,39 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
+static inline void compound_lock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline void compound_unlock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline unsigned long compound_lock_irqsave(struct page *page)
+{
+ unsigned long uninitialized_var(flags);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ local_irq_save(flags);
+ compound_lock(page);
+#endif
+ return flags;
+}
+
+static inline void compound_unlock_irqrestore(struct page *page,
+ unsigned long flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ compound_unlock(page);
+ local_irq_restore(flags);
+#endif
+}
+
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
@@ -319,9 +361,29 @@ static inline int page_count(struct page *page)
static inline void get_page(struct page *page)
{
- page = compound_head(page);
- VM_BUG_ON(atomic_read(&page->_count) == 0);
+ /*
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count. Only if
+ * we're getting a tail page, the elevated page->_count is
+ * required only in the head page, so for tail pages the
+ * bugcheck only verifies that the page->_count isn't
+ * negative.
+ */
+ VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
atomic_inc(&page->_count);
+ /*
+ * Getting a tail page will elevate both the head and tail
+ * page->_count(s).
+ */
+ if (unlikely(PageTail(page))) {
+ /*
+ * This is safe only because
+ * __split_huge_page_refcount can't run under
+ * get_page().
+ */
+ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+ atomic_inc(&page->first_page->_count);
+ }
}
static inline struct page *virt_to_head_page(const void *x)
@@ -339,6 +401,34 @@ static inline void init_page_count(struct page *page)
atomic_set(&page->_count, 1);
}
+/*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
+ * PAGE_BUDDY_MAPCOUNT_VALUE must be <= -2 but better not too close to
+ * -2 so that an underflow of the page_mapcount() won't be mistaken
+ * for a genuine PAGE_BUDDY_MAPCOUNT_VALUE. -128 can be created very
+ * efficiently by most CPU architectures.
+ */
+#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
+
+static inline int PageBuddy(struct page *page)
+{
+ return atomic_read(&page->_mapcount) == PAGE_BUDDY_MAPCOUNT_VALUE;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+ VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+ atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+ VM_BUG_ON(!PageBuddy(page));
+ atomic_set(&page->_mapcount, -1);
+}
+
void put_page(struct page *page);
void put_pages_list(struct list_head *pages);
@@ -370,11 +460,40 @@ static inline int compound_order(struct page *page)
return (unsigned long)page[1].lru.prev;
}
+static inline int compound_trans_order(struct page *page)
+{
+ int order;
+ unsigned long flags;
+
+ if (!PageHead(page))
+ return 0;
+
+ flags = compound_lock_irqsave(page);
+ order = compound_order(page);
+ compound_unlock_irqrestore(page, flags);
+ return order;
+}
+
static inline void set_compound_order(struct page *page, unsigned long order)
{
page[1].lru.prev = (void *)order;
}
+#ifdef CONFIG_MMU
+/*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
+ * servicing faults for write access. In the normal case, do always want
+ * pte_mkwrite. But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_flags & VM_WRITE))
+ pte = pte_mkwrite(pte);
+ return pte;
+}
+#endif
+
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
@@ -490,7 +609,7 @@ static inline void set_compound_order(struct page *page, unsigned long order)
#endif
/*
- * Define the bit shifts to access each section. For non-existant
+ * Define the bit shifts to access each section. For non-existent
* sections we define the shift as 0; that plus a 0 mask ensures
* the compiler will optimise away reference to them.
*/
@@ -657,7 +776,7 @@ static inline struct address_space *page_mapping(struct page *page)
VM_BUG_ON(PageSlab(page));
if (unlikely(PageSwapCache(page)))
mapping = &swapper_space;
- else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+ else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
mapping = NULL;
return mapping;
}
@@ -742,7 +861,14 @@ extern void pagefault_out_of_memory(void);
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
+/*
+ * Flags passed to show_mem() and __show_free_areas() to suppress output in
+ * various contexts.
+ */
+#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */
+
extern void show_free_areas(void);
+extern void __show_free_areas(unsigned int flags);
int shmem_lock(struct file *file, int lock, struct user_struct *user);
struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
@@ -789,6 +915,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
* @pgd_entry: if set, called for each non-empty PGD (top-level) entry
* @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
* @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ * this handler is required to be able to handle
+ * pmd_trans_huge() pmds. They may simply choose to
+ * split_huge_page() instead of handling it explicitly.
* @pte_entry: if set, called for each non-empty PTE (4th-level) entry
* @pte_hole: if set, called for each hole at all levels
* @hugetlb_entry: if set, called for each hugetlb entry
@@ -854,7 +983,13 @@ static inline int handle_mm_fault(struct mm_struct *mm,
extern int make_pages_present(unsigned long addr, unsigned long end);
extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, int write);
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, unsigned int foll_flags,
+ struct page **pages, struct vm_area_struct **vmas,
+ int *nonblocking);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int nr_pages, int write, int force,
struct page **pages, struct vm_area_struct **vmas);
@@ -876,11 +1011,33 @@ int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
/* Is the vma a continuation of the stack vma above it? */
-static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
+static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
{
return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
}
+static inline int stack_guard_page_start(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ return (vma->vm_flags & VM_GROWSDOWN) &&
+ (vma->vm_start == addr) &&
+ !vma_growsdown(vma->vm_prev, addr);
+}
+
+/* Is the vma a continuation of the stack vma below it? */
+static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+{
+ return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+}
+
+static inline int stack_guard_page_end(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ return (vma->vm_flags & VM_GROWSUP) &&
+ (vma->vm_end == addr) &&
+ !vma_growsup(vma->vm_next, addr);
+}
+
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len);
@@ -1064,7 +1221,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
#endif
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
/*
@@ -1133,16 +1291,18 @@ static inline void pgtable_page_dtor(struct page *page)
pte_unmap(pte); \
} while (0)
-#define pte_alloc_map(mm, pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
- NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address) \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \
+ pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \
+ pmd, address))? \
NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size);
@@ -1196,8 +1356,6 @@ int add_from_early_node_map(struct range *range, int az,
int nr_range, int nid);
u64 __init find_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit);
-void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
- u64 goal, u64 limit);
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
@@ -1225,7 +1383,7 @@ extern void setup_per_zone_wmarks(void);
extern void calculate_zone_inactive_ratio(struct zone *zone);
extern void mem_init(void);
extern void __init mmap_init(void);
-extern void show_mem(void);
+extern void show_mem(unsigned int flags);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
extern int after_bootmem;
@@ -1415,6 +1573,11 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
+#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
+ * and return without waiting upon it */
+#define FOLL_MLOCK 0x40 /* mark page as mlocked */
+#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
+#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
@@ -1453,13 +1616,13 @@ static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
#endif
-extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
+extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
#ifdef __HAVE_ARCH_GATE_AREA
-int in_gate_area_no_task(unsigned long addr);
-int in_gate_area(struct task_struct *task, unsigned long addr);
+int in_gate_area_no_mm(unsigned long addr);
+int in_gate_area(struct mm_struct *mm, unsigned long addr);
#else
-int in_gate_area_no_task(unsigned long addr);
-#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
+int in_gate_area_no_mm(unsigned long addr);
+#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
int drop_caches_sysctl_handler(struct ctl_table *, int,
@@ -1507,16 +1670,17 @@ extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
extern atomic_long_t mce_bad_pages;
extern int soft_offline_page(struct page *page, int flags);
-#ifdef CONFIG_MEMORY_FAILURE
-int is_hwpoison_address(unsigned long addr);
-#else
-static inline int is_hwpoison_address(unsigned long addr)
-{
- return 0;
-}
-#endif
extern void dump_page(struct page *page);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_huge_page(struct page *page,
+ unsigned long addr,
+ unsigned int pages_per_huge_page);
+extern void copy_user_huge_page(struct page *dst, struct page *src,
+ unsigned long addr, struct vm_area_struct *vma,
+ unsigned int pages_per_huge_page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */