summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig9
-rw-r--r--mm/filemap.c31
-rw-r--r--mm/filemap.h4
-rw-r--r--mm/filemap_xip.c2
-rw-r--r--mm/memory_hotplug.c126
-rw-r--r--mm/page-writeback.c4
-rw-r--r--mm/page_alloc.c8
-rw-r--r--mm/shmem.c9
-rw-r--r--mm/slab.c21
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swap_state.c2
-rw-r--r--mm/tiny-shmem.c4
-rw-r--r--mm/vmscan.c39
13 files changed, 199 insertions, 62 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 66e65ab3942..8f5b45615f7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,7 +115,8 @@ config SPARSEMEM_EXTREME
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+ depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on (IA64 || X86 || PPC64)
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
@@ -145,3 +146,9 @@ config MIGRATION
while the virtual addresses are not changed. This is useful for
example on NUMA systems to put pages nearer to the processors accessing
the page.
+
+config RESOURCES_64BIT
+ bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
+ default 64BIT
+ help
+ This option allows memory and IO resources to be 64 bit.
diff --git a/mm/filemap.c b/mm/filemap.c
index 9c7334bafda..648f2c0c8e1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2069,7 +2069,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
{
struct file *file = iocb->ki_filp;
struct address_space * mapping = file->f_mapping;
- struct address_space_operations *a_ops = mapping->a_ops;
+ const struct address_space_operations *a_ops = mapping->a_ops;
struct inode *inode = mapping->host;
long status = 0;
struct page *page;
@@ -2095,14 +2095,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
do {
unsigned long index;
unsigned long offset;
- unsigned long maxlen;
size_t copied;
offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
index = pos >> PAGE_CACHE_SHIFT;
bytes = PAGE_CACHE_SIZE - offset;
- if (bytes > count)
- bytes = count;
+
+ /* Limit the size of the copy to the caller's write size */
+ bytes = min(bytes, count);
+
+ /*
+ * Limit the size of the copy to that of the current segment,
+ * because fault_in_pages_readable() doesn't know how to walk
+ * segments.
+ */
+ bytes = min(bytes, cur_iov->iov_len - iov_base);
/*
* Bring in the user page that we will copy from _first_.
@@ -2110,10 +2117,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
* same page as we're writing to, without it being marked
* up-to-date.
*/
- maxlen = cur_iov->iov_len - iov_base;
- if (maxlen > bytes)
- maxlen = bytes;
- fault_in_pages_readable(buf, maxlen);
+ fault_in_pages_readable(buf, bytes);
page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
if (!page) {
@@ -2121,6 +2125,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
break;
}
+ if (unlikely(bytes == 0)) {
+ status = 0;
+ copied = 0;
+ goto zero_length_segment;
+ }
+
status = a_ops->prepare_write(file, page, offset, offset+bytes);
if (unlikely(status)) {
loff_t isize = i_size_read(inode);
@@ -2150,7 +2160,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
page_cache_release(page);
continue;
}
- if (likely(copied > 0)) {
+zero_length_segment:
+ if (likely(copied >= 0)) {
if (!status)
status = copied;
@@ -2215,7 +2226,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
struct file *file = iocb->ki_filp;
- struct address_space * mapping = file->f_mapping;
+ const struct address_space * mapping = file->f_mapping;
size_t ocount; /* original count */
size_t count; /* after file limit checks */
struct inode *inode = mapping->host;
diff --git a/mm/filemap.h b/mm/filemap.h
index 536979fb4ba..3f2a343c601 100644
--- a/mm/filemap.h
+++ b/mm/filemap.h
@@ -88,7 +88,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
const struct iovec *iov = *iovp;
size_t base = *basep;
- while (bytes) {
+ do {
int copy = min(bytes, iov->iov_len - base);
bytes -= copy;
@@ -97,7 +97,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
iov++;
base = 0;
}
- }
+ } while (bytes);
*iovp = iov;
*basep = base;
}
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index b960ac8e591..b4fd0d7c9bf 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -273,7 +273,7 @@ __xip_file_write(struct file *filp, const char __user *buf,
size_t count, loff_t pos, loff_t *ppos)
{
struct address_space * mapping = filp->f_mapping;
- struct address_space_operations *a_ops = mapping->a_ops;
+ const struct address_space_operations *a_ops = mapping->a_ops;
struct inode *inode = mapping->host;
long status = 0;
struct page *page;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 841a077d5ae..ea4038838b0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,6 +21,7 @@
#include <linux/memory_hotplug.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
+#include <linux/ioport.h>
#include <asm/tlbflush.h>
@@ -126,6 +127,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
unsigned long i;
unsigned long flags;
unsigned long onlined_pages = 0;
+ struct resource res;
+ u64 section_end;
+ unsigned long start_pfn;
struct zone *zone;
int need_zonelists_rebuild = 0;
@@ -148,10 +152,27 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
if (!populated_zone(zone))
need_zonelists_rebuild = 1;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pfn_to_page(pfn + i);
- online_page(page);
- onlined_pages++;
+ res.start = (u64)pfn << PAGE_SHIFT;
+ res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
+ res.flags = IORESOURCE_MEM; /* we just need system ram */
+ section_end = res.end;
+
+ while (find_next_system_ram(&res) >= 0) {
+ start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
+ nr_pages = (unsigned long)
+ ((res.end + 1 - res.start) >> PAGE_SHIFT);
+
+ if (PageReserved(pfn_to_page(start_pfn))) {
+ /* this region's page is not onlined now */
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pfn_to_page(start_pfn + i);
+ online_page(page);
+ onlined_pages++;
+ }
+ }
+
+ res.start = res.end + 1;
+ res.end = section_end;
}
zone->present_pages += onlined_pages;
zone->zone_pgdat->node_present_pages += onlined_pages;
@@ -163,3 +184,100 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
vm_total_pages = nr_free_pagecache_pages();
return 0;
}
+
+static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
+{
+ struct pglist_data *pgdat;
+ unsigned long zones_size[MAX_NR_ZONES] = {0};
+ unsigned long zholes_size[MAX_NR_ZONES] = {0};
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+
+ pgdat = arch_alloc_nodedata(nid);
+ if (!pgdat)
+ return NULL;
+
+ arch_refresh_nodedata(nid, pgdat);
+
+ /* we can use NODE_DATA(nid) from here */
+
+ /* init node's zones as empty zones, we don't have any present pages.*/
+ free_area_init_node(nid, pgdat, zones_size, start_pfn, zholes_size);
+
+ return pgdat;
+}
+
+static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
+{
+ arch_refresh_nodedata(nid, NULL);
+ arch_free_nodedata(pgdat);
+ return;
+}
+
+/* add this memory to iomem resource */
+static void register_memory_resource(u64 start, u64 size)
+{
+ struct resource *res;
+
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(!res);
+
+ res->name = "System RAM";
+ res->start = start;
+ res->end = start + size - 1;
+ res->flags = IORESOURCE_MEM;
+ if (request_resource(&iomem_resource, res) < 0) {
+ printk("System RAM resource %llx - %llx cannot be added\n",
+ (unsigned long long)res->start, (unsigned long long)res->end);
+ kfree(res);
+ }
+}
+
+
+
+int add_memory(int nid, u64 start, u64 size)
+{
+ pg_data_t *pgdat = NULL;
+ int new_pgdat = 0;
+ int ret;
+
+ if (!node_online(nid)) {
+ pgdat = hotadd_new_pgdat(nid, start);
+ if (!pgdat)
+ return -ENOMEM;
+ new_pgdat = 1;
+ ret = kswapd_run(nid);
+ if (ret)
+ goto error;
+ }
+
+ /* call arch's memory hotadd */
+ ret = arch_add_memory(nid, start, size);
+
+ if (ret < 0)
+ goto error;
+
+ /* we online node here. we can't roll back from here. */
+ node_set_online(nid);
+
+ if (new_pgdat) {
+ ret = register_one_node(nid);
+ /*
+ * If sysfs file of new node can't create, cpu on the node
+ * can't be hot-added. There is no rollback way now.
+ * So, check by BUG_ON() to catch it reluctantly..
+ */
+ BUG_ON(ret);
+ }
+
+ /* register this memory as resource */
+ register_memory_resource(start, size);
+
+ return ret;
+error:
+ /* rollback pgdat allocation and others */
+ if (new_pgdat)
+ rollback_node_hotadd(nid, pgdat);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8ccf6f1b147..4ec7026c7ba 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -516,14 +516,14 @@ static void set_ratelimit(void)
ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE;
}
-static int
+static int __cpuinit
ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
{
set_ratelimit();
return 0;
}
-static struct notifier_block ratelimit_nb = {
+static struct notifier_block __cpuinitdata ratelimit_nb = {
.notifier_call = ratelimit_handler,
.next = NULL,
};
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9f86191bb63..084a2de7e52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -446,8 +446,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
arch_free_page(page, order);
if (!PageHighMem(page))
- mutex_debug_check_no_locks_freed(page_address(page),
- PAGE_SIZE<<order);
+ debug_check_no_locks_freed(page_address(page),
+ PAGE_SIZE<<order);
for (i = 0 ; i < (1 << order) ; ++i)
reserved += free_pages_check(page + i);
@@ -2009,7 +2009,7 @@ static inline void free_zone_pagesets(int cpu)
}
}
-static int pageset_cpuup_callback(struct notifier_block *nfb,
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -2031,7 +2031,7 @@ static int pageset_cpuup_callback(struct notifier_block *nfb,
return ret;
}
-static struct notifier_block pageset_notifier =
+static struct notifier_block __cpuinitdata pageset_notifier =
{ &pageset_cpuup_callback, NULL, 0 };
void __init setup_per_cpu_pageset(void)
diff --git a/mm/shmem.c b/mm/shmem.c
index 38bc3334f26..b14ff817d16 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -26,7 +26,6 @@
#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
@@ -174,7 +173,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
}
static struct super_operations shmem_ops;
-static struct address_space_operations shmem_aops;
+static const struct address_space_operations shmem_aops;
static struct file_operations shmem_file_operations;
static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations;
@@ -2162,7 +2161,7 @@ static void destroy_inodecache(void)
printk(KERN_INFO "shmem_inode_cache: not all structures were freed\n");
}
-static struct address_space_operations shmem_aops = {
+static const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.set_page_dirty = __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
@@ -2252,9 +2251,7 @@ static int __init init_tmpfs(void)
printk(KERN_ERR "Could not register tmpfs\n");
goto out2;
}
-#ifdef CONFIG_TMPFS
- devfs_mk_dir("shm");
-#endif
+
shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER,
tmpfs_fs_type.name, NULL);
if (IS_ERR(shm_mnt)) {
diff --git a/mm/slab.c b/mm/slab.c
index 98ac20bc0de..233e39d14ca 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -89,6 +89,7 @@
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/poison.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
@@ -106,6 +107,7 @@
#include <linux/nodemask.h>
#include <linux/mempolicy.h>
#include <linux/mutex.h>
+#include <linux/rtmutex.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -492,17 +494,6 @@ struct kmem_cache {
#endif
#if DEBUG
-/*
- * Magic nums for obj red zoning.
- * Placed in the first word before and the first word after an obj.
- */
-#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */
-#define RED_ACTIVE 0x170FC2A5UL /* when obj is active */
-
-/* ...and for poisoning */
-#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
-#define POISON_FREE 0x6b /* for use-after-free poisoning */
-#define POISON_END 0xa5 /* end-byte of poisoning */
/*
* memory layout of objects:
@@ -1083,7 +1074,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
#endif
-static int cpuup_callback(struct notifier_block *nfb,
+static int __devinit cpuup_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1265,7 +1256,9 @@ bad:
return NOTIFY_BAD;
}
-static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
+static struct notifier_block __cpuinitdata cpucache_notifier = {
+ &cpuup_callback, NULL, 0
+};
/*
* swap the static kmem_list3 with kmalloced memory
@@ -3405,7 +3398,7 @@ void kfree(const void *objp)
local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
- mutex_debug_check_no_locks_freed(objp, obj_size(c));
+ debug_check_no_locks_freed(objp, obj_size(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
}
diff --git a/mm/sparse.c b/mm/sparse.c
index e0a3fe48aa3..c7a2b3a0e46 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -45,7 +45,7 @@ static struct mem_section *sparse_index_alloc(int nid)
static int sparse_index_init(unsigned long section_nr, int nid)
{
- static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(index_init_lock);
unsigned long root = SECTION_NR_TO_ROOT(section_nr);
struct mem_section *section;
int ret = 0;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e0e1583f32c..7535211bb49 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,7 +24,7 @@
* vmscan's shrink_list, to make sync_page look nicer, and to allow
* future use of radix_tree tags in the swap cache.
*/
-static struct address_space_operations swap_aops = {
+static const struct address_space_operations swap_aops = {
.writepage = swap_writepage,
.sync_page = block_sync_page,
.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index f9d6a9cc91c..5f2cbf0f153 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -12,7 +12,6 @@
#include <linux/fs.h>
#include <linux/init.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/file.h>
@@ -33,9 +32,6 @@ static int __init init_tmpfs(void)
{
BUG_ON(register_filesystem(&tmpfs_fs_type) != 0);
-#ifdef CONFIG_TMPFS
- devfs_mk_dir("shm");
-#endif
shm_mnt = kern_mount(&tmpfs_fs_type);
BUG_ON(IS_ERR(shm_mnt));
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 72babac71de..eeacb0d695c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -34,6 +34,7 @@
#include <linux/notifier.h>
#include <linux/rwsem.h>
#include <linux/delay.h>
+#include <linux/kthread.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -1223,7 +1224,6 @@ static int kswapd(void *p)
};
cpumask_t cpumask;
- daemonize("kswapd%d", pgdat->node_id);
cpumask = node_to_cpumask(pgdat->node_id);
if (!cpus_empty(cpumask))
set_cpus_allowed(tsk, cpumask);
@@ -1450,7 +1450,7 @@ out:
not required for correctness. So if the last cpu in a node goes
away, we get changed to run anywhere: as the first one comes back,
restore their cpu bindings. */
-static int cpu_callback(struct notifier_block *nfb,
+static int __devinit cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
pg_data_t *pgdat;
@@ -1468,20 +1468,35 @@ static int cpu_callback(struct notifier_block *nfb,
}
#endif /* CONFIG_HOTPLUG_CPU */
+/*
+ * This kswapd start function will be called by init and node-hot-add.
+ * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
+ */
+int kswapd_run(int nid)
+{
+ pg_data_t *pgdat = NODE_DATA(nid);
+ int ret = 0;
+
+ if (pgdat->kswapd)
+ return 0;
+
+ pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
+ if (IS_ERR(pgdat->kswapd)) {
+ /* failure at boot is fatal */
+ BUG_ON(system_state == SYSTEM_BOOTING);
+ printk("Failed to start kswapd on node %d\n",nid);
+ ret = -1;
+ }
+ return ret;
+}
+
static int __init kswapd_init(void)
{
- pg_data_t *pgdat;
+ int nid;
swap_setup();
- for_each_online_pgdat(pgdat) {
- pid_t pid;
-
- pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
- BUG_ON(pid < 0);
- read_lock(&tasklist_lock);
- pgdat->kswapd = find_task_by_pid(pid);
- read_unlock(&tasklist_lock);
- }
+ for_each_online_node(nid)
+ kswapd_run(nid);
hotcpu_notifier(cpu_callback, 0);
return 0;
}