summary refs log tree commit diff stats
path: root/mm/swapfile.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- mm/swapfile.c 411
1 files changed, 207 insertions, 204 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0341c5700e3..8c6b3ce38f0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
}
/*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
- swp_entry_t entry;
-
- down_read(&swap_unplug_sem);
- entry.val = page_private(page);
- if (PageSwapCache(page)) {
- struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
- struct backing_dev_info *bdi;
-
- /*
- * If the page is removed from swapcache from under us (with a
- * racy try_to_unuse/swapoff) we need an additional reference
- * count to avoid reading garbage from page_private(page) above.
- * If the WARN_ON triggers during a swapoff it maybe the race
- * condition and it's harmless. However if it triggers without
- * swapoff it signals a problem.
- */
- WARN_ON(page_count(page) <= 1);
-
- bdi = bdev->bd_inode->i_mapping->backing_dev_info;
- blk_run_backing_dev(bdi, page);
- }
- up_read(&swap_unplug_sem);
-}
-
-/*
* swapon tell device that all the old swap contents can be discarded,
* to allow the swap device to optimize its wear-levelling.
*/
@@ -212,8 +179,8 @@ static int wait_for_discard(void *word)
#define SWAPFILE_CLUSTER 256
#define LATENCY_LIMIT 256
-static inline unsigned long scan_swap_map(struct swap_info_struct *si,
- unsigned char usage)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+ unsigned char usage)
{
unsigned long offset;
unsigned long scan_base;
@@ -880,7 +847,7 @@ unsigned int count_swap_pages(int type, int free)
static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, swp_entry_t entry, struct page *page)
{
- struct mem_cgroup *ptr = NULL;
+ struct mem_cgroup *ptr;
spinlock_t *ptl;
pte_t *pte;
int ret = 1;
@@ -1550,6 +1517,36 @@ bad_bmap:
goto out;
}
+static void enable_swap_info(struct swap_info_struct *p, int prio,
+ unsigned char *swap_map)
+{
+ int i, prev;
+
+ spin_lock(&swap_lock);
+ if (prio >= 0)
+ p->prio = prio;
+ else
+ p->prio = --least_priority;
+ p->swap_map = swap_map;
+ p->flags |= SWP_WRITEOK;
+ nr_swap_pages += p->pages;
+ total_swap_pages += p->pages;
+
+ /* insert swap space into swap_list: */
+ prev = -1;
+ for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
+ if (p->prio >= swap_info[i]->prio)
+ break;
+ prev = i;
+ }
+ p->next = i;
+ if (prev < 0)
+ swap_list.head = swap_list.next = p->type;
+ else
+ swap_info[prev]->next = p->type;
+ spin_unlock(&swap_lock);
+}
+
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
@@ -1621,32 +1618,17 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
current->flags &= ~PF_OOM_ORIGIN;
if (err) {
+ /*
+ * reading p->prio and p->swap_map outside the lock is
+ * safe here because only sys_swapon and sys_swapoff
+ * change them, and there can be no other sys_swapon or
+ * sys_swapoff for this swap_info_struct at this point.
+ */
/* re-insert swap space back into swap_list */
- spin_lock(&swap_lock);
- if (p->prio < 0)
- p->prio = --least_priority;
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
- if (p->prio >= swap_info[i]->prio)
- break;
- prev = i;
- }
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = type;
- else
- swap_info[prev]->next = type;
- nr_swap_pages += p->pages;
- total_swap_pages += p->pages;
- p->flags |= SWP_WRITEOK;
- spin_unlock(&swap_lock);
+ enable_swap_info(p, p->prio, p->swap_map);
goto out_dput;
}
- /* wait for any unplug function to finish */
- down_write(&swap_unplug_sem);
- up_write(&swap_unplug_sem);
-
destroy_swap_extents(p);
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
@@ -1844,49 +1826,24 @@ static int __init max_swapfiles_check(void)
late_initcall(max_swapfiles_check);
#endif
-/*
- * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
- *
- * The swapon system call
- */
-SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+static struct swap_info_struct *alloc_swap_info(void)
{
struct swap_info_struct *p;
- char *name = NULL;
- struct block_device *bdev = NULL;
- struct file *swap_file = NULL;
- struct address_space *mapping;
unsigned int type;
- int i, prev;
- int error;
- union swap_header *swap_header;
- unsigned int nr_good_pages;
- int nr_extents = 0;
- sector_t span;
- unsigned long maxpages;
- unsigned long swapfilepages;
- unsigned char *swap_map = NULL;
- struct page *page = NULL;
- struct inode *inode = NULL;
- int did_down = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
spin_lock(&swap_lock);
for (type = 0; type < nr_swapfiles; type++) {
if (!(swap_info[type]->flags & SWP_USED))
break;
}
- error = -EPERM;
if (type >= MAX_SWAPFILES) {
spin_unlock(&swap_lock);
kfree(p);
- goto out;
+ return ERR_PTR(-EPERM);
}
if (type >= nr_swapfiles) {
p->type = type;
@@ -1911,81 +1868,49 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->next = -1;
spin_unlock(&swap_lock);
- name = getname(specialfile);
- error = PTR_ERR(name);
- if (IS_ERR(name)) {
- name = NULL;
- goto bad_swap_2;
- }
- swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
- error = PTR_ERR(swap_file);
- if (IS_ERR(swap_file)) {
- swap_file = NULL;
- goto bad_swap_2;
- }
-
- p->swap_file = swap_file;
- mapping = swap_file->f_mapping;
- inode = mapping->host;
-
- error = -EBUSY;
- for (i = 0; i < nr_swapfiles; i++) {
- struct swap_info_struct *q = swap_info[i];
+ return p;
+}
- if (i == type || !q->swap_file)
- continue;
- if (mapping == q->swap_file->f_mapping)
- goto bad_swap;
- }
+static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
+{
+ int error;
- error = -EINVAL;
if (S_ISBLK(inode->i_mode)) {
- bdev = bdgrab(I_BDEV(inode));
- error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
+ p->bdev = bdgrab(I_BDEV(inode));
+ error = blkdev_get(p->bdev,
+ FMODE_READ | FMODE_WRITE | FMODE_EXCL,
sys_swapon);
if (error < 0) {
- bdev = NULL;
- error = -EINVAL;
- goto bad_swap;
+ p->bdev = NULL;
+ return -EINVAL;
}
- p->old_block_size = block_size(bdev);
- error = set_blocksize(bdev, PAGE_SIZE);
+ p->old_block_size = block_size(p->bdev);
+ error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
- goto bad_swap;
- p->bdev = bdev;
+ return error;
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
p->bdev = inode->i_sb->s_bdev;
mutex_lock(&inode->i_mutex);
- did_down = 1;
- if (IS_SWAPFILE(inode)) {
- error = -EBUSY;
- goto bad_swap;
- }
- } else {
- goto bad_swap;
- }
+ if (IS_SWAPFILE(inode))
+ return -EBUSY;
+ } else
+ return -EINVAL;
- swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
+ return 0;
+}
- /*
- * Read the swap header.
- */
- if (!mapping->a_ops->readpage) {
- error = -EINVAL;
- goto bad_swap;
- }
- page = read_mapping_page(mapping, 0, swap_file);
- if (IS_ERR(page)) {
- error = PTR_ERR(page);
- goto bad_swap;
- }
- swap_header = kmap(page);
+static unsigned long read_swap_header(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ struct inode *inode)
+{
+ int i;
+ unsigned long maxpages;
+ unsigned long swapfilepages;
if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
printk(KERN_ERR "Unable to find swap-space signature\n");
- error = -EINVAL;
- goto bad_swap;
+ return 0;
}
/* swap partition endianess hack... */
@@ -2001,8 +1926,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
printk(KERN_WARNING
"Unable to handle swap header version %d\n",
swap_header->info.version);
- error = -EINVAL;
- goto bad_swap;
+ return 0;
}
p->lowest_bit = 1;
@@ -2033,61 +1957,155 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
p->highest_bit = maxpages - 1;
- error = -EINVAL;
if (!maxpages)
- goto bad_swap;
+ return 0;
+ swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
if (swapfilepages && maxpages > swapfilepages) {
printk(KERN_WARNING
"Swap area shorter than signature indicates\n");
- goto bad_swap;
+ return 0;
}
if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
- goto bad_swap;
+ return 0;
if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
- goto bad_swap;
+ return 0;
- /* OK, set up the swap map and apply the bad block list */
- swap_map = vmalloc(maxpages);
- if (!swap_map) {
- error = -ENOMEM;
- goto bad_swap;
- }
+ return maxpages;
+}
+
+static int setup_swap_map_and_extents(struct swap_info_struct *p,
+ union swap_header *swap_header,
+ unsigned char *swap_map,
+ unsigned long maxpages,
+ sector_t *span)
+{
+ int i;
+ unsigned int nr_good_pages;
+ int nr_extents;
- memset(swap_map, 0, maxpages);
nr_good_pages = maxpages - 1; /* omit header page */
for (i = 0; i < swap_header->info.nr_badpages; i++) {
unsigned int page_nr = swap_header->info.badpages[i];
- if (page_nr == 0 || page_nr > swap_header->info.last_page) {
- error = -EINVAL;
- goto bad_swap;
- }
+ if (page_nr == 0 || page_nr > swap_header->info.last_page)
+ return -EINVAL;
if (page_nr < maxpages) {
swap_map[page_nr] = SWAP_MAP_BAD;
nr_good_pages--;
}
}
- error = swap_cgroup_swapon(type, maxpages);
- if (error)
- goto bad_swap;
-
if (nr_good_pages) {
swap_map[0] = SWAP_MAP_BAD;
p->max = maxpages;
p->pages = nr_good_pages;
- nr_extents = setup_swap_extents(p, &span);
- if (nr_extents < 0) {
- error = nr_extents;
- goto bad_swap;
- }
+ nr_extents = setup_swap_extents(p, span);
+ if (nr_extents < 0)
+ return nr_extents;
nr_good_pages = p->pages;
}
if (!nr_good_pages) {
printk(KERN_WARNING "Empty swap-file\n");
+ return -EINVAL;
+ }
+
+ return nr_extents;
+}
+
+SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+{
+ struct swap_info_struct *p;
+ char *name;
+ struct file *swap_file = NULL;
+ struct address_space *mapping;
+ int i;
+ int prio;
+ int error;
+ union swap_header *swap_header;
+ int nr_extents;
+ sector_t span;
+ unsigned long maxpages;
+ unsigned char *swap_map = NULL;
+ struct page *page = NULL;
+ struct inode *inode = NULL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ p = alloc_swap_info();
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ name = getname(specialfile);
+ if (IS_ERR(name)) {
+ error = PTR_ERR(name);
+ name = NULL;
+ goto bad_swap;
+ }
+ swap_file = filp_open(name, O_RDWR|O_LARGEFILE, 0);
+ if (IS_ERR(swap_file)) {
+ error = PTR_ERR(swap_file);
+ swap_file = NULL;
+ goto bad_swap;
+ }
+
+ p->swap_file = swap_file;
+ mapping = swap_file->f_mapping;
+
+ for (i = 0; i < nr_swapfiles; i++) {
+ struct swap_info_struct *q = swap_info[i];
+
+ if (q == p || !q->swap_file)
+ continue;
+ if (mapping == q->swap_file->f_mapping) {
+ error = -EBUSY;
+ goto bad_swap;
+ }
+ }
+
+ inode = mapping->host;
+ /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+ error = claim_swapfile(p, inode);
+ if (unlikely(error))
+ goto bad_swap;
+
+ /*
+ * Read the swap header.
+ */
+ if (!mapping->a_ops->readpage) {
error = -EINVAL;
goto bad_swap;
}
+ page = read_mapping_page(mapping, 0, swap_file);
+ if (IS_ERR(page)) {
+ error = PTR_ERR(page);
+ goto bad_swap;
+ }
+ swap_header = kmap(page);
+
+ maxpages = read_swap_header(p, swap_header, inode);
+ if (unlikely(!maxpages)) {
+ error = -EINVAL;
+ goto bad_swap;
+ }
+
+ /* OK, set up the swap map and apply the bad block list */
+ swap_map = vzalloc(maxpages);
+ if (!swap_map) {
+ error = -ENOMEM;
+ goto bad_swap;
+ }
+
+ error = swap_cgroup_swapon(p->type, maxpages);
+ if (error)
+ goto bad_swap;
+
+ nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
+ maxpages, &span);
+ if (unlikely(nr_extents < 0)) {
+ error = nr_extents;
+ goto bad_swap;
+ }
if (p->bdev) {
if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2099,58 +2117,46 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
mutex_lock(&swapon_mutex);
- spin_lock(&swap_lock);
+ prio = -1;
if (swap_flags & SWAP_FLAG_PREFER)
- p->prio =
+ prio =
(swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
- else
- p->prio = --least_priority;
- p->swap_map = swap_map;
- p->flags |= SWP_WRITEOK;
- nr_swap_pages += nr_good_pages;
- total_swap_pages += nr_good_pages;
+ enable_swap_info(p, prio, swap_map);
printk(KERN_INFO "Adding %uk swap on %s. "
"Priority:%d extents:%d across:%lluk %s%s\n",
- nr_good_pages<<(PAGE_SHIFT-10), name, p->prio,
+ p->pages<<(PAGE_SHIFT-10), name, p->prio,
nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
(p->flags & SWP_DISCARDABLE) ? "D" : "");
- /* insert swap space into swap_list: */
- prev = -1;
- for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
- if (p->prio >= swap_info[i]->prio)
- break;
- prev = i;
- }
- p->next = i;
- if (prev < 0)
- swap_list.head = swap_list.next = type;
- else
- swap_info[prev]->next = type;
- spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
atomic_inc(&proc_poll_event);
wake_up_interruptible(&proc_poll_wait);
+ if (S_ISREG(inode->i_mode))
+ inode->i_flags |= S_SWAPFILE;
error = 0;
goto out;
bad_swap:
- if (bdev) {
- set_blocksize(bdev, p->old_block_size);
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
+ set_blocksize(p->bdev, p->old_block_size);
+ blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
}
destroy_swap_extents(p);
- swap_cgroup_swapoff(type);
-bad_swap_2:
+ swap_cgroup_swapoff(p->type);
spin_lock(&swap_lock);
p->swap_file = NULL;
p->flags = 0;
spin_unlock(&swap_lock);
vfree(swap_map);
- if (swap_file)
+ if (swap_file) {
+ if (inode && S_ISREG(inode->i_mode)) {
+ mutex_unlock(&inode->i_mutex);
+ inode = NULL;
+ }
filp_close(swap_file, NULL);
+ }
out:
if (page && !IS_ERR(page)) {
kunmap(page);
@@ -2158,11 +2164,8 @@ out:
}
if (name)
putname(name);
- if (did_down) {
- if (!error)
- inode->i_flags |= S_SWAPFILE;
+ if (inode && S_ISREG(inode->i_mode))
mutex_unlock(&inode->i_mutex);
- }
return error;
}