From f2d97f02961e8b1f8a24befb88ab0e5c886586ff Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:12:24 -0800 Subject: [PATCH] swsusp: remove encryption This patch removes the image encryption that is only used by swsusp instead of zeroing the image after resume in order to prevent someone from reading some confidential data from it in the future and it does not protect the image from being read by an unauthorized person before resume. The functionality it provides should really belong to the user space and will possibly be reimplemented after the swap-handling functionality of swsusp is moved to the user space. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 163 ++------------------------------------------------ 1 file changed, 4 insertions(+), 159 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index c05f46e7348..bd3097c583b 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -30,9 +30,6 @@ * Alex Badea : * Fixed runaway init * - * Andreas Steinmetz : - * Added encrypted suspend option - * * More state savers are welcome. Especially for the scsi layer... 
* * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt @@ -67,10 +64,6 @@ #include #include -#include -#include -#include - #include "power.h" #ifdef CONFIG_HIGHMEM @@ -81,10 +74,6 @@ static int save_highmem(void) { return 0; } static int restore_highmem(void) { return 0; } #endif -#define CIPHER "aes" -#define MAXKEY 32 -#define MAXIV 32 - extern char resume_file[]; /* Local variables that should not be affected by save */ @@ -102,8 +91,7 @@ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { - char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)]; - u8 key_iv[MAXKEY+MAXIV]; + char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; swp_entry_t swsusp_info; char orig_sig[10]; char sig[10]; @@ -123,131 +111,6 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -static int write_page(unsigned long addr, swp_entry_t *loc); -static int bio_read_page(pgoff_t page_off, void *page); - -static u8 key_iv[MAXKEY+MAXIV]; - -#ifdef CONFIG_SWSUSP_ENCRYPT - -static int crypto_init(int mode, void **mem) -{ - int error = 0; - int len; - char *modemsg; - struct crypto_tfm *tfm; - - modemsg = mode ? 
"suspend not possible" : "resume not possible"; - - tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC); - if(!tfm) { - printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg); - error = -EINVAL; - goto out; - } - - if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) { - printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg); - error = -ENOKEY; - goto fail; - } - - if (mode) - get_random_bytes(key_iv, MAXKEY+MAXIV); - - len = crypto_tfm_alg_max_keysize(tfm); - if (len > MAXKEY) - len = MAXKEY; - - if (crypto_cipher_setkey(tfm, key_iv, len)) { - printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg); - error = -EKEYREJECTED; - goto fail; - } - - len = crypto_tfm_alg_ivsize(tfm); - - if (MAXIV < len) { - printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg); - error = -EOVERFLOW; - goto fail; - } - - crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len); - - *mem=(void *)tfm; - - goto out; - -fail: crypto_free_tfm(tfm); -out: return error; -} - -static __inline__ void crypto_exit(void *mem) -{ - crypto_free_tfm((struct crypto_tfm *)mem); -} - -static __inline__ int crypto_write(struct pbe *p, void *mem) -{ - int error = 0; - struct scatterlist src, dst; - - src.page = virt_to_page(p->address); - src.offset = 0; - src.length = PAGE_SIZE; - dst.page = virt_to_page((void *)&swsusp_header); - dst.offset = 0; - dst.length = PAGE_SIZE; - - error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src, - PAGE_SIZE); - - if (!error) - error = write_page((unsigned long)&swsusp_header, - &(p->swap_address)); - return error; -} - -static __inline__ int crypto_read(struct pbe *p, void *mem) -{ - int error = 0; - struct scatterlist src, dst; - - error = bio_read_page(swp_offset(p->swap_address), (void *)p->address); - if (!error) { - src.offset = 0; - src.length = PAGE_SIZE; - dst.offset = 0; - dst.length = PAGE_SIZE; - src.page = dst.page = virt_to_page((void *)p->address); - - error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst, - &src, PAGE_SIZE); - } - return 
error; -} -#else -static __inline__ int crypto_init(int mode, void *mem) -{ - return 0; -} - -static __inline__ void crypto_exit(void *mem) -{ -} - -static __inline__ int crypto_write(struct pbe *p, void *mem) -{ - return write_page(p->address, &(p->swap_address)); -} - -static __inline__ int crypto_read(struct pbe *p, void *mem) -{ - return bio_read_page(swp_offset(p->swap_address), (void *)p->address); -} -#endif - static int mark_swapfiles(swp_entry_t prev) { int error; @@ -259,7 +122,6 @@ static int mark_swapfiles(swp_entry_t prev) !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV); swsusp_header.swsusp_info = prev; error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), @@ -405,10 +267,6 @@ static int data_write(void) int error = 0, i = 0; unsigned int mod = nr_copy_pages / 100; struct pbe *p; - void *tfm; - - if ((error = crypto_init(1, &tfm))) - return error; if (!mod) mod = 1; @@ -417,14 +275,11 @@ static int data_write(void) for_each_pbe (p, pagedir_nosave) { if (!(i%mod)) printk( "\b\b\b\b%3d%%", i / mod ); - if ((error = crypto_write(p, tfm))) { - crypto_exit(tfm); + if ((error = write_page(p->address, &p->swap_address))) return error; - } i++; } printk("\b\b\b\bdone\n"); - crypto_exit(tfm); return error; } @@ -550,7 +405,6 @@ static int write_suspend_image(void) if ((error = close_swap())) goto FreePagedir; Done: - memset(key_iv, 0, MAXKEY+MAXIV); return error; FreePagedir: free_pagedir_entries(); @@ -812,8 +666,6 @@ static int check_sig(void) return error; if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); - memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV); - memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV); /* * Reset swap signature now. 
@@ -840,10 +692,6 @@ static int data_read(struct pbe *pblist) int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; - void *tfm; - - if ((error = crypto_init(0, &tfm))) - return error; if (!mod) mod = 1; @@ -855,15 +703,13 @@ static int data_read(struct pbe *pblist) if (!(i % mod)) printk("\b\b\b\b%3d%%", i / mod); - if ((error = crypto_read(p, tfm))) { - crypto_exit(tfm); + if ((error = bio_read_page(swp_offset(p->swap_address), + (void *)p->address))) return error; - } i++; } printk("\b\b\b\bdone\n"); - crypto_exit(tfm); return error; } @@ -986,7 +832,6 @@ int swsusp_read(void) error = read_suspend_image(); blkdev_put(resume_bdev); - memset(key_iv, 0, MAXKEY+MAXIV); if (!error) pr_debug("swsusp: Reading resume file was successful\n"); -- cgit v1.2.3-70-g09d2 From 7088a5c00103ef48782d6c359cd12b13a10666e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:13:05 -0800 Subject: [PATCH] swsusp: introduce the swap map structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch introduces the swap map structure that can be used by swsusp for keeping track of data pages written to the swap. The structure itself is described in a comment within the patch. The overall idea is to reduce the amount of metadata written to the swap and to write and read the image pages sequentially, in a file-like way. This makes the swap-handling part of swsusp fairly independent of its snapshot-handling part and will hopefully allow us to completely separate these two parts in the future. This patch is needed to remove the suspend image size limit imposed by the limited size of the swsusp_info structure, which is essential for x86-64 systems with more than 512 MB of RAM. Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 6 +- kernel/power/disk.c | 8 +- kernel/power/power.h | 13 +- kernel/power/snapshot.c | 14 +- kernel/power/swsusp.c | 558 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 418 insertions(+), 181 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a61c04f804b..33bbaea23aa 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -14,11 +14,7 @@ typedef struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ - swp_entry_t swap_address; - - struct pbe *next; /* also used as scratch space at - * end of page (see link, diskpage) - */ + struct pbe *next; } suspend_pagedir_t; #define for_each_pbe(pbe, pblist) \ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 4d944b281b2..76a5131b0e8 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -25,9 +25,9 @@ extern suspend_disk_method_t pm_disk_mode; extern int swsusp_suspend(void); -extern int swsusp_write(void); +extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages); extern int swsusp_check(void); -extern int swsusp_read(void); +extern int swsusp_read(struct pbe **pblist_ptr); extern void swsusp_close(void); extern int swsusp_resume(void); @@ -176,7 +176,7 @@ int pm_suspend_disk(void) if (in_suspend) { device_resume(); pr_debug("PM: writing image.\n"); - error = swsusp_write(); + error = swsusp_write(pagedir_nosave, nr_copy_pages); if (!error) power_down(pm_disk_mode); else { @@ -247,7 +247,7 @@ static int software_resume(void) pr_debug("PM: Reading swsusp image.\n"); - if ((error = swsusp_read())) { + if ((error = swsusp_read(&pagedir_nosave))) { swsusp_free(); goto Thaw; } diff --git a/kernel/power/power.h b/kernel/power/power.h index 6c042b5ee14..977877c6dcf 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h 
@@ -9,19 +9,14 @@ #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) #endif -#define MAX_PBES ((PAGE_SIZE - sizeof(struct new_utsname) \ - - 4 - 3*sizeof(unsigned long) - sizeof(int) \ - - sizeof(void *)) / sizeof(swp_entry_t)) - struct swsusp_info { struct new_utsname uts; u32 version_code; unsigned long num_physpages; int cpus; unsigned long image_pages; - unsigned long pagedir_pages; - suspend_pagedir_t * suspend_pagedir; - swp_entry_t pagedir[MAX_PBES]; + unsigned long pages; + swp_entry_t start; } __attribute__((aligned(PAGE_SIZE))); @@ -67,6 +62,8 @@ extern asmlinkage int swsusp_arch_resume(void); extern void free_pagedir(struct pbe *pblist); extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); -extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); extern void swsusp_free(void); extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); +extern unsigned int snapshot_nr_pages(void); +extern struct pbe *snapshot_pblist(void); +extern void snapshot_pblist_set(struct pbe *pblist); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 4a6dbcefd37..152d56cdf01 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -33,6 +33,9 @@ #include "power.h" +struct pbe *pagedir_nosave; +unsigned int nr_copy_pages; + #ifdef CONFIG_HIGHMEM struct highmem_page { char *data; @@ -244,7 +247,7 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) +static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; unsigned int num = PBES_PER_PAGE; @@ -261,7 +264,6 @@ void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) p->next = p + 1; p->next = NULL; } - pr_debug("create_pbe_list(): initialized %d PBEs\n", num); } /** @@ -332,7 +334,8 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed if (!pbe) 
{ /* get_zeroed_page() failed */ free_pagedir(pblist); pblist = NULL; - } + } else + create_pbe_list(pblist, nr_pages); return pblist; } @@ -395,7 +398,6 @@ static struct pbe *swsusp_alloc(unsigned int nr_pages) printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); return NULL; } - create_pbe_list(pblist, nr_pages); if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { printk(KERN_ERR "suspend: Allocating image pages failed.\n"); @@ -421,10 +423,6 @@ asmlinkage int swsusp_save(void) (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE, PAGES_FOR_IO, nr_free_pages()); - /* This is needed because of the fixed size of swsusp_info */ - if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE) - return -ENOSPC; - if (!enough_free_mem(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index bd3097c583b..b09bd7c0998 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -30,6 +30,9 @@ * Alex Badea : * Fixed runaway init * + * Rafael J. Wysocki + * Added the swap map data structure and reworked the handling of swap + * * More state savers are welcome. Especially for the scsi layer... * * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt @@ -76,18 +79,6 @@ static int restore_highmem(void) { return 0; } extern char resume_file[]; -/* Local variables that should not be affected by save */ -unsigned int nr_copy_pages __nosavedata = 0; - -/* Suspend pagedir is allocated before final copy, therefore it - must be freed after resume - - Warning: this is even more evil than it seems. Pagedirs this file - talks about are completely different from page directories used by - MMU hardware. - */ -suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; - #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { @@ -238,48 +229,205 @@ static int write_page(unsigned long addr, swp_entry_t *loc) } /** - * data_free - Free the swap entries used by the saved image. 
+ * Swap map-handling functions + * + * The swap map is a data structure used for keeping track of each page + * written to the swap. It consists of many swap_map_page structures + * that each contain an array of MAP_PAGE_SIZE swap entries. + * These structures are linked together with the help of either the + * .next (in memory) or the .next_swap (in swap) member. * - * Walk the list of used swap entries and free each one. - * This is only used for cleanup when suspend fails. + * The swap map is created during suspend. At that time we need to keep + * it in memory, because we have to free all of the allocated swap + * entries if an error occurs. The memory needed is preallocated + * so that we know in advance if there's enough of it. + * + * The first swap_map_page structure is filled with the swap entries that + * correspond to the first MAP_PAGE_SIZE data pages written to swap and + * so on. After all of the data pages have been written, the order + * of the swap_map_page structures in the map is reversed so that they + * can be read from swap in the original order. This causes the data + * pages to be loaded in exactly the same order in which they have been + * saved. + * + * During resume we only need to use one swap_map_page structure + * at a time, which means that we only need to use two memory pages for + * reading the image - one for reading the swap_map_page structures + * and the second for reading the data pages from swap.
*/ -static void data_free(void) + +#define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \ + / sizeof(swp_entry_t)) + +struct swap_map_page { + swp_entry_t entries[MAP_PAGE_SIZE]; + swp_entry_t next_swap; + struct swap_map_page *next; +}; + +static inline void free_swap_map(struct swap_map_page *swap_map) { - swp_entry_t entry; - struct pbe *p; + struct swap_map_page *swp; - for_each_pbe (p, pagedir_nosave) { - entry = p->swap_address; - if (entry.val) - swap_free(entry); - else - break; + while (swap_map) { + swp = swap_map->next; + free_page((unsigned long)swap_map); + swap_map = swp; + } +} + +static struct swap_map_page *alloc_swap_map(unsigned int nr_pages) +{ + struct swap_map_page *swap_map, *swp; + unsigned n = 0; + + if (!nr_pages) + return NULL; + + pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages); + swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + swp = swap_map; + for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) { + swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + swp = swp->next; + if (!swp) { + free_swap_map(swap_map); + return NULL; + } } + return swap_map; } /** - * data_write - Write saved image to swap. - * - * Walk the list of pages in the image and sync each one to swap. 
+ * reverse_swap_map - reverse the order of pages in the swap map + * @swap_map */ -static int data_write(void) + +static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map) { - int error = 0, i = 0; - unsigned int mod = nr_copy_pages / 100; - struct pbe *p; + struct swap_map_page *prev, *next; + + prev = NULL; + while (swap_map) { + next = swap_map->next; + swap_map->next = prev; + prev = swap_map; + swap_map = next; + } + return prev; +} - if (!mod) - mod = 1; +/** + * free_swap_map_entries - free the swap entries allocated to store + * the swap map @swap_map (this is only called in case of an error) + */ +static inline void free_swap_map_entries(struct swap_map_page *swap_map) +{ + while (swap_map) { + if (swap_map->next_swap.val) + swap_free(swap_map->next_swap); + swap_map = swap_map->next; + } +} - printk( "Writing data to swap (%d pages)... ", nr_copy_pages ); - for_each_pbe (p, pagedir_nosave) { - if (!(i%mod)) - printk( "\b\b\b\b%3d%%", i / mod ); - if ((error = write_page(p->address, &p->swap_address))) +/** + * save_swap_map - save the swap map used for tracing the data pages + * stored in the swap + */ + +static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start) +{ + swp_entry_t entry = (swp_entry_t){0}; + int error; + + while (swap_map) { + swap_map->next_swap = entry; + if ((error = write_page((unsigned long)swap_map, &entry))) return error; - i++; + swap_map = swap_map->next; } - printk("\b\b\b\bdone\n"); + *start = entry; + return 0; +} + +/** + * free_image_entries - free the swap entries allocated to store + * the image data pages (this is only called in case of an error) + */ + +static inline void free_image_entries(struct swap_map_page *swp) +{ + unsigned k; + + while (swp) { + for (k = 0; k < MAP_PAGE_SIZE; k++) + if (swp->entries[k].val) + swap_free(swp->entries[k]); + swp = swp->next; + } +} + +/** + * The swap_map_handle structure is used for handling the swap map in + * a file-alike way + */ + 
+struct swap_map_handle { + struct swap_map_page *cur; + unsigned int k; +}; + +static inline void init_swap_map_handle(struct swap_map_handle *handle, + struct swap_map_page *map) +{ + handle->cur = map; + handle->k = 0; +} + +static inline int swap_map_write_page(struct swap_map_handle *handle, + unsigned long addr) +{ + int error; + + error = write_page(addr, handle->cur->entries + handle->k); + if (error) + return error; + if (++handle->k >= MAP_PAGE_SIZE) { + handle->cur = handle->cur->next; + handle->k = 0; + } + return 0; +} + +/** + * save_image_data - save the data pages pointed to by the PBEs + * from the list @pblist using the swap map handle @handle + * (assume there are @nr_pages data pages to save) + */ + +static int save_image_data(struct pbe *pblist, + struct swap_map_handle *handle, + unsigned int nr_pages) +{ + unsigned int m; + struct pbe *p; + int error = 0; + + printk("Saving image data pages (%u pages) ... ", nr_pages); + m = nr_pages / 100; + if (!m) + m = 1; + nr_pages = 0; + for_each_pbe (p, pblist) { + error = swap_map_write_page(handle, p->address); + if (error) + break; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; + } + if (!error) + printk("\b\b\b\bdone\n"); return error; } @@ -295,19 +443,20 @@ static void dump_info(void) pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname); pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus); pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages); - pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages); + pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages); } -static void init_header(void) +static void init_header(unsigned int nr_pages) { memset(&swsusp_info, 0, sizeof(swsusp_info)); swsusp_info.version_code = LINUX_VERSION_CODE; swsusp_info.num_physpages = num_physpages; memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname)); - swsusp_info.suspend_pagedir = pagedir_nosave; swsusp_info.cpus = num_online_cpus(); - 
swsusp_info.image_pages = nr_copy_pages; + swsusp_info.image_pages = nr_pages; + swsusp_info.pages = nr_pages + + ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT); } static int close_swap(void) @@ -326,39 +475,53 @@ static int close_swap(void) } /** - * free_pagedir_entries - Free pages used by the page directory. - * - * This is used during suspend for error recovery. + * pack_orig_addresses - the .orig_address fields of the PBEs from the + * list starting at @pbe are stored in the array @buf[] (1 page) */ -static void free_pagedir_entries(void) +static inline struct pbe *pack_orig_addresses(unsigned long *buf, + struct pbe *pbe) { - int i; + int j; - for (i = 0; i < swsusp_info.pagedir_pages; i++) - swap_free(swsusp_info.pagedir[i]); + for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { + buf[j] = pbe->orig_address; + pbe = pbe->next; + } + if (!pbe) + for (; j < PAGE_SIZE / sizeof(long); j++) + buf[j] = 0; + return pbe; } - /** - * write_pagedir - Write the array of pages holding the page directory. - * @last: Last swap entry we write (needed for header). + * save_image_metadata - save the .orig_address fields of the PBEs + * from the list @pblist using the swap map handle @handle */ -static int write_pagedir(void) +static int save_image_metadata(struct pbe *pblist, + struct swap_map_handle *handle) { - int error = 0; + unsigned long *buf; unsigned int n = 0; - struct pbe *pbe; + struct pbe *p; + int error = 0; - printk( "Writing pagedir..."); - for_each_pb_page (pbe, pagedir_nosave) { - if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++]))) - return error; + printk("Saving image metadata ... 
"); + buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); + if (!buf) + return -ENOMEM; + p = pblist; + while (p) { + p = pack_orig_addresses(buf, p); + error = swap_map_write_page(handle, (unsigned long)buf); + if (error) + break; + n++; } - - swsusp_info.pagedir_pages = n; - printk("done (%u pages)\n", n); + free_page((unsigned long)buf); + if (!error) + printk("done (%u pages saved)\n", n); return error; } @@ -384,33 +547,48 @@ static int enough_swap(unsigned int nr_pages) /** * write_suspend_image - Write entire image and metadata. - * */ -static int write_suspend_image(void) +static int write_suspend_image(struct pbe *pblist, unsigned int nr_pages) { + struct swap_map_page *swap_map; + struct swap_map_handle handle; int error; - if (!enough_swap(nr_copy_pages)) { + if (!enough_swap(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free swap\n"); return -ENOSPC; } - init_header(); - if ((error = data_write())) - goto FreeData; + init_header(nr_pages); + swap_map = alloc_swap_map(swsusp_info.pages); + if (!swap_map) + return -ENOMEM; + init_swap_map_handle(&handle, swap_map); - if ((error = write_pagedir())) - goto FreePagedir; + error = save_image_metadata(pblist, &handle); + if (!error) + error = save_image_data(pblist, &handle, nr_pages); + if (error) + goto Free_image_entries; - if ((error = close_swap())) - goto FreePagedir; - Done: + swap_map = reverse_swap_map(swap_map); + error = save_swap_map(swap_map, &swsusp_info.start); + if (error) + goto Free_map_entries; + + error = close_swap(); + if (error) + goto Free_map_entries; + +Free_swap_map: + free_swap_map(swap_map); return error; - FreePagedir: - free_pagedir_entries(); - FreeData: - data_free(); - goto Done; + +Free_map_entries: + free_swap_map_entries(swap_map); +Free_image_entries: + free_image_entries(swap_map); + goto Free_swap_map; } /* It is important _NOT_ to umount filesystems at this point. We want @@ -418,7 +596,7 @@ static int write_suspend_image(void) * filesystem clean: it is not. 
(And it does not matter, if we resume * correctly, we'll mark system clean, anyway.) */ -int swsusp_write(void) +int swsusp_write(struct pbe *pblist, unsigned int nr_pages) { int error; @@ -427,14 +605,12 @@ int swsusp_write(void) return error; } lock_swapdevices(); - error = write_suspend_image(); + error = write_suspend_image(pblist, nr_pages); /* This will unlock ignored swap devices since writing is finished */ lock_swapdevices(); return error; } - - int swsusp_suspend(void) { int error; @@ -531,7 +707,6 @@ static void copy_page_backup_list(struct pbe *dst, struct pbe *src) /* We assume both lists contain the same number of elements */ while (src) { dst->orig_address = src->orig_address; - dst->swap_address = src->swap_address; dst = dst->next; src = src->next; } @@ -611,6 +786,61 @@ static int bio_write_page(pgoff_t page_off, void *page) return submit(WRITE, page_off, page); } +/** + * The following functions allow us to read data using a swap map + * in a file-alike way + */ + +static inline void release_swap_map_reader(struct swap_map_handle *handle) +{ + if (handle->cur) + free_page((unsigned long)handle->cur); + handle->cur = NULL; +} + +static inline int get_swap_map_reader(struct swap_map_handle *handle, + swp_entry_t start) +{ + int error; + + if (!swp_offset(start)) + return -EINVAL; + handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); + if (!handle->cur) + return -ENOMEM; + error = bio_read_page(swp_offset(start), handle->cur); + if (error) { + release_swap_map_reader(handle); + return error; + } + handle->k = 0; + return 0; +} + +static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf) +{ + unsigned long offset; + int error; + + if (!handle->cur) + return -EINVAL; + offset = swp_offset(handle->cur->entries[handle->k]); + if (!offset) + return -EINVAL; + error = bio_read_page(offset, buf); + if (error) + return error; + if (++handle->k >= MAP_PAGE_SIZE) { + handle->k = 0; + offset = 
swp_offset(handle->cur->next_swap); + if (!offset) + release_swap_map_reader(handle); + else + error = bio_read_page(offset, handle->cur); + } + return error; +} + /* * Sanity check if this image makes sense with this kernel/swap context * I really don't think that it's foolproof but more than nothing.. @@ -639,7 +869,6 @@ static const char *sanity_check(void) return NULL; } - static int check_header(void) { const char *reason = NULL; @@ -653,7 +882,6 @@ static int check_header(void) printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); return -EPERM; } - nr_copy_pages = swsusp_info.image_pages; return error; } @@ -680,75 +908,88 @@ static int check_sig(void) } /** - * data_read - Read image pages from swap. - * - * You do not need to check for overlaps, check_pagedir() - * already did that. + * load_image_data - load the image data using the swap map handle + * @handle and store them using the page backup list @pblist + * (assume there are @nr_pages pages to load) */ -static int data_read(struct pbe *pblist) +static int load_image_data(struct pbe *pblist, + struct swap_map_handle *handle, + unsigned int nr_pages) { + int error; + unsigned int m; struct pbe *p; - int error = 0; - int i = 0; - int mod = swsusp_info.image_pages / 100; - - if (!mod) - mod = 1; - - printk("swsusp: Reading image data (%lu pages): ", - swsusp_info.image_pages); - - for_each_pbe (p, pblist) { - if (!(i % mod)) - printk("\b\b\b\b%3d%%", i / mod); - if ((error = bio_read_page(swp_offset(p->swap_address), - (void *)p->address))) - return error; - - i++; + if (!pblist) + return -EINVAL; + printk("Loading image data pages (%u pages) ... 
", nr_pages); + m = nr_pages / 100; + if (!m) + m = 1; + nr_pages = 0; + p = pblist; + while (p) { + error = swap_map_read_page(handle, (void *)p->address); + if (error) + break; + p = p->next; + if (!(nr_pages % m)) + printk("\b\b\b\b%3d%%", nr_pages / m); + nr_pages++; } - printk("\b\b\b\bdone\n"); + if (!error) + printk("\b\b\b\bdone\n"); return error; } /** - * read_pagedir - Read page backup list pages from swap + * unpack_orig_addresses - copy the elements of @buf[] (1 page) to + * the PBEs in the list starting at @pbe */ -static int read_pagedir(struct pbe *pblist) +static inline struct pbe *unpack_orig_addresses(unsigned long *buf, + struct pbe *pbe) { - struct pbe *pbpage, *p; - unsigned int i = 0; - int error; + int j; - if (!pblist) - return -EFAULT; + for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) { + pbe->orig_address = buf[j]; + pbe = pbe->next; + } + return pbe; +} - printk("swsusp: Reading pagedir (%lu pages)\n", - swsusp_info.pagedir_pages); +/** + * load_image_metadata - load the image metadata using the swap map + * handle @handle and put them into the PBEs in the list @pblist + */ - for_each_pb_page (pbpage, pblist) { - unsigned long offset = swp_offset(swsusp_info.pagedir[i++]); +static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle) +{ + struct pbe *p; + unsigned long *buf; + unsigned int n = 0; + int error = 0; - error = -EFAULT; - if (offset) { - p = (pbpage + PB_PAGE_SKIP)->next; - error = bio_read_page(offset, (void *)pbpage); - (pbpage + PB_PAGE_SKIP)->next = p; - } + printk("Loading image metadata ... 
"); + buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC); + if (!buf) + return -ENOMEM; + p = pblist; + while (p) { + error = swap_map_read_page(handle, buf); if (error) break; + p = unpack_orig_addresses(buf, p); + n++; } - + free_page((unsigned long)buf); if (!error) - BUG_ON(i != swsusp_info.pagedir_pages); - + printk("done (%u pages loaded)\n", n); return error; } - static int check_suspend_image(void) { int error = 0; @@ -762,34 +1003,39 @@ static int check_suspend_image(void) return 0; } -static int read_suspend_image(void) +static int read_suspend_image(struct pbe **pblist_ptr) { int error = 0; - struct pbe *p; + struct pbe *p, *pblist; + struct swap_map_handle handle; + unsigned int nr_pages = swsusp_info.image_pages; - if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) + p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0); + if (!p) return -ENOMEM; - - if ((error = read_pagedir(p))) + error = get_swap_map_reader(&handle, swsusp_info.start); + if (error) + /* The PBE list at p will be released by swsusp_free() */ return error; - create_pbe_list(p, nr_copy_pages); - mark_unsafe_pages(p); - pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); - if (pagedir_nosave) { - create_pbe_list(pagedir_nosave, nr_copy_pages); - copy_page_backup_list(pagedir_nosave, p); + error = load_image_metadata(p, &handle); + if (!error) { + mark_unsafe_pages(p); + pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1); + if (pblist) + copy_page_backup_list(pblist, p); + free_pagedir(p); + if (!pblist) + error = -ENOMEM; + + /* Allocate memory for the image and read the data from swap */ + if (!error) + error = alloc_data_pages(pblist, GFP_ATOMIC, 1); + if (!error) + error = load_image_data(pblist, &handle, nr_pages); + if (!error) + *pblist_ptr = pblist; } - free_pagedir(p); - if (!pagedir_nosave) - return -ENOMEM; - - /* Allocate memory for the image and read the data from swap */ - - error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); - - if (!error) - error = 
data_read(pagedir_nosave); - + release_swap_map_reader(&handle); return error; } @@ -821,7 +1067,7 @@ int swsusp_check(void) * swsusp_read - Read saved image from swap. */ -int swsusp_read(void) +int swsusp_read(struct pbe **pblist_ptr) { int error; @@ -830,7 +1076,7 @@ int swsusp_read(void) return PTR_ERR(resume_bdev); } - error = read_suspend_image(); + error = read_suspend_image(pblist_ptr); blkdev_put(resume_bdev); if (!error) -- cgit v1.2.3-70-g09d2 From 72a97e08394a3b2e75481ff680ec2a0591e3cba4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:13:46 -0800 Subject: [PATCH] swsusp: improve freeing of memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes swsusp free only as much memory as needed to complete the suspend and not as much as possible. In most cases this should speed up the suspend and make the system much more responsive after resume, especially if a GUI (e.g. X Windows) is used. If needed, the old behavior (i.e. to free as much memory as possible during suspend) can be restored by unsetting FAST_FREE in power.h. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 2 +- kernel/power/disk.c | 30 +++-------------------- kernel/power/power.h | 14 ++++++++--- kernel/power/snapshot.c | 65 +++++++++++++++++++++++++++++++++++++++++++++---- kernel/power/swsusp.c | 52 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 126 insertions(+), 37 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 33bbaea23aa..5dc94e777fa 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -73,6 +73,6 @@ unsigned long get_safe_page(gfp_t gfp_mask); * XXX: We try to keep some more pages free so that I/O operations succeed * without paging. Might this be more?
*/ -#define PAGES_FOR_IO 512 +#define PAGES_FOR_IO 1024 #endif /* _LINUX_SWSUSP_H */ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 76a5131b0e8..9e51cdf7b78 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -24,6 +24,7 @@ extern suspend_disk_method_t pm_disk_mode; +extern int swsusp_shrink_memory(void); extern int swsusp_suspend(void); extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages); extern int swsusp_check(void); @@ -73,31 +74,6 @@ static void power_down(suspend_disk_method_t mode) static int in_suspend __nosavedata = 0; -/** - * free_some_memory - Try to free as much memory as possible - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped at this point, or - * livelock is possible. - */ - -static void free_some_memory(void) -{ - unsigned int i = 0; - unsigned int tmp; - unsigned long pages = 0; - char *p = "-\\|/"; - - printk("Freeing memory... "); - while ((tmp = shrink_all_memory(10000))) { - pages += tmp; - printk("\b%c", p[i++ % 4]); - } - printk("\bdone (%li pages freed)\n", pages); -} - - static inline void platform_finish(void) { if (pm_disk_mode == PM_DISK_PLATFORM) { @@ -127,8 +103,8 @@ static int prepare_processes(void) } /* Free memory before shutting down devices. 
*/ - free_some_memory(); - return 0; + if (!(error = swsusp_shrink_memory())) + return 0; thaw: thaw_processes(); enable_nonboot_cpus(); diff --git a/kernel/power/power.h b/kernel/power/power.h index 977877c6dcf..acdc83b3d89 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -49,18 +49,26 @@ extern void thaw_processes(void); extern int pm_prepare_console(void); extern void pm_restore_console(void); - /* References to section boundaries */ extern const void __nosave_begin, __nosave_end; extern unsigned int nr_copy_pages; -extern suspend_pagedir_t *pagedir_nosave; -extern suspend_pagedir_t *pagedir_save; +extern struct pbe *pagedir_nosave; + +/* + * This compilation switch determines the way in which memory will be freed + * during suspend. If defined, only as much memory will be freed as needed + * to complete the suspend, which will make it go faster. Otherwise, the + * largest possible amount of memory will be freed. + */ +#define FAST_FREE 1 extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); +extern unsigned int count_data_pages(void); extern void free_pagedir(struct pbe *pblist); +extern void release_eaten_pages(void); extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); extern void swsusp_free(void); extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 152d56cdf01..e80d282dbf5 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -37,6 +37,31 @@ struct pbe *pagedir_nosave; unsigned int nr_copy_pages; #ifdef CONFIG_HIGHMEM +unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned long zone_pfn; + unsigned int n = 0; + + for_each_zone (zone) + if (is_highmem(zone)) { + mark_free_pages(zone); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { + struct page *page; + unsigned long pfn = zone_pfn + zone->zone_start_pfn; + if 
(!pfn_valid(pfn)) + continue; + page = pfn_to_page(pfn); + if (PageReserved(page)) + continue; + if (PageNosaveFree(page)) + continue; + n++; + } + } + return n; +} + struct highmem_page { char *data; struct page *page; @@ -152,17 +177,15 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn) BUG_ON(PageReserved(page) && PageNosave(page)); if (PageNosave(page)) return 0; - if (PageReserved(page) && pfn_is_nosave(pfn)) { - pr_debug("[nosave pfn 0x%lx]", pfn); + if (PageReserved(page) && pfn_is_nosave(pfn)) return 0; - } if (PageNosaveFree(page)) return 0; return 1; } -static unsigned count_data_pages(void) +unsigned int count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; @@ -266,6 +289,35 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) } } +/** + * On resume it is necessary to trace and eventually free the unsafe + * pages that have been allocated, because they are needed for I/O + * (on x86-64 we likely will "eat" these pages once again while + * creating the temporary page translation tables) + */ + +struct eaten_page { + struct eaten_page *next; + char padding[PAGE_SIZE - sizeof(void *)]; +}; + +static struct eaten_page *eaten_pages = NULL; + +void release_eaten_pages(void) +{ + struct eaten_page *p, *q; + + p = eaten_pages; + while (p) { + q = p->next; + /* We don't want swsusp_free() to free this page again */ + ClearPageNosave(virt_to_page(p)); + free_page((unsigned long)p); + p = q; + } + eaten_pages = NULL; +} + /** * @safe_needed - on resume, for storing the PBE list and the image, * we can only use memory pages that do not conflict with the pages @@ -284,9 +336,12 @@ static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) if (safe_needed) do { res = (void *)get_zeroed_page(gfp_mask); - if (res && PageNosaveFree(virt_to_page(res))) + if (res && PageNosaveFree(virt_to_page(res))) { /* This is for swsusp_free() */ SetPageNosave(virt_to_page(res)); + ((struct eaten_page *)res)->next = 
eaten_pages; + eaten_pages = res; + } } while (res && PageNosaveFree(virt_to_page(res))); else res = (void *)get_zeroed_page(gfp_mask); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index b09bd7c0998..f77f9397a36 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -70,11 +70,13 @@ #include "power.h" #ifdef CONFIG_HIGHMEM +unsigned int count_highmem_pages(void); int save_highmem(void); int restore_highmem(void); #else static int save_highmem(void) { return 0; } static int restore_highmem(void) { return 0; } +static unsigned int count_highmem_pages(void) { return 0; } #endif extern char resume_file[]; @@ -611,6 +613,52 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages) return error; } +/** + * swsusp_shrink_memory - Try to free as much memory as needed + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped before it is called, or + * livelock is possible. + */ + +#define SHRINK_BITE 10000 + +int swsusp_shrink_memory(void) +{ + long tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; + + printk("Shrinking memory... 
"); + do { +#ifdef FAST_FREE + tmp = 2 * count_highmem_pages(); + tmp += tmp / 50 + count_data_pages(); + tmp += (tmp + PBES_PER_PAGE - 1) / PBES_PER_PAGE + + PAGES_FOR_IO; + for_each_zone (zone) + if (!is_highmem(zone)) + tmp -= zone->free_pages; + if (tmp > 0) { + tmp = shrink_all_memory(SHRINK_BITE); + if (!tmp) + return -ENOMEM; + pages += tmp; + } +#else + tmp = shrink_all_memory(SHRINK_BITE); + pages += tmp; +#endif + printk("\b%c", p[i++%4]); + } while (tmp > 0); + printk("\bdone (%lu pages freed)\n", pages); + + return 0; +} + int swsusp_suspend(void) { int error; @@ -1030,8 +1078,10 @@ static int read_suspend_image(struct pbe **pblist_ptr) /* Allocate memory for the image and read the data from swap */ if (!error) error = alloc_data_pages(pblist, GFP_ATOMIC, 1); - if (!error) + if (!error) { + release_eaten_pages(); error = load_image_data(pblist, &handle, nr_pages); + } if (!error) *pblist_ptr = pblist; } -- cgit v1.2.3-70-g09d2 From b3a93a255ec33a04776ec50efb30b7a99168dda2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:15:22 -0800 Subject: [PATCH] swsusp: limit image size Limit the size of the suspend image to approx. 500 MB, which should improve the overall performance of swsusp on systems with more than 1 GB of RAM. It introduces the constant IMAGE_SIZE that can be set to the preferred size of the image (in MB) and modifies the memory-shrinking part of swsusp to take this constant into account (500 is the default value of IMAGE_SIZE). Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 8 +++----- kernel/power/swsusp.c | 17 ++++++++--------- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/kernel/power/power.h b/kernel/power/power.h index e521e61e0d9..9b045990361 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -53,12 +53,10 @@ extern unsigned int nr_copy_pages; extern struct pbe *pagedir_nosave; /* - * This compilation switch determines the way in which memory will be freed - * during suspend. If defined, only as much memory will be freed as needed - * to complete the suspend, which will make it go faster. Otherwise, the - * largest possible amount of memory will be freed. + * Preferred image size in MB (set it to zero to get the smallest + * image possible) */ -#define FAST_FREE 1 +#define IMAGE_SIZE 500 extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index f77f9397a36..6d5ceaf4c36 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -626,7 +626,7 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages) int swsusp_shrink_memory(void) { - long tmp; + long size, tmp; struct zone *zone; unsigned long pages = 0; unsigned int i = 0; @@ -634,11 +634,11 @@ int swsusp_shrink_memory(void) printk("Shrinking memory... 
"); do { -#ifdef FAST_FREE - tmp = 2 * count_highmem_pages(); - tmp += tmp / 50 + count_data_pages(); - tmp += (tmp + PBES_PER_PAGE - 1) / PBES_PER_PAGE + + size = 2 * count_highmem_pages(); + size += size / 50 + count_data_pages(); + size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE + PAGES_FOR_IO; + tmp = size; for_each_zone (zone) if (!is_highmem(zone)) tmp -= zone->free_pages; @@ -647,11 +647,10 @@ int swsusp_shrink_memory(void) if (!tmp) return -ENOMEM; pages += tmp; + } else if (size > (IMAGE_SIZE * 1024 * 1024) / PAGE_SIZE) { + tmp = shrink_all_memory(SHRINK_BITE); + pages += tmp; } -#else - tmp = shrink_all_memory(SHRINK_BITE); - pages += tmp; -#endif printk("\b%c", p[i++%4]); } while (tmp > 0); printk("\bdone (%lu pages freed)\n", pages); -- cgit v1.2.3-70-g09d2 From ca0aec0f7a94bf9f07fefa8bfd23282d4e8ceb8a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:15:56 -0800 Subject: [PATCH] swsusp: make image size limit tunable Make the suspend image size limit tunable via /sys/power/image_size. It is necessary for systems on which there is a limited amount of swap available for suspend. It can also be useful for optimizing performance of swsusp on systems with 1 GB of RAM or more. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/power/interface.txt | 11 +++++++++++ Documentation/power/swsusp.txt | 5 +++++ kernel/power/disk.c | 20 ++++++++++++++++++++ kernel/power/power.h | 7 ++----- kernel/power/swsusp.c | 10 +++++++++- 5 files changed, 47 insertions(+), 6 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index f5ebda5f427..bd4ffb5bd49 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -41,3 +41,14 @@ to. Writing to this file will accept one of It will only change to 'firmware' or 'platform' if the system supports it. 
+/sys/power/image_size controls the size of the image created by +the suspend-to-disk mechanism. Writing a string +representing a non-negative integer to this file sets the upper +limit of the image size, in megabytes. The suspend-to-disk mechanism will +do its best to ensure the image size will not exceed that number. However, +if this turns out to be impossible, it will try to suspend anyway using the +smallest image possible. In particular, if "0" is written to this file, the +suspend image will be as small as possible. + +Reading from this file will display the current image size limit, which +is set to 500 MB by default. diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index b0d50840788..cd0fcd89a6f 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -27,6 +27,11 @@ echo shutdown > /sys/power/disk; echo disk > /sys/power/state echo platform > /sys/power/disk; echo disk > /sys/power/state +If you want to limit the suspend image size to N megabytes, do + +echo N > /sys/power/image_size + +before suspend (it is limited to 500 MB by default).
Encrypted suspend image: ------------------------ diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 9e51cdf7b78..e24446f8d8c 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -365,9 +365,29 @@ out: power_attr(resume); +static ssize_t image_size_show(struct subsystem * subsys, char *buf) +{ + return sprintf(buf, "%u\n", image_size); +} + +static ssize_t image_size_store(struct subsystem * subsys, const char * buf, size_t n) +{ + unsigned int size; + + if (sscanf(buf, "%u", &size) == 1) { + image_size = size; + return n; + } + + return -EINVAL; +} + +power_attr(image_size); + static struct attribute * g[] = { &disk_attr.attr, &resume_attr.attr, + &image_size_attr.attr, NULL, }; diff --git a/kernel/power/power.h b/kernel/power/power.h index 9b045990361..273a5b1d70b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -52,11 +52,8 @@ extern const void __nosave_begin, __nosave_end; extern unsigned int nr_copy_pages; extern struct pbe *pagedir_nosave; -/* - * Preferred image size in MB (set it to zero to get the smallest - * image possible) - */ -#define IMAGE_SIZE 500 +/* Preferred image size in MB (default 500) */ +extern unsigned int image_size; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 6d5ceaf4c36..d760a6a719f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -69,6 +69,14 @@ #include "power.h" +/* + * Preferred image size in MB (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N MB, but if that is impossible, it will + * try to create the smallest image possible. 
+ */ +unsigned int image_size = 500; + #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void); int save_highmem(void); @@ -647,7 +655,7 @@ int swsusp_shrink_memory(void) if (!tmp) return -ENOMEM; pages += tmp; - } else if (size > (IMAGE_SIZE * 1024 * 1024) / PAGE_SIZE) { + } else if (size > (image_size * 1024 * 1024) / PAGE_SIZE) { tmp = shrink_all_memory(SHRINK_BITE); pages += tmp; } -- cgit v1.2.3-70-g09d2 From 1adf6c8ea916bc4a2587a881ec7715fece63fb5e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:17:16 -0800 Subject: [PATCH] swsusp: improve handling of swap partitions This changes the handling of swap partitions by swsusp to avoid locking of the swap devices that are not used for suspend and, consequently, simplifies the code. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 128 ++++++++++++++------------------------------------ 1 file changed, 36 insertions(+), 92 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index d760a6a719f..0479c9be7d7 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -104,13 +104,7 @@ static struct swsusp_info swsusp_info; * Saving part... */ -/* We memorize in swapfile_used what swap devices are used for suspension */ -#define SWAPFILE_UNUSED 0 -#define SWAPFILE_SUSPEND 1 /* This is the suspending device */ -#define SWAPFILE_IGNORED 2 /* Those are other swap devices ignored for suspension */ - -static unsigned short swapfile_used[MAX_SWAPFILES]; -static unsigned short root_swap; +static unsigned short root_swap = 0xffff; static int mark_swapfiles(swp_entry_t prev) { @@ -146,7 +140,7 @@ static int mark_swapfiles(swp_entry_t prev) * devfs, since the resume code can only recognize the form /dev/hda4, * but the suspend code would see the long name.) 
*/ -static int is_resume_device(const struct swap_info_struct *swap_info) +static inline int is_resume_device(const struct swap_info_struct *swap_info) { struct file *file = swap_info->swap_file; struct inode *inode = file->f_dentry->d_inode; @@ -156,55 +150,23 @@ static int is_resume_device(const struct swap_info_struct *swap_info) } static int swsusp_swap_check(void) /* This is called before saving image */ -{ - int i, len; - - len=strlen(resume_file); - root_swap = 0xFFFF; - - spin_lock(&swap_lock); - for (i=0; i (nr_pages + PAGES_FOR_IO + + pr_debug("swsusp: free swap pages: %u\n", free_swap); + return free_swap > (nr_pages + PAGES_FOR_IO + (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); } /** - * write_suspend_image - Write entire image and metadata. + * swsusp_write - Write entire image and metadata. + * + * It is important _NOT_ to umount filesystems at this point. We want + * them synced (in case something goes wrong) but we DO not want to mark + * filesystem clean: it is not. (And it does not matter, if we resume + * correctly, we'll mark system clean, anyway.) */ -static int write_suspend_image(struct pbe *pblist, unsigned int nr_pages) + +int swsusp_write(struct pbe *pblist, unsigned int nr_pages) { struct swap_map_page *swap_map; struct swap_map_handle handle; int error; + if ((error = swsusp_swap_check())) { + printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n"); + return error; + } if (!enough_swap(nr_pages)) { printk(KERN_ERR "swsusp: Not enough free swap\n"); return -ENOSPC; @@ -601,26 +565,6 @@ Free_image_entries: goto Free_swap_map; } -/* It is important _NOT_ to umount filesystems at this point. We want - * them synced (in case something goes wrong) but we DO not want to mark - * filesystem clean: it is not. (And it does not matter, if we resume - * correctly, we'll mark system clean, anyway.) 
- */ -int swsusp_write(struct pbe *pblist, unsigned int nr_pages) -{ - int error; - - if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); - return error; - } - lock_swapdevices(); - error = write_suspend_image(pblist, nr_pages); - /* This will unlock ignored swap devices since writing is finished */ - lock_swapdevices(); - return error; -} - /** * swsusp_shrink_memory - Try to free as much memory as needed * -- cgit v1.2.3-70-g09d2 From 277c6e2ad7369558dbd7ffbcc6dcbe16458bf723 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 6 Jan 2006 00:17:58 -0800 Subject: [PATCH] swsusp: save image header first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes the swsusp_info structure become the header of the image in the literal sense (ie. it is saved to the swap and read before any other image data with the help of the swsusp's swap map structure, so generally it is treated in the same way as the rest of the image). The main thing it does is to make swsusp_header contain the offset of the swap map used to track the image data pages rather than the offset of swsusp_info.  Simultaneously, swsusp_info becomes the first image page written to the swap. The other changes are generally consequences of the above with a few exceptions (there's some consolidation in the image reading part as a few functions turn into trivial wrappers around something else). Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 1 - kernel/power/swsusp.c | 190 +++++++++++++++++--------------------------------- 2 files changed, 65 insertions(+), 126 deletions(-) (limited to 'kernel/power/swsusp.c') diff --git a/kernel/power/power.h b/kernel/power/power.h index 273a5b1d70b..7e8492fd142 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -16,7 +16,6 @@ struct swsusp_info { int cpus; unsigned long image_pages; unsigned long pages; - swp_entry_t start; } __attribute__((aligned(PAGE_SIZE))); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 0479c9be7d7..55a18d26abe 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -93,7 +93,7 @@ extern char resume_file[]; static struct swsusp_header { char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; - swp_entry_t swsusp_info; + swp_entry_t image; char orig_sig[10]; char sig[10]; } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; @@ -106,7 +106,7 @@ static struct swsusp_info swsusp_info; static unsigned short root_swap = 0xffff; -static int mark_swapfiles(swp_entry_t prev) +static int mark_swapfiles(swp_entry_t start) { int error; @@ -117,7 +117,7 @@ static int mark_swapfiles(swp_entry_t prev) !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); - swsusp_header.swsusp_info = prev; + swsusp_header.image = start; error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), virt_to_page((unsigned long) @@ -423,22 +423,7 @@ static void init_header(unsigned int nr_pages) swsusp_info.cpus = num_online_cpus(); swsusp_info.image_pages = nr_pages; swsusp_info.pages = nr_pages + - ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT); -} - -static int close_swap(void) -{ - swp_entry_t entry; - int error; - - dump_info(); - error = write_page((unsigned long)&swsusp_info, &entry); - if (!error) { - printk( "S" ); - 
error = mark_swapfiles(entry); - printk( "|\n" ); - } - return error; + ((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; } /** @@ -522,6 +507,7 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages) { struct swap_map_page *swap_map; struct swap_map_handle handle; + swp_entry_t start; int error; if ((error = swsusp_swap_check())) { @@ -539,18 +525,23 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages) return -ENOMEM; init_swap_map_handle(&handle, swap_map); - error = save_image_metadata(pblist, &handle); + error = swap_map_write_page(&handle, (unsigned long)&swsusp_info); + if (!error) + error = save_image_metadata(pblist, &handle); if (!error) error = save_image_data(pblist, &handle, nr_pages); if (error) goto Free_image_entries; swap_map = reverse_swap_map(swap_map); - error = save_swap_map(swap_map, &swsusp_info.start); + error = save_swap_map(swap_map, &start); if (error) goto Free_map_entries; - error = close_swap(); + dump_info(); + printk( "S" ); + error = mark_swapfiles(start); + printk( "|\n" ); if (error) goto Free_map_entries; @@ -840,70 +831,28 @@ static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf) return error; } -/* - * Sanity check if this image makes sense with this kernel/swap context - * I really don't think that it's foolproof but more than nothing.. 
- */ - -static const char *sanity_check(void) +static int check_header(void) { + char *reason = NULL; + dump_info(); if (swsusp_info.version_code != LINUX_VERSION_CODE) - return "kernel version"; + reason = "kernel version"; if (swsusp_info.num_physpages != num_physpages) - return "memory size"; + reason = "memory size"; if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname)) - return "system type"; + reason = "system type"; if (strcmp(swsusp_info.uts.release,system_utsname.release)) - return "kernel release"; + reason = "kernel release"; if (strcmp(swsusp_info.uts.version,system_utsname.version)) - return "version"; + reason = "version"; if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) - return "machine"; -#if 0 - /* We can't use number of online CPUs when we use hotplug to remove them ;-))) */ - if (swsusp_info.cpus != num_possible_cpus()) - return "number of cpus"; -#endif - return NULL; -} - -static int check_header(void) -{ - const char *reason = NULL; - int error; - - if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) - return error; - - /* Is this same machine? */ - if ((reason = sanity_check())) { - printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason); + reason = "machine"; + if (reason) { + printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason); return -EPERM; } - return error; -} - -static int check_sig(void) -{ - int error; - - memset(&swsusp_header, 0, sizeof(swsusp_header)); - if ((error = bio_read_page(0, &swsusp_header))) - return error; - if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { - memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); - - /* - * Reset swap signature now. 
- */ - error = bio_write_page(0, &swsusp_header); - } else { - return -EINVAL; - } - if (!error) - pr_debug("swsusp: Signature found, resuming\n"); - return error; + return 0; } /** @@ -989,33 +938,29 @@ static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handl return error; } -static int check_suspend_image(void) -{ - int error = 0; - - if ((error = check_sig())) - return error; - - if ((error = check_header())) - return error; - - return 0; -} - -static int read_suspend_image(struct pbe **pblist_ptr) +int swsusp_read(struct pbe **pblist_ptr) { - int error = 0; + int error; struct pbe *p, *pblist; struct swap_map_handle handle; - unsigned int nr_pages = swsusp_info.image_pages; + unsigned int nr_pages; + if (IS_ERR(resume_bdev)) { + pr_debug("swsusp: block device not initialised\n"); + return PTR_ERR(resume_bdev); + } + + error = get_swap_map_reader(&handle, swsusp_header.image); + if (!error) + error = swap_map_read_page(&handle, &swsusp_info); + if (!error) + error = check_header(); + if (error) + return error; + nr_pages = swsusp_info.image_pages; p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0); if (!p) return -ENOMEM; - error = get_swap_map_reader(&handle, swsusp_info.start); - if (error) - /* The PBE list at p will be released by swsusp_free() */ - return error; error = load_image_metadata(p, &handle); if (!error) { mark_unsafe_pages(p); @@ -1037,11 +982,18 @@ static int read_suspend_image(struct pbe **pblist_ptr) *pblist_ptr = pblist; } release_swap_map_reader(&handle); + + blkdev_put(resume_bdev); + + if (!error) + pr_debug("swsusp: Reading resume file was successful\n"); + else + pr_debug("swsusp: Error %d resuming\n", error); return error; } /** - * swsusp_check - Check for saved image in swap + * swsusp_check - Check for swsusp signature in the resume device */ int swsusp_check(void) @@ -1051,39 +1003,27 @@ int swsusp_check(void) resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); if (!IS_ERR(resume_bdev)) { 
set_blocksize(resume_bdev, PAGE_SIZE); - error = check_suspend_image(); + memset(&swsusp_header, 0, sizeof(swsusp_header)); + if ((error = bio_read_page(0, &swsusp_header))) + return error; + if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { + memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); + /* Reset swap signature now */ + error = bio_write_page(0, &swsusp_header); + } else { + return -EINVAL; + } if (error) - blkdev_put(resume_bdev); - } else + blkdev_put(resume_bdev); + else + pr_debug("swsusp: Signature found, resuming\n"); + } else { error = PTR_ERR(resume_bdev); - - if (!error) - pr_debug("swsusp: resume file found\n"); - else - pr_debug("swsusp: Error %d check for resume file\n", error); - return error; -} - -/** - * swsusp_read - Read saved image from swap. - */ - -int swsusp_read(struct pbe **pblist_ptr) -{ - int error; - - if (IS_ERR(resume_bdev)) { - pr_debug("swsusp: block device not initialised\n"); - return PTR_ERR(resume_bdev); } - error = read_suspend_image(pblist_ptr); - blkdev_put(resume_bdev); + if (error) + pr_debug("swsusp: Error %d check for resume file\n", error); - if (!error) - pr_debug("swsusp: Reading resume file was successful\n"); - else - pr_debug("swsusp: Error %d resuming\n", error); return error; } -- cgit v1.2.3-70-g09d2