From 7602bdf2fd14a40dd9b104e516fdc05e1bd17952 Mon Sep 17 00:00:00 2001 From: Ashwin Chaugule Date: Wed, 6 Dec 2006 20:31:57 -0800 Subject: [PATCH] new scheme to preempt swap token The new swap token patches replace the current token traversal algo. The old algo had a crude timeout parameter that was used to handover the token from one task to another. This algo, transfers the token to the tasks that are in need of the token. The urgency for the token is based on the number of times a task is required to swap-in pages. Accordingly, the priority of a task is incremented if it has been badly affected due to swap-outs. To ensure that the token doesnt bounce around rapidly, the token holders are given a priority boost. The priority of tasks is also decremented, if their rate of swap-in's keeps reducing. This way, the condition to check whether to pre-empt the swap token, is a matter of comparing two task's priority fields. [akpm@osdl.org: cleanups] Signed-off-by: Ashwin Chaugule Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index eafe4a7b823..cad6a16260f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -344,9 +344,16 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Token based thrashing protection. */ - unsigned long swap_token_time; - char recent_pagein; + /* Swap token stuff */ + /* + * Last value of global fault stamp as seen by this process. + * In other words, this value gives an indication of how long + * it has been since this task got the token. + * Look at mm/thrash.c + */ + unsigned int faultstamp; + unsigned int token_priority; + unsigned int last_interval; /* coredumping support */ int core_waiters; -- cgit v1.2.3-70-g09d2 From 36de6437866bbb1d37e2312ff4f95ee4ed6d2b61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Dec 2006 20:33:42 -0800 Subject: [PATCH] Save some bytes in struct mm_struct Before: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ unsigned int dumpable:2; /* 432 4 */ cpumask_t cpu_vm_mask; /* 436 4 */ mm_context_t context; /* 440 68 */ long unsigned int swap_token_time; /* 508 4 */ /* ---------- cacheline 16 boundary ---------- */ char recent_pagein; /* 512 1 */ /* XXX 3 bytes hole, try to pack */ int core_waiters; /* 516 4 */ struct completion * core_startup_done; /* 520 4 */ struct completion core_done; /* 524 52 */ rwlock_t ioctx_list_lock; /* 576 36 */ struct kioctx * ioctx_list; /* 612 4 */ }; /* size: 616, sum members: 613, holes: 1, sum holes: 3, cachelines: 20, last cacheline: 8 bytes */ After: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ cpumask_t cpu_vm_mask; /* 432 4 */ mm_context_t context; /* 436 68 */ long unsigned int swap_token_time; /* 504 4 */ char recent_pagein; /* 508 1 */ unsigned char dumpable:2; /* 509 1 */ /* XXX 2 bytes hole, try to pack */ int core_waiters; /* 512 4 */ struct completion * core_startup_done; /* 516 4 */ struct completion core_done; /* 520 52 */ rwlock_t ioctx_list_lock; /* 572 36 */ struct kioctx * ioctx_list; /* 608 4 */ }; /* size: 612, sum members: 610, holes: 1, sum holes: 2, cachelines: 20, last cacheline: 4 bytes */ [acme@newtoy net-2.6.20]$ codiff -V /tmp/sched.o.before kernel/sched.o /pub/scm/linux/kernel/git/acme/net-2.6.20/kernel/sched.c: struct mm_struct | -4 dumpable:2; from: unsigned int /* 432(30) 4(2) */ to: unsigned char /* 509(6) 1(2) */ < SNIP other offset changes > 1 struct changed [acme@newtoy net-2.6.20]$ I'm not aware of any problem about using 2 byte wide bitfields where previously a 4 byte wide one was, holler if there is any, I wouldn't be surprised, bitfields are things from hell. For the curious, 432(30) means: at offset 432 from the struct start, at offset 30 in the bitfield (yeah, it comes backwards, hellish, huh?) ditto for 509(6), while 4(2) and 1(2) means "struct field size(bitfield size)". Now we have a 2 bytes hole and are using only 4 bytes of the last 32 bytes cacheline, any takers? :-) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index cad6a16260f..acfd2e15c5f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -338,7 +338,6 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - unsigned dumpable:2; cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ @@ -355,6 +354,8 @@ struct mm_struct { unsigned int token_priority; unsigned int last_interval; + unsigned char dumpable:2; + /* coredumping support */ int core_waiters; struct completion *core_startup_done, core_done; -- cgit v1.2.3-70-g09d2 From 7dfb71030f7636a0d65200158113c37764552f93 Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Wed, 6 Dec 2006 20:34:23 -0800 Subject: [PATCH] Add include/linux/freezer.h and move definitions from sched.h Move process freezing functions from include/linux/sched.h to freezer.h, so that modifications to the freezer or the kernel configuration don't require recompiling just about everything. [akpm@osdl.org: fix ueagle driver] Signed-off-by: Nigel Cunningham Cc: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/signal.c | 1 + arch/avr32/kernel/signal.c | 2 +- arch/frv/kernel/signal.c | 2 +- arch/h8300/kernel/signal.c | 2 +- arch/i386/kernel/io_apic.c | 1 + arch/m32r/kernel/signal.c | 2 +- arch/powerpc/kernel/signal_32.c | 2 +- arch/sh/kernel/signal.c | 1 + arch/sh64/kernel/signal.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/char/hvc_console.c | 1 + drivers/edac/edac_mc.c | 1 + drivers/ieee1394/nodemgr.c | 1 + drivers/input/gameport/gameport.c | 1 + drivers/input/serio/serio.c | 1 + drivers/macintosh/therm_adt746x.c | 1 + drivers/macintosh/via-pmu.c | 2 +- drivers/macintosh/windfarm_core.c | 1 + drivers/md/md.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.c | 2 +- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/tvaudio.c | 1 + drivers/media/video/video-buf-dvb.c | 2 +- drivers/media/video/vivi.c | 1 + drivers/mfd/ucb1x00-ts.c | 2 +- drivers/net/irda/stir4200.c | 1 + drivers/net/wireless/airo.c | 1 + drivers/pcmcia/cs.c | 1 + drivers/pnp/pnpbios/core.c | 1 + drivers/usb/atm/ueagle-atm.c | 2 + drivers/usb/core/hub.c | 1 + drivers/usb/gadget/file_storage.c | 2 +- drivers/usb/storage/usb.c | 2 +- drivers/w1/w1.c | 1 + fs/afs/kafsasyncd.c | 1 + fs/afs/kafstimod.c | 1 + fs/cifs/cifsfs.c | 1 + fs/cifs/connect.c | 1 + fs/jbd/journal.c | 2 +- fs/jbd2/journal.c | 2 +- fs/jffs/intrep.c | 1 + fs/jffs2/background.c | 1 + fs/jfs/jfs_logmgr.c | 2 +- fs/jfs/jfs_txnmgr.c | 2 +- fs/lockd/clntproc.c | 1 + fs/xfs/linux-2.6/xfs_buf.c | 1 + fs/xfs/linux-2.6/xfs_super.c | 1 + include/linux/freezer.h | 84 +++++++++++++++++++++++++++++++ include/linux/sched.h | 81 ----------------------------- init/do_mounts_initrd.c | 1 + kernel/audit.c | 1 + kernel/power/disk.c | 1 + kernel/power/main.c | 1 + kernel/power/process.c | 1 + kernel/power/user.c | 1 + kernel/rtmutex-tester.c | 1 + kernel/sched.c | 2 +- kernel/signal.c | 1 + mm/pdflush.c | 1 + mm/vmscan.c | 1 + net/rxrpc/krxiod.c | 1 + net/rxrpc/krxsecd.c | 1 + net/rxrpc/krxtimod.c | 1 + net/sunrpc/svcsock.c | 1 + 64 files changed, 147 insertions(+), 101 deletions(-) create mode 100644 include/linux/freezer.h (limited to 'include/linux/sched.h') diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 48cf7fffddf..f38a60a03b8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 33096651c24..0ec14854a20 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index b8a5882b862..85baeae9666 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 7787f70a05b..02955604d76 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 3b7a63e0ed1..44c5a3206b2 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index b60cea4aeba..092ea86bb07 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 320353f0926..e4ebe1a6228 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #endif #include diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c index 50d7c4993be..bb1c480a59c 100644 --- a/arch/sh/kernel/signal.c +++ b/arch/sh/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c index 9e2ffc45c0e..1666d3efb52 100644 --- a/arch/sh64/kernel/signal.c +++ b/arch/sh64/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f2904f67af4..e45eaa26411 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 9902ffad3b1..cc2cd46bedc 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -38,6 +38,7 @@ #include #include #include +#include #include diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 75e9e38330f..1b4fc922180 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 8e7b83f8448..e829c9336b3 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "csr.h" diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index a0af97efe6a..79dfb4b25c9 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -23,6 +23,7 @@ #include #include /* HZ */ #include +#include /*#include */ diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 211943f85cb..5f1d4032fd5 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -35,6 +35,7 @@ #include #include #include +#include MODULE_AUTHOR("Vojtech Pavlik "); MODULE_DESCRIPTION("Serio abstraction core"); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 13b953ae8eb..3d3bf1643e7 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index e63ea1c1f3c..c8558d4ed50 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index ab3faa702d5..e947af982f9 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -34,6 +34,7 @@ #include #include #include +#include #include diff --git a/drivers/md/md.c b/drivers/md/md.c index 8cbf9c9df1c..6c4345bde07 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -39,10 +39,10 @@ #include #include #include /* for invalidate_bdev */ -#include #include #include #include +#include #include diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index a2ab2eebfc6..e85972222ab 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index cf43df3fe70..e1b56dc13c3 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include "msp3400-driver.h" /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index fcaef4bf828..d506dfaa45a 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c index f53edf1923b..fcc5467e763 100644 --- a/drivers/media/video/video-buf-dvb.c +++ b/drivers/media/video/video-buf-dvb.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c index 3c8dc72dc8e..9986de5cb3d 100644 --- a/drivers/media/video/vivi.c +++ b/drivers/media/video/vivi.c @@ -36,6 +36,7 @@ #include #include #include +#include /* Wake up at about 30 fps */ #define WAKE_NUMERATOR 30 diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 82938ad6ddb..ce1a4810821 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 3b4c4787593..c14a74634fd 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index efcdaf1c5f7..44a22701da9 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "airo.h" diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index f9cd831a3f3..606a4674033 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 81a6c83d89a..81186f479a3 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index f2d196fa1e8..dae4ef1e8fe 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -64,6 +64,8 @@ #include #include #include +#include + #include #include "usbatm.h" diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 77c05be5241..2651c2e2a89 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 8b975d15538..c98316ce838 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -250,7 +250,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index b401084b3d2..70644506651 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index de3e9791f80..63c07243993 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -31,6 +31,7 @@ #include #include #include +#include #include diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index f09a794f248..615df2407cb 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "cell.h" #include "server.h" #include "volume.h" diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c index 65bc05ab818..694344e4d3c 100644 --- a/fs/afs/kafstimod.c +++ b/fs/afs/kafstimod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cell.h" #include "volume.h" #include "kafstimod.h" diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e6b5866e500..71bc87a37fc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71f77914ce9..2caca06b4ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include "cifspdu.h" diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a8774bed20b..10fff944393 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 50356019ae3..44fc32bfd7f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 4a543e11497..478a74e2e9d 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "intrep.h" #include "jffs_fm.h" diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index ff2a872e80e..6eb3daebd56 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "nodelist.h" diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index b89c9aba046..5065baa530b 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -67,7 +67,7 @@ #include #include /* for sync_blockdev() */ #include -#include +#include #include #include #include "jfs_incore.h" diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 81f6f04af19..d558e51b0df 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 3d84f600b63..50643b6a555 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index eef4a0ba11e..b971237c5a9 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -32,6 +32,7 @@ #include #include #include +#include STATIC kmem_zone_t *xfs_buf_zone; STATIC kmem_shaker_t xfs_buf_shake; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index de05abbbe7f..b93265b7c79 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -56,6 +56,7 @@ #include #include #include +#include STATIC struct quotactl_ops xfs_quotactl_operations; STATIC struct super_operations xfs_super_operations; diff --git a/include/linux/freezer.h b/include/linux/freezer.h new file mode 100644 index 00000000000..266373f7444 --- /dev/null +++ b/include/linux/freezer.h @@ -0,0 +1,84 @@ +/* Freezer declarations */ + +#ifdef CONFIG_PM +/* + * Check if a process has been frozen + */ +static inline int frozen(struct task_struct *p) +{ + return p->flags & PF_FROZEN; +} + +/* + * Check if there is a request to freeze a process + */ +static inline int freezing(struct task_struct *p) +{ + return p->flags & PF_FREEZE; +} + +/* + * Request that a process be frozen + * FIXME: SMP problem. We may not modify other process' flags! + */ +static inline void freeze(struct task_struct *p) +{ + p->flags |= PF_FREEZE; +} + +/* + * Sometimes we may need to cancel the previous 'freeze' request + */ +static inline void do_not_freeze(struct task_struct *p) +{ + p->flags &= ~PF_FREEZE; +} + +/* + * Wake up a frozen process + */ +static inline int thaw_process(struct task_struct *p) +{ + if (frozen(p)) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + return 1; + } + return 0; +} + +/* + * freezing is complete, mark process as frozen + */ +static inline void frozen_process(struct task_struct *p) +{ + p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; +} + +extern void refrigerator(void); +extern int freeze_processes(void); +extern void thaw_processes(void); + +static inline int try_to_freeze(void) +{ + if (freezing(current)) { + refrigerator(); + return 1; + } else + return 0; +} +#else +static inline int frozen(struct task_struct *p) { return 0; } +static inline int freezing(struct task_struct *p) { return 0; } +static inline void freeze(struct task_struct *p) { BUG(); } +static inline int thaw_process(struct task_struct *p) { return 1; } +static inline void frozen_process(struct task_struct *p) { BUG(); } + +static inline void refrigerator(void) {} +static inline int freeze_processes(void) { BUG(); return 0; } +static inline void thaw_processes(void) {} + +static inline int try_to_freeze(void) { return 0; } + + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index acfd2e15c5f..837a012f573 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1618,87 +1618,6 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); extern void normalize_rt_tasks(void); -#ifdef CONFIG_PM -/* - * Check if a process has been frozen - */ -static inline int frozen(struct task_struct *p) -{ - return p->flags & PF_FROZEN; -} - -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return p->flags & PF_FREEZE; -} - -/* - * Request that a process be frozen - * FIXME: SMP problem. We may not modify other process' flags! - */ -static inline void freeze(struct task_struct *p) -{ - p->flags |= PF_FREEZE; -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request - */ -static inline void do_not_freeze(struct task_struct *p) -{ - p->flags &= ~PF_FREEZE; -} - -/* - * Wake up a frozen process - */ -static inline int thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - wake_up_process(p); - return 1; - } - return 0; -} - -/* - * freezing is complete, mark process as frozen - */ -static inline void frozen_process(struct task_struct *p) -{ - p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; -} - -extern void refrigerator(void); -extern int freeze_processes(void); -extern void thaw_processes(void); - -static inline int try_to_freeze(void) -{ - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; -} -#else -static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void freeze(struct task_struct *p) { BUG(); } -static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void frozen_process(struct task_struct *p) { BUG(); } - -static inline void refrigerator(void) {} -static inline int freeze_processes(void) { BUG(); return 0; } -static inline void thaw_processes(void) {} - -static inline int try_to_freeze(void) { return 0; } - -#endif /* CONFIG_PM */ #endif /* __KERNEL__ */ #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 919a80cb322..2cfd7cb36e7 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "do_mounts.h" diff --git a/kernel/audit.c b/kernel/audit.c index 98106f6078b..d9b690ac684 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f5079231383..53b3b57c022 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "power.h" diff --git a/kernel/power/main.c b/kernel/power/main.c index 873228c71da..6096c71b182 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "power.h" diff --git a/kernel/power/process.c b/kernel/power/process.c index 72e72d2c61e..29be608e834 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Timeout for stopping processes diff --git a/kernel/power/user.c b/kernel/power/user.c index a63b25c63b4..26c66941c00 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -22,6 +22,7 @@ #include #include #include +#include #include diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 6dcea9dd8c9..015fc633c96 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "rtmutex.h" diff --git a/kernel/sched.c b/kernel/sched.c index 3399701c680..12fdbef1d9b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 8e19d278548..bc972d7e631 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/pdflush.c b/mm/pdflush.c index b02102feeb4..8ce0900dc95 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -21,6 +21,7 @@ #include // Prototypes pdflush_operation() #include #include +#include /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 2a6a79f6813..f6616e81fac 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c index dada34a77b2..49effd92144 100644 --- a/net/rxrpc/krxiod.c +++ b/net/rxrpc/krxiod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index cea4eb5e249..3ab0f77409f 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c index 3e7466900bd..9a9b6132dba 100644 --- a/net/rxrpc/krxtimod.c +++ b/net/rxrpc/krxtimod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 64ca1f61dd9..1c68956824e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From e59e2ae2c29700117a54e85c106017c24837119f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 Dec 2006 20:35:59 -0800 Subject: [PATCH] SysRq-X: show blocked tasks Add SysRq-X support: show blocked (TASK_UNINTERRUPTIBLE) tasks only. Useful for debugging IO stalls. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 14 +++++++++++++- include/linux/sched.h | 11 ++++++++++- kernel/sched.c | 11 ++++++++--- 3 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux/sched.h') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index c64f5bcff94..05810c8d20b 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -182,6 +182,18 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; +static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +{ + show_state_filter(TASK_UNINTERRUPTIBLE); +} +static struct sysrq_key_op sysrq_showstate_blocked_op = { + .handler = sysrq_handle_showstate_blocked, + .help_msg = "showBlockedTasks", + .action_msg = "Show Blocked State", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + + static void sysrq_handle_showmem(int key, struct tty_struct *tty) { show_mem(); @@ -304,7 +316,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { /* May be assigned at init time by SMP VOYAGER */ NULL, /* v */ NULL, /* w */ - NULL, /* x */ + &sysrq_showstate_blocked_op, /* x */ NULL, /* y */ NULL /* z */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 837a012f573..0a90cefb0b0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -194,7 +194,16 @@ extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; -extern void show_state(void); +/* + * Only dump TASK_* tasks. (-1 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(-1); +} + extern void show_regs(struct pt_regs *); /* diff --git a/kernel/sched.c b/kernel/sched.c index 12fdbef1d9b..1848e280504 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4804,7 +4804,7 @@ static void show_task(struct task_struct *p) show_stack(p, NULL); } -void show_state(void) +void show_state_filter(unsigned long state_filter) { struct task_struct *g, *p; @@ -4824,11 +4824,16 @@ void show_state(void) * console might take alot of time: */ touch_nmi_watchdog(); - show_task(p); + if (p->state & state_filter) + show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); - debug_show_all_locks(); + /* + * Only show locks if all tasks are dumped: + */ + if (state_filter == -1) + debug_show_all_locks(); } /** -- cgit v1.2.3-70-g09d2 From d3228a887cae75ef2b8b1211c31c539bef5a5698 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:22 -0800 Subject: [PATCH] make kernel/signal.c:kill_proc_info() static Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/signal.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0a90cefb0b0..3a767242e72 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1305,7 +1305,6 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); -extern int kill_proc_info(int, struct siginfo *, pid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index bc972d7e631..ec81defde33 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1134,8 +1134,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) return error; } -int -kill_proc_info(int sig, struct siginfo *info, pid_t pid) +static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From 15ad7cdcfd76450d4beebc789ec646664238184d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 6 Dec 2006 20:40:36 -0800 Subject: [PATCH] struct seq_operations and struct file_operations constification - move some file_operations structs into the .rodata section - move static strings from policy_types[] array into the .rodata section - fix generic seq_operations usages, so that those structs may be defined as "const" as well [akpm@osdl.org: couple of fixes] Signed-off-by: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 4 ++-- include/linux/cpuset.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/relay.h | 2 +- include/linux/sched.h | 2 +- include/linux/seq_file.h | 4 ++-- kernel/configs.c | 2 +- kernel/cpuset.c | 4 ++-- kernel/dma.c | 2 +- kernel/futex.c | 2 +- kernel/kallsyms.c | 4 ++-- kernel/lockdep_proc.c | 6 +++--- kernel/module.c | 2 +- kernel/power/user.c | 2 +- kernel/profile.c | 2 +- kernel/relay.c | 2 +- kernel/resource.c | 6 +++--- kernel/sched.c | 2 +- kernel/sysctl.c | 2 +- mm/mempolicy.c | 4 ++-- mm/page_alloc.c | 2 +- mm/shmem.c | 4 ++-- mm/slab.c | 4 ++-- mm/swapfile.c | 4 ++-- mm/vmstat.c | 8 ++++---- 25 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include/linux/sched.h') diff --git a/fs/seq_file.c b/fs/seq_file.c index 555b9ac04c2..10690aa401c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -26,7 +26,7 @@ * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. */ -int seq_open(struct file *file, struct seq_operations *op) +int seq_open(struct file *file, const struct seq_operations *op) { struct seq_file *p = file->private_data; @@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open); int single_release(struct inode *inode, struct file *file) { - struct seq_operations *op = ((struct seq_file *)file->private_data)->op; + const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; int res = seq_release(inode, file); kfree(op); return res; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 748d2c99663..8821e1f75b4 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -46,7 +46,7 @@ extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); -extern struct file_operations proc_cpuset_operations; +extern const struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); extern void cpuset_lock(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index da6002dec20..e339a7345f2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -278,7 +278,7 @@ struct zone { /* * rarely used fields: */ - char *name; + const char *name; } ____cacheline_internodealigned_in_smp; /* diff --git a/include/linux/relay.h b/include/linux/relay.h index 0e3d91b7699..c6a48bfc8b1 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -274,7 +274,7 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, /* * exported relay file operations, kernel/relay.c */ -extern struct file_operations relay_file_operations; +extern const struct file_operations relay_file_operations; #endif /* _LINUX_RELAY_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3a767242e72..dede82c6344 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -573,7 +573,7 @@ struct sched_info { #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ #ifdef CONFIG_SCHEDSTATS -extern struct file_operations proc_schedstat_operations; +extern const struct file_operations proc_schedstat_operations; #endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_TASK_DELAY_ACCT diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b95f6eb7254..3e3cccbb1ca 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -20,7 +20,7 @@ struct seq_file { loff_t index; loff_t version; struct mutex lock; - struct seq_operations *op; + const struct seq_operations *op; void *private; }; @@ -31,7 +31,7 @@ struct seq_operations { int (*show) (struct seq_file *m, void *v); }; -int seq_open(struct file *, struct seq_operations *); +int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); diff --git a/kernel/configs.c b/kernel/configs.c index f9e31974f4a..8fa1fb28f8a 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf, return count; } -static struct file_operations ikconfig_file_ops = { +static const struct file_operations ikconfig_file_ops = { .owner = THIS_MODULE, .read = ikconfig_read_current, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9b62b4c03ad..4ef1d29297c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1531,7 +1531,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, return simple_rename(old_dir, old_dentry, new_dir, new_dentry); } -static struct file_operations cpuset_file_operations = { +static const struct file_operations cpuset_file_operations = { .read = cpuset_file_read, .write = cpuset_file_write, .llseek = generic_file_llseek, @@ -2605,7 +2605,7 @@ static int cpuset_open(struct inode *inode, struct file *file) return single_open(file, proc_cpuset_show, pid); } -struct file_operations proc_cpuset_operations = { +const struct file_operations proc_cpuset_operations = { .open = cpuset_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/dma.c b/kernel/dma.c index 2020644c938..937b13ca33b 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -140,7 +140,7 @@ static int proc_dma_open(struct inode *inode, struct file *file) return single_open(file, proc_dma_show, NULL); } -static struct file_operations proc_dma_operations = { +static const struct file_operations proc_dma_operations = { .open = proc_dma_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/futex.c b/kernel/futex.c index a8302a1620e..95989a3b416 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1492,7 +1492,7 @@ static unsigned int futex_poll(struct file *filp, return ret; } -static struct file_operations futex_fops = { +static const struct file_operations futex_fops = { .release = futex_close, .poll = futex_poll, }; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 54befe36ee0..ab63cfc4299 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -398,7 +398,7 @@ static int s_show(struct seq_file *m, void *p) return 0; } -static struct seq_operations kallsyms_op = { +static const struct seq_operations kallsyms_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -433,7 +433,7 @@ static int kallsyms_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static struct file_operations kallsyms_operations = { +static const struct file_operations kallsyms_operations = { .open = kallsyms_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index f6e72eaab3f..b554b40a4aa 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -113,7 +113,7 @@ static int l_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations lockdep_ops = { +static const struct seq_operations lockdep_ops = { .start = l_start, .next = l_next, .stop = l_stop, @@ -135,7 +135,7 @@ static int lockdep_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_lockdep_operations = { +static const struct file_operations proc_lockdep_operations = { .open = lockdep_open, .read = seq_read, .llseek = seq_lseek, @@ -319,7 +319,7 @@ static int lockdep_stats_open(struct inode *inode, struct file *file) return single_open(file, lockdep_stats_show, NULL); } -static struct file_operations proc_lockdep_stats_operations = { +static const struct file_operations proc_lockdep_stats_operations = { .open = lockdep_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/module.c b/kernel/module.c index e2d09d604ca..d9eae45d014 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2209,7 +2209,7 @@ static int m_show(struct seq_file *m, void *p) Where refcount is a number or -, and deps is a comma-separated list of depends or -. */ -struct seq_operations modules_op = { +const struct seq_operations modules_op = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/kernel/power/user.c b/kernel/power/user.c index 069732e5695..89443b85163 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -383,7 +383,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, return error; } -static struct file_operations snapshot_fops = { +static const struct file_operations snapshot_fops = { .open = snapshot_open, .release = snapshot_release, .read = snapshot_read, diff --git a/kernel/profile.c b/kernel/profile.c index 0961d93e1d9..fb5e03d57e9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -501,7 +501,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return count; } -static struct file_operations proc_profile_operations = { +static const struct file_operations proc_profile_operations = { .read = read_profile, .write = write_profile, }; diff --git a/kernel/relay.c b/kernel/relay.c index 2b92e8ece85..75a3a9a7efc 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1013,7 +1013,7 @@ static ssize_t relay_file_sendfile(struct file *filp, actor, &desc); } -struct file_operations relay_file_operations = { +const struct file_operations relay_file_operations = { .open = relay_file_open, .poll = relay_file_poll, .mmap = relay_file_mmap, diff --git a/kernel/resource.c b/kernel/resource.c index 6de60c12143..7b9a497419d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -88,7 +88,7 @@ static int r_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations resource_op = { +static const struct seq_operations resource_op = { .start = r_start, .next = r_next, .stop = r_stop, @@ -115,14 +115,14 @@ static int iomem_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_ioports_operations = { +static const struct file_operations proc_ioports_operations = { .open = ioports_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -static struct file_operations proc_iomem_operations = { +static const struct file_operations proc_iomem_operations = { .open = iomem_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/sched.c b/kernel/sched.c index b43cef02ce5..f385eff4682 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -505,7 +505,7 @@ static int schedstat_open(struct inode *inode, struct file *file) return res; } -struct file_operations proc_schedstat_operations = { +const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6d7147cf66b..758dbbf972a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); static int proc_opensys(struct inode *, struct file *); -struct file_operations proc_sys_file_operations = { +const struct file_operations proc_sys_file_operations = { .open = proc_opensys, .read = proc_readsys, .write = proc_writesys, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ad864f8708b..b917d6fdc1b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1707,8 +1707,8 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) * Display pages allocated per node and memory policy via /proc. */ -static const char *policy_types[] = { "default", "prefer", "bind", - "interleave" }; +static const char * const policy_types[] = + { "default", "prefer", "bind", "interleave" }; /* * Convert a mempolicy into a string. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 27ec7a1b802..cace22b3ac2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -83,7 +83,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); -static char *zone_names[MAX_NR_ZONES] = { +static char * const zone_names[MAX_NR_ZONES] = { "DMA", #ifdef CONFIG_ZONE_DMA32 "DMA32", diff --git a/mm/shmem.c b/mm/shmem.c index 007653680a7..c820b4f77b8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -177,7 +177,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) static struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; -static struct file_operations shmem_file_operations; +static const struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; static struct inode_operations shmem_special_inode_operations; @@ -2319,7 +2319,7 @@ static const struct address_space_operations shmem_aops = { .migratepage = migrate_page, }; -static struct file_operations shmem_file_operations = { +static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, #ifdef CONFIG_TMPFS .llseek = generic_file_llseek, diff --git a/mm/slab.c b/mm/slab.c index 86f5d6e995b..068cb4503c1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4142,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p) * + further values on SMP and with statistics enabled */ -struct seq_operations slabinfo_op = { +const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -4340,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p) return 0; } -struct seq_operations slabstats_op = { +const struct seq_operations slabstats_op = { .start = leaks_start, .next = s_next, .stop = s_stop, diff --git a/mm/swapfile.c b/mm/swapfile.c index 55242363de6..c5431072f42 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1368,7 +1368,7 @@ static int swap_show(struct seq_file *swap, void *v) return 0; } -static struct seq_operations swaps_op = { +static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, @@ -1380,7 +1380,7 @@ static int swaps_open(struct inode *inode, struct file *file) return seq_open(file, &swaps_op); } -static struct file_operations proc_swaps_operations = { +static const struct file_operations proc_swaps_operations = { .open = swaps_open, .read = seq_read, .llseek = seq_lseek, diff --git a/mm/vmstat.c b/mm/vmstat.c index ef4176843d9..dc005a0c96a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -430,7 +430,7 @@ static int frag_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations fragmentation_op = { +const struct seq_operations fragmentation_op = { .start = frag_start, .next = frag_next, .stop = frag_stop, @@ -452,7 +452,7 @@ struct seq_operations fragmentation_op = { #define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ TEXT_FOR_HIGHMEM(xx) -static char *vmstat_text[] = { +static const char * const vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", "nr_mapped", @@ -597,7 +597,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations zoneinfo_op = { +const struct seq_operations zoneinfo_op = { .start = frag_start, /* iterate over all zones. The same as in * fragmentation. */ .next = frag_next, @@ -660,7 +660,7 @@ static void vmstat_stop(struct seq_file *m, void *arg) m->private = NULL; } -struct seq_operations vmstat_op = { +const struct seq_operations vmstat_op = { .start = vmstat_start, .next = vmstat_next, .stop = vmstat_stop, -- cgit v1.2.3-70-g09d2 From ae424ae4b5bcd820ad6ee6f0b986c4e14ed4d6cf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 8 Dec 2006 02:36:08 -0800 Subject: [PATCH] make set_special_pids() static Make set_special_pids() static, the only caller is daemonize(). Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/exit.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index dede82c6344..5e8a0ba6174 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1240,7 +1240,6 @@ extern struct mm_struct init_mm; #define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr) extern struct task_struct *find_task_by_pid_type(int type, int pid); -extern void set_special_pids(pid_t session, pid_t pgrp); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. */ diff --git a/kernel/exit.c b/kernel/exit.c index fa235779b6a..fa0495e167e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -314,7 +314,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) } } -void set_special_pids(pid_t session, pid_t pgrp) +static void set_special_pids(pid_t session, pid_t pgrp) { write_lock_irq(&tasklist_lock); __set_special_pids(session, pgrp); -- cgit v1.2.3-70-g09d2 From 937949d9edbf4049bd41af6c9f92c26280584564 Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Fri, 8 Dec 2006 02:37:54 -0800 Subject: [PATCH] add process_session() helper routine Replace occurences of task->signal->session by a new process_session() helper routine. It will be useful for pid namespaces to abstract the session pid number. Signed-off-by: Cedric Le Goater Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Herbert Poetzl Cc: Sukadev Bhattiprolu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/irixelf.c | 2 +- drivers/char/mxser.c | 2 +- drivers/char/rocket.c | 2 +- drivers/char/tty_io.c | 12 ++++++------ fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 4 ++-- include/linux/sched.h | 5 +++++ kernel/exit.c | 19 ++++++++++--------- kernel/fork.c | 4 ++-- kernel/signal.c | 2 +- kernel/sys.c | 8 ++++---- 11 files changed, 35 insertions(+), 29 deletions(-) (limited to 'include/linux/sched.h') diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index 1bbefbf4337..37cad5de515 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -1145,7 +1145,7 @@ static int irix_core_dump(long signr, struct pt_regs * regs, struct file *file) psinfo.pr_pid = prstatus.pr_pid = current->pid; psinfo.pr_ppid = prstatus.pr_ppid = current->parent->pid; psinfo.pr_pgrp = prstatus.pr_pgrp = process_group(current); - psinfo.pr_sid = prstatus.pr_sid = current->signal->session; + psinfo.pr_sid = prstatus.pr_sid = process_session(current); if (current->pid == current->tgid) { /* * This is the record for the group leader. Add in the diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 5ed2486b758..b1cc89c8982 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -994,7 +994,7 @@ static int mxser_open(struct tty_struct *tty, struct file *filp) mxser_change_speed(info, NULL); } - info->session = current->signal->session; + info->session = process_session(current); info->pgrp = process_group(current); /* diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index bac80056f7e..4fdf52e9f3b 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -1017,7 +1017,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) /* * Info->count is now 1; so it's safe to sleep now. */ - info->session = current->signal->session; + info->session = process_session(current); info->pgrp = process_group(current); if ((info->flags & ROCKET_INITIALIZED) == 0) { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 8a2d26e98ac..4ebba7ca1dc 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1511,7 +1511,7 @@ void disassociate_ctty(int on_exit) spin_lock_irq(¤t->sighand->siglock); current->signal->tty_old_pgrp = 0; - session = current->signal->session; + session = process_session(current); spin_unlock_irq(¤t->sighand->siglock); mutex_lock(&tty_mutex); @@ -2897,7 +2897,7 @@ static int tiocsctty(struct tty_struct *tty, int arg) { int ret = 0; if (current->signal->leader && - (current->signal->session == tty->session)) + (process_session(current) == tty->session)) return ret; mutex_lock(&tty_mutex); @@ -2979,13 +2979,13 @@ static int tiocspgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t return retval; if (!current->signal->tty || (current->signal->tty != real_tty) || - (real_tty->session != current->signal->session)) + (real_tty->session != process_session(current))) return -ENOTTY; if (get_user(pgrp, p)) return -EFAULT; if (pgrp < 0) return -EINVAL; - if (session_of_pgrp(pgrp) != current->signal->session) + if (session_of_pgrp(pgrp) != process_session(current)) return -EPERM; real_tty->pgrp = pgrp; return 0; @@ -3338,7 +3338,7 @@ static void __do_SAK(struct work_struct *work) /* Kill the entire session */ do_each_task_pid(session, PIDTYPE_SID, p) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): p->signal->session==tty->session\n", + " (%s): process_session(p)==tty->session\n", p->pid, p->comm); send_sig(SIGKILL, p, 1); } while_each_task_pid(session, PIDTYPE_SID, p); @@ -3348,7 +3348,7 @@ static void __do_SAK(struct work_struct *work) do_each_thread(g, p) { if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): p->signal->session==tty->session\n", + " (%s): process_session(p)==tty->session\n", p->pid, p->comm); send_sig(SIGKILL, p, 1); continue; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index c6dbb4a7ec7..d3adfd353ff 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1317,7 +1317,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pid = p->pid; prstatus->pr_ppid = p->parent->pid; prstatus->pr_pgrp = process_group(p); - prstatus->pr_sid = p->signal->session; + prstatus->pr_sid = process_session(p); if (thread_group_leader(p)) { /* * This is the record for the group leader. Add in the @@ -1363,7 +1363,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_pid = p->pid; psinfo->pr_ppid = p->parent->pid; psinfo->pr_pgrp = process_group(p); - psinfo->pr_sid = p->signal->session; + psinfo->pr_sid = process_session(p); i = p->state ? ffz(~p->state) + 1 : 0; psinfo->pr_state = i; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9f0b7efc3df..76f06f6bc2f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1322,7 +1322,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pid = p->pid; prstatus->pr_ppid = p->parent->pid; prstatus->pr_pgrp = process_group(p); - prstatus->pr_sid = p->signal->session; + prstatus->pr_sid = process_session(p); if (thread_group_leader(p)) { /* * This is the record for the group leader. Add in the @@ -1371,7 +1371,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_pid = p->pid; psinfo->pr_ppid = p->parent->pid; psinfo->pr_pgrp = process_group(p); - psinfo->pr_sid = p->signal->session; + psinfo->pr_sid = process_session(p); i = p->state ? ffz(~p->state) + 1 : 0; psinfo->pr_state = i; diff --git a/include/linux/sched.h b/include/linux/sched.h index 5e8a0ba6174..270d864a8ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1047,6 +1047,11 @@ static inline pid_t process_group(struct task_struct *tsk) return tsk->signal->pgrp; } +static inline pid_t process_session(struct task_struct *tsk) +{ + return tsk->signal->session; +} + static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; diff --git a/kernel/exit.c b/kernel/exit.c index fa0495e167e..8d289bfc13d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -193,14 +193,14 @@ int session_of_pgrp(int pgrp) read_lock(&tasklist_lock); do_each_task_pid(pgrp, PIDTYPE_PGID, p) { - if (p->signal->session > 0) { - sid = p->signal->session; + if (process_session(p) > 0) { + sid = process_session(p); goto out; } } while_each_task_pid(pgrp, PIDTYPE_PGID, p); p = find_task_by_pid(pgrp); if (p) - sid = p->signal->session; + sid = process_session(p); out: read_unlock(&tasklist_lock); @@ -225,8 +225,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) || p->exit_state || is_init(p->real_parent)) continue; - if (process_group(p->real_parent) != pgrp - && p->real_parent->signal->session == p->signal->session) { + if (process_group(p->real_parent) != pgrp && + process_session(p->real_parent) == process_session(p)) { ret = 0; break; } @@ -302,7 +302,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) { struct task_struct *curr = current->group_leader; - if (curr->signal->session != session) { + if (process_session(curr) != session) { detach_pid(curr, PIDTYPE_SID); curr->signal->session = session; attach_pid(curr, PIDTYPE_SID, session); @@ -647,10 +647,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) * outside, so the child pgrp is now orphaned. */ if ((process_group(p) != process_group(father)) && - (p->signal->session == father->signal->session)) { + (process_session(p) == process_session(father))) { int pgrp = process_group(p); - if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { + if (will_become_orphaned_pgrp(pgrp, NULL) && + has_stopped_jobs(pgrp)) { __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); } @@ -784,7 +785,7 @@ static void exit_notify(struct task_struct *tsk) t = tsk->real_parent; if ((process_group(t) != process_group(tsk)) && - (t->signal->session == tsk->signal->session) && + (process_session(t) == process_session(tsk)) && will_become_orphaned_pgrp(process_group(tsk), tsk) && has_stopped_jobs(process_group(tsk))) { __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); diff --git a/kernel/fork.c b/kernel/fork.c index 59770781932..298c4d6ab51 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1259,9 +1259,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); - p->signal->session = current->signal->session; + p->signal->session = process_session(current); attach_pid(p, PIDTYPE_PGID, process_group(p)); - attach_pid(p, PIDTYPE_SID, p->signal->session); + attach_pid(p, PIDTYPE_SID, process_session(p)); list_add_tail_rcu(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; diff --git a/kernel/signal.c b/kernel/signal.c index ec81defde33..9eac4db60ed 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -583,7 +583,7 @@ static int check_kill_permission(int sig, struct siginfo *info, error = -EPERM; if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) && ((sig != SIGCONT) || - (current->signal->session != t->signal->session)) + (process_session(current) != process_session(t))) && (current->euid ^ t->suid) && (current->euid ^ t->uid) && (current->uid ^ t->suid) && (current->uid ^ t->uid) && !capable(CAP_KILL)) diff --git a/kernel/sys.c b/kernel/sys.c index 1ac2d1c5d84..4f9d23a3095 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1381,7 +1381,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) if (p->real_parent == group_leader) { err = -EPERM; - if (p->signal->session != group_leader->signal->session) + if (process_session(p) != process_session(group_leader)) goto out; err = -EACCES; if (p->did_exec) @@ -1400,7 +1400,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) struct task_struct *p; do_each_task_pid(pgid, PIDTYPE_PGID, p) { - if (p->signal->session == group_leader->signal->session) + if (process_session(p) == process_session(group_leader)) goto ok_pgid; } while_each_task_pid(pgid, PIDTYPE_PGID, p); goto out; @@ -1459,7 +1459,7 @@ asmlinkage long sys_getpgrp(void) asmlinkage long sys_getsid(pid_t pid) { if (!pid) - return current->signal->session; + return process_session(current); else { int retval; struct task_struct *p; @@ -1471,7 +1471,7 @@ asmlinkage long sys_getsid(pid_t pid) if (p) { retval = security_task_getsid(p); if (!retval) - retval = p->signal->session; + retval = process_session(p); } read_unlock(&tasklist_lock); return retval; -- cgit v1.2.3-70-g09d2 From 1ec320afdc9552c92191d5f89fcd1ebe588334ca Mon Sep 17 00:00:00 2001 From: Cedric Le Goater Date: Fri, 8 Dec 2006 02:37:55 -0800 Subject: [PATCH] add process_session() helper routine: deprecate old field Add an anonymous union and ((deprecated)) to catch direct usage of the session field. [akpm@osdl.org: fix various missed conversions] [jdike@addtoit.com: fix UML bug] Signed-off-by: Jeff Dike Cc: Cedric Le Goater Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 2 +- fs/proc/array.c | 2 +- include/linux/init_task.h | 11 ++++++----- include/linux/sched.h | 19 +++++++++++++++++-- kernel/exit.c | 2 +- kernel/fork.c | 2 +- 6 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/linux/sched.h') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 4ebba7ca1dc..48cee2004e9 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3855,7 +3855,7 @@ EXPORT_SYMBOL(proc_clear_tty); void __proc_set_tty(struct task_struct *tsk, struct tty_struct *tty) { if (tty) { - tty->session = tsk->signal->session; + tty->session = process_session(tsk); tty->pgrp = process_group(tsk); } tsk->signal->tty = tty; diff --git a/fs/proc/array.c b/fs/proc/array.c index b0cd014a39b..70e4fab117b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -381,7 +381,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole) stime = cputime_add(stime, sig->stime); } - sid = sig->session; + sid = signal_session(sig); pgid = process_group(task); ppid = rcu_dereference(task->real_parent)->tgid; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 733790d4f7d..848a68af3d4 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,17 +57,18 @@ .cpu_vm_mask = CPU_MASK_ALL, \ } -#define INIT_SIGNALS(sig) { \ - .count = ATOMIC_INIT(1), \ +#define INIT_SIGNALS(sig) { \ + .count = ATOMIC_INIT(1), \ .wait_chldexit = __WAIT_QUEUE_HEAD_INITIALIZER(sig.wait_chldexit),\ - .shared_pending = { \ + .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ - .signal = {{0}}}, \ + .signal = {{0}}}, \ .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ .rlim = INIT_RLIMITS, \ .pgrp = 1, \ - .session = 1, \ + .tty_old_pgrp = 0, \ + { .__session = 1}, \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 270d864a8ff..6fec1d41971 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -436,7 +436,12 @@ struct signal_struct { /* job control IDs */ pid_t pgrp; pid_t tty_old_pgrp; - pid_t session; + + union { + pid_t session __deprecated; + pid_t __session; + }; + /* boolean value for session group leader */ int leader; @@ -1047,9 +1052,19 @@ static inline pid_t process_group(struct task_struct *tsk) return tsk->signal->pgrp; } +static inline pid_t signal_session(struct signal_struct *sig) +{ + return sig->__session; +} + static inline pid_t process_session(struct task_struct *tsk) { - return tsk->signal->session; + return signal_session(tsk->signal); +} + +static inline void set_signal_session(struct signal_struct *sig, pid_t session) +{ + sig->__session = session; } static inline struct pid *task_pid(struct task_struct *task) diff --git a/kernel/exit.c b/kernel/exit.c index 8d289bfc13d..6267a6cc611 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -304,7 +304,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) if (process_session(curr) != session) { detach_pid(curr, PIDTYPE_SID); - curr->signal->session = session; + set_signal_session(curr->signal, session); attach_pid(curr, PIDTYPE_SID, session); } if (process_group(curr) != pgrp) { diff --git a/kernel/fork.c b/kernel/fork.c index 298c4d6ab51..60d2644bfe8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1259,7 +1259,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (thread_group_leader(p)) { p->signal->tty = current->signal->tty; p->signal->pgrp = process_group(current); - p->signal->session = process_session(current); + set_signal_session(p->signal, process_session(current)); attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, process_session(p)); -- cgit v1.2.3-70-g09d2 From 84d737866e2babdeab0c6b18ea155c6a649663b8 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Fri, 8 Dec 2006 02:38:01 -0800 Subject: [PATCH] add child reaper to pid_namespace Add a per pid_namespace child-reaper. This is needed so processes are reaped within the same pid space and do not spill over to the parent pid space. Its also needed so containers preserve existing semantic that pid == 1 would reap orphaned children. This is based on Eric Biederman's patch: http://lkml.org/lkml/2006/2/6/285 Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Cedric Le Goater Cc: Kirill Korotaev Cc: Eric W. Biederman Cc: Herbert Poetzl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 5 +++-- include/linux/pid.h | 5 +++-- include/linux/pid_namespace.h | 6 ++++++ include/linux/sched.h | 1 - init/main.c | 5 ++--- kernel/exit.c | 23 +++++++++++++++-------- kernel/pid.c | 3 ++- kernel/signal.c | 11 +++++++++-- 8 files changed, 40 insertions(+), 19 deletions(-) (limited to 'include/linux/sched.h') diff --git a/fs/exec.c b/fs/exec.c index 60433e2254a..12d8cd461b4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -620,8 +621,8 @@ static int de_thread(struct task_struct *tsk) * Reparenting needs write_lock on tasklist_lock, * so it is safe to do it under read_lock. */ - if (unlikely(tsk->group_leader == child_reaper)) - child_reaper = tsk; + if (unlikely(tsk->group_leader == child_reaper(tsk))) + tsk->nsproxy->pid_ns->child_reaper = tsk; zap_other_threads(tsk); read_unlock(&tasklist_lock); diff --git a/include/linux/pid.h b/include/linux/pid.h index 2c0007d1721..4dec047b183 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -35,8 +35,9 @@ enum pid_type * * Holding a reference to struct pid solves both of these problems. * It is small so holding a reference does not consume a lot of - * resources, and since a new struct pid is allocated when the numeric - * pid value is reused we don't mistakenly refer to new processes. + * resources, and since a new struct pid is allocated when the numeric pid + * value is reused (when pids wrap around) we don't mistakenly refer to new + * processes. */ struct pid diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 76e7c6b2cf3..d2a9d419f01 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -19,6 +19,7 @@ struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; int last_pid; + struct task_struct *child_reaper; }; extern struct pid_namespace init_pid_ns; @@ -36,4 +37,9 @@ static inline void put_pid_ns(struct pid_namespace *ns) kref_put(&ns->kref, free_pid_ns); } +static inline struct task_struct *child_reaper(struct task_struct *tsk) +{ + return tsk->nsproxy->pid_ns->child_reaper; +} + #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6fec1d41971..f0317edea14 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1400,7 +1400,6 @@ extern NORET_TYPE void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern struct task_struct *child_reaper; extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); diff --git a/init/main.c b/init/main.c index 4cdcd06e6d7..036f97c0c34 100644 --- a/init/main.c +++ b/init/main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -626,8 +627,6 @@ static int __init initcall_debug_setup(char *str) } __setup("initcall_debug", initcall_debug_setup); -struct task_struct *child_reaper = &init_task; - extern initcall_t __initcall_start[], __initcall_end[]; static void __init do_initcalls(void) @@ -727,7 +726,7 @@ static int init(void * unused) * assumptions about where in the task array this * can be found. */ - child_reaper = current; + init_pid_ns.child_reaper = current; cad_pid = task_pid(current); diff --git a/kernel/exit.c b/kernel/exit.c index 28d9feedfd2..fd0e067952a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -48,7 +49,6 @@ #include extern void sem_exit (void); -extern struct task_struct *child_reaper; static void exit_mm(struct task_struct * tsk); @@ -260,7 +260,8 @@ static int has_stopped_jobs(int pgrp) } /** - * reparent_to_init - Reparent the calling kernel thread to the init task. + * reparent_to_init - Reparent the calling kernel thread to the init task + * of the pid space that the thread belongs to. * * If a kernel thread is launched as a result of a system call, or if * it ever exits, it should generally reparent itself to init so that @@ -278,8 +279,8 @@ static void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ remove_parent(current); - current->parent = child_reaper; - current->real_parent = child_reaper; + current->parent = child_reaper(current); + current->real_parent = child_reaper(current); add_parent(current); /* Set the exit signal to SIGCHLD so we signal init on exit */ @@ -662,7 +663,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) * When we die, we re-parent all our children. * Try to give them to another thread in our thread * group, and if no such member exists, give it to - * the global child reaper process (ie "init") + * the child reaper process (ie "init") in our pid + * space. */ static void forget_original_parent(struct task_struct *father, struct list_head *to_release) @@ -673,7 +675,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release) do { reaper = next_thread(reaper); if (reaper == father) { - reaper = child_reaper; + reaper = child_reaper(father); break; } } while (reaper->exit_state); @@ -859,8 +861,13 @@ fastcall NORET_TYPE void do_exit(long code) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(tsk == child_reaper)) - panic("Attempted to kill init!"); + if (unlikely(tsk == child_reaper(tsk))) { + if (tsk->nsproxy->pid_ns != &init_pid_ns) + tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper; + else + panic("Attempted to kill init!"); + } + if (unlikely(current->ptrace & PT_TRACE_EXIT)) { current->ptrace_message = code; diff --git a/kernel/pid.c b/kernel/pid.c index 1d9cc268b49..2efe9d8d367 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -65,7 +65,8 @@ struct pid_namespace init_pid_ns = { .pidmap = { [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } }, - .last_pid = 0 + .last_pid = 0, + .child_reaper = &init_task }; /* diff --git a/kernel/signal.c b/kernel/signal.c index 9eac4db60ed..1921ffdc5e7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -24,6 +24,9 @@ #include #include #include +#include +#include + #include #include #include @@ -1877,8 +1880,12 @@ relock: if (sig_kernel_ignore(signr)) /* Default is nothing. */ continue; - /* Init gets no signals it doesn't want. */ - if (current == child_reaper) + /* + * Init of a pid space gets no signals it doesn't want from + * within that pid space. It can of course get signals from + * its parent pid space. + */ + if (current == child_reaper(current)) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3-70-g09d2 From f4f154fd920b2178382a6a24a236348e4429ebc1 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 8 Dec 2006 02:39:47 -0800 Subject: [PATCH] fault injection: process filtering for fault-injection capabilities This patch provides process filtering feature. The process filter allows failing only permitted processes by /proc//make-it-fail Please see the example that demostrates how to inject slab allocation failures into module init/cleanup code in Documentation/fault-injection/fault-injection.txt Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/fault-inject.h | 2 ++ include/linux/sched.h | 3 ++ lib/fault-inject.c | 17 +++++++++++- 4 files changed, 86 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/fs/proc/base.c b/fs/proc/base.c index a3b5074118a..fd959d5b5a8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -853,6 +853,65 @@ static struct file_operations proc_seccomp_operations = { }; #endif /* CONFIG_SECCOMP */ +#ifdef CONFIG_FAULT_INJECTION +static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + char buffer[PROC_NUMBUF]; + size_t len; + int make_it_fail; + loff_t __ppos = *ppos; + + if (!task) + return -ESRCH; + make_it_fail = task->make_it_fail; + put_task_struct(task); + + len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); + if (__ppos >= len) + return 0; + if (count > len-__ppos) + count = len-__ppos; + if (copy_to_user(buf, buffer + __ppos, count)) + return -EFAULT; + *ppos = __ppos + count; + return count; +} + +static ssize_t proc_fault_inject_write(struct file * file, + const char __user * buf, size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF], *end; + int make_it_fail; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + make_it_fail = simple_strtol(buffer, &end, 0); + if (*end == '\n') + end++; + task = get_proc_task(file->f_dentry->d_inode); + if (!task) + return -ESRCH; + task->make_it_fail = make_it_fail; + put_task_struct(task); + if (end - buffer == 0) + return -EIO; + return end - buffer; +} + +static struct file_operations proc_fault_inject_operations = { + .read = proc_fault_inject_read, + .write = proc_fault_inject_write, +}; +#endif + static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -1793,6 +1852,9 @@ static struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), #endif +#ifdef CONFIG_FAULT_INJECTION + REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -2068,6 +2130,9 @@ static struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, loginuid), #endif +#ifdef CONFIG_FAULT_INJECTION + REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 4df4902bc8d..a525f9b9f01 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -17,6 +17,7 @@ struct fault_attr { atomic_t times; atomic_t space; unsigned long verbose; + u32 task_filter; unsigned long count; @@ -30,6 +31,7 @@ struct fault_attr { struct dentry *times_file; struct dentry *space_file; struct dentry *verbose_file; + struct dentry *task_filter_file; } dentries; #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index f0317edea14..ad9c46071ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1045,6 +1045,9 @@ struct task_struct { #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif +#ifdef CONFIG_FAULT_INJECTION + int make_it_fail; +#endif }; static inline pid_t process_group(struct task_struct *tsk) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index a7cb3afd132..03468609d70 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -44,6 +45,11 @@ static void fail_dump(struct fault_attr *attr) #define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) +static int fail_task(struct fault_attr *attr, struct task_struct *task) +{ + return !in_interrupt() && task->make_it_fail; +} + /* * This code is stolen from failmalloc-1.0 * http://www.nongnu.org/failmalloc/ @@ -51,6 +57,9 @@ static void fail_dump(struct fault_attr *attr) int should_fail(struct fault_attr *attr, ssize_t size) { + if (attr->task_filter && !fail_task(attr, current)) + return 0; + if (atomic_read(&attr->times) == 0) return 0; @@ -135,6 +144,9 @@ void cleanup_fault_attr_dentries(struct fault_attr *attr) debugfs_remove(attr->dentries.verbose_file); attr->dentries.verbose_file = NULL; + debugfs_remove(attr->dentries.task_filter_file); + attr->dentries.task_filter_file = NULL; + if (attr->dentries.dir) WARN_ON(!simple_empty(attr->dentries.dir)); @@ -169,9 +181,12 @@ int init_fault_attr_dentries(struct fault_attr *attr, const char *name) attr->dentries.verbose_file = debugfs_create_ul("verbose", mode, dir, &attr->verbose); + attr->dentries.task_filter_file = debugfs_create_bool("task-filter", + mode, dir, &attr->task_filter); + if (!attr->dentries.probability_file || !attr->dentries.interval_file || !attr->dentries.times_file || !attr->dentries.space_file - || !attr->dentries.verbose_file) + || !attr->dentries.verbose_file || !attr->dentries.task_filter_file) goto fail; return 0; -- cgit v1.2.3-70-g09d2 From 7c3ab7381e79dfc7db14a67c6f4f3285664e1ec2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sun, 10 Dec 2006 02:19:19 -0800 Subject: [PATCH] io-accounting: core statistics The present per-task IO accounting isn't very useful. It simply counts the number of bytes passed into read() and write(). So if a process reads 1MB from an already-cached file, it is accused of having performed 1MB of I/O, which is wrong. (David Wright had some comments on the applicability of the present logical IO accounting: For billing purposes it is useless but for workload analysis it is very useful read_bytes/read_calls average read request size write_bytes/write_calls average write request size read_bytes/read_blocks ie logical/physical can indicate hit rate or thrashing write_bytes/write_blocks ie logical/physical guess since pdflush writes can be missed I often look for logical larger than physical to see filesystem cache problems. And the bytes/cpusec can help find applications that are dominating the cache and causing slow interactive response from page cache contention. I want to find the IO intensive applications and make sure they are doing efficient IO. Thus the acctcms(sysV) or csacms command would give the high IO commands). This patchset adds new accounting which tries to be more accurate. We account for three things: reads: attempt to count the number of bytes which this process really did cause to be fetched from the storage layer. Done at the submit_bio() level, so it is accurate for block-backed filesystems. I also attempt to wire up NFS and CIFS. writes: attempt to count the number of bytes which this process caused to be sent to the storage layer. This is done at page-dirtying time. The big inaccuracy here is truncate. If a process writes 1MB to a file and then deletes the file, it will in fact perform no writeout. But it will have been accounted as having caused 1MB of write. So... cancelled_writes: account the number of bytes which this process caused to not happen, by truncating pagecache. We _could_ just subtract this from the process's `write' accounting. But that means that some processes would be reported to have done negative amounts of write IO, which is silly. So we just report the raw number and punt this decision up to userspace. Now, we _could_ account for writes at the physical I/O level. But - This would require that we track memory-dirtying tasks at the per-page level (would require a new pointer in struct page). - It would mean that IO statistics for a process are usually only available long after that process has exitted. Which means that we probably cannot communicate this info via taskstats. This patch: Wire up the kernel-private data structures and the accessor functions to manipulate them. Cc: Jay Lan Cc: Shailabh Nagar Cc: Balbir Singh Cc: Chris Sturtivant Cc: Tony Ernst Cc: Guillaume Thouvenin Cc: David Wright Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ include/linux/task_io_accounting.h | 37 ++++++++++++++++++++++++++ include/linux/task_io_accounting_ops.h | 47 ++++++++++++++++++++++++++++++++++ init/Kconfig | 9 +++++++ kernel/fork.c | 2 ++ 5 files changed, 97 insertions(+) create mode 100644 include/linux/task_io_accounting.h create mode 100644 include/linux/task_io_accounting_ops.h (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ad9c46071ff..1208feab46e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -82,6 +82,7 @@ struct sched_param { #include #include #include +#include #include @@ -1013,6 +1014,7 @@ struct task_struct { wait_queue_t *io_wait; /* i/o counters(bytes read/written, #syscalls */ u64 rchar, wchar, syscr, syscw; + struct task_io_accounting ioac; #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ diff --git a/include/linux/task_io_accounting.h b/include/linux/task_io_accounting.h new file mode 100644 index 00000000000..44d00e9ccee --- /dev/null +++ b/include/linux/task_io_accounting.h @@ -0,0 +1,37 @@ +/* + * task_io_accounting: a structure which is used for recording a single task's + * IO statistics. + * + * Don't include this header file directly - it is designed to be dragged in via + * sched.h. + * + * Blame akpm@osdl.org for all this. + */ + +#ifdef CONFIG_TASK_IO_ACCOUNTING +struct task_io_accounting { + /* + * The number of bytes which this task has caused to be read from + * storage. + */ + u64 read_bytes; + + /* + * The number of bytes which this task has caused, or shall cause to be + * written to disk. + */ + u64 write_bytes; + + /* + * A task can cause "negative" IO too. If this task truncates some + * dirty pagecache, some IO which another task has been accounted for + * (in its write_bytes) will not be happening. We _could_ just + * subtract that from the truncating task's write_bytes, but there is + * information loss in doing that. + */ + u64 cancelled_write_bytes; +}; +#else +struct task_io_accounting { +}; +#endif diff --git a/include/linux/task_io_accounting_ops.h b/include/linux/task_io_accounting_ops.h new file mode 100644 index 00000000000..df2a319106b --- /dev/null +++ b/include/linux/task_io_accounting_ops.h @@ -0,0 +1,47 @@ +/* + * Task I/O accounting operations + */ +#ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED +#define __TASK_IO_ACCOUNTING_OPS_INCLUDED + +#ifdef CONFIG_TASK_IO_ACCOUNTING +static inline void task_io_account_read(size_t bytes) +{ + current->ioac.read_bytes += bytes; +} + +static inline void task_io_account_write(size_t bytes) +{ + current->ioac.write_bytes += bytes; +} + +static inline void task_io_account_cancelled_write(size_t bytes) +{ + current->ioac.cancelled_write_bytes += bytes; +} + +static inline void task_io_accounting_init(struct task_struct *tsk) +{ + memset(&tsk->ioac, 0, sizeof(tsk->ioac)); +} + +#else + +static inline void task_io_account_read(size_t bytes) +{ +} + +static inline void task_io_account_write(size_t bytes) +{ +} + +static inline void task_io_account_cancelled_write(size_t bytes) +{ +} + +static inline void task_io_accounting_init(struct task_struct *tsk) +{ +} + +#endif /* CONFIG_TASK_IO_ACCOUNTING */ +#endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ diff --git a/init/Kconfig b/init/Kconfig index 14d484606fa..9edf103b3ec 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -304,6 +304,15 @@ config TASK_XACCT Say N if unsure. +config TASK_IO_ACCOUNTING + bool "Enable per-task storage I/O accounting (EXPERIMENTAL)" + depends on TASK_XACCT + help + Collect information on the number of bytes of storage I/O which this + task has caused. + + Say N if unsure. + config SYSCTL bool diff --git a/kernel/fork.c b/kernel/fork.c index 8c859eef8e6..086e172d0d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1055,6 +1056,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->wchar = 0; /* I/O counter: bytes written */ p->syscr = 0; /* I/O counter: read syscalls */ p->syscw = 0; /* I/O counter: write syscalls */ + task_io_accounting_init(p); acct_clear_integrals(p); p->it_virt_expires = cputime_zero; -- cgit v1.2.3-70-g09d2 From 08c183f31bdbb709f177f6d3110d5f288ea33933 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 10 Dec 2006 02:20:29 -0800 Subject: [PATCH] sched: add option to serialize load balancing Large sched domains can be very expensive to scan. Add an option SD_SERIALIZE to the sched domain flags. If that flag is set then we make sure that no other such domain is being balanced. [akpm@osdl.org: build fix] Signed-off-by: Christoph Lameter Cc: Peter Williams Cc: Nick Piggin Cc: Christoph Lameter Cc: "Siddha, Suresh B" Cc: "Chen, Kenneth W" Acked-by: Ingo Molnar Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/topology.h | 1 + include/asm-ia64/topology.h | 1 + include/asm-powerpc/topology.h | 1 + include/asm-x86_64/topology.h | 1 + include/linux/sched.h | 1 + include/linux/topology.h | 3 ++- kernel/sched.c | 9 +++++++++ 7 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux/sched.h') diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 978d0959613..ac58580ad66 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -89,6 +89,7 @@ static inline int node_to_first_cpu(int node) .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index a6e38565ab4..22ed6749557 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -101,6 +101,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 64, \ diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 50c014007de..6610495f5f1 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -66,6 +66,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ | SD_WAKE_IDLE \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 5c8f49280db..2facec5914d 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -47,6 +47,7 @@ extern int __node_distance(int, int); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 1208feab46e..ea92e5c8908 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -648,6 +648,7 @@ enum idle_type #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) diff --git a/include/linux/topology.h b/include/linux/topology.h index b93bb6cc6cc..6c5a6e6e813 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -194,7 +194,8 @@ .wake_idx = 0, /* unused */ \ .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ + .flags = SD_LOAD_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ diff --git a/kernel/sched.c b/kernel/sched.c index 0a4a26b21f6..2b2b780939c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2880,6 +2880,7 @@ static void update_load(struct rq *this_rq) * * Balancing parameters are set up in arch_init_sched_domains. */ +static DEFINE_SPINLOCK(balancing); static void run_rebalance_domains(struct softirq_action *h) { @@ -2909,6 +2910,11 @@ static void run_rebalance_domains(struct softirq_action *h) if (unlikely(!interval)) interval = 1; + if (sd->flags & SD_SERIALIZE) { + if (!spin_trylock(&balancing)) + goto out; + } + if (time_after_eq(jiffies, sd->last_balance + interval)) { if (load_balance(this_cpu, this_rq, sd, idle)) { /* @@ -2920,6 +2926,9 @@ static void run_rebalance_domains(struct softirq_action *h) } sd->last_balance = jiffies; } + if (sd->flags & SD_SERIALIZE) + spin_unlock(&balancing); +out: if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; } -- cgit v1.2.3-70-g09d2 From 783609c6cb4eaa23f2ac5c968a44483584ec133f Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sun, 10 Dec 2006 02:20:33 -0800 Subject: [PATCH] sched: decrease number of load balances Currently at a particular domain, each cpu in the sched group will do a load balance at the frequency of balance_interval. More the cores and threads, more the cpus will be in each sched group at SMP and NUMA domain. And we endup spending quite a bit of time doing load balancing in those domains. Fix this by making only one cpu(first idle cpu or first cpu in the group if all the cpus are busy) in the sched group do the load balance at that particular sched domain and this load will slowly percolate down to the other cpus with in that group(when they do load balancing at lower domains). Signed-off-by: Suresh Siddha Cc: Christoph Lameter Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/sched.c | 59 ++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 48 insertions(+), 12 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea92e5c8908..72d6927d29e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -707,6 +707,7 @@ struct sched_domain { unsigned long lb_hot_gained[MAX_IDLE_TYPES]; unsigned long lb_nobusyg[MAX_IDLE_TYPES]; unsigned long lb_nobusyq[MAX_IDLE_TYPES]; + unsigned long lb_stopbalance[MAX_IDLE_TYPES]; /* Active load balancing */ unsigned long alb_cnt; diff --git a/kernel/sched.c b/kernel/sched.c index 15ce772a471..4e453431c61 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) * bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ -#define SCHEDSTAT_VERSION 12 +#define SCHEDSTAT_VERSION 13 static int show_schedstat(struct seq_file *seq, void *v) { @@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "domain%d %s", dcnt++, mask_str); for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++) { - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu", sd->lb_cnt[itype], sd->lb_balanced[itype], sd->lb_failed[itype], @@ -474,7 +474,8 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->lb_gained[itype], sd->lb_hot_gained[itype], sd->lb_nobusyq[itype], - sd->lb_nobusyg[itype]); + sd->lb_nobusyg[itype], + sd->lb_stopbalance[itype]); } seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", sd->alb_cnt, sd->alb_failed, sd->alb_pushed, @@ -2249,7 +2250,7 @@ out: static struct sched_group * find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long *imbalance, enum idle_type idle, int *sd_idle, - cpumask_t *cpus) + cpumask_t *cpus, int *balance) { struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; unsigned long max_load, avg_load, total_load, this_load, total_pwr; @@ -2278,10 +2279,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, unsigned long load, group_capacity; int local_group; int i; + unsigned int balance_cpu = -1, first_idle_cpu = 0; unsigned long sum_nr_running, sum_weighted_load; local_group = cpu_isset(this_cpu, group->cpumask); + if (local_group) + balance_cpu = first_cpu(group->cpumask); + /* Tally up the load of all CPUs in the group */ sum_weighted_load = sum_nr_running = avg_load = 0; @@ -2297,9 +2302,14 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, *sd_idle = 0; /* Bias balancing toward cpus of our domain */ - if (local_group) + if (local_group) { + if (idle_cpu(i) && !first_idle_cpu) { + first_idle_cpu = 1; + balance_cpu = i; + } + load = target_load(i, load_idx); - else + } else load = source_load(i, load_idx); avg_load += load; @@ -2307,6 +2317,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, sum_weighted_load += rq->raw_weighted_load; } + /* + * First idle cpu or the first cpu(busiest) in this sched group + * is eligible for doing load balancing at this and above + * domains. + */ + if (local_group && balance_cpu != this_cpu && balance) { + *balance = 0; + goto ret; + } + total_load += avg_load; total_pwr += group->cpu_power; @@ -2498,8 +2518,8 @@ out_balanced: *imbalance = min_load_per_task; return group_min; } -ret: #endif +ret: *imbalance = 0; return NULL; } @@ -2550,7 +2570,8 @@ static inline unsigned long minus_1_or_zero(unsigned long n) * tasks if there is an imbalance. */ static int load_balance(int this_cpu, struct rq *this_rq, - struct sched_domain *sd, enum idle_type idle) + struct sched_domain *sd, enum idle_type idle, + int *balance) { int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; @@ -2573,7 +2594,13 @@ static int load_balance(int this_cpu, struct rq *this_rq, redo: group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, - &cpus); + &cpus, balance); + + if (*balance == 0) { + schedstat_inc(sd, lb_stopbalance[idle]); + goto out_balanced; + } + if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); goto out_balanced; @@ -2715,7 +2742,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); redo: group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE, - &sd_idle, &cpus); + &sd_idle, &cpus, NULL); if (!group) { schedstat_inc(sd, lb_nobusyg[NEWLY_IDLE]); goto out_balanced; @@ -2885,7 +2912,7 @@ static DEFINE_SPINLOCK(balancing); static void run_rebalance_domains(struct softirq_action *h) { - int this_cpu = smp_processor_id(); + int this_cpu = smp_processor_id(), balance = 1; struct rq *this_rq = cpu_rq(this_cpu); unsigned long interval; struct sched_domain *sd; @@ -2917,7 +2944,7 @@ static void run_rebalance_domains(struct softirq_action *h) } if (time_after_eq(jiffies, sd->last_balance + interval)) { - if (load_balance(this_cpu, this_rq, sd, idle)) { + if (load_balance(this_cpu, this_rq, sd, idle, &balance)) { /* * We've pulled tasks over so either we're no * longer idle, or one of our SMT siblings is @@ -2932,6 +2959,14 @@ static void run_rebalance_domains(struct softirq_action *h) out: if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; + + /* + * Stop the load balance at this level. There is another + * CPU in our sched group which is doing load balancing more + * actively. + */ + if (!balance) + break; } this_rq->next_balance = next_balance; } -- cgit v1.2.3-70-g09d2 From 06066714f6016cffcb249f6ab21b7919de1bc859 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sun, 10 Dec 2006 02:20:35 -0800 Subject: [PATCH] sched: remove lb_stopbalance counter Remove scheduler stats lb_stopbalance counter. This counter can be calculated by: lb_balanced - lb_nobusyg - lb_nobusyq. There is no need to create gazillion counters while we can derive the value. Signed-off-by: Ken Chen Signed-off-by: Suresh Siddha Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/sched.c | 11 ++++------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/linux/sched.h b/include/linux/sched.h index 72d6927d29e..ea92e5c8908 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -707,7 +707,6 @@ struct sched_domain { unsigned long lb_hot_gained[MAX_IDLE_TYPES]; unsigned long lb_nobusyg[MAX_IDLE_TYPES]; unsigned long lb_nobusyq[MAX_IDLE_TYPES]; - unsigned long lb_stopbalance[MAX_IDLE_TYPES]; /* Active load balancing */ unsigned long alb_cnt; diff --git a/kernel/sched.c b/kernel/sched.c index 4e453431c61..66e44b5b53d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -428,7 +428,7 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) * bump this up when changing the output format or the meaning of an existing * format, so that tools can adapt (or abort) */ -#define SCHEDSTAT_VERSION 13 +#define SCHEDSTAT_VERSION 14 static int show_schedstat(struct seq_file *seq, void *v) { @@ -466,7 +466,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "domain%d %s", dcnt++, mask_str); for (itype = SCHED_IDLE; itype < MAX_IDLE_TYPES; itype++) { - seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu", + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", sd->lb_cnt[itype], sd->lb_balanced[itype], sd->lb_failed[itype], @@ -474,8 +474,7 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->lb_gained[itype], sd->lb_hot_gained[itype], sd->lb_nobusyq[itype], - sd->lb_nobusyg[itype], - sd->lb_stopbalance[itype]); + sd->lb_nobusyg[itype]); } seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", sd->alb_cnt, sd->alb_failed, sd->alb_pushed, @@ -2596,10 +2595,8 @@ redo: group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, &cpus, balance); - if (*balance == 0) { - schedstat_inc(sd, lb_stopbalance[idle]); + if (*balance == 0) goto out_balanced; - } if (!group) { schedstat_inc(sd, lb_nobusyg[idle]); -- cgit v1.2.3-70-g09d2 From 8a102eed9c4e1d21bad07a8fd97bd4fbf125d966 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Dec 2006 00:34:30 -0800 Subject: [PATCH] PM: Fix SMP races in the freezer Currently, to tell a task that it should go to the refrigerator, we set the PF_FREEZE flag for it and send a fake signal to it. Unfortunately there are two SMP-related problems with this approach. First, a task running on another CPU may be updating its flags while the freezer attempts to set PF_FREEZE for it and this may leave the task's flags in an inconsistent state. Second, there is a potential race between freeze_process() and refrigerator() in which freeze_process() running on one CPU is reading a task's PF_FREEZE flag while refrigerator() running on another CPU has just set PF_FROZEN for the same task and attempts to reset PF_FREEZE for it. If the refrigerator wins the race, freeze_process() will state that PF_FREEZE hasn't been set for the task and will set it unnecessarily, so the task will go to the refrigerator once again after it's been thawed. To solve first of these problems we need to stop using PF_FREEZE to tell tasks that they should go to the refrigerator. Instead, we can introduce a special TIF_*** flag and use it for this purpose, since it is allowed to change the other tasks' TIF_*** flags and there are special calls for it. To avoid the freeze_process()-refrigerator() race we can make freeze_process() to always check the task's PF_FROZEN flag after it's read its "freeze" flag. We should also make sure that refrigerator() will always reset the task's "freeze" flag after it's set PF_FROZEN for it. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Russell King Cc: David Howells Cc: Andi Kleen Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-arm/thread_info.h | 2 ++ include/asm-frv/thread_info.h | 2 ++ include/asm-i386/thread_info.h | 2 ++ include/asm-ia64/thread_info.h | 2 ++ include/asm-powerpc/thread_info.h | 2 ++ include/asm-sh/thread_info.h | 2 ++ include/asm-x86_64/thread_info.h | 2 ++ include/linux/freezer.h | 11 ++++++----- include/linux/sched.h | 1 - kernel/power/process.c | 17 ++++++++++------- 10 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include/linux/sched.h') diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h index d9b8bddc873..5014794f9eb 100644 --- a/include/asm-arm/thread_info.h +++ b/include/asm-arm/thread_info.h @@ -147,6 +147,7 @@ extern void iwmmxt_task_switch(struct thread_info *); #define TIF_POLLING_NRFLAG 16 #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 +#define TIF_FREEZE 19 #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -154,6 +155,7 @@ extern void iwmmxt_task_switch(struct thread_info *); #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) +#define _TIF_FREEZE (1 << TIF_FREEZE) /* * Change these and you break ASM code in entry-common.S diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h index d66c48e6ef1..d881f518e6a 100644 --- a/include/asm-frv/thread_info.h +++ b/include/asm-frv/thread_info.h @@ -116,6 +116,7 @@ register struct thread_info *__current_thread_info asm("gr15"); #define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ +#define TIF_FREEZE 18 /* freezing for suspend */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -125,6 +126,7 @@ register struct thread_info *__current_thread_info asm("gr15"); #define _TIF_IRET (1 << TIF_IRET) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */ diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 46d32ad9208..4b187bb377b 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -134,6 +134,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 16 #define TIF_DEBUG 17 /* uses debug registers */ #define TIF_IO_BITMAP 18 /* uses I/O bitmap */ +#define TIF_FREEZE 19 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<flags & PF_FREEZE; + return test_tsk_thread_flag(p, TIF_FREEZE); } /* * Request that a process be frozen - * FIXME: SMP problem. We may not modify other process' flags! */ static inline void freeze(struct task_struct *p) { - p->flags |= PF_FREEZE; + set_tsk_thread_flag(p, TIF_FREEZE); } /* @@ -33,7 +32,7 @@ static inline void freeze(struct task_struct *p) */ static inline void do_not_freeze(struct task_struct *p) { - p->flags &= ~PF_FREEZE; + clear_tsk_thread_flag(p, TIF_FREEZE); } /* @@ -54,7 +53,9 @@ static inline int thaw_process(struct task_struct *p) */ static inline void frozen_process(struct task_struct *p) { - p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; + p->flags |= PF_FROZEN; + wmb(); + clear_tsk_thread_flag(p, TIF_FREEZE); } extern void refrigerator(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index ea92e5c8908..44637353519 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1144,7 +1144,6 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_FREEZE 0x00004000 /* this task is being frozen for suspend now */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/power/process.c b/kernel/power/process.c index b9a32860bef..6d566bf7085 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -60,13 +60,16 @@ static inline void freeze_process(struct task_struct *p) unsigned long flags; if (!freezing(p)) { - if (p->state == TASK_STOPPED) - force_sig_specific(SIGSTOP, p); - - freeze(p); - spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, p->state == TASK_STOPPED); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + rmb(); + if (!frozen(p)) { + if (p->state == TASK_STOPPED) + force_sig_specific(SIGSTOP, p); + + freeze(p); + spin_lock_irqsave(&p->sighand->siglock, flags); + signal_wake_up(p, p->state == TASK_STOPPED); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } } } -- cgit v1.2.3-70-g09d2