From 3ca68df6ee61e1a2034f3307b9edb9b3d87e5ca1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 20:11:25 -0400 Subject: [GFS2] split gfs2_dinode into on-disk and host variants The latter is used as part of gfs2-private part of struct inode. It actually stores a lot of fields differently; for now the declaration is just cloned, inode field is swtiched and changes propagated. Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 48 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index a7ae7c177ca..f334b4bd291 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -270,6 +270,48 @@ struct gfs2_dinode { __u8 di_reserved[56]; }; +struct gfs2_dinode_host { + struct gfs2_meta_header di_header; + + struct gfs2_inum di_num; + + __be32 di_mode; /* mode of file */ + __be32 di_uid; /* owner's user id */ + __be32 di_gid; /* owner's group id */ + __be32 di_nlink; /* number of links to this file */ + __be64 di_size; /* number of bytes in file */ + __be64 di_blocks; /* number of blocks in file */ + __be64 di_atime; /* time last accessed */ + __be64 di_mtime; /* time last modified */ + __be64 di_ctime; /* time last changed */ + __be32 di_major; /* device major number */ + __be32 di_minor; /* device minor number */ + + /* This section varies from gfs1. Padding added to align with + * remainder of dinode + */ + __be64 di_goal_meta; /* rgrp to alloc from next */ + __be64 di_goal_data; /* data block goal */ + __be64 di_generation; /* generation number for NFS */ + + __be32 di_flags; /* GFS2_DIF_... */ + __be32 di_payload_format; /* GFS2_FORMAT_... */ + __u16 __pad1; /* Was ditype in gfs1 */ + __be16 di_height; /* height of metadata */ + __u32 __pad2; /* Unused incarnation number from gfs1 */ + + /* These only apply to directories */ + __u16 __pad3; /* Padding */ + __be16 di_depth; /* Number of bits in the table */ + __be32 di_entries; /* The number of entries in the directory */ + + struct gfs2_inum __pad4; /* Unused even in current gfs1 */ + + __be64 di_eattr; /* extended attribute block number */ + + __u8 di_reserved[56]; +}; + /* * directory structure - many of these per directory file */ @@ -422,8 +464,8 @@ extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf); -extern void gfs2_dinode_in(struct gfs2_dinode *di, const void *buf); -extern void gfs2_dinode_out(const struct gfs2_dinode *di, void *buf); +extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); +extern void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf); @@ -436,7 +478,7 @@ extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); /* Printing functions */ extern void gfs2_rindex_print(const struct gfs2_rindex *ri); -extern void gfs2_dinode_print(const struct gfs2_dinode *di); +extern void gfs2_dinode_print(const struct gfs2_dinode_host *di); #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 5c6edb576f3800723bb65dbfaff82517089e32d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 20:33:01 -0400 Subject: [GFS2] gfs2_dinode_host fields are host-endian Annotated scalar fields, dropped unused ones. Note that it's not at all obvious that we want to convert all of them to host-endian... Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- include/linux/gfs2_ondisk.h | 47 +++++++++++++++++++-------------------------- 2 files changed, 21 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 118dc693d11..1c876e0fb44 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -229,7 +229,7 @@ struct gfs2_inode { unsigned long i_flags; /* GIF_... */ u64 i_vn; - struct gfs2_dinode i_di; /* To be replaced by ref to block */ + struct gfs2_dinode_host i_di; /* To be replaced by ref to block */ struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index f334b4bd291..0e67a89a969 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -275,41 +275,34 @@ struct gfs2_dinode_host { struct gfs2_inum di_num; - __be32 di_mode; /* mode of file */ - __be32 di_uid; /* owner's user id */ - __be32 di_gid; /* owner's group id */ - __be32 di_nlink; /* number of links to this file */ - __be64 di_size; /* number of bytes in file */ - __be64 di_blocks; /* number of blocks in file */ - __be64 di_atime; /* time last accessed */ - __be64 di_mtime; /* time last modified */ - __be64 di_ctime; /* time last changed */ - __be32 di_major; /* device major number */ - __be32 di_minor; /* device minor number */ + __u32 di_mode; /* mode of file */ + __u32 di_uid; /* owner's user id */ + __u32 di_gid; /* owner's group id */ + __u32 di_nlink; /* number of links to this file */ + __u64 di_size; /* number of bytes in file */ + __u64 di_blocks; /* number of blocks in file */ + __u64 di_atime; /* time last accessed */ + __u64 di_mtime; /* time last modified */ + __u64 di_ctime; /* time last changed */ + __u32 di_major; /* device major number */ + __u32 di_minor; /* device minor number */ /* This section varies from gfs1. Padding added to align with * remainder of dinode */ - __be64 di_goal_meta; /* rgrp to alloc from next */ - __be64 di_goal_data; /* data block goal */ - __be64 di_generation; /* generation number for NFS */ + __u64 di_goal_meta; /* rgrp to alloc from next */ + __u64 di_goal_data; /* data block goal */ + __u64 di_generation; /* generation number for NFS */ - __be32 di_flags; /* GFS2_DIF_... */ - __be32 di_payload_format; /* GFS2_FORMAT_... */ - __u16 __pad1; /* Was ditype in gfs1 */ - __be16 di_height; /* height of metadata */ - __u32 __pad2; /* Unused incarnation number from gfs1 */ + __u32 di_flags; /* GFS2_DIF_... */ + __u32 di_payload_format; /* GFS2_FORMAT_... */ + __u16 di_height; /* height of metadata */ /* These only apply to directories */ - __u16 __pad3; /* Padding */ - __be16 di_depth; /* Number of bits in the table */ - __be32 di_entries; /* The number of entries in the directory */ - - struct gfs2_inum __pad4; /* Unused even in current gfs1 */ + __u16 di_depth; /* Number of bits in the table */ + __u32 di_entries; /* The number of entries in the directory */ - __be64 di_eattr; /* extended attribute block number */ - - __u8 di_reserved[56]; + __u64 di_eattr; /* extended attribute block number */ }; /* -- cgit v1.2.3-70-g09d2 From f50dfaf78c01df3cc2d8819f07d6661915567bae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 20:45:02 -0400 Subject: [GFS2] split gfs2_sb Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/ondisk.c | 2 +- fs/gfs2/super.c | 2 +- fs/gfs2/super.h | 2 +- include/linux/gfs2_ondisk.h | 22 +++++++++++++++++++++- 5 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 1c876e0fb44..bd596ba74e5 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -450,7 +450,7 @@ struct gfs2_sbd { struct super_block *sd_vfs_meta; struct kobject sd_kobj; unsigned long sd_flags; /* SDF_... */ - struct gfs2_sb sd_sb; + struct gfs2_sb_host sd_sb; /* Constants computed on mount */ diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 52cb9a2dc05..5b32f1a3579 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -79,7 +79,7 @@ static void gfs2_meta_header_print(const struct gfs2_meta_header *mh) pv(mh, mh_format, "%u"); } -void gfs2_sb_in(struct gfs2_sb *sb, const void *buf) +void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) { const struct gfs2_sb *str = buf; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6a78b1b32e2..52aa3221fc9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -97,7 +97,7 @@ void gfs2_tune_init(struct gfs2_tune *gt) * changed. */ -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent) +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) { unsigned int x; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 5bb443ae0f5..ac95064c1e5 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -14,7 +14,7 @@ void gfs2_tune_init(struct gfs2_tune *gt); -int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent); +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); struct page *gfs2_read_super(struct super_block *sb, sector_t sector); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 0e67a89a969..b7bdfef82e4 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -128,6 +128,26 @@ struct gfs2_sb { /* In gfs1, quota and license dinodes followed */ }; +struct gfs2_sb_host { + struct gfs2_meta_header sb_header; + + __be32 sb_fs_format; + __be32 sb_multihost_format; + __u32 __pad0; /* Was superblock flags in gfs1 */ + + __be32 sb_bsize; + __be32 sb_bsize_shift; + __u32 __pad1; /* Was journal segment size in gfs1 */ + + struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ + struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */ + struct gfs2_inum sb_root_dir; + + char sb_lockproto[GFS2_LOCKNAME_LEN]; + char sb_locktable[GFS2_LOCKNAME_LEN]; + /* In gfs1, quota and license dinodes followed */ +}; + /* * resource index structure */ @@ -450,7 +470,7 @@ struct gfs2_quota_change { extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf); extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); -extern void gfs2_sb_in(struct gfs2_sb *sb, const void *buf); +extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf); extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf); -- cgit v1.2.3-70-g09d2 From bc558c87bb7e50c4f728d32684a9f4f4c73ebde3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 21:02:41 -0400 Subject: [GFS2] fields of gfs2_sb_host are host-endian ... and several could be killed, but that's another story. Annotate scalar ones, kill completely unused. Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b7bdfef82e4..a5d36cdc46e 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -131,16 +131,13 @@ struct gfs2_sb { struct gfs2_sb_host { struct gfs2_meta_header sb_header; - __be32 sb_fs_format; - __be32 sb_multihost_format; - __u32 __pad0; /* Was superblock flags in gfs1 */ + __u32 sb_fs_format; + __u32 sb_multihost_format; - __be32 sb_bsize; - __be32 sb_bsize_shift; - __u32 __pad1; /* Was journal segment size in gfs1 */ + __u32 sb_bsize; + __u32 sb_bsize_shift; struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ - struct gfs2_inum __pad2; /* Was rindex dinode in gfs1 */ struct gfs2_inum sb_root_dir; char sb_lockproto[GFS2_LOCKNAME_LEN]; -- cgit v1.2.3-70-g09d2 From 68826664d12827d7a732192e2f00ba46fb899414 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 21:07:22 -0400 Subject: [GFS2] split and annotate gfs2_rgrp Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/ondisk.c | 4 ++-- include/linux/gfs2_ondisk.h | 13 +++++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bd596ba74e5..8ca7a7f3506 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -68,7 +68,7 @@ struct gfs2_rgrpd { struct list_head rd_recent; /* Recently used rgrps */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ struct gfs2_rindex rd_ri; - struct gfs2_rgrp rd_rg; + struct gfs2_rgrp_host rd_rg; u64 rd_rg_vn; struct gfs2_bitmap *rd_bits; unsigned int rd_bh_count; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 5b32f1a3579..3b156a15cd8 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -120,7 +120,7 @@ void gfs2_rindex_print(const struct gfs2_rindex *ri) pv(ri, ri_bitbytes, "%u"); } -void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf) +void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) { const struct gfs2_rgrp *str = buf; @@ -131,7 +131,7 @@ void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf) rg->rg_igeneration = be64_to_cpu(str->rg_igeneration); } -void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf) +void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) { struct gfs2_rgrp *str = buf; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index a5d36cdc46e..e4ca6e4176b 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -193,6 +193,15 @@ struct gfs2_rgrp { __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */ }; +struct gfs2_rgrp_host { + struct gfs2_meta_header rg_header; + + __u32 rg_flags; + __u32 rg_free; + __u32 rg_dinodes; + __u64 rg_igeneration; +}; + /* * quota structure */ @@ -470,8 +479,8 @@ extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf); -extern void gfs2_rgrp_in(struct gfs2_rgrp *rg, const void *buf); -extern void gfs2_rgrp_out(const struct gfs2_rgrp *rg, void *buf); +extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); +extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf); extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); -- cgit v1.2.3-70-g09d2 From e697264709c86040271cdd7abee781d7adbb7f91 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 21:29:46 -0400 Subject: [GFS2] split and annotate gfs2_inum_range Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 4 ++-- fs/gfs2/ondisk.c | 4 ++-- include/linux/gfs2_ondisk.h | 9 +++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 191a3df93d6..7eb6b440da6 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -436,7 +436,7 @@ static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino) { struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode); struct buffer_head *bh; - struct gfs2_inum_range ir; + struct gfs2_inum_range_host ir; int error; error = gfs2_trans_begin(sdp, RES_DINODE, 0); @@ -479,7 +479,7 @@ static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino) struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode); struct gfs2_holder gh; struct buffer_head *bh; - struct gfs2_inum_range ir; + struct gfs2_inum_range_host ir; int error; error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 3b156a15cd8..64f5f0c604a 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -263,7 +263,7 @@ void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf) lh->lh_hash = be32_to_cpu(str->lh_hash); } -void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf) +void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) { const struct gfs2_inum_range *str = buf; @@ -271,7 +271,7 @@ void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf) ir->ir_length = be64_to_cpu(str->ir_length); } -void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf) +void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) { struct gfs2_inum_range *str = buf; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index e4ca6e4176b..c035587d066 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -445,6 +445,11 @@ struct gfs2_inum_range { __be64 ir_length; }; +struct gfs2_inum_range_host { + __u64 ir_start; + __u64 ir_length; +}; + /* * Statfs change * Describes an change to the pool of free and allocated @@ -488,8 +493,8 @@ extern void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf); -extern void gfs2_inum_range_in(struct gfs2_inum_range *ir, const void *buf); -extern void gfs2_inum_range_out(const struct gfs2_inum_range *ir, void *buf); +extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf); +extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf); extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf); extern void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf); extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); -- cgit v1.2.3-70-g09d2 From 551676226163379c217e8ec54bd287eab9b8521e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 21:47:13 -0400 Subject: [GFS2] split and annotate gfs2_log_head Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 2 +- fs/gfs2/incore.h | 2 +- fs/gfs2/lops.c | 4 ++-- fs/gfs2/lops.h | 2 +- fs/gfs2/ondisk.c | 2 +- fs/gfs2/recovery.c | 22 +++++++++++----------- fs/gfs2/recovery.h | 2 +- fs/gfs2/super.c | 4 ++-- include/linux/gfs2_ondisk.h | 12 +++++++++++- 9 files changed, 31 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 41a6b6818a5..5406b193227 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -491,7 +491,7 @@ static void trans_go_xmote_bh(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_log_header head; + struct gfs2_log_header_host head; int error; if (gl->gl_state != LM_ST_UNLOCKED && diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 8ca7a7f3506..e69f3394a2c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -41,7 +41,7 @@ struct gfs2_log_operations { void (*lo_before_commit) (struct gfs2_sbd *sdp); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); void (*lo_before_scan) (struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass); + struct gfs2_log_header_host *head, int pass); int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, struct gfs2_log_descriptor *ld, __be64 *ptr, int pass); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index ab6d1115f95..8a654cd253d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -182,7 +182,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) } static void buf_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass) + struct gfs2_log_header_host *head, int pass) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); @@ -328,7 +328,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, int pass) + struct gfs2_log_header_host *head, int pass) { struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 5839c05ae6b..965bc65c7c6 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -60,7 +60,7 @@ static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) } static inline void lops_before_scan(struct gfs2_jdesc *jd, - struct gfs2_log_header *head, + struct gfs2_log_header_host *head, unsigned int pass) { int x; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 64f5f0c604a..84b1ebc7569 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -251,7 +251,7 @@ void gfs2_dinode_print(const struct gfs2_dinode_host *di) printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr); } -void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf) +void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf) { const struct gfs2_log_header *str = buf; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 62cd223819b..44781624162 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -132,10 +132,10 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp) */ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, - struct gfs2_log_header *head) + struct gfs2_log_header_host *head) { struct buffer_head *bh; - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; u32 hash; int error; @@ -143,7 +143,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, if (error) return error; - memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header)); + memcpy(&lh, bh->b_data, sizeof(struct gfs2_log_header)); /* XXX */ lh.lh_hash = 0; hash = gfs2_disk_hash((char *)&lh, sizeof(struct gfs2_log_header)); gfs2_log_header_in(&lh, bh->b_data); @@ -174,7 +174,7 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk, */ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, - struct gfs2_log_header *head) + struct gfs2_log_header_host *head) { unsigned int orig_blk = *blk; int error; @@ -205,10 +205,10 @@ static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk, * Returns: errno */ -static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { unsigned int blk = head->lh_blkno; - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; int error; for (;;) { @@ -245,9 +245,9 @@ static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header *head) * Returns: errno */ -int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { - struct gfs2_log_header lh_1, lh_m; + struct gfs2_log_header_host lh_1, lh_m; u32 blk_1, blk_2, blk_m; int error; @@ -320,7 +320,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, length = be32_to_cpu(ld->ld_length); if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) { - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; error = get_log_header(jd, start, &lh); if (!error) { gfs2_replay_incr_blk(sdp, &start); @@ -363,7 +363,7 @@ static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start, * Returns: errno */ -static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header *head) +static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); @@ -425,7 +425,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - struct gfs2_log_header head; + struct gfs2_log_header_host head; struct gfs2_holder j_gh, ji_gh, t_gh; unsigned long t; int ro = 0; diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 961feedf4d8..f7235e61c72 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -26,7 +26,7 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); void gfs2_revoke_clean(struct gfs2_sbd *sdp); int gfs2_find_jhead(struct gfs2_jdesc *jd, - struct gfs2_log_header *head); + struct gfs2_log_header_host *head); int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); void gfs2_check_journals(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 52aa3221fc9..0faf563c83a 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -508,7 +508,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; struct gfs2_holder t_gh; - struct gfs2_log_header head; + struct gfs2_log_header_host head; int error; error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, @@ -873,7 +873,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); - struct gfs2_log_header lh; + struct gfs2_log_header_host lh; int error; error = gfs2_jindex_hold(sdp, &ji_gh); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c035587d066..fb69a64c70c 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -405,6 +405,16 @@ struct gfs2_log_header { __be32 lh_hash; }; +struct gfs2_log_header_host { + struct gfs2_meta_header lh_header; + + __u64 lh_sequence; /* Sequence number of this transaction */ + __u32 lh_flags; /* GFS2_LOG_HEAD_... */ + __u32 lh_tail; /* Block number of log tail */ + __u32 lh_blkno; + __u32 lh_hash; +}; + /* * Log type descriptor */ @@ -492,7 +502,7 @@ extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); extern void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); -extern void gfs2_log_header_in(struct gfs2_log_header *lh, const void *buf); +extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf); extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf); extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf); extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf); -- cgit v1.2.3-70-g09d2 From e928a76f959e89884f6186bb6f846c533847d5df Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 21:57:23 -0400 Subject: [GFS2] split and annotate gfs2_meta_header Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/ondisk.c | 6 +++--- include/linux/gfs2_ondisk.h | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 84b1ebc7569..b5aa7ab97f2 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -54,7 +54,7 @@ static void gfs2_inum_print(const struct gfs2_inum *no) printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); } -static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf) +static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) { const struct gfs2_meta_header *str = buf; @@ -63,7 +63,7 @@ static void gfs2_meta_header_in(struct gfs2_meta_header *mh, const void *buf) mh->mh_format = be32_to_cpu(str->mh_format); } -static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf) +static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf) { struct gfs2_meta_header *str = buf; @@ -72,7 +72,7 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header *mh, void *buf) str->mh_format = cpu_to_be32(mh->mh_format); } -static void gfs2_meta_header_print(const struct gfs2_meta_header *mh) +static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh) { pv(mh, mh_magic, "0x%.8X"); pv(mh, mh_type, "%u"); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index fb69a64c70c..76eb9e1bb77 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -89,6 +89,12 @@ struct gfs2_meta_header { __be32 __pad1; /* Was incarnation number in gfs1 */ }; +struct gfs2_meta_header_host { + __u32 mh_magic; + __u32 mh_type; + __u32 mh_format; +}; + /* * super-block structure * @@ -129,7 +135,7 @@ struct gfs2_sb { }; struct gfs2_sb_host { - struct gfs2_meta_header sb_header; + struct gfs2_meta_header_host sb_header; __u32 sb_fs_format; __u32 sb_multihost_format; @@ -194,7 +200,7 @@ struct gfs2_rgrp { }; struct gfs2_rgrp_host { - struct gfs2_meta_header rg_header; + struct gfs2_meta_header_host rg_header; __u32 rg_flags; __u32 rg_free; @@ -297,7 +303,7 @@ struct gfs2_dinode { }; struct gfs2_dinode_host { - struct gfs2_meta_header di_header; + struct gfs2_meta_header_host di_header; struct gfs2_inum di_num; @@ -406,7 +412,7 @@ struct gfs2_log_header { }; struct gfs2_log_header_host { - struct gfs2_meta_header lh_header; + struct gfs2_meta_header_host lh_header; __u64 lh_sequence; /* Sequence number of this transaction */ __u32 lh_flags; /* GFS2_LOG_HEAD_... */ -- cgit v1.2.3-70-g09d2 From 1e81c4c3e0f55c95b6278a827262b80debd0dc7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 22:51:24 -0400 Subject: [GFS2] split and annotate gfs_rindex Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 2 +- fs/gfs2/ondisk.c | 4 ++-- fs/gfs2/rgrp.c | 2 +- include/linux/gfs2_ondisk.h | 14 +++++++++++--- 4 files changed, 15 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e69f3394a2c..e4afc2c4d60 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -67,7 +67,7 @@ struct gfs2_rgrpd { struct list_head rd_list_mru; struct list_head rd_recent; /* Recently used rgrps */ struct gfs2_glock *rd_gl; /* Glock for this rgrp */ - struct gfs2_rindex rd_ri; + struct gfs2_rindex_host rd_ri; struct gfs2_rgrp_host rd_rg; u64 rd_rg_vn; struct gfs2_bitmap *rd_bits; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index b5aa7ab97f2..c4e099d582f 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -97,7 +97,7 @@ void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); } -void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf) +void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf) { const struct gfs2_rindex *str = buf; @@ -109,7 +109,7 @@ void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf) } -void gfs2_rindex_print(const struct gfs2_rindex *ri) +void gfs2_rindex_print(const struct gfs2_rindex_host *ri) { printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr); pv(ri, ri_length, "%u"); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b261385c006..07dfd630505 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -253,7 +253,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) } -static inline int rgrp_contains_block(struct gfs2_rindex *ri, u64 block) +static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block) { u64 first = ri->ri_data0; u64 last = first + ri->ri_data; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 76eb9e1bb77..7dd5e4c18a6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -168,6 +168,14 @@ struct gfs2_rindex { __u8 ri_reserved[64]; }; +struct gfs2_rindex_host { + __u64 ri_addr; /* grp block disk address */ + __u64 ri_data0; /* first data location */ + __u32 ri_length; /* length of rgrp header in fs blocks */ + __u32 ri_data; /* num of data blocks in rgrp */ + __u32 ri_bitbytes; /* number of bytes in data bitmaps */ +}; + /* * resource group header structure */ @@ -498,8 +506,8 @@ struct gfs2_quota_change { extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf); extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); -extern void gfs2_rindex_in(struct gfs2_rindex *ri, const void *buf); -extern void gfs2_rindex_out(const struct gfs2_rindex *ri, void *buf); +extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); +extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); @@ -517,7 +525,7 @@ extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); /* Printing functions */ -extern void gfs2_rindex_print(const struct gfs2_rindex *ri); +extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri); extern void gfs2_dinode_print(const struct gfs2_dinode_host *di); #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 629a21e7ecedf779c68dcaa9a186069f57a7c652 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 22:51:24 -0400 Subject: [GFS2] split and annotate gfs2_inum Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 8 ++++---- fs/gfs2/dir.h | 8 ++++---- fs/gfs2/incore.h | 2 +- fs/gfs2/inode.c | 22 +++++++++++----------- fs/gfs2/inode.h | 4 ++-- fs/gfs2/ondisk.c | 6 +++--- fs/gfs2/ops_dentry.c | 2 +- fs/gfs2/ops_export.c | 8 ++++---- fs/gfs2/ops_export.h | 2 +- fs/gfs2/ops_file.c | 2 +- fs/gfs2/ops_fstype.c | 4 ++-- include/linux/gfs2_ondisk.h | 19 ++++++++++++------- 12 files changed, 46 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index e24af28b1a1..d67a3760ca3 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1194,7 +1194,7 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, int *copied) { const struct gfs2_dirent *dent, *dent_next; - struct gfs2_inum inum; + struct gfs2_inum_host inum; u64 off, off_next; unsigned int x, y; int run = 0; @@ -1456,7 +1456,7 @@ out: */ int gfs2_dir_search(struct inode *dir, const struct qstr *name, - struct gfs2_inum *inum, unsigned int *type) + struct gfs2_inum_host *inum, unsigned int *type) { struct buffer_head *bh; struct gfs2_dirent *dent; @@ -1531,7 +1531,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inum *inum, unsigned type) + const struct gfs2_inum_host *inum, unsigned type) { struct gfs2_inode *ip = GFS2_I(inode); struct buffer_head *bh; @@ -1666,7 +1666,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) */ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum *inum, unsigned int new_type) + struct gfs2_inum_host *inum, unsigned int new_type) { struct buffer_head *bh; struct gfs2_dirent *dent; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 371233419b0..b21b33668a5 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -31,17 +31,17 @@ struct gfs2_inum; typedef int (*gfs2_filldir_t) (void *opaque, const char *name, unsigned int length, u64 offset, - struct gfs2_inum *inum, unsigned int type); + struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_search(struct inode *dir, const struct qstr *filename, - struct gfs2_inum *inum, unsigned int *type); + struct gfs2_inum_host *inum, unsigned int *type); int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inum *inum, unsigned int type); + const struct gfs2_inum_host *inum, unsigned int type); int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque, gfs2_filldir_t filldir); int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, - struct gfs2_inum *new_inum, unsigned int new_type); + struct gfs2_inum_host *new_inum, unsigned int new_type); int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index e4afc2c4d60..20c9b4f0e52 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -224,7 +224,7 @@ enum { struct gfs2_inode { struct inode i_inode; - struct gfs2_inum i_num; + struct gfs2_inum_host i_num; unsigned long i_flags; /* GIF_... */ diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7eb6b440da6..dadd1f35c86 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -112,7 +112,7 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip) static int iget_test(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum *inum = opaque; + struct gfs2_inum_host *inum = opaque; if (ip && ip->i_num.no_addr == inum->no_addr) return 1; @@ -123,19 +123,19 @@ static int iget_test(struct inode *inode, void *opaque) static int iget_set(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_inum *inum = opaque; + struct gfs2_inum_host *inum = opaque; ip->i_num = *inum; return 0; } -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum) +struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum) { return ilookup5(sb, (unsigned long)inum->no_formal_ino, iget_test, inum); } -static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) +static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum) { return iget5_locked(sb, (unsigned long)inum->no_formal_ino, iget_test, iget_set, inum); @@ -150,7 +150,7 @@ static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) * Returns: A VFS inode, or an error */ -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned int type) +struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type) { struct inode *inode = gfs2_iget(sb, inum); struct gfs2_inode *ip = GFS2_I(inode); @@ -394,7 +394,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_holder d_gh; - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error = 0; struct inode *inode = NULL; @@ -610,7 +610,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, *gid = current->fsgid; } -static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum *inum, +static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); @@ -650,7 +650,7 @@ out: */ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - const struct gfs2_inum *inum, unsigned int mode, + const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, const u64 *generation) { @@ -707,7 +707,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, } static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, - unsigned int mode, const struct gfs2_inum *inum, + unsigned int mode, const struct gfs2_inum_host *inum, const u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); @@ -866,7 +866,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, struct gfs2_inode *dip = ghs->gh_gl->gl_object; struct inode *dir = &dip->i_inode; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_inum inum; + struct gfs2_inum_host inum; int error; u64 generation; @@ -1018,7 +1018,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip) { - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index f5d86176057..d699b920975 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -27,8 +27,8 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip) void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_inode_attr_out(struct gfs2_inode *ip); -struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum *inum, unsigned type); -struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum *inum); +struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); +struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index c4e099d582f..32f592f4a3c 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -32,7 +32,7 @@ * first arg: the cpu-order structure */ -void gfs2_inum_in(struct gfs2_inum *no, const void *buf) +void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf) { const struct gfs2_inum *str = buf; @@ -40,7 +40,7 @@ void gfs2_inum_in(struct gfs2_inum *no, const void *buf) no->no_addr = be64_to_cpu(str->no_addr); } -void gfs2_inum_out(const struct gfs2_inum *no, void *buf) +void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf) { struct gfs2_inum *str = buf; @@ -48,7 +48,7 @@ void gfs2_inum_out(const struct gfs2_inum *no, void *buf) str->no_addr = cpu_to_be64(no->no_addr); } -static void gfs2_inum_print(const struct gfs2_inum *no) +static void gfs2_inum_print(const struct gfs2_inum_host *no) { printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino); printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index 00041b1b802..c36f9e342e6 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -43,7 +43,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode = dentry->d_inode; struct gfs2_holder d_gh; struct gfs2_inode *ip; - struct gfs2_inum inum; + struct gfs2_inum_host inum; unsigned int type; int error; diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 86127d93bd3..33423a5c329 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -35,7 +35,7 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb, void *context) { struct gfs2_fh_obj fh_obj; - struct gfs2_inum *this, parent; + struct gfs2_inum_host *this, parent; if (fh_type != fh_len) return NULL; @@ -114,12 +114,12 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, } struct get_name_filldir { - struct gfs2_inum inum; + struct gfs2_inum_host inum; char *name; }; static int get_name_filldir(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum *inum, + u64 offset, struct gfs2_inum_host *inum, unsigned int type) { struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; @@ -202,7 +202,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj; - struct gfs2_inum *inum = &fh_obj->this; + struct gfs2_inum_host *inum = &fh_obj->this; struct gfs2_holder i_gh, ri_gh, rgd_gh; struct gfs2_rgrpd *rgd; struct inode *inode; diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h index 09aca5046fb..f925a955b3b 100644 --- a/fs/gfs2/ops_export.h +++ b/fs/gfs2/ops_export.h @@ -15,7 +15,7 @@ extern struct export_operations gfs2_export_ops; struct gfs2_fh_obj { - struct gfs2_inum this; + struct gfs2_inum_host this; __u32 imode; }; diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 3064f133bf3..359965c368b 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -139,7 +139,7 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) */ static int filldir_func(void *opaque, const char *name, unsigned int length, - u64 offset, struct gfs2_inum *inum, + u64 offset, struct gfs2_inum_host *inum, unsigned int type) { struct filldir_reg *fdr = (struct filldir_reg *)opaque; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 882873a6bd6..d14e139d267 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -237,7 +237,7 @@ fail: } static struct inode *gfs2_lookup_root(struct super_block *sb, - struct gfs2_inum *inum) + struct gfs2_inum_host *inum) { return gfs2_inode_lookup(sb, inum, DT_DIR); } @@ -246,7 +246,7 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) { struct super_block *sb = sdp->sd_vfs; struct gfs2_holder sb_gh; - struct gfs2_inum *inum; + struct gfs2_inum_host *inum; struct inode *inode; int error = 0; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 7dd5e4c18a6..b16df6e8f55 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -54,8 +54,13 @@ struct gfs2_inum { __be64 no_addr; }; -static inline int gfs2_inum_equal(const struct gfs2_inum *ino1, - const struct gfs2_inum *ino2) +struct gfs2_inum_host { + __u64 no_formal_ino; + __u64 no_addr; +}; + +static inline int gfs2_inum_equal(const struct gfs2_inum_host *ino1, + const struct gfs2_inum_host *ino2) { return ino1->no_formal_ino == ino2->no_formal_ino && ino1->no_addr == ino2->no_addr; @@ -143,8 +148,8 @@ struct gfs2_sb_host { __u32 sb_bsize; __u32 sb_bsize_shift; - struct gfs2_inum sb_master_dir; /* Was jindex dinode in gfs1 */ - struct gfs2_inum sb_root_dir; + struct gfs2_inum_host sb_master_dir; /* Was jindex dinode in gfs1 */ + struct gfs2_inum_host sb_root_dir; char sb_lockproto[GFS2_LOCKNAME_LEN]; char sb_locktable[GFS2_LOCKNAME_LEN]; @@ -313,7 +318,7 @@ struct gfs2_dinode { struct gfs2_dinode_host { struct gfs2_meta_header_host di_header; - struct gfs2_inum di_num; + struct gfs2_inum_host di_num; __u32 di_mode; /* mode of file */ __u32 di_uid; /* owner's user id */ @@ -503,8 +508,8 @@ struct gfs2_quota_change { #ifdef __KERNEL__ /* Translation functions */ -extern void gfs2_inum_in(struct gfs2_inum *no, const void *buf); -extern void gfs2_inum_out(const struct gfs2_inum *no, void *buf); +extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf); +extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf); extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); -- cgit v1.2.3-70-g09d2 From b5bc9e8b065dbcd4c675e8c158d6e524f221b8e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 23:31:55 -0400 Subject: [GFS2] split and annotate gfs2_quota Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/ondisk.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/gfs2_ondisk.h | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 32f592f4a3c..5db0737fcdb 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -144,7 +144,7 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); } -void gfs2_quota_in(struct gfs2_quota *qu, const void *buf) +void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) { const struct gfs2_quota *str = buf; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3deae7416c..e3f5b8da484 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -743,7 +743,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota q; + struct gfs2_quota_host q; char buf[sizeof(struct gfs2_quota)]; struct file_ra_state ra_state; int error; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b16df6e8f55..431e03b8545 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -232,6 +232,12 @@ struct gfs2_quota { __u8 qu_reserved[64]; }; +struct gfs2_quota_host { + __u64 qu_limit; + __u64 qu_warn; + __u64 qu_value; +}; + /* * dinode structure */ @@ -515,8 +521,7 @@ extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); -extern void gfs2_quota_in(struct gfs2_quota *qu, const void *buf); -extern void gfs2_quota_out(const struct gfs2_quota *qu, void *buf); +extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf); extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); extern void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); -- cgit v1.2.3-70-g09d2 From bd209cc017f231e8536550bdab1bf5da93c32798 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 23:43:19 -0400 Subject: [GFS2] split and annotate gfs2_statfs_change Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 4 ++-- fs/gfs2/ondisk.c | 4 ++-- fs/gfs2/ops_super.c | 2 +- fs/gfs2/super.c | 22 +++++++++++----------- fs/gfs2/super.h | 4 ++-- include/linux/gfs2_ondisk.h | 10 ++++++++-- 6 files changed, 26 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 20c9b4f0e52..c0a8c3b6a85 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -503,8 +503,8 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct mutex sd_statfs_mutex; - struct gfs2_statfs_change sd_statfs_master; - struct gfs2_statfs_change sd_statfs_local; + struct gfs2_statfs_change_host sd_statfs_master; + struct gfs2_statfs_change_host sd_statfs_local; unsigned long sd_statfs_sync_time; /* Resource group stuff */ diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 5db0737fcdb..84c59b165b6 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -279,7 +279,7 @@ void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) str->ir_length = cpu_to_be64(ir->ir_length); } -void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf) +void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) { const struct gfs2_statfs_change *str = buf; @@ -288,7 +288,7 @@ void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf) sc->sc_dinodes = be64_to_cpu(str->sc_dinodes); } -void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf) +void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) { struct gfs2_statfs_change *str = buf; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index b47d9598c04..9c786d1e702 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -215,7 +215,7 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; - struct gfs2_statfs_change sc; + struct gfs2_statfs_change_host sc; int error; if (gfs2_tune_get(sdp, gt_statfs_slow)) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0faf563c83a..0ef83172708 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -587,9 +587,9 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int gfs2_statfs_init(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct buffer_head *m_bh, *l_bh; struct gfs2_holder gh; int error; @@ -634,7 +634,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes) { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct buffer_head *l_bh; int error; @@ -660,8 +660,8 @@ int gfs2_statfs_sync(struct gfs2_sbd *sdp) { struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_holder gh; struct buffer_head *m_bh, *l_bh; int error; @@ -727,10 +727,10 @@ out: * Returns: errno */ -int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { - struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; - struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; spin_lock(&sdp->sd_statfs_spin); @@ -760,7 +760,7 @@ int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) */ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, - struct gfs2_statfs_change *sc) + struct gfs2_statfs_change_host *sc) { gfs2_rgrp_verify(rgd); sc->sc_total += rgd->rd_ri.ri_data; @@ -782,7 +782,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd, * Returns: errno */ -int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc) { struct gfs2_holder ri_gh; struct gfs2_rgrpd *rgd_next; @@ -792,7 +792,7 @@ int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) int done; int error = 0, err; - memset(sc, 0, sizeof(struct gfs2_statfs_change)); + memset(sc, 0, sizeof(struct gfs2_statfs_change_host)); gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); if (!gha) return -ENOMEM; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index ac95064c1e5..e590b2df11d 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -45,8 +45,8 @@ int gfs2_statfs_init(struct gfs2_sbd *sdp); void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, s64 dinodes); int gfs2_statfs_sync(struct gfs2_sbd *sdp); -int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); -int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc); +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc); int gfs2_freeze_fs(struct gfs2_sbd *sdp); void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 431e03b8545..3ce3a47b720 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -497,6 +497,12 @@ struct gfs2_statfs_change { __be64 sc_dinodes; }; +struct gfs2_statfs_change_host { + __u64 sc_total; + __u64 sc_free; + __u64 sc_dinodes; +}; + /* * Quota change * Describes an allocation change for a particular @@ -529,8 +535,8 @@ extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf); extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf); extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf); -extern void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, const void *buf); -extern void gfs2_statfs_change_out(const struct gfs2_statfs_change *sc, void *buf); +extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); +extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf); extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); /* Printing functions */ -- cgit v1.2.3-70-g09d2 From b62f963e1fdf838fed91faec21228d421a834f2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Oct 2006 23:46:46 -0400 Subject: [GFS2] split and annotate gfs2_quota_change Signed-off-by: Al Viro Signed-off-by: Steven Whitehouse --- fs/gfs2/ondisk.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/gfs2_ondisk.h | 8 +++++++- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 84c59b165b6..2d1682ded4b 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -297,7 +297,7 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf) str->sc_dinodes = cpu_to_be64(sc->sc_dinodes); } -void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf) +void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) { const struct gfs2_quota_change *str = buf; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e3f5b8da484..009d86c0008 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1103,7 +1103,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_change qc; + struct gfs2_quota_change_host qc; struct gfs2_quota_data *qd; gfs2_quota_change_in(&qc, bh->b_data + diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 3ce3a47b720..10a507dfd83 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -517,6 +517,12 @@ struct gfs2_quota_change { __be32 qc_id; }; +struct gfs2_quota_change_host { + __u64 qc_change; + __u32 qc_flags; /* GFS2_QCF_... */ + __u32 qc_id; +}; + #ifdef __KERNEL__ /* Translation functions */ @@ -537,7 +543,7 @@ extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf); extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf); -extern void gfs2_quota_change_in(struct gfs2_quota_change *qc, const void *buf); +extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf); /* Printing functions */ -- cgit v1.2.3-70-g09d2 From 539e5d6b7ae8612c0393fe940d2da5b591318d3d Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Oct 2006 15:07:05 -0500 Subject: [GFS2] Change argument of gfs2_dinode_out Everywhere this was called, a struct gfs2_inode was available, but despite that, it was always called with a struct gfs2_dinode as an argument. By making this change it paves the way to start eliminating fields duplicated between the kernel's struct inode and the struct gfs2_dinode. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 2 +- fs/gfs2/bmap.c | 12 ++++++------ fs/gfs2/dir.c | 20 ++++++++++---------- fs/gfs2/eattr.c | 14 +++++++------- fs/gfs2/inode.c | 6 +++--- fs/gfs2/ondisk.c | 5 ++++- fs/gfs2/ops_file.c | 2 +- fs/gfs2/ops_inode.c | 10 +++++----- include/linux/gfs2_ondisk.h | 3 ++- 9 files changed, 39 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 5f959b8ce40..906e403b054 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -201,7 +201,7 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode) (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); ip->i_di.di_mode = mode; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 51f6356bdcb..8c092ab2b4b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -498,7 +498,7 @@ static int gfs2_block_pointers(struct inode *inode, u64 lblock, int create, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } set_buffer_new(bh_map); @@ -780,7 +780,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); up_write(&ip->i_rw_mutex); @@ -860,7 +860,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) goto out_end_trans; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out_end_trans: @@ -970,7 +970,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) ip->i_di.di_size = size; ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); error = 1; @@ -983,7 +983,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); } } @@ -1057,7 +1057,7 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 59dc823c283..0742761e1e0 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -132,7 +132,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -232,7 +232,7 @@ out: ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); return copied; @@ -907,7 +907,7 @@ static int dir_make_exhash(struct inode *inode) for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; dip->i_di.di_depth = y; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); @@ -1039,7 +1039,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { dip->i_di.di_blocks++; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); } @@ -1119,7 +1119,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(sdp, !error)) { dip->i_di.di_depth++; - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); } @@ -1517,7 +1517,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) return error; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_blocks++; - gfs2_dinode_out(&ip->i_di, bh->b_data); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); return 0; } @@ -1561,7 +1561,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, bh->b_data); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; break; @@ -1647,7 +1647,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&dip->i_di, bh->b_data); + gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1695,7 +1695,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, } dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&dip->i_di, bh->b_data); + gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; } @@ -1875,7 +1875,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, goto out_end_trans; gfs2_trans_add_bh(dip->i_gl, dibh, 1); - gfs2_dinode_out(&dip->i_di, dibh->b_data); + gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_end_trans: diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 518f0c0786c..9b7bb565b59 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -302,7 +302,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (!error) { ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, } ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, } ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: gfs2_trans_end(GFS2_SB(&ip->i_inode)); @@ -1132,7 +1132,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) if (!error) { ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1287,7 +1287,7 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1397,7 +1397,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1446,7 +1446,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fb969302f18..b861ddba868 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -338,7 +338,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) ip->i_inode.i_nlink = nlink; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); mark_inode_dirty(&ip->i_inode); @@ -792,7 +792,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_end_trans; ip->i_di.di_nlink = 1; gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); return 0; @@ -1349,7 +1349,7 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } return error; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 2d1682ded4b..2c50fa0261f 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -15,6 +15,8 @@ #include "gfs2.h" #include +#include +#include "incore.h" #define pv(struct, member, fmt) printk(KERN_INFO " "#member" = "fmt"\n", \ struct->member); @@ -187,8 +189,9 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf) } -void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf) +void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { + const struct gfs2_dinode_host *di = &ip->i_di; struct gfs2_dinode *str = buf; gfs2_meta_header_out(&di->di_header, buf); diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 2fc8868e065..7ea41757390 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -336,7 +336,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out_trans_end; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_flags = new_flags; - gfs2_dinode_out(&ip->i_di, bh->b_data); + gfs2_dinode_out(ip, bh->b_data); brelse(bh); out_trans_end: gfs2_trans_end(sdp); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index ef6e5ed70e9..bd268852c67 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -339,7 +339,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size); brelse(dibh); @@ -414,7 +414,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_inum_out(&dip->i_num, &dent->de_inum); dent->de_type = cpu_to_be16(DT_DIR); - gfs2_dinode_out(&ip->i_di, di); + gfs2_dinode_out(ip, di); brelse(dibh); } @@ -541,7 +541,7 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, error = gfs2_meta_inode_buffer(ip, &dibh); if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -762,7 +762,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_end_trans; ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -949,7 +949,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 10a507dfd83..550effaf0c2 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -535,7 +535,8 @@ extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf); extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); -extern void gfs2_dinode_out(const struct gfs2_dinode_host *di, void *buf); +struct gfs2_inode; +extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf); -- cgit v1.2.3-70-g09d2 From 891ea14712da68e282de8583e5fa14f0d3f3731e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Oct 2006 15:22:10 -0500 Subject: [GFS2] Change argument to gfs2_dinode_in This is a preliminary patch to enable the removal of fields in gfs2_dinode_host which are duplicated in struct inode. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 2 +- fs/gfs2/ondisk.c | 4 ++-- include/linux/gfs2_ondisk.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b861ddba868..9875e9356cd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -229,7 +229,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) return -EIO; } - gfs2_dinode_in(&ip->i_di, dibh->b_data); + gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 2c50fa0261f..edf87567bb2 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -155,8 +155,9 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) qu->qu_value = be64_to_cpu(str->qu_value); } -void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf) +void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { + struct gfs2_dinode_host *di = &ip->i_di; const struct gfs2_dinode *str = buf; gfs2_meta_header_in(&di->di_header, buf); @@ -186,7 +187,6 @@ void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf) di->di_entries = be32_to_cpu(str->di_entries); di->di_eattr = be64_to_cpu(str->di_eattr); - } void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 550effaf0c2..08d8240ba53 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -534,8 +534,8 @@ extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf); -extern void gfs2_dinode_in(struct gfs2_dinode_host *di, const void *buf); struct gfs2_inode; +extern void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); -- cgit v1.2.3-70-g09d2 From ea744d01c6a5acf1f6171b4c6e1658a742063613 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Oct 2006 15:28:00 -0500 Subject: [GFS2] Move gfs2_dinode_in to inode.c gfs2_dinode_in() is only ever called from one place, so move it to that place (in inode.c) and make it static. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 34 ++++++++++++++++++++++++++++++++++ fs/gfs2/ondisk.c | 36 +----------------------------------- include/linux/gfs2_ondisk.h | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 9875e9356cd..b39cfcfe927 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -208,6 +208,40 @@ fail: return ERR_PTR(error); } +static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) +{ + struct gfs2_dinode_host *di = &ip->i_di; + const struct gfs2_dinode *str = buf; + + gfs2_meta_header_in(&di->di_header, buf); + gfs2_inum_in(&di->di_num, &str->di_num); + + di->di_mode = be32_to_cpu(str->di_mode); + di->di_uid = be32_to_cpu(str->di_uid); + di->di_gid = be32_to_cpu(str->di_gid); + di->di_nlink = be32_to_cpu(str->di_nlink); + di->di_size = be64_to_cpu(str->di_size); + di->di_blocks = be64_to_cpu(str->di_blocks); + di->di_atime = be64_to_cpu(str->di_atime); + di->di_mtime = be64_to_cpu(str->di_mtime); + di->di_ctime = be64_to_cpu(str->di_ctime); + di->di_major = be32_to_cpu(str->di_major); + di->di_minor = be32_to_cpu(str->di_minor); + + di->di_goal_meta = be64_to_cpu(str->di_goal_meta); + di->di_goal_data = be64_to_cpu(str->di_goal_data); + di->di_generation = be64_to_cpu(str->di_generation); + + di->di_flags = be32_to_cpu(str->di_flags); + di->di_payload_format = be32_to_cpu(str->di_payload_format); + di->di_height = be16_to_cpu(str->di_height); + + di->di_depth = be16_to_cpu(str->di_depth); + di->di_entries = be32_to_cpu(str->di_entries); + + di->di_eattr = be64_to_cpu(str->di_eattr); +} + /** * gfs2_inode_refresh - Refresh the incore copy of the dinode * @ip: The GFS2 inode diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index edf87567bb2..062a44f87fe 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no) printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); } -static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) +void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) { const struct gfs2_meta_header *str = buf; @@ -155,40 +155,6 @@ void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) qu->qu_value = be64_to_cpu(str->qu_value); } -void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) -{ - struct gfs2_dinode_host *di = &ip->i_di; - const struct gfs2_dinode *str = buf; - - gfs2_meta_header_in(&di->di_header, buf); - gfs2_inum_in(&di->di_num, &str->di_num); - - di->di_mode = be32_to_cpu(str->di_mode); - di->di_uid = be32_to_cpu(str->di_uid); - di->di_gid = be32_to_cpu(str->di_gid); - di->di_nlink = be32_to_cpu(str->di_nlink); - di->di_size = be64_to_cpu(str->di_size); - di->di_blocks = be64_to_cpu(str->di_blocks); - di->di_atime = be64_to_cpu(str->di_atime); - di->di_mtime = be64_to_cpu(str->di_mtime); - di->di_ctime = be64_to_cpu(str->di_ctime); - di->di_major = be32_to_cpu(str->di_major); - di->di_minor = be32_to_cpu(str->di_minor); - - di->di_goal_meta = be64_to_cpu(str->di_goal_meta); - di->di_goal_data = be64_to_cpu(str->di_goal_data); - di->di_generation = be64_to_cpu(str->di_generation); - - di->di_flags = be32_to_cpu(str->di_flags); - di->di_payload_format = be32_to_cpu(str->di_payload_format); - di->di_height = be16_to_cpu(str->di_height); - - di->di_depth = be16_to_cpu(str->di_depth); - di->di_entries = be32_to_cpu(str->di_entries); - - di->di_eattr = be64_to_cpu(str->di_eattr); -} - void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { const struct gfs2_dinode_host *di = &ip->i_di; diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 08d8240ba53..4fc297a1ef9 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -528,6 +528,7 @@ struct gfs2_quota_change_host { extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf); extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf); +extern void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf); extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); @@ -535,7 +536,6 @@ extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf); extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf); extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf); struct gfs2_inode; -extern void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf); extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf); -- cgit v1.2.3-70-g09d2 From 4cc14f0b88bf3e0b508143e091eb5a8dff3e3b9c Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Oct 2006 19:00:24 -0500 Subject: [GFS2] Change argument to gfs2_dinode_print Change argument for gfs2_dinode_print in order to prepare for removal of duplicate fields between struct inode and struct gfs2_dinode_host. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 8 ++++---- fs/gfs2/ondisk.c | 4 +++- fs/gfs2/ops_inode.c | 4 ++-- include/linux/gfs2_ondisk.h | 2 +- 4 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index b39cfcfe927..4c5d286fefd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -269,7 +269,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino) @@ -289,7 +289,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (ip->i_di.di_blocks != 1) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } @@ -359,7 +359,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) bigger than the old one, we must have underflowed. */ if (diff < 0 && nlink > ip->i_di.di_nlink) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } @@ -1010,7 +1010,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, if (ip->i_di.di_entries != 2) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); return -EIO; } diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 062a44f87fe..77bed440833 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -190,8 +190,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) } -void gfs2_dinode_print(const struct gfs2_dinode_host *di) +void gfs2_dinode_print(const struct gfs2_inode *ip) { + const struct gfs2_dinode_host *di = &ip->i_di; + gfs2_meta_header_print(&di->di_header); gfs2_inum_print(&di->di_num); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index bd268852c67..b2c2fe613d7 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -467,7 +467,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) if (ip->i_di.di_entries < 2) { if (gfs2_consist_inode(ip)) - gfs2_dinode_print(&ip->i_di); + gfs2_dinode_print(ip); error = -EIO; goto out_gunlock; } @@ -640,7 +640,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (S_ISDIR(nip->i_di.di_mode)) { if (nip->i_di.di_entries < 2) { if (gfs2_consist_inode(nip)) - gfs2_dinode_print(&nip->i_di); + gfs2_dinode_print(nip); error = -EIO; goto out_gunlock; } diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 4fc297a1ef9..cf4c655d0d5 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -549,7 +549,7 @@ extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * /* Printing functions */ extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri); -extern void gfs2_dinode_print(const struct gfs2_dinode_host *di); +extern void gfs2_dinode_print(const struct gfs2_inode *ip); #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From af339c0241d0dd3b35f9097b4f4999bb22ffe502 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 10:34:15 -0500 Subject: [GFS2] Shrink gfs2_inode (1) - di_header/di_num The metadata header doesn't need to be stored in the incore struct gfs2_inode since its constant, and this patch removes it. Also, there is already a field for the inode's number in the struct gfs2_inode, so we don't need one in struct gfs2_dinode_host as well. This saves 28 bytes of space in the struct gfs2_inode. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 25 +++++++++++-------------- fs/gfs2/ondisk.c | 22 +++++++++------------- include/linux/gfs2_ondisk.h | 5 ----- 3 files changed, 20 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 4c5d286fefd..7ba05fc553a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -208,13 +208,18 @@ fail: return ERR_PTR(error); } -static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) +static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) { struct gfs2_dinode_host *di = &ip->i_di; const struct gfs2_dinode *str = buf; - gfs2_meta_header_in(&di->di_header, buf); - gfs2_inum_in(&di->di_num, &str->di_num); + if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(ip); + return -EIO; + } + if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) + return -ESTALE; di->di_mode = be32_to_cpu(str->di_mode); di->di_uid = be32_to_cpu(str->di_uid); @@ -240,6 +245,7 @@ static void gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_entries = be32_to_cpu(str->di_entries); di->di_eattr = be64_to_cpu(str->di_eattr); + return 0; } /** @@ -263,21 +269,12 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) return -EIO; } - gfs2_dinode_in(ip, dibh->b_data); + error = gfs2_dinode_in(ip, dibh->b_data); brelse(dibh); - - if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { - if (gfs2_consist_inode(ip)) - gfs2_dinode_print(ip); - return -EIO; - } - if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino) - return -ESTALE; - ip->i_vn = ip->i_gl->gl_vn; - return 0; + return error; } int gfs2_dinode_dealloc(struct gfs2_inode *ip) diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 77bed440833..4bc590edb60 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -56,7 +56,7 @@ static void gfs2_inum_print(const struct gfs2_inum_host *no) printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr); } -void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) +static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf) { const struct gfs2_meta_header *str = buf; @@ -74,13 +74,6 @@ static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *b str->mh_format = cpu_to_be32(mh->mh_format); } -static void gfs2_meta_header_print(const struct gfs2_meta_header_host *mh) -{ - pv(mh, mh_magic, "0x%.8X"); - pv(mh, mh_type, "%u"); - pv(mh, mh_format, "%u"); -} - void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) { const struct gfs2_sb *str = buf; @@ -160,8 +153,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) const struct gfs2_dinode_host *di = &ip->i_di; struct gfs2_dinode *str = buf; - gfs2_meta_header_out(&di->di_header, buf); - gfs2_inum_out(&di->di_num, (char *)&str->di_num); + str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); + str->di_header.__pad0 = 0; + str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); + str->di_header.__pad1 = 0; + + gfs2_inum_out(&ip->i_num, &str->di_num); str->di_mode = cpu_to_be32(di->di_mode); str->di_uid = cpu_to_be32(di->di_uid); @@ -187,15 +185,13 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_entries = cpu_to_be32(di->di_entries); str->di_eattr = cpu_to_be64(di->di_eattr); - } void gfs2_dinode_print(const struct gfs2_inode *ip) { const struct gfs2_dinode_host *di = &ip->i_di; - gfs2_meta_header_print(&di->di_header); - gfs2_inum_print(&di->di_num); + gfs2_inum_print(&ip->i_num); pv(di, di_mode, "0%o"); pv(di, di_uid, "%u"); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index cf4c655d0d5..c0e76fc718c 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -322,10 +322,6 @@ struct gfs2_dinode { }; struct gfs2_dinode_host { - struct gfs2_meta_header_host di_header; - - struct gfs2_inum_host di_num; - __u32 di_mode; /* mode of file */ __u32 di_uid; /* owner's user id */ __u32 di_gid; /* owner's group id */ @@ -528,7 +524,6 @@ struct gfs2_quota_change_host { extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf); extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf); -extern void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf); extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf); extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf); extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf); -- cgit v1.2.3-70-g09d2 From e7f14f4d094ea1a9ce1953375f5bc1500c760c79 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 31 Oct 2006 21:45:08 -0500 Subject: [GFS2] Shrink gfs2_inode (2) - di_major/di_minor This removes the device numbers from this structure by using inode->i_rdev instead. It also cleans up the code in gfs2_mknod. It results in shrinking the gfs2_inode by 8 bytes. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 36 ++++++++++++++++-------------------- fs/gfs2/inode.h | 2 +- fs/gfs2/ondisk.c | 4 ---- fs/gfs2/ops_inode.c | 38 +++++--------------------------------- include/linux/gfs2_ondisk.h | 2 -- 5 files changed, 22 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7ba05fc553a..a9959195654 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -51,17 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) struct gfs2_dinode_host *di = &ip->i_di; inode->i_ino = ip->i_num.no_addr; - - switch (di->di_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = MKDEV(di->di_major, di->di_minor); - break; - default: - inode->i_rdev = 0; - break; - }; - inode->i_mode = di->di_mode; inode->i_nlink = di->di_nlink; inode->i_uid = di->di_uid; @@ -222,6 +211,15 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) return -ESTALE; di->di_mode = be32_to_cpu(str->di_mode); + ip->i_inode.i_rdev = 0; + switch (di->di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), + be32_to_cpu(str->di_minor)); + break; + }; + di->di_uid = be32_to_cpu(str->di_uid); di->di_gid = be32_to_cpu(str->di_gid); di->di_nlink = be32_to_cpu(str->di_nlink); @@ -230,8 +228,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_atime = be64_to_cpu(str->di_atime); di->di_mtime = be64_to_cpu(str->di_mtime); di->di_ctime = be64_to_cpu(str->di_ctime); - di->di_major = be32_to_cpu(str->di_major); - di->di_minor = be32_to_cpu(str->di_minor); di->di_goal_meta = be64_to_cpu(str->di_goal_meta); di->di_goal_data = be64_to_cpu(str->di_goal_data); @@ -270,7 +266,6 @@ int gfs2_inode_refresh(struct gfs2_inode *ip) } error = gfs2_dinode_in(ip, dibh->b_data); - brelse(dibh); ip->i_vn = ip->i_gl->gl_vn; @@ -684,7 +679,7 @@ out: static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, - const u64 *generation) + const u64 *generation, dev_t dev) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; @@ -705,7 +700,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_size = cpu_to_be64(0); di->di_blocks = cpu_to_be64(1); di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds()); - di->di_major = di->di_minor = cpu_to_be32(0); + di->di_major = cpu_to_be32(MAJOR(dev)); + di->di_minor = cpu_to_be32(MINOR(dev)); di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); di->di_generation = cpu_to_be64(*generation); di->di_flags = cpu_to_be32(0); @@ -740,7 +736,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, unsigned int mode, const struct gfs2_inum_host *inum, - const u64 *generation) + const u64 *generation, dev_t dev) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); unsigned int uid, gid; @@ -761,7 +757,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation); + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); gfs2_quota_change(dip, +1, uid, gid); gfs2_trans_end(sdp); @@ -892,7 +888,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) */ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode) + unsigned int mode, dev_t dev) { struct inode *inode; struct gfs2_inode *dip = ghs->gh_gl->gl_object; @@ -950,7 +946,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, goto fail_gunlock; } - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation); + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); if (error) goto fail_gunlock2; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index d699b920975..33c9ea68f7e 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -37,7 +37,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, int is_root, struct nameidata *nd); struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, - unsigned int mode); + unsigned int mode, dev_t dev); int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, struct gfs2_inode *ip); int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 4bc590edb60..60dd943761a 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -170,8 +170,6 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_atime = cpu_to_be64(di->di_atime); str->di_mtime = cpu_to_be64(di->di_mtime); str->di_ctime = cpu_to_be64(di->di_ctime); - str->di_major = cpu_to_be32(di->di_major); - str->di_minor = cpu_to_be32(di->di_minor); str->di_goal_meta = cpu_to_be64(di->di_goal_meta); str->di_goal_data = cpu_to_be64(di->di_goal_data); @@ -202,8 +200,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime); printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime); printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime); - pv(di, di_major, "%u"); - pv(di, di_minor, "%u"); printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta); printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index b2c2fe613d7..c10b914bf8c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -59,7 +59,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, gfs2_holder_init(dip->i_gl, 0, 0, ghs); for (;;) { - inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); if (!IS_ERR(inode)) { gfs2_trans_end(sdp); if (dip->i_alloc.al_rgd) @@ -326,7 +326,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -379,7 +379,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode); + inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); @@ -504,47 +504,19 @@ out: static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - struct gfs2_inode *dip = GFS2_I(dir), *ip; + struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_sbd *sdp = GFS2_SB(dir); struct gfs2_holder ghs[2]; struct inode *inode; - struct buffer_head *dibh; - u32 major = 0, minor = 0; - int error; - - switch (mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - major = MAJOR(dev); - minor = MINOR(dev); - break; - case S_IFIFO: - case S_IFSOCK: - break; - default: - return -EOPNOTSUPP; - }; gfs2_holder_init(dip->i_gl, 0, 0, ghs); - inode = gfs2_createi(ghs, &dentry->d_name, mode); + inode = gfs2_createi(ghs, &dentry->d_name, mode, dev); if (IS_ERR(inode)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } - ip = ghs[1].gh_gl->gl_object; - - ip->i_di.di_major = major; - ip->i_di.di_minor = minor; - - error = gfs2_meta_inode_buffer(ip, &dibh); - - if (!gfs2_assert_withdraw(sdp, !error)) { - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - } - gfs2_trans_end(sdp); if (dip->i_alloc.al_rgd) gfs2_inplace_release(dip); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c0e76fc718c..5bcf895b273 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -331,8 +331,6 @@ struct gfs2_dinode_host { __u64 di_atime; /* time last accessed */ __u64 di_mtime; /* time last modified */ __u64 di_ctime; /* time last changed */ - __u32 di_major; /* device major number */ - __u32 di_minor; /* device minor number */ /* This section varies from gfs1. Padding added to align with * remainder of dinode -- cgit v1.2.3-70-g09d2 From b60623c238b6a819bd04090139704e2cb57a751f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 12:22:46 -0500 Subject: [GFS2] Shrink gfs2_inode (3) - di_mode This removes the duplicate di_mode field in favour of using the inode->i_mode field. This saves 4 bytes. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 16 ++++++++-------- fs/gfs2/bmap.c | 2 +- fs/gfs2/eaops.c | 2 +- fs/gfs2/eattr.c | 8 ++++---- fs/gfs2/glock.c | 2 +- fs/gfs2/glops.c | 8 ++++---- fs/gfs2/inode.c | 18 +++++++----------- fs/gfs2/inode.h | 2 +- fs/gfs2/ondisk.c | 3 +-- fs/gfs2/ops_address.c | 1 - fs/gfs2/ops_dentry.c | 2 +- fs/gfs2/ops_file.c | 6 +++--- fs/gfs2/ops_inode.c | 17 ++++++++--------- fs/gfs2/ops_super.c | 2 +- include/linux/gfs2_ondisk.h | 1 - 15 files changed, 41 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 906e403b054..87f6304f2f5 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -76,9 +76,9 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) return -EOPNOTSUPP; if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) return -EPERM; - if (S_ISLNK(ip->i_di.di_mode)) + if (S_ISLNK(ip->i_inode.i_mode)) return -EOPNOTSUPP; - if (!access && !S_ISDIR(ip->i_di.di_mode)) + if (!access && !S_ISDIR(ip->i_inode.i_mode)) return -EACCES; return 0; @@ -198,8 +198,8 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { gfs2_assert_withdraw(sdp, - (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); - ip->i_di.di_mode = mode; + (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); + ip->i_inode.i_mode = mode; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -215,12 +215,12 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct posix_acl *acl = NULL, *clone; struct gfs2_ea_request er; - mode_t mode = ip->i_di.di_mode; + mode_t mode = ip->i_inode.i_mode; int error; if (!sdp->sd_args.ar_posix_acl) return 0; - if (S_ISLNK(ip->i_di.di_mode)) + if (S_ISLNK(ip->i_inode.i_mode)) return 0; memset(&er, 0, sizeof(struct gfs2_ea_request)); @@ -232,7 +232,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) return error; if (!acl) { mode &= ~current->fs->umask; - if (mode != ip->i_di.di_mode) + if (mode != ip->i_inode.i_mode) error = munge_mode(ip, mode); return error; } @@ -244,7 +244,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) posix_acl_release(acl); acl = clone; - if (S_ISDIR(ip->i_di.di_mode)) { + if (S_ISDIR(ip->i_inode.i_mode)) { er.er_name = GFS2_POSIX_ACL_DEFAULT; er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; error = gfs2_system_eaops.eo_set(ip, &er); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8c092ab2b4b..481a0688254 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1109,7 +1109,7 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size) { int error; - if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_di.di_mode))) + if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) return -EINVAL; if (size > ip->i_di.di_size) diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index 92c54e9b0dc..cd747c00f67 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -120,7 +120,7 @@ static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) { if (!(er->er_flags & GFS2_ERF_MODE)) { - er->er_mode = ip->i_di.di_mode; + er->er_mode = ip->i_inode.i_mode; er->er_flags |= GFS2_ERF_MODE; } error = gfs2_acl_validate_set(ip, 1, er, diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 9b7bb565b59..5208fa94aad 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -711,9 +711,9 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (!error) { if (er->er_flags & GFS2_ERF_MODE) { gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), - (ip->i_di.di_mode & S_IFMT) == + (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); - ip->i_di.di_mode = er->er_mode; + ip->i_inode.i_mode = er->er_mode; } ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); @@ -847,8 +847,8 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (er->er_flags & GFS2_ERF_MODE) { gfs2_assert_withdraw(GFS2_SB(&ip->i_inode), - (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT)); - ip->i_di.di_mode = er->er_mode; + (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); + ip->i_inode.i_mode = er->er_mode; } ip->i_di.di_ctime = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 78fe0fae23f..44633c46717 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2078,7 +2078,7 @@ static int dump_inode(struct gfs2_inode *ip) printk(KERN_INFO " num = %llu %llu\n", (unsigned long long)ip->i_num.no_formal_ino, (unsigned long long)ip->i_num.no_addr); - printk(KERN_INFO " type = %u\n", IF2DT(ip->i_di.di_mode)); + printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); printk(KERN_INFO " i_flags ="); for (x = 0; x < 32; x++) if (test_bit(x, &ip->i_flags)) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 5406b193227..aad45b7a927 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -92,7 +92,7 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) ip = gl->gl_object; inode = &ip->i_inode; - if (!ip || !S_ISREG(ip->i_di.di_mode)) + if (!ip || !S_ISREG(inode->i_mode)) return; if (!test_bit(GIF_PAGED, &ip->i_flags)) @@ -119,7 +119,7 @@ static void gfs2_page_inval(struct gfs2_glock *gl) ip = gl->gl_object; inode = &ip->i_inode; - if (!ip || !S_ISREG(ip->i_di.di_mode)) + if (!ip || !S_ISREG(inode->i_mode)) return; truncate_inode_pages(inode->i_mapping, 0); @@ -142,7 +142,7 @@ static void gfs2_page_wait(struct gfs2_glock *gl) struct address_space *mapping = inode->i_mapping; int error; - if (!S_ISREG(ip->i_di.di_mode)) + if (!S_ISREG(inode->i_mode)) return; error = filemap_fdatawait(mapping); @@ -164,7 +164,7 @@ static void gfs2_page_writeback(struct gfs2_glock *gl) struct inode *inode = &ip->i_inode; struct address_space *mapping = inode->i_mapping; - if (!S_ISREG(ip->i_di.di_mode)) + if (!S_ISREG(inode->i_mode)) return; filemap_fdatawrite(mapping); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9959195654..de466043c91 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) struct gfs2_dinode_host *di = &ip->i_di; inode->i_ino = ip->i_num.no_addr; - inode->i_mode = di->di_mode; inode->i_nlink = di->di_nlink; inode->i_uid = di->di_uid; inode->i_gid = di->di_gid; @@ -88,9 +87,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip) { struct inode *inode = &ip->i_inode; struct gfs2_dinode_host *di = &ip->i_di; - gfs2_assert_withdraw(GFS2_SB(inode), - (di->di_mode & S_IFMT) == (inode->i_mode & S_IFMT)); - di->di_mode = inode->i_mode; di->di_uid = inode->i_uid; di->di_gid = inode->i_gid; di->di_atime = inode->i_atime.tv_sec; @@ -210,9 +206,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino)) return -ESTALE; - di->di_mode = be32_to_cpu(str->di_mode); + ip->i_inode.i_mode = be32_to_cpu(str->di_mode); ip->i_inode.i_rdev = 0; - switch (di->di_mode & S_IFMT) { + switch (ip->i_inode.i_mode & S_IFMT) { case S_IFBLK: case S_IFCHR: ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major), @@ -620,7 +616,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, unsigned int *uid, unsigned int *gid) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && - (dip->i_di.di_mode & S_ISUID) && dip->i_di.di_uid) { + (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) { if (S_ISDIR(*mode)) *mode |= S_ISUID; else if (dip->i_di.di_uid != current->fsuid) @@ -629,7 +625,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, } else *uid = current->fsuid; - if (dip->i_di.di_mode & S_ISGID) { + if (dip->i_inode.i_mode & S_ISGID) { if (S_ISDIR(*mode)) *mode |= S_ISGID; *gid = dip->i_di.di_gid; @@ -810,7 +806,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_di.di_mode)); + error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode)); if (error) goto fail_end_trans; @@ -1053,7 +1049,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) return -EPERM; - if ((dip->i_di.di_mode & S_ISVTX) && + if ((dip->i_inode.i_mode & S_ISVTX) && dip->i_di.di_uid != current->fsuid && ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER)) return -EPERM; @@ -1072,7 +1068,7 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, if (!gfs2_inum_equal(&inum, &ip->i_num)) return -ENOENT; - if (IF2DT(ip->i_di.di_mode) != type) { + if (IF2DT(ip->i_inode.i_mode) != type) { gfs2_consist_inode(dip); return -EIO; } diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 33c9ea68f7e..69cbf98509a 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -22,7 +22,7 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip) static inline int gfs2_is_dir(struct gfs2_inode *ip) { - return S_ISDIR(ip->i_di.di_mode); + return S_ISDIR(ip->i_inode.i_mode); } void gfs2_inode_attr_in(struct gfs2_inode *ip); diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 60dd943761a..6b50a5731a5 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -161,7 +161,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) gfs2_inum_out(&ip->i_num, &str->di_num); - str->di_mode = cpu_to_be32(di->di_mode); + str->di_mode = cpu_to_be32(ip->i_inode.i_mode); str->di_uid = cpu_to_be32(di->di_uid); str->di_gid = cpu_to_be32(di->di_gid); str->di_nlink = cpu_to_be32(di->di_nlink); @@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) gfs2_inum_print(&ip->i_num); - pv(di, di_mode, "0%o"); pv(di, di_uid, "%u"); pv(di, di_gid, "%u"); pv(di, di_nlink, "%u"); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 015640b3f12..45a3d85b1d6 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -498,7 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page, di->di_size = cpu_to_be64(inode->i_size); } - di->di_mode = cpu_to_be32(inode->i_mode); di->di_atime = cpu_to_be64(inode->i_atime.tv_sec); di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c index c36f9e342e6..d355899585d 100644 --- a/fs/gfs2/ops_dentry.c +++ b/fs/gfs2/ops_dentry.c @@ -76,7 +76,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) if (!gfs2_inum_equal(&ip->i_num, &inum)) goto invalid_gunlock; - if (IF2DT(ip->i_di.di_mode) != type) { + if (IF2DT(ip->i_inode.i_mode) != type) { gfs2_consist_inode(dip); goto fail_gunlock; } diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 7ea41757390..b52b9db1a2b 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -425,7 +425,7 @@ static int gfs2_open(struct inode *inode, struct file *file) gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; - if (S_ISREG(ip->i_di.di_mode)) { + if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) @@ -515,7 +515,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; if (sdp->sd_args.ar_localflocks) { @@ -617,7 +617,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; if (sdp->sd_args.ar_localflocks) diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index c10b914bf8c..cf7a5bae395 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -144,7 +144,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, int alloc_required; int error; - if (S_ISDIR(ip->i_di.di_mode)) + if (S_ISDIR(inode->i_mode)) return -EPERM; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); @@ -220,7 +220,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, } error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num, - IF2DT(ip->i_di.di_mode)); + IF2DT(inode->i_mode)); if (error) goto out_end_trans; @@ -564,11 +564,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Make sure we aren't trying to move a dirctory into it's subdir */ - if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) { + if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { dir_rename = 1; - error = gfs2_glock_nq_init(sdp->sd_rename_gl, - LM_ST_EXCLUSIVE, 0, + error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); if (error) goto out; @@ -609,7 +608,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_gunlock; - if (S_ISDIR(nip->i_di.di_mode)) { + if (S_ISDIR(nip->i_inode.i_mode)) { if (nip->i_di.di_entries < 2) { if (gfs2_consist_inode(nip)) gfs2_dinode_print(nip); @@ -646,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = -EFBIG; goto out_gunlock; } - if (S_ISDIR(ip->i_di.di_mode) && + if (S_ISDIR(ip->i_inode.i_mode) && ndip->i_di.di_nlink == (u32)-1) { error = -EMLINK; goto out_gunlock; @@ -701,7 +700,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, /* Remove the target file, if it exists */ if (nip) { - if (S_ISDIR(nip->i_di.di_mode)) + if (S_ISDIR(nip->i_inode.i_mode)) error = gfs2_rmdiri(ndip, &ndentry->d_name, nip); else { error = gfs2_dir_del(ndip, &ndentry->d_name); @@ -743,7 +742,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_end_trans; error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num, - IF2DT(ip->i_di.di_mode)); + IF2DT(ip->i_inode.i_mode)); if (error) goto out_end_trans; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 9c786d1e702..86351756922 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -407,7 +407,7 @@ static void gfs2_delete_inode(struct inode *inode) if (error) goto out_uninit; - if (S_ISDIR(ip->i_di.di_mode) && + if (S_ISDIR(inode->i_mode) && (ip->i_di.di_flags & GFS2_DIF_EXHASH)) { error = gfs2_dir_exhash_dealloc(ip); if (error) diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 5bcf895b273..f1ea0b48060 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -322,7 +322,6 @@ struct gfs2_dinode { }; struct gfs2_dinode_host { - __u32 di_mode; /* mode of file */ __u32 di_uid; /* owner's user id */ __u32 di_gid; /* owner's group id */ __u32 di_nlink; /* number of links to this file */ -- cgit v1.2.3-70-g09d2 From 2933f9254a6af33db25270778c998a42029da668 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 13:23:29 -0500 Subject: [GFS2] Shrink gfs2_inode (4) - di_uid/di_gid Remove duplicate di_uid/di_gid fields in favour of using inode->i_uid/inode->i_gid instead. This saves 8 bytes. Signed-off-by: Steven Whitehouse --- fs/gfs2/acl.c | 2 +- fs/gfs2/bmap.c | 2 +- fs/gfs2/eattr.c | 2 +- fs/gfs2/inode.c | 23 +++++++++-------------- fs/gfs2/ondisk.c | 6 ++---- fs/gfs2/ops_address.c | 2 +- fs/gfs2/ops_inode.c | 10 ++++------ fs/gfs2/ops_vm.c | 2 +- fs/gfs2/quota.c | 8 ++++---- fs/gfs2/rgrp.c | 11 +++++------ include/linux/gfs2_ondisk.h | 2 -- 11 files changed, 29 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 87f6304f2f5..3908992b27a 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -74,7 +74,7 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) { if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) return -EOPNOTSUPP; - if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) + if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER)) return -EPERM; if (S_ISLNK(ip->i_inode.i_mode)) return -EOPNOTSUPP; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 481a0688254..0c913eecf88 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -819,7 +819,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 5208fa94aad..935cc9a5716 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -687,7 +687,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index de466043c91..0de9b22f454 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -52,8 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) inode->i_ino = ip->i_num.no_addr; inode->i_nlink = di->di_nlink; - inode->i_uid = di->di_uid; - inode->i_gid = di->di_gid; i_size_write(inode, di->di_size); inode->i_atime.tv_sec = di->di_atime; inode->i_mtime.tv_sec = di->di_mtime; @@ -87,8 +85,6 @@ void gfs2_inode_attr_out(struct gfs2_inode *ip) { struct inode *inode = &ip->i_inode; struct gfs2_dinode_host *di = &ip->i_di; - di->di_uid = inode->i_uid; - di->di_gid = inode->i_gid; di->di_atime = inode->i_atime.tv_sec; di->di_mtime = inode->i_mtime.tv_sec; di->di_ctime = inode->i_ctime.tv_sec; @@ -216,8 +212,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) break; }; - di->di_uid = be32_to_cpu(str->di_uid); - di->di_gid = be32_to_cpu(str->di_gid); + ip->i_inode.i_uid = be32_to_cpu(str->di_uid); + ip->i_inode.i_gid = be32_to_cpu(str->di_gid); di->di_nlink = be32_to_cpu(str->di_nlink); di->di_size = be64_to_cpu(str->di_size); di->di_blocks = be64_to_cpu(str->di_blocks); @@ -616,19 +612,19 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, unsigned int *uid, unsigned int *gid) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && - (dip->i_inode.i_mode & S_ISUID) && dip->i_di.di_uid) { + (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { if (S_ISDIR(*mode)) *mode |= S_ISUID; - else if (dip->i_di.di_uid != current->fsuid) + else if (dip->i_inode.i_uid != current->fsuid) *mode &= ~07111; - *uid = dip->i_di.di_uid; + *uid = dip->i_inode.i_uid; } else *uid = current->fsuid; if (dip->i_inode.i_mode & S_ISGID) { if (S_ISDIR(*mode)) *mode |= S_ISGID; - *gid = dip->i_di.di_gid; + *gid = dip->i_inode.i_gid; } else *gid = current->fsgid; } @@ -783,8 +779,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, if (alloc_required < 0) goto fail; if (alloc_required) { - error = gfs2_quota_check(dip, dip->i_di.di_uid, - dip->i_di.di_gid); + error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); if (error) goto fail_quota_locks; @@ -1050,8 +1045,8 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, return -EPERM; if ((dip->i_inode.i_mode & S_ISVTX) && - dip->i_di.di_uid != current->fsuid && - ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER)) + dip->i_inode.i_uid != current->fsuid && + ip->i_inode.i_uid != current->fsuid && !capable(CAP_FOWNER)) return -EPERM; if (IS_APPEND(&dip->i_inode)) diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 6b50a5731a5..e224f6a2264 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -162,8 +162,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) gfs2_inum_out(&ip->i_num, &str->di_num); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(di->di_uid); - str->di_gid = cpu_to_be32(di->di_gid); + str->di_uid = cpu_to_be32(ip->i_inode.i_uid); + str->di_gid = cpu_to_be32(ip->i_inode.i_gid); str->di_nlink = cpu_to_be32(di->di_nlink); str->di_size = cpu_to_be64(di->di_size); str->di_blocks = cpu_to_be64(di->di_blocks); @@ -191,8 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) gfs2_inum_print(&ip->i_num); - pv(di, di_uid, "%u"); - pv(di, di_gid, "%u"); pv(di, di_nlink, "%u"); printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 45a3d85b1d6..38b702a1824 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -386,7 +386,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page, if (error) goto out_alloc_put; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_qunlock; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index cf7a5bae395..efbcec3311b 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -196,8 +196,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_alloc; - error = gfs2_quota_check(dip, dip->i_di.di_uid, - dip->i_di.di_gid); + error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -673,8 +672,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_alloc; - error = gfs2_quota_check(ndip, ndip->i_di.di_uid, - ndip->i_di.di_gid); + error = gfs2_quota_check(ndip, ndip->i_inode.i_uid, ndip->i_inode.i_gid); if (error) goto out_gunlock_q; @@ -885,8 +883,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) u32 ouid, ogid, nuid, ngid; int error; - ouid = ip->i_di.di_uid; - ogid = ip->i_di.di_gid; + ouid = inode->i_uid; + ogid = inode->i_gid; nuid = attr->ia_uid; ngid = attr->ia_gid; diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c index 5453d2947ab..45a5f11fc39 100644 --- a/fs/gfs2/ops_vm.c +++ b/fs/gfs2/ops_vm.c @@ -76,7 +76,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) if (error) goto out; - error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); if (error) goto out_gunlock_q; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 5d00e9b2097..d0db881b55d 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -452,19 +452,19 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_di.di_uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); if (error) goto out; al->al_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_di.di_gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); if (error) goto out; al->al_qd_num++; qd++; - if (uid != NO_QUOTA_CHANGE && uid != ip->i_di.di_uid) { + if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); if (error) goto out; @@ -472,7 +472,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) qd++; } - if (gid != NO_QUOTA_CHANGE && gid != ip->i_di.di_gid) { + if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); if (error) goto out; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 07dfd630505..ff0846528d5 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1217,7 +1217,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip) al->al_alloced++; gfs2_statfs_change(sdp, 0, -1, 0); - gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); spin_lock(&sdp->sd_rindex_spin); rgd->rd_free_clone--; @@ -1261,7 +1261,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip) al->al_alloced++; gfs2_statfs_change(sdp, 0, -1, 0); - gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, +1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_trans_add_unrevoke(sdp, block); spin_lock(&sdp->sd_rindex_spin); @@ -1337,8 +1337,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); gfs2_statfs_change(sdp, 0, +blen, 0); - gfs2_quota_change(ip, -(s64)blen, - ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); } /** @@ -1366,7 +1365,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) gfs2_trans_add_rg(rgd); gfs2_statfs_change(sdp, 0, +blen, 0); - gfs2_quota_change(ip, -(s64)blen, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, bstart, blen); } @@ -1411,7 +1410,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { gfs2_free_uninit_di(rgd, ip->i_num.no_addr); - gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid); gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); } diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index f1ea0b48060..896c7f81a63 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -322,8 +322,6 @@ struct gfs2_dinode { }; struct gfs2_dinode_host { - __u32 di_uid; /* owner's user id */ - __u32 di_gid; /* owner's group id */ __u32 di_nlink; /* number of links to this file */ __u64 di_size; /* number of bytes in file */ __u64 di_blocks; /* number of blocks in file */ -- cgit v1.2.3-70-g09d2 From 4f56110a00af5fb2e22fbccfcaf944d62cae8fcf Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 14:04:17 -0500 Subject: [GFS2] Shrink gfs2_inode (5) - di_nlink Remove the di_nlink field in favour of inode->i_nlink and update the nlink handling to use the proper macros. This saves 4 bytes. Signed-off-by: Steven Whitehouse --- fs/gfs2/inode.c | 37 ++++++++++++++++++++++++------------- fs/gfs2/ondisk.c | 3 +-- fs/gfs2/ops_inode.c | 12 ++++++------ include/linux/gfs2_ondisk.h | 1 - 4 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 0de9b22f454..71120398482 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -51,7 +51,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) struct gfs2_dinode_host *di = &ip->i_di; inode->i_ino = ip->i_num.no_addr; - inode->i_nlink = di->di_nlink; i_size_write(inode, di->di_size); inode->i_atime.tv_sec = di->di_atime; inode->i_mtime.tv_sec = di->di_mtime; @@ -214,7 +213,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_inode.i_uid = be32_to_cpu(str->di_uid); ip->i_inode.i_gid = be32_to_cpu(str->di_gid); - di->di_nlink = be32_to_cpu(str->di_nlink); + /* + * We will need to review setting the nlink count here in the + * light of the forthcoming ro bind mount work. This is a reminder + * to do that. + */ + ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); di->di_size = be64_to_cpu(str->di_size); di->di_blocks = be64_to_cpu(str->di_blocks); di->di_atime = be64_to_cpu(str->di_atime); @@ -336,12 +340,12 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) u32 nlink; int error; - BUG_ON(ip->i_di.di_nlink != ip->i_inode.i_nlink); - nlink = ip->i_di.di_nlink + diff; + BUG_ON(diff != 1 && diff != -1); + nlink = ip->i_inode.i_nlink + diff; /* If we are reducing the nlink count, but the new value ends up being bigger than the old one, we must have underflowed. */ - if (diff < 0 && nlink > ip->i_di.di_nlink) { + if (diff < 0 && nlink > ip->i_inode.i_nlink) { if (gfs2_consist_inode(ip)) gfs2_dinode_print(ip); return -EIO; @@ -351,16 +355,19 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) if (error) return error; - ip->i_di.di_nlink = nlink; + if (diff > 0) + inc_nlink(&ip->i_inode); + else + drop_nlink(&ip->i_inode); + ip->i_di.di_ctime = get_seconds(); - ip->i_inode.i_nlink = nlink; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); mark_inode_dirty(&ip->i_inode); - if (ip->i_di.di_nlink == 0) { + if (ip->i_inode.i_nlink == 0) { struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh, rg_gh; @@ -375,7 +382,6 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) if (error) goto out_norgrp; - clear_nlink(&ip->i_inode); gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */ gfs2_glock_dq_uninit(&rg_gh); out_norgrp: @@ -586,7 +592,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, return error; /* Don't create entries in an unlinked directory */ - if (!dip->i_di.di_nlink) + if (!dip->i_inode.i_nlink) return -EPERM; error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL); @@ -602,7 +608,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, if (dip->i_di.di_entries == (u32)-1) return -EFBIG; - if (S_ISDIR(mode) && dip->i_di.di_nlink == (u32)-1) + if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) return -EMLINK; return 0; @@ -808,7 +814,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto fail_end_trans; - ip->i_di.di_nlink = 1; + ip->i_inode.i_nlink = 1; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1016,7 +1022,12 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, if (error) return error; - error = gfs2_change_nlink(ip, -2); + /* It looks odd, but it really should be done twice */ + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); if (error) return error; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index e224f6a2264..b4e354b1881 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -164,7 +164,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_mode = cpu_to_be32(ip->i_inode.i_mode); str->di_uid = cpu_to_be32(ip->i_inode.i_uid); str->di_gid = cpu_to_be32(ip->i_inode.i_gid); - str->di_nlink = cpu_to_be32(di->di_nlink); + str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); str->di_size = cpu_to_be64(di->di_size); str->di_blocks = cpu_to_be64(di->di_blocks); str->di_atime = cpu_to_be64(di->di_atime); @@ -191,7 +191,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) gfs2_inum_print(&ip->i_num); - pv(di, di_nlink, "%u"); printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks); printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index efbcec3311b..06176dee155 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -169,7 +169,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, } error = -EINVAL; - if (!dip->i_di.di_nlink) + if (!dip->i_inode.i_nlink) goto out_gunlock; error = -EFBIG; if (dip->i_di.di_entries == (u32)-1) @@ -178,10 +178,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out_gunlock; error = -EINVAL; - if (!ip->i_di.di_nlink) + if (!ip->i_inode.i_nlink) goto out_gunlock; error = -EMLINK; - if (ip->i_di.di_nlink == (u32)-1) + if (ip->i_inode.i_nlink == (u32)-1) goto out_gunlock; alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); @@ -386,7 +386,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) ip = ghs[1].gh_gl->gl_object; - ip->i_di.di_nlink = 2; + ip->i_inode.i_nlink = 2; ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); ip->i_di.di_flags |= GFS2_DIF_JDATA; ip->i_di.di_payload_format = GFS2_FORMAT_DE; @@ -636,7 +636,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, }; if (odip != ndip) { - if (!ndip->i_di.di_nlink) { + if (!ndip->i_inode.i_nlink) { error = -EINVAL; goto out_gunlock; } @@ -645,7 +645,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; } if (S_ISDIR(ip->i_inode.i_mode) && - ndip->i_di.di_nlink == (u32)-1) { + ndip->i_inode.i_nlink == (u32)-1) { error = -EMLINK; goto out_gunlock; } diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 896c7f81a63..c61517b35b2 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -322,7 +322,6 @@ struct gfs2_dinode { }; struct gfs2_dinode_host { - __u32 di_nlink; /* number of links to this file */ __u64 di_size; /* number of bytes in file */ __u64 di_blocks; /* number of blocks in file */ __u64 di_atime; /* time last accessed */ -- cgit v1.2.3-70-g09d2 From 1a7b1eed5802502fd649e04784becd58557fdcf1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 14:35:17 -0500 Subject: [GFS2] Shrink gfs2_inode (6) - di_atime/di_mtime/di_ctime Remove the di_[amc]time fields and use inode->i_[amc]time fields instead. This saves 24 bytes from the gfs2_inode. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 10 +++++----- fs/gfs2/dir.c | 10 +++++----- fs/gfs2/eattr.c | 9 ++++----- fs/gfs2/inode.c | 44 +++++++++++--------------------------------- fs/gfs2/inode.h | 1 - fs/gfs2/ondisk.c | 10 +++------- fs/gfs2/ops_address.c | 4 ---- fs/gfs2/ops_inode.c | 3 +-- include/linux/gfs2_ondisk.h | 3 --- 9 files changed, 29 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 0c913eecf88..692d4a3da1b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -778,7 +778,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, gfs2_free_data(ip, bstart, blen); } - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_dinode_out(ip, dibh->b_data); @@ -853,7 +853,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size) } ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -968,7 +968,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (gfs2_is_stuffed(ip)) { ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); @@ -980,7 +980,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) if (!error) { ip->i_di.di_size = size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1053,7 +1053,7 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_num.no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); } - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; gfs2_trans_add_bh(ip->i_gl, dibh, 1); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0742761e1e0..ca23c8beb3f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -131,7 +131,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_di.di_size < offset + size) ip->i_di.di_size = offset + size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -229,7 +229,7 @@ out: if (ip->i_di.di_size < offset + copied) ip->i_di.di_size = offset + copied; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1560,7 +1560,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, break; gfs2_trans_add_bh(ip->i_gl, bh, 1); ip->i_di.di_entries++; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_dinode_out(ip, bh->b_data); brelse(bh); error = 0; @@ -1646,7 +1646,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) gfs2_consist_inode(dip); gfs2_trans_add_bh(dip->i_gl, bh, 1); dip->i_di.di_entries--; - dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_dinode_out(dip, bh->b_data); brelse(bh); mark_inode_dirty(&dip->i_inode); @@ -1694,7 +1694,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, gfs2_trans_add_bh(dip->i_gl, bh, 1); } - dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_dinode_out(dip, bh->b_data); brelse(bh); return 0; diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 935cc9a5716..7dde84775ba 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -715,7 +715,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -850,7 +850,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT)); ip->i_inode.i_mode = er->er_mode; } - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1130,7 +1130,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1285,7 +1285,6 @@ int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, if (!error) { error = inode_setattr(&ip->i_inode, attr); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 71120398482..c22ae3c3a44 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -52,12 +52,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) inode->i_ino = ip->i_num.no_addr; i_size_write(inode, di->di_size); - inode->i_atime.tv_sec = di->di_atime; - inode->i_mtime.tv_sec = di->di_mtime; - inode->i_ctime.tv_sec = di->di_ctime; - inode->i_atime.tv_nsec = 0; - inode->i_mtime.tv_nsec = 0; - inode->i_ctime.tv_nsec = 0; inode->i_blocks = di->di_blocks << (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); @@ -72,23 +66,6 @@ void gfs2_inode_attr_in(struct gfs2_inode *ip) inode->i_flags &= ~S_APPEND; } -/** - * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode - * @ip: The GFS2 inode - * - * Only copy out the attributes that we want the VFS layer - * to be able to modify. - */ - -void gfs2_inode_attr_out(struct gfs2_inode *ip) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_dinode_host *di = &ip->i_di; - di->di_atime = inode->i_atime.tv_sec; - di->di_mtime = inode->i_mtime.tv_sec; - di->di_ctime = inode->i_ctime.tv_sec; -} - static int iget_test(struct inode *inode, void *opaque) { struct gfs2_inode *ip = GFS2_I(inode); @@ -221,9 +198,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); di->di_size = be64_to_cpu(str->di_size); di->di_blocks = be64_to_cpu(str->di_blocks); - di->di_atime = be64_to_cpu(str->di_atime); - di->di_mtime = be64_to_cpu(str->di_mtime); - di->di_ctime = be64_to_cpu(str->di_ctime); + ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); + ip->i_inode.i_atime.tv_nsec = 0; + ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); + ip->i_inode.i_mtime.tv_nsec = 0; + ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); + ip->i_inode.i_ctime.tv_nsec = 0; di->di_goal_meta = be64_to_cpu(str->di_goal_meta); di->di_goal_data = be64_to_cpu(str->di_goal_data); @@ -360,7 +340,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff) else drop_nlink(&ip->i_inode); - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); @@ -1224,7 +1204,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) return 0; curtime = get_seconds(); - if (curtime - ip->i_di.di_atime >= quantum) { + if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { gfs2_glock_dq(gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, gh); @@ -1236,7 +1216,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) trying to get exclusive lock. */ curtime = get_seconds(); - if (curtime - ip->i_di.di_atime >= quantum) { + if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) { struct buffer_head *dibh; struct gfs2_dinode *di; @@ -1250,11 +1230,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh) if (error) goto fail_end_trans; - ip->i_di.di_atime = curtime; + ip->i_inode.i_atime.tv_sec = curtime; gfs2_trans_add_bh(ip->i_gl, dibh, 1); di = (struct gfs2_dinode *)dibh->b_data; - di->di_atime = cpu_to_be64(ip->i_di.di_atime); + di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); brelse(dibh); gfs2_trans_end(sdp); @@ -1375,8 +1355,6 @@ __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) if (!error) { error = inode_setattr(&ip->i_inode, attr); gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); - gfs2_inode_attr_out(ip); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 69cbf98509a..54d584eddf2 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -26,7 +26,6 @@ static inline int gfs2_is_dir(struct gfs2_inode *ip) } void gfs2_inode_attr_in(struct gfs2_inode *ip); -void gfs2_inode_attr_out(struct gfs2_inode *ip); struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type); struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum); diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index b4e354b1881..82003e872a3 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -167,9 +167,9 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); str->di_size = cpu_to_be64(di->di_size); str->di_blocks = cpu_to_be64(di->di_blocks); - str->di_atime = cpu_to_be64(di->di_atime); - str->di_mtime = cpu_to_be64(di->di_mtime); - str->di_ctime = cpu_to_be64(di->di_ctime); + str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); + str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); + str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); str->di_goal_meta = cpu_to_be64(di->di_goal_meta); str->di_goal_data = cpu_to_be64(di->di_goal_data); @@ -193,10 +193,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size); printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks); - printk(KERN_INFO " di_atime = %lld\n", (long long)di->di_atime); - printk(KERN_INFO " di_mtime = %lld\n", (long long)di->di_mtime); - printk(KERN_INFO " di_ctime = %lld\n", (long long)di->di_ctime); - printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta); printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data); diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 38b702a1824..5c3962c80e8 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -498,10 +498,6 @@ static int gfs2_commit_write(struct file *file, struct page *page, di->di_size = cpu_to_be64(inode->i_size); } - di->di_atime = cpu_to_be64(inode->i_atime.tv_sec); - di->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec); - di->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec); - brelse(dibh); gfs2_trans_end(sdp); if (al->al_requested) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 06176dee155..585b43a94ac 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -729,7 +729,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = gfs2_meta_inode_buffer(ip, &dibh); if (error) goto out_end_trans; - ip->i_di.di_ctime = get_seconds(); + ip->i_inode.i_ctime.tv_sec = get_seconds(); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -915,7 +915,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) error = inode_setattr(inode, attr); gfs2_assert_warn(sdp, !error); - gfs2_inode_attr_out(ip); gfs2_trans_add_bh(ip->i_gl, dibh, 1); gfs2_dinode_out(ip, dibh->b_data); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c61517b35b2..7f5a4a16224 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -324,9 +324,6 @@ struct gfs2_dinode { struct gfs2_dinode_host { __u64 di_size; /* number of bytes in file */ __u64 di_blocks; /* number of blocks in file */ - __u64 di_atime; /* time last accessed */ - __u64 di_mtime; /* time last modified */ - __u64 di_ctime; /* time last changed */ /* This section varies from gfs1. Padding added to align with * remainder of dinode -- cgit v1.2.3-70-g09d2 From a9583c7983cbba9726bfe64ee46613d654fc9e26 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 1 Nov 2006 20:09:14 -0500 Subject: [GFS2] Shrink gfs2_inode (7) - di_payload_format This is almost never used. Its there for backward compatibility with GFS1. It doesn't need its own field since it can always be calculated from the inode mode & flags. This saves a bit more space in the gfs2_inode. Signed-off-by: Steven Whitehouse --- fs/gfs2/dir.c | 1 - fs/gfs2/inode.c | 3 +-- fs/gfs2/ondisk.c | 6 +++--- fs/gfs2/ops_inode.c | 1 - include/linux/gfs2_ondisk.h | 1 - 5 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index ca23c8beb3f..c82d7cb4a65 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -902,7 +902,6 @@ static int dir_make_exhash(struct inode *inode) dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; dip->i_di.di_blocks++; dip->i_di.di_flags |= GFS2_DIF_EXHASH; - dip->i_di.di_payload_format = 0; for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; dip->i_di.di_depth = y; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c22ae3c3a44..f6177fc6832 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -210,7 +210,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) di->di_generation = be64_to_cpu(str->di_generation); di->di_flags = be32_to_cpu(str->di_flags); - di->di_payload_format = be32_to_cpu(str->di_payload_format); di->di_height = be16_to_cpu(str->di_height); di->di_depth = be16_to_cpu(str->di_depth); @@ -699,7 +698,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, } di->__pad1 = 0; - di->di_payload_format = cpu_to_be32(0); + di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); di->di_height = cpu_to_be32(0); di->__pad2 = 0; di->__pad3 = 0; diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index 82003e872a3..b2baba5c50b 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -176,9 +176,10 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_generation = cpu_to_be64(di->di_generation); str->di_flags = cpu_to_be32(di->di_flags); - str->di_payload_format = cpu_to_be32(di->di_payload_format); str->di_height = cpu_to_be16(di->di_height); - + str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && + !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ? + GFS2_FORMAT_DE : 0); str->di_depth = cpu_to_be16(di->di_depth); str->di_entries = cpu_to_be32(di->di_entries); @@ -197,7 +198,6 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data); pv(di, di_flags, "0x%.8X"); - pv(di, di_payload_format, "%u"); pv(di, di_height, "%u"); pv(di, di_depth, "%u"); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 585b43a94ac..0e4eade47bf 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -389,7 +389,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) ip->i_inode.i_nlink = 2; ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); ip->i_di.di_flags |= GFS2_DIF_JDATA; - ip->i_di.di_payload_format = GFS2_FORMAT_DE; ip->i_di.di_entries = 2; error = gfs2_meta_inode_buffer(ip, &dibh); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 7f5a4a16224..536575efb62 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -333,7 +333,6 @@ struct gfs2_dinode_host { __u64 di_generation; /* generation number for NFS */ __u32 di_flags; /* GFS2_DIF_... */ - __u32 di_payload_format; /* GFS2_FORMAT_... */ __u16 di_height; /* height of metadata */ /* These only apply to directories */ -- cgit v1.2.3-70-g09d2 From f6e58f01e8dc869803b9f73b2aa9d5bc3f32ca05 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Nov 2006 15:14:58 -0500 Subject: [GFS2] Don't copy meta_header for rgrp in and out The meta_header for an ondisk rgrp never changes, so there is no point copying it in and back out to disk. Also there is no reason to keep a copy for each rgrp in memory. The code already checks to ensure that the header is correct before it calls the routine to copy the data in, so that we don't even need to check whether its correct on disk in the functions in ondisk.c Signed-off-by: Steven Whitehouse --- fs/gfs2/ondisk.c | 11 ----------- include/linux/gfs2_ondisk.h | 2 -- 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c index b2baba5c50b..f2495f1e21a 100644 --- a/fs/gfs2/ondisk.c +++ b/fs/gfs2/ondisk.c @@ -65,15 +65,6 @@ static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *bu mh->mh_format = be32_to_cpu(str->mh_format); } -static void gfs2_meta_header_out(const struct gfs2_meta_header_host *mh, void *buf) -{ - struct gfs2_meta_header *str = buf; - - str->mh_magic = cpu_to_be32(mh->mh_magic); - str->mh_type = cpu_to_be32(mh->mh_type); - str->mh_format = cpu_to_be32(mh->mh_format); -} - void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) { const struct gfs2_sb *str = buf; @@ -119,7 +110,6 @@ void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf) { const struct gfs2_rgrp *str = buf; - gfs2_meta_header_in(&rg->rg_header, buf); rg->rg_flags = be32_to_cpu(str->rg_flags); rg->rg_free = be32_to_cpu(str->rg_free); rg->rg_dinodes = be32_to_cpu(str->rg_dinodes); @@ -130,7 +120,6 @@ void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf) { struct gfs2_rgrp *str = buf; - gfs2_meta_header_out(&rg->rg_header, buf); str->rg_flags = cpu_to_be32(rg->rg_flags); str->rg_free = cpu_to_be32(rg->rg_free); str->rg_dinodes = cpu_to_be32(rg->rg_dinodes); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index 536575efb62..8b7e4c1e32a 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -213,8 +213,6 @@ struct gfs2_rgrp { }; struct gfs2_rgrp_host { - struct gfs2_meta_header_host rg_header; - __u32 rg_flags; __u32 rg_free; __u32 rg_dinodes; -- cgit v1.2.3-70-g09d2 From 87e1652c7863b9ae406ff37f33c7ec2bb494d7b1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86-64: Don't keep interrupts disabled while spinning in spinlocks Follows i386. Based on patch from some folks at Google (MikeW, Edward G.?), but completely redone by AK. Signed-off-by: Andi Kleen --- include/asm-x86_64/spinlock.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index 05ef097ba55..88bf981e73c 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -36,7 +36,34 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) "2:\t" : "=m" (lock->slock) : : "memory"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) +/* + * Same as __raw_spin_lock, but reenable interrupts during spinning. + */ +#ifndef CONFIG_PROVE_LOCKING +static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) +{ + asm volatile( + "\n1:\t" + LOCK_PREFIX " ; decl %0\n\t" + "jns 5f\n" + "testl $0x200, %1\n\t" /* interrupts were disabled? */ + "jz 4f\n\t" + "sti\n" + "3:\t" + "rep;nop\n\t" + "cmpl $0, %0\n\t" + "jle 3b\n\t" + "cli\n\t" + "jmp 1b\n" + "4:\t" + "rep;nop\n\t" + "cmpl $0, %0\n\t" + "jg 1b\n\t" + "jmp 4b\n" + "5:\n\t" + : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory"); +} +#endif static inline int __raw_spin_trylock(raw_spinlock_t *lock) { -- cgit v1.2.3-70-g09d2 From bd1d599518bf11992cc6d5b0df08da4a2b7b0db5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:00 +0100 Subject: [PATCH] x86-64: x86_64 rename X86_FEATURE_DTES to X86_FEATURE_DS Here is a patch (used by perfmon2) that renamed X86_FEATURE_DTES to X86_FEATURE_DS to match Intel's documentation for the Debug Store save area. The patch also adds cpu_has_ds. changelog: - rename X86_FEATURE_DTES to X86_FEATURE_DS to match documentation - adds cpu_has_ds to test for X86_FEATURE_DS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- include/asm-x86_64/cpufeature.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index ee792faaca0..65eb39e8f3c 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -29,7 +29,7 @@ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ #define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ @@ -112,5 +112,6 @@ #define cpu_has_cyrix_arr 0 #define cpu_has_centaur_mcr 0 #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #endif /* __ASM_X8664_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From 36b2a8d5aff4cb3ee83d5e40447a8f073bcfe2fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: add X86_FEATURE_PEBS and detection Here is a patch (used by perfmon2) to detect the presence of the Precise Event Based Sampling (PEBS) feature for Intel 64-bit processors. The patch also adds the cpu_has_pebs macro. changelog: - adds X86_FEATURE_PEBS - adds cpu_has_pebs to test for X86_FEATURE_PEBS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 7 +++++++ include/asm-x86_64/cpufeature.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index fc944b5e8f4..619af2e2fa2 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -835,6 +835,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability); } + if (cpu_has_ds) { + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } + n = c->extended_cpuid_level; if (n >= 0x80000008) { unsigned eax = cpuid_eax(0x80000008); diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index 65eb39e8f3c..d280384807e 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -68,6 +68,7 @@ #define X86_FEATURE_FXSAVE_LEAK (3*32+7) /* FIP/FOP/FDP leaks through FXSAVE */ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+10) /* Precise-Event Based Sampling */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -113,5 +114,6 @@ #define cpu_has_centaur_mcr 0 #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #endif /* __ASM_X8664_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From d8cebe65ea5179e3293c38427d71f4d73c795d39 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86-64: remove duplicated cpu_mask_to_apicid in x86_64 smp.h inline function cpu_mask_to_apicid in smp.h is duplicated with macro in mach_apic.h. Signed-off-by: Yinghai Lu Signed-off-by: Andi Kleen --- include/asm-x86_64/smp.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index d6b7c057edb..7ae7e7d89d1 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -82,11 +82,6 @@ extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ extern u8 x86_cpu_to_log_apicid[NR_CPUS]; extern u8 bios_cpu_apicid[]; -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) -{ - return cpus_addr(cpumask)[0]; -} - static inline int cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < NR_CPUS) -- cgit v1.2.3-70-g09d2 From d7731c0ff69dc3f18ea020257e627dae4d214fdb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: i386 rename X86_FEATURE_DTES to X86_FEATURE_DS Here is a patch (used by perfmon2) that renames X86_FEATURE_DTES to X86_FEATURE_DS to match Intel's documentation for the Debug Store save area on i386. The patch also adds cpu_has_ds. - rename X86_FEATURE_DTES to X86_FEATURE_DS to match documentation - adds cpu_has_ds to test for X86_FEATURE_DS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- include/asm-i386/cpufeature.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index d314ebb3d59..69ce35049a0 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -31,7 +31,7 @@ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ #define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ -#define X86_FEATURE_DTES (0*32+21) /* Debug Trace Store */ +#define X86_FEATURE_DS (0*32+21) /* Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ #define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ @@ -134,6 +134,7 @@ #define cpu_has_phe_enabled boot_cpu_has(X86_FEATURE_PHE_EN) #define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) +#define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #endif /* __ASM_I386_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From 42ed458aa51337357d7632c64aed4528f923e829 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: i386 add X86_FEATURE_PEBS and detection Here is a patch (used by perfmon2) to detect the presence of the Precise Event Based Sampling (PEBS) feature for i386. The patch also adds the cpu_has_pebs macro. - adds X86_FEATURE_PEBS - adds cpu_has_pebs to test for X86_FEATURE_PEBS Signed-off-by: stephane eranian Signed-off-by: Andi Kleen Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/cpu/intel.c | 8 +++++++- include/asm-i386/cpufeature.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 94a95aa5227..798c2f617e8 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -195,8 +195,14 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); -} + if (cpu_has_ds) { + unsigned int l1; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<12))) + set_bit(X86_FEATURE_PEBS, c->x86_capability); + } +} static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) { diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 69ce35049a0..231672558c1 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -73,6 +73,7 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -135,6 +136,7 @@ #define cpu_has_pmm boot_cpu_has(X86_FEATURE_PMM) #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) +#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #endif /* __ASM_I386_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From e5e3a0428968dcc1f9318ce1c941a918e99f8b84 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: remove default_ldt, and simplify ldt-setting. This patch removes the default_ldt[] array, as it has been unused since iBCS stopped being supported. This means it is now possible to actually set an empty LDT segment. In order to deal with this, the set_ldt_desc/load_LDT pair has been replaced with a single set_ldt() operation which is responsible for both setting up the LDT descriptor in the GDT, and reloading the LDT register. If there are no LDT entries, the LDT register is loaded with a NULL descriptor. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Andi Kleen Acked-by: Zachary Amsden Signed-off-by: Andrew Morton --- arch/i386/kernel/ldt.c | 4 +--- arch/i386/kernel/traps.c | 3 --- include/asm-i386/desc.h | 50 ++++++++++++++++-------------------------- include/asm-i386/mmu_context.h | 4 ++-- 4 files changed, 22 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index 445211eb2d5..b410e5fb034 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -160,16 +160,14 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount) { int err; unsigned long size; - void *address; err = 0; - address = &default_ldt[0]; size = 5*sizeof(struct desc_struct); if (size > bytecount) size = bytecount; err = size; - if (copy_to_user(ptr, address, size)) + if (clear_user(ptr, size)) err = -EFAULT; return err; diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 48ebfab661b..56655ea8d98 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -61,9 +61,6 @@ int panic_on_unrecovered_nmi; asmlinkage int system_call(void); -struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, - { 0, 0 }, { 0, 0 } }; - /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 5874ef119ff..a0398f780ca 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -33,11 +33,6 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; } -/* - * This is the ldt that every process will get unless we need - * something other than this. - */ -extern struct desc_struct default_ldt[]; extern struct desc_struct idt_table[]; extern void set_intr_gate(unsigned int irq, void * addr); @@ -65,7 +60,6 @@ static inline void pack_gate(__u32 *a, __u32 *b, #define DESCTYPE_S 0x10 /* !system */ #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) @@ -115,13 +109,20 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); } -static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) +static inline void set_ldt(void *addr, unsigned int entries) { - __u32 a, b; - pack_descriptor(&a, &b, (unsigned long)addr, - entries * sizeof(struct desc_struct) - 1, - DESCTYPE_LDT, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + if (likely(entries == 0)) + __asm__ __volatile__("lldt %w0"::"q" (0)); + else { + unsigned cpu = smp_processor_id(); + __u32 a, b; + + pack_descriptor(&a, &b, (unsigned long)addr, + entries * sizeof(struct desc_struct) - 1, + DESCTYPE_LDT, 0); + write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)); + } } #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) @@ -153,35 +154,22 @@ static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entri static inline void clear_LDT(void) { - int cpu = get_cpu(); - - set_ldt_desc(cpu, &default_ldt[0], 5); - load_LDT_desc(); - put_cpu(); + set_ldt(NULL, 0); } /* * load one particular LDT into the current CPU */ -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) +static inline void load_LDT_nolock(mm_context_t *pc) { - void *segments = pc->ldt; - int count = pc->size; - - if (likely(!count)) { - segments = &default_ldt[0]; - count = 5; - } - - set_ldt_desc(cpu, segments, count); - load_LDT_desc(); + set_ldt(pc->ldt, pc->size); } static inline void load_LDT(mm_context_t *pc) { - int cpu = get_cpu(); - load_LDT_nolock(pc, cpu); - put_cpu(); + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); } static inline unsigned long get_desc_base(unsigned long *desc) diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 62b7bf18409..1b1495372c4 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -44,7 +44,7 @@ static inline void switch_mm(struct mm_struct *prev, * load the LDT, if the LDT is different: */ if (unlikely(prev->context.ldt != next->context.ldt)) - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP else { @@ -56,7 +56,7 @@ static inline void switch_mm(struct mm_struct *prev, * tlb flush IPI delivery. We must reload %cr3. */ load_cr3(next->pgd); - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } } #endif -- cgit v1.2.3-70-g09d2 From bb81a09e55eaf7e5f798468ab971469b6f66a259 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] x86: all cpu backtrace When a spinlock lockup occurs, arrange for the NMI code to emit an all-cpu backtrace, so we get to see which CPU is holding the lock, and where. Cc: Andi Kleen Cc: Ingo Molnar Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/nmi.c | 26 ++++++++++++++++++++++++++ arch/x86_64/kernel/nmi.c | 29 ++++++++++++++++++++++++++++- include/asm-i386/nmi.h | 8 ++++++++ include/asm-x86_64/nmi.h | 3 +++ include/linux/nmi.h | 5 +++++ lib/spinlock_debug.c | 4 ++++ 6 files changed, 74 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index eaafe233a5d..171194ccb7b 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,8 @@ int nmi_watchdog_enabled; static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -907,6 +910,16 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + sum = per_cpu(irq_stat, cpu).apic_timer_irqs; /* if the apic timer isn't firing, this cpu isn't doing much */ @@ -1033,6 +1046,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 7af9cb3e2d9..27e95e7922c 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -12,14 +12,15 @@ * Mikael Pettersson : PM converted to driver model. Disable/enable API. */ +#include #include #include #include #include #include -#include #include #include +#include #include #include @@ -41,6 +42,8 @@ int panic_on_unrecovered_nmi; static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner); static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[2]); +static cpumask_t backtrace_mask = CPU_MASK_NONE; + /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) */ @@ -782,6 +785,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = smp_processor_id(); struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 dummy; int rc=0; @@ -799,6 +803,16 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) touched = 1; } + if (cpu_isset(cpu, backtrace_mask)) { + static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + + spin_lock(&lock); + printk("NMI backtrace for cpu %d\n", cpu); + dump_stack(); + spin_unlock(&lock); + cpu_clear(cpu, backtrace_mask); + } + #ifdef CONFIG_X86_MCE /* Could check oops_in_progress here too, but it's safer not too */ @@ -931,6 +945,19 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, #endif +void __trigger_all_cpu_backtrace(void) +{ + int i; + + backtrace_mask = cpu_online_map; + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpus_empty(backtrace_mask)) + break; + mdelay(1); + } +} + EXPORT_SYMBOL(nmi_active); EXPORT_SYMBOL(nmi_watchdog); EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); diff --git a/include/asm-i386/nmi.h b/include/asm-i386/nmi.h index 269d315719c..b04333ea6f3 100644 --- a/include/asm-i386/nmi.h +++ b/include/asm-i386/nmi.h @@ -5,6 +5,9 @@ #define ASM_NMI_H #include +#include + +#ifdef ARCH_HAS_NMI_WATCHDOG /** * do_nmi_callback @@ -42,4 +45,9 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + +#endif + #endif /* ASM_NMI_H */ diff --git a/include/asm-x86_64/nmi.h b/include/asm-x86_64/nmi.h index f367d4014b4..72375e7d32a 100644 --- a/include/asm-x86_64/nmi.h +++ b/include/asm-x86_64/nmi.h @@ -77,4 +77,7 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, extern int unknown_nmi_panic; +void __trigger_all_cpu_backtrace(void); +#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() + #endif /* ASM_NMI_H */ diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e16904e28c3..acb4ed13024 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -15,9 +15,14 @@ * disables interrupts for a long time. This call is stateless. */ #ifdef ARCH_HAS_NMI_WATCHDOG +#include extern void touch_nmi_watchdog(void); #else # define touch_nmi_watchdog() touch_softlockup_watchdog() #endif +#ifndef trigger_all_cpu_backtrace +#define trigger_all_cpu_backtrace() do { } while (0) +#endif + #endif diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index b6c4f898197..479fd462eaa 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -117,6 +118,9 @@ static void __spin_lock_debug(spinlock_t *lock) raw_smp_processor_id(), current->comm, current->pid, lock); dump_stack(); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif } } } -- cgit v1.2.3-70-g09d2 From be44d2aabce2d62f72d5751d1871b6212bf7a1c7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: espfix cleanup Clean up the espfix code: - Introduced PER_CPU() macro to be used from asm - Introduced GET_DESC_BASE() macro to be used from asm - Rewrote the fixup code in asm, as calling a C code with the altered %ss appeared to be unsafe - No longer altering the stack from a .fixup section - 16bit per-cpu stack is no longer used, instead the stack segment base is patched the way so that the high word of the kernel and user %esp are the same. - Added the limit-patching for the espfix segment. (Chuck Ebbert) [jeremy@goop.org: use the x86 scaling addressing mode rather than shifting] Signed-off-by: Stas Sergeev Signed-off-by: Andi Kleen Acked-by: Zachary Amsden Acked-by: Chuck Ebbert <76306.1226@compuserve.com> Acked-by: Jan Beulich Cc: Andi Kleen Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 5 +++ arch/i386/kernel/cpu/common.c | 11 ------- arch/i386/kernel/entry.S | 73 +++++++++++++++++++----------------------- arch/i386/kernel/head.S | 2 +- arch/i386/kernel/traps.c | 57 +++++++++------------------------ include/asm-i386/desc.h | 27 +++++++++++++--- include/asm-i386/percpu.h | 25 +++++++++++++++ 7 files changed, 103 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c80271f8f08..e94d910a28b 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -58,6 +58,11 @@ void foo(void) OFFSET(TI_sysenter_return, thread_info, sysenter_return); BLANK(); + OFFSET(GDS_size, Xgt_desc_struct, size); + OFFSET(GDS_address, Xgt_desc_struct, address); + OFFSET(GDS_pad, Xgt_desc_struct, pad); + BLANK(); + OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d9f3e3c31f0..5532fc4e1bf 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -24,9 +24,6 @@ DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); -DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); -EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); - static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -603,7 +600,6 @@ void __cpuinit cpu_init(void) struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; struct desc_struct *gdt; - __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -651,13 +647,6 @@ old_gdt: * and set up the GDT descriptor: */ memcpy(gdt, cpu_gdt_table, GDT_SIZE); - - /* Set up GDT entry for 16bit stack */ - *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= - ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | - ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | - (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr->size = GDT_SIZE - 1; cpu_gdt_descr->address = (unsigned long)gdt; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5a63d6fdb70..c38d801ba0b 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -48,6 +48,7 @@ #include #include #include +#include #include #include "irq_vectors.h" @@ -418,23 +419,18 @@ ldt_ss: * This is an "official" bug of all the x86-compatible * CPUs, which we can try to work around to make * dosemu and wine happy. */ - subl $8, %esp # reserve space for switch16 pointer - CFI_ADJUST_CFA_OFFSET 8 + movl OLDESP(%esp), %eax + movl %esp, %edx + call patch_espfix_desc + pushl $__ESPFIX_SS + CFI_ADJUST_CFA_OFFSET 4 + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 DISABLE_INTERRUPTS TRACE_IRQS_OFF - movl %esp, %eax - /* Set up the 16bit stack frame with switch32 pointer on top, - * and a switch16 pointer on top of the current frame. */ - call setup_x86_bogus_stack - CFI_ADJUST_CFA_OFFSET -8 # frame has moved - TRACE_IRQS_IRET - RESTORE_REGS - lss 20+4(%esp), %esp # switch to 16bit stack -1: INTERRUPT_RETURN -.section __ex_table,"a" - .align 4 - .long 1b,iret_exc -.previous + lss (%esp), %esp + CFI_ADJUST_CFA_OFFSET -8 + jmp restore_nocheck CFI_ENDPROC # perform work that needs to be done immediately before resumption @@ -524,30 +520,30 @@ syscall_badsys: CFI_ENDPROC #define FIXUP_ESPFIX_STACK \ - movl %esp, %eax; \ - /* switch to 32bit stack using the pointer on top of 16bit stack */ \ - lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ - /* copy data from 16bit stack to 32bit stack */ \ - call fixup_x86_bogus_stack; \ - /* put ESP to the proper location */ \ - movl %eax, %esp; -#define UNWIND_ESPFIX_STACK \ + /* since we are on a wrong stack, we cant make it a C code :( */ \ + GET_THREAD_INFO(%ebp); \ + movl TI_cpu(%ebp), %ebx; \ + PER_CPU(cpu_gdt_descr, %ebx); \ + movl GDS_address(%ebx), %ebx; \ + GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ + addl %esp, %eax; \ + pushl $__KERNEL_DS; \ + CFI_ADJUST_CFA_OFFSET 4; \ pushl %eax; \ CFI_ADJUST_CFA_OFFSET 4; \ + lss (%esp), %esp; \ + CFI_ADJUST_CFA_OFFSET -8; +#define UNWIND_ESPFIX_STACK \ movl %ss, %eax; \ - /* see if on 16bit stack */ \ + /* see if on espfix stack */ \ cmpw $__ESPFIX_SS, %ax; \ - je 28f; \ -27: popl %eax; \ - CFI_ADJUST_CFA_OFFSET -4; \ -.section .fixup,"ax"; \ -28: movl $__KERNEL_DS, %eax; \ + jne 27f; \ + movl $__KERNEL_DS, %eax; \ movl %eax, %ds; \ movl %eax, %es; \ - /* switch to 32bit stack */ \ + /* switch to normal stack */ \ FIXUP_ESPFIX_STACK; \ - jmp 27b; \ -.previous +27:; /* * Build the entry stubs and pointer table with @@ -614,7 +610,6 @@ error_code: pushl %eax CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET eax, 0 - xorl %eax, %eax pushl %ebp CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebp, 0 @@ -627,7 +622,6 @@ error_code: pushl %edx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET edx, 0 - decl %eax # eax = -1 pushl %ecx CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ecx, 0 @@ -644,7 +638,7 @@ error_code: /*CFI_REGISTER es, ecx*/ movl ES(%esp), %edi # get the function address movl ORIG_EAX(%esp), %edx # get the error code - movl %eax, ORIG_EAX(%esp) + movl $-1, ORIG_EAX(%esp) movl %ecx, ES(%esp) /*CFI_REL_OFFSET es, ES*/ movl $(__USER_DS), %ecx @@ -754,7 +748,7 @@ KPROBE_ENTRY(nmi) cmpw $__ESPFIX_SS, %ax popl %eax CFI_ADJUST_CFA_OFFSET -4 - je nmi_16bit_stack + je nmi_espfix_stack cmpl $sysenter_entry,(%esp) je nmi_stack_fixup pushl %eax @@ -797,7 +791,7 @@ nmi_debug_stack_check: FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct -nmi_16bit_stack: +nmi_espfix_stack: /* We have a RING0_INT_FRAME here. * * create the pointer to lss back @@ -806,7 +800,6 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 pushl %esp CFI_ADJUST_CFA_OFFSET 4 - movzwl %sp, %esp addw $4, (%esp) /* copy the iret frame of 12 bytes */ .rept 3 @@ -817,11 +810,11 @@ nmi_16bit_stack: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL FIXUP_ESPFIX_STACK # %eax == %esp - CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved xorl %edx,%edx # zero error code call do_nmi RESTORE_REGS - lss 12+4(%esp), %esp # back to 16bit stack + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 1: INTERRUPT_RETURN CFI_ENDPROC .section __ex_table,"a" diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index ca31f18d277..b1f1df11fcc 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -584,7 +584,7 @@ ENTRY(cpu_gdt_table) .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ .quad 0x004092000000ffff /* 0xc8 APM DS data */ - .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ + .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ .quad 0x0000000000000000 /* 0xd8 - unused */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 56655ea8d98..f9bb1f89d68 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -1088,49 +1088,24 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, #endif } -fastcall void setup_x86_bogus_stack(unsigned char * stk) +fastcall unsigned long patch_espfix_desc(unsigned long uesp, + unsigned long kesp) { - unsigned long *switch16_ptr, *switch32_ptr; - struct pt_regs *regs; - unsigned long stack_top, stack_bot; - unsigned short iret_frame16_off; int cpu = smp_processor_id(); - /* reserve the space on 32bit stack for the magic switch16 pointer */ - memmove(stk, stk + 8, sizeof(struct pt_regs)); - switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); - regs = (struct pt_regs *)stk; - /* now the switch32 on 16bit stack */ - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; - /* copy iret frame on 16bit stack */ - memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20); - /* fill in the switch pointers */ - switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; - switch16_ptr[1] = __ESPFIX_SS; - switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + - 8 - CPU_16BIT_STACK_SIZE; - switch32_ptr[1] = __KERNEL_DS; -} - -fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) -{ - unsigned long *switch32_ptr; - unsigned char *stack16, *stack32; - unsigned long stack_top, stack_bot; - int len; - int cpu = smp_processor_id(); - stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); - stack_top = stack_bot + CPU_16BIT_STACK_SIZE; - switch32_ptr = (unsigned long *)(stack_top - 8); - /* copy the data from 16bit stack to 32bit stack */ - len = CPU_16BIT_STACK_SIZE - 8 - sp; - stack16 = (unsigned char *)(stack_bot + sp); - stack32 = (unsigned char *) - (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); - memcpy(stack32, stack16, len); - return stack32; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; + unsigned long base = (kesp - uesp) & -THREAD_SIZE; + unsigned long new_kesp = kesp - base; + unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; + __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; + /* Set up base for espfix segment */ + desc &= 0x00f0ff0000000000ULL; + desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | + ((((__u64)base) << 32) & 0xff00000000000000ULL) | + ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | + (lim_pages & 0xffff); + *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; + return new_kesp; } /* diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index a0398f780ca..6cf2ac2bfde 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -4,8 +4,6 @@ #include #include -#define CPU_16BIT_STACK_SIZE 1024 - #ifndef __ASSEMBLY__ #include @@ -16,8 +14,6 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); - struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); @@ -181,6 +177,29 @@ static inline unsigned long get_desc_base(unsigned long *desc) return base; } +#else /* __ASSEMBLY__ */ + +/* + * GET_DESC_BASE reads the descriptor base of the specified segment. + * + * Args: + * idx - descriptor index + * gdt - GDT pointer + * base - 32bit register to which the base will be written + * lo_w - lo word of the "base" register + * lo_b - lo byte of the "base" register + * hi_b - hi byte of the low word of the "base" register + * + * Example: + * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) + * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. + */ +#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ + movb idx*8+4(gdt), lo_b; \ + movb idx*8+7(gdt), hi_b; \ + shll $16, base; \ + movw idx*8+2(gdt), lo_w; + #endif /* !__ASSEMBLY__ */ #endif diff --git a/include/asm-i386/percpu.h b/include/asm-i386/percpu.h index 5764afa4b6a..510ae1d3486 100644 --- a/include/asm-i386/percpu.h +++ b/include/asm-i386/percpu.h @@ -1,6 +1,31 @@ #ifndef __ARCH_I386_PERCPU__ #define __ARCH_I386_PERCPU__ +#ifndef __ASSEMBLY__ #include +#else + +/* + * PER_CPU finds an address of a per-cpu variable. + * + * Args: + * var - variable name + * cpu - 32bit register containing the current CPU number + * + * The resulting address is stored in the "cpu" argument. + * + * Example: + * PER_CPU(cpu_gdt_descr, %ebx) + */ +#ifdef CONFIG_SMP +#define PER_CPU(var, cpu) \ + movl __per_cpu_offset(,cpu,4), cpu; \ + addl $per_cpu__/**/var, cpu; +#else /* ! SMP */ +#define PER_CPU(var, cpu) \ + movl $per_cpu__/**/var, cpu; +#endif /* SMP */ + +#endif /* !__ASSEMBLY__ */ #endif /* __ARCH_I386_PERCPU__ */ -- cgit v1.2.3-70-g09d2 From acc207616a91a413a50fdd8847a747c4a7324167 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:01 +0100 Subject: [PATCH] i386: add sleazy FPU optimization i386 port of the sLeAZY-fpu feature. Chuck reports that this gives him a +/- 0.4% improvement on his simple benchmark x86_64 description follows: Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every* context switch a trap is taken for the first FPU use to restore the FPU context lazily. This is of course great for applications that have very sporadic or no FPU use (since then you avoid doing the expensive save/restore all the time). However for very frequent FPU users... you take an extra trap every context switch. The patch below adds a simple heuristic to this code: After 5 consecutive context switches of FPU use, the lazy behavior is disabled and the context gets restored every context switch. If the app indeed uses the FPU, the trap is avoided. (the chance of the 6th time slice using FPU after the previous 5 having done so are quite high obviously). After 256 switches, this is reset and lazy behavior is returned (until there are 5 consecutive ones again). The reason for this is to give apps that do longer bursts of FPU use still the lazy behavior back after some time. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/process.c | 12 ++++++++++++ arch/i386/kernel/traps.c | 3 ++- include/asm-i386/i387.h | 5 ++++- 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index dd53c58f64f..ae924c416b6 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -648,6 +648,11 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas __unlazy_fpu(prev_p); + + /* we're going to use this soon, after a few expensive things */ + if (next_p->fpu_counter > 5) + prefetch(&next->i387.fxsave); + /* * Reload esp0. */ @@ -697,6 +702,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas disable_tsc(prev_p, next_p); + /* If the task has used fpu the last 5 timeslices, just do a full + * restore of the math state immediately to avoid the trap; the + * chances of needing FPU soon are obviously high now + */ + if (next_p->fpu_counter > 5) + math_state_restore(); + return prev_p; } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f9bb1f89d68..4a6fa2837df 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -1118,7 +1118,7 @@ fastcall unsigned long patch_espfix_desc(unsigned long uesp, * Must be called with kernel preemption disabled (in this case, * local interrupts are disabled at the call-site in entry.S). */ -asmlinkage void math_state_restore(struct pt_regs regs) +asmlinkage void math_state_restore(void) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; @@ -1128,6 +1128,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + tsk->fpu_counter++; } #ifndef CONFIG_MATH_EMULATION diff --git a/include/asm-i386/i387.h b/include/asm-i386/i387.h index bc1d6edae1e..434936c732d 100644 --- a/include/asm-i386/i387.h +++ b/include/asm-i386/i387.h @@ -76,7 +76,9 @@ static inline void __save_init_fpu( struct task_struct *tsk ) #define __unlazy_fpu( tsk ) do { \ if (task_thread_info(tsk)->status & TS_USEDFPU) \ - save_init_fpu( tsk ); \ + save_init_fpu( tsk ); \ + else \ + tsk->fpu_counter = 0; \ } while (0) #define __clear_fpu( tsk ) \ @@ -118,6 +120,7 @@ static inline void save_init_fpu( struct task_struct *tsk ) extern unsigned short get_fpu_cwd( struct task_struct *tsk ); extern unsigned short get_fpu_swd( struct task_struct *tsk ); extern unsigned short get_fpu_mxcsr( struct task_struct *tsk ); +extern asmlinkage void math_state_restore(void); /* * Signal frame handlers... -- cgit v1.2.3-70-g09d2 From 86efef50cfff9905c4e4ec64f3d3d3b299226674 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] x86-64: x86-64 add Intel Core related PMU MSRs definitions Add o the x86-64 tree a bunch of MSRs related to performance monitoring for the processors based on Intel Core microarchitecture. It also adds some architectural MSRs for PEBS. A similar patch for i386 will follow. changelog: - add Intel Precise-Event Based sampling (PEBS) related MSR - add Intel Data Save (DS) Area related MSR - add Intel Core microarchitecure performance counter MSRs Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- include/asm-x86_64/msr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 37e194169fa..a745c5008a6 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -210,6 +210,10 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_IA32_LASTINTFROMIP 0x1dd #define MSR_IA32_LASTINTTOIP 0x1de +#define MSR_IA32_PEBS_ENABLE 0x3f1 +#define MSR_IA32_DS_AREA 0x600 +#define MSR_IA32_PERF_CAPABILITIES 0x345 + #define MSR_MTRRfix64K_00000 0x250 #define MSR_MTRRfix16K_80000 0x258 #define MSR_MTRRfix16K_A0000 0x259 @@ -407,4 +411,13 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_P4_U2L_ESCR0 0x3b0 #define MSR_P4_U2L_ESCR1 0x3b1 +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x309 +#define MSR_CORE_PERF_FIXED_CTR1 0x30a +#define MSR_CORE_PERF_FIXED_CTR2 0x30b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 + #endif -- cgit v1.2.3-70-g09d2 From bb0d977ed42c79ed709c79dbab4ff2159941eb2a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] i386: add Intel Core related PMU MSRs - add Intel Precise-Event Based sampling (PEBS) related MSR - add Intel Data Save (DS) Area related MSR - add Intel Core microarchitecure performance counter MSRs Signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- include/asm-i386/msr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 62b76cd9695..1820d9d73af 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -141,6 +141,10 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_IA32_MC0_ADDR 0x402 #define MSR_IA32_MC0_MISC 0x403 +#define MSR_IA32_PEBS_ENABLE 0x3f1 +#define MSR_IA32_DS_AREA 0x600 +#define MSR_IA32_PERF_CAPABILITIES 0x345 + /* Pentium IV performance counter MSRs */ #define MSR_P4_BPU_PERFCTR0 0x300 #define MSR_P4_BPU_PERFCTR1 0x301 @@ -284,4 +288,13 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) #define MSR_TMTA_LRTI_READOUT 0x80868018 #define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x309 +#define MSR_CORE_PERF_FIXED_CTR1 0x30a +#define MSR_CORE_PERF_FIXED_CTR2 0x30b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x38d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x38e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x38f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x390 + #endif /* __ASM_MSR_H */ -- cgit v1.2.3-70-g09d2 From 9ca36101a8d74704d78f10910f89d62de96f9dc8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] i386: Basic definitions for i386-pda This patch has the basic definitions of struct i386_pda, and the segment selector in the GDT. asm-i386/pda.h is more or less a direct copy of asm-x86_64/pda.h. The most interesting difference is the use of _proxy_pda, which is used to give gcc a model for the actual memory operations on the real pda structure. No actual reference is ever made to _proxy_pda, so it is never defined. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/head.S | 2 +- include/asm-i386/pda.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++ include/asm-i386/segment.h | 5 ++- 3 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 include/asm-i386/pda.h (limited to 'include') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index b1f1df11fcc..4a83384c5a6 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -585,7 +585,7 @@ ENTRY(cpu_gdt_table) .quad 0x004092000000ffff /* 0xc8 APM DS data */ .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ - .quad 0x0000000000000000 /* 0xd8 - unused */ + .quad 0x0000000000000000 /* 0xd8 - PDA */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h new file mode 100644 index 00000000000..4c39ccb1305 --- /dev/null +++ b/include/asm-i386/pda.h @@ -0,0 +1,95 @@ +/* + Per-processor Data Areas + Jeremy Fitzhardinge 2006 + Based on asm-x86_64/pda.h by Andi Kleen. + */ +#ifndef _I386_PDA_H +#define _I386_PDA_H + +#include + +struct i386_pda +{ + struct i386_pda *_pda; /* pointer to self */ +}; + +extern struct i386_pda *_cpu_pda[]; + +#define cpu_pda(i) (_cpu_pda[i]) + +#define pda_offset(field) offsetof(struct i386_pda, field) + +extern void __bad_pda_field(void); + +/* This variable is never instantiated. It is only used as a stand-in + for the real per-cpu PDA memory, so that gcc can understand what + memory operations the inline asms() below are performing. This + eliminates the need to make the asms volatile or have memory + clobbers, so gcc can readily analyse them. */ +extern struct i386_pda _proxy_pda; + +#define pda_to_op(op,field,val) \ + do { \ + typedef typeof(_proxy_pda.field) T__; \ + if (0) { T__ tmp__; tmp__ = (val); } \ + switch (sizeof(_proxy_pda.field)) { \ + case 1: \ + asm(op "b %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + case 2: \ + asm(op "w %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + case 4: \ + asm(op "l %1,%%gs:%c2" \ + : "+m" (_proxy_pda.field) \ + :"ri" ((T__)val), \ + "i"(pda_offset(field))); \ + break; \ + default: __bad_pda_field(); \ + } \ + } while (0) + +#define pda_from_op(op,field) \ + ({ \ + typeof(_proxy_pda.field) ret__; \ + switch (sizeof(_proxy_pda.field)) { \ + case 1: \ + asm(op "b %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 2: \ + asm(op "w %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + case 4: \ + asm(op "l %%gs:%c1,%0" \ + : "=r" (ret__) \ + : "i" (pda_offset(field)), \ + "m" (_proxy_pda.field)); \ + break; \ + default: __bad_pda_field(); \ + } \ + ret__; }) + +/* Return a pointer to a pda field */ +#define pda_addr(field) \ + ((typeof(_proxy_pda.field) *)((unsigned char *)read_pda(_pda) + \ + pda_offset(field))) + +#define read_pda(field) pda_from_op("mov",field) +#define write_pda(field,val) pda_to_op("mov",field,val) +#define add_pda(field,val) pda_to_op("add",field,val) +#define sub_pda(field,val) pda_to_op("sub",field,val) +#define or_pda(field,val) pda_to_op("or",field,val) + +#endif /* _I386_PDA_H */ diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index b7ab59685ba..5bdda79b6b5 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -39,7 +39,7 @@ * 25 - APM BIOS support * * 26 - ESPFIX small SS - * 27 - unused + * 27 - PDA [ per-cpu private data area ] * 28 - unused * 29 - unused * 30 - unused @@ -74,6 +74,9 @@ #define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE + 14) #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ENTRY_PDA (GDT_ENTRY_KERNEL_BASE + 15) +#define __KERNEL_PDA (GDT_ENTRY_PDA * 8) + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 /* -- cgit v1.2.3-70-g09d2 From 62111195800d80c66cdc69063ea3145878c99fbf Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] i386: Initialize the per-CPU data area When a CPU is brought up, a PDA and GDT are allocated for it. The GDT's __KERNEL_PDA entry is pointed to the allocated PDA memory, so that all references using this segment descriptor will refer to the PDA. This patch rearranges CPU initialization a bit, so that the GDT/PDA are set up as early as possible in cpu_init(). Also for secondary CPUs, GDT+PDA are preallocated and initialized so all the secondary CPU needs to do is set up the ldt and load %gs. This will be important once smp_processor_id() and current use the PDA. In all cases, the PDA is set up in head.S, before a CPU starts running C code, so the PDA is always available. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Cc: James Bottomley Cc: Matt Tolentino Signed-off-by: Andrew Morton --- arch/i386/kernel/cpu/common.c | 177 ++++++++++++++++++++++++++--------- arch/i386/kernel/smpboot.c | 28 ++++-- arch/i386/mach-voyager/voyager_smp.c | 14 ++- include/asm-i386/processor.h | 3 + 4 files changed, 172 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 5532fc4e1bf..2534e25ed74 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -18,12 +18,16 @@ #include #include #endif +#include #include "cpu.h" DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); +struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(_cpu_pda); + static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; @@ -588,41 +592,16 @@ void __init early_cpu_init(void) disable_pse = 1; #endif } -/* - * cpu_init() initializes state that is per-CPU. Some data is already - * initialized (naturally) in the bootstrap process, such as the GDT - * and IDT. We reload them nevertheless, this function acts as a - * 'CPU state barrier', nothing should get across. - */ -void __cpuinit cpu_init(void) + +__cpuinit int alloc_gdt(int cpu) { - int cpu = smp_processor_id(); - struct tss_struct * t = &per_cpu(init_tss, cpu); - struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; - if (cpu_test_and_set(cpu, cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); - for (;;) local_irq_enable(); - } - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); - if (cpu_has_vme || cpu_has_tsc || cpu_has_de) - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); - /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ - clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); - set_in_cr4(X86_CR4_TSD); - } - - /* The CPU hotplug case */ - if (cpu_gdt_descr->address) { - gdt = (struct desc_struct *)cpu_gdt_descr->address; - memset(gdt, 0, PAGE_SIZE); - goto old_gdt; - } /* * This is a horrible hack to allocate the GDT. The problem * is that cpu_init() is called really early for the boot CPU @@ -630,36 +609,117 @@ void __cpuinit cpu_init(void) * CPUs, when bootmem will have gone away */ if (NODE_DATA(0)->bdata->node_bootmem_map) { - gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); - /* alloc_bootmem_pages panics on failure, so no check */ + BUG_ON(gdt != NULL || pda != NULL); + + gdt = alloc_bootmem_pages(PAGE_SIZE); + pda = alloc_bootmem(sizeof(*pda)); + /* alloc_bootmem(_pages) panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + memset(pda, 0, sizeof(*pda)); } else { - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - if (unlikely(!gdt)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - for (;;) - local_irq_enable(); + /* GDT and PDA might already have been allocated if + this is a CPU hotplug re-insertion. */ + if (gdt == NULL) + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + + if (pda == NULL) + pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); + + if (unlikely(!gdt || !pda)) { + free_pages((unsigned long)gdt, 0); + kfree(pda); + return 0; } } -old_gdt: + + cpu_gdt_descr->address = (unsigned long)gdt; + cpu_pda(cpu) = pda; + + return 1; +} + +/* Initial PDA used by boot CPU */ +struct i386_pda boot_pda = { + ._pda = &boot_pda, +}; + +/* Initialize the CPU's GDT and PDA. The boot CPU does this for + itself, but secondaries find this done for them. */ +__cpuinit int init_gdt(int cpu, struct task_struct *idle) +{ + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + struct desc_struct *gdt; + struct i386_pda *pda; + + /* For non-boot CPUs, the GDT and PDA should already have been + allocated. */ + if (!alloc_gdt(cpu)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); + return 0; + } + + gdt = (struct desc_struct *)cpu_gdt_descr->address; + pda = cpu_pda(cpu); + + BUG_ON(gdt == NULL || pda == NULL); + /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ memcpy(gdt, cpu_gdt_table, GDT_SIZE); cpu_gdt_descr->size = GDT_SIZE - 1; - cpu_gdt_descr->address = (unsigned long)gdt; + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, + (u32 *)&gdt[GDT_ENTRY_PDA].b, + (unsigned long)pda, sizeof(*pda) - 1, + 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ + + memset(pda, 0, sizeof(*pda)); + pda->_pda = pda; + + return 1; +} + +/* Common CPU init for both boot and secondary CPUs */ +static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) +{ + struct tss_struct * t = &per_cpu(init_tss, cpu); + struct thread_struct *thread = &curr->thread; + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); + + /* Reinit these anyway, even if they've already been done (on + the boot CPU, this will transition from the boot gdt+pda to + the real ones). */ load_gdt(cpu_gdt_descr); + + if (cpu_test_and_set(cpu, cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); + for (;;) local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); + + if (cpu_has_vme || cpu_has_tsc || cpu_has_de) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + if (tsc_disable && cpu_has_tsc) { + printk(KERN_NOTICE "Disabling TSC...\n"); + /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ + clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); + set_in_cr4(X86_CR4_TSD); + } + load_idt(&idt_descr); /* * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - BUG_ON(current->mm); - enter_lazy_tlb(&init_mm, current); + curr->active_mm = &init_mm; + if (curr->mm) + BUG(); + enter_lazy_tlb(&init_mm, curr); load_esp0(t, thread); set_tss_desc(cpu,t); @@ -690,6 +750,37 @@ old_gdt: mxcsr_feature_mask_init(); } +/* Entrypoint to initialize secondary CPU */ +void __cpuinit secondary_cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + _cpu_init(cpu, curr); +} + +/* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT + * and IDT. We reload them nevertheless, this function acts as a + * 'CPU state barrier', nothing should get across. + */ +void __cpuinit cpu_init(void) +{ + int cpu = smp_processor_id(); + struct task_struct *curr = current; + + /* Set up the real GDT and PDA, so we can transition from the + boot versions. */ + if (!init_gdt(cpu, curr)) { + /* failed to allocate something; not much we can do... */ + for (;;) + local_irq_enable(); + } + + _cpu_init(cpu, curr); +} + #ifdef CONFIG_HOTPLUG_CPU void __cpuinit cpu_uninit(void) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 4bb8b77cd65..095636620fa 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -536,11 +537,11 @@ set_cpu_sibling_map(int cpu) static void __devinit start_secondary(void *unused) { /* - * Dont put anything before smp_callin(), SMP + * Don't put *anything* before secondary_cpu_init(), SMP * booting is too fragile that we want to limit the * things done here to the most necessary things. */ - cpu_init(); + secondary_cpu_init(); preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) @@ -599,13 +600,16 @@ void __devinit initialize_secondary(void) "movl %0,%%esp\n\t" "jmp *%1" : - :"r" (current->thread.esp),"r" (current->thread.eip)); + :"m" (current->thread.esp),"m" (current->thread.eip)); } +/* Static state in head.S used to set up a CPU */ extern struct { void * esp; unsigned short ss; } stack_start; +extern struct i386_pda *start_pda; +extern struct Xgt_desc_struct cpu_gdt_descr; #ifdef CONFIG_NUMA @@ -936,9 +940,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - ++cpucount; - alternatives_smp_switch(1); - /* * We can't use kernel_thread since we must avoid to * reschedule the child. @@ -946,15 +947,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu) idle = alloc_idle_task(cpu); if (IS_ERR(idle)) panic("failed fork for CPU %d", cpu); + + /* Pre-allocate and initialize the CPU's GDT and PDA so it + doesn't have to do any memory allocation during the + delicate CPU-bringup phase. */ + if (!init_gdt(cpu, idle)) { + printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); + return -1; /* ? */ + } + idle->thread.eip = (unsigned long) start_secondary; /* start_eip had better be page-aligned! */ start_eip = setup_trampoline(); + ++cpucount; + alternatives_smp_switch(1); + /* So we see what's up */ printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); /* Stack for startup_32 can be just as for start_secondary onwards */ stack_start.esp = (void *) idle->thread.esp; + start_pda = cpu_pda(cpu); + cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu); + irq_ctx_init(cpu); x86_cpu_to_apicid[cpu] = apicid; diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index f3fea2ad50f..55428e656a3 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -28,6 +28,7 @@ #include #include #include +#include /* TLB state -- visible externally, indexed physically */ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; @@ -422,6 +423,7 @@ find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; + write_pda(cpu_number, boot_cpu_id); } /* @@ -458,7 +460,7 @@ start_secondary(void *unused) /* external functions not defined in the headers */ extern void calibrate_delay(void); - cpu_init(); + secondary_cpu_init(); /* OK, we're in the routine */ ack_CPI(VIC_CPU_BOOT_CPI); @@ -578,6 +580,15 @@ do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.esp = (void *) idle->thread.esp; + /* Pre-allocate and initialize the CPU's GDT and PDA so it + doesn't have to do any memory allocation during the + delicate CPU-bringup phase. */ + if (!init_gdt(cpu, idle)) { + printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); + cpucount--; + return; + } + irq_ctx_init(cpu); /* Note: Don't modify initial ss override */ @@ -1963,4 +1974,5 @@ void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); + write_pda(cpu_number, hard_smp_processor_id()); } diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index e0ddca94d50..a9f2041c7c8 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -727,4 +727,7 @@ extern unsigned long boot_option_idle_override; extern void enable_sep_cpu(void); extern int sysenter_setup(void); +extern int init_gdt(int cpu, struct task_struct *idle); +extern void secondary_cpu_init(void); + #endif /* __ASM_I386_PROCESSOR_H */ -- cgit v1.2.3-70-g09d2 From f95d47caae5302a63d92be9a0292abc90e2a14e1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] i386: Use %gs as the PDA base-segment in the kernel This patch is the meat of the PDA change. This patch makes several related changes: 1: Most significantly, %gs is now used in the kernel. This means that on entry, the old value of %gs is saved away, and it is reloaded with __KERNEL_PDA. 2: entry.S constructs the stack in the shape of struct pt_regs, and this is passed around the kernel so that the process's saved register state can be accessed. Unfortunately struct pt_regs doesn't currently have space for %gs (or %fs). This patch extends pt_regs to add space for gs (no space is allocated for %fs, since it won't be used, and it would just complicate the code in entry.S to work around the space). 3: Because %gs is now saved on the stack like %ds, %es and the integer registers, there are a number of places where it no longer needs to be handled specially; namely context switch, and saving/restoring the register state in a signal context. 4: And since kernel threads run in kernel space and call normal kernel code, they need to be created with their %gs == __KERNEL_PDA. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 1 + arch/i386/kernel/cpu/common.c | 21 +++++++++++-- arch/i386/kernel/entry.S | 70 +++++++++++++++++++++++++++++------------- arch/i386/kernel/head.S | 31 ++++++++++++++++--- arch/i386/kernel/process.c | 26 ++++++++-------- arch/i386/kernel/signal.c | 6 ++-- include/asm-i386/mmu_context.h | 4 +-- include/asm-i386/processor.h | 4 ++- include/asm-i386/ptrace.h | 2 ++ kernel/fork.c | 2 +- 10 files changed, 117 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 70b19807acf..9620872d353 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -72,6 +72,7 @@ void foo(void) OFFSET(PT_EAX, pt_regs, eax); OFFSET(PT_DS, pt_regs, xds); OFFSET(PT_ES, pt_regs, xes); + OFFSET(PT_GS, pt_regs, xgs); OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); OFFSET(PT_EIP, pt_regs, eip); OFFSET(PT_CS, pt_regs, xcs); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 2534e25ed74..4e63d8ce602 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -593,6 +593,14 @@ void __init early_cpu_init(void) #endif } +/* Make sure %gs is initialized properly in idle threads */ +struct pt_regs * __devinit idle_regs(struct pt_regs *regs) +{ + memset(regs, 0, sizeof(struct pt_regs)); + regs->xgs = __KERNEL_PDA; + return regs; +} + __cpuinit int alloc_gdt(int cpu) { struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); @@ -644,6 +652,14 @@ struct i386_pda boot_pda = { ._pda = &boot_pda, }; +static inline void set_kernel_gs(void) +{ + /* Set %gs for this CPU's PDA. Memory clobber is to create a + barrier with respect to any PDA operations, so the compiler + doesn't move any before here. */ + asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); +} + /* Initialize the CPU's GDT and PDA. The boot CPU does this for itself, but secondaries find this done for them. */ __cpuinit int init_gdt(int cpu, struct task_struct *idle) @@ -693,6 +709,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) the boot CPU, this will transition from the boot gdt+pda to the real ones). */ load_gdt(cpu_gdt_descr); + set_kernel_gs(); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -731,8 +748,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %fs and %gs. */ - asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); + /* Clear %fs. */ + asm volatile ("mov %0, %%fs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 0069bf01603..b99d4a16007 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -30,12 +30,13 @@ * 18(%esp) - %eax * 1C(%esp) - %ds * 20(%esp) - %es - * 24(%esp) - orig_eax - * 28(%esp) - %eip - * 2C(%esp) - %cs - * 30(%esp) - %eflags - * 34(%esp) - %oldesp - * 38(%esp) - %oldss + * 24(%esp) - %gs + * 28(%esp) - orig_eax + * 2C(%esp) - %eip + * 30(%esp) - %cs + * 34(%esp) - %eflags + * 38(%esp) - %oldesp + * 3C(%esp) - %oldss * * "current" is in register %ebx during any slow entries. */ @@ -92,6 +93,9 @@ VM_MASK = 0x00020000 #define SAVE_ALL \ cld; \ + pushl %gs; \ + CFI_ADJUST_CFA_OFFSET 4;\ + /*CFI_REL_OFFSET gs, 0;*/\ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ /*CFI_REL_OFFSET es, 0;*/\ @@ -121,7 +125,9 @@ VM_MASK = 0x00020000 CFI_REL_OFFSET ebx, 0;\ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ - movl %edx, %es; + movl %edx, %es; \ + movl $(__KERNEL_PDA), %edx; \ + movl %edx, %gs #define RESTORE_INT_REGS \ popl %ebx; \ @@ -154,17 +160,22 @@ VM_MASK = 0x00020000 2: popl %es; \ CFI_ADJUST_CFA_OFFSET -4;\ /*CFI_RESTORE es;*/\ -.section .fixup,"ax"; \ -3: movl $0,(%esp); \ - jmp 1b; \ +3: popl %gs; \ + CFI_ADJUST_CFA_OFFSET -4;\ + /*CFI_RESTORE gs;*/\ +.pushsection .fixup,"ax"; \ 4: movl $0,(%esp); \ + jmp 1b; \ +5: movl $0,(%esp); \ jmp 2b; \ -.previous; \ +6: movl $0,(%esp); \ + jmp 3b; \ .section __ex_table,"a";\ .align 4; \ - .long 1b,3b; \ - .long 2b,4b; \ -.previous + .long 1b,4b; \ + .long 2b,5b; \ + .long 3b,6b; \ +.popsection #define RING0_INT_FRAME \ CFI_STARTPROC simple;\ @@ -231,6 +242,7 @@ check_userspace: andl $(VM_MASK | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax jb resume_kernel # not returning to v8086 or userspace + ENTRY(resume_userspace) DISABLE_INTERRUPTS # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -327,9 +339,16 @@ sysenter_past_esp: movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON +1: mov PT_GS(%esp), %gs ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC - +.pushsection .fixup,"ax" +2: movl $0,PT_GS(%esp) + jmp 1b +.section __ex_table,"a" + .align 4 + .long 1b,2b +.popsection # system call handler stub ENTRY(system_call) @@ -375,7 +394,7 @@ restore_nocheck: TRACE_IRQS_IRET restore_nocheck_notrace: RESTORE_REGS - addl $4, %esp + addl $4, %esp # skip orig_eax/error_code CFI_ADJUST_CFA_OFFSET -4 1: INTERRUPT_RETURN .section .fixup,"ax" @@ -588,6 +607,10 @@ KPROBE_ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: + /* the function address is in %gs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ pushl %ds CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ds, 0*/ @@ -613,18 +636,20 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %es + pushl %gs CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ + /*CFI_REL_OFFSET gs, 0*/ + movl $(__KERNEL_PDA), %ecx + movl %ecx, %gs UNWIND_ESPFIX_STACK popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ - movl PT_ES(%esp), %edi # get the function address + movl PT_GS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) - movl %ecx, PT_ES(%esp) - /*CFI_REL_OFFSET es, ES*/ + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_GS(%esp) + /*CFI_REL_OFFSET gs, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es @@ -936,6 +961,7 @@ ENTRY(arch_unwind_init_running) movl %ebx, PT_EAX(%edx) movl $__USER_DS, PT_DS(%edx) movl $__USER_DS, PT_ES(%edx) + movl $0, PT_GS(%edx) movl %ebx, PT_ORIG_EAX(%edx) movl %ecx, PT_EIP(%edx) movl 12(%esp), %ecx diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 4a83384c5a6..5b14e95ac8b 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -302,6 +302,7 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 + call setup_pda lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -312,10 +313,13 @@ is386: movl $2,%ecx # set MP movl %eax,%ds movl %eax,%es - xorl %eax,%eax # Clear FS/GS and LDT + xorl %eax,%eax # Clear FS and LDT movl %eax,%fs - movl %eax,%gs lldt %ax + + movl $(__KERNEL_PDA),%eax + mov %eax,%gs + cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder #ifdef CONFIG_SMP @@ -345,6 +349,23 @@ check_x87: .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ ret +/* + * Point the GDT at this CPU's PDA. On boot this will be + * cpu_gdt_table and boot_pda; for secondary CPUs, these will be + * that CPU's GDT and PDA. + */ +setup_pda: + /* get the PDA pointer */ + movl start_pda, %eax + + /* slot the PDA address into the GDT */ + mov cpu_gdt_descr+2, %ecx + mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ + shr $16, %eax + mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ + mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ + ret + /* * setup_idt * @@ -484,6 +505,8 @@ ENTRY(empty_zero_page) * This starts the data section. */ .data +ENTRY(start_pda) + .long boot_pda ENTRY(stack_start) .long init_thread_union+THREAD_SIZE @@ -525,7 +548,7 @@ idt_descr: # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -cpu_gdt_descr: +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table @@ -585,7 +608,7 @@ ENTRY(cpu_gdt_table) .quad 0x004092000000ffff /* 0xc8 APM DS data */ .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ - .quad 0x0000000000000000 /* 0xd8 - PDA */ + .quad 0x00cf92000000ffff /* 0xd8 - PDA */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index ae924c416b6..905364d4284 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -56,6 +56,7 @@ #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -346,6 +347,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; + regs.xgs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -431,7 +433,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->thread.eip = (unsigned long) ret_from_fork; savesegment(fs,p->thread.fs); - savesegment(gs,p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -659,16 +660,16 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_esp0(tss, next); /* - * Save away %fs and %gs. No need to save %es and %ds, as - * those are always kernel segments while inside the kernel. - * Doing this before setting the new TLS descriptors avoids - * the situation where we temporarily have non-reloadable - * segments in %fs and %gs. This could be an issue if the - * NMI handler ever used %fs or %gs (it does not today), or - * if the kernel is running inside of a hypervisor layer. + * Save away %fs. No need to save %gs, as it was saved on the + * stack on entry. No need to save %es and %ds, as those are + * always kernel segments while inside the kernel. Doing this + * before setting the new TLS descriptors avoids the situation + * where we temporarily have non-reloadable segments in %fs + * and %gs. This could be an issue if the NMI handler ever + * used %fs or %gs (it does not today), or if the kernel is + * running inside of a hypervisor layer. */ savesegment(fs, prev->fs); - savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -676,16 +677,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_TLS(next, cpu); /* - * Restore %fs and %gs if needed. + * Restore %fs if needed. * - * Glibc normally makes %fs be zero, and %gs is one of - * the TLS segments. + * Glibc normally makes %fs be zero. */ if (unlikely(prev->fs | next->fs)) loadsegment(fs, next->fs); - if (prev->gs | next->gs) - loadsegment(gs, next->gs); /* * Restore IOPL if needed. diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 43002cfb40c..65d7620eaa0 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -128,7 +128,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) - GET_SEG(gs); + COPY_SEG(gs); GET_SEG(fs); COPY_SEG(es); COPY_SEG(ds); @@ -244,9 +244,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, { int tmp, err = 0; - tmp = 0; - savesegment(gs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->gs); + err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); savesegment(fs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->fs); diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 1b1495372c4..68ff102d6f5 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h @@ -62,8 +62,8 @@ static inline void switch_mm(struct mm_struct *prev, #endif } -#define deactivate_mm(tsk, mm) \ - asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) +#define deactivate_mm(tsk, mm) \ + asm("movl %0,%%fs": :"r" (0)); #define activate_mm(prev, next) \ switch_mm((prev),(next),NULL) diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index a9f2041c7c8..f73cf836e64 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -473,6 +473,7 @@ struct thread_struct { .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ + .gs = __KERNEL_PDA, \ } /* @@ -500,7 +501,8 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa } #define start_thread(regs, new_eip, new_esp) do { \ - __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \ + __asm__("movl %0,%%fs": :"r" (0)); \ + regs->xgs = 0; \ set_fs(USER_DS); \ regs->xds = __USER_DS; \ regs->xes = __USER_DS; \ diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index d505f501077..bdbc894339b 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -16,6 +16,8 @@ struct pt_regs { long eax; int xds; int xes; + /* int xfs; */ + int xgs; long orig_eax; long eip; int xcs; diff --git a/kernel/fork.c b/kernel/fork.c index 8cdd3e72ba5..fd22245e388 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1303,7 +1303,7 @@ fork_out: return ERR_PTR(retval); } -struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) +noinline struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); return regs; -- cgit v1.2.3-70-g09d2 From 66e10a44d724f1464b5e8b5a3eae1e2cbbc2cca6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:02 +0100 Subject: [PATCH] i386: Fix places where using %gs changes the usermode ABI There are a few places where the change in struct pt_regs and the use of %gs affect the userspace ABI. These are primarily debugging interfaces where thread state can be inspected or extracted. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/process.c | 6 +++--- arch/i386/kernel/ptrace.c | 18 ++++++------------ include/asm-i386/elf.h | 2 +- include/asm-i386/unwind.h | 1 + 4 files changed, 11 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 905364d4284..dc427254517 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -315,8 +315,8 @@ void show_regs(struct pt_regs * regs) regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes); + printk(" DS: %04x ES: %04x GS: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); cr0 = read_cr0(); cr2 = read_cr2(); @@ -509,7 +509,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs.ds = regs->xds; dump->regs.es = regs->xes; savesegment(fs,dump->regs.fs); - savesegment(gs,dump->regs.gs); + dump->regs.gs = regs->xgs; dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 775f50e9395..f3f94ac5736 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -94,13 +94,9 @@ static int putreg(struct task_struct *child, return -EIO; child->thread.fs = value; return 0; - case GS: - if (value && (value & 3) != 3) - return -EIO; - child->thread.gs = value; - return 0; case DS: case ES: + case GS: if (value && (value & 3) != 3) return -EIO; value &= 0xffff; @@ -116,8 +112,8 @@ static int putreg(struct task_struct *child, value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; break; } - if (regno > GS*4) - regno -= 2*4; + if (regno > ES*4) + regno -= 1*4; put_stack_long(child, regno - sizeof(struct pt_regs), value); return 0; } @@ -131,18 +127,16 @@ static unsigned long getreg(struct task_struct *child, case FS: retval = child->thread.fs; break; - case GS: - retval = child->thread.gs; - break; case DS: case ES: + case GS: case SS: case CS: retval = 0xffff; /* fall through */ default: - if (regno > GS*4) - regno -= 2*4; + if (regno > ES*4) + regno -= 1*4; regno = regno - sizeof(struct pt_regs); retval &= get_stack_long(child, regno); } diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 3a05436f31c..45d21a0c95b 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -91,7 +91,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t; pr_reg[7] = regs->xds; \ pr_reg[8] = regs->xes; \ savesegment(fs,pr_reg[9]); \ - savesegment(gs,pr_reg[10]); \ + pr_reg[10] = regs->xgs; \ pr_reg[11] = regs->orig_eax; \ pr_reg[12] = regs->eip; \ pr_reg[13] = regs->xcs; \ diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 5031d693b89..601fc67bd77 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -71,6 +71,7 @@ static inline void arch_unw_init_blocked(struct unwind_frame_info *info) info->regs.xss = __KERNEL_DS; info->regs.xds = __USER_DS; info->regs.xes = __USER_DS; + info->regs.xgs = __KERNEL_PDA; } extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, -- cgit v1.2.3-70-g09d2 From 49d26b6eaa8e970c8cf6e299e6ccba2474191bf5 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Update sys_vm86 to cope with changed pt_regs and %gs usage sys_vm86 uses a struct kernel_vm86_regs, which is identical to pt_regs, but adds an extra space for all the segment registers. Previously this structure was completely independent, so changes in pt_regs had to be reflected in kernel_vm86_regs. This changes just embeds pt_regs in kernel_vm86_regs, and makes the appropriate changes to vm86.c to deal with the new naming. Also, since %gs is dealt with differently in the kernel, this change adjusts vm86.c to reflect this. While making these changes, I also cleaned up some frankly bizarre code which was added when auditing was added to sys_vm86. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Cc: Al Viro Cc: Jason Baron Cc: Chris Wright Signed-off-by: Andrew Morton --- arch/i386/kernel/vm86.c | 121 +++++++++++++++++++++++++++++------------------- include/asm-i386/vm86.h | 17 +------ 2 files changed, 76 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index cbcd61d6120..be2f96e67f7 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -72,10 +73,10 @@ /* * 8- and 16-bit register defines.. */ -#define AL(regs) (((unsigned char *)&((regs)->eax))[0]) -#define AH(regs) (((unsigned char *)&((regs)->eax))[1]) -#define IP(regs) (*(unsigned short *)&((regs)->eip)) -#define SP(regs) (*(unsigned short *)&((regs)->esp)) +#define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0]) +#define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1]) +#define IP(regs) (*(unsigned short *)&((regs)->pt.eip)) +#define SP(regs) (*(unsigned short *)&((regs)->pt.esp)) /* * virtual flags (16 and 32-bit versions) @@ -89,10 +90,37 @@ #define SAFE_MASK (0xDD5) #define RETURN_MASK (0xDFF) -#define VM86_REGS_PART2 orig_eax -#define VM86_REGS_SIZE1 \ - ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) -#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) +/* convert kernel_vm86_regs to vm86_regs */ +static int copy_vm86_regs_to_user(struct vm86_regs __user *user, + const struct kernel_vm86_regs *regs) +{ + int ret = 0; + + /* kernel_vm86_regs is missing xfs, so copy everything up to + (but not including) xgs, and then rest after xgs. */ + ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); + ret += copy_to_user(&user->__null_gs, ®s->pt.xgs, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.xgs)); + + return ret; +} + +/* convert vm86_regs to kernel_vm86_regs */ +static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, + const struct vm86_regs __user *user, + unsigned extra) +{ + int ret = 0; + + ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); + ret += copy_from_user(®s->pt.xgs, &user->__null_gs, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.xgs) + + extra); + + return ret; +} struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) @@ -112,10 +140,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } - set_flags(regs->eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); - tmp = copy_to_user(¤t->thread.vm86_info->regs,regs, VM86_REGS_SIZE1); - tmp += copy_to_user(¤t->thread.vm86_info->regs.VM86_REGS_PART2, - ®s->VM86_REGS_PART2, VM86_REGS_SIZE2); + set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); + tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); if (tmp) { printk("vm86: could not access userspace vm86_info\n"); @@ -129,9 +155,11 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) current->thread.saved_esp0 = 0; put_cpu(); - loadsegment(fs, current->thread.saved_fs); - loadsegment(gs, current->thread.saved_gs); ret = KVM86->regs32; + + loadsegment(fs, current->thread.saved_fs); + ret->xgs = current->thread.saved_gs; + return ret; } @@ -183,9 +211,9 @@ asmlinkage int sys_vm86old(struct pt_regs regs) tsk = current; if (tsk->thread.saved_esp0) goto out; - tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); - tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, - (long)&info.vm86plus - (long)&info.regs.VM86_REGS_PART2); + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, vm86plus) - + sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; @@ -233,9 +261,9 @@ asmlinkage int sys_vm86(struct pt_regs regs) if (tsk->thread.saved_esp0) goto out; v86 = (struct vm86plus_struct __user *)regs.ecx; - tmp = copy_from_user(&info, v86, VM86_REGS_SIZE1); - tmp += copy_from_user(&info.regs.VM86_REGS_PART2, &v86->regs.VM86_REGS_PART2, - (long)&info.regs32 - (long)&info.regs.VM86_REGS_PART2); + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, regs32) - + sizeof(info.regs)); ret = -EFAULT; if (tmp) goto out; @@ -252,15 +280,15 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { struct tss_struct *tss; - long eax; /* * make sure the vm86() system call doesn't try to do anything silly */ - info->regs.__null_ds = 0; - info->regs.__null_es = 0; + info->regs.pt.xds = 0; + info->regs.pt.xes = 0; + info->regs.pt.xgs = 0; -/* we are clearing fs,gs later just before "jmp resume_userspace", - * because starting with Linux 2.1.x they aren't no longer saved/restored +/* we are clearing fs later just before "jmp resume_userspace", + * because it is not saved/restored. */ /* @@ -268,10 +296,10 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = info->regs.eflags; - info->regs.eflags &= SAFE_MASK; - info->regs.eflags |= info->regs32->eflags & ~SAFE_MASK; - info->regs.eflags |= VM_MASK; + VEFLAGS = info->regs.pt.eflags; + info->regs.pt.eflags &= SAFE_MASK; + info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK; + info->regs.pt.eflags |= VM_MASK; switch (info->cpu_type) { case CPU_286: @@ -294,7 +322,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; savesegment(fs, tsk->thread.saved_fs); - savesegment(gs, tsk->thread.saved_gs); + tsk->thread.saved_gs = info->regs32->xgs; tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -306,19 +334,18 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t"); - __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax)); /*call audit_syscall_exit since we do not exit via the normal paths */ if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(eax), eax); + audit_syscall_exit(AUDITSC_RESULT(0), 0); __asm__ __volatile__( "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" + "mov %2, %%fs\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk))); + :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); /* we never return here */ } @@ -348,12 +375,12 @@ static inline void clear_IF(struct kernel_vm86_regs * regs) static inline void clear_TF(struct kernel_vm86_regs * regs) { - regs->eflags &= ~TF_MASK; + regs->pt.eflags &= ~TF_MASK; } static inline void clear_AC(struct kernel_vm86_regs * regs) { - regs->eflags &= ~AC_MASK; + regs->pt.eflags &= ~AC_MASK; } /* It is correct to call set_IF(regs) from the set_vflags_* @@ -370,7 +397,7 @@ static inline void clear_AC(struct kernel_vm86_regs * regs) static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) { set_flags(VEFLAGS, eflags, current->thread.v86mask); - set_flags(regs->eflags, eflags, SAFE_MASK); + set_flags(regs->pt.eflags, eflags, SAFE_MASK); if (eflags & IF_MASK) set_IF(regs); else @@ -380,7 +407,7 @@ static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) { set_flags(VFLAGS, flags, current->thread.v86mask); - set_flags(regs->eflags, flags, SAFE_MASK); + set_flags(regs->pt.eflags, flags, SAFE_MASK); if (flags & IF_MASK) set_IF(regs); else @@ -389,7 +416,7 @@ static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_reg static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) { - unsigned long flags = regs->eflags & RETURN_MASK; + unsigned long flags = regs->pt.eflags & RETURN_MASK; if (VEFLAGS & VIF_MASK) flags |= IF_MASK; @@ -493,7 +520,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, unsigned long __user *intr_ptr; unsigned long segoffs; - if (regs->cs == BIOSSEG) + if (regs->pt.xcs == BIOSSEG) goto cannot_handle; if (is_revectored(i, &KVM86->int_revectored)) goto cannot_handle; @@ -505,9 +532,9 @@ static void do_int(struct kernel_vm86_regs *regs, int i, if ((segoffs >> 16) == BIOSSEG) goto cannot_handle; pushw(ssp, sp, get_vflags(regs), cannot_handle); - pushw(ssp, sp, regs->cs, cannot_handle); + pushw(ssp, sp, regs->pt.xcs, cannot_handle); pushw(ssp, sp, IP(regs), cannot_handle); - regs->cs = segoffs >> 16; + regs->pt.xcs = segoffs >> 16; SP(regs) -= 6; IP(regs) = segoffs & 0xffff; clear_TF(regs); @@ -524,7 +551,7 @@ int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno if (VMPI.is_vm86pus) { if ( (trapno==3) || (trapno==1) ) return_to_32bit(regs, VM86_TRAP + (trapno << 8)); - do_int(regs, trapno, (unsigned char __user *) (regs->ss << 4), SP(regs)); + do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs)); return 0; } if (trapno !=1) @@ -560,10 +587,10 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) handle_vm86_trap(regs, 0, 1); \ return; } while (0) - orig_flags = *(unsigned short *)®s->eflags; + orig_flags = *(unsigned short *)®s->pt.eflags; - csp = (unsigned char __user *) (regs->cs << 4); - ssp = (unsigned char __user *) (regs->ss << 4); + csp = (unsigned char __user *) (regs->pt.xcs << 4); + ssp = (unsigned char __user *) (regs->pt.xss << 4); sp = SP(regs); ip = IP(regs); @@ -650,7 +677,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) SP(regs) += 6; } IP(regs) = newip; - regs->cs = newcs; + regs->pt.xcs = newcs; CHECK_IF_IN_TRAP; if (data32) { set_vflags_long(newflags, regs); diff --git a/include/asm-i386/vm86.h b/include/asm-i386/vm86.h index 952fd695738..a5edf517b99 100644 --- a/include/asm-i386/vm86.h +++ b/include/asm-i386/vm86.h @@ -145,26 +145,13 @@ struct vm86plus_struct { * at the end of the structure. Look at ptrace.h to see the "normal" * setup. For user space layout see 'struct vm86_regs' above. */ +#include struct kernel_vm86_regs { /* * normal regs, with special meaning for the segment descriptors.. */ - long ebx; - long ecx; - long edx; - long esi; - long edi; - long ebp; - long eax; - long __null_ds; - long __null_es; - long orig_eax; - long eip; - unsigned short cs, __csh; - long eflags; - long esp; - unsigned short ss, __ssh; + struct pt_regs pt; /* * these are specific to v86 mode: */ -- cgit v1.2.3-70-g09d2 From b2938f880890ebfcccad356275e0000193153623 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Implement smp_processor_id() with the PDA Use the cpu_number in the PDA to implement raw_smp_processor_id. This is a little simpler than using thread_info, though the cpu field in thread_info cannot be removed since it is used for things other than getting the current CPU in common code. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 4 +++- arch/i386/kernel/cpu/common.c | 2 ++ arch/i386/kernel/entry.S | 3 +-- include/asm-i386/pda.h | 2 ++ include/asm-i386/smp.h | 3 ++- 5 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 9620872d353..85f1b038e9c 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -51,7 +51,6 @@ void foo(void) OFFSET(TI_exec_domain, thread_info, exec_domain); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); - OFFSET(TI_cpu, thread_info, cpu); OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); @@ -97,4 +96,7 @@ void foo(void) DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + + BLANK(); + OFFSET(PDA_cpu, i386_pda, cpu_number); } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 4e63d8ce602..e476202b887 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -650,6 +650,7 @@ __cpuinit int alloc_gdt(int cpu) /* Initial PDA used by boot CPU */ struct i386_pda boot_pda = { ._pda = &boot_pda, + .cpu_number = 0, }; static inline void set_kernel_gs(void) @@ -694,6 +695,7 @@ __cpuinit int init_gdt(int cpu, struct task_struct *idle) memset(pda, 0, sizeof(*pda)); pda->_pda = pda; + pda->cpu_number = cpu; return 1; } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index b99d4a16007..d7423efaeea 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -524,8 +524,7 @@ syscall_badsys: #define FIXUP_ESPFIX_STACK \ /* since we are on a wrong stack, we cant make it a C code :( */ \ - GET_THREAD_INFO(%ebp); \ - movl TI_cpu(%ebp), %ebx; \ + movl %gs:PDA_cpu, %ebx; \ PER_CPU(cpu_gdt_descr, %ebx); \ movl GDS_address(%ebx), %ebx; \ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index 4c39ccb1305..f90fde22566 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -11,6 +11,8 @@ struct i386_pda { struct i386_pda *_pda; /* pointer to self */ + + int cpu_number; }; extern struct i386_pda *_cpu_pda[]; diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index bd59c1508e7..64fe624c02c 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -8,6 +8,7 @@ #include #include #include +#include #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -56,7 +57,7 @@ extern void cpu_uninit(void); * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (current_thread_info()->cpu) +#define raw_smp_processor_id() (read_pda(cpu_number)) extern cpumask_t cpu_callout_map; extern cpumask_t cpu_callin_map; -- cgit v1.2.3-70-g09d2 From ec7fcaabbfb3c5bd5189f857b6ac7bb9745ef291 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Implement "current" with the PDA Use the pcurrent field in the PDA to implement the "current" macro. This ends up compiling down to a single instruction to get the current task. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden Cc: Jan Beulich Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 2 ++ arch/i386/kernel/cpu/common.c | 2 ++ arch/i386/kernel/process.c | 1 + include/asm-i386/current.h | 7 ++++--- include/asm-i386/pda.h | 2 ++ 5 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 85f1b038e9c..0666eb0ed7b 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -15,6 +15,7 @@ #include #include #include +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -99,4 +100,5 @@ void foo(void) BLANK(); OFFSET(PDA_cpu, i386_pda, cpu_number); + OFFSET(PDA_pcurrent, i386_pda, pcurrent); } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e476202b887..6958ae5e2fa 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -651,6 +651,7 @@ __cpuinit int alloc_gdt(int cpu) struct i386_pda boot_pda = { ._pda = &boot_pda, .cpu_number = 0, + .pcurrent = &init_task, }; static inline void set_kernel_gs(void) @@ -696,6 +697,7 @@ __cpuinit int init_gdt(int cpu, struct task_struct *idle) memset(pda, 0, sizeof(*pda)); pda->_pda = pda; pda->cpu_number = cpu; + pda->pcurrent = idle; return 1; } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index dc427254517..8749b10d380 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -684,6 +684,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas if (unlikely(prev->fs | next->fs)) loadsegment(fs, next->fs); + write_pda(pcurrent, next_p); /* * Restore IOPL if needed. diff --git a/include/asm-i386/current.h b/include/asm-i386/current.h index 3cbbecd7901..5252ee0f6d7 100644 --- a/include/asm-i386/current.h +++ b/include/asm-i386/current.h @@ -1,13 +1,14 @@ #ifndef _I386_CURRENT_H #define _I386_CURRENT_H -#include +#include +#include struct task_struct; -static __always_inline struct task_struct * get_current(void) +static __always_inline struct task_struct *get_current(void) { - return current_thread_info()->task; + return read_pda(pcurrent); } #define current get_current() diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index f90fde22566..08a35c478af 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -7,12 +7,14 @@ #define _I386_PDA_H #include +#include struct i386_pda { struct i386_pda *_pda; /* pointer to self */ int cpu_number; + struct task_struct *pcurrent; /* current process */ }; extern struct i386_pda *_cpu_pda[]; -- cgit v1.2.3-70-g09d2 From 70463daca852db396ce17f179d2404b257ba0f66 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Store the interrupt regs pointer in the PDA Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- include/asm-i386/irq_regs.h | 28 +++++++++++++++++++++++++++- include/asm-i386/pda.h | 1 + 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/irq_regs.h b/include/asm-i386/irq_regs.h index 3dd9c0b7027..a1b3f7f594a 100644 --- a/include/asm-i386/irq_regs.h +++ b/include/asm-i386/irq_regs.h @@ -1 +1,27 @@ -#include +/* + * Per-cpu current frame pointer - the location of the last exception frame on + * the stack, stored in the PDA. + * + * Jeremy Fitzhardinge + */ +#ifndef _ASM_I386_IRQ_REGS_H +#define _ASM_I386_IRQ_REGS_H + +#include + +static inline struct pt_regs *get_irq_regs(void) +{ + return read_pda(irq_regs); +} + +static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +{ + struct pt_regs *old_regs; + + old_regs = read_pda(irq_regs); + write_pda(irq_regs, new_regs); + + return old_regs; +} + +#endif /* _ASM_I386_IRQ_REGS_H */ diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index 08a35c478af..2ba2736aa10 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h @@ -15,6 +15,7 @@ struct i386_pda int cpu_number; struct task_struct *pcurrent; /* current process */ + struct pt_regs *irq_regs; }; extern struct i386_pda *_cpu_pda[]; -- cgit v1.2.3-70-g09d2 From 63cb683c6ed56a420ba60df6a5b206a44e3f85fe Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: PDA: Fix math emulator for new pt_regs This patch fixes the math emulator, which had not been adjusted to match the changed struct pt_regs. AK: extracted from larger patch by Jeremy. Signed-off-by: Andi Kleen --- include/asm-i386/math_emu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-i386/math_emu.h b/include/asm-i386/math_emu.h index 697673b555c..a4b0aa3320e 100644 --- a/include/asm-i386/math_emu.h +++ b/include/asm-i386/math_emu.h @@ -21,6 +21,7 @@ struct info { long ___eax; long ___ds; long ___es; + long ___fs; long ___orig_eax; long ___eip; long ___cs; -- cgit v1.2.3-70-g09d2 From 6569580de7ae367def89b7671029cb97c1965574 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: Distinguish absolute symbols Ld knows about 2 kinds of symbols, absolute and section relative. Section relative symbols symbols change value when a section is moved and absolute symbols do not. Currently in the linker script we have several labels marking the beginning and ending of sections that are outside of sections, making them absolute symbols. Having a mixture of absolute and section relative symbols refereing to the same data is currently harmless but it is confusing. This must be done carefully as newer revs of ld do not place symbols that appear in sections without data and instead ld makes those symbols global :( My ultimate goal is to build a relocatable kernel. The safest and least intrusive technique is to generate relocation entries so the kernel can be relocated at load time. The only penalty would be an increase in the size of the kernel binary. The problem is that if absolute and relocatable symbols are not properly specified absolute symbols will be relocated or section relative symbols won't be, which is fatal. The practical motivation is that when generating kernels that will run from a reserved area for analyzing what caused a kernel panic, it is simpler if you don't need to hard code the physical memory location they will run at, especially for the distributions. [AK: and merged:] o Also put a message so that in future people can be aware of it and avoid introducing absolute symbols. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/kernel/vmlinux.lds.S | 113 +++++++++++++++++++++----------------- include/asm-generic/vmlinux.lds.h | 10 ++-- 2 files changed, 68 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index c6f84a0322b..cbd24860fbb 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -1,5 +1,11 @@ /* ld script to make i386 Linux kernel * Written by Martin Mares ; + * + * Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. */ #define LOAD_OFFSET __PAGE_OFFSET @@ -24,31 +30,32 @@ SECTIONS . = __KERNEL_START; phys_startup_32 = startup_32 - LOAD_OFFSET; /* read-only */ - _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + _text = .; /* Text and read-only data */ *(.text) SCHED_TEXT LOCK_TEXT KPROBES_TEXT *(.fixup) *(.gnu.warning) - } :text = 0x9090 - - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ + } :text = 0x9090 . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } - __stop___ex_table = .; + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } RODATA . = ALIGN(4); - __tracedata_start = .; .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { + __tracedata_start = .; *(.tracedata) + __tracedata_end = .; } - __tracedata_end = .; /* writeable */ . = ALIGN(4096); @@ -58,10 +65,12 @@ SECTIONS } :data . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(4096); + __nosave_end = .; + } . = ALIGN(4096); .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { @@ -75,8 +84,10 @@ SECTIONS /* rarely changed data like cpu maps */ . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } - _edata = .; /* End of data section */ + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + _edata = .; /* End of data section */ + } #ifdef CONFIG_STACK_UNWIND . = ALIGN(4); @@ -94,54 +105,56 @@ SECTIONS /* might get freed after init */ . = ALIGN(4096); - __smp_alt_begin = .; - __smp_alt_instructions = .; .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + __smp_alt_begin = .; + __smp_alt_instructions = .; *(.smp_altinstructions) + __smp_alt_instructions_end = .; } - __smp_alt_instructions_end = .; . = ALIGN(4); - __smp_locks = .; .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; *(.smp_locks) + __smp_locks_end = .; } - __smp_locks_end = .; .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { *(.smp_altinstr_replacement) + __smp_alt_end = .; } . = ALIGN(4096); - __smp_alt_end = .; /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ - __init_begin = .; .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; _sinittext = .; *(.init.text) _einittext = .; } .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } . = ALIGN(16); - __setup_start = .; - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } - __setup_end = .; - __initcall_start = .; + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; INITCALLS + __initcall_end = .; } - __initcall_end = .; - __con_initcall_start = .; .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; *(.con_initcall.init) + __con_initcall_end = .; } - __con_initcall_end = .; SECURITY_INIT . = ALIGN(4); - __alt_instructions = .; .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; *(.altinstructions) + __alt_instructions_end = .; } - __alt_instructions_end = .; .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } @@ -150,32 +163,32 @@ SECTIONS .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } . = ALIGN(4096); - __initramfs_start = .; - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } - __initramfs_end = .; + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } . = ALIGN(L1_CACHE_BYTES); - __per_cpu_start = .; - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } - __per_cpu_end = .; + .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } . = ALIGN(4096); - __init_end = .; /* freed after init ends here */ - __bss_start = .; /* BSS */ - .bss.page_aligned : AT(ADDR(.bss.page_aligned) - LOAD_OFFSET) { - *(.bss.page_aligned) - } .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __init_end = .; + __bss_start = .; /* BSS */ + *(.bss.page_aligned) *(.bss) + . = ALIGN(4); + __bss_stop = .; + _end = . ; + /* This is where the kernel creates the early boot page tables */ + . = ALIGN(4096); + pg0 = . ; } - . = ALIGN(4); - __bss_stop = .; - - _end = . ; - - /* This is where the kernel creates the early boot page tables */ - . = ALIGN(4096); - pg0 = .; /* Sections to be discarded */ /DISCARD/ : { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e60d6f21fa6..9f4747780da 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -11,8 +11,8 @@ #define RODATA \ . = ALIGN(4096); \ - __start_rodata = .; \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_rodata) = .; \ *(.rodata) *(.rodata.*) \ *(__vermagic) /* Kernel version magic */ \ } \ @@ -119,17 +119,17 @@ *(__ksymtab_strings) \ } \ \ + /* Unwind data binary search table */ \ + EH_FRAME_HDR \ + \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start___param) = .; \ *(__param) \ VMLINUX_SYMBOL(__stop___param) = .; \ + VMLINUX_SYMBOL(__end_rodata) = .; \ } \ \ - /* Unwind data binary search table */ \ - EH_FRAME_HDR \ - \ - __end_rodata = .; \ . = ALIGN(4096); #define SECURITY_INIT \ -- cgit v1.2.3-70-g09d2 From 9f45accf17efc050ba26bf77cc4f166c950b284e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Dec 2006 02:14:03 +0100 Subject: [PATCH] i386: define __pa_symbol() On x86_64 we have to be careful with calculating the physical address of kernel symbols. Both because of compiler odditities and because the symbols live in a different range of the virtual address space. Having a defintition of __pa_symbol that works on both x86_64 and i386 simplifies writing code that works for both x86_64 and i386 that has these kinds of dependencies. So this patch adds the trivial i386 __pa_symbol definition. Added assembly magic similar to RELOC_HIDE as suggested by Andi Kleen. Just picked it up from x86_64. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- include/asm-i386/page.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index f5bf544c729..5a70501291d 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -124,6 +124,9 @@ extern int page_is_ram(unsigned long pagenr); #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) #define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +/* __pa_symbol should be used for C visible symbols. + This seems to be the official gcc blessed way to do such arithmetic. */ +#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x),0)) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #ifdef CONFIG_FLATMEM -- cgit v1.2.3-70-g09d2 From 2a43f3ede48ea3d5790b863b719a1e21c90a3697 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] i386: CONFIG_PHYSICAL_START cleanup Defining __PHYSICAL_START and __KERNEL_START in asm-i386/page.h works but it triggers a full kernel rebuild for the silliest of reasons. This modifies the users to directly use CONFIG_PHYSICAL_START and linux/config.h which prevents the full rebuild problem, which makes the code much more maintainer and hopefully user friendly. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/boot/compressed/head.S | 7 +++---- arch/i386/boot/compressed/misc.c | 7 +++---- arch/i386/kernel/vmlinux.lds.S | 2 +- include/asm-i386/page.h | 3 --- 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index b5893e4ecd3..40a8de8270a 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -25,7 +25,6 @@ #include #include -#include .globl startup_32 @@ -75,7 +74,7 @@ startup_32: popl %esi # discard address popl %esi # real mode pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $__PHYSICAL_START + ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START /* * We come here, if we were loaded high. @@ -100,7 +99,7 @@ startup_32: popl %ecx # lcount popl %edx # high_buffer_start popl %eax # hcount - movl $__PHYSICAL_START,%edi + movl $CONFIG_PHYSICAL_START,%edi cli # make sure we don't get interrupted ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine @@ -125,5 +124,5 @@ move_routine_start: movsl movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $__PHYSICAL_START + ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START move_routine_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index b2ccd543410..20970ff4411 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -13,7 +13,6 @@ #include #include #include -#include /* * gzip declarations @@ -303,7 +302,7 @@ static void setup_normal_output_buffer(void) #else if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); #endif - output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ + output_data = (unsigned char *)CONFIG_PHYSICAL_START; /* Normally Points to 1M */ free_mem_end_ptr = (long)real_mode; } @@ -326,8 +325,8 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv) low_buffer_size = low_buffer_end - LOW_BUFFER_START; high_loaded = 1; free_mem_end_ptr = (long)high_buffer_start; - if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); + if ( (CONFIG_PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { + high_buffer_start = (uch *)(CONFIG_PHYSICAL_START + low_buffer_size); mv->hcount = 0; /* say: we need not to move high_buffer */ } else mv->hcount = -1; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index c217e18f108..f8d61ec4c8c 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -27,7 +27,7 @@ PHDRS { } SECTIONS { - . = __KERNEL_START; + . = LOAD_OFFSET + CONFIG_PHYSICAL_START; phys_startup_32 = startup_32 - LOAD_OFFSET; /* read-only */ .text : AT(ADDR(.text) - LOAD_OFFSET) { diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 5a70501291d..2b69686107a 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -112,12 +112,9 @@ extern int page_is_ram(unsigned long pagenr); #ifdef __ASSEMBLY__ #define __PAGE_OFFSET CONFIG_PAGE_OFFSET -#define __PHYSICAL_START CONFIG_PHYSICAL_START #else #define __PAGE_OFFSET ((unsigned long)CONFIG_PAGE_OFFSET) -#define __PHYSICAL_START ((unsigned long)CONFIG_PHYSICAL_START) #endif -#define __KERNEL_START (__PAGE_OFFSET + __PHYSICAL_START) #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) -- cgit v1.2.3-70-g09d2 From 968de4f02621db35b8ae5239c8cfc6664fb872d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] i386: Relocatable kernel support This patch modifies the i386 kernel so that if CONFIG_RELOCATABLE is selected it will be able to be loaded at any 4K aligned address below 1G. The technique used is to compile the decompressor with -fPIC and modify it so the decompressor is fully relocatable. For the main kernel relocations are generated. Resulting in a kernel that is relocatable with no runtime overhead and no need to modify the source code. A reserved 32bit word in the parameters has been assigned to serve as a stack so we figure out where are running. Signed-off-by: Eric W. Biederman Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 12 + arch/i386/Makefile | 4 +- arch/i386/boot/compressed/Makefile | 28 +- arch/i386/boot/compressed/head.S | 184 +++++++---- arch/i386/boot/compressed/misc.c | 261 ++++++++-------- arch/i386/boot/compressed/relocs.c | 563 ++++++++++++++++++++++++++++++++++ arch/i386/boot/compressed/vmlinux.lds | 43 +++ arch/i386/boot/compressed/vmlinux.scr | 3 +- arch/i386/boot/setup.S | 29 +- include/linux/screen_info.h | 3 +- 10 files changed, 920 insertions(+), 210 deletions(-) create mode 100644 arch/i386/boot/compressed/relocs.c create mode 100644 arch/i386/boot/compressed/vmlinux.lds (limited to 'include') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 8ff1c6fb5aa..d588ca874bb 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -773,6 +773,18 @@ config CRASH_DUMP PHYSICAL_START. For more details see Documentation/kdump/kdump.txt +config RELOCATABLE + bool "Build a relocatable kernel" + help + This build a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to the kernel binary about 10% larger, + but are discarded at runtime. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 0677908dfa0..d1aca52bf69 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -26,7 +26,9 @@ endif LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S -LDFLAGS_vmlinux := +ifdef CONFIG_RELOCATABLE +LDFLAGS_vmlinux := --emit-relocs +endif CHECKFLAGS += -D__i386__ CFLAGS += -pipe -msoft-float diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile index 258ea95224f..cc28da3a881 100644 --- a/arch/i386/boot/compressed/Makefile +++ b/arch/i386/boot/compressed/Makefile @@ -4,22 +4,42 @@ # create a compressed vmlinux image from the original vmlinux # -targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o +targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \ + vmlinux.bin.all vmlinux.relocs EXTRA_AFLAGS := -traditional -LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 +LDFLAGS_vmlinux := -T +CFLAGS_misc.o += -fPIC +hostprogs-y := relocs -$(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE +$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(obj)/relocs $< > $@ +$(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs +quiet_cmd_relocbin = BUILD $@ + cmd_relocbin = cat $(filter-out FORCE,$^) > $@ +$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE + $(call if_changed,relocbin) + +ifdef CONFIG_RELOCATABLE +$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) +else $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +endif LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE +$(obj)/piggy.o: $(src)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE $(call if_changed,ld) diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index 40a8de8270a..e4dd7a6b9b0 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -25,9 +25,11 @@ #include #include +#include +.section ".text.head" .globl startup_32 - + startup_32: cld cli @@ -36,93 +38,141 @@ startup_32: movl %eax,%es movl %eax,%fs movl %eax,%gs + movl %eax,%ss - lss stack_start,%esp - xorl %eax,%eax -1: incl %eax # check that A20 really IS enabled - movl %eax,0x000000 # loop forever if it isn't - cmpl %eax,0x100000 - je 1b +/* Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x34-0x3f are used as the stack for this calculation. + * Only 4 bytes are needed. + */ + leal 0x40(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* Compute the delta between where we were compiled to run at + * and where the code will actually run at. + */ + /* Start with the delta to where the kernel will run at. If we are + * a relocatable kernel this is the delta to our load address otherwise + * this is the delta to CONFIG_PHYSICAL start. + */ +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx +#else + movl $(CONFIG_PHYSICAL_START - startup_32), %ebx +#endif + + /* Replace the compressed data size with the uncompressed size */ + subl input_len(%ebp), %ebx + movl output_len(%ebp), %eax + addl %eax, %ebx + /* Add 8 bytes for every 32K input block */ + shrl $12, %eax + addl %eax, %ebx + /* Add 32K + 18 bytes of extra slack */ + addl $(32768 + 18), %ebx + /* Align on a 4K boundary */ + addl $4095, %ebx + andl $~4095, %ebx + +/* Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal _end(%ebp), %esi + leal _end(%ebx), %edi + movl $(_end - startup_32), %ecx + std + rep + movsb + cld + popl %esi + +/* Compute the kernel start address. + */ +#ifdef CONFIG_RELOCATABLE + leal startup_32(%ebp), %ebp +#else + movl $CONFIG_PHYSICAL_START, %ebp +#endif /* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. + * Jump to the relocated address. */ - pushl $0 - popfl + leal relocated(%ebx), %eax + jmp *%eax +.section ".text" +relocated: + /* * Clear BSS */ xorl %eax,%eax - movl $_edata,%edi - movl $_end,%ecx + leal _edata(%ebx),%edi + leal _end(%ebx), %ecx subl %edi,%ecx cld rep stosb + +/* + * Setup the stack for the decompressor + */ + leal stack_end(%ebx), %esp + /* * Do the decompression, and jump to the new kernel.. */ - subl $16,%esp # place for structure on the stack - movl %esp,%eax + movl output_len(%ebx), %eax + pushl %eax + pushl %ebp # output address + movl input_len(%ebx), %eax + pushl %eax # input_len + leal input_data(%ebx), %eax + pushl %eax # input_data + leal _end(%ebx), %eax + pushl %eax # end of the image as third argument pushl %esi # real mode pointer as second arg - pushl %eax # address of structure as first arg call decompress_kernel - orl %eax,%eax - jnz 3f - popl %esi # discard address - popl %esi # real mode pointer - xorl %ebx,%ebx - ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START + addl $20, %esp + popl %ecx + +#if CONFIG_RELOCATABLE +/* Find the address of the relocations. + */ + movl %ebp, %edi + addl %ecx, %edi + +/* Calculate the delta between where vmlinux was compiled to run + * and where it was actually loaded. + */ + movl %ebp, %ebx + subl $CONFIG_PHYSICAL_START, %ebx /* - * We come here, if we were loaded high. - * We need to move the move-in-place routine down to 0x1000 - * and then start it with the buffer addresses in registers, - * which we got from the stack. + * Process relocations. */ -3: - movl $move_routine_start,%esi - movl $0x1000,%edi - movl $move_routine_end,%ecx - subl %esi,%ecx - addl $3,%ecx - shrl $2,%ecx - cld - rep - movsl - - popl %esi # discard the address - popl %ebx # real mode pointer - popl %esi # low_buffer_start - popl %ecx # lcount - popl %edx # high_buffer_start - popl %eax # hcount - movl $CONFIG_PHYSICAL_START,%edi - cli # make sure we don't get interrupted - ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine + +1: subl $4, %edi + movl 0(%edi), %ecx + testl %ecx, %ecx + jz 2f + addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) + jmp 1b +2: +#endif /* - * Routine (template) for moving the decompressed kernel in place, - * if we were high loaded. This _must_ PIC-code ! + * Jump to the decompressed kernel. */ -move_routine_start: - movl %ecx,%ebp - shrl $2,%ecx - rep - movsl - movl %ebp,%ecx - andl $3,%ecx - rep - movsb - movl %edx,%esi - movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0 - addl $3,%ecx - shrl $2,%ecx - rep - movsl - movl %ebx,%esi # Restore setup pointer xorl %ebx,%ebx - ljmp $(__BOOT_CS), $CONFIG_PHYSICAL_START -move_routine_end: + jmp *%ebp + +.bss +.balign 4 +stack: + .fill 4096, 1, 0 +stack_end: diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 20970ff4411..4eac24e95a1 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -13,6 +13,88 @@ #include #include #include +#include + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analized. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved. + * The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. To avoind problems internal to a block + * adding an extra 32767 bytes (the worst case uncompressed block size) is + * sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actuall data and 4K of bss. + * + */ /* * gzip declarations @@ -29,15 +111,20 @@ typedef unsigned char uch; typedef unsigned short ush; typedef unsigned long ulg; -#define WSIZE 0x8000 /* Window size must be at least 32k, */ - /* and a power of two */ +#define WSIZE 0x80000000 /* Window size must be at least 32k, + * and a power of two + * We don't actually have a window just + * a huge output buffer so I report + * a 2G windows size, as that should + * always be larger than our output buffer. + */ -static uch *inbuf; /* input buffer */ -static uch window[WSIZE]; /* Sliding window buffer */ +static uch *inbuf; /* input buffer */ +static uch *window; /* Sliding window buffer, (and final output buffer) */ -static unsigned insize = 0; /* valid bytes in inbuf */ -static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ -static unsigned outcnt = 0; /* bytes in output buffer */ +static unsigned insize; /* valid bytes in inbuf */ +static unsigned inptr; /* index of next byte to be processed in inbuf */ +static unsigned outcnt; /* bytes in output buffer */ /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ @@ -88,8 +175,6 @@ extern unsigned char input_data[]; extern int input_len; static long bytes_out = 0; -static uch *output_data; -static unsigned long output_ptr = 0; static void *malloc(int size); static void free(void *where); @@ -99,17 +184,10 @@ static void *memcpy(void *dest, const void *src, unsigned n); static void putstr(const char *); -extern int end; -static long free_mem_ptr = (long)&end; -static long free_mem_end_ptr; +static unsigned long free_mem_ptr; +static unsigned long free_mem_end_ptr; -#define INPLACE_MOVE_ROUTINE 0x1000 -#define LOW_BUFFER_START 0x2000 -#define LOW_BUFFER_MAX 0x90000 #define HEAP_SIZE 0x3000 -static unsigned int low_buffer_end, low_buffer_size; -static int high_loaded =0; -static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; static char *vidmem = (char *)0xb8000; static int vidport; @@ -150,7 +228,7 @@ static void gzip_mark(void **ptr) static void gzip_release(void **ptr) { - free_mem_ptr = (long) *ptr; + free_mem_ptr = (unsigned long) *ptr; } static void scroll(void) @@ -178,7 +256,7 @@ static void putstr(const char *s) y--; } } else { - vidmem [ ( x + cols * y ) * 2 ] = c; + vidmem [ ( x + cols * y ) * 2 ] = c; if ( ++x >= cols ) { x = 0; if ( ++y >= lines ) { @@ -223,58 +301,31 @@ static void* memcpy(void* dest, const void* src, unsigned n) */ static int fill_inbuf(void) { - if (insize != 0) { - error("ran out of input data"); - } - - inbuf = input_data; - insize = input_len; - inptr = 1; - return inbuf[0]; + error("ran out of input data"); + return 0; } /* =========================================================================== * Write the output window window[0..outcnt-1] and update crc and bytes_out. * (Used for the decompressed data only.) */ -static void flush_window_low(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, *out, ch; - - in = window; - out = &output_data[output_ptr]; - for (n = 0; n < outcnt; n++) { - ch = *out++ = *in++; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - output_ptr += (ulg)outcnt; - outcnt = 0; -} - -static void flush_window_high(void) -{ - ulg c = crc; /* temporary variable */ - unsigned n; - uch *in, ch; - in = window; - for (n = 0; n < outcnt; n++) { - ch = *output_data++ = *in++; - if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start; - c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); - } - crc = c; - bytes_out += (ulg)outcnt; - outcnt = 0; -} - static void flush_window(void) { - if (high_loaded) flush_window_high(); - else flush_window_low(); + /* With my window equal to my output buffer + * I only need to compute the crc here. + */ + ulg c = crc; /* temporary variable */ + unsigned n; + uch *in, ch; + + in = window; + for (n = 0; n < outcnt; n++) { + ch = *in++; + c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); + } + crc = c; + bytes_out += (ulg)outcnt; + outcnt = 0; } static void error(char *x) @@ -286,66 +337,8 @@ static void error(char *x) while(1); /* Halt */ } -#define STACK_SIZE (4096) - -long user_stack [STACK_SIZE]; - -struct { - long * a; - short b; - } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS }; - -static void setup_normal_output_buffer(void) -{ -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); -#endif - output_data = (unsigned char *)CONFIG_PHYSICAL_START; /* Normally Points to 1M */ - free_mem_end_ptr = (long)real_mode; -} - -struct moveparams { - uch *low_buffer_start; int lcount; - uch *high_buffer_start; int hcount; -}; - -static void setup_output_buffer_if_we_run_high(struct moveparams *mv) -{ - high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); -#ifdef STANDARD_MEMORY_BIOS_CALL - if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); -#else - if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); -#endif - mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; - low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX - ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; - low_buffer_size = low_buffer_end - LOW_BUFFER_START; - high_loaded = 1; - free_mem_end_ptr = (long)high_buffer_start; - if ( (CONFIG_PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { - high_buffer_start = (uch *)(CONFIG_PHYSICAL_START + low_buffer_size); - mv->hcount = 0; /* say: we need not to move high_buffer */ - } - else mv->hcount = -1; - mv->high_buffer_start = high_buffer_start; -} - -static void close_output_buffer_if_we_run_high(struct moveparams *mv) -{ - if (bytes_out > low_buffer_size) { - mv->lcount = low_buffer_size; - if (mv->hcount) - mv->hcount = bytes_out - low_buffer_size; - } else { - mv->lcount = bytes_out; - mv->hcount = 0; - } -} - -asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) +asmlinkage void decompress_kernel(void *rmode, unsigned long end, + uch *input_data, unsigned long input_len, uch *output) { real_mode = rmode; @@ -360,13 +353,25 @@ asmlinkage int decompress_kernel(struct moveparams *mv, void *rmode) lines = RM_SCREEN_INFO.orig_video_lines; cols = RM_SCREEN_INFO.orig_video_cols; - if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); - else setup_output_buffer_if_we_run_high(mv); + window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = end; /* Heap */ + free_mem_end_ptr = end + HEAP_SIZE; + inbuf = input_data; /* Input buffer */ + insize = input_len; + inptr = 0; + + if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff) + error("Destination address not 4M aligned"); + if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#ifndef CONFIG_RELOCATABLE + if ((u32)output != CONFIG_PHYSICAL_START) + error("Wrong destination address"); +#endif makecrc(); putstr("Uncompressing Linux... "); gunzip(); putstr("Ok, booting the kernel.\n"); - if (high_loaded) close_output_buffer_if_we_run_high(mv); - return high_loaded; + return; } diff --git a/arch/i386/boot/compressed/relocs.c b/arch/i386/boot/compressed/relocs.c new file mode 100644 index 00000000000..0551ceb21be --- /dev/null +++ b/arch/i386/boot/compressed/relocs.c @@ -0,0 +1,563 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define USE_BSD +#include + +#define MAX_SHDRS 100 +static Elf32_Ehdr ehdr; +static Elf32_Shdr shdr[MAX_SHDRS]; +static Elf32_Sym *symtab[MAX_SHDRS]; +static Elf32_Rel *reltab[MAX_SHDRS]; +static char *strtab[MAX_SHDRS]; +static unsigned long reloc_count, reloc_idx; +static unsigned long *relocs; + +static void die(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static const char *sym_type(unsigned type) +{ + static const char *type_name[] = { +#define SYM_TYPE(X) [X] = #X + SYM_TYPE(STT_NOTYPE), + SYM_TYPE(STT_OBJECT), + SYM_TYPE(STT_FUNC), + SYM_TYPE(STT_SECTION), + SYM_TYPE(STT_FILE), + SYM_TYPE(STT_COMMON), + SYM_TYPE(STT_TLS), +#undef SYM_TYPE + }; + const char *name = "unknown sym type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sym_bind(unsigned bind) +{ + static const char *bind_name[] = { +#define SYM_BIND(X) [X] = #X + SYM_BIND(STB_LOCAL), + SYM_BIND(STB_GLOBAL), + SYM_BIND(STB_WEAK), +#undef SYM_BIND + }; + const char *name = "unknown sym bind name"; + if (bind < sizeof(bind_name)/sizeof(bind_name[0])) { + name = bind_name[bind]; + } + return name; +} + +static const char *sym_visibility(unsigned visibility) +{ + static const char *visibility_name[] = { +#define SYM_VISIBILITY(X) [X] = #X + SYM_VISIBILITY(STV_DEFAULT), + SYM_VISIBILITY(STV_INTERNAL), + SYM_VISIBILITY(STV_HIDDEN), + SYM_VISIBILITY(STV_PROTECTED), +#undef SYM_VISIBILITY + }; + const char *name = "unknown sym visibility name"; + if (visibility < sizeof(visibility_name)/sizeof(visibility_name[0])) { + name = visibility_name[visibility]; + } + return name; +} + +static const char *rel_type(unsigned type) +{ + static const char *type_name[] = { +#define REL_TYPE(X) [X] = #X + REL_TYPE(R_386_NONE), + REL_TYPE(R_386_32), + REL_TYPE(R_386_PC32), + REL_TYPE(R_386_GOT32), + REL_TYPE(R_386_PLT32), + REL_TYPE(R_386_COPY), + REL_TYPE(R_386_GLOB_DAT), + REL_TYPE(R_386_JMP_SLOT), + REL_TYPE(R_386_RELATIVE), + REL_TYPE(R_386_GOTOFF), + REL_TYPE(R_386_GOTPC), +#undef REL_TYPE + }; + const char *name = "unknown type rel type name"; + if (type < sizeof(type_name)/sizeof(type_name[0])) { + name = type_name[type]; + } + return name; +} + +static const char *sec_name(unsigned shndx) +{ + const char *sec_strtab; + const char *name; + sec_strtab = strtab[ehdr.e_shstrndx]; + name = ""; + if (shndx < ehdr.e_shnum) { + name = sec_strtab + shdr[shndx].sh_name; + } + else if (shndx == SHN_ABS) { + name = "ABSOLUTE"; + } + else if (shndx == SHN_COMMON) { + name = "COMMON"; + } + return name; +} + +static const char *sym_name(const char *sym_strtab, Elf32_Sym *sym) +{ + const char *name; + name = ""; + if (sym->st_name) { + name = sym_strtab + sym->st_name; + } + else { + name = sec_name(shdr[sym->st_shndx].sh_name); + } + return name; +} + + + +#if BYTE_ORDER == LITTLE_ENDIAN +#define le16_to_cpu(val) (val) +#define le32_to_cpu(val) (val) +#endif +#if BYTE_ORDER == BIG_ENDIAN +#define le16_to_cpu(val) bswap_16(val) +#define le32_to_cpu(val) bswap_32(val) +#endif + +static uint16_t elf16_to_cpu(uint16_t val) +{ + return le16_to_cpu(val); +} + +static uint32_t elf32_to_cpu(uint32_t val) +{ + return le32_to_cpu(val); +} + +static void read_ehdr(FILE *fp) +{ + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { + die("Cannot read ELF header: %s\n", + strerror(errno)); + } + if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) { + die("No ELF magic\n"); + } + if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) { + die("Not a 32 bit executable\n"); + } + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + die("Not a LSB ELF executable\n"); + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + die("Unknown ELF version\n"); + } + /* Convert the fields to native endian */ + ehdr.e_type = elf16_to_cpu(ehdr.e_type); + ehdr.e_machine = elf16_to_cpu(ehdr.e_machine); + ehdr.e_version = elf32_to_cpu(ehdr.e_version); + ehdr.e_entry = elf32_to_cpu(ehdr.e_entry); + ehdr.e_phoff = elf32_to_cpu(ehdr.e_phoff); + ehdr.e_shoff = elf32_to_cpu(ehdr.e_shoff); + ehdr.e_flags = elf32_to_cpu(ehdr.e_flags); + ehdr.e_ehsize = elf16_to_cpu(ehdr.e_ehsize); + ehdr.e_phentsize = elf16_to_cpu(ehdr.e_phentsize); + ehdr.e_phnum = elf16_to_cpu(ehdr.e_phnum); + ehdr.e_shentsize = elf16_to_cpu(ehdr.e_shentsize); + ehdr.e_shnum = elf16_to_cpu(ehdr.e_shnum); + ehdr.e_shstrndx = elf16_to_cpu(ehdr.e_shstrndx); + + if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) { + die("Unsupported ELF header type\n"); + } + if (ehdr.e_machine != EM_386) { + die("Not for x86\n"); + } + if (ehdr.e_version != EV_CURRENT) { + die("Unknown ELF version\n"); + } + if (ehdr.e_ehsize != sizeof(Elf32_Ehdr)) { + die("Bad Elf header size\n"); + } + if (ehdr.e_phentsize != sizeof(Elf32_Phdr)) { + die("Bad program header entry\n"); + } + if (ehdr.e_shentsize != sizeof(Elf32_Shdr)) { + die("Bad section header entry\n"); + } + if (ehdr.e_shstrndx >= ehdr.e_shnum) { + die("String table index out of bounds\n"); + } +} + +static void read_shdrs(FILE *fp) +{ + int i; + if (ehdr.e_shnum > MAX_SHDRS) { + die("%d section headers supported: %d\n", + ehdr.e_shnum, MAX_SHDRS); + } + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + ehdr.e_shoff, strerror(errno)); + } + if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) { + die("Cannot read ELF section headers: %s\n", + strerror(errno)); + } + for(i = 0; i < ehdr.e_shnum; i++) { + shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name); + shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type); + shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags); + shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr); + shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset); + shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size); + shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link); + shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info); + shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign); + shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize); + } + +} + +static void read_strtabs(FILE *fp) +{ + int i; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_STRTAB) { + continue; + } + strtab[i] = malloc(shdr[i].sh_size); + if (!strtab[i]) { + die("malloc of %d bytes for strtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + } +} + +static void read_symtabs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + symtab[i] = malloc(shdr[i].sh_size); + if (!symtab[i]) { + die("malloc of %d bytes for symtab failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) { + symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name); + symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value); + symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size); + symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx); + } + } +} + + +static void read_relocs(FILE *fp) +{ + int i,j; + for(i = 0; i < ehdr.e_shnum; i++) { + if (shdr[i].sh_type != SHT_REL) { + continue; + } + reltab[i] = malloc(shdr[i].sh_size); + if (!reltab[i]) { + die("malloc of %d bytes for relocs failed\n", + shdr[i].sh_size); + } + if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) { + die("Seek to %d failed: %s\n", + shdr[i].sh_offset, strerror(errno)); + } + if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) { + die("Cannot read symbol table: %s\n", + strerror(errno)); + } + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset); + reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info); + } + } +} + + +static void print_absolute_symbols(void) +{ + int i; + printf("Absolute symbols\n"); + printf(" Num: Value Size Type Bind Visibility Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + int j; + if (shdr[i].sh_type != SHT_SYMTAB) { + continue; + } + sh_symtab = symtab[i]; + sym_strtab = strtab[shdr[i].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) { + Elf32_Sym *sym; + const char *name; + sym = &symtab[i][j]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%5d %08x %5d %10s %10s %12s %s\n", + j, sym->st_value, sym->st_size, + sym_type(ELF32_ST_TYPE(sym->st_info)), + sym_bind(ELF32_ST_BIND(sym->st_info)), + sym_visibility(ELF32_ST_VISIBILITY(sym->st_other)), + name); + } + } + printf("\n"); +} + +static void print_absolute_relocs(void) +{ + int i; + printf("Absolute relocations\n"); + printf("Offset Info Type Sym.Value Sym.Name\n"); + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + const char *name; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + name = sym_name(sym_strtab, sym); + if (sym->st_shndx != SHN_ABS) { + continue; + } + printf("%08x %08x %10s %08x %s\n", + rel->r_offset, + rel->r_info, + rel_type(ELF32_R_TYPE(rel->r_info)), + sym->st_value, + name); + } + } + printf("\n"); +} + +static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) +{ + int i; + /* Walk through the relocations */ + for(i = 0; i < ehdr.e_shnum; i++) { + char *sym_strtab; + Elf32_Sym *sh_symtab; + unsigned sec_applies, sec_symtab; + int j; + if (shdr[i].sh_type != SHT_REL) { + continue; + } + sec_symtab = shdr[i].sh_link; + sec_applies = shdr[i].sh_info; + if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) { + continue; + } + sh_symtab = symtab[sec_symtab]; + sym_strtab = strtab[shdr[sec_symtab].sh_link]; + for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) { + Elf32_Rel *rel; + Elf32_Sym *sym; + unsigned r_type; + rel = &reltab[i][j]; + sym = &sh_symtab[ELF32_R_SYM(rel->r_info)]; + r_type = ELF32_R_TYPE(rel->r_info); + /* Don't visit relocations to absolute symbols */ + if (sym->st_shndx == SHN_ABS) { + continue; + } + if (r_type == R_386_PC32) { + /* PC relative relocations don't need to be adjusted */ + } + else if (r_type == R_386_32) { + /* Visit relocations that need to be adjusted */ + visit(rel, sym); + } + else { + die("Unsupported relocation type: %d\n", r_type); + } + } + } +} + +static void count_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + reloc_count += 1; +} + +static void collect_reloc(Elf32_Rel *rel, Elf32_Sym *sym) +{ + /* Remember the address that needs to be adjusted. */ + relocs[reloc_idx++] = rel->r_offset; +} + +static int cmp_relocs(const void *va, const void *vb) +{ + const unsigned long *a, *b; + a = va; b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; +} + +static void emit_relocs(int as_text) +{ + int i; + /* Count how many relocations I have and allocate space for them. */ + reloc_count = 0; + walk_relocs(count_reloc); + relocs = malloc(reloc_count * sizeof(relocs[0])); + if (!relocs) { + die("malloc of %d entries for relocs failed\n", + reloc_count); + } + /* Collect up the relocations */ + reloc_idx = 0; + walk_relocs(collect_reloc); + + /* Order the relocations for more efficient processing */ + qsort(relocs, reloc_count, sizeof(relocs[0]), cmp_relocs); + + /* Print the relocations */ + if (as_text) { + /* Print the relocations in a form suitable that + * gas will like. + */ + printf(".section \".data.reloc\",\"a\"\n"); + printf(".balign 4\n"); + for(i = 0; i < reloc_count; i++) { + printf("\t .long 0x%08lx\n", relocs[i]); + } + printf("\n"); + } + else { + unsigned char buf[4]; + buf[0] = buf[1] = buf[2] = buf[3] = 0; + /* Print a stop */ + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + /* Now print each relocation */ + for(i = 0; i < reloc_count; i++) { + buf[0] = (relocs[i] >> 0) & 0xff; + buf[1] = (relocs[i] >> 8) & 0xff; + buf[2] = (relocs[i] >> 16) & 0xff; + buf[3] = (relocs[i] >> 24) & 0xff; + printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]); + } + } +} + +static void usage(void) +{ + die("i386_reloc [--abs | --text] vmlinux\n"); +} + +int main(int argc, char **argv) +{ + int show_absolute; + int as_text; + const char *fname; + FILE *fp; + int i; + + show_absolute = 0; + as_text = 0; + fname = NULL; + for(i = 1; i < argc; i++) { + char *arg = argv[i]; + if (*arg == '-') { + if (strcmp(argv[1], "--abs") == 0) { + show_absolute = 1; + continue; + } + else if (strcmp(argv[1], "--text") == 0) { + as_text = 1; + continue; + } + } + else if (!fname) { + fname = arg; + continue; + } + usage(); + } + if (!fname) { + usage(); + } + fp = fopen(fname, "r"); + if (!fp) { + die("Cannot open %s: %s\n", + fname, strerror(errno)); + } + read_ehdr(fp); + read_shdrs(fp); + read_strtabs(fp); + read_symtabs(fp); + read_relocs(fp); + if (show_absolute) { + print_absolute_symbols(); + print_absolute_relocs(); + return 0; + } + emit_relocs(as_text); + return 0; +} diff --git a/arch/i386/boot/compressed/vmlinux.lds b/arch/i386/boot/compressed/vmlinux.lds new file mode 100644 index 00000000000..cc4854f6c6c --- /dev/null +++ b/arch/i386/boot/compressed/vmlinux.lds @@ -0,0 +1,43 @@ +OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") +OUTPUT_ARCH(i386) +ENTRY(startup_32) +SECTIONS +{ + /* Be careful parts of head.S assume startup_32 is at + * address 0. + */ + . = 0 ; + .text.head : { + _head = . ; + *(.text.head) + _ehead = . ; + } + .data.compressed : { + *(.data.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + _end = . ; + } +} diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr index 1ed9d791f86..707a88f7f29 100644 --- a/arch/i386/boot/compressed/vmlinux.scr +++ b/arch/i386/boot/compressed/vmlinux.scr @@ -1,9 +1,10 @@ SECTIONS { - .data : { + .data.compressed : { input_len = .; LONG(input_data_end - input_data) input_data = .; *(.data) + output_len = . - 4; input_data_end = .; } } diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 3aec4538a11..9aa8b051818 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -588,11 +588,6 @@ rmodeswtch_normal: call default_switch rmodeswtch_end: -# we get the code32 start address and modify the below 'jmpi' -# (loader may have changed it) - movl %cs:code32_start, %eax - movl %eax, %cs:code32 - # Now we move the system to its rightful place ... but we check if we have a # big-kernel. In that case we *must* not move it ... testb $LOADED_HIGH, %cs:loadflags @@ -788,11 +783,12 @@ a20_err_msg: a20_done: #endif /* CONFIG_X86_VOYAGER */ -# set up gdt and idt +# set up gdt and idt and 32bit start address lidt idt_48 # load idt with 0,0 xorl %eax, %eax # Compute gdt_base movw %ds, %ax # (Convert %ds:gdt to a linear ptr) shll $4, %eax + addl %eax, code32 addl $gdt, %eax movl %eax, (gdt_48+2) lgdt gdt_48 # load gdt with whatever is @@ -851,9 +847,26 @@ flush_instr: # Manual, Mixing 16-bit and 32-bit code, page 16-6) .byte 0x66, 0xea # prefix + jmpi-opcode -code32: .long 0x1000 # will be set to 0x100000 - # for big kernels +code32: .long startup_32 # will be set to %cs+startup_32 .word __BOOT_CS +.code32 +startup_32: + movl $(__BOOT_DS), %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss + + xorl %eax, %eax +1: incl %eax # check that A20 really IS enabled + movl %eax, 0x00000000 # loop forever if it isn't + cmpl %eax, 0x00100000 + je 1b + + # Jump to the 32bit entry point + jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi) +.code16 # Here's a bunch of information about your current kernel.. kernel_version: .ascii UTS_RELEASE diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 2925e66a673..b02308ee766 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -42,7 +42,8 @@ struct screen_info { u16 pages; /* 0x32 */ u16 vesa_attributes; /* 0x34 */ u32 capabilities; /* 0x36 */ - /* 0x3a -- 0x3f reserved for future expansion */ + /* 0x3a -- 0x3b reserved for future expansion */ + /* 0x3c -- 0x3f micro stack for relocatable kernels */ }; extern struct screen_info screen_info; -- cgit v1.2.3-70-g09d2 From e69f202d0a1419219198566e1c22218a5c71a9a6 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] i386: Implement CONFIG_PHYSICAL_ALIGN o Now CONFIG_PHYSICAL_START is being replaced with CONFIG_PHYSICAL_ALIGN. Hardcoding the kernel physical start value creates a problem in relocatable kernel context due to boot loader limitations. For ex, if somebody compiles a relocatable kernel to be run from address 4MB, but this kernel will run from location 1MB as grub loads the kernel at physical address 1MB. Kernel thinks that I am a relocatable kernel and I should run from the address I have been loaded at. So somebody wanting to run kernel from 4MB alignment location (for improved performance regions) can't do that. o Hence, Eric proposed that probably CONFIG_PHYSICAL_ALIGN will make more sense in relocatable kernel context. At run time kernel will move itself to a physical addr location which meets user specified alignment restrictions. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen --- arch/i386/Kconfig | 33 ++++++++++++++++++--------------- arch/i386/boot/compressed/head.S | 26 ++++++++++++++------------ arch/i386/boot/compressed/misc.c | 7 ++++--- arch/i386/kernel/vmlinux.lds.S | 3 ++- include/asm-i386/boot.h | 6 +++++- 5 files changed, 43 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index d588ca874bb..fd2fa7a7ec5 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -785,23 +785,26 @@ config RELOCATABLE must live at a different physical address than the primary kernel. -config PHYSICAL_START - hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) - - default "0x1000000" if CRASH_DUMP +config PHYSICAL_ALIGN + hex "Alignment value to which kernel should be aligned" default "0x100000" + range 0x2000 0x400000 help - This gives the physical address where the kernel is loaded. Normally - for regular kernels this value is 0x100000 (1MB). But in the case - of kexec on panic the fail safe kernel needs to run at a different - address than the panic-ed kernel. This option is used to set the load - address for kernels used to capture crash dump on being kexec'ed - after panic. The default value for crash dump kernels is - 0x1000000 (16MB). This can also be set based on the "X" value as - specified in the "crashkernel=YM@XM" command line boot parameter - passed to the panic-ed kernel. Typically this parameter is set as - crashkernel=64M@16M. Please take a look at - Documentation/kdump/kdump.txt for more details about crash dumps. + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is set, kernel will move itself to nearest + address aligned to above value and run from there. + + If bootloader loads the kernel at a non-aligned address and + CONFIG_RELOCATABLE is not set, kernel will ignore the run time + load address and decompress itself to the address it has been + compiled for and run from there. The address for which kernel is + compiled already meets above alignment restrictions. Hence the + end result is that kernel runs from a physical address meeting + above alignment restrictions. Don't change this unless you know what you are doing. diff --git a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S index e4dd7a6b9b0..f395a4bb38b 100644 --- a/arch/i386/boot/compressed/head.S +++ b/arch/i386/boot/compressed/head.S @@ -26,6 +26,7 @@ #include #include #include +#include .section ".text.head" .globl startup_32 @@ -52,17 +53,17 @@ startup_32: 1: popl %ebp subl $1b, %ebp -/* Compute the delta between where we were compiled to run at - * and where the code will actually run at. +/* %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. */ - /* Start with the delta to where the kernel will run at. If we are - * a relocatable kernel this is the delta to our load address otherwise - * this is the delta to CONFIG_PHYSICAL start. - */ + #ifdef CONFIG_RELOCATABLE - movl %ebp, %ebx + movl %ebp, %ebx + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx #else - movl $(CONFIG_PHYSICAL_START - startup_32), %ebx + movl $LOAD_PHYSICAL_ADDR, %ebx #endif /* Replace the compressed data size with the uncompressed size */ @@ -94,9 +95,10 @@ startup_32: /* Compute the kernel start address. */ #ifdef CONFIG_RELOCATABLE - leal startup_32(%ebp), %ebp + addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp + andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp #else - movl $CONFIG_PHYSICAL_START, %ebp + movl $LOAD_PHYSICAL_ADDR, %ebp #endif /* @@ -150,8 +152,8 @@ relocated: * and where it was actually loaded. */ movl %ebp, %ebx - subl $CONFIG_PHYSICAL_START, %ebx - + subl $LOAD_PHYSICAL_ADDR, %ebx + jz 2f /* Nothing to be done if loaded at compiled addr. */ /* * Process relocations. */ diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 4eac24e95a1..dc153893155 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -14,6 +14,7 @@ #include #include #include +#include /* WARNING!! * This code is compiled with -fPIC and it is relocated dynamically @@ -360,12 +361,12 @@ asmlinkage void decompress_kernel(void *rmode, unsigned long end, insize = input_len; inptr = 0; - if (((u32)output - CONFIG_PHYSICAL_START) & 0x3fffff) - error("Destination address not 4M aligned"); + if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1)) + error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) error("Destination address too large"); #ifndef CONFIG_RELOCATABLE - if ((u32)output != CONFIG_PHYSICAL_START) + if ((u32)output != LOAD_PHYSICAL_ADDR) error("Wrong destination address"); #endif diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index f8d61ec4c8c..6860f20aa57 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -14,6 +14,7 @@ #include #include #include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -27,7 +28,7 @@ PHDRS { } SECTIONS { - . = LOAD_OFFSET + CONFIG_PHYSICAL_START; + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; phys_startup_32 = startup_32 - LOAD_OFFSET; /* read-only */ .text : AT(ADDR(.text) - LOAD_OFFSET) { diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h index 96b228e6e79..8ce79a6fa89 100644 --- a/include/asm-i386/boot.h +++ b/include/asm-i386/boot.h @@ -12,4 +12,8 @@ #define EXTENDED_VGA 0xfffe /* 80x50 mode */ #define ASK_VGA 0xfffd /* ask for it at bootup */ -#endif +/* Physical address where kenrel should be loaded. */ +#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + +#endif /* _LINUX_BOOT_H */ -- cgit v1.2.3-70-g09d2 From 249e83fe839a13b5dc94285daeeaf70f6e54d930 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 7 Dec 2006 02:14:04 +0100 Subject: [PATCH] x86-64: Extract segment descriptor definitions for use outside Code that wants to use struct desc_struct cannot do so on i386 because desc.h contains other code that will only compile on x86_64. So extract the structure definitions into a asm-x86_64/desc_defs.h. Signed-off-by: Avi Kivity Signed-off-by: Andi Kleen include/asm-x86_64/desc.h | 53 ------------------------------- include/asm-x86_64/desc_defs.h | 69 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 52 deletions(-) --- include/asm-x86_64/desc.h | 53 +------------------------------- include/asm-x86_64/desc_defs.h | 69 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 52 deletions(-) create mode 100644 include/asm-x86_64/desc_defs.h (limited to 'include') diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index eb7723a4679..913d6ac0003 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -9,64 +9,13 @@ #include #include +#include #include #include -// 8 byte segment descriptor -struct desc_struct { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; - unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; -} __attribute__((packed)); - -struct n_desc_struct { - unsigned int a,b; -}; - extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -enum { - GATE_INTERRUPT = 0xE, - GATE_TRAP = 0xF, - GATE_CALL = 0xC, -}; - -// 16byte gate -struct gate_struct { - u16 offset_low; - u16 segment; - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; - u16 offset_middle; - u32 offset_high; - u32 zero1; -} __attribute__((packed)); - -#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) -#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) -#define PTR_HIGH(x) ((unsigned long)(x) >> 32) - -enum { - DESC_TSS = 0x9, - DESC_LDT = 0x2, -}; - -// LDT or TSS descriptor in the GDT. 16 bytes. -struct ldttss_desc { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - -struct desc_ptr { - unsigned short size; - unsigned long address; -} __attribute__((packed)) ; - #define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)) #define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) #define clear_LDT() asm volatile("lldt %w0"::"r" (0)) diff --git a/include/asm-x86_64/desc_defs.h b/include/asm-x86_64/desc_defs.h new file mode 100644 index 00000000000..08900407009 --- /dev/null +++ b/include/asm-x86_64/desc_defs.h @@ -0,0 +1,69 @@ +/* Written 2000 by Andi Kleen */ +#ifndef __ARCH_DESC_DEFS_H +#define __ARCH_DESC_DEFS_H + +/* + * Segment descriptor structure definitions, usable from both x86_64 and i386 + * archs. + */ + +#ifndef __ASSEMBLY__ + +#include + +// 8 byte segment descriptor +struct desc_struct { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1; + unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8; +} __attribute__((packed)); + +struct n_desc_struct { + unsigned int a,b; +}; + +enum { + GATE_INTERRUPT = 0xE, + GATE_TRAP = 0xF, + GATE_CALL = 0xC, +}; + +// 16byte gate +struct gate_struct { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) +#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF) +#define PTR_HIGH(x) ((unsigned long)(x) >> 32) + +enum { + DESC_TSS = 0x9, + DESC_LDT = 0x2, +}; + +// LDT or TSS descriptor in the GDT. 16 bytes. +struct ldttss_desc { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +struct desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)) ; + + +#endif /* !__ASSEMBLY__ */ + +#endif -- cgit v1.2.3-70-g09d2 From 770d132f03ac15b12919f1bac481f4beda13e094 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] i386: Retrieve CLFLUSH size from CPUID Also report it in /proc/cpuinfo similar to x86-64. Needed for followon patch Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/common.c | 3 +++ arch/i386/kernel/cpu/proc.c | 3 ++- include/asm-i386/processor.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 6958ae5e2fa..cda41aef79a 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -309,6 +309,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c) #else c->apicid = (ebx >> 24) & 0xFF; #endif + if (c->x86_capability[0] & (1<<19)) + c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8; } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -373,6 +375,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; + c->x86_clflush_size = 32; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 76aac088a32..6624d8583c4 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -152,9 +152,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, " [%d]", i); } - seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n", + seq_printf(m, "\nbogomips\t: %lu.%02lu\n", c->loops_per_jiffy/(500000/HZ), (c->loops_per_jiffy/(5000/HZ)) % 100); + seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size); return 0; } diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index f73cf836e64..98fa73b7176 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -72,6 +72,7 @@ struct cpuinfo_x86 { #endif unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char apicid; + unsigned short x86_clflush_size; #ifdef CONFIG_SMP unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. */ -- cgit v1.2.3-70-g09d2 From 3760dd6efa75c98e223643da2eb7040406433053 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] i386: Use CLFLUSH instead of WBINVD in change_page_attr CLFLUSH is a lot faster than WBINVD so try to use that. Signed-off-by: Andi Kleen --- arch/i386/mm/pageattr.c | 24 +++++++++++++++++------- include/asm-i386/cpufeature.h | 1 + 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 8564b6ae17e..ad91528bdc1 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -67,11 +67,17 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, return base; } -static void flush_kernel_map(void *dummy) +static void flush_kernel_map(void *arg) { - /* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */ - if (boot_cpu_data.x86_model >= 4) + unsigned long adr = (unsigned long)arg; + + if (adr && cpu_has_clflush) { + int i; + for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) + asm volatile("clflush (%0)" :: "r" (adr + i)); + } else if (boot_cpu_data.x86_model >= 4) wbinvd(); + /* Flush all to work around Errata in early athlons regarding * large page flushing. */ @@ -173,9 +179,9 @@ __change_page_attr(struct page *page, pgprot_t prot) return 0; } -static inline void flush_map(void) +static inline void flush_map(void *adr) { - on_each_cpu(flush_kernel_map, NULL, 1, 1); + on_each_cpu(flush_kernel_map, adr, 1, 1); } /* @@ -217,9 +223,13 @@ void global_flush_tlb(void) spin_lock_irq(&cpa_lock); list_replace_init(&df_list, &l); spin_unlock_irq(&cpa_lock); - flush_map(); - list_for_each_entry_safe(pg, next, &l, lru) + if (!cpu_has_clflush) + flush_map(0); + list_for_each_entry_safe(pg, next, &l, lru) { + if (cpu_has_clflush) + flush_map(page_address(pg)); __free_page(pg); + } } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 231672558c1..4c83e059228 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -137,6 +137,7 @@ #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #endif /* __ASM_I386_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From 2fff0a48416af891dce38fd425246e337831e0bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:05 +0100 Subject: [PATCH] Generic: Move __user cast into probe_kernel_address Caller of probe_kernel_address shouldn't need to know that pka is internally implemented with __get_user. So move the __user cast into pka. Signed-off-by: Andi Kleen --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a48d7f11c7b..65a68da8bd5 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -36,7 +36,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, long ret; \ \ inc_preempt_count(); \ - ret = __get_user(retval, addr); \ + ret = __get_user(retval, (__force typeof(*addr) __user *)addr);\ dec_preempt_count(); \ ret; \ }) -- cgit v1.2.3-70-g09d2 From b2dff6a88cbed59d787a8ca7367c76ba385e1187 Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move find_max_pfn function to e820.c Move more code from setup.c into e820.c Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/e820.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 55 ------------------------------------------------ include/asm-i386/e820.h | 1 + 3 files changed, 53 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 0db95760b07..be4934f6f85 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -539,3 +540,54 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) return 0; } +/* + * Callback for efi_memory_walk. + */ +static int __init +efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) +{ + unsigned long *max_pfn = arg, pfn; + + if (start < end) { + pfn = PFN_UP(end -1); + if (pfn > *max_pfn) + *max_pfn = pfn; + } + return 0; +} + +static int __init +efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) +{ + memory_present(0, PFN_UP(start), PFN_DOWN(end)); + return 0; +} + +/* + * Find the highest page frame number we have available + */ +void __init find_max_pfn(void) +{ + int i; + + max_pfn = 0; + if (efi_enabled) { + efi_memmap_walk(efi_find_max_pfn, &max_pfn); + efi_memmap_walk(efi_memory_present_wrapper, NULL); + return; + } + + for (i = 0; i < e820.nr_map; i++) { + unsigned long start, end; + /* RAM? */ + if (e820.map[i].type != E820_RAM) + continue; + start = PFN_UP(e820.map[i].addr); + end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + if (start >= end) + continue; + if (end > max_pfn) + max_pfn = end; + memory_present(0, start, end); + } +} diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index b7509aec0eb..3d808054fdf 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -63,9 +63,6 @@ #include #include -/* Forward Declaration. */ -void __init find_max_pfn(void); - /* This value is set up by the early boot code to point to the value immediately after the boot time page tables. It contains a *physical* address, and must not be in the .bss segment! */ @@ -387,29 +384,6 @@ static int __init parse_reservetop(char *arg) } early_param("reservetop", parse_reservetop); -/* - * Callback for efi_memory_walk. - */ -static int __init -efi_find_max_pfn(unsigned long start, unsigned long end, void *arg) -{ - unsigned long *max_pfn = arg, pfn; - - if (start < end) { - pfn = PFN_UP(end -1); - if (pfn > *max_pfn) - *max_pfn = pfn; - } - return 0; -} - -static int __init -efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) -{ - memory_present(0, PFN_UP(start), PFN_DOWN(end)); - return 0; -} - /* * This function checks if the entire range is mapped with type. * @@ -442,35 +416,6 @@ e820_all_mapped(unsigned long s, unsigned long e, unsigned type) return 0; } -/* - * Find the highest page frame number we have available - */ -void __init find_max_pfn(void) -{ - int i; - - max_pfn = 0; - if (efi_enabled) { - efi_memmap_walk(efi_find_max_pfn, &max_pfn); - efi_memmap_walk(efi_memory_present_wrapper, NULL); - return; - } - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - /* RAM? */ - if (e820.map[i].type != E820_RAM) - continue; - start = PFN_UP(e820.map[i].addr); - end = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - if (start >= end) - continue; - if (end > max_pfn) - max_pfn = end; - memory_present(0, start, end); - } -} - /* * Determine low and high memory ranges: */ diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index f7514fb6e8e..14756942515 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -38,6 +38,7 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); +extern void find_max_pfn(void); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3-70-g09d2 From b5b2405706005cc7765f6ecd00965d29e93f090a Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move e820/efi memmap walking code to e820.c This patch moves e820/efi memmap table walking function from setup.c to e820.c, also this patch adds extern declaration in header file. Signed-off-by: bibo,mao Signed-off-by: Andi Kleen arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------- include/asm-i386/e820.h | 2 arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------------------- include/asm-i386/e820.h | 2 3 files changed, 117 insertions(+), 118 deletions(-) --- arch/i386/kernel/e820.c | 115 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 118 ----------------------------------------------- include/asm-i386/e820.h | 2 + 3 files changed, 117 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index be4934f6f85..47c495bf0cb 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -28,6 +28,11 @@ static struct change_member change_point_list[2*E820MAX] __initdata; static struct change_member *change_point[2*E820MAX] __initdata; static struct e820entry *overlap_list[E820MAX] __initdata; static struct e820entry new_bios[E820MAX] __initdata; +/* For PCI or other memory-mapped resources */ +unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -591,3 +596,113 @@ void __init find_max_pfn(void) memory_present(0, start, end); } } + +/* + * Free all available memory for boot time allocation. Used + * as a callback function by efi_memory_walk() + */ + +static int __init +free_available_memory(unsigned long start, unsigned long end, void *arg) +{ + /* check max_low_pfn */ + if (start >= (max_low_pfn << PAGE_SHIFT)) + return 0; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; + if (start < end) + free_bootmem(start, end - start); + + return 0; +} +/* + * Register fully available low RAM pages with the bootmem allocator. + */ +void __init register_bootmem_low_pages(unsigned long max_low_pfn) +{ + int i; + + if (efi_enabled) { + efi_memmap_walk(free_available_memory, NULL); + return; + } + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn, last_pfn, size; + /* + * Reserve usable low memory + */ + if (e820.map[i].type != E820_RAM) + continue; + /* + * We are rounding up the start address of usable memory: + */ + curr_pfn = PFN_UP(e820.map[i].addr); + if (curr_pfn >= max_low_pfn) + continue; + /* + * ... and at the end of the usable range downwards: + */ + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + + if (last_pfn > max_low_pfn) + last_pfn = max_low_pfn; + + /* + * .. finally, did all the rounding and playing + * around just make the area go away? + */ + if (last_pfn <= curr_pfn) + continue; + + size = last_pfn - curr_pfn; + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); + } +} + +void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; + + /* + * Search for the bigest gap in the low 32 bits of the e820 + * memory space. + */ + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + } + } + if (start < last) + last = start; + } + + /* + * See how much we want to round up: start off with + * rounding to the next 1MB area. + */ + round = 0x100000; + while ((gapsize >> 4) > round) + round += round; + /* Fun with two's complement */ + pci_mem_start = (gapstart + round) & -round; + + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", + pci_mem_start, gapstart, gapsize); +} diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 3d808054fdf..51ed015a1f3 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -94,12 +94,6 @@ unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; -/* For PCI or other memory-mapped resources */ -unsigned long pci_mem_start = 0x10000000; -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -475,68 +469,6 @@ unsigned long __init find_max_low_pfn(void) return max_low_pfn; } -/* - * Free all available memory for boot time allocation. Used - * as a callback function by efi_memory_walk() - */ - -static int __init -free_available_memory(unsigned long start, unsigned long end, void *arg) -{ - /* check max_low_pfn */ - if (start >= (max_low_pfn << PAGE_SHIFT)) - return 0; - if (end >= (max_low_pfn << PAGE_SHIFT)) - end = max_low_pfn << PAGE_SHIFT; - if (start < end) - free_bootmem(start, end - start); - - return 0; -} -/* - * Register fully available low RAM pages with the bootmem allocator. - */ -static void __init register_bootmem_low_pages(unsigned long max_low_pfn) -{ - int i; - - if (efi_enabled) { - efi_memmap_walk(free_available_memory, NULL); - return; - } - for (i = 0; i < e820.nr_map; i++) { - unsigned long curr_pfn, last_pfn, size; - /* - * Reserve usable low memory - */ - if (e820.map[i].type != E820_RAM) - continue; - /* - * We are rounding up the start address of usable memory: - */ - curr_pfn = PFN_UP(e820.map[i].addr); - if (curr_pfn >= max_low_pfn) - continue; - /* - * ... and at the end of the usable range downwards: - */ - last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); - - if (last_pfn > max_low_pfn) - last_pfn = max_low_pfn; - - /* - * .. finally, did all the rounding and playing - * around just make the area go away? - */ - if (last_pfn <= curr_pfn) - continue; - - size = last_pfn - curr_pfn; - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); - } -} - /* * workaround for Dell systems that neglect to reserve EBDA */ @@ -705,56 +637,6 @@ void __init remapped_pgdat_init(void) } } - - -static void __init register_memory(void) -{ - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; - - /* - * Search for the bigest gap in the low 32 bits of the e820 - * memory space. - */ - last = 0x100000000ull; - gapstart = 0x10000000; - gapsize = 0x400000; - i = e820.nr_map; - while (--i >= 0) { - unsigned long long start = e820.map[i].addr; - unsigned long long end = start + e820.map[i].size; - - /* - * Since "last" is at most 4GB, we know we'll - * fit in 32 bits if this condition is true - */ - if (last > end) { - unsigned long gap = last - end; - - if (gap > gapsize) { - gapsize = gap; - gapstart = end; - } - } - if (start < last) - last = start; - } - - /* - * See how much we want to round up: start off with - * rounding to the next 1MB area. - */ - round = 0x100000; - while ((gapsize >> 4) > round) - round += round; - /* Fun with two's complement */ - pci_mem_start = (gapstart + round) & -round; - - printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", - pci_mem_start, gapstart, gapsize); -} - #ifdef CONFIG_MCA static void set_mca_bus(int x) { diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index 14756942515..8da4175a553 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -39,6 +39,8 @@ extern struct e820map e820; extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); extern void find_max_pfn(void); +extern void register_bootmem_low_pages(unsigned long max_low_pfn); +extern void register_memory(void); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3-70-g09d2 From cef518e88b8ed94ea483c436ef5e5b151a3fabc6 Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] i386: Move memory map printing and other code to e820.c This patch moves e820 memory map print and memmap boot param parsing function from setup.c to e820.c, also adds limit_regions and print_memory_map declaration in header file. Signed-off-by: bibo,mao Signed-off-by: Andi Kleen arch/i386/kernel/e820.c | 152 +++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 158 --------------------------------- include/asm-i386/e820.h | 2 arch/i386/kernel/e820.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 153 ----------------------------------------------- include/asm-i386/e820.h | 2 3 files changed, 155 insertions(+), 152 deletions(-) --- arch/i386/kernel/e820.c | 152 ++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 153 +---------------------------------------------- include/asm-i386/e820.h | 2 + 3 files changed, 155 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 47c495bf0cb..b755255f272 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -33,6 +33,7 @@ unsigned long pci_mem_start = 0x10000000; #ifdef CONFIG_PCI EXPORT_SYMBOL(pci_mem_start); #endif +extern int user_defined_memmap; struct resource data_resource = { .name = "Kernel data", .start = 0, @@ -706,3 +707,154 @@ void __init register_memory(void) printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", pci_mem_start, gapstart, gapsize); } + +void __init print_memory_map(char *who) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + printk(" %s: %016Lx - %016Lx ", who, + e820.map[i].addr, + e820.map[i].addr + e820.map[i].size); + switch (e820.map[i].type) { + case E820_RAM: printk("(usable)\n"); + break; + case E820_RESERVED: + printk("(reserved)\n"); + break; + case E820_ACPI: + printk("(ACPI data)\n"); + break; + case E820_NVS: + printk("(ACPI NVS)\n"); + break; + default: printk("type %lu\n", e820.map[i].type); + break; + } + } +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr = 0; + int i; + + print_memory_map("limit_regions start"); + if (efi_enabled) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map, i = 0; p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + current_addr = md->phys_addr + (md->num_pages << 12); + if (md->type == EFI_CONVENTIONAL_MEMORY) { + if (current_addr >= size) { + md->num_pages -= + (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); + memmap.nr_map = i + 1; + return; + } + } + } + } + for (i = 0; i < e820.nr_map; i++) { + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; + } + print_memory_map("limit_regions endfor"); + return; + } + print_memory_map("limit_regions endfunc"); +} + + /* + * This function checks if the entire range is mapped with type. + * + * Note: this function only works correct if the e820 table is sorted and + * not-overlapping, which is the case + */ +int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) + continue; + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ + } + return 0; +} + +static int __init parse_memmap(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "exactmap") == 0) { +#ifdef CONFIG_CRASH_DUMP + /* If we are doing a crash dump, we + * still need to know the real mem + * size before original memory map is + * reset. + */ + find_max_pfn(); + saved_max_pfn = max_pfn; +#endif + e820.nr_map = 0; + user_defined_memmap = 1; + } else { + /* If the user specifies memory size, we + * limit the BIOS-provided memory map to + * that size. exactmap can be used to specify + * the exact map. mem=number can be used to + * trim the existing memory map. + */ + unsigned long long start_at, mem_size; + + mem_size = memparse(arg, &arg); + if (*arg == '@') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*arg == '#') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*arg == '$') { + start_at = memparse(arg+1, &arg); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + limit_regions(mem_size); + user_defined_memmap = 1; + } + } + return 0; +} +early_param("memmap", parse_memmap); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 51ed015a1f3..e5bb87aa5a4 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -73,7 +73,6 @@ int disable_pse __devinitdata = 0; /* * Machine setup.. */ -extern struct e820map e820; extern struct resource code_resource; extern struct resource data_resource; @@ -137,79 +136,6 @@ static char command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; -static void __init limit_regions(unsigned long long size) -{ - unsigned long long current_addr = 0; - int i; - - if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } - } - for (i = 0; i < e820.nr_map; i++) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr < size) - continue; - - if (e820.map[i].type != E820_RAM) - continue; - - if (e820.map[i].addr >= size) { - /* - * This region starts past the end of the - * requested size, skip it completely. - */ - e820.nr_map = i; - } else { - e820.nr_map = i + 1; - e820.map[i].size -= current_addr - size; - } - return; - } -} - -#define E820_DEBUG 1 - -static void __init print_memory_map(char *who) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - printk(" %s: %016Lx - %016Lx ", who, - e820.map[i].addr, - e820.map[i].addr + e820.map[i].size); - switch (e820.map[i].type) { - case E820_RAM: printk("(usable)\n"); - break; - case E820_RESERVED: - printk("(reserved)\n"); - break; - case E820_ACPI: - printk("(ACPI data)\n"); - break; - case E820_NVS: - printk("(ACPI NVS)\n"); - break; - default: printk("type %lu\n", e820.map[i].type); - break; - } - } -} - #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; #ifdef CONFIG_EDD_MODULE @@ -233,7 +159,7 @@ static inline void copy_edd(void) } #endif -static int __initdata user_defined_memmap = 0; +int __initdata user_defined_memmap = 0; /* * "mem=nopentium" disables the 4MB page tables. @@ -270,51 +196,6 @@ static int __init parse_mem(char *arg) } early_param("mem", parse_mem); -static int __init parse_memmap(char *arg) -{ - if (!arg) - return -EINVAL; - - if (strcmp(arg, "exactmap") == 0) { -#ifdef CONFIG_CRASH_DUMP - /* If we are doing a crash dump, we - * still need to know the real mem - * size before original memory map is - * reset. - */ - find_max_pfn(); - saved_max_pfn = max_pfn; -#endif - e820.nr_map = 0; - user_defined_memmap = 1; - } else { - /* If the user specifies memory size, we - * limit the BIOS-provided memory map to - * that size. exactmap can be used to specify - * the exact map. mem=number can be used to - * trim the existing memory map. - */ - unsigned long long start_at, mem_size; - - mem_size = memparse(arg, &arg); - if (*arg == '@') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RAM); - } else if (*arg == '#') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_ACPI); - } else if (*arg == '$') { - start_at = memparse(arg+1, &arg); - add_memory_region(start_at, mem_size, E820_RESERVED); - } else { - limit_regions(mem_size); - user_defined_memmap = 1; - } - } - return 0; -} -early_param("memmap", parse_memmap); - #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header * stored by the crashed kernel. @@ -378,38 +259,6 @@ static int __init parse_reservetop(char *arg) } early_param("reservetop", parse_reservetop); - /* - * This function checks if the entire range is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - /* * Determine low and high memory ranges: */ diff --git a/include/asm-i386/e820.h b/include/asm-i386/e820.h index 8da4175a553..395077aba58 100644 --- a/include/asm-i386/e820.h +++ b/include/asm-i386/e820.h @@ -41,6 +41,8 @@ extern int e820_all_mapped(unsigned long start, unsigned long end, extern void find_max_pfn(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void register_memory(void); +extern void limit_regions(unsigned long long size); +extern void print_memory_map(char *who); #endif/*!__ASSEMBLY__*/ -- cgit v1.2.3-70-g09d2 From b34e90b8f0f30151349134f87b5dc6ef75a5218c Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: use BIOS supplied BBARs and topology information Find the BBAR register address of each Calgary using the "Extended BIOS Data Area" rather than calculating it ourselves. Also get the bus topology (what PHB each bus is on) from Calgary rather than calculating it ourselves. This patch fixes http://bugzilla.kernel.org/show_bug.cgi?id=7407. Signed-off-by: Laurent Vivier Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- arch/x86_64/kernel/pci-calgary.c | 161 ++++++++++++++++++++++++++++++--------- include/asm-x86_64/rio.h | 76 ++++++++++++++++++ 2 files changed, 201 insertions(+), 36 deletions(-) create mode 100644 include/asm-x86_64/rio.h (limited to 'include') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index f53b581dfd0..afc0a53505f 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -41,6 +41,7 @@ #include #include #include +#include #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ @@ -115,14 +116,35 @@ static const unsigned long phb_offsets[] = { 0xB000 /* PHB3 */ }; +/* PHB debug registers */ + +static const unsigned long phb_debug_offsets[] = { + 0x4000 /* PHB 0 DEBUG */, + 0x5000 /* PHB 1 DEBUG */, + 0x6000 /* PHB 2 DEBUG */, + 0x7000 /* PHB 3 DEBUG */ +}; + +/* + * STUFF register for each debug PHB, + * byte 1 = start bus number, byte 2 = end bus number + */ + +#define PHB_DEBUG_STUFF_OFFSET 0x0020 + unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; static int translate_empty_slots __read_mostly = 0; static int calgary_detected __read_mostly = 0; +static struct rio_table_hdr *rio_table_hdr __initdata; +static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; + struct calgary_bus_info { void *tce_space; unsigned char translation_disabled; signed char phbid; + void __iomem *bbar; }; static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; @@ -475,6 +497,11 @@ static struct dma_mapping_ops calgary_dma_ops = { .unmap_sg = calgary_unmap_sg, }; +static inline void __iomem * busno_to_bbar(unsigned char num) +{ + return bus_info[num].bbar; +} + static inline int busno_to_phbid(unsigned char num) { return bus_info[num].phbid; @@ -828,31 +855,9 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline unsigned int __init locate_register_space(struct pci_dev *dev) +static inline void __iomem * __init locate_register_space(struct pci_dev *dev) { - int rionodeid; - u32 address; - - /* - * Each Calgary has four busses. The first four busses (first Calgary) - * have RIO node ID 2, then the next four (second Calgary) have RIO - * node ID 3, the next four (third Calgary) have node ID 2 again, etc. - * We use a gross hack - relying on the dev->bus->number ordering, - * modulo 14 - to decide which Calgary a given bus is on. Busses 0, 1, - * 2 and 4 are on the first Calgary (id 2), 6, 8, a and c are on the - * second (id 3), and then it repeats modulo 14. - */ - rionodeid = (dev->bus->number % 14 > 4) ? 3 : 2; - /* - * register space address calculation as follows: - * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase) - * ChassisBase is always zero for x366/x260/x460 - * RioNodeId is 2 for first Calgary, 3 for second Calgary - */ - address = START_ADDRESS - - (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 14)) + - (0x100000) * (rionodeid - CHASSIS_BASE); - return address; + return busno_to_bbar(dev->bus->number); } static void __init calgary_init_one_nontraslated(struct pci_dev *dev) @@ -864,15 +869,12 @@ static void __init calgary_init_one_nontraslated(struct pci_dev *dev) static int __init calgary_init_one(struct pci_dev *dev) { - u32 address; void __iomem *bbar; int ret; BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - address = locate_register_space(dev); - /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(address, 1024 * 1024); + bbar = locate_register_space(dev); if (!bbar) { ret = -ENODATA; goto done; @@ -898,6 +900,35 @@ static int __init calgary_init(void) { int ret = -ENODEV; struct pci_dev *dev = NULL; + int rio, phb, bus; + void __iomem *bbar; + void __iomem *target; + u8 start_bus, end_bus; + u32 val; + + for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + + if ( (rio_devs[rio]->type != COMPAT_CALGARY) && + (rio_devs[rio]->type != ALT_CALGARY) ) + continue; + + /* map entire 1MB of Calgary config space */ + bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + + for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + + target = calgary_reg(bbar, phb_debug_offsets[phb] | + PHB_DEBUG_STUFF_OFFSET); + val = be32_to_cpu(readl(target)); + start_bus = (u8)((val & 0x00FF0000) >> 16); + end_bus = (u8)((val & 0x0000FF00) >> 8); + for (bus = start_bus; bus <= end_bus; bus++) { + bus_info[bus].bbar = bbar; + bus_info[bus].phbid = phb; + } + } + } + do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -962,13 +993,55 @@ static inline int __init determine_tce_table_size(u64 ram) return ret; } +static int __init build_detail_arrays(void) +{ + unsigned long ptr; + int i, scal_detail_size, rio_detail_size; + + if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + printk(KERN_WARNING + "Calgary: MAX_NUMNODES too low! Defined as %d, " + "but system has %d nodes.\n", + MAX_NUMNODES, rio_table_hdr->num_scal_dev); + return -ENODEV; + } + + switch (rio_table_hdr->version){ + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -ENODEV; + case 2: + scal_detail_size = 11; + rio_detail_size = 13; + break; + case 3: + scal_detail_size = 12; + rio_detail_size = 15; + break; + } + + ptr = ((unsigned long)rio_table_hdr) + 3; + for (i = 0; i < rio_table_hdr->num_scal_dev; + i++, ptr += scal_detail_size) + scal_devs[i] = (struct scal_detail *)ptr; + + for (i = 0; i < rio_table_hdr->num_rio_dev; + i++, ptr += rio_detail_size) + rio_devs[i] = (struct rio_detail *)ptr; + + return 0; +} + void __init detect_calgary(void) { u32 val; int bus; void *tbl; int calgary_found = 0; - int phb = -1; + unsigned long ptr; + int offset; /* * if the user specified iommu=off or iommu=soft or we found @@ -980,6 +1053,29 @@ void __init detect_calgary(void) if (!early_pci_allowed()) return; + ptr = (unsigned long)phys_to_virt(get_bios_ebda()); + + rio_table_hdr = NULL; + offset = 0x180; + while (offset) { + /* The block id is stored in the 2nd word */ + if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ + /* set the pointer past the offset & block id */ + rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + break; + } + /* The next offset is stored in the 1st word. 0 means no more */ + offset = *((unsigned short *)(ptr + offset)); + } + if (!rio_table_hdr){ + printk(KERN_ERR "Calgary: Unable to locate " + "Rio Grande Table in EBDA - bailing!\n"); + return; + } + + if (build_detail_arrays()) + return; + specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { @@ -990,12 +1086,6 @@ void __init detect_calgary(void) if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; - /* - * There are 4 PHBs per Calgary chip. Set phb to which phb (0-3) - * it is connected to releative to the clagary chip. - */ - phb = (phb + 1) % PHBS_PER_CALGARY; - if (info->translation_disabled) continue; @@ -1010,7 +1100,6 @@ void __init detect_calgary(void) if (!tbl) goto cleanup; info->tce_space = tbl; - info->phbid = phb; calgary_found = 1; break; } diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h new file mode 100644 index 00000000000..1f315c39d76 --- /dev/null +++ b/include/asm-x86_64/rio.h @@ -0,0 +1,76 @@ +/* + * Derived from include/asm-i386/mach-summit/mach_mpparse.h + * and include/asm-i386/mach-default/bios_ebda.h + * + * Author: Laurent Vivier + * + */ + +#ifndef __ASM_RIO_H +#define __ASM_RIO_H + +#define RIO_TABLE_VERSION 3 + +struct rio_table_hdr { + u8 version; /* Version number of this data structure */ + u8 num_scal_dev; /* # of Scalability devices */ + u8 num_rio_dev; /* # of RIO I/O devices */ +} __attribute__((packed)); + +struct scal_detail { + u8 node_id; /* Scalability Node ID */ + u32 CBAR; /* Address of 1MB register space */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF = None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port2node; /* Node ID port connected to: 0xFF = None */ + u8 port2port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 chassis_num; /* 1 based Chassis number (1 = boot node) */ +} __attribute__((packed)); + +struct rio_detail { + u8 node_id; /* RIO Node ID */ + u32 BBAR; /* Address of 1MB register space */ + u8 type; /* Type of device */ + u8 owner_id; /* Node ID of Hurricane that owns this */ + /* node */ + u8 port0node; /* Node ID port connected to: 0xFF=None */ + u8 port0port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 port1node; /* Node ID port connected to: 0xFF=None */ + u8 port1port; /* Port num port connected to: 0,1,2, or */ + /* 0xFF=None */ + u8 first_slot; /* Lowest slot number below this Calgary */ + u8 status; /* Bit 0 = 1 : the XAPIC is used */ + /* = 0 : the XAPIC is not used, ie: */ + /* ints fwded to another XAPIC */ + /* Bits1:7 Reserved */ + u8 WP_index; /* instance index - lower ones have */ + /* lower slot numbers/PCI bus numbers */ + u8 chassis_num; /* 1 based Chassis number */ +} __attribute__((packed)); + +enum { + HURR_SCALABILTY = 0, /* Hurricane Scalability info */ + HURR_RIOIB = 2, /* Hurricane RIOIB info */ + COMPAT_CALGARY = 4, /* Compatibility Calgary */ + ALT_CALGARY = 5, /* Second Planar Calgary */ +}; + +/* + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E. + */ + +static inline unsigned long get_bios_ebda(void) +{ + unsigned long address= *(unsigned short *)phys_to_virt(0x40Eul); + address <<= 4; + return address; +} + +#endif /* __ASM_RIO_H */ -- cgit v1.2.3-70-g09d2 From eae93755540bae18aff46b8a0e621b5d65bd5380 Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:06 +0100 Subject: [PATCH] Calgary: check BBAR ioremap success when ioremapping This patch cleans up the previous "Use BIOS supplied BBAR information" patch. Mostly stylistic clenaups, but also check for ioremap failure when we ioremap the BBAR rather than when trying to use it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Acked-by: Laurent Vivier --- arch/x86_64/kernel/pci-calgary.c | 85 +++++++++++++++++++++++----------------- include/asm-x86_64/rio.h | 8 ++-- 2 files changed, 52 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index afc0a53505f..8a1e4f35bc3 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -138,7 +138,7 @@ static int calgary_detected __read_mostly = 0; static struct rio_table_hdr *rio_table_hdr __initdata; static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; -static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata; struct calgary_bus_info { void *tce_space; @@ -855,11 +855,6 @@ static void __init calgary_disable_translation(struct pci_dev *dev) del_timer_sync(&tbl->watchdog_timer); } -static inline void __iomem * __init locate_register_space(struct pci_dev *dev) -{ - return busno_to_bbar(dev->bus->number); -} - static void __init calgary_init_one_nontraslated(struct pci_dev *dev) { pci_dev_get(dev); @@ -874,15 +869,10 @@ static int __init calgary_init_one(struct pci_dev *dev) BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM); - bbar = locate_register_space(dev); - if (!bbar) { - ret = -ENODATA; - goto done; - } - + bbar = busno_to_bbar(dev->bus->number); ret = calgary_setup_tar(dev, bbar); if (ret) - goto iounmap; + goto done; pci_dev_get(dev); dev->bus->self = dev; @@ -890,38 +880,39 @@ static int __init calgary_init_one(struct pci_dev *dev) return 0; -iounmap: - iounmap(bbar); done: return ret; } -static int __init calgary_init(void) +static int __init calgary_locate_bbars(void) { - int ret = -ENODEV; - struct pci_dev *dev = NULL; - int rio, phb, bus; + int ret; + int rioidx, phb, bus; void __iomem *bbar; void __iomem *target; + unsigned long offset; u8 start_bus, end_bus; u32 val; - for (rio = 0; rio < rio_table_hdr->num_rio_dev; rio++) { + ret = -ENODATA; + for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) { + struct rio_detail *rio = rio_devs[rioidx]; - if ( (rio_devs[rio]->type != COMPAT_CALGARY) && - (rio_devs[rio]->type != ALT_CALGARY) ) + if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY)) continue; /* map entire 1MB of Calgary config space */ - bbar = ioremap_nocache(rio_devs[rio]->BBAR, 1024 * 1024); + bbar = ioremap_nocache(rio->BBAR, 1024 * 1024); + if (!bbar) + goto error; for (phb = 0; phb < PHBS_PER_CALGARY; phb++) { + offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET; + target = calgary_reg(bbar, offset); - target = calgary_reg(bbar, phb_debug_offsets[phb] | - PHB_DEBUG_STUFF_OFFSET); val = be32_to_cpu(readl(target)); start_bus = (u8)((val & 0x00FF0000) >> 16); - end_bus = (u8)((val & 0x0000FF00) >> 8); + end_bus = (u8)((val & 0x0000FF00) >> 8); for (bus = start_bus; bus <= end_bus; bus++) { bus_info[bus].bbar = bbar; bus_info[bus].phbid = phb; @@ -929,6 +920,25 @@ static int __init calgary_init(void) } } + return 0; + +error: + /* scan bus_info and iounmap any bbars we previously ioremap'd */ + for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++) + if (bus_info[bus].bbar) + iounmap(bus_info[bus].bbar); + + return ret; +} + +static int __init calgary_init(void) +{ + int ret; + struct pci_dev *dev = NULL; + + ret = calgary_locate_bbars(); + if (ret) + return ret; do { dev = pci_get_device(PCI_VENDOR_ID_IBM, @@ -1000,18 +1010,13 @@ static int __init build_detail_arrays(void) if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ printk(KERN_WARNING - "Calgary: MAX_NUMNODES too low! Defined as %d, " + "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", MAX_NUMNODES, rio_table_hdr->num_scal_dev); return -ENODEV; } switch (rio_table_hdr->version){ - default: - printk(KERN_WARNING - "Calgary: Invalid Rio Grande Table Version: %d\n", - rio_table_hdr->version); - return -ENODEV; case 2: scal_detail_size = 11; rio_detail_size = 13; @@ -1020,6 +1025,11 @@ static int __init build_detail_arrays(void) scal_detail_size = 12; rio_detail_size = 15; break; + default: + printk(KERN_WARNING + "Calgary: Invalid Rio Grande Table Version: %d\n", + rio_table_hdr->version); + return -EPROTO; } ptr = ((unsigned long)rio_table_hdr) + 3; @@ -1042,6 +1052,7 @@ void __init detect_calgary(void) int calgary_found = 0; unsigned long ptr; int offset; + int ret; /* * if the user specified iommu=off or iommu=soft or we found @@ -1061,27 +1072,29 @@ void __init detect_calgary(void) /* The block id is stored in the 2nd word */ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){ /* set the pointer past the offset & block id */ - rio_table_hdr = (struct rio_table_hdr *)(ptr+offset+4); + rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); break; } /* The next offset is stored in the 1st word. 0 means no more */ offset = *((unsigned short *)(ptr + offset)); } - if (!rio_table_hdr){ + if (!rio_table_hdr) { printk(KERN_ERR "Calgary: Unable to locate " "Rio Grande Table in EBDA - bailing!\n"); return; } - if (build_detail_arrays()) + ret = build_detail_arrays(); + if (ret) { + printk(KERN_ERR "Calgary: build_detail_arrays ret %d\n", ret); return; + } specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE); for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { int dev; struct calgary_bus_info *info = &bus_info[bus]; - info->phbid = -1; if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY) continue; diff --git a/include/asm-x86_64/rio.h b/include/asm-x86_64/rio.h index 1f315c39d76..c7350f6d201 100644 --- a/include/asm-x86_64/rio.h +++ b/include/asm-x86_64/rio.h @@ -3,7 +3,6 @@ * and include/asm-i386/mach-default/bios_ebda.h * * Author: Laurent Vivier - * */ #ifndef __ASM_RIO_H @@ -19,7 +18,7 @@ struct rio_table_hdr { struct scal_detail { u8 node_id; /* Scalability Node ID */ - u32 CBAR; /* Address of 1MB register space */ + u32 CBAR; /* Address of 1MB register space */ u8 port0node; /* Node ID port connected to: 0xFF=None */ u8 port0port; /* Port num port connected to: 0,1,2, or */ /* 0xFF=None */ @@ -34,7 +33,7 @@ struct scal_detail { struct rio_detail { u8 node_id; /* RIO Node ID */ - u32 BBAR; /* Address of 1MB register space */ + u32 BBAR; /* Address of 1MB register space */ u8 type; /* Type of device */ u8 owner_id; /* Node ID of Hurricane that owns this */ /* node */ @@ -65,10 +64,9 @@ enum { * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E. */ - static inline unsigned long get_bios_ebda(void) { - unsigned long address= *(unsigned short *)phys_to_virt(0x40Eul); + unsigned long address = *(unsigned short *)phys_to_virt(0x40EUL); address <<= 4; return address; } -- cgit v1.2.3-70-g09d2 From bff6547bb6a4e82c399d74e7fba78b12d2f162ed Mon Sep 17 00:00:00 2001 From: Muli Ben-Yehuda Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] Calgary: allow compiling Calgary in but not using it by default This patch makes it possible to compile Calgary in but not use it by default. In this mode, use 'iommu=calgary' to activate it. Signed-off-by: Muli Ben-Yehuda Signed-off-by: Jon Mason Signed-off-by: Andi Kleen --- Documentation/x86_64/boot-options.txt | 3 ++- arch/x86_64/Kconfig | 11 +++++++++++ arch/x86_64/kernel/pci-calgary.c | 9 +++++++++ arch/x86_64/kernel/pci-dma.c | 5 +++++ include/asm-x86_64/calgary.h | 2 ++ 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index ab9b1c046c0..dbdcaf68e3e 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt @@ -179,7 +179,7 @@ PCI IOMMU iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] - [,forcesac][,fullflush][,nomerge][,noaperture] + [,forcesac][,fullflush][,nomerge][,noaperture][,calgary] size set size of iommu (in bytes) noagp don't initialize the AGP driver and use full aperture. off don't use the IOMMU @@ -200,6 +200,7 @@ IOMMU buffering. nodac Forbid DMA >4GB panic Always panic when IOMMU overflows + calgary Use the Calgary IOMMU if it is available swiotlb=pages[,force] diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 010d2265f1c..5cb509dbffe 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -455,6 +455,17 @@ config CALGARY_IOMMU Normally the kernel will make the right choice by itself. If unsure, say Y. +config CALGARY_IOMMU_ENABLED_BY_DEFAULT + bool "Should Calgary be enabled by default?" + default y + depends on CALGARY_IOMMU + help + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. If you choose 'n' and would like to use + Calgary anyway, pass 'iommu=calgary' on the kernel command line. + If unsure, say Y. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB bool diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 8a1e4f35bc3..0ddf29dae7e 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c @@ -43,6 +43,12 @@ #include #include +#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT +int use_calgary __read_mostly = 1; +#else +int use_calgary __read_mostly = 0; +#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */ + #define PCI_DEVICE_ID_IBM_CALGARY 0x02a1 #define PCI_VENDOR_DEVICE_ID_CALGARY \ (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16) @@ -1061,6 +1067,9 @@ void __init detect_calgary(void) if (swiotlb || no_iommu || iommu_detected) return; + if (!use_calgary) + return; + if (!early_pci_allowed()) return; diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index f8d857453f8..683b7a5c1ab 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -296,6 +296,11 @@ __init int iommu_setup(char *p) gart_parse_options(p); #endif +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + p += strcspn(p, ","); if (*p == ',') ++p; diff --git a/include/asm-x86_64/calgary.h b/include/asm-x86_64/calgary.h index 6b93f5a3a5c..7ee90064571 100644 --- a/include/asm-x86_64/calgary.h +++ b/include/asm-x86_64/calgary.h @@ -51,6 +51,8 @@ struct iommu_table { #define TCE_TABLE_SIZE_4M 6 #define TCE_TABLE_SIZE_8M 7 +extern int use_calgary; + #ifdef CONFIG_CALGARY_IOMMU extern int calgary_iommu_init(void); extern void detect_calgary(void); -- cgit v1.2.3-70-g09d2 From b9a8d94a47f8a41766f6f7944adfb1d641349903 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] x86-64: Make x86_64 udelay() round up instead of down. Port two patches from i386 to x86_64 delay.c to make sure all rounding is done upward instead of downward. There is no sign in commit messages that the mismatch was done on purpose, and "delay() guarantees sleeping at least for the specified time" is still a valid rule IMHO. The original x86 patches are both from pre-GIT era, i.e.: "[PATCH] round up in __udelay()" in commit 54c7e1f5cc6771ff644d7bc21a2b829308bd126f "[PATCH] add 1 in __const_udelay()" in commit 42c77a9801b8877d8b90f65f75db758822a0bccc (both commits are from converted BK repository to x86_64). AK: fixed gcc warning linux/arch/x86_64/lib/delay.c:43: warning: suggest parentheses around + or - inside shift (did this actually work?) Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andi Kleen --- arch/x86_64/lib/delay.c | 4 ++-- include/asm-x86_64/delay.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 50be90975d0..2dbebd30834 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -40,13 +40,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { - __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); + __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); } EXPORT_SYMBOL(__const_udelay); void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } EXPORT_SYMBOL(__udelay); diff --git a/include/asm-x86_64/delay.h b/include/asm-x86_64/delay.h index 65f64acc531..40146f611cc 100644 --- a/include/asm-x86_64/delay.h +++ b/include/asm-x86_64/delay.h @@ -16,7 +16,7 @@ extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); #define udelay(n) (__builtin_constant_p(n) ? \ - ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ + ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ __udelay(n)) #define ndelay(n) (__builtin_constant_p(n) ? \ -- cgit v1.2.3-70-g09d2 From e6536c1262c56d302e749ab1b44fdb0b9786327d Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] x86: comment magic constants in delay.h For both i386 and x86_64, copy from arch/$ARCH/lib/delay.c comments about the used magic constants, plus a few other niceties. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andi Kleen include/asm-i386/delay.h | 5 ++++- include/asm-x86_64/delay.h | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) --- include/asm-i386/delay.h | 5 ++++- include/asm-x86_64/delay.h | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index b1c7650dc7b..9ae5e3782ed 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -7,6 +7,7 @@ * Delay routines calling functions in arch/i386/lib/delay.c */ +/* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); @@ -15,10 +16,12 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ __udelay(n)) - + +/* 0x5 is 2**32 / 1000000000 (rounded up) */ #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) diff --git a/include/asm-x86_64/delay.h b/include/asm-x86_64/delay.h index 40146f611cc..c2669f1f552 100644 --- a/include/asm-x86_64/delay.h +++ b/include/asm-x86_64/delay.h @@ -7,18 +7,21 @@ * Delay routines calling functions in arch/x86_64/lib/delay.c */ +/* Undefined functions to get compile-time errors */ extern void __bad_udelay(void); extern void __bad_ndelay(void); extern void __udelay(unsigned long usecs); -extern void __ndelay(unsigned long usecs); +extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +/* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ __udelay(n)) +/* 0x5 is 2**32 / 1000000000 (rounded up) */ #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) -- cgit v1.2.3-70-g09d2 From d3561b7fa0fb0fc583bab0eeda32bec9e4c4056d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:07 +0100 Subject: [PATCH] paravirt: header and stubs for paravirtualisation Create a paravirt.h header for all the critical operations which need to be replaced with hypervisor calls, and include that instead of defining native operations, when CONFIG_PARAVIRT. This patch does the dumbest possible replacement of paravirtualized instructions: calls through a "paravirt_ops" structure. Currently these are function implementations of native hardware: hypervisors will override the ops structure with their own variants. All the pv-ops functions are declared "fastcall" so that a specific register-based ABI is used, to make inlining assember easier. And: +From: Andy Whitcroft The paravirt ops introduce a 'weak' attribute onto memory_setup(). Code ordering leads to the following warnings on x86: arch/i386/kernel/setup.c:651: warning: weak declaration of `memory_setup' after first use results in unspecified behavior Move memory_setup() to avoid this. Signed-off-by: Rusty Russell Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Andy Whitcroft --- arch/i386/Kconfig | 11 + arch/i386/boot/compressed/misc.c | 1 + arch/i386/kernel/Makefile | 1 + arch/i386/kernel/asm-offsets.c | 10 + arch/i386/kernel/entry.S | 34 ++- arch/i386/kernel/i8259.c | 5 +- arch/i386/kernel/paravirt.c | 404 +++++++++++++++++++++++++++++ arch/i386/kernel/setup.c | 8 +- arch/i386/kernel/smpboot.c | 5 + arch/i386/kernel/time.c | 15 +- arch/i386/power/cpu.c | 8 +- drivers/net/de600.c | 1 - include/asm-i386/delay.h | 8 + include/asm-i386/desc.h | 9 +- include/asm-i386/io.h | 8 +- include/asm-i386/irq.h | 3 + include/asm-i386/irqflags.h | 42 +-- include/asm-i386/mach-default/setup_arch.h | 2 + include/asm-i386/msr.h | 5 + include/asm-i386/paravirt.h | 281 ++++++++++++++++++++ include/asm-i386/processor.h | 15 +- include/asm-i386/segment.h | 2 + include/asm-i386/setup.h | 1 + include/asm-i386/spinlock.h | 4 + include/asm-i386/suspend.h | 8 +- include/asm-i386/system.h | 16 +- include/asm-i386/time.h | 41 +++ 27 files changed, 890 insertions(+), 58 deletions(-) create mode 100644 arch/i386/kernel/paravirt.c create mode 100644 include/asm-i386/paravirt.h create mode 100644 include/asm-i386/time.h (limited to 'include') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1f0f7b60995..bb1fa061c6c 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -182,6 +182,17 @@ config X86_ES7000 endchoice +config PARAVIRT + bool "Paravirtualization support (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Paravirtualization is a way of running multiple instances of + Linux on the same machine, under a hypervisor. This option + changes the kernel so it can modify itself when it is run + under a hypervisor, improving performance significantly. + However, when run without a hypervisor the kernel is + theoretically slower. If in doubt, say N. + config ACPI_SRAT bool default y diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index dc153893155..c6798c75c67 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c @@ -9,6 +9,7 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ +#undef CONFIG_PARAVIRT #include #include #include diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index f614854bd71..40661213604 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o +obj-$(CONFIG_PARAVIRT) += paravirt.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 0666eb0ed7b..1b2f3cd3327 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -101,4 +101,14 @@ void foo(void) BLANK(); OFFSET(PDA_cpu, i386_pda, cpu_number); OFFSET(PDA_pcurrent, i386_pda, pcurrent); + +#ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled); + OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable); + OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable); + OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); + OFFSET(PARAVIRT_iret, paravirt_ops, iret); + OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); +#endif } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 0220bc8cbb4..d274612e05c 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -62,13 +62,6 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 -/* These are replaces for paravirtualization */ -#define DISABLE_INTERRUPTS cli -#define ENABLE_INTERRUPTS sti -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define INTERRUPT_RETURN iret -#define GET_CR0_INTO_EAX movl %cr0, %eax - #ifdef CONFIG_PREEMPT #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF #else @@ -416,6 +409,20 @@ ldt_ss: jnz restore_nocheck testl $0x00400000, %eax # returning to 32bit stack? jnz restore_nocheck # allright, normal return + +#ifdef CONFIG_PARAVIRT + /* + * The kernel can't run on a non-flat stack if paravirt mode + * is active. Rather than try to fixup the high bits of + * ESP, bypass this code entirely. This may break DOSemu + * and/or Wine support in a paravirt VM, although the option + * is still available to implement the setting of the high + * 16-bits in the INTERRUPT_RETURN paravirt-op. + */ + cmpl $0, paravirt_ops+PARAVIRT_enabled + jne restore_nocheck +#endif + /* If returning to userspace with 16bit stack, * try to fix the higher word of ESP, as the CPU * won't restore it. @@ -833,6 +840,19 @@ nmi_espfix_stack: .previous KPROBE_END(nmi) +#ifdef CONFIG_PARAVIRT +ENTRY(native_iret) +1: iret +.section __ex_table,"a" + .align 4 + .long 1b,iret_exc +.previous + +ENTRY(native_irq_enable_sysexit) + sti + sysexit +#endif + KPROBE_ENTRY(int3) RING0_INT_FRAME pushl $-1 # mark this as an int diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 62996cd1708..c8d45821c78 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -381,7 +381,10 @@ void __init init_ISA_irqs (void) } } -void __init init_IRQ(void) +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) { int i; diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c new file mode 100644 index 00000000000..478192cd4b9 --- /dev/null +++ b/arch/i386/kernel/paravirt.c @@ -0,0 +1,404 @@ +/* Paravirtualization interfaces + Copyright (C) 2006 Rusty Russell IBM Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* nop stub */ +static void native_nop(void) +{ +} + +static void __init default_banner(void) +{ + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", + paravirt_ops.name); +} + +char *memory_setup(void) +{ + return paravirt_ops.memory_setup(); +} + +static fastcall unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm("movl %%db0, %0" :"=r" (val)); break; + case 1: + asm("movl %%db1, %0" :"=r" (val)); break; + case 2: + asm("movl %%db2, %0" :"=r" (val)); break; + case 3: + asm("movl %%db3, %0" :"=r" (val)); break; + case 6: + asm("movl %%db6, %0" :"=r" (val)); break; + case 7: + asm("movl %%db7, %0" :"=r" (val)); break; + default: + BUG(); + } + return val; +} + +static fastcall void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("movl %0,%%db0" : /* no output */ :"r" (value)); + break; + case 1: + asm("movl %0,%%db1" : /* no output */ :"r" (value)); + break; + case 2: + asm("movl %0,%%db2" : /* no output */ :"r" (value)); + break; + case 3: + asm("movl %0,%%db3" : /* no output */ :"r" (value)); + break; + case 6: + asm("movl %0,%%db6" : /* no output */ :"r" (value)); + break; + case 7: + asm("movl %0,%%db7" : /* no output */ :"r" (value)); + break; + default: + BUG(); + } +} + +void init_IRQ(void) +{ + paravirt_ops.init_IRQ(); +} + +static fastcall void native_clts(void) +{ + asm volatile ("clts"); +} + +static fastcall unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr0(unsigned long val) +{ + asm volatile("movl %0,%%cr0": :"r" (val)); +} + +static fastcall unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr2(unsigned long val) +{ + asm volatile("movl %0,%%cr2": :"r" (val)); +} + +static fastcall unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall void native_write_cr3(unsigned long val) +{ + asm volatile("movl %0,%%cr3": :"r" (val)); +} + +static fastcall unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); + return val; +} + +static fastcall unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist */ + asm("1: movl %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val): "0" (0)); + return val; +} + +static fastcall void native_write_cr4(unsigned long val) +{ + asm volatile("movl %0,%%cr4": :"r" (val)); +} + +static fastcall unsigned long native_save_fl(void) +{ + unsigned long f; + asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); + return f; +} + +static fastcall void native_restore_fl(unsigned long f) +{ + asm volatile("pushl %0 ; popfl": /* no output */ + :"g" (f) + :"memory", "cc"); +} + +static fastcall void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static fastcall void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static fastcall void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static fastcall void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +static fastcall void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) +{ + unsigned long long val; + + asm volatile("2: rdmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=r" (*err), "=A" (val) + : "c" (msr), "i" (-EFAULT)); + + return val; +} + +static fastcall int native_write_msr(unsigned int msr, unsigned long long val) +{ + int err; + asm volatile("2: wrmsr ; xorl %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: movl %4,%0 ; jmp 1b\n\t" + ".previous\n\t" + ".section __ex_table,\"a\"\n" + " .align 4\n\t" + " .long 2b,3b\n\t" + ".previous" + : "=a" (err) + : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), + "i" (-EFAULT)); + return err; +} + +static fastcall unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +static fastcall unsigned long long native_read_pmc(void) +{ + unsigned long long val; + asm volatile("rdpmc" : "=A" (val)); + return val; +} + +static fastcall void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); +} + +static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lgdt %0"::"m" (*dtr)); +} + +static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} + +static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) +{ + asm ("sgdt %0":"=m" (*dtr)); +} + +static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) +{ + asm ("sidt %0":"=m" (*dtr)); +} + +static fastcall unsigned long native_store_tr(void) +{ + unsigned long tr; + asm ("str %0":"=r" (tr)); + return tr; +} + +static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) +{ +#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] + C(0); C(1); C(2); +#undef C +} + +static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) +{ + u32 *lp = (u32 *)((char *)dt + entry*8); + lp[0] = entry_low; + lp[1] = entry_high; +} + +static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) +{ + native_write_dt_entry(dt, entrynum, low, high); +} + +static fastcall void native_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->esp0 = thread->esp0; + + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->ss1 != thread->sysenter_cs)) { + tss->ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +} + +static fastcall void native_io_delay(void) +{ + asm volatile("outb %al,$0x80"); +} + +/* These are in entry.S */ +extern fastcall void native_iret(void); +extern fastcall void native_irq_enable_sysexit(void); + +static int __init print_banner(void) +{ + paravirt_ops.banner(); + return 0; +} +core_initcall(print_banner); + +struct paravirt_ops paravirt_ops = { + .name = "bare hardware", + .paravirt_enabled = 0, + .kernel_rpl = 0, + + .banner = default_banner, + .arch_setup = native_nop, + .memory_setup = machine_specific_memory_setup, + .get_wallclock = native_get_wallclock, + .set_wallclock = native_set_wallclock, + .time_init = time_init_hook, + .init_IRQ = native_init_IRQ, + + .cpuid = native_cpuid, + .get_debugreg = native_get_debugreg, + .set_debugreg = native_set_debugreg, + .clts = native_clts, + .read_cr0 = native_read_cr0, + .write_cr0 = native_write_cr0, + .read_cr2 = native_read_cr2, + .write_cr2 = native_write_cr2, + .read_cr3 = native_read_cr3, + .write_cr3 = native_write_cr3, + .read_cr4 = native_read_cr4, + .read_cr4_safe = native_read_cr4_safe, + .write_cr4 = native_write_cr4, + .save_fl = native_save_fl, + .restore_fl = native_restore_fl, + .irq_disable = native_irq_disable, + .irq_enable = native_irq_enable, + .safe_halt = native_safe_halt, + .halt = native_halt, + .wbinvd = native_wbinvd, + .read_msr = native_read_msr, + .write_msr = native_write_msr, + .read_tsc = native_read_tsc, + .read_pmc = native_read_pmc, + .load_tr_desc = native_load_tr_desc, + .set_ldt = native_set_ldt, + .load_gdt = native_load_gdt, + .load_idt = native_load_idt, + .store_gdt = native_store_gdt, + .store_idt = native_store_idt, + .store_tr = native_store_tr, + .load_tls = native_load_tls, + .write_ldt_entry = native_write_ldt_entry, + .write_gdt_entry = native_write_gdt_entry, + .write_idt_entry = native_write_idt_entry, + .load_esp0 = native_load_esp0, + + .set_iopl_mask = native_set_iopl_mask, + .io_delay = native_io_delay, + .const_udelay = __const_udelay, + + .irq_enable_sysexit = native_irq_enable_sysexit, + .iret = native_iret, +}; +EXPORT_SYMBOL(paravirt_ops); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index e5bb87aa5a4..695d53fd14d 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -495,6 +495,12 @@ static void set_mca_bus(int x) static void set_mca_bus(int x) { } #endif +/* Overridden in paravirt.c if CONFIG_PARAVIRT */ +char * __attribute__((weak)) memory_setup(void) +{ + return machine_specific_memory_setup(); +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -547,7 +553,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); else { printk(KERN_INFO "BIOS-provided physical RAM map:\n"); - print_memory_map(machine_specific_memory_setup()); + print_memory_map(memory_setup()); } copy_edd(); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 095636620fa..cd7de9c9654 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -33,6 +33,11 @@ * Dave Jones : Report invalid combinations of Athlon CPUs. * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ + +/* SMP boot always wants to use real time delay to allow sufficient time for + * the APs to come online */ +#define USE_REAL_TIME_DELAY + #include #include #include diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 78af572fd17..c505b16c099 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "mach_time.h" @@ -116,10 +117,7 @@ static int set_rtc_mmss(unsigned long nowtime) /* gets recalled with irq locally disabled */ /* XXX - does irqsave resolve this? -johnstul */ spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_set_rtc_mmss(nowtime); - else - retval = mach_set_rtc_mmss(nowtime); + retval = set_wallclock(nowtime); spin_unlock_irqrestore(&rtc_lock, flags); return retval; @@ -223,10 +221,7 @@ unsigned long get_cmos_time(void) spin_lock_irqsave(&rtc_lock, flags); - if (efi_enabled) - retval = efi_get_time(); - else - retval = mach_get_cmos_time(); + retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); @@ -370,7 +365,7 @@ static void __init hpet_time_init(void) printk("Using HPET for base-timer\n"); } - time_init_hook(); + do_time_init(); } #endif @@ -392,5 +387,5 @@ void __init time_init(void) do_settimeofday(&ts); - time_init_hook(); + do_time_init(); } diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 5a1abeff033..2c15500f871 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -26,8 +26,8 @@ void __save_processor_state(struct saved_context *ctxt) /* * descriptor tables */ - store_gdt(&ctxt->gdt_limit); - store_idt(&ctxt->idt_limit); + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); store_tr(ctxt->tr); /* @@ -99,8 +99,8 @@ void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ - load_gdt(&ctxt->gdt_limit); - load_idt(&ctxt->idt_limit); + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); /* * segment registers diff --git a/drivers/net/de600.c b/drivers/net/de600.c index 690bb40b353..8396e411f1c 100644 --- a/drivers/net/de600.c +++ b/drivers/net/de600.c @@ -43,7 +43,6 @@ static const char version[] = "de600.c: $Revision: 1.41-2.5 $, Bjorn Ekwall (bj * modify the following "#define": (see for more info) #define REALLY_SLOW_IO */ -#define SLOW_IO_BY_JUMPING /* Looks "better" than dummy write to port 0x80 :-) */ /* use 0 for production, 1 for verification, >2 for debug */ #ifdef DE600_DEBUG diff --git a/include/asm-i386/delay.h b/include/asm-i386/delay.h index 9ae5e3782ed..32d6678d0bb 100644 --- a/include/asm-i386/delay.h +++ b/include/asm-i386/delay.h @@ -16,6 +16,13 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long usecs); extern void __delay(unsigned long loops); +#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY) +#define udelay(n) paravirt_ops.const_udelay((n) * 0x10c7ul) + +#define ndelay(n) paravirt_ops.const_udelay((n) * 5ul) + +#else /* !PARAVIRT || USE_REAL_TIME_DELAY */ + /* 0x10c7 is 2**32 / 1000000 (rounded up) */ #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \ @@ -25,6 +32,7 @@ extern void __delay(unsigned long loops); #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) +#endif void use_tsc_delay(void); diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 6cf2ac2bfde..f19820f0834 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -55,6 +55,9 @@ static inline void pack_gate(__u32 *a, __u32 *b, #define DESCTYPE_DPL3 0x60 /* DPL-3 */ #define DESCTYPE_S 0x10 /* !system */ +#ifdef CONFIG_PARAVIRT +#include +#else #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) @@ -105,7 +108,11 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); } -static inline void set_ldt(void *addr, unsigned int entries) +#define set_ldt native_set_ldt +#endif /* CONFIG_PARAVIRT */ + +static inline fastcall void native_set_ldt(const void *addr, + unsigned int entries) { if (likely(entries == 0)) __asm__ __volatile__("lldt %w0"::"q" (0)); diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 68df0dc3ab8..86ff5e83be2 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -256,11 +256,11 @@ static inline void flush_write_buffers(void) #endif /* __KERNEL__ */ -#ifdef SLOW_IO_BY_JUMPING -#define __SLOW_DOWN_IO "jmp 1f; 1: jmp 1f; 1:" +#if defined(CONFIG_PARAVIRT) +#include #else + #define __SLOW_DOWN_IO "outb %%al,$0x80;" -#endif static inline void slow_down_io(void) { __asm__ __volatile__( @@ -271,6 +271,8 @@ static inline void slow_down_io(void) { : : ); } +#endif + #ifdef CONFIG_X86_NUMAQ extern void *xquad_portio; /* Where the IO area was mapped */ #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 331726b4112..9e15ce0006e 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -41,4 +41,7 @@ extern int irqbalance_disable(char *str); extern void fixup_irqs(cpumask_t map); #endif +void init_IRQ(void); +void __init native_init_IRQ(void); + #endif /* _ASM_IRQ_H */ diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index e1bdb97c07f..9ce01f3fb7b 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -10,6 +10,9 @@ #ifndef _ASM_IRQFLAGS_H #define _ASM_IRQFLAGS_H +#ifdef CONFIG_PARAVIRT +#include +#else #ifndef __ASSEMBLY__ static inline unsigned long __raw_local_save_flags(void) @@ -25,9 +28,6 @@ static inline unsigned long __raw_local_save_flags(void) return flags; } -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - static inline void raw_local_irq_restore(unsigned long flags) { __asm__ __volatile__( @@ -66,18 +66,6 @@ static inline void halt(void) __asm__ __volatile__("hlt": : :"memory"); } -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & (1 << 9)); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - /* * For spinlocks, etc: */ @@ -90,9 +78,33 @@ static inline unsigned long __raw_local_irq_save(void) return flags; } +#else +#define DISABLE_INTERRUPTS cli +#define ENABLE_INTERRUPTS sti +#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit +#define INTERRUPT_RETURN iret +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + #define raw_local_irq_save(flags) \ do { (flags) = __raw_local_irq_save(); } while (0) +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & (1 << 9)); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return raw_irqs_disabled_flags(flags); +} #endif /* __ASSEMBLY__ */ /* diff --git a/include/asm-i386/mach-default/setup_arch.h b/include/asm-i386/mach-default/setup_arch.h index fb42099e7bd..605e3ccb991 100644 --- a/include/asm-i386/mach-default/setup_arch.h +++ b/include/asm-i386/mach-default/setup_arch.h @@ -2,4 +2,6 @@ /* no action for generic */ +#ifndef ARCH_SETUP #define ARCH_SETUP +#endif diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h index 1820d9d73af..5679d499307 100644 --- a/include/asm-i386/msr.h +++ b/include/asm-i386/msr.h @@ -1,6 +1,10 @@ #ifndef __ASM_MSR_H #define __ASM_MSR_H +#ifdef CONFIG_PARAVIRT +#include +#else + /* * Access to machine-specific registers (available on 586 and better only) * Note: the rd* operations modify the parameters directly (without using @@ -77,6 +81,7 @@ static inline void wrmsrl (unsigned long msr, unsigned long long val) __asm__ __volatile__("rdpmc" \ : "=a" (low), "=d" (high) \ : "c" (counter)) +#endif /* !CONFIG_PARAVIRT */ /* symbolic names for some interesting MSRs */ /* Intel defined MSRs. */ diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h new file mode 100644 index 00000000000..a7551a44686 --- /dev/null +++ b/include/asm-i386/paravirt.h @@ -0,0 +1,281 @@ +#ifndef __ASM_PARAVIRT_H +#define __ASM_PARAVIRT_H +/* Various instructions on x86 need to be replaced for + * para-virtualization: those hooks are defined here. */ +#include + +#ifdef CONFIG_PARAVIRT +#ifndef __ASSEMBLY__ +struct thread_struct; +struct Xgt_desc_struct; +struct tss_struct; +struct paravirt_ops +{ + unsigned int kernel_rpl; + int paravirt_enabled; + const char *name; + + void (*arch_setup)(void); + char *(*memory_setup)(void); + void (*init_IRQ)(void); + + void (*banner)(void); + + unsigned long (*get_wallclock)(void); + int (*set_wallclock)(unsigned long); + void (*time_init)(void); + + /* All the function pointers here are declared as "fastcall" + so that we get a specific register-based calling + convention. This makes it easier to implement inline + assembler replacements. */ + + void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + + unsigned long (fastcall *get_debugreg)(int regno); + void (fastcall *set_debugreg)(int regno, unsigned long value); + + void (fastcall *clts)(void); + + unsigned long (fastcall *read_cr0)(void); + void (fastcall *write_cr0)(unsigned long); + + unsigned long (fastcall *read_cr2)(void); + void (fastcall *write_cr2)(unsigned long); + + unsigned long (fastcall *read_cr3)(void); + void (fastcall *write_cr3)(unsigned long); + + unsigned long (fastcall *read_cr4_safe)(void); + unsigned long (fastcall *read_cr4)(void); + void (fastcall *write_cr4)(unsigned long); + + unsigned long (fastcall *save_fl)(void); + void (fastcall *restore_fl)(unsigned long); + void (fastcall *irq_disable)(void); + void (fastcall *irq_enable)(void); + void (fastcall *safe_halt)(void); + void (fastcall *halt)(void); + void (fastcall *wbinvd)(void); + + /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (fastcall *read_msr)(unsigned int msr, int *err); + int (fastcall *write_msr)(unsigned int msr, u64 val); + + u64 (fastcall *read_tsc)(void); + u64 (fastcall *read_pmc)(void); + + void (fastcall *load_tr_desc)(void); + void (fastcall *load_gdt)(const struct Xgt_desc_struct *); + void (fastcall *load_idt)(const struct Xgt_desc_struct *); + void (fastcall *store_gdt)(struct Xgt_desc_struct *); + void (fastcall *store_idt)(struct Xgt_desc_struct *); + void (fastcall *set_ldt)(const void *desc, unsigned entries); + unsigned long (fastcall *store_tr)(void); + void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); + void (fastcall *write_ldt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_gdt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *write_idt_entry)(void *dt, int entrynum, + u32 low, u32 high); + void (fastcall *load_esp0)(struct tss_struct *tss, + struct thread_struct *thread); + + void (fastcall *set_iopl_mask)(unsigned mask); + + void (fastcall *io_delay)(void); + void (*const_udelay)(unsigned long loops); + + /* These two are jmp to, not actually called. */ + void (fastcall *irq_enable_sysexit)(void); + void (fastcall *iret)(void); +}; + +extern struct paravirt_ops paravirt_ops; + +#define paravirt_enabled() (paravirt_ops.paravirt_enabled) + +static inline void load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + paravirt_ops.load_esp0(tss, thread); +} + +#define ARCH_SETUP paravirt_ops.arch_setup(); +static inline unsigned long get_wallclock(void) +{ + return paravirt_ops.get_wallclock(); +} + +static inline int set_wallclock(unsigned long nowtime) +{ + return paravirt_ops.set_wallclock(nowtime); +} + +static inline void do_time_init(void) +{ + return paravirt_ops.time_init(); +} + +/* The paravirtualized CPUID instruction. */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + paravirt_ops.cpuid(eax, ebx, ecx, edx); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, reg) var = paravirt_ops.get_debugreg(reg) +#define set_debugreg(val, reg) paravirt_ops.set_debugreg(reg, val) + +#define clts() paravirt_ops.clts() + +#define read_cr0() paravirt_ops.read_cr0() +#define write_cr0(x) paravirt_ops.write_cr0(x) + +#define read_cr2() paravirt_ops.read_cr2() +#define write_cr2(x) paravirt_ops.write_cr2(x) + +#define read_cr3() paravirt_ops.read_cr3() +#define write_cr3(x) paravirt_ops.write_cr3(x) + +#define read_cr4() paravirt_ops.read_cr4() +#define read_cr4_safe(x) paravirt_ops.read_cr4_safe() +#define write_cr4(x) paravirt_ops.write_cr4(x) + +static inline unsigned long __raw_local_save_flags(void) +{ + return paravirt_ops.save_fl(); +} + +static inline void raw_local_irq_restore(unsigned long flags) +{ + return paravirt_ops.restore_fl(flags); +} + +static inline void raw_local_irq_disable(void) +{ + paravirt_ops.irq_disable(); +} + +static inline void raw_local_irq_enable(void) +{ + paravirt_ops.irq_enable(); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = paravirt_ops.save_fl(); + + paravirt_ops.irq_disable(); + + return flags; +} + +static inline void raw_safe_halt(void) +{ + paravirt_ops.safe_halt(); +} + +static inline void halt(void) +{ + paravirt_ops.safe_halt(); +} +#define wbinvd() paravirt_ops.wbinvd() + +#define get_kernel_rpl() (paravirt_ops.kernel_rpl) + +#define rdmsr(msr,val1,val2) do { \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + val1 = (u32)_l; \ + val2 = _l >> 32; \ +} while(0) + +#define wrmsr(msr,val1,val2) do { \ + u64 _l = ((u64)(val2) << 32) | (val1); \ + paravirt_ops.write_msr((msr), _l); \ +} while(0) + +#define rdmsrl(msr,val) do { \ + int _err; \ + val = paravirt_ops.read_msr((msr),&_err); \ +} while(0) + +#define wrmsrl(msr,val) (paravirt_ops.write_msr((msr),(val))) +#define wrmsr_safe(msr,a,b) ({ \ + u64 _l = ((u64)(b) << 32) | (a); \ + paravirt_ops.write_msr((msr),_l); \ +}) + +/* rdmsr with exception handling */ +#define rdmsr_safe(msr,a,b) ({ \ + int _err; \ + u64 _l = paravirt_ops.read_msr(msr,&_err); \ + (*a) = (u32)_l; \ + (*b) = _l >> 32; \ + _err; }) + +#define rdtsc(low,high) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define rdtscl(low) do { \ + u64 _l = paravirt_ops.read_tsc(); \ + low = (int)_l; \ +} while(0) + +#define rdtscll(val) (val = paravirt_ops.read_tsc()) + +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + +#define rdpmc(counter,low,high) do { \ + u64 _l = paravirt_ops.read_pmc(); \ + low = (u32)_l; \ + high = _l >> 32; \ +} while(0) + +#define load_TR_desc() (paravirt_ops.load_tr_desc()) +#define load_gdt(dtr) (paravirt_ops.load_gdt(dtr)) +#define load_idt(dtr) (paravirt_ops.load_idt(dtr)) +#define set_ldt(addr, entries) (paravirt_ops.set_ldt((addr), (entries))) +#define store_gdt(dtr) (paravirt_ops.store_gdt(dtr)) +#define store_idt(dtr) (paravirt_ops.store_idt(dtr)) +#define store_tr(tr) ((tr) = paravirt_ops.store_tr()) +#define load_TLS(t,cpu) (paravirt_ops.load_tls((t),(cpu))) +#define write_ldt_entry(dt, entry, low, high) \ + (paravirt_ops.write_ldt_entry((dt), (entry), (low), (high))) +#define write_gdt_entry(dt, entry, low, high) \ + (paravirt_ops.write_gdt_entry((dt), (entry), (low), (high))) +#define write_idt_entry(dt, entry, low, high) \ + (paravirt_ops.write_idt_entry((dt), (entry), (low), (high))) +#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask)) + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) { + paravirt_ops.io_delay(); +#ifdef REALLY_SLOW_IO + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); + paravirt_ops.io_delay(); +#endif +} + +#define CLI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax" +#define STI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax" +#else /* __ASSEMBLY__ */ + +#define INTERRUPT_RETURN jmp *%cs:paravirt_ops+PARAVIRT_iret +#define DISABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax +#define ENABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *%cs:paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax +#define ENABLE_INTERRUPTS_SYSEXIT jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit +#define GET_CR0_INTO_EAX call *paravirt_ops+PARAVIRT_read_cr0 +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ +#endif /* __ASM_PARAVIRT_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 98fa73b7176..6c2c4457be0 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -144,8 +144,8 @@ static inline void detect_ht(struct cpuinfo_x86 *c) {} #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) +static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { /* ecx is often an input as well as an output. */ __asm__("cpuid" @@ -491,6 +491,12 @@ struct thread_struct { .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) { tss->esp0 = thread->esp0; @@ -524,10 +530,13 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa : /* no output */ \ :"r" (value)) +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + /* * Set IOPL bits in EFLAGS from given mask */ -static inline void set_iopl_mask(unsigned mask) +static fastcall inline void native_set_iopl_mask(unsigned mask) { unsigned int reg; __asm__ __volatile__ ("pushfl;" diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 5bdda79b6b5..3c796af3377 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h @@ -131,5 +131,7 @@ #define SEGMENT_LDT 0x4 #define SEGMENT_GDT 0x0 +#ifndef CONFIG_PARAVIRT #define get_kernel_rpl() 0 #endif +#endif diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 2734909eff8..9930c5a355f 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -70,6 +70,7 @@ extern unsigned char boot_params[PARAM_SIZE]; struct e820entry; char * __init machine_specific_memory_setup(void); +char *memory_setup(void); int __init copy_e820_map(struct e820entry * biosmap, int nr_map); int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map); diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index c18b71fae6b..dea60709db2 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -7,8 +7,12 @@ #include #include +#ifdef CONFIG_PARAVIRT +#include +#else #define CLI_STRING "cli" #define STI_STRING "sti" +#endif /* CONFIG_PARAVIRT */ /* * Your basic SMP spinlocks, allowing only a single CPU anywhere diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index 08be1e5009d..30361526d56 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -23,12 +23,8 @@ arch_prepare_suspend(void) struct saved_context { u16 es, fs, gs, ss; unsigned long cr0, cr2, cr3, cr4; - u16 gdt_pad; - u16 gdt_limit; - unsigned long gdt_base; - u16 idt_pad; - u16 idt_limit; - unsigned long idt_base; + struct Xgt_desc_struct gdt; + struct Xgt_desc_struct idt; u16 ldt; u16 tss; unsigned long tr; diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h index a6dabbcd6e6..a6d20d9a1a3 100644 --- a/include/asm-i386/system.h +++ b/include/asm-i386/system.h @@ -88,6 +88,9 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) +#ifdef CONFIG_PARAVIRT +#include +#else #define read_cr0() ({ \ unsigned int __dummy; \ __asm__ __volatile__( \ @@ -139,17 +142,18 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define write_cr4(x) \ __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) -/* - * Clear and set 'TS' bit respectively - */ +#define wbinvd() \ + __asm__ __volatile__ ("wbinvd": : :"memory") + +/* Clear the 'TS' bit */ #define clts() __asm__ __volatile__ ("clts") +#endif/* CONFIG_PARAVIRT */ + +/* Set the 'TS' bit */ #define stts() write_cr0(8 | read_cr0()) #endif /* __KERNEL__ */ -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") - static inline unsigned long get_limit(unsigned long segment) { unsigned long __limit; diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h new file mode 100644 index 00000000000..ea8065af825 --- /dev/null +++ b/include/asm-i386/time.h @@ -0,0 +1,41 @@ +#ifndef _ASMi386_TIME_H +#define _ASMi386_TIME_H + +#include +#include "mach_time.h" + +static inline unsigned long native_get_wallclock(void) +{ + unsigned long retval; + + if (efi_enabled) + retval = efi_get_time(); + else + retval = mach_get_cmos_time(); + + return retval; +} + +static inline int native_set_wallclock(unsigned long nowtime) +{ + int retval; + + if (efi_enabled) + retval = efi_set_rtc_mmss(nowtime); + else + retval = mach_set_rtc_mmss(nowtime); + + return retval; +} + +#ifdef CONFIG_PARAVIRT +#include +#else /* !CONFIG_PARAVIRT */ + +#define get_wallclock() native_get_wallclock() +#define set_wallclock(x) native_set_wallclock(x) +#define do_time_init() time_init_hook() + +#endif /* CONFIG_PARAVIRT */ + +#endif -- cgit v1.2.3-70-g09d2 From 139ec7c416248b9ea227d21839235344edfee1e0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Patch inline replacements for paravirt intercepts It turns out that the most called ops, by several orders of magnitude, are the interrupt manipulation ops. These are obvious candidates for patching, so mark them up and create infrastructure for it. The method used is that the ops structure has a patch function, which is called for each place which needs to be patched: this returns a number of instructions (the rest are NOP-padded). Usually we can spare a register (%eax) for the binary patched code to use, but in a couple of critical places in entry.S we can't: we make the clobbers explicit at the call site, and manually clobber the allowed registers in debug mode as an extra check. And: Don't abuse CONFIG_DEBUG_KERNEL, add CONFIG_DEBUG_PARAVIRT. And: AK: Fix warnings in x86-64 alternative.c build And: AK: Fix compilation with defconfig And: ^From: Andrew Morton Some binutlises still like to emit references to __stop_parainstructions and __start_parainstructions. And: AK: Fix warnings about unused variables when PARAVIRT is disabled. Signed-off-by: Rusty Russell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Chris Wright Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/Kconfig.debug | 10 ++ arch/i386/kernel/alternative.c | 63 ++++++++++--- arch/i386/kernel/entry.S | 39 +++++--- arch/i386/kernel/module.c | 11 ++- arch/i386/kernel/paravirt.c | 44 +++++++++ arch/i386/kernel/vmlinux.lds.S | 6 ++ include/asm-i386/alternative.h | 13 ++- include/asm-i386/desc.h | 41 ++++---- include/asm-i386/irqflags.h | 4 +- include/asm-i386/paravirt.h | 189 ++++++++++++++++++++++++++++++------- include/asm-i386/processor.h | 198 +++++++++++++++++++-------------------- include/asm-i386/spinlock.h | 15 ++- include/asm-x86_64/alternative.h | 12 +++ scripts/mod/modpost.c | 2 + 14 files changed, 459 insertions(+), 188 deletions(-) (limited to 'include') diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index b31c0802e1c..f68cc6f215f 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -85,4 +85,14 @@ config DOUBLEFAULT option saves about 4k and might cause you much additional grey hair. +config DEBUG_PARAVIRT + bool "Enable some paravirtualization debugging" + default y + depends on PARAVIRT && DEBUG_KERNEL + help + Currently deliberately clobbers regs which are allowed to be + clobbered in inlined paravirt hooks, even in native mode. + If turning this off solves a problem, then DISABLE_INTERRUPTS() or + ENABLE_INTERRUPTS() is lying about what registers can be clobbered. + endmenu diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 535f9794fba..9eca21b49f6 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -124,6 +124,20 @@ static unsigned char** find_nop_table(void) #endif /* CONFIG_X86_64 */ +static void nop_out(void *insns, unsigned int len) +{ + unsigned char **noptable = find_nop_table(); + + while (len > 0) { + unsigned int noplen = len; + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + memcpy(insns, noptable[noplen], noplen); + insns += noplen; + len -= noplen; + } +} + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; @@ -138,10 +152,9 @@ extern u8 __smp_alt_begin[], __smp_alt_end[]; void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { - unsigned char **noptable = find_nop_table(); struct alt_instr *a; u8 *instr; - int diff, i, k; + int diff; DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { @@ -159,13 +172,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) #endif memcpy(instr, a->replacement, a->replacementlen); diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } + nop_out(instr + a->replacementlen, diff); } } @@ -209,7 +216,6 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - unsigned char **noptable = find_nop_table(); u8 **ptr; for (ptr = start; ptr < end; ptr++) { @@ -217,7 +223,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end continue; if (*ptr > text_end) continue; - **ptr = noptable[1][0]; + nop_out(*ptr, 1); }; } @@ -343,6 +349,40 @@ void alternatives_smp_switch(int smp) #endif +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{ + struct paravirt_patch *p; + + for (p = start; p < end; p++) { + unsigned int used; + + used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, + p->len); +#ifdef CONFIG_DEBUG_PARAVIRT + { + int i; + /* Deliberately clobber regs using "not %reg" to find bugs. */ + for (i = 0; i < 3; i++) { + if (p->len - used >= 2 && (p->clobbers & (1 << i))) { + memcpy(p->instr + used, "\xf7\xd0", 2); + p->instr[used+1] |= i; + used += 2; + } + } + } +#endif + /* Pad the rest with nops */ + nop_out(p->instr + used, p->len - used); + } + + /* Sync to be conservative, in case we patched following instructions */ + sync_core(); +} +extern struct paravirt_patch __start_parainstructions[], + __stop_parainstructions[]; +#endif /* CONFIG_PARAVIRT */ + void __init alternative_instructions(void) { unsigned long flags; @@ -390,5 +430,6 @@ void __init alternative_instructions(void) alternatives_smp_switch(0); } #endif + apply_paravirt(__start_parainstructions, __stop_parainstructions); local_irq_restore(flags); } diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index d274612e05c..de34b7fed3c 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -53,6 +53,19 @@ #include #include "irq_vectors.h" +/* + * We use macros for low-level operations which need to be overridden + * for paravirtualization. The following will never clobber any registers: + * INTERRUPT_RETURN (aka. "iret") + * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") + * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * + * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must + * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). + * Allowing a register to be clobbered can shrink the paravirt replacement + * enough to patch inline, increasing performance. + */ + #define nr_syscalls ((syscall_table_size)/4) CF_MASK = 0x00000001 @@ -63,9 +76,9 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF +#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else -#define preempt_stop +#define preempt_stop(clobbers) #define resume_kernel restore_nocheck #endif @@ -226,7 +239,7 @@ ENTRY(ret_from_fork) ALIGN RING0_PTREGS_FRAME ret_from_exception: - preempt_stop + preempt_stop(CLBR_ANY) ret_from_intr: GET_THREAD_INFO(%ebp) check_userspace: @@ -237,7 +250,7 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -248,7 +261,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - DISABLE_INTERRUPTS + DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: @@ -277,7 +290,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - ENABLE_INTERRUPTS + ENABLE_INTERRUPTS(CLBR_NONE) pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -322,7 +335,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) - DISABLE_INTERRUPTS + DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx @@ -364,7 +377,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -393,7 +406,7 @@ restore_nocheck_notrace: .section .fixup,"ax" iret_exc: TRACE_IRQS_ON - ENABLE_INTERRUPTS + ENABLE_INTERRUPTS(CLBR_NONE) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -436,7 +449,7 @@ ldt_ss: CFI_ADJUST_CFA_OFFSET 4 pushl %eax CFI_ADJUST_CFA_OFFSET 4 - DISABLE_INTERRUPTS + DISABLE_INTERRUPTS(CLBR_EAX) TRACE_IRQS_OFF lss (%esp), %esp CFI_ADJUST_CFA_OFFSET -8 @@ -451,7 +464,7 @@ work_pending: jz work_notifysig work_resched: call schedule - DISABLE_INTERRUPTS # make sure we don't miss an interrupt + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret TRACE_IRQS_OFF @@ -509,7 +522,7 @@ syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending TRACE_IRQS_ON - ENABLE_INTERRUPTS # could let do_syscall_trace() call + ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -693,7 +706,7 @@ ENTRY(device_not_available) GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate - preempt_stop + preempt_stop(CLBR_ANY) call math_state_restore jmp ret_from_exception device_not_available_emulate: diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 470cf97e7cd..d7d9c8b23f7 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -108,7 +108,8 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -118,6 +119,8 @@ int module_finalize(const Elf_Ehdr *hdr, alt = s; if (!strcmp(".smp_locks", secstrings + s->sh_name)) locks= s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; } if (alt) { @@ -132,6 +135,12 @@ int module_finalize(const Elf_Ehdr *hdr, lseg, lseg + locks->sh_size, tseg, tseg + text->sh_size); } + + if (para) { + void *pseg = (void *)para->sh_addr; + apply_paravirt(pseg, pseg + para->sh_size); + } + return 0; } diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 478192cd4b9..d4646042644 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -45,6 +45,49 @@ char *memory_setup(void) return paravirt_ops.memory_setup(); } +/* Simple instruction patching code. */ +#define DEF_NATIVE(name, code) \ + extern const char start_##name[], end_##name[]; \ + asm("start_" #name ": " code "; end_" #name ":") +DEF_NATIVE(cli, "cli"); +DEF_NATIVE(sti, "sti"); +DEF_NATIVE(popf, "push %eax; popf"); +DEF_NATIVE(pushf, "pushf; pop %eax"); +DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); +DEF_NATIVE(iret, "iret"); +DEF_NATIVE(sti_sysexit, "sti; sysexit"); + +static const struct native_insns +{ + const char *start, *end; +} native_insns[] = { + [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli }, + [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti }, + [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf }, + [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf }, + [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli }, + [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret }, + [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit }, +}; + +static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) +{ + unsigned int insn_len; + + /* Don't touch it if we don't have a replacement */ + if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) + return len; + + insn_len = native_insns[type].end - native_insns[type].start; + + /* Similarly if we can't fit replacement. */ + if (len < insn_len) + return len; + + memcpy(insns, native_insns[type].start, insn_len); + return insn_len; +} + static fastcall unsigned long native_get_debugreg(int regno) { unsigned long val = 0; /* Damn you, gcc! */ @@ -349,6 +392,7 @@ struct paravirt_ops paravirt_ops = { .paravirt_enabled = 0, .kernel_rpl = 0, + .patch = native_patch, .banner = default_banner, .arch_setup = native_nop, .memory_setup = machine_specific_memory_setup, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 6860f20aa57..5c69cf0e594 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -165,6 +165,12 @@ SECTIONS .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { *(.altinstr_replacement) } + . = ALIGN(4); + __start_parainstructions = .; + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + *(.parainstructions) + } + __stop_parainstructions = .; /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } diff --git a/include/asm-i386/alternative.h b/include/asm-i386/alternative.h index b01a7ec409c..b8fa9557c53 100644 --- a/include/asm-i386/alternative.h +++ b/include/asm-i386/alternative.h @@ -4,7 +4,7 @@ #ifdef __KERNEL__ #include - +#include #include struct alt_instr { @@ -118,4 +118,15 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif +struct paravirt_patch; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +#else +static inline void +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{} +#define __start_parainstructions NULL +#define __stop_parainstructions NULL +#endif + #endif /* _I386_ALTERNATIVE_H */ diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index f19820f0834..f398cc45644 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -81,31 +81,15 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu) #undef C } -static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) -{ - __u32 *lp = (__u32 *)((char *)dt + entry*8); - *lp = entry_a; - *(lp+1) = entry_b; -} - #define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) #define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) -static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) -{ - __u32 a, b; - pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); - write_idt_entry(idt_table, gate, a, b); -} - -static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) +static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) { - __u32 a, b; - pack_descriptor(&a, &b, (unsigned long)addr, - offsetof(struct tss_struct, __cacheline_filler) - 1, - DESCTYPE_TSS, 0); - write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); + __u32 *lp = (__u32 *)((char *)dt + entry*8); + *lp = entry_a; + *(lp+1) = entry_b; } #define set_ldt native_set_ldt @@ -128,6 +112,23 @@ static inline fastcall void native_set_ldt(const void *addr, } } +static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) +{ + __u32 a, b; + pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); + write_idt_entry(idt_table, gate, a, b); +} + +static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) +{ + __u32 a, b; + pack_descriptor(&a, &b, (unsigned long)addr, + offsetof(struct tss_struct, __cacheline_filler) - 1, + DESCTYPE_TSS, 0); + write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); +} + + #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) #define LDT_entry_a(info) \ diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h index 9ce01f3fb7b..17b18cf4fe9 100644 --- a/include/asm-i386/irqflags.h +++ b/include/asm-i386/irqflags.h @@ -79,8 +79,8 @@ static inline unsigned long __raw_local_irq_save(void) } #else -#define DISABLE_INTERRUPTS cli -#define ENABLE_INTERRUPTS sti +#define DISABLE_INTERRUPTS(clobbers) cli +#define ENABLE_INTERRUPTS(clobbers) sti #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit #define INTERRUPT_RETURN iret #define GET_CR0_INTO_EAX movl %cr0, %eax diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index a7551a44686..081194751ad 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -3,8 +3,26 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ #include +#include #ifdef CONFIG_PARAVIRT +/* These are the most performance critical ops, so we want to be able to patch + * callers */ +#define PARAVIRT_IRQ_DISABLE 0 +#define PARAVIRT_IRQ_ENABLE 1 +#define PARAVIRT_RESTORE_FLAGS 2 +#define PARAVIRT_SAVE_FLAGS 3 +#define PARAVIRT_SAVE_FLAGS_IRQ_DISABLE 4 +#define PARAVIRT_INTERRUPT_RETURN 5 +#define PARAVIRT_STI_SYSEXIT 6 + +/* Bitmask of what can be clobbered: usually at least eax. */ +#define CLBR_NONE 0x0 +#define CLBR_EAX 0x1 +#define CLBR_ECX 0x2 +#define CLBR_EDX 0x4 +#define CLBR_ANY 0x7 + #ifndef __ASSEMBLY__ struct thread_struct; struct Xgt_desc_struct; @@ -15,6 +33,15 @@ struct paravirt_ops int paravirt_enabled; const char *name; + /* + * Patch may replace one of the defined code sequences with arbitrary + * code, subject to the same register constraints. This generally + * means the code is not free to clobber any registers other than EAX. + * The patch function should return the number of bytes of code + * generated, as we nop pad the rest in generic code. + */ + unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len); + void (*arch_setup)(void); char *(*memory_setup)(void); void (*init_IRQ)(void); @@ -147,35 +174,6 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define read_cr4_safe(x) paravirt_ops.read_cr4_safe() #define write_cr4(x) paravirt_ops.write_cr4(x) -static inline unsigned long __raw_local_save_flags(void) -{ - return paravirt_ops.save_fl(); -} - -static inline void raw_local_irq_restore(unsigned long flags) -{ - return paravirt_ops.restore_fl(flags); -} - -static inline void raw_local_irq_disable(void) -{ - paravirt_ops.irq_disable(); -} - -static inline void raw_local_irq_enable(void) -{ - paravirt_ops.irq_enable(); -} - -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags = paravirt_ops.save_fl(); - - paravirt_ops.irq_disable(); - - return flags; -} - static inline void raw_safe_halt(void) { paravirt_ops.safe_halt(); @@ -267,15 +265,134 @@ static inline void slow_down_io(void) { #endif } -#define CLI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax" -#define STI_STRING "pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax" +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch { + u8 *instr; /* original instructions */ + u8 instrtype; /* type of this instruction */ + u8 len; /* length of original instruction */ + u16 clobbers; /* what registers you may clobber */ +}; + +#define paravirt_alt(insn_string, typenum, clobber) \ + "771:\n\t" insn_string "\n" "772:\n" \ + ".pushsection .parainstructions,\"a\"\n" \ + " .long 771b\n" \ + " .byte " __stringify(typenum) "\n" \ + " .byte 772b-771b\n" \ + " .short " __stringify(clobber) "\n" \ + ".popsection" + +static inline unsigned long __raw_local_save_flags(void) +{ + unsigned long f; + + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1;" + "popl %%edx; popl %%ecx", + PARAVIRT_SAVE_FLAGS, CLBR_NONE) + : "=a"(f): "m"(paravirt_ops.save_fl) + : "memory", "cc"); + return f; +} + +static inline void raw_local_irq_restore(unsigned long f) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1;" + "popl %%edx; popl %%ecx", + PARAVIRT_RESTORE_FLAGS, CLBR_EAX) + : "=a"(f) : "m" (paravirt_ops.restore_fl), "0"(f) + : "memory", "cc"); +} + +static inline void raw_local_irq_disable(void) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%0;" + "popl %%edx; popl %%ecx", + PARAVIRT_IRQ_DISABLE, CLBR_EAX) + : : "m" (paravirt_ops.irq_disable) + : "memory", "eax", "cc"); +} + +static inline void raw_local_irq_enable(void) +{ + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%0;" + "popl %%edx; popl %%ecx", + PARAVIRT_IRQ_ENABLE, CLBR_EAX) + : : "m" (paravirt_ops.irq_enable) + : "memory", "eax", "cc"); +} + +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long f; + + __asm__ __volatile__(paravirt_alt( "pushl %%ecx; pushl %%edx;" + "call *%1; pushl %%eax;" + "call *%2; popl %%eax;" + "popl %%edx; popl %%ecx", + PARAVIRT_SAVE_FLAGS_IRQ_DISABLE, + CLBR_NONE) + : "=a"(f) + : "m" (paravirt_ops.save_fl), + "m" (paravirt_ops.irq_disable) + : "memory", "cc"); + return f; +} + +#define CLI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[irq_disable];" \ + "popl %%edx; popl %%ecx", \ + PARAVIRT_IRQ_DISABLE, CLBR_EAX) + +#define STI_STRING paravirt_alt("pushl %%ecx; pushl %%edx;" \ + "call *paravirt_ops+%c[irq_enable];" \ + "popl %%edx; popl %%ecx", \ + PARAVIRT_IRQ_ENABLE, CLBR_EAX) +#define CLI_STI_CLOBBERS , "%eax" +#define CLI_STI_INPUT_ARGS \ + , \ + [irq_disable] "i" (offsetof(struct paravirt_ops, irq_disable)), \ + [irq_enable] "i" (offsetof(struct paravirt_ops, irq_enable)) + #else /* __ASSEMBLY__ */ -#define INTERRUPT_RETURN jmp *%cs:paravirt_ops+PARAVIRT_iret -#define DISABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *paravirt_ops+PARAVIRT_irq_disable; popl %edx; popl %ecx; popl %eax -#define ENABLE_INTERRUPTS pushl %eax; pushl %ecx; pushl %edx; call *%cs:paravirt_ops+PARAVIRT_irq_enable; popl %edx; popl %ecx; popl %eax -#define ENABLE_INTERRUPTS_SYSEXIT jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit -#define GET_CR0_INTO_EAX call *paravirt_ops+PARAVIRT_read_cr0 +#define PARA_PATCH(ptype, clobbers, ops) \ +771:; \ + ops; \ +772:; \ + .pushsection .parainstructions,"a"; \ + .long 771b; \ + .byte ptype; \ + .byte 772b-771b; \ + .short clobbers; \ + .popsection + +#define INTERRUPT_RETURN \ + PARA_PATCH(PARAVIRT_INTERRUPT_RETURN, CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_iret) + +#define DISABLE_INTERRUPTS(clobbers) \ + PARA_PATCH(PARAVIRT_IRQ_DISABLE, clobbers, \ + pushl %ecx; pushl %edx; \ + call *paravirt_ops+PARAVIRT_irq_disable; \ + popl %edx; popl %ecx) \ + +#define ENABLE_INTERRUPTS(clobbers) \ + PARA_PATCH(PARAVIRT_IRQ_ENABLE, clobbers, \ + pushl %ecx; pushl %edx; \ + call *%cs:paravirt_ops+PARAVIRT_irq_enable; \ + popl %edx; popl %ecx) + +#define ENABLE_INTERRUPTS_SYSEXIT \ + PARA_PATCH(PARAVIRT_STI_SYSEXIT, CLBR_ANY, \ + jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit) + +#define GET_CR0_INTO_EAX \ + call *paravirt_ops+PARAVIRT_read_cr0 + #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ #endif /* __ASM_PARAVIRT_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 6c2c4457be0..5f0418d0078 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -156,59 +156,6 @@ static inline fastcall void native_cpuid(unsigned int *eax, unsigned int *ebx, : "0" (*eax), "2" (*ecx)); } -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. - */ -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return edx; -} - #define load_cr3(pgdir) write_cr3(__pa(pgdir)) /* @@ -491,22 +438,6 @@ struct thread_struct { .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } -#ifdef CONFIG_PARAVIRT -#include -#else -#define paravirt_enabled() 0 -#define __cpuid native_cpuid - -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) -{ - tss->esp0 = thread->esp0; - /* This can only happen when SEP is enabled, no need to test "SEP"arately */ - if (unlikely(tss->ss1 != thread->sysenter_cs)) { - tss->ss1 = thread->sysenter_cs; - wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); - } -} - #define start_thread(regs, new_eip, new_esp) do { \ __asm__("movl %0,%%fs": :"r" (0)); \ regs->xgs = 0; \ @@ -519,36 +450,6 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa regs->esp = new_esp; \ } while (0) -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - __asm__("movl %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) - -#define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ - -/* - * Set IOPL bits in EFLAGS from given mask - */ -static fastcall inline void native_set_iopl_mask(unsigned mask) -{ - unsigned int reg; - __asm__ __volatile__ ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -} - /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; @@ -640,6 +541,105 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() +#ifdef CONFIG_PARAVIRT +#include +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid + +static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) +{ + tss->esp0 = thread->esp0; + /* This can only happen when SEP is enabled, no need to test "SEP"arately */ + if (unlikely(tss->ss1 != thread->sysenter_cs)) { + tss->ss1 = thread->sysenter_cs; + wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); + } +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + __asm__("movl %%db" #register ", %0" \ + :"=r" (var)) +#define set_debugreg(value, register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (value)) + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + +/* + * Set IOPL bits in EFLAGS from given mask + */ +static fastcall inline void native_set_iopl_mask(unsigned mask) +{ + unsigned int reg; + __asm__ __volatile__ ("pushfl;" + "popl %0;" + "andl %1, %0;" + "orl %2, %0;" + "pushl %0;" + "popfl" + : "=&r" (reg) + : "i" (~X86_EFLAGS_IOPL), "r" (mask)); +} + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. + */ +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, + int *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return edx; +} + /* generic versions from gas */ #define GENERIC_NOP1 ".byte 0x90\n" #define GENERIC_NOP2 ".byte 0x89,0xf6\n" diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h index dea60709db2..d3bcebed60c 100644 --- a/include/asm-i386/spinlock.h +++ b/include/asm-i386/spinlock.h @@ -12,6 +12,8 @@ #else #define CLI_STRING "cli" #define STI_STRING "sti" +#define CLI_STI_CLOBBERS +#define CLI_STI_INPUT_ARGS #endif /* CONFIG_PARAVIRT */ /* @@ -57,25 +59,28 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla { asm volatile( "\n1:\t" - LOCK_PREFIX " ; decb %0\n\t" + LOCK_PREFIX " ; decb %[slock]\n\t" "jns 5f\n" "2:\t" - "testl $0x200, %1\n\t" + "testl $0x200, %[flags]\n\t" "jz 4f\n\t" STI_STRING "\n" "3:\t" "rep;nop\n\t" - "cmpb $0, %0\n\t" + "cmpb $0, %[slock]\n\t" "jle 3b\n\t" CLI_STRING "\n\t" "jmp 1b\n" "4:\t" "rep;nop\n\t" - "cmpb $0, %0\n\t" + "cmpb $0, %[slock]\n\t" "jg 1b\n\t" "jmp 4b\n" "5:\n\t" - : "+m" (lock->slock) : "r" (flags) : "memory"); + : [slock] "+m" (lock->slock) + : [flags] "r" (flags) + CLI_STI_INPUT_ARGS + : "memory" CLI_STI_CLOBBERS); } #endif diff --git a/include/asm-x86_64/alternative.h b/include/asm-x86_64/alternative.h index a584826cc57..a6657b4f3e0 100644 --- a/include/asm-x86_64/alternative.h +++ b/include/asm-x86_64/alternative.h @@ -4,6 +4,7 @@ #ifdef __KERNEL__ #include +#include #include struct alt_instr { @@ -133,4 +134,15 @@ static inline void alternatives_smp_switch(int smp) {} #define LOCK_PREFIX "" #endif +struct paravirt_patch; +#ifdef CONFIG_PARAVIRT +void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +#else +static inline void +apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +{} +#define __start_parainstructions NULL +#define __stop_parainstructions NULL +#endif + #endif /* _X86_64_ALTERNATIVE_H */ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2e114162314..ac0a5822299 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -911,6 +911,7 @@ static int init_section_ref_ok(const char *name) ".toc1", /* used by ppc64 */ ".stab", ".rodata", + ".parainstructions", ".text.lock", "__bug_table", /* used by powerpc for BUG() */ ".pci_fixup_header", @@ -931,6 +932,7 @@ static int init_section_ref_ok(const char *name) ".altinstructions", ".eh_frame", ".debug", + ".parainstructions", NULL }; /* part of section name */ -- cgit v1.2.3-70-g09d2 From d7cd56111f30259e1b532a12e06f59f8e0a20355 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] i386: cpu_detect extraction Both lhype and Xen want to call the core of the x86 cpu detect code before calling start_kernel. (extracted from larger patch) AK: folded in start_kernel header patch Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Rusty Russell Signed-off-by: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Andi Kleen Signed-off-by: Andrew Morton --- arch/i386/kernel/cpu/common.c | 37 +++++++++++++++++++++---------------- include/asm-i386/processor.h | 3 +++ include/linux/start_kernel.h | 12 ++++++++++++ init/main.c | 1 + 4 files changed, 37 insertions(+), 16 deletions(-) create mode 100644 include/linux/start_kernel.h (limited to 'include') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index cda41aef79a..68bcb687019 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -236,29 +236,14 @@ static int __cpuinit have_cpuid_p(void) return flag_is_changeable_p(X86_EFLAGS_ID); } -/* Do minimum CPU detection early. - Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. - - WARNING: this function is only called on the BP. Don't add code here - that is supposed to run on all CPUs. */ -static void __init early_cpu_detect(void) +void __init cpu_detect(struct cpuinfo_x86 *c) { - struct cpuinfo_x86 *c = &boot_cpu_data; - - c->x86_cache_alignment = 32; - - if (!have_cpuid_p()) - return; - /* Get vendor name */ cpuid(0x00000000, &c->cpuid_level, (int *)&c->x86_vendor_id[0], (int *)&c->x86_vendor_id[8], (int *)&c->x86_vendor_id[4]); - get_cpu_vendor(c, 1); - c->x86 = 4; if (c->cpuid_level >= 0x00000001) { u32 junk, tfms, cap0, misc; @@ -275,6 +260,26 @@ static void __init early_cpu_detect(void) } } +/* Do minimum CPU detection early. + Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. */ +static void __init early_cpu_detect(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + c->x86_cache_alignment = 32; + + if (!have_cpuid_p()) + return; + + cpu_detect(c); + + get_cpu_vendor(c, 1); +} + static void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 5f0418d0078..a52d6544042 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -20,6 +20,7 @@ #include #include #include +#include /* flag for disabling the tsc */ extern int tsc_disable; @@ -112,6 +113,8 @@ extern struct cpuinfo_x86 cpu_data[]; extern int cpu_llc_id[NR_CPUS]; extern char ignore_fpu_irq; +void __init cpu_detect(struct cpuinfo_x86 *c); + extern void identify_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h new file mode 100644 index 00000000000..d3e5f275654 --- /dev/null +++ b/include/linux/start_kernel.h @@ -0,0 +1,12 @@ +#ifndef _LINUX_START_KERNEL_H +#define _LINUX_START_KERNEL_H + +#include +#include + +/* Define the prototype for start_kernel here, rather than cluttering + up something else. */ + +extern asmlinkage void __init start_kernel(void); + +#endif /* _LINUX_START_KERNEL_H */ diff --git a/init/main.c b/init/main.c index 36f608a7cfb..985c9ed8605 100644 --- a/init/main.c +++ b/init/main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From c9ccf30d77f04064fe5436027ab9d2230c7cdd94 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Add startup infrastructure for paravirtualization 1) Each hypervisor writes a probe function to detect whether we are running under that hypervisor. paravirt_probe() registers this function. 2) If vmlinux is booted with ring != 0, we call all the probe functions (with registers except %esp intact) in link order: the winner will not return. Signed-off-by: Rusty Russell Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Zachary Amsden Signed-off-by: Andrew Morton --- arch/i386/kernel/Makefile | 2 ++ arch/i386/kernel/head.S | 33 +++++++++++++++++++++++++++++++++ arch/i386/kernel/paravirt.c | 4 ++++ arch/i386/kernel/vmlinux.lds.S | 6 ++++++ include/asm-i386/paravirt.h | 5 +++++ 5 files changed, 50 insertions(+) (limited to 'include') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 40661213604..1e8988e558c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -39,6 +39,8 @@ obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o + +# Make sure this is linked after any other paravirt_ops structs: see head.S obj-$(CONFIG_PARAVIRT) += paravirt.o EXTRA_AFLAGS := -traditional diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 5b14e95ac8b..edef5084ce1 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -55,6 +55,12 @@ */ ENTRY(startup_32) +#ifdef CONFIG_PARAVIRT + movl %cs, %eax + testl $0x3, %eax + jnz startup_paravirt +#endif + /* * Set segments to known values. */ @@ -486,6 +492,33 @@ ignore_int: #endif iret +#ifdef CONFIG_PARAVIRT +startup_paravirt: + cld + movl $(init_thread_union+THREAD_SIZE),%esp + + /* We take pains to preserve all the regs. */ + pushl %edx + pushl %ecx + pushl %eax + + /* paravirt.o is last in link, and that probe fn never returns */ + pushl $__start_paravirtprobe +1: + movl 0(%esp), %eax + pushl (%eax) + movl 8(%esp), %eax + call *(%esp) + popl %eax + + movl 4(%esp), %eax + movl 8(%esp), %ecx + movl 12(%esp), %edx + + addl $4, (%esp) + jmp 1b +#endif + /* * Real beginning of normal "text" segment */ diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index d4646042644..5a9bd3250a1 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -387,6 +388,9 @@ static int __init print_banner(void) } core_initcall(print_banner); +/* We simply declare start_kernel to be the paravirt probe of last resort. */ +paravirt_probe(start_kernel); + struct paravirt_ops paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 5c69cf0e594..877dc5cfe3a 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -65,6 +65,12 @@ SECTIONS CONSTRUCTORS } :data + __start_paravirtprobe = .; + .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) { + *(.paravirtprobe) + } + __stop_paravirtprobe = .; + . = ALIGN(4096); .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { __nosave_begin = .; diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 081194751ad..dd707d8c827 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -120,6 +120,11 @@ struct paravirt_ops void (fastcall *iret)(void); }; +/* Mark a paravirt probe function. */ +#define paravirt_probe(fn) \ + static asmlinkage void (*__paravirtprobe_##fn)(void) __attribute_used__ \ + __attribute__((__section__(".paravirtprobe"))) = fn + extern struct paravirt_ops paravirt_ops; #define paravirt_enabled() (paravirt_ops.paravirt_enabled) -- cgit v1.2.3-70-g09d2 From 4f205fd45a5c192907188d6f8f6d7e66db859248 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Allow selected bug checks to be Allow selected bug checks to be skipped by paravirt kernels. The two most important are the F00F workaround (which is either done by the hypervisor, or not required), and the 'hlt' instruction check, which can break under some hypervisors. Signed-off-by: Zachary Amsden Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/cpu/intel.c | 2 +- include/asm-i386/bugs.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 798c2f617e8..3ae795e9056 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -107,7 +107,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) * Note that the workaround only should be initialized once... */ c->f00f_bug = 0; - if ( c->x86 == 5 ) { + if (!paravirt_enabled() && c->x86 == 5) { static int f00f_workaround_enabled = 0; c->f00f_bug = 1; diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 592ffeeda45..38f1aebbbdb 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -21,6 +21,7 @@ #include #include #include +#include static int __init no_halt(char *s) { @@ -91,6 +92,9 @@ static void __init check_fpu(void) static void __init check_hlt(void) { + if (paravirt_enabled()) + return; + printk(KERN_INFO "Checking 'hlt' instruction... "); if (!boot_cpu_data.hlt_works_ok) { printk("disabled\n"); -- cgit v1.2.3-70-g09d2 From 13623d79309dd82e1964458fa017979d16f33fa8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Add APIC accessors to paravirt-ops. Add APIC accessors to paravirt-ops. Unfortunately, we need two write functions, as some older broken hardware requires workarounds for Pentium APIC errata - this is the purpose of apic_write_atomic. AK: replaced __inline with inline Signed-off-by: Zachary Amsden Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/paravirt.c | 8 ++++++++ include/asm-i386/apic.h | 15 ++++++++++++--- include/asm-i386/paravirt.h | 27 +++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 5a9bd3250a1..fe82eb3adf4 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include /* nop stub */ static void native_nop(void) @@ -446,6 +448,12 @@ struct paravirt_ops paravirt_ops = { .io_delay = native_io_delay, .const_udelay = __const_udelay, +#ifdef CONFIG_X86_LOCAL_APIC + .apic_write = native_apic_write, + .apic_write_atomic = native_apic_write_atomic, + .apic_read = native_apic_read, +#endif + .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, }; diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index b9529578fc3..41a44319905 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -37,18 +37,27 @@ extern void generic_apic_probe(void); /* * Basic functions accessing APICs. */ +#ifdef CONFIG_PARAVIRT +#include +#else +#define apic_write native_apic_write +#define apic_write_atomic native_apic_write_atomic +#define apic_read native_apic_read +#endif -static __inline void apic_write(unsigned long reg, unsigned long v) +static __inline fastcall void native_apic_write(unsigned long reg, + unsigned long v) { *((volatile unsigned long *)(APIC_BASE+reg)) = v; } -static __inline void apic_write_atomic(unsigned long reg, unsigned long v) +static __inline fastcall void native_apic_write_atomic(unsigned long reg, + unsigned long v) { xchg((volatile unsigned long *)(APIC_BASE+reg), v); } -static __inline unsigned long apic_read(unsigned long reg) +static __inline fastcall unsigned long native_apic_read(unsigned long reg) { return *((volatile unsigned long *)(APIC_BASE+reg)); } diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index dd707d8c827..e2c803fadb1 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -115,6 +115,12 @@ struct paravirt_ops void (fastcall *io_delay)(void); void (*const_udelay)(unsigned long loops); +#ifdef CONFIG_X86_LOCAL_APIC + void (fastcall *apic_write)(unsigned long reg, unsigned long v); + void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); + unsigned long (fastcall *apic_read)(unsigned long reg); +#endif + /* These two are jmp to, not actually called. */ void (fastcall *irq_enable_sysexit)(void); void (fastcall *iret)(void); @@ -270,6 +276,27 @@ static inline void slow_down_io(void) { #endif } +#ifdef CONFIG_X86_LOCAL_APIC +/* + * Basic functions accessing APICs. + */ +static inline void apic_write(unsigned long reg, unsigned long v) +{ + paravirt_ops.apic_write(reg,v); +} + +static inline void apic_write_atomic(unsigned long reg, unsigned long v) +{ + paravirt_ops.apic_write_atomic(reg,v); +} + +static inline unsigned long apic_read(unsigned long reg) +{ + return paravirt_ops.apic_read(reg); +} +#endif + + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ -- cgit v1.2.3-70-g09d2 From da181a8b3916aa7f2e3c5775d2bd2fe3454cf82d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Add MMU virtualization to paravirt_ops Add the three bare TLB accessor functions to paravirt-ops. Most amusingly, flush_tlb is redefined on SMP, so I can't call the paravirt op flush_tlb. Instead, I chose to indicate the actual flush type, kernel (global) vs. user (non-global). Global in this sense means using the global bit in the page table entry, which makes TLB entries persistent across CR3 reloads, not global as in the SMP sense of invoking remote shootdowns, so the term is confusingly overloaded. AK: folded in fix from Zach for PAE compilation Signed-off-by: Zachary Amsden Signed-off-by: Chris Wright Signed-off-by: Andi Kleen Cc: Rusty Russell Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton --- arch/i386/kernel/paravirt.c | 109 ++++++++++++++++++++++++++++++++++++++ arch/i386/mm/boot_ioremap.c | 1 + include/asm-i386/paravirt.h | 75 ++++++++++++++++++++++++++ include/asm-i386/pgtable-2level.h | 5 +- include/asm-i386/pgtable-3level.h | 40 +++++++------- include/asm-i386/pgtable.h | 4 +- include/asm-i386/tlbflush.h | 18 +++++-- 7 files changed, 226 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index fe82eb3adf4..3dceab5828f 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c @@ -31,6 +31,7 @@ #include #include #include +#include /* nop stub */ static void native_nop(void) @@ -379,6 +380,97 @@ static fastcall void native_io_delay(void) asm volatile("outb %al,$0x80"); } +static fastcall void native_flush_tlb(void) +{ + __native_flush_tlb(); +} + +/* + * Global pages have to be flushed a bit differently. Not a real + * performance problem because this does not happen often. + */ +static fastcall void native_flush_tlb_global(void) +{ + __native_flush_tlb_global(); +} + +static fastcall void native_flush_tlb_single(u32 addr) +{ + __native_flush_tlb_single(addr); +} + +#ifndef CONFIG_X86_PAE +static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + *pmdp = pmdval; +} + +#else /* CONFIG_X86_PAE */ + +static fastcall void native_set_pte(pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = pte.pte_high; + smp_wmb(); + ptep->pte_low = pte.pte_low; +} + +static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + set_64bit((unsigned long long *)ptep,pte_val(pteval)); +} + +static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); +} + +static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) +{ + *pudp = pudval; +} + +static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + ptep->pte_low = 0; + smp_wmb(); + ptep->pte_high = 0; +} + +static fastcall void native_pmd_clear(pmd_t *pmd) +{ + u32 *tmp = (u32 *)pmd; + *tmp = 0; + smp_wmb(); + *(tmp + 1) = 0; +} +#endif /* CONFIG_X86_PAE */ + /* These are in entry.S */ extern fastcall void native_iret(void); extern fastcall void native_irq_enable_sysexit(void); @@ -454,6 +546,23 @@ struct paravirt_ops paravirt_ops = { .apic_read = native_apic_read, #endif + .flush_tlb_user = native_flush_tlb, + .flush_tlb_kernel = native_flush_tlb_global, + .flush_tlb_single = native_flush_tlb_single, + + .set_pte = native_set_pte, + .set_pte_at = native_set_pte_at, + .set_pmd = native_set_pmd, + .pte_update = (void *)native_nop, + .pte_update_defer = (void *)native_nop, +#ifdef CONFIG_X86_PAE + .set_pte_atomic = native_set_pte_atomic, + .set_pte_present = native_set_pte_present, + .set_pud = native_set_pud, + .pte_clear = native_pte_clear, + .pmd_clear = native_pmd_clear, +#endif + .irq_enable_sysexit = native_irq_enable_sysexit, .iret = native_iret, }; diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 4de11f508c3..4de95a17a7d 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -16,6 +16,7 @@ */ #undef CONFIG_X86_PAE +#undef CONFIG_PARAVIRT #include #include #include diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index e2c803fadb1..9f06265065f 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h @@ -4,6 +4,7 @@ * para-virtualization: those hooks are defined here. */ #include #include +#include #ifdef CONFIG_PARAVIRT /* These are the most performance critical ops, so we want to be able to patch @@ -27,6 +28,7 @@ struct thread_struct; struct Xgt_desc_struct; struct tss_struct; +struct mm_struct; struct paravirt_ops { unsigned int kernel_rpl; @@ -121,6 +123,23 @@ struct paravirt_ops unsigned long (fastcall *apic_read)(unsigned long reg); #endif + void (fastcall *flush_tlb_user)(void); + void (fastcall *flush_tlb_kernel)(void); + void (fastcall *flush_tlb_single)(u32 addr); + + void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); + void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); + void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); + void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); +#ifdef CONFIG_X86_PAE + void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); + void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); + void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); + void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + void (fastcall *pmd_clear)(pmd_t *pmdp); +#endif + /* These two are jmp to, not actually called. */ void (fastcall *irq_enable_sysexit)(void); void (fastcall *iret)(void); @@ -297,6 +316,62 @@ static inline unsigned long apic_read(unsigned long reg) #endif +#define __flush_tlb() paravirt_ops.flush_tlb_user() +#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() +#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) + +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte(ptep, pteval); +} + +static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_at(mm, addr, ptep, pteval); +} + +static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + paravirt_ops.set_pmd(pmdp, pmdval); +} + +static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update(mm, addr, ptep); +} + +static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) +{ + paravirt_ops.pte_update_defer(mm, addr, ptep); +} + +#ifdef CONFIG_X86_PAE +static inline void set_pte_atomic(pte_t *ptep, pte_t pteval) +{ + paravirt_ops.set_pte_atomic(ptep, pteval); +} + +static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +{ + paravirt_ops.set_pte_present(mm, addr, ptep, pte); +} + +static inline void set_pud(pud_t *pudp, pud_t pudval) +{ + paravirt_ops.set_pud(pudp, pudval); +} + +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + paravirt_ops.pte_clear(mm, addr, ptep); +} + +static inline void pmd_clear(pmd_t *pmdp) +{ + paravirt_ops.pmd_clear(pmdp); +} +#endif + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch { u8 *instr; /* original instructions */ diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 8d8d3b9ecdb..04d6186abc2 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -13,11 +13,14 @@ * within a page table are directly modified. Thus, the following * hook is made available. */ +#ifndef CONFIG_PARAVIRT #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) +#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) +#endif + #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) #define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval) -#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval)) #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index c2d701ea35b..2a6e67db8bc 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -44,6 +44,7 @@ static inline int pte_exec_kernel(pte_t pte) return pte_x(pte); } +#ifndef CONFIG_PARAVIRT /* Rules for using set_pte: the pte being assigned *must* be * either not present or in a state where the hardware will * not attempt to update the pte. In places where this is @@ -80,25 +81,6 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte #define set_pud(pudptr,pudval) \ (*(pudptr) = (pudval)) -/* - * Pentium-II erratum A13: in PAE mode we explicitly have to flush - * the TLB via cr3 if the top-level pgd is changed... - * We do not let the generic code free and clear pgd entries due to - * this erratum. - */ -static inline void pud_clear (pud_t * pud) { } - -#define pud_page(pud) \ -((struct page *) __va(pud_val(pud) & PAGE_MASK)) - -#define pud_page_vaddr(pud) \ -((unsigned long) __va(pud_val(pud) & PAGE_MASK)) - - -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ - pmd_index(address)) - /* * For PTEs and PDEs, we must clear the P-bit first when clearing a page table * entry, so clear the bottom half first and enforce ordering with a compiler @@ -118,6 +100,26 @@ static inline void pmd_clear(pmd_t *pmd) smp_wmb(); *(tmp + 1) = 0; } +#endif + +/* + * Pentium-II erratum A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... + * We do not let the generic code free and clear pgd entries due to + * this erratum. + */ +static inline void pud_clear (pud_t * pud) { } + +#define pud_page(pud) \ +((struct page *) __va(pud_val(pud) & PAGE_MASK)) + +#define pud_page_vaddr(pud) \ +((unsigned long) __va(pud_val(pud) & PAGE_MASK)) + + +/* Find an entry in the second-level page table.. */ +#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ + pmd_index(address)) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 7d398f493dd..efd7d90789d 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -15,6 +15,7 @@ #include #include #include +#include #ifndef _I386_BITOPS_H #include @@ -246,6 +247,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p # include #endif +#ifndef CONFIG_PARAVIRT /* * Rules for using pte_update - it must be called after any PTE update which * has not been done using the set_pte / clear_pte interfaces. It is used by @@ -261,7 +263,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p */ #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) - +#endif /* * We only update the dirty/accessed state if we set diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h index 360648b0f2b..4dd82840d53 100644 --- a/include/asm-i386/tlbflush.h +++ b/include/asm-i386/tlbflush.h @@ -4,7 +4,15 @@ #include #include -#define __flush_tlb() \ +#ifdef CONFIG_PARAVIRT +#include +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +#define __native_flush_tlb() \ do { \ unsigned int tmpreg; \ \ @@ -19,7 +27,7 @@ * Global pages have to be flushed a bit differently. Not a real * performance problem because this does not happen often. */ -#define __flush_tlb_global() \ +#define __native_flush_tlb_global() \ do { \ unsigned int tmpreg, cr4, cr4_orig; \ \ @@ -36,6 +44,9 @@ : "memory"); \ } while (0) +#define __native_flush_tlb_single(addr) \ + __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") + # define __flush_tlb_all() \ do { \ if (cpu_has_pge) \ @@ -46,9 +57,6 @@ #define cpu_has_invlpg (boot_cpu_data.x86 > 3) -#define __flush_tlb_single(addr) \ - __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") - #ifdef CONFIG_X86_INVLPG # define __flush_tlb_one(addr) __flush_tlb_single(addr) #else -- cgit v1.2.3-70-g09d2 From a2952d8949bb0b37c1be92a89c4f180c74292857 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: Preparatory mmu header movement Move header includes for the nopud / nopmd types to the location of the actual pte / pgd type definitions. This allows generic 4-level page type code to be written before the split 2/3 level page table headers are included. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/page.h | 2 ++ include/asm-i386/pgtable-2level.h | 2 -- include/asm-i386/pgtable-3level.h | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 2b69686107a..fd3f64ace24 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -52,6 +52,7 @@ typedef struct { unsigned long long pgprot; } pgprot_t; #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) #define __pmd(x) ((pmd_t) { (x) } ) #define HPAGE_SHIFT 21 +#include #else typedef struct { unsigned long pte_low; } pte_t; typedef struct { unsigned long pgd; } pgd_t; @@ -59,6 +60,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ #define pte_val(x) ((x).pte_low) #define HPAGE_SHIFT 22 +#include #endif #define PTE_MASK PAGE_MASK diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index 04d6186abc2..c08cbc4c2d8 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -1,8 +1,6 @@ #ifndef _I386_PGTABLE_2LEVEL_H #define _I386_PGTABLE_2LEVEL_H -#include - #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) #define pgd_ERROR(e) \ diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 2a6e67db8bc..54287f148c7 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -1,8 +1,6 @@ #ifndef _I386_PGTABLE_3LEVEL_H #define _I386_PGTABLE_3LEVEL_H -#include - /* * Intel Physical Address Extension (PAE) Mode - three-level page * tables on PPro+ CPUs. -- cgit v1.2.3-70-g09d2 From dfbea0ad50e08c52539bddce977b07f77a762ba4 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Thu, 7 Dec 2006 02:14:08 +0100 Subject: [PATCH] paravirt: fix parameter names in mmu operations Make parameter names match function argument names for the yet to be defined pte_update_defer accessor. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/pgtable.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index efd7d90789d..04dd39b973a 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -277,7 +277,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p do { \ if (dirty) { \ (ptep)->pte_low = (entry).pte_low; \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ } while (0) @@ -307,7 +307,7 @@ do { \ __dirty = pte_dirty(*(ptep)); \ if (__dirty) { \ clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low); \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ __dirty; \ @@ -320,7 +320,7 @@ do { \ __young = pte_young(*(ptep)); \ if (__young) { \ clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low); \ - pte_update_defer((vma)->vm_mm, (addr), (ptep)); \ + pte_update_defer((vma)->vm_mm, (address), (ptep)); \ flush_tlb_page(vma, address); \ } \ __young; \ -- cgit v1.2.3-70-g09d2 From 8ecb8950695e907ed25acffec9e98c6806e311c8 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] paravirt: fix missing pte update The function ptep_get_and_clear uses an atomic instruction sequence to get and clear an active pte. Rather than add such an atomic operator to all virtual machine implementations in paravirt-ops, it is easier to support the raw atomic sequence and use either a trapping writable pagetable approach, or a post-update notification. For the post update notification, we require the pte_update function to be called after the access. Combine the 2-level and 3-level paging operators into one common function which does the post-update notification, and rename the actual atomic sequences to raw_ptep_xxx operators. Signed-off-by: Zachary Amsden Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Andrew Morton --- include/asm-i386/pgtable-2level.h | 3 +-- include/asm-i386/pgtable-3level.h | 3 +-- include/asm-i386/pgtable.h | 8 ++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h index c08cbc4c2d8..38c3fcc0676 100644 --- a/include/asm-i386/pgtable-2level.h +++ b/include/asm-i386/pgtable-2level.h @@ -23,8 +23,7 @@ #define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte_low, 0)) +#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h index 54287f148c7..7a2318f3830 100644 --- a/include/asm-i386/pgtable-3level.h +++ b/include/asm-i386/pgtable-3level.h @@ -119,8 +119,7 @@ static inline void pud_clear (pud_t * pud) { } #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \ pmd_index(address)) -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline pte_t raw_ptep_get_and_clear(pte_t *ptep) { pte_t res; diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 04dd39b973a..b4a301f647b 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -326,6 +326,14 @@ do { \ __young; \ }) +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t pte = raw_ptep_get_and_clear(ptep); + pte_update(mm, addr, ptep); + return pte; +} + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { -- cgit v1.2.3-70-g09d2 From c55d92d141b9c40c67db249de91f5c224eb49859 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] i386: Add support for compilation for Core2 gcc doesn't support -mtune=core2 yet, but will be soon. Use -mtune=generic or -mtune=i686 as fallback TBD need benchmarking for INTEL_USERCOPY etc. So far I used the same defaults as MPENTIUMM Signed-off-by: Andi Kleen --- arch/i386/Kconfig.cpu | 19 +++++++++++++------ arch/i386/Makefile.cpu | 1 + include/asm-i386/module.h | 2 ++ 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index fc4f2abccf0..821fd269ca5 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu @@ -103,8 +103,15 @@ config MPENTIUMM Select this for Intel Pentium M (not Pentium-4 M) notebook chips. +config MCORE2 + bool "Core 2/newer Xeon" + help + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and 53xx) + CPUs. You can distingush newer from older Xeons by the CPU family + in /proc/cpuinfo. Newer ones have 6. + config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" help Select this for Intel Pentium 4 chips. This includes the Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M @@ -229,7 +236,7 @@ config X86_L1_CACHE_SHIFT default "7" if MPENTIUM4 || X86_GENERIC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 config RWSEM_GENERIC_SPINLOCK bool @@ -287,17 +294,17 @@ config X86_ALIGNMENT_16 config X86_GOOD_APIC bool - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 default y config X86_INTEL_USERCOPY bool - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 default y config X86_USE_PPRO_CHECKSUM bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 default y config X86_USE_3DNOW @@ -312,5 +319,5 @@ config X86_OOSTORE config X86_TSC bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ default y diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index a11befba26d..a32c031c90d 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -32,6 +32,7 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) +cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686)) # AMD Elan support cflags-$(CONFIG_X86_ELAN) += -march=i486 diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index 424661d25bd..fe5ae42e027 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -20,6 +20,8 @@ struct mod_arch_specific #define MODULE_PROC_FAMILY "586TSC " #elif defined CONFIG_M586MMX #define MODULE_PROC_FAMILY "586MMX " +#elif defined CONFIG_MCORE2 +#define MODULE_PROC_FAMILY "CORE2 " #elif defined CONFIG_M686 #define MODULE_PROC_FAMILY "686 " #elif defined CONFIG_MPENTIUMII -- cgit v1.2.3-70-g09d2 From 103efcd9aac1de4da6a1477f2f3b9fcf35273a18 Mon Sep 17 00:00:00 2001 From: Ernie Petrides Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: fix perms/range of vsyscall vma in /proc/*/maps The final line of /proc//maps on x86_64 for native 64-bit tasks shows an incorrect ending address and incorrect permissions. There is only a single page mapped in this vsyscall region, and it is accessible for both read and execute. The patch below fixes this. (Since 32-bit-compat tasks have a real vma with correct perms/range, no change is necessary for that scenario.) Before the patch, a "cat /proc/self/maps | tail -1" shows this: ffffffffff600000-ffffffffffe00000 ---p 00000000 [...] After the patch, this is the output: ffffffffff600000-ffffffffff601000 r-xp 00000000 [...] Signed-off-by: Ernie Petrides Signed-off-by: Andi Kleen --- arch/x86_64/kernel/vsyscall.c | 1 + arch/x86_64/mm/init.c | 7 ++++--- include/asm-x86_64/vsyscall.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f..c3de9a09cd9 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -290,6 +290,7 @@ static void __init map_vsyscall(void) extern char __vsyscall_0; unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); + /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); } diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 4c0c00ef3ca..2968b90ef8a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -730,14 +730,15 @@ static __init int x8664_sysctl_init(void) __initcall(x8664_sysctl_init); #endif -/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only covers the 64bit vsyscall page now. 32bit has a real VMA now and does not need special handling anymore. */ static struct vm_area_struct gate_vma = { .vm_start = VSYSCALL_START, - .vm_end = VSYSCALL_END, - .vm_page_prot = PAGE_READONLY + .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT), + .vm_page_prot = PAGE_READONLY_EXEC, + .vm_flags = VM_READ | VM_EXEC }; struct vm_area_struct *get_gate_vma(struct task_struct *tsk) diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 01d1c17e284..05cb8dd200d 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -10,6 +10,7 @@ enum vsyscall_num { #define VSYSCALL_START (-10UL << 20) #define VSYSCALL_SIZE 1024 #define VSYSCALL_END (-2UL << 20) +#define VSYSCALL_MAPPED_PAGES 1 #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) #ifdef __KERNEL__ -- cgit v1.2.3-70-g09d2 From 4a1c42275078f48b90428cdb062f5220d79ec9da Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: remove prototype of free_bootmem_generic() The function doesn't exist (anymore). Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/proto.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index e72cfcdf534..21fa5afa232 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -61,7 +61,6 @@ extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn); extern unsigned long numa_free_all_bootmem(void); extern void reserve_bootmem_generic(unsigned long phys, unsigned len); -extern void free_bootmem_generic(unsigned long phys, unsigned len); extern void load_gs_index(unsigned gs); -- cgit v1.2.3-70-g09d2 From eab724e5df17af0ed0dac03da8f75aa336c31206 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:09 +0100 Subject: [PATCH] x86-64: adjust pmd_bad() Make pmd_bad() symmetrical to pgd_bad() and pud_bad(). At once, simplify them all. TBD: tighten down the checks again as suggested by Hugh D. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/asm-x86_64/pgtable.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 0555c1c4d8f..59901c690a0 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -221,20 +221,19 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -static inline unsigned long pgd_bad(pgd_t pgd) -{ - unsigned long val = pgd_val(pgd); - val &= ~PTE_MASK; - val &= ~(_PAGE_USER | _PAGE_DIRTY); - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); -} +static inline unsigned long pgd_bad(pgd_t pgd) +{ + return pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); +} static inline unsigned long pud_bad(pud_t pud) { - unsigned long val = pud_val(pud); - val &= ~PTE_MASK; - val &= ~(_PAGE_USER | _PAGE_DIRTY); - return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); +} + +static inline unsigned long pmd_bad(pmd_t pmd) +{ + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val(x)) @@ -347,7 +346,6 @@ static inline int pmd_large(pmd_t pte) { #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) -- cgit v1.2.3-70-g09d2 From 274e1bbdeeaf16e71418f11f5f305ab26061f2c2 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86: add write_pci_config_byte() to direct PCI access routines Mechanism of selecting physical mode in genapic when cpu hotplug is enabled on x86_64, broke the quirk(quirk_intel_irqbalance()) introduced for working around the transposing interrupt message errata in E7520/E7320/E7525 (revision ID 0x9 and below. errata #23 in http://download.intel.com/design/chipsets/specupdt/30304203.pdf). This errata requires the mode to be in logical flat, so that interrupts can be directed to more than one cpu(and thus use hardware IRQ balancing enabled by BIOS on these platforms). Following four patches fixes this by moving the quirk to early quirk and forcing the x86_64 genapic selection to logical flat on these platforms. Thanks to Shaohua for pointing out the breakage. This patch: Add write_pci_config_byte() to direct PCI access routines Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/pci/early.c | 7 +++++++ include/asm-x86_64/pci-direct.h | 1 + 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/arch/i386/pci/early.c b/arch/i386/pci/early.c index 713d6c866ca..42df4b6606d 100644 --- a/arch/i386/pci/early.c +++ b/arch/i386/pci/early.c @@ -45,6 +45,13 @@ void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, outl(val, 0xcfc); } +void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) +{ + PDprintk("%x writing to %x: %x\n", slot, offset, val); + outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); + outb(val, 0xcfc); +} + int early_pci_allowed(void) { return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == diff --git a/include/asm-x86_64/pci-direct.h b/include/asm-x86_64/pci-direct.h index eba9cb471df..6823fa4f1af 100644 --- a/include/asm-x86_64/pci-direct.h +++ b/include/asm-x86_64/pci-direct.h @@ -10,6 +10,7 @@ extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); +extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern int early_pci_allowed(void); -- cgit v1.2.3-70-g09d2 From fd6d7d26897dec834d0b9fbdc59819b0332a1257 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] i386: introduce the mechanism of disabling cpu hotplug control Add 'enable_cpu_hotplug' flag and when cleared, the hotplug control file ("online") will not be added under /sys/devices/system/cpu/cpuX/ Next patch doing PCI quirks will use this. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/kernel/topology.c | 6 +++++- include/asm-i386/cpu.h | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 07d6da36a82..844c08fdb22 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -40,14 +40,18 @@ int arch_register_cpu(int num) * restrictions and assumptions in kernel. This basically * doesnt add a control file, one cannot attempt to offline * BSP. + * + * Also certain PCI quirks require not to enable hotplug control + * for all CPU's. */ - if (!num) + if (!num || !enable_cpu_hotplug) cpu_devices[num].cpu.no_control = 1; return register_cpu(&cpu_devices[num].cpu, num); } #ifdef CONFIG_HOTPLUG_CPU +int enable_cpu_hotplug = 1; void arch_unregister_cpu(int num) { return unregister_cpu(&cpu_devices[num].cpu); diff --git a/include/asm-i386/cpu.h b/include/asm-i386/cpu.h index b1bc7b1b64b..9d914e1e4aa 100644 --- a/include/asm-i386/cpu.h +++ b/include/asm-i386/cpu.h @@ -13,6 +13,9 @@ struct i386_cpu { extern int arch_register_cpu(int num); #ifdef CONFIG_HOTPLUG_CPU extern void arch_unregister_cpu(int); +extern int enable_cpu_hotplug; +#else +#define enable_cpu_hotplug 0 #endif DECLARE_PER_CPU(int, cpu_state); -- cgit v1.2.3-70-g09d2 From 72486f1f8f0a2bc828b9d30cf4690cf2dd6807fc Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] i386: change the 'no_control' field to 'hotpluggable' in the struct cpu Change the 'no_control' field in the cpu struct to a more positive and better term 'hotpluggable'. And change(/cleanup) the logic accordingly. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/kernel/topology.c | 4 ++-- arch/ia64/kernel/topology.c | 8 ++++---- arch/powerpc/kernel/sysfs.c | 8 ++++---- drivers/base/cpu.c | 6 +++--- include/linux/cpu.h | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 844c08fdb22..79cf608e14c 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -44,8 +44,8 @@ int arch_register_cpu(int num) * Also certain PCI quirks require not to enable hotplug control * for all CPU's. */ - if (!num || !enable_cpu_hotplug) - cpu_devices[num].cpu.no_control = 1; + if (num && enable_cpu_hotplug) + cpu_devices[num].cpu.hotpluggable = 1; return register_cpu(&cpu_devices[num].cpu, num); } diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 5629b45e89c..687500ddb4b 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -31,11 +31,11 @@ int arch_register_cpu(int num) { #if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU) /* - * If CPEI cannot be re-targetted, and this is - * CPEI target, then dont create the control file + * If CPEI can be re-targetted or if this is not + * CPEI target, then it is hotpluggable */ - if (!can_cpei_retarget() && is_cpu_cpei_target(num)) - sysfs_cpus[num].cpu.no_control = 1; + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; map_cpu_to_node(num, node_cpuid[num].nid); #endif diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 22123a0d541..63ed265b7f0 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -239,7 +239,7 @@ static void unregister_cpu_online(unsigned int cpu) struct cpu *c = &per_cpu(cpu_devices, cpu); struct sys_device *s = &c->sysdev; - BUG_ON(c->no_control); + BUG_ON(!c->hotpluggable); if (!firmware_has_feature(FW_FEATURE_ISERIES) && cpu_has_feature(CPU_FTR_SMT)) @@ -424,10 +424,10 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (!ppc_md.cpu_die) - c->no_control = 1; + if (ppc_md.cpu_die) + c->hotpluggable = 1; - if (cpu_online(cpu) || (c->no_control == 0)) { + if (cpu_online(cpu) || c->hotpluggable) { register_cpu(c, cpu); sysdev_create_file(&c->sysdev, &attr_physical_id); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 1f745f12f94..7fd095efaeb 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -104,8 +104,8 @@ static SYSDEV_ATTR(crash_notes, 0400, show_crash_notes, NULL); /* * register_cpu - Setup a driverfs device for a CPU. - * @cpu - Callers can set the cpu->no_control field to 1, to indicate not to - * generate a control file in sysfs for this CPU. + * @cpu - cpu->hotpluggable field set to 1 will generate a control file in + * sysfs for this CPU. * @num - CPU number to use when creating the device. * * Initialize and register the CPU device. @@ -119,7 +119,7 @@ int __devinit register_cpu(struct cpu *cpu, int num) error = sysdev_register(&cpu->sysdev); - if (!error && !cpu->no_control) + if (!error && cpu->hotpluggable) register_cpu_control(cpu); if (!error) cpu_sys_devices[num] = &cpu->sysdev; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f02d71bf689..ad90340e7db 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -27,7 +27,7 @@ struct cpu { int node_id; /* The node which contains the CPU */ - int no_control; /* Should the sysfs control file be created? */ + int hotpluggable; /* creates sysfs control file if hotpluggable */ struct sys_device sysdev; }; -- cgit v1.2.3-70-g09d2 From 9899f826fc90beba4f78083f6230e06cbe1050c9 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86-64: add genapic_force Add genapic_force. Used by the next Intel quirks patch. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/x86_64/kernel/genapic.c | 9 ++++++++- include/asm-x86_64/genapic.h | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 8e78a75d186..b007433f96b 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -33,7 +33,7 @@ extern struct genapic apic_flat; extern struct genapic apic_physflat; struct genapic *genapic = &apic_flat; - +struct genapic *genapic_force; /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. @@ -46,6 +46,13 @@ void __init clustered_apic_check(void) u8 cluster_cnt[NUM_APIC_CLUSTERS]; int max_apic = 0; + /* genapic selection can be forced because of certain quirks. + */ + if (genapic_force) { + genapic = genapic_force; + goto print; + } + #if defined(CONFIG_ACPI) /* * Some x86_64 machines use physical APIC mode regardless of how many diff --git a/include/asm-x86_64/genapic.h b/include/asm-x86_64/genapic.h index a0e9a4b9348..b80f4bb5f27 100644 --- a/include/asm-x86_64/genapic.h +++ b/include/asm-x86_64/genapic.h @@ -30,6 +30,6 @@ struct genapic { }; -extern struct genapic *genapic; +extern struct genapic *genapic, *genapic_force, apic_flat; #endif -- cgit v1.2.3-70-g09d2 From b0d0a4ba45760b10ecee9035ed45b442c1a6cc84 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Thu, 7 Dec 2006 02:14:10 +0100 Subject: [PATCH] x86: fix the irqbalance quirk for E7320/E7520/E7525 Move the irqbalance quirks for E7320/E7520/E7525(Errata 23 in http://download.intel.com/design/chipsets/specupdt/30304203.pdf) to early quirks. And add a PCI quirk for these platforms to check(which happens very late during the boot) if the APIC routing is indeed set to default flat mode. This fixes the breakage(in x86_64) of this quirk due to cpu hotplug which selects physical mode instead of the logical flat(as needed for this errata workaround). Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Cc: Andi Kleen Cc: "Li, Shaohua" Signed-off-by: Andrew Morton --- arch/i386/kernel/acpi/earlyquirk.c | 21 +++++++++++++++++ arch/i386/kernel/quirks.c | 46 +++++++++++++++++++++++++++++--------- arch/i386/kernel/smpboot.c | 7 ++++++ arch/x86_64/kernel/early-quirks.c | 13 +++++++++++ arch/x86_64/kernel/smpboot.c | 8 +++++++ include/asm-i386/genapic.h | 2 +- include/asm-i386/irq.h | 2 ++ include/asm-x86_64/proto.h | 1 + 8 files changed, 88 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index c9841692bb7..4b60af7f91d 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI @@ -49,6 +50,24 @@ static int __init check_bridge(int vendor, int device) return 0; } +static void check_intel(void) +{ + u16 vendor, device; + + vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID); + + if (vendor != PCI_VENDOR_ID_INTEL) + return; + + device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + void __init check_acpi_pci(void) { int num, slot, func; @@ -60,6 +79,8 @@ void __init check_acpi_pci(void) if (!early_pci_allowed()) return; + check_intel(); + /* Poor man's PCI discovery */ for (num = 0; num < 32; num++) { for (slot = 0; slot < 32; slot++) { diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 9f6ab1789bb..a01320a7b63 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c @@ -3,10 +3,23 @@ */ #include #include +#include +#include +#include #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) +static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) +{ +#ifdef CONFIG_X86_64 + if (genapic != &apic_flat) + panic("APIC mode must be flat on this system\n"); +#elif defined(CONFIG_X86_GENERICARCH) + if (genapic != &apic_default) + panic("APIC mode must be default(flat) on this system. Use apic=default\n"); +#endif +} -static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) +void __init quirk_intel_irqbalance(void) { u8 config, rev; u32 word; @@ -16,18 +29,18 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) * based platforms. * Disable SW irqbalance/affinity on those platforms. */ - pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); + rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); if (rev > 0x9) return; printk(KERN_INFO "Intel E7520/7320/7525 detected."); - /* enable access to config space*/ - pci_read_config_byte(dev, 0xf4, &config); - pci_write_config_byte(dev, 0xf4, config|0x2); + /* enable access to config space */ + config = read_pci_config_byte(0, 0, 0, 0xf4); + write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); /* read xTPR register */ - raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); + word = read_pci_config_16(0, 0, 0x40, 0x4c); if (!(word & (1 << 13))) { printk(KERN_INFO "Disabling irq balancing and affinity\n"); @@ -37,14 +50,25 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) noirqdebug_setup(""); #ifdef CONFIG_PROC_FS no_irq_affinity = 1; +#endif +#ifdef CONFIG_HOTPLUG_CPU + printk(KERN_INFO "Disabling cpu hotplug control\n"); + enable_cpu_hotplug = 0; +#endif +#ifdef CONFIG_X86_64 + /* force the genapic selection to flat mode so that + * interrupts can be redirected to more than one CPU. + */ + genapic_force = &apic_flat; #endif } - /* put back the original value for config space*/ + /* put back the original value for config space */ if (!(config & 0x2)) - pci_write_config_byte(dev, 0xf4, config); + write_pci_config_byte(0, 0, 0, 0xf4, config); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); + #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index cd7de9c9654..346f27f4c79 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -1482,6 +1483,12 @@ int __devinit __cpu_up(unsigned int cpu) cpu_set(cpu, smp_commenced_mask); while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + +#ifdef CONFIG_X86_GENERICARCH + if (num_online_cpus() > 8 && genapic == &apic_default) + panic("Default flat APIC routing can't be used with > 8 cpus\n"); +#endif + return 0; } diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index fb0c6da41b7..829698f6d04 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -71,6 +71,18 @@ static void ati_bugs(void) { } +static void intel_bugs(void) +{ + u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); + +#ifdef CONFIG_SMP + if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || + device == PCI_DEVICE_ID_INTEL_E7520_MCH || + device == PCI_DEVICE_ID_INTEL_E7525_MCH) + quirk_intel_irqbalance(); +#endif +} + struct chipset { u16 vendor; void (*f)(void); @@ -80,6 +92,7 @@ static struct chipset early_qrk[] = { { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, { PCI_VENDOR_ID_VIA, via_bugs }, { PCI_VENDOR_ID_ATI, ati_bugs }, + { PCI_VENDOR_ID_INTEL, intel_bugs}, {} }; diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 62c2e747af5..4c161c208d5 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -60,6 +60,7 @@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -1167,6 +1168,13 @@ int __cpuinit __cpu_up(unsigned int cpu) while (!cpu_isset(cpu, cpu_online_map)) cpu_relax(); + + if (num_online_cpus() > 8 && genapic == &apic_flat) { + printk(KERN_WARNING + "flat APIC routing can't be used with > 8 cpus\n"); + BUG(); + } + err = 0; return err; diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index 8ffbb0f0745..fd2be593b06 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -122,6 +122,6 @@ struct genapic { APICFUNC(phys_pkg_id) \ } -extern struct genapic *genapic; +extern struct genapic *genapic, apic_default; #endif diff --git a/include/asm-i386/irq.h b/include/asm-i386/irq.h index 9e15ce0006e..11761cdaae1 100644 --- a/include/asm-i386/irq.h +++ b/include/asm-i386/irq.h @@ -37,6 +37,8 @@ static __inline__ int irq_canonicalize(int irq) extern int irqbalance_disable(char *str); #endif +extern void quirk_intel_irqbalance(void); + #ifdef CONFIG_HOTPLUG_CPU extern void fixup_irqs(cpumask_t map); #endif diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 21fa5afa232..6d324b83897 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -87,6 +87,7 @@ extern void syscall32_cpu_init(void); extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end); extern void early_quirks(void); +extern void quirk_intel_irqbalance(void); extern void check_efer(void); extern int unhandled_signal(struct task_struct *tsk, int sig); -- cgit v1.2.3-70-g09d2 From e1cccf48b182dd743c3c83a4fdf8dc570a43b393 Mon Sep 17 00:00:00 2001 From: Artiom Myaskouvskey Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: call efi_get_time during suspend Function efi_get_time called not only during init kernel phase but also during suspend (from get_cmos_time). When it is called from get_cmos_time the corresponding runtime service should be called in virtual and not in physical mode. Signed-off-by: Artiom Myaskouvskey Signed-off-by: Andi Kleen Cc: "Narayanan, Chandramouli" Cc: "Jiossy, Rami" Cc: "Satt, Shai" Cc: Andi Kleen Cc: Matt Domsch Signed-off-by: Andrew Morton --- arch/i386/kernel/efi.c | 17 ++++++++++++----- include/linux/efi.h | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 8b40648d0ef..b92c7f0a358 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -194,17 +194,24 @@ inline int efi_set_rtc_mmss(unsigned long nowtime) return 0; } /* - * This should only be used during kernel init and before runtime - * services have been remapped, therefore, we'll need to call in physical - * mode. Note, this call isn't used later, so mark it __init. + * This is used during kernel init before runtime + * services have been remapped and also during suspend, therefore, + * we'll need to call both in physical and virtual modes. */ -inline unsigned long __init efi_get_time(void) +inline unsigned long efi_get_time(void) { efi_status_t status; efi_time_t eft; efi_time_cap_t cap; - status = phys_efi_get_time(&eft, &cap); + if (efi.get_time) { + /* if we are in virtual mode use remapped function */ + status = efi.get_time(&eft, &cap); + } else { + /* we are in physical mode */ + status = phys_efi_get_time(&eft, &cap); + } + if (status != EFI_SUCCESS) printk("Oops: efitime: can't read time status: 0x%lx\n",status); diff --git a/include/linux/efi.h b/include/linux/efi.h index 66d621dbcb6..91ecf49fbf2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -300,7 +300,7 @@ extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); -extern unsigned long __init efi_get_time(void); +extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); extern struct efi_memory_map memmap; -- cgit v1.2.3-70-g09d2 From ee58fad51a2a767cb2567706ace967705233d881 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: x86-64 add Intel BTS cpufeature bit and detection (take 2) Here is a small patch for x86-64 which adds a cpufeature flag and detection code for Intel's Branch Trace Store (BTS) feature. This feature can be found on Intel P4 and Core 2 processors among others. It can also be used by perfmon. changelog: - add CPU_FEATURE_BTS - add Branch Trace Store detection signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/x86_64/kernel/setup.c | 2 ++ include/asm-x86_64/cpufeature.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 619af2e2fa2..a570c81c831 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -838,6 +838,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_ds) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h index d280384807e..0b3c686139f 100644 --- a/include/asm-x86_64/cpufeature.h +++ b/include/asm-x86_64/cpufeature.h @@ -69,6 +69,7 @@ #define X86_FEATURE_UP (3*32+8) /* SMP kernel running on UP */ #define X86_FEATURE_ARCH_PERFMON (3*32+9) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3*32+10) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+11) /* Branch Trace Store */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -115,5 +116,6 @@ #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #endif /* __ASM_X8664_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From 538f188e03c821c93b355c9fc346806cdd34e286 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: i386 add Intel BTS cpufeature bit and detection (take 2) Here is a small patch for i386 which adds a cpufeature flag and detection code for Intel's Branch Trace Store (BTS) feature. This feature can be found on Intel P4 and Core 2 processors among others. It can also be used by perfmon. changelog: - add CPU_FEATURE_BTS - add Branch Trace Store detection signed-off-by: stephane eranian Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/intel.c | 2 ++ include/asm-i386/cpufeature.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 3ae795e9056..56fe2658495 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -199,6 +199,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) if (cpu_has_ds) { unsigned int l1; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); + if (!(l1 & (1<<11))) + set_bit(X86_FEATURE_BTS, c->x86_capability); if (!(l1 & (1<<12))) set_bit(X86_FEATURE_PEBS, c->x86_capability); } diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h index 4c83e059228..3f92b94e0d7 100644 --- a/include/asm-i386/cpufeature.h +++ b/include/asm-i386/cpufeature.h @@ -74,6 +74,7 @@ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -138,6 +139,7 @@ #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #endif /* __ASM_I386_CPUFEATURE_H */ -- cgit v1.2.3-70-g09d2 From bf7e6a196318316e921f357557fca9d11d15f486 Mon Sep 17 00:00:00 2001 From: Artiom Myaskouvskey Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] i386: Preserve EFI run time regions with memmap parameter When using memmap kernel parameter in EFI boot we should also add to memory map memory regions of runtime services to enable their mapping later. AK: merged and cleaned up the patch Signed-off-by: Artiom Myaskouvskey Signed-off-by: Andi Kleen --- arch/i386/kernel/e820.c | 60 +++++++++++++++++++++++++++++++++++-------------- arch/i386/mm/init.c | 2 -- include/linux/efi.h | 1 + 3 files changed, 44 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index b704790f796..2f7d0a92fd7 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c @@ -742,29 +742,55 @@ void __init print_memory_map(char *who) } } -void __init limit_regions(unsigned long long size) +static __init __always_inline void efi_limit_regions(unsigned long long size) { unsigned long long current_addr = 0; + efi_memory_desc_t *md, *next_md; + void *p, *p1; + int i, j; + + j = 0; + p1 = memmap.map; + for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; + next_md = p1; + current_addr = md->phys_addr + + PFN_PHYS(md->num_pages); + if (is_available_memory(md)) { + if (md->phys_addr >= size) continue; + memcpy(next_md, md, memmap.desc_size); + if (current_addr >= size) { + next_md->num_pages -= + PFN_UP(current_addr-size); + } + p1 += memmap.desc_size; + next_md = p1; + j++; + } else if ((md->attribute & EFI_MEMORY_RUNTIME) == + EFI_MEMORY_RUNTIME) { + /* In order to make runtime services + * available we have to include runtime + * memory regions in memory map */ + memcpy(next_md, md, memmap.desc_size); + p1 += memmap.desc_size; + next_md = p1; + j++; + } + } + memmap.nr_map = j; + memmap.map_end = memmap.map + + (memmap.nr_map * memmap.desc_size); +} + +void __init limit_regions(unsigned long long size) +{ + unsigned long long current_addr; int i; print_memory_map("limit_regions start"); if (efi_enabled) { - efi_memory_desc_t *md; - void *p; - - for (p = memmap.map, i = 0; p < memmap.map_end; - p += memmap.desc_size, i++) { - md = p; - current_addr = md->phys_addr + (md->num_pages << 12); - if (md->type == EFI_CONVENTIONAL_MEMORY) { - if (current_addr >= size) { - md->num_pages -= - (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); - memmap.nr_map = i + 1; - return; - } - } - } + efi_limit_regions(size); + return; } for (i = 0; i < e820.nr_map; i++) { current_addr = e820.map[i].addr + e820.map[i].size; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 167416155ee..f4dd048187f 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -192,8 +192,6 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } -extern int is_available_memory(efi_memory_desc_t *); - int page_is_ram(unsigned long pagenr) { int i; diff --git a/include/linux/efi.h b/include/linux/efi.h index 91ecf49fbf2..df1c91855f0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -302,6 +302,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource); extern unsigned long efi_get_time(void); extern int __init efi_set_rtc_mmss(unsigned long nowtime); +extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.3-70-g09d2 From c7a3392e9e53e43c44a971de3dd480a8e2788e75 Mon Sep 17 00:00:00 2001 From: Wink Saville Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86-64: Fix comments for MSR_FS_BASE and MSR_GS_BASE. The comments for MSR_FS_BASE & MSR_GS_BASE were transposed. Signed-off-by: Wink Saville Signed-off-by: Andi Kleen --- include/asm-x86_64/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index a745c5008a6..952783d35c7 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -169,8 +169,8 @@ static inline unsigned int cpuid_edx(unsigned int op) #define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ #define MSR_CSTAR 0xc0000083 /* compatibility mode SYSCALL target */ #define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit GS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit FS base */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ #define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel) */ /* EFER bits: */ #define _EFER_SCE 0 /* SYSCALL/SYSRET */ -- cgit v1.2.3-70-g09d2 From 0741f4d207a644482d7a040f05cd264c98cf7ee8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 7 Dec 2006 02:14:11 +0100 Subject: [PATCH] x86: add sysctl for kstack_depth_to_print Add sysctl for kstack_depth_to_print. This lets users change the amount of raw stack data printed in dump_stack() without having to reboot. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen --- Documentation/sysctl/kernel.txt | 8 ++++++++ arch/i386/kernel/traps.c | 2 +- arch/x86_64/kernel/traps.c | 2 +- include/asm-x86_64/stacktrace.h | 2 ++ kernel/sysctl.c | 9 +++++++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 0bc7f1e3c9e..5922e84d913 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -27,6 +27,7 @@ show up in /proc/sys/kernel: - hotplug - java-appletviewer [ binfmt_java, obsolete ] - java-interpreter [ binfmt_java, obsolete ] +- kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/kmod.txt - msgmax @@ -170,6 +171,13 @@ This flag controls the L2 cache of G3 processor boards. If ============================================================== +kstack_depth_to_print: (X86 only) + +Controls the number of words to print when dumping the raw +kernel stack. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7b2f9f02208..1d48a75fa33 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -91,7 +91,7 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); -static int kstack_depth_to_print = 24; +int kstack_depth_to_print = 24; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 264db33476a..75ceccee178 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -108,7 +108,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) preempt_enable_no_resched(); } -static int kstack_depth_to_print = 12; +int kstack_depth_to_print = 12; #ifdef CONFIG_STACK_UNWIND static int call_trace = 1; #else diff --git a/include/asm-x86_64/stacktrace.h b/include/asm-x86_64/stacktrace.h index 5eb9799bef7..6f0b5459430 100644 --- a/include/asm-x86_64/stacktrace.h +++ b/include/asm-x86_64/stacktrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_STACKTRACE_H #define _ASM_STACKTRACE_H 1 +extern int kstack_depth_to_print; + /* Generic stack tracer with callbacks */ struct stacktrace_ops { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792..6fc5e17086f 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -54,6 +54,7 @@ extern int proc_nr_files(ctl_table *table, int write, struct file *filp, #ifdef CONFIG_X86 #include +#include #endif #if defined(CONFIG_SYSCTL) @@ -707,6 +708,14 @@ static ctl_table kern_table[] = { .mode = 0444, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "kstack_depth_to_print", + .data = &kstack_depth_to_print, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #if defined(CONFIG_MMU) { -- cgit v1.2.3-70-g09d2 From a1a70c25bed75ed36ed48bbe18b9029428d2452d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Dec 2006 02:14:12 +0100 Subject: [PATCH] i386: always enable regparm -mregparm=3 has been enabled by default for some time on i386, and AFAIK there aren't any problems with it left. This patch removes the REGPARM config option and sets -mregparm=3 unconditionally. Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- Documentation/stable_api_nonsense.txt | 3 --- arch/i386/Kconfig | 14 -------------- arch/i386/Makefile | 4 +--- include/asm-i386/module.h | 8 +------- 4 files changed, 2 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt index f39c9d714db..a2afca3b2ba 100644 --- a/Documentation/stable_api_nonsense.txt +++ b/Documentation/stable_api_nonsense.txt @@ -62,9 +62,6 @@ consider the following facts about the Linux kernel: - different structures can contain different fields - Some functions may not be implemented at all, (i.e. some locks compile away to nothing for non-SMP builds.) - - Parameter passing of variables from function to function can be - done in different ways (the CONFIG_REGPARM option controls - this.) - Memory within the kernel can be aligned in different ways, depending on the build options. - Linux runs on a wide range of different processor architectures. diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index bb1fa061c6c..b6b2df40ca7 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -721,20 +721,6 @@ config BOOT_IOREMAP depends on (((X86_SUMMIT || X86_GENERICARCH) && NUMA) || (X86 && EFI)) default y -config REGPARM - bool "Use register arguments" - default y - help - Compile the kernel with -mregparm=3. This instructs gcc to use - a more efficient function call ABI which passes the first three - arguments of a function call via registers, which results in denser - and faster code. - - If this option is disabled, then the default ABI of passing - arguments via the stack is used. - - If unsure, say Y. - config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" depends on PROC_FS diff --git a/arch/i386/Makefile b/arch/i386/Makefile index d1aca52bf69..f7ac1aea1d8 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -31,7 +31,7 @@ LDFLAGS_vmlinux := --emit-relocs endif CHECKFLAGS += -D__i386__ -CFLAGS += -pipe -msoft-float +CFLAGS += -pipe -msoft-float -mregparm=3 # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) @@ -39,8 +39,6 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) # CPU-specific tuning. Anything which can be shared with UML should go here. include $(srctree)/arch/i386/Makefile.cpu -cflags-$(CONFIG_REGPARM) += -mregparm=3 - # temporary until string.h is fixed cflags-y += -ffreestanding diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h index fe5ae42e027..02f8f541cbe 100644 --- a/include/asm-i386/module.h +++ b/include/asm-i386/module.h @@ -62,18 +62,12 @@ struct mod_arch_specific #error unknown processor family #endif -#ifdef CONFIG_REGPARM -#define MODULE_REGPARM "REGPARM " -#else -#define MODULE_REGPARM "" -#endif - #ifdef CONFIG_4KSTACKS #define MODULE_STACKSIZE "4KSTACKS " #else #define MODULE_STACKSIZE "" #endif -#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_REGPARM MODULE_STACKSIZE +#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE #endif /* _ASM_I386_MODULE_H */ -- cgit v1.2.3-70-g09d2 From 359ad0d4015a9ab39243f2ebc4eb07915bd618b2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] unwinder: more sanity checks in Dwarf2 unwinder Tighten the requirements on both input to and output from the Dwarf2 unwinder. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/traps.c | 7 +++++++ arch/x86_64/kernel/traps.c | 7 +++++++ include/asm-i386/unwind.h | 12 ++++-------- include/asm-x86_64/unwind.h | 8 ++------ kernel/unwind.c | 16 +++++++++++++++- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 86d8476be4f..c447807e2a4 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -161,12 +161,19 @@ dump_trace_unwind(struct unwind_frame_info *info, void *data) { struct ops_and_data *oad = (struct ops_and_data *)data; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 9864d195c40..4fdd162f0be 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -225,12 +225,19 @@ static int dump_trace_unwind(struct unwind_frame_info *info, void *context) { struct ops_and_data *oad = (struct ops_and_data *)context; int n = 0; + unsigned long sp = UNW_SP(info); + if (arch_unw_user_mode(info)) + return -1; while (unwind(info) == 0 && UNW_PC(info)) { n++; oad->ops->address(oad->data, UNW_PC(info)); if (arch_unw_user_mode(info)) break; + if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) + && sp > UNW_SP(info)) + break; + sp = UNW_SP(info); } return n; } diff --git a/include/asm-i386/unwind.h b/include/asm-i386/unwind.h index 601fc67bd77..aa2c931e30d 100644 --- a/include/asm-i386/unwind.h +++ b/include/asm-i386/unwind.h @@ -79,17 +79,13 @@ extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, void *arg), void *arg); -static inline int arch_unw_user_mode(const struct unwind_frame_info *info) +static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register and EFLAGS saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode_vm(&info->regs); -#else - return info->regs.eip < PAGE_OFFSET + return user_mode_vm(&info->regs) + || info->regs.eip < PAGE_OFFSET || (info->regs.eip >= __fix_to_virt(FIX_VDSO) - && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) + && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) || info->regs.esp < PAGE_OFFSET; -#endif } #else diff --git a/include/asm-x86_64/unwind.h b/include/asm-x86_64/unwind.h index 2e7ff10fd77..2f6349e4871 100644 --- a/include/asm-x86_64/unwind.h +++ b/include/asm-x86_64/unwind.h @@ -87,14 +87,10 @@ extern int arch_unwind_init_running(struct unwind_frame_info *, static inline int arch_unw_user_mode(const struct unwind_frame_info *info) { -#if 0 /* This can only work when selector register saves/restores - are properly annotated (and tracked in UNW_REGISTER_INFO). */ - return user_mode(&info->regs); -#else - return (long)info->regs.rip >= 0 + return user_mode(&info->regs) + || (long)info->regs.rip >= 0 || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) || (long)info->regs.rsp >= 0; -#endif } #else diff --git a/kernel/unwind.c b/kernel/unwind.c index af48168a3af..7e721f10410 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -95,6 +95,7 @@ static const struct { typedef unsigned long uleb128_t; typedef signed long sleb128_t; +#define sleb128abs __builtin_labs static struct unwind_table { struct { @@ -787,7 +788,7 @@ int unwind(struct unwind_frame_info *frame) #define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) const u32 *fde = NULL, *cie = NULL; const u8 *ptr = NULL, *end = NULL; - unsigned long pc = UNW_PC(frame) - frame->call_frame; + unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; unsigned long startLoc = 0, endLoc = 0, cfa; unsigned i; signed ptrType = -1; @@ -936,6 +937,9 @@ int unwind(struct unwind_frame_info *frame) state.dataAlign = get_sleb128(&ptr, end); if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) cie = NULL; + else if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign)) + return -EPERM; else { retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ @@ -968,6 +972,8 @@ int unwind(struct unwind_frame_info *frame) #ifdef CONFIG_FRAME_POINTER unsigned long top, bottom; + if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) + return -EPERM; top = STACK_TOP(frame->task); bottom = STACK_BOTTOM(frame->task); # if FRAME_RETADDR_OFFSET < 0 @@ -1018,6 +1024,7 @@ int unwind(struct unwind_frame_info *frame) || state.regs[retAddrReg].where == Nowhere || state.cfa.reg >= ARRAY_SIZE(reg_info) || reg_info[state.cfa.reg].width != sizeof(unsigned long) + || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) || state.cfa.offs % sizeof(unsigned long)) return -EIO; /* update frame */ @@ -1038,6 +1045,8 @@ int unwind(struct unwind_frame_info *frame) #else # define CASES CASE(8); CASE(16); CASE(32); CASE(64) #endif + pc = UNW_PC(frame); + sp = UNW_SP(frame); for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { if (REG_INVALID(i)) { if (state.regs[i].where == Nowhere) @@ -1118,6 +1127,11 @@ int unwind(struct unwind_frame_info *frame) } } + if (UNW_PC(frame) % state.codeAlign + || UNW_SP(frame) % sleb128abs(state.dataAlign) + || (pc == UNW_PC(frame) && sp == UNW_SP(frame))) + return -EIO; + return 0; #undef CASES #undef FRAME_REG -- cgit v1.2.3-70-g09d2 From d331e739f5ad2aaa9d8553891ba6ca823bdbce37 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Fix interrupt race in idle callback (3rd try) Idle callbacks has some races when enter_idle() sets isidle and subsequent interrupts that can happen on that CPU, before CPU goes to idle. Due to this, an IDLE_END can get called before IDLE_START. To avoid these races, disable interrupts before enter_idle and make sure that all idle routines do not enable interrupts before entering idle. Note that poll_idle() still has a this race as it has to enable interrupts before going to idle. But, all other idle routines have the race fixed. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andi Kleen --- arch/x86_64/kernel/process.c | 19 +++++++++++++++++-- include/asm-x86_64/processor.h | 8 ++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 0b7b4caa4f7..a418ee4c8c6 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -127,6 +127,7 @@ static void default_idle(void) */ static void poll_idle (void) { + local_irq_enable(); cpu_relax(); } @@ -208,6 +209,12 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + /* + * Idle routines should keep interrupts disabled + * from here on, until they go to idle. + * Otherwise, idle callbacks can misfire. + */ + local_irq_disable(); enter_idle(); idle(); /* In many cases the interrupt that ended idle @@ -245,8 +252,16 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { - local_irq_enable(); - mwait_idle_with_hints(0,0); + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } } void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index cef17e0f828..76552d72804 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -475,6 +475,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + /* "mwait %eax,%ecx;" */ + asm volatile( + "sti; .byte 0x0f,0x01,0xc9;" + : :"a" (eax), "c" (ecx)); +} + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); #define stack_current() \ -- cgit v1.2.3-70-g09d2 From 9ee4016888f1b226d7bb9a10ddb782868f658b31 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: include/asm-x86_64/cpufeature.h isn't a userspace header Nothing in include/asm-x86_64/cpufeature.h is part of the userspace<->kernel interface. Signed-off-by: Adrian Bunk Signed-off-by: Andi Kleen --- include/asm-x86_64/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 1ee9b07f3fe..6c455b4068c 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -6,7 +6,6 @@ ALTARCHDEF := defined __i386__ header-y += boot.h header-y += bootsetup.h -header-y += cpufeature.h header-y += debugreg.h header-y += ldt.h header-y += msr.h -- cgit v1.2.3-70-g09d2 From e4b522d7ef144fb2ad6a4cb23d9cb5ec154be8bc Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: fix asm constraints in i386 atomic_add_return Since v->counter is both read and written, it should be an output as well as an input for the asm. The current code only gets away with this because counter is volatile. Also, according to Documents/atomic_ops.txt, atomic_add_return should provide a memory barrier, in particular a compiler barrier, so the asm should be marked as clobbering memory. Test case: #include typedef struct { int counter; } atomic_t; /* NB: no "volatile" */ #define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) static __inline__ int atomic_add_return(int i, atomic_t *v) { int __i = i; __asm__ __volatile__( "lock; xaddl %0, %1;" :"=r"(i) :"m"(v->counter), "0"(i)); /* __asm__ __volatile__( "lock; xaddl %0, %1" :"+r" (i), "+m" (v->counter) : : "memory"); */ return i + __i; } int main (void) { atomic_t a = ATOMIC_INIT(0); int x; x = atomic_add_return (1, &a); if ((x!=1) || (atomic_read(&a)!=1)) printf("fail: %i, %i\n", x, atomic_read(&a)); } Signed-off-by: Duncan Sands Signed-off-by: Andi Kleen Cc: Andi Kleen Acked-by: David Howells Signed-off-by: Andrew Morton --- include/asm-i386/atomic.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h index 51a16624252..6aab7a105fa 100644 --- a/include/asm-i386/atomic.h +++ b/include/asm-i386/atomic.h @@ -187,9 +187,9 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) /* Modern 486+ processor */ __i = i; __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); + LOCK_PREFIX "xaddl %0, %1" + :"+r" (i), "+m" (v->counter) + : : "memory"); return i + __i; #ifdef CONFIG_M386 -- cgit v1.2.3-70-g09d2 From 9dc452ba2d47f376987a99c0819833af0b46cc3f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 7 Dec 2006 02:14:13 +0100 Subject: [PATCH] x86-64: Fix constraints in atomic_add_return() Following i386 from Duncan Sands Signed-off-by: Andi Kleen --- include/asm-x86_64/atomic.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h index 007e88d6d43..626d3715fbb 100644 --- a/include/asm-x86_64/atomic.h +++ b/include/asm-x86_64/atomic.h @@ -189,9 +189,9 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) { int __i = i; __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); + LOCK_PREFIX "xaddl %0, %1" + :"+r" (i), "+m" (v->counter) + : : "memory"); return i + __i; } -- cgit v1.2.3-70-g09d2 From d7fb02712818643bab79a6b3cb8270a747d0227b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] x86-64: remove remaining pc98 code Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/io_apic.c | 23 ++--------------------- arch/i386/kernel/mpparse.c | 2 -- include/asm-i386/mpspec_def.h | 2 -- 3 files changed, 2 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 56f571c9fc0..7f015a71ab5 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -842,8 +842,7 @@ static int __init find_isa_irq_pin(int irq, int type) if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA || - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + mp_bus_id_to_type[lbus] == MP_BUS_MCA ) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) @@ -862,8 +861,7 @@ static int __init find_isa_irq_apic(int irq, int type) if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || mp_bus_id_to_type[lbus] == MP_BUS_EISA || - mp_bus_id_to_type[lbus] == MP_BUS_MCA || - mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + mp_bus_id_to_type[lbus] == MP_BUS_MCA ) && (mp_irqs[i].mpc_irqtype == type) && (mp_irqs[i].mpc_srcbusirq == irq)) @@ -993,12 +991,6 @@ static int EISA_ELCR(unsigned int irq) #define default_MCA_trigger(idx) (1) #define default_MCA_polarity(idx) (0) -/* NEC98 interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_NEC98_trigger(idx) (0) -#define default_NEC98_polarity(idx) (0) - static int __init MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mpc_srcbus; @@ -1033,11 +1025,6 @@ static int __init MPBIOS_polarity(int idx) polarity = default_MCA_polarity(idx); break; } - case MP_BUS_NEC98: /* NEC 98 pin */ - { - polarity = default_NEC98_polarity(idx); - break; - } default: { printk(KERN_WARNING "broken BIOS!!\n"); @@ -1107,11 +1094,6 @@ static int MPBIOS_trigger(int idx) trigger = default_MCA_trigger(idx); break; } - case MP_BUS_NEC98: /* NEC 98 pin */ - { - trigger = default_NEC98_trigger(idx); - break; - } default: { printk(KERN_WARNING "broken BIOS!!\n"); @@ -1173,7 +1155,6 @@ static int pin_2_irq(int idx, int apic, int pin) case MP_BUS_ISA: /* ISA pin */ case MP_BUS_EISA: case MP_BUS_MCA: - case MP_BUS_NEC98: { irq = mp_irqs[idx].mpc_srcbusirq; break; diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 442aaf8c77e..2ce67228dff 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -249,8 +249,6 @@ static void __init MP_bus_info (struct mpc_config_bus *m) mp_current_pci_id++; } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; - } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) { - mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98; } else { printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } diff --git a/include/asm-i386/mpspec_def.h b/include/asm-i386/mpspec_def.h index 76feedf85a8..13bafb16e7a 100644 --- a/include/asm-i386/mpspec_def.h +++ b/include/asm-i386/mpspec_def.h @@ -97,7 +97,6 @@ struct mpc_config_bus #define BUSTYPE_TC "TC" #define BUSTYPE_VME "VME" #define BUSTYPE_XPRESS "XPRESS" -#define BUSTYPE_NEC98 "NEC98" struct mpc_config_ioapic { @@ -182,7 +181,6 @@ enum mp_bustype { MP_BUS_EISA, MP_BUS_PCI, MP_BUS_MCA, - MP_BUS_NEC98 }; #endif -- cgit v1.2.3-70-g09d2 From 116780fc04d9f6cd3ceeab0251681f1dfda53367 Mon Sep 17 00:00:00 2001 From: Burman Yan Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] i386: replace kmalloc+memset with kzalloc Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen --- arch/i386/kernel/cpu/intel_cacheinfo.c | 11 +++-------- arch/i386/kernel/mca.c | 13 ++++--------- arch/i386/kernel/pci-dma.c | 6 ++---- arch/i386/mach-voyager/voyager_cat.c | 6 ++---- include/asm-i386/thread_info.h | 10 +--------- 5 files changed, 12 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 5c43be47587..80b4c5d421b 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -480,12 +480,10 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - cpuid4_info[cpu] = kmalloc( + cpuid4_info[cpu] = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); if (unlikely(cpuid4_info[cpu] == NULL)) return -ENOMEM; - memset(cpuid4_info[cpu], 0, - sizeof(struct _cpuid4_info) * num_cache_leaves); oldmask = current->cpus_allowed; retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); @@ -658,17 +656,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return -ENOENT; /* Allocate all required memory */ - cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL); + cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); if (unlikely(cache_kobject[cpu] == NULL)) goto err_out; - memset(cache_kobject[cpu], 0, sizeof(struct kobject)); - index_kobject[cpu] = kmalloc( + index_kobject[cpu] = kzalloc( sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); if (unlikely(index_kobject[cpu] == NULL)) goto err_out; - memset(index_kobject[cpu], 0, - sizeof(struct _index_kobject) * num_cache_leaves); return 0; diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index eb57a851789..b83672b8952 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -283,10 +283,9 @@ static int __init mca_init(void) bus->f.mca_transform_memory = mca_dummy_transform_memory; /* get the motherboard device */ - mca_dev = kmalloc(sizeof(struct mca_device), GFP_KERNEL); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL); if(unlikely(!mca_dev)) goto out_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); /* * We do not expect many MCA interrupts during initialization, @@ -310,11 +309,9 @@ static int __init mca_init(void) mca_dev->slot = MCA_MOTHERBOARD; mca_register_device(MCA_PRIMARY_BUS, mca_dev); - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); - /* Put motherboard into video setup mode, read integrated video * POS registers, and turn motherboard setup off. @@ -349,10 +346,9 @@ static int __init mca_init(void) } if(which_scsi) { /* found a scsi card */ - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); for(j = 0; j < 8; j++) mca_dev->pos[j] = pos[j]; @@ -378,10 +374,9 @@ static int __init mca_init(void) if(!mca_read_and_store_pos(pos)) continue; - mca_dev = kmalloc(sizeof(struct mca_device), GFP_ATOMIC); + mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC); if(unlikely(!mca_dev)) goto out_unlock_nomem; - memset(mca_dev, 0, sizeof(struct mca_device)); for(j=0; j<8; j++) mca_dev->pos[j]=pos[j]; diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 5c8c6ef1fc5..41af692c158 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -92,14 +92,12 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, if (!mem_base) goto out; - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dev->dma_mem) goto out; - memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); - dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); if (!dev->dma_mem->bitmap) goto free1_out; - memset(dev->dma_mem->bitmap, 0, bitmap_size); dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; diff --git a/arch/i386/mach-voyager/voyager_cat.c b/arch/i386/mach-voyager/voyager_cat.c index f50c6c6ad68..943a9473b13 100644 --- a/arch/i386/mach-voyager/voyager_cat.c +++ b/arch/i386/mach-voyager/voyager_cat.c @@ -776,7 +776,7 @@ voyager_cat_init(void) for(asic=0; asic < (*modpp)->num_asics; asic++) { int j; voyager_asic_t *asicp = *asicpp - = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ + = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++];*/ voyager_sp_table_t *sp_table; voyager_at_t *asic_table; voyager_jtt_t *jtag_table; @@ -785,7 +785,6 @@ voyager_cat_init(void) printk("**WARNING** kmalloc failure in cat_init\n"); continue; } - memset(asicp, 0, sizeof(voyager_asic_t)); asicpp = &(asicp->next); asicp->asic_location = asic; sp_table = (voyager_sp_table_t *)(eprom_buf + sp_offset); @@ -851,8 +850,7 @@ voyager_cat_init(void) #endif { - struct resource *res = kmalloc(sizeof(struct resource),GFP_KERNEL); - memset(res, 0, sizeof(struct resource)); + struct resource *res = kzalloc(sizeof(struct resource),GFP_KERNEL); res->name = kmalloc(128, GFP_KERNEL); sprintf((char *)res->name, "Voyager %s Quad CPI", cat_module_name(i)); res->start = qic_addr; diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 54d6d7aea93..46d32ad9208 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -95,15 +95,7 @@ static inline struct thread_info *current_thread_info(void) /* thread information allocation */ #ifdef CONFIG_DEBUG_STACK_USAGE -#define alloc_thread_info(tsk) \ - ({ \ - struct thread_info *ret; \ - \ - ret = kmalloc(THREAD_SIZE, GFP_KERNEL); \ - if (ret) \ - memset(ret, 0, THREAD_SIZE); \ - ret; \ - }) +#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) #else #define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) #endif -- cgit v1.2.3-70-g09d2 From b65780e123ba9b762276482bbfb52836e4d41fd9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Dec 2006 02:14:19 +0100 Subject: [PATCH] unwinder: move .eh_frame to RODATA The .eh_frame section contents is never written to, so it can as well benefit from CONFIG_DEBUG_RODATA. Diff-ed against firstfloor tree. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- arch/i386/kernel/vmlinux.lds.S | 9 --------- arch/x86_64/kernel/vmlinux.lds.S | 9 --------- include/asm-generic/vmlinux.lds.h | 17 ++++++++++++----- kernel/unwind.c | 2 +- 4 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 25581e87c60..56e6ad5cb04 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -102,15 +102,6 @@ SECTIONS _edata = .; /* End of data section */ } -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(4); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(THREAD_SIZE); /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { *(.data.init_task) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index d9534e750d4..6a1f8f491e5 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -51,15 +51,6 @@ SECTIONS RODATA -#ifdef CONFIG_STACK_UNWIND - . = ALIGN(8); - .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { - __start_unwind = .; - *(.eh_frame) - __end_unwind = .; - } -#endif - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9f4747780da..4d4c62d1105 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -119,8 +119,7 @@ *(__ksymtab_strings) \ } \ \ - /* Unwind data binary search table */ \ - EH_FRAME_HDR \ + EH_FRAME \ \ /* Built-in module parameters. */ \ __param : AT(ADDR(__param) - LOAD_OFFSET) { \ @@ -162,15 +161,23 @@ VMLINUX_SYMBOL(__kprobes_text_end) = .; #ifdef CONFIG_STACK_UNWIND - /* Unwind data binary search table */ -#define EH_FRAME_HDR \ +#define EH_FRAME \ + /* Unwind data binary search table */ \ + . = ALIGN(8); \ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ *(.eh_frame_hdr) \ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } \ + /* Unwind data */ \ + . = ALIGN(8); \ + .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind) = .; \ + *(.eh_frame) \ + VMLINUX_SYMBOL(__end_unwind) = .; \ } #else -#define EH_FRAME_HDR +#define EH_FRAME #endif /* DWARF debug sections. diff --git a/kernel/unwind.c b/kernel/unwind.c index 08645aa7c2d..09c26132924 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -19,7 +19,7 @@ #include #include -extern char __start_unwind[], __end_unwind[]; +extern const char __start_unwind[], __end_unwind[]; extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; #define MAX_STACK_DEPTH 8 -- cgit v1.2.3-70-g09d2 From 334c29a64507dda187565dd0db0403de3d70ec8b Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 4 Dec 2006 19:31:51 -0800 Subject: [GENETLINK]: Move command capabilities to flags. This patch moves command capabilities to command flags. Other than being cleaner, saves several bytes. We increment the nlctrl version so as to signal to user space that to not expect the attributes. We will try to be careful not to do this too often ;-> Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/genetlink.h | 6 +++--- net/netlink/genetlink.c | 18 ++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 9049dc65ae5..f7a93770e1b 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -17,6 +17,9 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) #define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 /* * List of reserved static generic netlink identifiers: @@ -58,9 +61,6 @@ enum { CTRL_ATTR_OP_UNSPEC, CTRL_ATTR_OP_ID, CTRL_ATTR_OP_FLAGS, - CTRL_ATTR_OP_POLICY, - CTRL_ATTR_OP_DOIT, - CTRL_ATTR_OP_DUMPIT, __CTRL_ATTR_OP_MAX, }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9b03747c1f..b5df749cba8 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -143,6 +143,13 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops) goto errout; } + if (ops->dumpit) + ops->flags |= GENL_CMD_CAP_DO; + if (ops->doit) + ops->flags |= GENL_CMD_CAP_DUMP; + if (ops->policy) + ops->flags |= GENL_CMD_CAP_HASPOL; + genl_lock(); list_add_tail(&ops->ops_list, &family->ops_list); genl_unlock(); @@ -387,7 +394,7 @@ static void genl_rcv(struct sock *sk, int len) static struct genl_family genl_ctrl = { .id = GENL_ID_CTRL, .name = "nlctrl", - .version = 0x1, + .version = 0x2, .maxattr = CTRL_ATTR_MAX, }; @@ -425,15 +432,6 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); - if (ops->policy) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); - - if (ops->doit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); - - if (ops->dumpit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); - nla_nest_end(skb, nest); } -- cgit v1.2.3-70-g09d2 From 7cf4c1a5fd13820d7591179c0b925d739b2be9a7 Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Sat, 28 Oct 2006 13:21:22 +1000 Subject: [IPSEC]: Add support for AES-XCBC-MAC The glue of xfrm. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: Herbert Xu --- include/linux/pfkeyv2.h | 1 + net/xfrm/xfrm_algo.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h index 0f0b880c428..265bafab649 100644 --- a/include/linux/pfkeyv2.h +++ b/include/linux/pfkeyv2.h @@ -285,6 +285,7 @@ struct sadb_x_sec_ctx { #define SADB_X_AALG_SHA2_384HMAC 6 #define SADB_X_AALG_SHA2_512HMAC 7 #define SADB_X_AALG_RIPEMD160HMAC 8 +#define SADB_X_AALG_AES_XCBC_MAC 9 #define SADB_X_AALG_NULL 251 /* kame */ #define SADB_AALG_MAX 251 diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 5a0dbeb6bbe..6b381fc0383 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -119,6 +119,23 @@ static struct xfrm_algo_desc aalg_list[] = { .sadb_alg_maxbits = 160 } }, +{ + .name = "xcbc(aes)", + + .uinfo = { + .auth = { + .icv_truncbits = 96, + .icv_fullbits = 128, + } + }, + + .desc = { + .sadb_alg_id = SADB_X_AALG_AES_XCBC_MAC, + .sadb_alg_ivlen = 0, + .sadb_alg_minbits = 128, + .sadb_alg_maxbits = 128 + } +}, }; static struct xfrm_algo_desc ealg_list[] = { -- cgit v1.2.3-70-g09d2 From cc44215eaaa5e4032946b962353526ae6c370c0e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 22 Nov 2006 17:55:00 +1100 Subject: [CRYPTO] api: Remove unused functions This patch removes the following no longer used functions: - api.c: crypto_alg_available() - digest.c: crypto_digest_init() - digest.c: crypto_digest_update() - digest.c: crypto_digest_final() - digest.c: crypto_digest_digest() Signed-off-by: Adrian Bunk Signed-off-by: Herbert Xu --- crypto/api.c | 15 --------------- crypto/digest.c | 48 ------------------------------------------------ include/linux/crypto.h | 22 ---------------------- 3 files changed, 85 deletions(-) (limited to 'include') diff --git a/crypto/api.c b/crypto/api.c index 4fb7fa45cb0..8c446871cd5 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -466,23 +466,8 @@ void crypto_free_tfm(struct crypto_tfm *tfm) kfree(tfm); } -int crypto_alg_available(const char *name, u32 flags) -{ - int ret = 0; - struct crypto_alg *alg = crypto_alg_mod_lookup(name, 0, - CRYPTO_ALG_ASYNC); - - if (!IS_ERR(alg)) { - crypto_mod_put(alg); - ret = 1; - } - - return ret; -} - EXPORT_SYMBOL_GPL(crypto_alloc_tfm); EXPORT_SYMBOL_GPL(crypto_free_tfm); -EXPORT_SYMBOL_GPL(crypto_alg_available); int crypto_has_alg(const char *name, u32 type, u32 mask) { diff --git a/crypto/digest.c b/crypto/digest.c index 0155a94e4b1..8f4593268ce 100644 --- a/crypto/digest.c +++ b/crypto/digest.c @@ -21,54 +21,6 @@ #include "internal.h" #include "scatterwalk.h" -void crypto_digest_init(struct crypto_tfm *tfm) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - - crypto_hash_init(&desc); -} -EXPORT_SYMBOL_GPL(crypto_digest_init); - -void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - unsigned int nbytes = 0; - unsigned int i; - - for (i = 0; i < nsg; i++) - nbytes += sg[i].length; - - crypto_hash_update(&desc, sg, nbytes); -} -EXPORT_SYMBOL_GPL(crypto_digest_update); - -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - - crypto_hash_final(&desc, out); -} -EXPORT_SYMBOL_GPL(crypto_digest_final); - -void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) -{ - struct crypto_hash *hash = crypto_hash_cast(tfm); - struct hash_desc desc = { .tfm = hash, .flags = tfm->crt_flags }; - unsigned int nbytes = 0; - unsigned int i; - - for (i = 0; i < nsg; i++) - nbytes += sg[i].length; - - crypto_hash_digest(&desc, sg, nbytes, out); -} -EXPORT_SYMBOL_GPL(crypto_digest_digest); - static int init(struct hash_desc *desc) { struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 6485e9716b3..4aa9046601d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -241,12 +241,8 @@ int crypto_unregister_alg(struct crypto_alg *alg); * Algorithm query interface. */ #ifdef CONFIG_CRYPTO -int crypto_alg_available(const char *name, u32 flags) - __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else -static int crypto_alg_available(const char *name, u32 flags) - __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { return 0; @@ -707,16 +703,6 @@ static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, dst, src); } -void crypto_digest_init(struct crypto_tfm *tfm) __deprecated_for_modules; -void crypto_digest_update(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg) - __deprecated_for_modules; -void crypto_digest_final(struct crypto_tfm *tfm, u8 *out) - __deprecated_for_modules; -void crypto_digest_digest(struct crypto_tfm *tfm, - struct scatterlist *sg, unsigned int nsg, u8 *out) - __deprecated_for_modules; - static inline struct crypto_hash *__crypto_hash_cast(struct crypto_tfm *tfm) { return (struct crypto_hash *)tfm; @@ -729,14 +715,6 @@ static inline struct crypto_hash *crypto_hash_cast(struct crypto_tfm *tfm) return __crypto_hash_cast(tfm); } -static int crypto_digest_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) __deprecated; -static inline int crypto_digest_setkey(struct crypto_tfm *tfm, - const u8 *key, unsigned int keylen) -{ - return tfm->crt_hash.setkey(crypto_hash_cast(tfm), key, keylen); -} - static inline struct crypto_hash *crypto_alloc_hash(const char *alg_name, u32 type, u32 mask) { -- cgit v1.2.3-70-g09d2 From aec3694b987900de7ab789ea5749d673e0d634c4 Mon Sep 17 00:00:00 2001 From: Rik Snel Date: Sun, 29 Oct 2006 11:02:07 +1100 Subject: [CRYPTO] lib: some common 128-bit block operations, nicely centralized 128bit is a common blocksize in linux kernel cryptography, so it helps to centralize some common operations. The code, while mostly trivial, is based on a header file mode_hdr.h in http://fp.gladman.plus.com/AES/modes.vc8.19-06-06.zip The original copyright (and GPL statement) of the original author, Dr Brian Gladman, is preserved. Signed-off-by: Rik Snel Signed-off-by: Herbert Xu --- include/crypto/b128ops.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 include/crypto/b128ops.h (limited to 'include') diff --git a/include/crypto/b128ops.h b/include/crypto/b128ops.h new file mode 100644 index 00000000000..0b8e6bc5530 --- /dev/null +++ b/include/crypto/b128ops.h @@ -0,0 +1,80 @@ +/* b128ops.h - common 128-bit block operations + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006, Rik Snel + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 13/06/2006 +*/ + +#ifndef _CRYPTO_B128OPS_H +#define _CRYPTO_B128OPS_H + +#include + +typedef struct { + u64 a, b; +} u128; + +typedef struct { + __be64 a, b; +} be128; + +typedef struct { + __le64 b, a; +} le128; + +static inline void u128_xor(u128 *r, const u128 *p, const u128 *q) +{ + r->a = p->a ^ q->a; + r->b = p->b ^ q->b; +} + +static inline void be128_xor(be128 *r, const be128 *p, const be128 *q) +{ + u128_xor((u128 *)r, (u128 *)p, (u128 *)q); +} + +static inline void le128_xor(le128 *r, const le128 *p, const le128 *q) +{ + u128_xor((u128 *)r, (u128 *)p, (u128 *)q); +} + +#endif /* _CRYPTO_B128OPS_H */ -- cgit v1.2.3-70-g09d2 From c494e0705d670c51ac736c8c4d92750705fe3187 Mon Sep 17 00:00:00 2001 From: Rik Snel Date: Wed, 29 Nov 2006 18:59:44 +1100 Subject: [CRYPTO] lib: table driven multiplications in GF(2^128) A lot of cypher modes need multiplications in GF(2^128). LRW, ABL, GCM... I use functions from this library in my LRW implementation and I will also use them in my ABL (Arbitrary Block Length, an unencumbered (correct me if I am wrong, wide block cipher mode). Elements of GF(2^128) must be presented as u128 *, it encourages automatic and proper alignment. The library contains support for two different representations of GF(2^128), see the comment in gf128mul.h. There different levels of optimization (memory/speed tradeoff). The code is based on work by Dr Brian Gladman. Notable changes: - deletion of two optimization modes - change from u32 to u64 for faster handling on 64bit machines - support for 'bbe' representation in addition to the, already implemented, 'lle' representation. - move 'inline void' functions from header to 'static void' in the source file - update to use the linux coding style conventions The original can be found at: http://fp.gladman.plus.com/AES/modes.vc8.19-06-06.zip The copyright (and GPL statement) of the original author is preserved. Signed-off-by: Rik Snel Signed-off-by: Herbert Xu --- crypto/Kconfig | 10 + crypto/Makefile | 1 + crypto/gf128mul.c | 466 ++++++++++++++++++++++++++++++++++++++++++++++ include/crypto/gf128mul.h | 198 ++++++++++++++++++++ 4 files changed, 675 insertions(+) create mode 100644 crypto/gf128mul.c create mode 100644 include/crypto/gf128mul.h (limited to 'include') diff --git a/crypto/Kconfig b/crypto/Kconfig index 4495e46660b..f941ffb2a08 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -139,6 +139,16 @@ config CRYPTO_TGR192 See also: . +config CRYPTO_GF128MUL + tristate "GF(2^128) multiplication functions (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Efficient table driven implementation of multiplications in the + field GF(2^128). This is needed by some cypher modes. This + option will be selected automatically if you select such a + cipher mode. Only select this option by hand if you expect to load + an external module that requires these functions. + config CRYPTO_ECB tristate "ECB support" select CRYPTO_BLKCIPHER diff --git a/crypto/Makefile b/crypto/Makefile index aba9625fb42..0ab9ff045e9 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256.o obj-$(CONFIG_CRYPTO_SHA512) += sha512.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o +obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o obj-$(CONFIG_CRYPTO_DES) += des.o diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c new file mode 100644 index 00000000000..0a2aadfa1d8 --- /dev/null +++ b/crypto/gf128mul.c @@ -0,0 +1,466 @@ +/* gf128mul.c - GF(2^128) multiplication functions + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006, Rik Snel + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue 31/01/2006 + + This file provides fast multiplication in GF(128) as required by several + cryptographic authentication modes +*/ + +#include +#include +#include +#include + +#define gf128mul_dat(q) { \ + q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\ + q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\ + q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\ + q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\ + q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\ + q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\ + q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\ + q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\ + q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\ + q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\ + q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\ + q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\ + q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\ + q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\ + q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\ + q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\ + q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\ + q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\ + q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\ + q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\ + q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\ + q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\ + q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\ + q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\ + q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\ + q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\ + q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\ + q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\ + q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\ + q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\ + q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\ + q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \ +} + +/* Given the value i in 0..255 as the byte overflow when a field element + in GHASH is multipled by x^8, this function will return the values that + are generated in the lo 16-bit word of the field value by applying the + modular polynomial. The values lo_byte and hi_byte are returned via the + macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into + memory as required by a suitable definition of this macro operating on + the table above +*/ + +#define xx(p, q) 0x##p##q + +#define xda_bbe(i) ( \ + (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \ + (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \ + (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \ + (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \ +) + +#define xda_lle(i) ( \ + (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \ + (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \ + (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \ + (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \ +) + +static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle); +static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe); + +/* These functions multiply a field element by x, by x^4 and by x^8 + * in the polynomial field representation. It uses 32-bit word operations + * to gain speed but compensates for machine endianess and hence works + * correctly on both styles of machine. + */ + +static void gf128mul_x_lle(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_lle[(b << 7) & 0xff]; + + r->b = cpu_to_be64((b >> 1) | (a << 63)); + r->a = cpu_to_be64((a >> 1) ^ (_tt << 48)); +} + +static void gf128mul_x_bbe(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_bbe[a >> 63]; + + r->a = cpu_to_be64((a << 1) | (b >> 63)); + r->b = cpu_to_be64((b << 1) ^ _tt); +} + +static void gf128mul_x8_lle(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_lle[b & 0xff]; + + x->b = cpu_to_be64((b >> 8) | (a << 56)); + x->a = cpu_to_be64((a >> 8) ^ (_tt << 48)); +} + +static void gf128mul_x8_bbe(be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + u64 _tt = gf128mul_table_bbe[a >> 56]; + + x->a = cpu_to_be64((a << 8) | (b >> 56)); + x->b = cpu_to_be64((b << 8) ^ _tt); +} + +void gf128mul_lle(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_lle(&p[i + 1], &p[i]); + + memset(r, 0, sizeof(r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[15 - i]; + + if (ch & 0x80) + be128_xor(r, r, &p[0]); + if (ch & 0x40) + be128_xor(r, r, &p[1]); + if (ch & 0x20) + be128_xor(r, r, &p[2]); + if (ch & 0x10) + be128_xor(r, r, &p[3]); + if (ch & 0x08) + be128_xor(r, r, &p[4]); + if (ch & 0x04) + be128_xor(r, r, &p[5]); + if (ch & 0x02) + be128_xor(r, r, &p[6]); + if (ch & 0x01) + be128_xor(r, r, &p[7]); + + if (++i >= 16) + break; + + gf128mul_x8_lle(r); + } +} +EXPORT_SYMBOL(gf128mul_lle); + +void gf128mul_bbe(be128 *r, const be128 *b) +{ + be128 p[8]; + int i; + + p[0] = *r; + for (i = 0; i < 7; ++i) + gf128mul_x_bbe(&p[i + 1], &p[i]); + + memset(r, 0, sizeof(r)); + for (i = 0;;) { + u8 ch = ((u8 *)b)[i]; + + if (ch & 0x80) + be128_xor(r, r, &p[7]); + if (ch & 0x40) + be128_xor(r, r, &p[6]); + if (ch & 0x20) + be128_xor(r, r, &p[5]); + if (ch & 0x10) + be128_xor(r, r, &p[4]); + if (ch & 0x08) + be128_xor(r, r, &p[3]); + if (ch & 0x04) + be128_xor(r, r, &p[2]); + if (ch & 0x02) + be128_xor(r, r, &p[1]); + if (ch & 0x01) + be128_xor(r, r, &p[0]); + + if (++i >= 16) + break; + + gf128mul_x8_bbe(r); + } +} +EXPORT_SYMBOL(gf128mul_bbe); + +/* This version uses 64k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(128). If we consider a GF(128) value in + the buffer's lowest byte, we can construct a table of + the 256 16 byte values that result from the 256 values + of this byte. This requires 4096 bytes. But we also + need tables for each of the 16 higher bytes in the + buffer as well, which makes 64 kbytes in total. +*/ +/* additional explanation + * t[0][BYTE] contains g*BYTE + * t[1][BYTE] contains g*x^8*BYTE + * .. + * t[15][BYTE] contains g*x^120*BYTE */ +struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g) +{ + struct gf128mul_64k *t; + int i, j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + for (i = 0; i < 16; i++) { + t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); + if (!t->t[i]) { + gf128mul_free_64k(t); + t = NULL; + goto out; + } + } + + t->t[0]->t[128] = *g; + for (j = 64; j > 0; j >>= 1) + gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]); + + for (i = 0;;) { + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[i]->t[j + k], + &t->t[i]->t[j], &t->t[i]->t[k]); + + if (++i >= 16) + break; + + for (j = 128; j > 0; j >>= 1) { + t->t[i]->t[j] = t->t[i - 1]->t[j]; + gf128mul_x8_lle(&t->t[i]->t[j]); + } + } + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_64k_lle); + +struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) +{ + struct gf128mul_64k *t; + int i, j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + for (i = 0; i < 16; i++) { + t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); + if (!t->t[i]) { + gf128mul_free_64k(t); + t = NULL; + goto out; + } + } + + t->t[0]->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]); + + for (i = 0;;) { + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[i]->t[j + k], + &t->t[i]->t[j], &t->t[i]->t[k]); + + if (++i >= 16) + break; + + for (j = 128; j > 0; j >>= 1) { + t->t[i]->t[j] = t->t[i - 1]->t[j]; + gf128mul_x8_bbe(&t->t[i]->t[j]); + } + } + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_64k_bbe); + +void gf128mul_free_64k(struct gf128mul_64k *t) +{ + int i; + + for (i = 0; i < 16; i++) + kfree(t->t[i]); + kfree(t); +} +EXPORT_SYMBOL(gf128mul_free_64k); + +void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i; + + *r = t->t[0]->t[ap[0]]; + for (i = 1; i < 16; ++i) + be128_xor(r, r, &t->t[i]->t[ap[i]]); + *a = *r; +} +EXPORT_SYMBOL(gf128mul_64k_lle); + +void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i; + + *r = t->t[0]->t[ap[15]]; + for (i = 1; i < 16; ++i) + be128_xor(r, r, &t->t[i]->t[ap[15 - i]]); + *a = *r; +} +EXPORT_SYMBOL(gf128mul_64k_bbe); + +/* This version uses 4k bytes of table space. + A 16 byte buffer has to be multiplied by a 16 byte key + value in GF(128). If we consider a GF(128) value in a + single byte, we can construct a table of the 256 16 byte + values that result from the 256 values of this byte. + This requires 4096 bytes. If we take the highest byte in + the buffer and use this table to get the result, we then + have to multiply by x^120 to get the final value. For the + next highest byte the result has to be multiplied by x^112 + and so on. But we can do this by accumulating the result + in an accumulator starting with the result for the top + byte. We repeatedly multiply the accumulator value by + x^8 and then add in (i.e. xor) the 16 bytes of the next + lower byte in the buffer, stopping when we reach the + lowest byte. This requires a 4096 byte table. +*/ +struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[128] = *g; + for (j = 64; j > 0; j >>= 1) + gf128mul_x_lle(&t->t[j], &t->t[j+j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_lle); + +struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g) +{ + struct gf128mul_4k *t; + int j, k; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + goto out; + + t->t[1] = *g; + for (j = 1; j <= 64; j <<= 1) + gf128mul_x_bbe(&t->t[j + j], &t->t[j]); + + for (j = 2; j < 256; j += j) + for (k = 1; k < j; ++k) + be128_xor(&t->t[j + k], &t->t[j], &t->t[k]); + +out: + return t; +} +EXPORT_SYMBOL(gf128mul_init_4k_bbe); + +void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 15; + + *r = t->t[ap[15]]; + while (i--) { + gf128mul_x8_lle(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_lle); + +void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t) +{ + u8 *ap = (u8 *)a; + be128 r[1]; + int i = 0; + + *r = t->t[ap[0]]; + while (++i < 16) { + gf128mul_x8_bbe(r); + be128_xor(r, r, &t->t[ap[i]]); + } + *a = *r; +} +EXPORT_SYMBOL(gf128mul_4k_bbe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)"); diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h new file mode 100644 index 00000000000..4fd31520244 --- /dev/null +++ b/include/crypto/gf128mul.h @@ -0,0 +1,198 @@ +/* gf128mul.h - GF(2^128) multiplication functions + * + * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. + * Copyright (c) 2006 Rik Snel + * + * Based on Dr Brian Gladman's (GPL'd) work published at + * http://fp.gladman.plus.com/cryptography_technology/index.htm + * See the original copyright notice below. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ +/* + --------------------------------------------------------------------------- + Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The free distribution and use of this software in both source and binary + form is allowed (with or without changes) provided that: + + 1. distributions of this source code include the above copyright + notice, this list of conditions and the following disclaimer; + + 2. distributions in binary form include the above copyright + notice, this list of conditions and the following disclaimer + in the documentation and/or other associated materials; + + 3. the copyright holder's name is not used to endorse products + built using this software without specific written permission. + + ALTERNATIVELY, provided that this notice is retained in full, this product + may be distributed under the terms of the GNU General Public License (GPL), + in which case the provisions of the GPL apply INSTEAD OF those given above. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 31/01/2006 + + An implementation of field multiplication in Galois Field GF(128) +*/ + +#ifndef _CRYPTO_GF128MUL_H +#define _CRYPTO_GF128MUL_H + +#include +#include + +/* Comment by Rik: + * + * For some background on GF(2^128) see for example: http://- + * csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf + * + * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can + * be mapped to computer memory in a variety of ways. Let's examine + * three common cases. + * + * Take a look at the 16 binary octets below in memory order. The msb's + * are left and the lsb's are right. char b[16] is an array and b[0] is + * the first octet. + * + * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 + * b[0] b[1] b[2] b[3] b[13] b[14] b[15] + * + * Every bit is a coefficient of some power of X. We can store the bits + * in every byte in little-endian order and the bytes themselves also in + * little endian order. I will call this lle (little-little-endian). + * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks + * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. + * This format was originally implemented in gf128mul and is used + * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). + * + * Another convention says: store the bits in bigendian order and the + * bytes also. This is bbe (big-big-endian). Now the buffer above + * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, + * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe + * is partly implemented. + * + * Both of the above formats are easy to implement on big-endian + * machines. + * + * EME (which is patent encumbered) uses the ble format (bits are stored + * in big endian order and the bytes in little endian). The above buffer + * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. + * + * The common machine word-size is smaller than 128 bits, so to make + * an efficient implementation we must split into machine word sizes. + * This file uses one 32bit for the moment. Machine endianness comes into + * play. The lle format in relation to machine endianness is discussed + * below by the original author of gf128mul Dr Brian Gladman. + * + * Let's look at the bbe and ble format on a little endian machine. + * + * bbe on a little endian machine u32 x[4]: + * + * MS x[0] LS MS x[1] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 + * + * MS x[2] LS MS x[3] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 + * + * ble on a little endian machine + * + * MS x[0] LS MS x[1] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 + * + * MS x[2] LS MS x[3] LS + * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 + * + * Multiplications in GF(2^128) are mostly bit-shifts, so you see why + * ble (and lbe also) are easier to implement on a little-endian + * machine than on a big-endian machine. The converse holds for bbe + * and lle. + * + * Note: to have good alignment, it seems to me that it is sufficient + * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize + * machines this will automatically aligned to wordsize and on a 64-bit + * machine also. + */ +/* Multiply a GF128 field element by x. Field elements are held in arrays + of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower + indexed bits placed in the more numerically significant bit positions + within bytes. + + On little endian machines the bit indexes translate into the bit + positions within four 32-bit words in the following way + + MS x[0] LS MS x[1] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 + + MS x[2] LS MS x[3] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 + + On big endian machines the bit indexes translate into the bit + positions within four 32-bit words in the following way + + MS x[0] LS MS x[1] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 + + MS x[2] LS MS x[3] LS + ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls + 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 +*/ + +/* A slow generic version of gf_mul, implemented for lle and bbe + * It multiplies a and b and puts the result in a */ +void gf128mul_lle(be128 *a, const be128 *b); + +void gf128mul_bbe(be128 *a, const be128 *b); + + +/* 4k table optimization */ + +struct gf128mul_4k { + be128 t[256]; +}; + +struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); +struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); +void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); + +static inline void gf128mul_free_4k(struct gf128mul_4k *t) +{ + kfree(t); +} + + +/* 64k table optimization, implemented for lle and bbe */ + +struct gf128mul_64k { + struct gf128mul_4k *t[16]; +}; + +/* first initialize with the constant factor with which you + * want to multiply and then call gf128_64k_lle with the other + * factor in the first argument, the table in the second and a + * scratch register in the third. Afterwards *a = *r. */ +struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); +struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); +void gf128mul_free_64k(struct gf128mul_64k *t); +void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); +void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); + +#endif /* _CRYPTO_GF128MUL_H */ -- cgit v1.2.3-70-g09d2 From 9ee0779e994c6916863045297b831212e285da3b Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 5 Dec 2006 13:44:31 -0800 Subject: [NETFILTER]: nf_conntrack: fix warning in PPTP helper Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nf_conntrack_pptp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h index fb049ec11ff..9d8144a488c 100644 --- a/include/linux/netfilter/nf_conntrack_pptp.h +++ b/include/linux/netfilter/nf_conntrack_pptp.h @@ -2,6 +2,8 @@ #ifndef _NF_CONNTRACK_PPTP_H #define _NF_CONNTRACK_PPTP_H +#include + /* state of the control session */ enum pptp_ctrlsess_state { PPTP_SESSION_NONE, /* no session present */ @@ -295,7 +297,6 @@ union pptp_ctrl_union { /* crap needed for nf_conntrack_compat.h */ struct nf_conn; struct nf_conntrack_expect; -enum ip_conntrack_info; extern int (*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, -- cgit v1.2.3-70-g09d2 From 95b99a670df31ca5271f503f378e5cac3aee8f5e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Dec 2006 20:10:07 -0800 Subject: [IRDA] irlan: Fix compile warning when CONFIG_PROC_FS=n include/net/irda/irlan_filter.h:31: warning: 'struct seq_file' declared inside parameter list include/net/irda/irlan_filter.h:31: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/irda/irlan_filter.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/irda/irlan_filter.h b/include/net/irda/irlan_filter.h index 492dedaa8ac..1720539ac2c 100644 --- a/include/net/irda/irlan_filter.h +++ b/include/net/irda/irlan_filter.h @@ -28,6 +28,8 @@ void irlan_check_command_param(struct irlan_cb *self, char *param, char *value); void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); +#ifdef CONFIG_PROC_FS void irlan_print_filter(struct seq_file *seq, int filter_type); +#endif #endif /* IRLAN_FILTER_H */ -- cgit v1.2.3-70-g09d2 From 161a09e737f0761ca064ee6a907313402f7a54b6 Mon Sep 17 00:00:00 2001 From: Joy Latten Date: Mon, 27 Nov 2006 13:11:54 -0600 Subject: audit: Add auditing to ipsec An audit message occurs when an ipsec SA or ipsec policy is created/deleted. Signed-off-by: Joy Latten Signed-off-by: James Morris Signed-off-by: David S. Miller --- include/linux/audit.h | 6 +++ include/net/xfrm.h | 19 +++++--- kernel/auditsc.c | 6 ++- net/key/af_key.c | 27 +++++++++-- net/xfrm/xfrm_policy.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++- net/xfrm/xfrm_state.c | 17 ++++++- net/xfrm/xfrm_user.c | 33 ++++++++++++-- 7 files changed, 213 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index b2ca666d999..0e07db6cc0d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -101,6 +101,10 @@ #define AUDIT_MAC_CIPSOV4_DEL 1408 /* NetLabel: del CIPSOv4 DOI entry */ #define AUDIT_MAC_MAP_ADD 1409 /* NetLabel: add LSM domain mapping */ #define AUDIT_MAC_MAP_DEL 1410 /* NetLabel: del LSM domain mapping */ +#define AUDIT_MAC_IPSEC_ADDSA 1411 /* Add a XFRM state */ +#define AUDIT_MAC_IPSEC_DELSA 1412 /* Delete a XFRM state */ +#define AUDIT_MAC_IPSEC_ADDSPD 1413 /* Add a XFRM policy */ +#define AUDIT_MAC_IPSEC_DELSPD 1414 /* Delete a XFRM policy */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 @@ -377,6 +381,7 @@ extern void auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); extern uid_t audit_get_loginuid(struct audit_context *ctx); +extern void audit_log_task_context(struct audit_buffer *ab); extern int __audit_ipc_obj(struct kern_ipc_perm *ipcp); extern int __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode); extern int audit_bprm(struct linux_binprm *bprm); @@ -449,6 +454,7 @@ extern int audit_n_rules; #define audit_inode_update(i) do { ; } while (0) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) +#define audit_log_task_context(b) do { ; } while (0) #define audit_ipc_obj(i) ({ 0; }) #define audit_ipc_set_perm(q,u,g,m) ({ 0; }) #define audit_bprm(p) ({ 0; }) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 15ec19dcf9c..f699cdcab40 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -392,6 +392,15 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +/* Audit Information */ +struct xfrm_audit +{ + uid_t loginuid; + u32 secid; +}; +void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x); + static inline void xfrm_pol_hold(struct xfrm_policy *policy) { if (likely(policy != NULL)) @@ -906,7 +915,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s #endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); -extern void xfrm_state_flush(u8 proto); +extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_notify(struct xfrm_state *x, int event); @@ -959,13 +968,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); -void xfrm_policy_flush(u8 type); +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi); -struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, +struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(u8 type); +extern void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ab97e510123..40722e26de9 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -731,7 +731,7 @@ static inline void audit_free_context(struct audit_context *context) printk(KERN_ERR "audit: freed %d contexts\n", count); } -static void audit_log_task_context(struct audit_buffer *ab) +void audit_log_task_context(struct audit_buffer *ab) { char *ctx = NULL; ssize_t len = 0; @@ -760,6 +760,8 @@ error_path: return; } +EXPORT_SYMBOL(audit_log_task_context); + static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) { char name[sizeof(tsk->comm)]; @@ -1488,6 +1490,8 @@ uid_t audit_get_loginuid(struct audit_context *ctx) return ctx ? ctx->loginuid : -1; } +EXPORT_SYMBOL(audit_get_loginuid); + /** * __audit_mq_open - record audit data for a POSIX MQ open * @oflag: open flag diff --git a/net/key/af_key.c b/net/key/af_key.c index 0e1dbfbb9b1..5dd5094659a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -1420,6 +1421,9 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, else err = xfrm_state_update(x); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -1460,8 +1464,12 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = -EPERM; goto out; } - + err = xfrm_state_delete(x); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -1637,12 +1645,15 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd { unsigned proto; struct km_event c; + struct xfrm_audit audit_info; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; - xfrm_state_flush(proto); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_state_flush(proto, &audit_info); c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2205,6 +2216,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_ADDSPD, err ? 0 : 1, xp, NULL); + if (err) goto out; @@ -2282,6 +2296,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); + + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -2416,8 +2434,11 @@ static int key_notify_policy_flush(struct km_event *c) static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs) { struct km_event c; + struct xfrm_audit audit_info; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + audit_info.loginuid = audit_get_loginuid(current->audit_context); + audit_info.secid = 0; + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4f04222698d..47c13649bac 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "xfrm_hash.h" @@ -804,7 +805,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type) +void xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) { int dir; @@ -824,6 +825,9 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -842,6 +846,11 @@ void xfrm_policy_flush(u8 type) hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSPD, 1, + pol, NULL); + xfrm_policy_kill(pol); killed++; @@ -1977,6 +1986,115 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); +/* Audit addition and deletion of SAs and ipsec policy */ + +void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, + struct xfrm_policy *xp, struct xfrm_state *x) +{ + + char *secctx; + u32 secctx_len; + struct xfrm_sec_ctx *sctx = NULL; + struct audit_buffer *audit_buf; + int family; + extern int audit_enabled; + + if (audit_enabled == 0) + return; + + audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type); + if (audit_buf == NULL) + return; + + switch(type) { + case AUDIT_MAC_IPSEC_ADDSA: + audit_log_format(audit_buf, "SAD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSA: + audit_log_format(audit_buf, "SAD delete: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_ADDSPD: + audit_log_format(audit_buf, "SPD add: auid=%u", auid); + break; + case AUDIT_MAC_IPSEC_DELSPD: + audit_log_format(audit_buf, "SPD delete: auid=%u", auid); + break; + default: + return; + } + + if (sid != 0 && + security_secid_to_secctx(sid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + else + audit_log_task_context(audit_buf); + + if (xp) { + family = xp->selector.family; + if (xp->security) + sctx = xp->security; + } else { + family = x->props.family; + if (x->security) + sctx = x->security; + } + + if (sctx) + audit_log_format(audit_buf, + " sec_alg=%u sec_doi=%u sec_obj=%s", + sctx->ctx_alg, sctx->ctx_doi, sctx->ctx_str); + + switch(family) { + case AF_INET: + { + struct in_addr saddr, daddr; + if (xp) { + saddr.s_addr = xp->selector.saddr.a4; + daddr.s_addr = xp->selector.daddr.a4; + } else { + saddr.s_addr = x->props.saddr.a4; + daddr.s_addr = x->id.daddr.a4; + } + audit_log_format(audit_buf, + " src=%u.%u.%u.%u dst=%u.%u.%u.%u", + NIPQUAD(saddr), NIPQUAD(daddr)); + } + break; + case AF_INET6: + { + struct in6_addr saddr6, daddr6; + if (xp) { + memcpy(&saddr6, xp->selector.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, xp->selector.daddr.a6, + sizeof(struct in6_addr)); + } else { + memcpy(&saddr6, x->props.saddr.a6, + sizeof(struct in6_addr)); + memcpy(&daddr6, x->id.daddr.a6, + sizeof(struct in6_addr)); + } + audit_log_format(audit_buf, + " src=" NIP6_FMT "dst=" NIP6_FMT, + NIP6(saddr6), NIP6(daddr6)); + } + break; + } + + if (x) + audit_log_format(audit_buf, " spi=%lu(0x%lx) protocol=%s", + (unsigned long)ntohl(x->id.spi), + (unsigned long)ntohl(x->id.spi), + x->id.proto == IPPROTO_AH ? "AH" : + (x->id.proto == IPPROTO_ESP ? + "ESP" : "IPCOMP")); + + audit_log_format(audit_buf, " res=%u", result); + audit_log_end(audit_buf); +} + +EXPORT_SYMBOL(xfrm_audit_log); + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { int err = 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a14c88bf17f..d5d3a6f1f60 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "xfrm_hash.h" @@ -238,6 +239,7 @@ static void xfrm_timer_handler(unsigned long data) unsigned long now = (unsigned long)xtime.tv_sec; long next = LONG_MAX; int warn = 0; + int err = 0; spin_lock(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -295,9 +297,14 @@ expired: next = 2; goto resched; } - if (!__xfrm_state_delete(x) && x->id.spi) + + err = __xfrm_state_delete(x); + if (!err && x->id.spi) km_state_expired(x, 1, 0); + xfrm_audit_log(audit_get_loginuid(current->audit_context), 0, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + out: spin_unlock(&x->lock); } @@ -384,9 +391,10 @@ int xfrm_state_delete(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete); -void xfrm_state_flush(u8 proto) +void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) { int i; + int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i <= xfrm_state_hmask; i++) { @@ -400,6 +408,11 @@ restart: spin_unlock_bh(&xfrm_state_lock); xfrm_state_delete(x); + err = xfrm_state_delete(x); + xfrm_audit_log(audit_info->loginuid, + audit_info->secid, + AUDIT_MAC_IPSEC_DELSA, + err ? 0 : 1, NULL, x); xfrm_state_put(x); spin_lock_bh(&xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 311205ffa77..e5372b11fc8 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include #endif +#include static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -454,6 +455,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) else err = xfrm_state_update(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x); + if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); @@ -523,6 +527,10 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) } err = xfrm_state_delete(x); + + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x); + if (err < 0) goto out; @@ -1030,6 +1038,9 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, err ? 0 : 1, xp, NULL); + if (err) { security_xfrm_policy_free(xp); kfree(xp); @@ -1257,6 +1268,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } + if (delete) + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, (xp) ? 1 : 0, xp, NULL); + if (xp == NULL) return -ENOENT; @@ -1291,8 +1306,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma { struct km_event c; struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_audit audit_info; - xfrm_state_flush(p->proto); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_state_flush(p->proto, &audit_info); c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1442,12 +1460,15 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **x struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; + struct xfrm_audit audit_info; err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); if (err) return err; - xfrm_policy_flush(type); + audit_info.loginuid = NETLINK_CB(skb).loginuid; + audit_info.secid = NETLINK_CB(skb).sid; + xfrm_policy_flush(type, &audit_info); c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1502,6 +1523,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSPD, 1, xp, NULL); + } else { // reset the timers here? printk("Dont know what to do with soft policy expire\n"); @@ -1533,8 +1557,11 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void ** goto out; km_state_expired(x, ue->hard, current->pid); - if (ue->hard) + if (ue->hard) { __xfrm_state_delete(x); + xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, + AUDIT_MAC_IPSEC_DELSA, 1, NULL, x); + } out: spin_unlock_bh(&x->lock); xfrm_state_put(x); -- cgit v1.2.3-70-g09d2 From c9204d9ca79baac564b49d36d0228a69d7ded084 Mon Sep 17 00:00:00 2001 From: Joy Latten Date: Thu, 30 Nov 2006 15:50:43 -0600 Subject: audit: disable ipsec auditing when CONFIG_AUDITSYSCALL=n Disables auditing in ipsec when CONFIG_AUDITSYSCALL is disabled in the kernel. Also includes a bug fix for xfrm_state.c as a result of original ipsec audit patch. Signed-off-by: Joy Latten Signed-off-by: James Morris Signed-off-by: David S. Miller --- include/net/xfrm.h | 7 ++++++- net/xfrm/xfrm_policy.c | 2 ++ net/xfrm/xfrm_state.c | 1 - 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f699cdcab40..e4765413cf8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -398,8 +398,13 @@ struct xfrm_audit uid_t loginuid; u32 secid; }; -void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, + +#ifdef CONFIG_AUDITSYSCALL +extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result, struct xfrm_policy *xp, struct xfrm_state *x); +#else +#define xfrm_audit_log(a,s,t,r,p,x) do { ; } while (0) +#endif /* CONFIG_AUDITSYSCALL */ static inline void xfrm_pol_hold(struct xfrm_policy *policy) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 47c13649bac..140bb9b384a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1986,6 +1986,7 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, EXPORT_SYMBOL(xfrm_bundle_ok); +#ifdef CONFIG_AUDITSYSCALL /* Audit addition and deletion of SAs and ipsec policy */ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, @@ -2094,6 +2095,7 @@ void xfrm_audit_log(uid_t auid, u32 sid, int type, int result, } EXPORT_SYMBOL(xfrm_audit_log); +#endif /* CONFIG_AUDITSYSCALL */ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d5d3a6f1f60..fdb08d9f34a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -407,7 +407,6 @@ restart: xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_delete(x); err = xfrm_state_delete(x); xfrm_audit_log(audit_info->loginuid, audit_info->secid, -- cgit v1.2.3-70-g09d2 From 676917d488212303ce4a7d033815ce8799201010 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 7 Dec 2006 00:20:22 -0800 Subject: [TG3]: Add 5787F device ID. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 4 +++- include/linux/pci_ids.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index e6561c1d85d..5514828b792 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -192,6 +192,7 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787F)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)}, @@ -10859,7 +10860,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->pdev->device == PCI_DEVICE_ID_TIGON3_5705F)) || (tp->pdev->vendor == PCI_VENDOR_ID_BROADCOM && (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5751F || - tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F)) || + tp->pdev->device == PCI_DEVICE_ID_TIGON3_5753F || + tp->pdev->device == PCI_DEVICE_ID_TIGON3_5787F)) || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tp->tg3_flags |= TG3_FLAG_10_100_ONLY; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c09da1e30c5..edddcce68b9 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1931,6 +1931,7 @@ #define PCI_DEVICE_ID_TIGON3_5750M 0x167c #define PCI_DEVICE_ID_TIGON3_5751M 0x167d #define PCI_DEVICE_ID_TIGON3_5751F 0x167e +#define PCI_DEVICE_ID_TIGON3_5787F 0x167f #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 #define PCI_DEVICE_ID_TIGON3_5786 0x169a -- cgit v1.2.3-70-g09d2 From 9d26e213423923c9e033ccd373705118131827c9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 7 Dec 2006 00:21:14 -0800 Subject: [TG3]: Add TG3_FLG2_IS_NIC flag. Add Tg3_FLG2_IS_NIC flag to unambiguously determine whether the device is NIC or onboard. Previously, the EEPROM_WRITE_PROT flag was overloaded to also mean onboard. With the separation, we can support some devices that are onboard but do not use eeprom write protect. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/tg3.c | 37 ++++++++++++++++++++++++------------- drivers/net/tg3.h | 1 + include/linux/pci_ids.h | 2 ++ 3 files changed, 27 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 5514828b792..16bc05fe531 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1062,7 +1062,7 @@ static void tg3_frob_aux_power(struct tg3 *tp) { struct tg3 *tp_peer = tp; - if ((tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) != 0) + if ((tp->tg3_flags2 & TG3_FLG2_IS_NIC) == 0) return; if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) || @@ -1213,8 +1213,8 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) power_control); udelay(100); /* Delay after power state change */ - /* Switch out of Vaux if it is not a LOM */ - if (!(tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT)) + /* Switch out of Vaux if it is a NIC */ + if (tp->tg3_flags2 & TG3_FLG2_IS_NIC) tw32_wait_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl, 100); return 0; @@ -6397,16 +6397,17 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) udelay(40); /* tp->grc_local_ctrl is partially set up during tg3_get_invariants(). - * If TG3_FLAG_EEPROM_WRITE_PROT is set, we should read the + * If TG3_FLG2_IS_NIC is zero, we should read the * register to preserve the GPIO settings for LOMs. The GPIOs, * whether used as inputs or outputs, are set by boot code after * reset. */ - if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) { + if (!(tp->tg3_flags2 & TG3_FLG2_IS_NIC)) { u32 gpio_mask; - gpio_mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE2 | - GRC_LCLCTRL_GPIO_OUTPUT0 | GRC_LCLCTRL_GPIO_OUTPUT2; + gpio_mask = GRC_LCLCTRL_GPIO_OE0 | GRC_LCLCTRL_GPIO_OE1 | + GRC_LCLCTRL_GPIO_OE2 | GRC_LCLCTRL_GPIO_OUTPUT0 | + GRC_LCLCTRL_GPIO_OUTPUT1 | GRC_LCLCTRL_GPIO_OUTPUT2; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5752) gpio_mask |= GRC_LCLCTRL_GPIO_OE3 | @@ -6418,8 +6419,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tp->grc_local_ctrl |= tr32(GRC_LOCAL_CTRL) & gpio_mask; /* GPIO1 must be driven high for eeprom write protect */ - tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 | - GRC_LCLCTRL_GPIO_OUTPUT1); + if (tp->tg3_flags & TG3_FLAG_EEPROM_WRITE_PROT) + tp->grc_local_ctrl |= (GRC_LCLCTRL_GPIO_OE1 | + GRC_LCLCTRL_GPIO_OUTPUT1); } tw32_f(GRC_LOCAL_CTRL, tp->grc_local_ctrl); udelay(100); @@ -9963,8 +9965,10 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { - if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM)) + if (!(tr32(PCIE_TRANSACTION_CFG) & PCIE_TRANS_CFG_LOM)) { tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + tp->tg3_flags2 |= TG3_FLG2_IS_NIC; + } return; } @@ -10064,10 +10068,17 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) tp->led_ctrl = LED_CTRL_MODE_PHY_2; - if (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP) + if (nic_cfg & NIC_SRAM_DATA_CFG_EEPROM_WP) { tp->tg3_flags |= TG3_FLAG_EEPROM_WRITE_PROT; - else + if ((tp->pdev->subsystem_vendor == + PCI_VENDOR_ID_ARIMA) && + (tp->pdev->subsystem_device == 0x205a || + tp->pdev->subsystem_device == 0x2063)) + tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + } else { tp->tg3_flags &= ~TG3_FLAG_EEPROM_WRITE_PROT; + tp->tg3_flags2 |= TG3_FLG2_IS_NIC; + } if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) { tp->tg3_flags |= TG3_FLAG_ENABLE_ASF; @@ -10693,7 +10704,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags |= TG3_FLAG_SRAM_USE_CONFIG; /* Get eeprom hw config before calling tg3_set_power_state(). - * In particular, the TG3_FLAG_EEPROM_WRITE_PROT flag must be + * In particular, the TG3_FLG2_IS_NIC flag must be * determined before calling tg3_set_power_state() so that * we know whether or not to switch out of Vaux power. * When the flag is set, it means that GPIO1 is used for eeprom diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 92f53000bce..dfaf4ed127b 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2233,6 +2233,7 @@ struct tg3 { #define TG3_FLG2_PCI_EXPRESS 0x00000200 #define TG3_FLG2_ASF_NEW_HANDSHAKE 0x00000400 #define TG3_FLG2_HW_AUTONEG 0x00000800 +#define TG3_FLG2_IS_NIC 0x00001000 #define TG3_FLG2_PHY_SERDES 0x00002000 #define TG3_FLG2_CAPACITIVE_COUPLING 0x00004000 #define TG3_FLG2_FLASH 0x00008000 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index edddcce68b9..ebc597d5724 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2003,6 +2003,8 @@ #define PCI_DEVICE_ID_FARSITE_TE1 0x1610 #define PCI_DEVICE_ID_FARSITE_TE1C 0x1612 +#define PCI_VENDOR_ID_ARIMA 0x161f + #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_PCI 0x0001 #define PCI_DEVICE_ID_BCM1250_HT 0x0002 -- cgit v1.2.3-70-g09d2 From 676dcb8bc2ec78d80091037773598d6ec8c673d6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:31:30 -0800 Subject: [PATCH] add bottom_half.h With CONFIG_SMP=n: drivers/input/ff-memless.c:384: warning: implicit declaration of function 'local_bh_disable' drivers/input/ff-memless.c:393: warning: implicit declaration of function 'local_bh_enable' Really linux/spinlock.h should include linux/interrupt.h. But interrupt.h includes sched.h which will need spinlock.h. So the patch breaks the _bh declarations out into a separate header and includes it in both interrupt.h and spinlock.h. Cc: "Randy.Dunlap" Cc: Andi Kleen Cc: Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bottom_half.h | 10 ++++++++++ include/linux/interrupt.h | 7 +------ include/linux/spinlock.h | 1 + 3 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 include/linux/bottom_half.h (limited to 'include') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h new file mode 100644 index 00000000000..777dbf695d4 --- /dev/null +++ b/include/linux/bottom_half.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_BH_H +#define _LINUX_BH_H + +extern void local_bh_disable(void); +extern void __local_bh_enable(void); +extern void _local_bh_enable(void); +extern void local_bh_enable(void); +extern void local_bh_enable_ip(unsigned long ip); + +#endif /* _LINUX_BH_H */ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5b83e7b5962..de7593f4e89 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -217,12 +218,6 @@ static inline void __deprecated save_and_cli(unsigned long *x) #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ -extern void local_bh_disable(void); -extern void __local_bh_enable(void); -extern void _local_bh_enable(void); -extern void local_bh_enable(void); -extern void local_bh_enable_ip(unsigned long ip); - /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes tasklets are more than enough. F.e. all serial device BHs et diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 8451052ca66..94b767d6427 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -52,6 +52,7 @@ #include #include #include +#include #include -- cgit v1.2.3-70-g09d2 From 89689ae7f95995723fbcd5c116c47933a3bb8b13 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:31:45 -0800 Subject: [PATCH] Get rid of zone_table[] The zone table is mostly not needed. If we have a node in the page flags then we can get to the zone via NODE_DATA() which is much more likely to be already in the cpu cache. In case of SMP and UP NODE_DATA() is a constant pointer which allows us to access an exact replica of zonetable in the node_zones field. In all of the above cases there will be no need at all for the zone table. The only remaining case is if in a NUMA system the node numbers do not fit into the page flags. In that case we make sparse generate a table that maps sections to nodes and use that table to to figure out the node number. This table is sized to fit in a single cache line for the known 32 bit NUMA platform which makes it very likely that the information can be obtained without a cache miss. For sparsemem the zone table seems to be have been fairly large based on the maximum possible number of sections and the number of zones per node. There is some memory saving by removing zone_table. The main benefit is to reduce the cache foootprint of the VM from the frequent lookups of zones. Plus it simplifies the page allocator. [akpm@osdl.org: build fix] Signed-off-by: Christoph Lameter Cc: Dave Hansen Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 57 +++++++++++++++++++++++++++++++++-------------------- mm/memory_hotplug.c | 1 - mm/page_alloc.c | 22 --------------------- mm/sparse.c | 23 +++++++++++++++++++++ 4 files changed, 59 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index d538de90196..ab6e4974f37 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -396,7 +396,9 @@ void split_page(struct page *page, unsigned int order); * We are going to use the flags for the page to node mapping if its in * there. This includes the case where there is no node, so it is implicit. */ -#define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) +#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) +#define NODE_NOT_IN_PAGE_FLAGS +#endif #ifndef PFN_SECTION_SHIFT #define PFN_SECTION_SHIFT 0 @@ -411,13 +413,18 @@ void split_page(struct page *page, unsigned int order); #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ -#if FLAGS_HAS_NODE -#define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) +/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */ +#ifdef NODE_NOT_IN_PAGEFLAGS +#define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #else -#define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) +#define ZONEID_SHIFT (NODES_SHIFT + ZONES_SHIFT) +#endif + +#if ZONES_WIDTH > 0 +#define ZONEID_PGSHIFT ZONES_PGSHIFT +#else +#define ZONEID_PGSHIFT NODES_PGOFF #endif -#define ZONETABLE_PGSHIFT ZONES_PGSHIFT #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED @@ -426,23 +433,25 @@ void split_page(struct page *page, unsigned int order); #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) +#define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } -struct zone; -extern struct zone *zone_table[]; - +/* + * The identification function is only used by the buddy allocator for + * determining if two pages could be buddies. We are not really + * identifying a zone since we could be using a the section number + * id if we have not node id available in page flags. + * We guarantee only that it will return the same value for two + * combinable pages in a zone. + */ static inline int page_zone_id(struct page *page) { - return (page->flags >> ZONETABLE_PGSHIFT) & ZONETABLE_MASK; -} -static inline struct zone *page_zone(struct page *page) -{ - return zone_table[page_zone_id(page)]; + BUILD_BUG_ON(ZONEID_PGSHIFT == 0 && ZONEID_MASK); + return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; } static inline unsigned long zone_to_nid(struct zone *zone) @@ -454,13 +463,20 @@ static inline unsigned long zone_to_nid(struct zone *zone) #endif } +#ifdef NODE_NOT_IN_PAGE_FLAGS +extern unsigned long page_to_nid(struct page *page); +#else static inline unsigned long page_to_nid(struct page *page) { - if (FLAGS_HAS_NODE) - return (page->flags >> NODES_PGSHIFT) & NODES_MASK; - else - return zone_to_nid(page_zone(page)); + return (page->flags >> NODES_PGSHIFT) & NODES_MASK; } +#endif + +static inline struct zone *page_zone(struct page *page) +{ + return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; +} + static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; @@ -477,6 +493,7 @@ static inline void set_page_node(struct page *page, unsigned long node) page->flags &= ~(NODES_MASK << NODES_PGSHIFT); page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; } + static inline void set_page_section(struct page *page, unsigned long section) { page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); @@ -947,8 +964,6 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -extern void zonetable_add(struct zone *zone, int nid, enum zone_type zid, - unsigned long pfn, unsigned long size); #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fd678a662ea..0c055a090f4 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -72,7 +72,6 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) return ret; } memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn); - zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages); return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 08360aa111f..23bc5bcbdcf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -83,13 +83,6 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); -/* - * Used by page_zone() to look up the address of the struct zone whose - * id is encoded in the upper bits of page->flags - */ -struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; -EXPORT_SYMBOL(zone_table); - static char *zone_names[MAX_NR_ZONES] = { "DMA", #ifdef CONFIG_ZONE_DMA32 @@ -1715,20 +1708,6 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } } -#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) -void zonetable_add(struct zone *zone, int nid, enum zone_type zid, - unsigned long pfn, unsigned long size) -{ - unsigned long snum = pfn_to_section_nr(pfn); - unsigned long end = pfn_to_section_nr(pfn + size); - - if (FLAGS_HAS_NODE) - zone_table[ZONETABLE_INDEX(nid, zid)] = zone; - else - for (; snum <= end; snum++) - zone_table[ZONETABLE_INDEX(snum, zid)] = zone; -} - #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ memmap_init_zone((size), (nid), (zone), (start_pfn)) @@ -2421,7 +2400,6 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, if (!size) continue; - zonetable_add(zone, nid, j, zone_start_pfn, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size); BUG_ON(ret); zone_start_pfn += size; diff --git a/mm/sparse.c b/mm/sparse.c index b3c82ba3001..158d6a2a526 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -24,6 +24,25 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] #endif EXPORT_SYMBOL(mem_section); +#ifdef NODE_NOT_IN_PAGE_FLAGS +/* + * If we did not store the node number in the page then we have to + * do a lookup in the section_to_node_table in order to find which + * node the page belongs to. + */ +#if MAX_NUMNODES <= 256 +static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#else +static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; +#endif + +unsigned long page_to_nid(struct page *page) +{ + return section_to_node_table[page_to_section(page)]; +} +EXPORT_SYMBOL(page_to_nid); +#endif + #ifdef CONFIG_SPARSEMEM_EXTREME static struct mem_section *sparse_index_alloc(int nid) { @@ -49,6 +68,10 @@ static int sparse_index_init(unsigned long section_nr, int nid) struct mem_section *section; int ret = 0; +#ifdef NODE_NOT_IN_PAGE_FLAGS + section_to_node_table[section_nr] = nid; +#endif + if (mem_section[root]) return -EEXIST; -- cgit v1.2.3-70-g09d2 From 9276b1bc96a132f4068fdee00983c532f43d3a26 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 6 Dec 2006 20:31:48 -0800 Subject: [PATCH] memory page_alloc zonelist caching speedup Optimize the critical zonelist scanning for free pages in the kernel memory allocator by caching the zones that were found to be full recently, and skipping them. Remembers the zones in a zonelist that were short of free memory in the last second. And it stashes a zone-to-node table in the zonelist struct, to optimize that conversion (minimize its cache footprint.) Recent changes: This differs in a significant way from a similar patch that I posted a week ago. Now, instead of having a nodemask_t of recently full nodes, I have a bitmask of recently full zones. This solves a problem that last weeks patch had, which on systems with multiple zones per node (such as DMA zone) would take seeing any of these zones full as meaning that all zones on that node were full. Also I changed names - from "zonelist faster" to "zonelist cache", as that seemed to better convey what we're doing here - caching some of the key zonelist state (for faster access.) See below for some performance benchmark results. After all that discussion with David on why I didn't need them, I went and got some ;). I wanted to verify that I had not hurt the normal case of memory allocation noticeably. At least for my one little microbenchmark, I found (1) the normal case wasn't affected, and (2) workloads that forced scanning across multiple nodes for memory improved up to 10% fewer System CPU cycles and lower elapsed clock time ('sys' and 'real'). Good. See details, below. I didn't have the logic in get_page_from_freelist() for various full nodes and zone reclaim failures correct. That should be fixed up now - notice the new goto labels zonelist_scan, this_zone_full, and try_next_zone, in get_page_from_freelist(). There are two reasons I persued this alternative, over some earlier proposals that would have focused on optimizing the fake numa emulation case by caching the last useful zone: 1) Contrary to what I said before, we (SGI, on large ia64 sn2 systems) have seen real customer loads where the cost to scan the zonelist was a problem, due to many nodes being full of memory before we got to a node we could use. Or at least, I think we have. This was related to me by another engineer, based on experiences from some time past. So this is not guaranteed. Most likely, though. The following approach should help such real numa systems just as much as it helps fake numa systems, or any combination thereof. 2) The effort to distinguish fake from real numa, using node_distance, so that we could cache a fake numa node and optimize choosing it over equivalent distance fake nodes, while continuing to properly scan all real nodes in distance order, was going to require a nasty blob of zonelist and node distance munging. The following approach has no new dependency on node distances or zone sorting. See comment in the patch below for a description of what it actually does. Technical details of note (or controversy): - See the use of "zlc_active" and "did_zlc_setup" below, to delay adding any work for this new mechanism until we've looked at the first zone in zonelist. I figured the odds of the first zone having the memory we needed were high enough that we should just look there, first, then get fancy only if we need to keep looking. - Some odd hackery was needed to add items to struct zonelist, while not tripping up the custom zonelists built by the mm/mempolicy.c code for MPOL_BIND. My usual wordy comments below explain this. Search for "MPOL_BIND". - Some per-node data in the struct zonelist is now modified frequently, with no locking. Multiple CPU cores on a node could hit and mangle this data. The theory is that this is just performance hint data, and the memory allocator will work just fine despite any such mangling. The fields at risk are the struct 'zonelist_cache' fields 'fullzones' (a bitmask) and 'last_full_zap' (unsigned long jiffies). It should all be self correcting after at most a one second delay. - This still does a linear scan of the same lengths as before. All I've optimized is making the scan faster, not algorithmically shorter. It is now able to scan a compact array of 'unsigned short' in the case of many full nodes, so one cache line should cover quite a few nodes, rather than each node hitting another one or two new and distinct cache lines. - If both Andi and Nick don't find this too complicated, I will be (pleasantly) flabbergasted. - I removed the comment claiming we only use one cachline's worth of zonelist. We seem, at least in the fake numa case, to have put the lie to that claim. - I pay no attention to the various watermarks and such in this performance hint. A node could be marked full for one watermark, and then skipped over when searching for a page using a different watermark. I think that's actually quite ok, as it will tend to slightly increase the spreading of memory over other nodes, away from a memory stressed node. =============== Performance - some benchmark results and analysis: This benchmark runs a memory hog program that uses multiple threads to touch alot of memory as quickly as it can. Multiple runs were made, touching 12, 38, 64 or 90 GBytes out of the total 96 GBytes on the system, and using 1, 19, 37, or 55 threads (on a 56 CPU system.) System, user and real (elapsed) timings were recorded for each run, shown in units of seconds, in the table below. Two kernels were tested - 2.6.18-mm3 and the same kernel with this zonelist caching patch added. The table also shows the percentage improvement the zonelist caching sys time is over (lower than) the stock *-mm kernel. number 2.6.18-mm3 zonelist-cache delta (< 0 good) percent GBs N ------------ -------------- ---------------- systime mem threads sys user real sys user real sys user real better 12 1 153 24 177 151 24 176 -2 0 -1 1% 12 19 99 22 8 99 22 8 0 0 0 0% 12 37 111 25 6 112 25 6 1 0 0 -0% 12 55 115 25 5 110 23 5 -5 -2 0 4% 38 1 502 74 576 497 73 570 -5 -1 -6 0% 38 19 426 78 48 373 76 39 -53 -2 -9 12% 38 37 544 83 36 547 82 36 3 -1 0 -0% 38 55 501 77 23 511 80 24 10 3 1 -1% 64 1 917 125 1042 890 124 1014 -27 -1 -28 2% 64 19 1118 138 119 965 141 103 -153 3 -16 13% 64 37 1202 151 94 1136 150 81 -66 -1 -13 5% 64 55 1118 141 61 1072 140 58 -46 -1 -3 4% 90 1 1342 177 1519 1275 174 1450 -67 -3 -69 4% 90 19 2392 199 192 2116 189 176 -276 -10 -16 11% 90 37 3313 238 175 2972 225 145 -341 -13 -30 10% 90 55 1948 210 104 1843 213 100 -105 3 -4 5% Notes: 1) This test ran a memory hog program that started a specified number N of threads, and had each thread allocate and touch 1/N'th of the total memory to be used in the test run in a single loop, writing a constant word to memory, one store every 4096 bytes. Watching this test during some earlier trial runs, I would see each of these threads sit down on one CPU and stay there, for the remainder of the pass, a different CPU for each thread. 2) The 'real' column is not comparable to the 'sys' or 'user' columns. The 'real' column is seconds wall clock time elapsed, from beginning to end of that test pass. The 'sys' and 'user' columns are total CPU seconds spent on that test pass. For a 19 thread test run, for example, the sum of 'sys' and 'user' could be up to 19 times the number of 'real' elapsed wall clock seconds. 3) Tests were run on a fresh, single-user boot, to minimize the amount of memory already in use at the start of the test, and to minimize the amount of background activity that might interfere. 4) Tests were done on a 56 CPU, 28 Node system with 96 GBytes of RAM. 5) Notice that the 'real' time gets large for the single thread runs, even though the measured 'sys' and 'user' times are modest. I'm not sure what that means - probably something to do with it being slow for one thread to be accessing memory along ways away. Perhaps the fake numa system, running ostensibly the same workload, would not show this substantial degradation of 'real' time for one thread on many nodes -- lets hope not. 6) The high thread count passes (one thread per CPU - on 55 of 56 CPUs) ran quite efficiently, as one might expect. Each pair of threads needed to allocate and touch the memory on the node the two threads shared, a pleasantly parallizable workload. 7) The intermediate thread count passes, when asking for alot of memory forcing them to go to a few neighboring nodes, improved the most with this zonelist caching patch. Conclusions: * This zonelist cache patch probably makes little difference one way or the other for most workloads on real numa hardware, if those workloads avoid heavy off node allocations. * For memory intensive workloads requiring substantial off-node allocations on real numa hardware, this patch improves both kernel and elapsed timings up to ten per-cent. * For fake numa systems, I'm optimistic, but will have to leave that up to Rohit Seth to actually test (once I get him a 2.6.18 backport.) Signed-off-by: Paul Jackson Cc: Rohit Seth Cc: Christoph Lameter Cc: David Rientjes Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 2 + include/linux/mmzone.h | 85 ++++++++++++++++++++-- mm/mempolicy.c | 2 + mm/page_alloc.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 265 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 4d8adf66368..748d2c99663 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,6 +23,7 @@ extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); +#define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ @@ -83,6 +84,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) return node_possible_map; } +#define cpuset_current_mems_allowed (node_online_map) static inline void cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_task_memory_state(void) {} #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e06683e2bea..09bf9d8d7b7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -288,19 +288,94 @@ struct zone { */ #define DEF_PRIORITY 12 +/* Maximum number of zones on a zonelist */ +#define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES) + +#ifdef CONFIG_NUMA +/* + * We cache key information from each zonelist for smaller cache + * footprint when scanning for free pages in get_page_from_freelist(). + * + * 1) The BITMAP fullzones tracks which zones in a zonelist have come + * up short of free memory since the last time (last_fullzone_zap) + * we zero'd fullzones. + * 2) The array z_to_n[] maps each zone in the zonelist to its node + * id, so that we can efficiently evaluate whether that node is + * set in the current tasks mems_allowed. + * + * Both fullzones and z_to_n[] are one-to-one with the zonelist, + * indexed by a zones offset in the zonelist zones[] array. + * + * The get_page_from_freelist() routine does two scans. During the + * first scan, we skip zones whose corresponding bit in 'fullzones' + * is set or whose corresponding node in current->mems_allowed (which + * comes from cpusets) is not set. During the second scan, we bypass + * this zonelist_cache, to ensure we look methodically at each zone. + * + * Once per second, we zero out (zap) fullzones, forcing us to + * reconsider nodes that might have regained more free memory. + * The field last_full_zap is the time we last zapped fullzones. + * + * This mechanism reduces the amount of time we waste repeatedly + * reexaming zones for free memory when they just came up low on + * memory momentarilly ago. + * + * The zonelist_cache struct members logically belong in struct + * zonelist. However, the mempolicy zonelists constructed for + * MPOL_BIND are intentionally variable length (and usually much + * shorter). A general purpose mechanism for handling structs with + * multiple variable length members is more mechanism than we want + * here. We resort to some special case hackery instead. + * + * The MPOL_BIND zonelists don't need this zonelist_cache (in good + * part because they are shorter), so we put the fixed length stuff + * at the front of the zonelist struct, ending in a variable length + * zones[], as is needed by MPOL_BIND. + * + * Then we put the optional zonelist cache on the end of the zonelist + * struct. This optional stuff is found by a 'zlcache_ptr' pointer in + * the fixed length portion at the front of the struct. This pointer + * both enables us to find the zonelist cache, and in the case of + * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL) + * to know that the zonelist cache is not there. + * + * The end result is that struct zonelists come in two flavors: + * 1) The full, fixed length version, shown below, and + * 2) The custom zonelists for MPOL_BIND. + * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache. + * + * Even though there may be multiple CPU cores on a node modifying + * fullzones or last_full_zap in the same zonelist_cache at the same + * time, we don't lock it. This is just hint data - if it is wrong now + * and then, the allocator will still function, perhaps a bit slower. + */ + + +struct zonelist_cache { + DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ + unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ + unsigned long last_full_zap; /* when last zap'd (jiffies) */ +}; +#else +struct zonelist_cache; +#endif + /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the * allocation, the other zones are fallback zones, in decreasing * priority. * - * Right now a zonelist takes up less than a cacheline. We never - * modify it apart from boot-up, and only a few indices are used, - * so despite the zonelist table being relatively big, the cache - * footprint of this construct is very small. + * If zlcache_ptr is not NULL, then it is just the address of zlcache, + * as explained above. If zlcache_ptr is NULL, there is no zlcache. */ + struct zonelist { - struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited + struct zonelist_cache *zlcache_ptr; // NULL or &zlcache + struct zone *zones[MAX_ZONES_PER_ZONELIST + 1]; // NULL delimited +#ifdef CONFIG_NUMA + struct zonelist_cache zlcache; // optional ... +#endif }; #ifdef CONFIG_ARCH_POPULATES_NODE_MAP diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 617fb31086e..fb907236bbd 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -141,9 +141,11 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) enum zone_type k; max = 1 + MAX_NR_ZONES * nodes_weight(*nodes); + max++; /* space for zlcache_ptr (see mmzone.h) */ zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL); if (!zl) return NULL; + zl->zlcache_ptr = NULL; num = 0; /* First put in the highest zones from all nodes, then all the next lower zones etc. Avoid empty zones because the memory allocator diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23bc5bcbdcf..230771d3c6b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -918,6 +918,126 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, return 1; } +#ifdef CONFIG_NUMA +/* + * zlc_setup - Setup for "zonelist cache". Uses cached zone data to + * skip over zones that are not allowed by the cpuset, or that have + * been recently (in last second) found to be nearly full. See further + * comments in mmzone.h. Reduces cache footprint of zonelist scans + * that have to skip over alot of full or unallowed zones. + * + * If the zonelist cache is present in the passed in zonelist, then + * returns a pointer to the allowed node mask (either the current + * tasks mems_allowed, or node_online_map.) + * + * If the zonelist cache is not available for this zonelist, does + * nothing and returns NULL. + * + * If the fullzones BITMAP in the zonelist cache is stale (more than + * a second since last zap'd) then we zap it out (clear its bits.) + * + * We hold off even calling zlc_setup, until after we've checked the + * first zone in the zonelist, on the theory that most allocations will + * be satisfied from that first zone, so best to examine that zone as + * quickly as we can. + */ +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + nodemask_t *allowednodes; /* zonelist_cache approximation */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return NULL; + + if (jiffies - zlc->last_full_zap > 1 * HZ) { + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + zlc->last_full_zap = jiffies; + } + + allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? + &cpuset_current_mems_allowed : + &node_online_map; + return allowednodes; +} + +/* + * Given 'z' scanning a zonelist, run a couple of quick checks to see + * if it is worth looking at further for free memory: + * 1) Check that the zone isn't thought to be full (doesn't have its + * bit set in the zonelist_cache fullzones BITMAP). + * 2) Check that the zones node (obtained from the zonelist_cache + * z_to_n[] mapping) is allowed in the passed in allowednodes mask. + * Return true (non-zero) if zone is worth looking at further, or + * else return false (zero) if it is not. + * + * This check -ignores- the distinction between various watermarks, + * such as GFP_HIGH, GFP_ATOMIC, PF_MEMALLOC, ... If a zone is + * found to be full for any variation of these watermarks, it will + * be considered full for up to one second by all requests, unless + * we are so low on memory on all allowed nodes that we are forced + * into the second scan of the zonelist. + * + * In the second scan we ignore this zonelist cache and exactly + * apply the watermarks to all zones, even it is slower to do so. + * We are low on memory in the second scan, and should leave no stone + * unturned looking for a free page. + */ +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + int n; /* node that zone *z is on */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return 1; + + i = z - zonelist->zones; + n = zlc->z_to_n[i]; + + /* This zone is worth trying if it is allowed but not full */ + return node_isset(n, *allowednodes) && !test_bit(i, zlc->fullzones); +} + +/* + * Given 'z' scanning a zonelist, set the corresponding bit in + * zlc->fullzones, so that subsequent attempts to allocate a page + * from that zone don't waste time re-examining it. + */ +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ + struct zonelist_cache *zlc; /* cached zonelist speedup info */ + int i; /* index of *z in zonelist zones */ + + zlc = zonelist->zlcache_ptr; + if (!zlc) + return; + + i = z - zonelist->zones; + + set_bit(i, zlc->fullzones); +} + +#else /* CONFIG_NUMA */ + +static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) +{ + return NULL; +} + +static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z, + nodemask_t *allowednodes) +{ + return 1; +} + +static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z) +{ +} +#endif /* CONFIG_NUMA */ + /* * get_page_from_freelist goes through the zonelist trying to allocate * a page. @@ -926,23 +1046,32 @@ static struct page * get_page_from_freelist(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, int alloc_flags) { - struct zone **z = zonelist->zones; + struct zone **z; struct page *page = NULL; - int classzone_idx = zone_idx(*z); + int classzone_idx = zone_idx(zonelist->zones[0]); struct zone *zone; + nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ + int zlc_active = 0; /* set if using zonelist_cache */ + int did_zlc_setup = 0; /* just call zlc_setup() one time */ +zonelist_scan: /* - * Go through the zonelist once, looking for a zone with enough free. + * Scan zonelist, looking for a zone with enough free. * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ + z = zonelist->zones; + do { + if (NUMA_BUILD && zlc_active && + !zlc_zone_worth_trying(zonelist, z, allowednodes)) + continue; zone = *z; if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) && zone->zone_pgdat != zonelist->zones[0]->zone_pgdat)) break; if ((alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed(zone, gfp_mask)) - continue; + goto try_next_zone; if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { unsigned long mark; @@ -956,15 +1085,30 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, classzone_idx, alloc_flags)) { if (!zone_reclaim_mode || !zone_reclaim(zone, gfp_mask, order)) - continue; + goto this_zone_full; } } page = buffered_rmqueue(zonelist, zone, order, gfp_mask); if (page) break; - +this_zone_full: + if (NUMA_BUILD) + zlc_mark_zone_full(zonelist, z); +try_next_zone: + if (NUMA_BUILD && !did_zlc_setup) { + /* we do zlc_setup after the first zone is tried */ + allowednodes = zlc_setup(zonelist, alloc_flags); + zlc_active = 1; + did_zlc_setup = 1; + } } while (*(++z) != NULL); + + if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) { + /* Disable zlc cache for second zonelist scan */ + zlc_active = 0; + goto zonelist_scan; + } return page; } @@ -1535,6 +1679,24 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* Construct the zonelist performance cache - see further mmzone.h */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zonelist *zonelist; + struct zonelist_cache *zlc; + struct zone **z; + + zonelist = pgdat->node_zonelists + i; + zonelist->zlcache_ptr = zlc = &zonelist->zlcache; + bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST); + for (z = zonelist->zones; *z; z++) + zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z); + } +} + #else /* CONFIG_NUMA */ static void __meminit build_zonelists(pg_data_t *pgdat) @@ -1572,14 +1734,26 @@ static void __meminit build_zonelists(pg_data_t *pgdat) } } +/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */ +static void __meminit build_zonelist_cache(pg_data_t *pgdat) +{ + int i; + + for (i = 0; i < MAX_NR_ZONES; i++) + pgdat->node_zonelists[i].zlcache_ptr = NULL; +} + #endif /* CONFIG_NUMA */ /* return values int ....just for stop_machine_run() */ static int __meminit __build_all_zonelists(void *dummy) { int nid; - for_each_online_node(nid) + + for_each_online_node(nid) { build_zonelists(NODE_DATA(nid)); + build_zonelist_cache(NODE_DATA(nid)); + } return 0; } -- cgit v1.2.3-70-g09d2 From 7253f4ef04b1cd138baf2b29a95473743ac0a307 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 6 Dec 2006 20:31:49 -0800 Subject: [PATCH] memory page_alloc zonelist caching reorder structure Rearrange the struct members in the 'struct zonelist_cache' structure, so as to put the readonly (once initialized) z_to_n[] array first, where it will come right after the zones[] array in struct zonelist. This pretty much eliminates the chance that the two frequently written elements of 'struct zonelist_cache', the fullzones bitmap and last_full_zap times, will end up on the same cache line as the performance sensitive, frequently read, never (after init) written zones[] array. Keeping frequently written data off frequently read cache lines is good for performance. Thanks to Rohit Seth for the suggestion. Signed-off-by: Paul Jackson Cc: Rohit Seth Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 09bf9d8d7b7..da6002dec20 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -352,8 +352,8 @@ struct zone { struct zonelist_cache { - DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ + DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ unsigned long last_full_zap; /* when last zap'd (jiffies) */ }; #else -- cgit v1.2.3-70-g09d2 From 7602bdf2fd14a40dd9b104e516fdc05e1bd17952 Mon Sep 17 00:00:00 2001 From: Ashwin Chaugule Date: Wed, 6 Dec 2006 20:31:57 -0800 Subject: [PATCH] new scheme to preempt swap token The new swap token patches replace the current token traversal algo. The old algo had a crude timeout parameter that was used to handover the token from one task to another. This algo, transfers the token to the tasks that are in need of the token. The urgency for the token is based on the number of times a task is required to swap-in pages. Accordingly, the priority of a task is incremented if it has been badly affected due to swap-outs. To ensure that the token doesnt bounce around rapidly, the token holders are given a priority boost. The priority of tasks is also decremented, if their rate of swap-in's keeps reducing. This way, the condition to check whether to pre-empt the swap token, is a matter of comparing two task's priority fields. [akpm@osdl.org: cleanups] Signed-off-by: Ashwin Chaugule Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 13 ++++-- include/linux/swap.h | 1 - kernel/fork.c | 8 ++++ kernel/sysctl.c | 11 ----- mm/thrash.c | 116 ++++++++++++++++++++------------------------------ 5 files changed, 63 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index eafe4a7b823..cad6a16260f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -344,9 +344,16 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Token based thrashing protection. */ - unsigned long swap_token_time; - char recent_pagein; + /* Swap token stuff */ + /* + * Last value of global fault stamp as seen by this process. + * In other words, this value gives an indication of how long + * it has been since this task got the token. + * Look at mm/thrash.c + */ + unsigned int faultstamp; + unsigned int token_priority; + unsigned int last_interval; /* coredumping support */ int core_waiters; diff --git a/include/linux/swap.h b/include/linux/swap.h index e7c36ba2a2d..89f8a39773b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -259,7 +259,6 @@ extern spinlock_t swap_lock; /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; -extern unsigned long swap_token_default_timeout; extern void grab_swap_token(void); extern void __put_swap_token(struct mm_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 8cdd3e72ba5..5678e6c61ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -479,6 +479,10 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); + /* Initializing for Swap token stuff */ + mm->token_priority = 0; + mm->last_interval = 0; + if (!mm_init(mm)) goto fail_nomem; @@ -542,6 +546,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) goto fail_nomem; good_mm: + /* Initializing for Swap token stuff */ + mm->token_priority = 0; + mm->last_interval = 0; + tsk->mm = mm; tsk->active_mm = mm; return 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e569f4792..7abe9704e75 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -977,17 +977,6 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif -#ifdef CONFIG_SWAP - { - .ctl_name = VM_SWAP_TOKEN_TIMEOUT, - .procname = "swap_token_timeout", - .data = &swap_token_default_timeout, - .maxlen = sizeof(swap_token_default_timeout), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#endif #ifdef CONFIG_NUMA { .ctl_name = VM_ZONE_RECLAIM_MODE, diff --git a/mm/thrash.c b/mm/thrash.c index f4c560b4a2b..19e428ca8b0 100644 --- a/mm/thrash.c +++ b/mm/thrash.c @@ -7,100 +7,74 @@ * * Simple token based thrashing protection, using the algorithm * described in: http://www.cs.wm.edu/~sjiang/token.pdf + * + * Sep 2006, Ashwin Chaugule + * Improved algorithm to pass token: + * Each task has a priority which is incremented if it contended + * for the token in an interval less than its previous attempt. + * If the token is acquired, that task's priority is boosted to prevent + * the token from bouncing around too often and to let the task make + * some progress in its execution. */ + #include #include #include #include static DEFINE_SPINLOCK(swap_token_lock); -static unsigned long swap_token_timeout; -static unsigned long swap_token_check; -struct mm_struct * swap_token_mm = &init_mm; - -#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) -#define SWAP_TOKEN_TIMEOUT (300 * HZ) -/* - * Currently disabled; Needs further code to work at HZ * 300. - */ -unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT; - -/* - * Take the token away if the process had no page faults - * in the last interval, or if it has held the token for - * too long. - */ -#define SWAP_TOKEN_ENOUGH_RSS 1 -#define SWAP_TOKEN_TIMED_OUT 2 -static int should_release_swap_token(struct mm_struct *mm) -{ - int ret = 0; - if (!mm->recent_pagein) - ret = SWAP_TOKEN_ENOUGH_RSS; - else if (time_after(jiffies, swap_token_timeout)) - ret = SWAP_TOKEN_TIMED_OUT; - mm->recent_pagein = 0; - return ret; -} +struct mm_struct *swap_token_mm; +unsigned int global_faults; -/* - * Try to grab the swapout protection token. We only try to - * grab it once every TOKEN_CHECK_INTERVAL, both to prevent - * SMP lock contention and to check that the process that held - * the token before is no longer thrashing. - */ void grab_swap_token(void) { - struct mm_struct *mm; - int reason; + int current_interval; - /* We have the token. Let others know we still need it. */ - if (has_swap_token(current->mm)) { - current->mm->recent_pagein = 1; - if (unlikely(!swap_token_default_timeout)) - disable_swap_token(); - return; - } - - if (time_after(jiffies, swap_token_check)) { + global_faults++; - if (!swap_token_default_timeout) { - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - return; - } - - /* ... or if we recently held the token. */ - if (time_before(jiffies, current->mm->swap_token_time)) - return; + current_interval = global_faults - current->mm->faultstamp; - if (!spin_trylock(&swap_token_lock)) - return; + if (!spin_trylock(&swap_token_lock)) + return; - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; + /* First come first served */ + if (swap_token_mm == NULL) { + current->mm->token_priority = current->mm->token_priority + 2; + swap_token_mm = current->mm; + goto out; + } - mm = swap_token_mm; - if ((reason = should_release_swap_token(mm))) { - unsigned long eligible = jiffies; - if (reason == SWAP_TOKEN_TIMED_OUT) { - eligible += swap_token_default_timeout; - } - mm->swap_token_time = eligible; - swap_token_timeout = jiffies + swap_token_default_timeout; + if (current->mm != swap_token_mm) { + if (current_interval < current->mm->last_interval) + current->mm->token_priority++; + else { + current->mm->token_priority--; + if (unlikely(current->mm->token_priority < 0)) + current->mm->token_priority = 0; + } + /* Check if we deserve the token */ + if (current->mm->token_priority > + swap_token_mm->token_priority) { + current->mm->token_priority += 2; swap_token_mm = current->mm; } - spin_unlock(&swap_token_lock); + } else { + /* Token holder came in again! */ + current->mm->token_priority += 2; } - return; + +out: + current->mm->faultstamp = global_faults; + current->mm->last_interval = current_interval; + spin_unlock(&swap_token_lock); +return; } /* Called on process exit. */ void __put_swap_token(struct mm_struct *mm) { spin_lock(&swap_token_lock); - if (likely(mm == swap_token_mm)) { - mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - swap_token_mm = &init_mm; - swap_token_check = jiffies; - } + if (likely(mm == swap_token_mm)) + swap_token_mm = NULL; spin_unlock(&swap_token_lock); } -- cgit v1.2.3-70-g09d2 From cc102509074bba0316f2b5deebd7ef4447da295e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 6 Dec 2006 20:32:00 -0800 Subject: [PATCH] mm: add arch_alloc_page Add an arch_alloc_page to match arch_free_page. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 3 +++ mm/page_alloc.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bf2b6bc3f6f..00c314aedab 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -116,6 +116,9 @@ static inline enum zone_type gfp_zone(gfp_t flags) #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif +#ifndef HAVE_ARCH_ALLOC_PAGE +static inline void arch_alloc_page(struct page *page, int order) { } +#endif extern struct page * FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 230771d3c6b..cd47e8f7bd5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -598,6 +598,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) 1 << PG_checked | 1 << PG_mappedtodisk); set_page_private(page, 0); set_page_refcounted(page); + + arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); if (gfp_flags & __GFP_ZERO) -- cgit v1.2.3-70-g09d2 From 39dde65c9940c97fcd178a3d2b1c57ed8b7b68aa Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Wed, 6 Dec 2006 20:32:03 -0800 Subject: [PATCH] shared page table for hugetlb page Following up with the work on shared page table done by Dave McCracken. This set of patch target shared page table for hugetlb memory only. The shared page table is particular useful in the situation of large number of independent processes sharing large shared memory segments. In the normal page case, the amount of memory saved from process' page table is quite significant. For hugetlb, the saving on page table memory is not the primary objective (as hugetlb itself already cuts down page table overhead significantly), instead, the purpose of using shared page table on hugetlb is to allow faster TLB refill and smaller cache pollution upon TLB miss. With PT sharing, pte entries are shared among hundreds of processes, the cache consumption used by all the page table is smaller and in return, application gets much higher cache hit ratio. One other effect is that cache hit ratio with hardware page walker hitting on pte in cache will be higher and this helps to reduce tlb miss latency. These two effects contribute to higher application performance. Signed-off-by: Ken Chen Acked-by: Hugh Dickins Cc: Dave McCracken Cc: William Lee Irwin III Cc: "Luck, Tony" Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Adam Litke Cc: Paul Mundt Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/hugetlbpage.c | 112 +++++++++++++++++++++++++++++++++++++++++- arch/ia64/mm/hugetlbpage.c | 5 ++ arch/powerpc/mm/hugetlbpage.c | 5 ++ arch/sh/mm/hugetlbpage.c | 5 ++ arch/sh64/mm/hugetlbpage.c | 5 ++ arch/sparc64/mm/hugetlbpage.c | 5 ++ include/linux/hugetlb.h | 1 + mm/hugetlb.c | 7 +++ 8 files changed, 144 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 1719a8141f8..34728e4afe4 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -17,6 +17,113 @@ #include #include +static unsigned long page_table_shareable(struct vm_area_struct *svma, + struct vm_area_struct *vma, + unsigned long addr, pgoff_t idx) +{ + unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) + + svma->vm_start; + unsigned long sbase = saddr & PUD_MASK; + unsigned long s_end = sbase + PUD_SIZE; + + /* + * match the virtual addresses, permission and the alignment of the + * page table page. + */ + if (pmd_index(addr) != pmd_index(saddr) || + vma->vm_flags != svma->vm_flags || + sbase < svma->vm_start || svma->vm_end < s_end) + return 0; + + return saddr; +} + +static int vma_shareable(struct vm_area_struct *vma, unsigned long addr) +{ + unsigned long base = addr & PUD_MASK; + unsigned long end = base + PUD_SIZE; + + /* + * check on proper vm_flags and page table alignment + */ + if (vma->vm_flags & VM_MAYSHARE && + vma->vm_start <= base && end <= vma->vm_end) + return 1; + return 0; +} + +/* + * search for a shareable pmd page for hugetlb. + */ +static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) +{ + struct vm_area_struct *vma = find_vma(mm, addr); + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff; + struct prio_tree_iter iter; + struct vm_area_struct *svma; + unsigned long saddr; + pte_t *spte = NULL; + + if (!vma_shareable(vma, addr)) + return; + + spin_lock(&mapping->i_mmap_lock); + vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + if (svma == vma) + continue; + + saddr = page_table_shareable(svma, vma, addr, idx); + if (saddr) { + spte = huge_pte_offset(svma->vm_mm, saddr); + if (spte) { + get_page(virt_to_page(spte)); + break; + } + } + } + + if (!spte) + goto out; + + spin_lock(&mm->page_table_lock); + if (pud_none(*pud)) + pud_populate(mm, pud, (unsigned long) spte & PAGE_MASK); + else + put_page(virt_to_page(spte)); + spin_unlock(&mm->page_table_lock); +out: + spin_unlock(&mapping->i_mmap_lock); +} + +/* + * unmap huge page backed by shared pte. + * + * Hugetlb pte page is ref counted at the time of mapping. If pte is shared + * indicated by page_count > 1, unmap is achieved by clearing pud and + * decrementing the ref count. If count == 1, the pte page is not shared. + * + * called with vma->vm_mm->page_table_lock held. + * + * returns: 1 successfully unmapped a shared pte page + * 0 the underlying pte page is not shared, or it is the last user + */ +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + pgd_t *pgd = pgd_offset(mm, *addr); + pud_t *pud = pud_offset(pgd, *addr); + + BUG_ON(page_count(virt_to_page(ptep)) == 0); + if (page_count(virt_to_page(ptep)) == 1) + return 0; + + pud_clear(pud); + put_page(virt_to_page(ptep)); + *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; + return 1; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -25,8 +132,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - if (pud) + if (pud) { + if (pud_none(*pud)) + huge_pmd_share(mm, addr, pud); pte = (pte_t *) pmd_alloc(mm, pud, addr); + } BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); return pte; diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index f3a9585e98a..0c7e94edc20 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -64,6 +64,11 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + #define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } /* diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 506d89768d4..424a8f57e15 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -146,6 +146,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return hugepte_offset(hpdp, addr); } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) { pte_t *hugepte = hugepd_page(*hpdp); diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 329059d6b54..cf2c2ee35a3 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -63,6 +63,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c index 187cf01750b..4b455f61114 100644 --- a/arch/sh64/mm/hugetlbpage.c +++ b/arch/sh64/mm/hugetlbpage.c @@ -53,6 +53,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c index 53b9b1f528e..33fd0b265e7 100644 --- a/arch/sparc64/mm/hugetlbpage.c +++ b/arch/sparc64/mm/hugetlbpage.c @@ -235,6 +235,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return pte; } +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +{ + return 0; +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ace64e57e17..a60995afe33 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -35,6 +35,7 @@ extern int sysctl_hugetlb_shm_group; pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr); +int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f7355bf2f28..9244971b679 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -386,6 +386,9 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, if (!ptep) continue; + if (huge_pmd_unshare(mm, &address, ptep)) + continue; + pte = huge_ptep_get_and_clear(mm, address, ptep); if (pte_none(pte)) continue; @@ -658,11 +661,14 @@ void hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, address, end); + spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); spin_lock(&mm->page_table_lock); for (; address < end; address += HPAGE_SIZE) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; + if (huge_pmd_unshare(mm, &address, ptep)) + continue; if (!pte_none(*ptep)) { pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(pte_modify(pte, newprot)); @@ -671,6 +677,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma, } } spin_unlock(&mm->page_table_lock); + spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); flush_tlb_range(vma, start, end); } -- cgit v1.2.3-70-g09d2 From a866374aecc90c7d90619727ccd851ac096b2fc7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:32:20 -0800 Subject: [PATCH] mm: pagefault_{disable,enable}() Introduce pagefault_{disable,enable}() and use these where previously we did manual preempt increments/decrements to make the pagefault handler do the atomic thing. Currently they still rely on the increased preempt count, but do not rely on the disabled preemption, this might go away in the future. (NOTE: the extra barrier() in pagefault_disable might fix some holes on machines which have too many registers for their own good) [heiko.carstens@de.ibm.com: s390 fix] Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/kernel/futex.c | 4 ++-- arch/i386/mm/highmem.c | 10 ++++------ arch/mips/mm/highmem.c | 10 ++++------ arch/s390/lib/uaccess_std.c | 6 +++--- arch/sparc/mm/highmem.c | 8 +++----- include/asm-frv/highmem.h | 5 ++--- include/asm-generic/futex.h | 4 ++-- include/asm-i386/futex.h | 4 ++-- include/asm-ia64/futex.h | 4 ++-- include/asm-mips/futex.h | 4 ++-- include/asm-parisc/futex.h | 4 ++-- include/asm-powerpc/futex.h | 4 ++-- include/asm-ppc/highmem.h | 8 +++----- include/asm-sparc64/futex.h | 4 ++-- include/asm-x86_64/futex.h | 4 ++-- include/linux/uaccess.h | 39 +++++++++++++++++++++++++++++++++++++-- kernel/futex.c | 28 ++++++++++++++-------------- 17 files changed, 88 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c index eae874a970c..53dc5ed1ebd 100644 --- a/arch/frv/kernel/futex.c +++ b/arch/frv/kernel/futex.c @@ -200,7 +200,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -223,7 +223,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) break; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index f9f647cdbc7..178bbfe6cba 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -32,7 +32,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -52,8 +52,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #ifdef CONFIG_DEBUG_HIGHMEM if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -68,8 +67,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) */ kpte_clear_flush(kmap_pte-idx, vaddr); - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } /* This is the same as kmap_atomic() but can map memory that doesn't @@ -80,7 +78,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - inc_preempt_count(); + pagefault_disable(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index 99ebf3ccc22..675502ada5a 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -39,7 +39,7 @@ void *__kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -62,8 +62,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < FIXADDR_START) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -78,8 +77,7 @@ void __kunmap_atomic(void *kvaddr, enum km_type type) local_flush_tlb_one(vaddr); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } #ifndef CONFIG_LIMITED_DMA @@ -92,7 +90,7 @@ void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) enum fixed_addresses idx; unsigned long vaddr; - inc_preempt_count(); + pagefault_disable(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c index 2d549ed2e11..bbaca66fa29 100644 --- a/arch/s390/lib/uaccess_std.c +++ b/arch/s390/lib/uaccess_std.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #ifndef __s390x__ @@ -258,7 +258,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) { int oldval = 0, newval, ret; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -284,7 +284,7 @@ int futex_atomic_op(int op, int __user *uaddr, int oparg, int *old) default: ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); *old = oldval; return ret; } diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 4d8ed9c6518..01fc6c25429 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -35,7 +35,7 @@ void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -70,8 +70,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned long idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < FIXADDR_START) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -97,8 +96,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } /* We may be fed a pagetable here by ptep_to_xxx and others. */ diff --git a/include/asm-frv/highmem.h b/include/asm-frv/highmem.h index 0f390f41f81..ff4d6cdeb15 100644 --- a/include/asm-frv/highmem.h +++ b/include/asm-frv/highmem.h @@ -115,7 +115,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) { unsigned long paddr; - inc_preempt_count(); + pagefault_disable(); paddr = page_to_phys(page); switch (type) { @@ -170,8 +170,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) default: BUG(); } - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index df893c16031..f422df0956a 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index 946d97cfea2..438ef0ec710 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); if (op == FUTEX_OP_SET) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); @@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 07d77f3a8cb..8a98a265413 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -59,7 +59,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -83,7 +83,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h index 927a216bd53..47e5679c235 100644 --- a/include/asm-mips/futex.h +++ b/include/asm-mips/futex.h @@ -88,7 +88,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -115,7 +115,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h index d84bbb283fd..dbee6e60aa8 100644 --- a/include/asm-parisc/futex.h +++ b/include/asm-parisc/futex.h @@ -21,7 +21,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -33,7 +33,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h index 936422e5489..3f3673fd3ff 100644 --- a/include/asm-powerpc/futex.h +++ b/include/asm-powerpc/futex.h @@ -43,7 +43,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-ppc/highmem.h b/include/asm-ppc/highmem.h index 1d2c4ef81c2..f7b21ee302b 100644 --- a/include/asm-ppc/highmem.h +++ b/include/asm-ppc/highmem.h @@ -79,7 +79,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) unsigned long vaddr; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ - inc_preempt_count(); + pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -101,8 +101,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR*smp_processor_id(); if (vaddr < KMAP_FIX_BEGIN) { // FIXME - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); return; } @@ -115,8 +114,7 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) pte_clear(&init_mm, vaddr, kmap_pte+idx); flush_tlb_page(NULL, vaddr); #endif - dec_preempt_count(); - preempt_check_resched(); + pagefault_enable(); } static inline struct page *kmap_atomic_to_page(void *ptr) diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 7392fc4a954..876312fe82c 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -45,7 +45,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -67,7 +67,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h index 9804bf07b09..5cdfb08013c 100644 --- a/include/asm-x86_64/futex.h +++ b/include/asm-x86_64/futex.h @@ -55,7 +55,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - inc_preempt_count(); + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -78,7 +78,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) ret = -ENOSYS; } - dec_preempt_count(); + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index a48d7f11c7b..67918c22339 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -1,8 +1,43 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include +/* + * These routines enable/disable the pagefault handler in that + * it will not take any locks and go straight to the fixup table. + * + * They have great resemblance to the preempt_disable/enable calls + * and in fact they are identical; this is because currently there is + * no other way to make the pagefault handlers do this. So we do + * disable preemption but we don't necessarily care about that. + */ +static inline void pagefault_disable(void) +{ + inc_preempt_count(); + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} + +static inline void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + dec_preempt_count(); + /* + * make sure we do.. + */ + barrier(); + preempt_check_resched(); +} + #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, @@ -35,9 +70,9 @@ static inline unsigned long __copy_from_user_nocache(void *to, ({ \ long ret; \ \ - inc_preempt_count(); \ + pagefault_disable(); \ ret = __get_user(retval, addr); \ - dec_preempt_count(); \ + pagefault_enable(); \ ret; \ }) diff --git a/kernel/futex.c b/kernel/futex.c index 93ef30ba209..af7b81cbde3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -282,9 +282,9 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from) { int ret; - inc_preempt_count(); + pagefault_disable(); ret = __copy_from_user_inatomic(dest, from, sizeof(u32)); - dec_preempt_count(); + pagefault_enable(); return ret ? -EFAULT : 0; } @@ -585,9 +585,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!(uval & FUTEX_OWNER_DIED)) { newval = FUTEX_WAITERS | new_owner->pid; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (curval == -EFAULT) return -EFAULT; if (curval != uval) @@ -618,9 +618,9 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval) * There is no waiter, so we unlock the futex. The owner died * bit has not to be preserved here. We are the owner: */ - inc_preempt_count(); + pagefault_disable(); oldval = futex_atomic_cmpxchg_inatomic(uaddr, uval, 0); - dec_preempt_count(); + pagefault_enable(); if (oldval == -EFAULT) return oldval; @@ -1158,9 +1158,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, */ newval = current->pid; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, 0, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1183,9 +1183,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, uval = curval; newval = uval | FUTEX_WAITERS; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1215,10 +1215,10 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec, newval = current->pid | FUTEX_OWNER_DIED | FUTEX_WAITERS; - inc_preempt_count(); + pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - dec_preempt_count(); + pagefault_enable(); if (unlikely(curval == -EFAULT)) goto uaddr_faulted; @@ -1390,9 +1390,9 @@ retry_locked: * anyone else up: */ if (!(uval & FUTEX_OWNER_DIED)) { - inc_preempt_count(); + pagefault_disable(); uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0); - dec_preempt_count(); + pagefault_enable(); } if (unlikely(uval == -EFAULT)) -- cgit v1.2.3-70-g09d2 From ad76fb6b5a5183255279e0ab5260715481770678 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:32:21 -0800 Subject: [PATCH] mm: k{,um}map_atomic() vs in_atomic() Make kmap_atomic/kunmap_atomic denote a pagefault disabled scope. All non trivial implementations already do this anyway. Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-mips/highmem.h | 10 ++++++++-- include/linux/highmem.h | 8 +++++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-mips/highmem.h b/include/asm-mips/highmem.h index c976bfaaba8..f8c8182f7f2 100644 --- a/include/asm-mips/highmem.h +++ b/include/asm-mips/highmem.h @@ -21,6 +21,7 @@ #include #include +#include #include /* undef for production */ @@ -70,11 +71,16 @@ static inline void *kmap(struct page *page) static inline void *kmap_atomic(struct page *page, enum km_type type) { + pagefault_disable(); return page_address(page); } -static inline void kunmap_atomic(void *kvaddr, enum km_type type) { } -#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) +static inline void kunmap_atomic(void *kvaddr, enum km_type type) +{ + pagefault_enable(); +} + +#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index fd7d12daa94..3d8768b619e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -3,6 +3,7 @@ #include #include +#include #include @@ -41,9 +42,10 @@ static inline void *kmap(struct page *page) #define kunmap(page) do { (void) (page); } while (0) -#define kmap_atomic(page, idx) page_address(page) -#define kunmap_atomic(addr, idx) do { } while (0) -#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) +#define kmap_atomic(page, idx) \ + ({ pagefault_disable(); page_address(page); }) +#define kunmap_atomic(addr, idx) do { pagefault_enable(); } while (0) +#define kmap_atomic_pfn(pfn, idx) kmap_atomic(pfn_to_page(pfn), (idx)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) #endif -- cgit v1.2.3-70-g09d2 From 8b98c1699eba23cfd2e8b366625c50ff5fd1415b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Dec 2006 20:32:30 -0800 Subject: [PATCH] leak tracking for kmalloc_node We have variants of kmalloc and kmem_cache_alloc that leave leak tracking to the caller. This is used for subsystem-specific allocators like skb_alloc. To make skb_alloc node-aware we need similar routines for the node-aware slab allocator, which this patch adds. Note that the code is rather ugly, but it mirrors the non-node-aware code 1:1: [akpm@osdl.org: add module export] Signed-off-by: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 23 ++++++++++++++++++++++ mm/slab.c | 55 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index c4947b8a2c0..66c4640d365 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -236,7 +236,25 @@ found: } return __kmalloc_node(size, flags, node); } + +/* + * kmalloc_node_track_caller is a special version of kmalloc_node that + * records the calling function of the routine calling it for slab leak + * tracking instead of just the calling function (confusing, eh?). + * It's useful when the call to kmalloc_node comes from a widely-used + * standard allocator where we care about the real place the memory + * allocation request comes from. + */ +#ifndef CONFIG_DEBUG_SLAB +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node(size, flags, node) #else +extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); +#define kmalloc_node_track_caller(size, flags, node) \ + __kmalloc_node_track_caller(size, flags, node, \ + __builtin_return_address(0)) +#endif +#else /* CONFIG_NUMA */ static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { return kmem_cache_alloc(cachep, flags); @@ -245,6 +263,9 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) { return kmalloc(size, flags); } + +#define kmalloc_node_track_caller(size, flags, node) \ + kmalloc_track_caller(size, flags) #endif extern int FASTCALL(kmem_cache_reap(int)); @@ -283,6 +304,8 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #define kzalloc(s, f) __kzalloc(s, f) #define kmalloc_track_caller kmalloc +#define kmalloc_node_track_caller kmalloc_node + #endif /* CONFIG_SLOB */ /* System wide caches */ diff --git a/mm/slab.c b/mm/slab.c index bfd654c0ef4..8f3f61cacb5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1015,7 +1015,7 @@ static inline void *alternate_node_alloc(struct kmem_cache *cachep, return NULL; } -static inline void *__cache_alloc_node(struct kmem_cache *cachep, +static inline void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { return NULL; @@ -1023,7 +1023,7 @@ static inline void *__cache_alloc_node(struct kmem_cache *cachep, #else /* CONFIG_NUMA */ -static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); +static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); static struct array_cache **alloc_alien_cache(int node, int limit) @@ -3130,10 +3130,10 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep, objp = ____cache_alloc(cachep, flags); /* * We may just have run out of memory on the local node. - * __cache_alloc_node() knows how to locate memory on other nodes + * ____cache_alloc_node() knows how to locate memory on other nodes */ if (NUMA_BUILD && !objp) - objp = __cache_alloc_node(cachep, flags, numa_node_id()); + objp = ____cache_alloc_node(cachep, flags, numa_node_id()); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); @@ -3160,7 +3160,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) else if (current->mempolicy) nid_alloc = slab_node(current->mempolicy); if (nid_alloc != nid_here) - return __cache_alloc_node(cachep, flags, nid_alloc); + return ____cache_alloc_node(cachep, flags, nid_alloc); return NULL; } @@ -3183,7 +3183,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) if (zone_idx(*z) <= ZONE_NORMAL && cpuset_zone_allowed(*z, flags) && cache->nodelists[nid]) - obj = __cache_alloc_node(cache, + obj = ____cache_alloc_node(cache, flags | __GFP_THISNODE, nid); } return obj; @@ -3192,7 +3192,7 @@ void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) /* * A interface to enable slab creation on nodeid */ -static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, +static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) { struct list_head *entry; @@ -3465,7 +3465,9 @@ out: * New and improved: it will now make sure that the object gets * put on the correct node list so that there is no false sharing. */ -void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) +static __always_inline void * +__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, + int nodeid, void *caller) { unsigned long save_flags; void *ptr; @@ -3477,17 +3479,23 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) !cachep->nodelists[nodeid]) ptr = ____cache_alloc(cachep, flags); else - ptr = __cache_alloc_node(cachep, flags, nodeid); + ptr = ____cache_alloc_node(cachep, flags, nodeid); local_irq_restore(save_flags); - ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, - __builtin_return_address(0)); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); return ptr; } + +void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) +{ + return __cache_alloc_node(cachep, flags, nodeid, + __builtin_return_address(0)); +} EXPORT_SYMBOL(kmem_cache_alloc_node); -void *__kmalloc_node(size_t size, gfp_t flags, int node) +static __always_inline void * +__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) { struct kmem_cache *cachep; @@ -3496,8 +3504,29 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) return NULL; return kmem_cache_alloc_node(cachep, flags, node); } + +#ifdef CONFIG_DEBUG_SLAB +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, + __builtin_return_address(0)); +} EXPORT_SYMBOL(__kmalloc_node); -#endif + +void *__kmalloc_node_track_caller(size_t size, gfp_t flags, + int node, void *caller) +{ + return __do_kmalloc_node(size, flags, node, caller); +} +EXPORT_SYMBOL(__kmalloc_node_track_caller); +#else +void *__kmalloc_node(size_t size, gfp_t flags, int node) +{ + return __do_kmalloc_node(size, flags, node, NULL); +} +EXPORT_SYMBOL(__kmalloc_node); +#endif /* CONFIG_DEBUG_SLAB */ +#endif /* CONFIG_NUMA */ /** * __do_kmalloc - allocate memory -- cgit v1.2.3-70-g09d2 From 873481367edb18a7d0d7e5a285e6728c16bb44a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Dec 2006 20:32:33 -0800 Subject: [PATCH] add numa node information to struct device For node-aware skb allocations we need information about the node in struct net_device or struct device. Davem suggested to put it into struct device which this patch does. In particular: - struct device gets a new int numa_node member if CONFIG_NUMA is set - there are two new helpers, dev_to_node and set_dev_node to transparently deal with the non-numa case - for pci devices the node-info is set to the value we get from pcibus_to_node. Note that for some architectures pcibus_to_node doesn't work yet at the time we call it currently. This is harmless and will just mean skb allocations aren't node-local on this architectures until the implementation of pcibus_to_node on these architectures have been updated (There are patches for x86 and x86_64 floating around) [akpm@osdl.org: cleanup] Signed-off-by: Christoph Hellwig Cc: Christoph Lameter Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 1 + drivers/pci/probe.c | 1 + include/linux/device.h | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index e4b530ef757..67b79a7592a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -386,6 +386,7 @@ void device_initialize(struct device *dev) INIT_LIST_HEAD(&dev->node); init_MUTEX(&dev->sem); device_init_wakeup(dev, 0); + set_dev_node(dev, -1); } #ifdef CONFIG_SYSFS_DEPRECATED diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0eeac60042b..6a3c1e72890 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -873,6 +873,7 @@ void __devinit pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.release = pci_release_dev; pci_dev_get(dev); + set_dev_node(&dev->dev, pcibus_to_node(bus)); dev->dev.dma_mask = &dev->dma_mask; dev->dev.coherent_dma_mask = 0xffffffffull; diff --git a/include/linux/device.h b/include/linux/device.h index 583a341e016..49ab53ce92d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -371,6 +371,9 @@ struct device { core doesn't touch it */ struct dev_pm_info power; +#ifdef CONFIG_NUMA + int numa_node; /* NUMA node this device is close to */ +#endif u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as @@ -394,6 +397,25 @@ struct device { void (*release)(struct device * dev); }; +#ifdef CONFIG_NUMA +static inline int dev_to_node(struct device *dev) +{ + return dev->numa_node; +} +static inline void set_dev_node(struct device *dev, int node) +{ + dev->numa_node = node; +} +#else +static inline int dev_to_node(struct device *dev) +{ + return -1; +} +static inline void set_dev_node(struct device *dev, int node) +{ +} +#endif + static inline void * dev_get_drvdata (struct device *dev) { -- cgit v1.2.3-70-g09d2 From b30973f877fea1a3fb84e05599890fcc082a88e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Dec 2006 20:32:36 -0800 Subject: [PATCH] node-aware skb allocation Node-aware allocation of skbs for the receive path. Details: - __alloc_skb gets a new node argument and cals the node-aware slab functions with it. - netdev_alloc_skb passed the node number it gets from dev_to_node to it, everyone else passes -1 (any node) Signed-off-by: Christoph Hellwig Cc: Christoph Lameter Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/skbuff.h | 6 +++--- net/core/skbuff.c | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a05a5f7c0b7..1d649f3eb00 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -332,17 +332,17 @@ struct sk_buff { extern void kfree_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct sk_buff *__alloc_skb(unsigned int size, - gfp_t priority, int fclone); + gfp_t priority, int fclone, int node); static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 0); + return __alloc_skb(size, priority, 0, -1); } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { - return __alloc_skb(size, priority, 1); + return __alloc_skb(size, priority, 1, -1); } extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8e1c385e5ba..7217fb8928f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -132,6 +132,7 @@ EXPORT_SYMBOL(skb_truesize_bug); * @gfp_mask: allocation mask * @fclone: allocate from fclone cache instead of head cache * and allocate a cloned (child) skb + * @node: numa node to allocate memory on * * Allocate a new &sk_buff. The returned buffer has no headroom and a * tail room of size bytes. The object has a reference count of one. @@ -141,7 +142,7 @@ EXPORT_SYMBOL(skb_truesize_bug); * %GFP_ATOMIC. */ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, - int fclone) + int fclone, int node) { kmem_cache_t *cache; struct skb_shared_info *shinfo; @@ -151,14 +152,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, cache = fclone ? skbuff_fclone_cache : skbuff_head_cache; /* Get the HEAD */ - skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA); + skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node); if (!skb) goto out; /* Get the DATA. Size must match skb_add_mtu(). */ size = SKB_DATA_ALIGN(size); - data = kmalloc_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask); + data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), + gfp_mask, node); if (!data) goto nodata; @@ -267,9 +268,10 @@ nodata: struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask) { + int node = dev->class_dev.dev ? dev_to_node(dev->class_dev.dev) : -1; struct sk_buff *skb; - skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); skb->dev = dev; -- cgit v1.2.3-70-g09d2 From 54cc211ce3fc73a9d21c6316886db0676beaca95 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:45 -0800 Subject: [PATCH] Remove bio_cachep from slab.h Remove bio_cachep from slab.h - it no longer exists. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 66c4640d365..6b7d096f056 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -315,7 +315,6 @@ extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sighand_cachep; -extern kmem_cache_t *bio_cachep; #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From 298ec1e2ac85cecce3eddd167286359358c44d5d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:47 -0800 Subject: [PATCH] Move sighand_cachep to include/signal.h Move sighand_cachep definitioni to linux/signal.h The sighand cache is only used in fs/exec.c and kernel/fork.c. It is defined in kernel/fork.c but only used in fs/exec.c. The sighand_cachep is related to signal processing. So add the definition to signal.h. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 2 ++ include/linux/slab.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/signal.h b/include/linux/signal.h index 117135e33d6..14749056dd6 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -241,6 +241,8 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); struct pt_regs; extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); +extern struct kmem_cache *sighand_cachep; + #endif /* __KERNEL__ */ #endif /* _LINUX_SIGNAL_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 6b7d096f056..467b297d2c6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -314,7 +314,6 @@ extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *sighand_cachep; #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From c43692e85f306667545b91194c748a6e46c1f8b4 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:48 -0800 Subject: [PATCH] Move vm_area_cachep to include/mm.h vm_area_cachep is used to store vm_area_structs. So move to mm.h. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ include/linux/slab.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ab6e4974f37..840303769c1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -114,6 +114,8 @@ struct vm_area_struct { #endif }; +extern struct kmem_cache *vm_area_cachep; + /* * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is * disabled, then there's a single shared list of VMAs maintained by the diff --git a/include/linux/slab.h b/include/linux/slab.h index 467b297d2c6..a30a4028a92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -309,7 +309,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #endif /* CONFIG_SLOB */ /* System wide caches */ -extern kmem_cache_t *vm_area_cachep; extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; -- cgit v1.2.3-70-g09d2 From 5d6538fcf231faccb2ac42f92851d259d00e62f9 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:50 -0800 Subject: [PATCH] Move files_cachep to include/file.h Proper place is in file.h since files_cachep uses are rated to file I/O. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/file.h | 2 ++ include/linux/slab.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index 74183e6f7f4..be37423e992 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -114,4 +114,6 @@ struct files_struct *get_files_struct(struct task_struct *); void FASTCALL(put_files_struct(struct files_struct *fs)); void reset_files_struct(struct task_struct *, struct files_struct *); +extern struct kmem_cache *files_cachep; + #endif /* __LINUX_FILE_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index a30a4028a92..dd9efecff41 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -310,7 +310,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) /* System wide caches */ extern kmem_cache_t *names_cachep; -extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; -- cgit v1.2.3-70-g09d2 From 8b7d91eb7f6a3e8f0caaa613937bda5ab7dc7dc2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:52 -0800 Subject: [PATCH] Move filep_cachep to include/file.h filp_cachep is only used in fs/file_table.c and in fs/dcache.c where it is defined. Move it to related definitions in linux/file.h. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/file.h | 2 ++ include/linux/slab.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/file.h b/include/linux/file.h index be37423e992..6e77b9177f9 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -64,6 +64,8 @@ struct files_struct { #define files_fdtable(files) (rcu_dereference((files)->fdt)) +extern struct kmem_cache *filp_cachep; + extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); diff --git a/include/linux/slab.h b/include/linux/slab.h index dd9efecff41..f9202d60d76 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -310,7 +310,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) /* System wide caches */ extern kmem_cache_t *names_cachep; -extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From aa362a83e78d2e9320da588805cf2a0b53356bc3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:54 -0800 Subject: [PATCH] Move fs_cachep to linux/fs_struct.h fs_cachep is only used in kernel/exit.c and in kernel/fork.c. It is used to store fs_struct items so it should be placed in linux/fs_struct.h Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs_struct.h | 2 ++ include/linux/slab.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index c623d12a486..11a36ceddf7 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -18,6 +18,8 @@ struct fs_struct { .umask = 0022, \ } +extern struct kmem_cache *fs_cachep; + extern void exit_fs(struct task_struct *); extern void set_fs_altroot(void); extern void set_fs_root(struct fs_struct *, struct vfsmount *, struct dentry *); diff --git a/include/linux/slab.h b/include/linux/slab.h index f9202d60d76..bfc063ee94e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -310,7 +310,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) /* System wide caches */ extern kmem_cache_t *names_cachep; -extern kmem_cache_t *fs_cachep; #endif /* __KERNEL__ */ -- cgit v1.2.3-70-g09d2 From b86c089b83b8ae2bc814db865057768a9ba787b5 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:57 -0800 Subject: [PATCH] Move names_cachep to linux/fs.h The names_cachep is used for getname() and putname(). So lets put it into fs.h near those two definitions. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 ++ include/linux/slab.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index cac7b1ef954..a8039c8d8cb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1481,6 +1481,8 @@ extern char * getname(const char __user *); extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(unsigned long); +extern struct kmem_cache *names_cachep; + #define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL diff --git a/include/linux/slab.h b/include/linux/slab.h index bfc063ee94e..e67314e4a0a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -308,9 +308,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) #endif /* CONFIG_SLOB */ -/* System wide caches */ -extern kmem_cache_t *names_cachep; - #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ -- cgit v1.2.3-70-g09d2 From ebe29738f3934ad6a93c8bd76e30aa5d797a269d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:32:59 -0800 Subject: [PATCH] Remove uses of kmem_cache_t from mm/* and include/linux/slab.h Remove all uses of kmem_cache_t (the most were left in slab.h). The typedef for kmem_cache_t is then only necessary for other kernel subsystems. Add a comment to that effect. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index e67314e4a0a..b831776b2fc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -9,6 +9,7 @@ #if defined(__KERNEL__) +/* kmem_cache_t exists for legacy reasons and is not used by code in mm */ typedef struct kmem_cache kmem_cache_t; #include @@ -57,22 +58,23 @@ typedef struct kmem_cache kmem_cache_t; /* prototypes */ extern void __init kmem_cache_init(void); -extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, - void (*)(void *, kmem_cache_t *, unsigned long), - void (*)(void *, kmem_cache_t *, unsigned long)); -extern void kmem_cache_destroy(kmem_cache_t *); -extern int kmem_cache_shrink(kmem_cache_t *); -extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t); +extern struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, + unsigned long, + void (*)(void *, struct kmem_cache *, unsigned long), + void (*)(void *, struct kmem_cache *, unsigned long)); +extern void kmem_cache_destroy(struct kmem_cache *); +extern int kmem_cache_shrink(struct kmem_cache *); +extern void *kmem_cache_alloc(struct kmem_cache *, gfp_t); extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); -extern void kmem_cache_free(kmem_cache_t *, void *); -extern unsigned int kmem_cache_size(kmem_cache_t *); -extern const char *kmem_cache_name(kmem_cache_t *); +extern void kmem_cache_free(struct kmem_cache *, void *); +extern unsigned int kmem_cache_size(struct kmem_cache *); +extern const char *kmem_cache_name(struct kmem_cache *); /* Size description struct for general caches. */ struct cache_sizes { - size_t cs_size; - kmem_cache_t *cs_cachep; - kmem_cache_t *cs_dmacachep; + size_t cs_size; + struct kmem_cache *cs_cachep; + struct kmem_cache *cs_dmacachep; }; extern struct cache_sizes malloc_sizes[]; @@ -211,7 +213,7 @@ extern unsigned int ksize(const void *); extern int slab_is_available(void); #ifdef CONFIG_NUMA -extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); +extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); extern void *__kmalloc_node(size_t size, gfp_t flags, int node); static inline void *kmalloc_node(size_t size, gfp_t flags, int node) @@ -255,7 +257,8 @@ extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); __builtin_return_address(0)) #endif #else /* CONFIG_NUMA */ -static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) +static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, + gfp_t flags, int node) { return kmem_cache_alloc(cachep, flags); } @@ -269,7 +272,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) #endif extern int FASTCALL(kmem_cache_reap(int)); -extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); +extern int FASTCALL(kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)); #else /* CONFIG_SLOB */ -- cgit v1.2.3-70-g09d2 From 25ba77c141dbcd2602dd0171824d0d72aa023a01 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 6 Dec 2006 20:33:03 -0800 Subject: [PATCH] numa node ids are int, page_to_nid and zone_to_nid should return int NUMA node ids are passed as either int or unsigned int almost exclusivly page_to_nid and zone_to_nid both return unsigned long. This is a throw back to when page_to_nid was a #define and was thus exposing the real type of the page flags field. In addition to fixing up the definitions of page_to_nid and zone_to_nid I audited the users of these functions identifying the following incorrect uses: 1) mm/page_alloc.c show_node() -- printk dumping the node id, 2) include/asm-ia64/pgalloc.h pgtable_quicklist_free() -- comparison against numa_node_id() which returns an int from cpu_to_node(), and 3) mm/mpolicy.c check_pte_range -- used as an index in node_isset which uses bit_set which in generic code takes an int. Signed-off-by: Andy Whitcroft Cc: Christoph Lameter Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-ia64/pgalloc.h | 2 +- include/linux/mm.h | 6 +++--- mm/mempolicy.c | 2 +- mm/page_alloc.c | 2 +- mm/sparse.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/pgalloc.h b/include/asm-ia64/pgalloc.h index 9cb68e9b377..393e04c42a2 100644 --- a/include/asm-ia64/pgalloc.h +++ b/include/asm-ia64/pgalloc.h @@ -60,7 +60,7 @@ static inline void *pgtable_quicklist_alloc(void) static inline void pgtable_quicklist_free(void *pgtable_entry) { #ifdef CONFIG_NUMA - unsigned long nid = page_to_nid(virt_to_page(pgtable_entry)); + int nid = page_to_nid(virt_to_page(pgtable_entry)); if (unlikely(nid != numa_node_id())) { free_page((unsigned long)pgtable_entry); diff --git a/include/linux/mm.h b/include/linux/mm.h index 840303769c1..0e266fe1b4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -456,7 +456,7 @@ static inline int page_zone_id(struct page *page) return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK; } -static inline unsigned long zone_to_nid(struct zone *zone) +static inline int zone_to_nid(struct zone *zone) { #ifdef CONFIG_NUMA return zone->node; @@ -466,9 +466,9 @@ static inline unsigned long zone_to_nid(struct zone *zone) } #ifdef NODE_NOT_IN_PAGE_FLAGS -extern unsigned long page_to_nid(struct page *page); +extern int page_to_nid(struct page *page); #else -static inline unsigned long page_to_nid(struct page *page) +static inline int page_to_nid(struct page *page) { return (page->flags >> NODES_PGSHIFT) & NODES_MASK; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index fb907236bbd..e7b69c90cfd 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -221,7 +221,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); do { struct page *page; - unsigned int nid; + int nid; if (!pte_present(*pte)) continue; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 86f2984f8b7..614d427854a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1407,7 +1407,7 @@ unsigned int nr_free_pagecache_pages(void) static inline void show_node(struct zone *zone) { if (NUMA_BUILD) - printk("Node %ld ", zone_to_nid(zone)); + printk("Node %d ", zone_to_nid(zone)); } void si_meminfo(struct sysinfo *val) diff --git a/mm/sparse.c b/mm/sparse.c index 158d6a2a526..ac26eb0d73c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -36,7 +36,7 @@ static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned; #endif -unsigned long page_to_nid(struct page *page) +int page_to_nid(struct page *page) { return section_to_node_table[page_to_section(page)]; } -- cgit v1.2.3-70-g09d2 From 4af2bfc1202041006a0f01d0591a975f6c573f09 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 6 Dec 2006 20:33:04 -0800 Subject: [PATCH] silence unused pgdat warning from alloc_bootmem_node and friends x86 NUMA systems only define bootmem for node 0. alloc_bootmem_node() and friends therefore ignore the passed pgdat and use NODE_DATA(0) in all cases. This leads to the following warnings as we are not using the passed parameter: .../mm/page_alloc.c: In function 'zone_wait_table_init': .../mm/page_alloc.c:2259: warning: unused variable 'pgdat' One option would be to define all variables used with these macros __attribute__ ((unused)), but this would leave us exposed should these become genuinely unused. The key here is that we _are_ using the value, we ignore it but that is a deliberate action. This patch adds a nested local variable within the alloc_bootmem_node helper to which the pgdat parameter is assigned making it 'used'. The nested local is marked __attribute__ ((unused)) to silence this same warning for it. Signed-off-by: Andy Whitcroft Cc: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/mmzone.h | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 61b07332200..3503ad66945 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -120,13 +120,26 @@ static inline int pfn_valid(int pfn) __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define alloc_bootmem_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) - +#define alloc_bootmem_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)); \ +}) +#define alloc_bootmem_pages_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)) \ +}) +#define alloc_bootmem_low_pages_node(pgdat, x) \ +({ \ + struct pglist_data __attribute__ ((unused)) \ + *__alloc_bootmem_node__pgdat = (pgdat); \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ +}) #endif /* CONFIG_NEED_MULTIPLE_NODES */ #endif /* _ASM_MMZONE_H_ */ -- cgit v1.2.3-70-g09d2 From 6e0eaa4b05cf53ca5caa702fd2760a5b3376be69 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:10 -0800 Subject: [PATCH] slab: remove SLAB_NO_GROW It is only used internally in the slab. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 -- mm/slab.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index b831776b2fc..9ffd1c1616b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -28,8 +28,6 @@ typedef struct kmem_cache kmem_cache_t; #define SLAB_LEVEL_MASK GFP_LEVEL_MASK -#define SLAB_NO_GROW __GFP_NO_GROW /* don't grow a cache */ - /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build * SLAB_DEBUG_SUPPORT. diff --git a/mm/slab.c b/mm/slab.c index 8f3f61cacb5..e853dfe8fd7 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2721,8 +2721,8 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)); - if (flags & SLAB_NO_GROW) + BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | __GFP_NO_GROW)); + if (flags & __GFP_NO_GROW) return 0; ctor_flags = SLAB_CTOR_CONSTRUCTOR; -- cgit v1.2.3-70-g09d2 From a06d72c1dcbff015250df6ad9f0b1d18c02113bf Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:12 -0800 Subject: [PATCH] slab: remove SLAB_LEVEL_MASK SLAB_LEVEL_MASK is only used internally to the slab and is and alias of GFP_LEVEL_MASK. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 -- mm/slab.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9ffd1c1616b..6f7b9bb4306 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -26,8 +26,6 @@ typedef struct kmem_cache kmem_cache_t; #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA -#define SLAB_LEVEL_MASK GFP_LEVEL_MASK - /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build * SLAB_DEBUG_SUPPORT. diff --git a/mm/slab.c b/mm/slab.c index e853dfe8fd7..9f34b4946fb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2721,12 +2721,12 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - BUG_ON(flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | __GFP_NO_GROW)); + BUG_ON(flags & ~(SLAB_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW)); if (flags & __GFP_NO_GROW) return 0; ctor_flags = SLAB_CTOR_CONSTRUCTOR; - local_flags = (flags & SLAB_LEVEL_MASK); + local_flags = (flags & GFP_LEVEL_MASK); if (!(local_flags & __GFP_WAIT)) /* * Not allowed to sleep. Need to tell a constructor about -- cgit v1.2.3-70-g09d2 From 55acbda0965ca0a29b0ca276e7d17a55edc11d1b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:13 -0800 Subject: [PATCH] slab: remove SLAB_NOIO SLAB_NOIO is an alias of GFP_NOIO with a single instance of use. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/storage/transport.c | 2 +- include/linux/slab.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 47644b5b615..323293a3e61 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -427,7 +427,7 @@ static int usb_stor_bulk_transfer_sglist(struct us_data *us, unsigned int pipe, US_DEBUGP("%s: xfer %u bytes, %d entries\n", __FUNCTION__, length, num_sg); result = usb_sg_init(&us->current_sg, us->pusb_dev, pipe, 0, - sg, num_sg, length, SLAB_NOIO); + sg, num_sg, length, GFP_NOIO); if (result) { US_DEBUGP("usb_sg_init returned %d\n", result); return USB_STOR_XFER_ERROR; diff --git a/include/linux/slab.h b/include/linux/slab.h index 6f7b9bb4306..43ced80c327 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,7 +20,6 @@ typedef struct kmem_cache kmem_cache_t; /* flags for kmem_cache_alloc() */ #define SLAB_NOFS GFP_NOFS -#define SLAB_NOIO GFP_NOIO #define SLAB_ATOMIC GFP_ATOMIC #define SLAB_USER GFP_USER #define SLAB_KERNEL GFP_KERNEL -- cgit v1.2.3-70-g09d2 From e6b4f8da3a88457148038bc952043e99a7fdba64 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:14 -0800 Subject: [PATCH] slab: remove SLAB_NOFS SLAB_NOFS is an alias of GFP_NOFS. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/cifs/misc.c | 4 ++-- fs/cifs/transport.c | 2 +- fs/dquot.c | 2 +- fs/ext3/super.c | 2 +- fs/ext4/super.c | 2 +- fs/hpfs/super.c | 2 +- fs/nfs/read.c | 2 +- fs/nfs/write.c | 4 ++-- fs/ntfs/attrib.c | 2 +- fs/ntfs/index.c | 2 +- fs/ntfs/inode.c | 4 ++-- fs/ntfs/unistr.c | 2 +- fs/ocfs2/dlm/dlmfs.c | 2 +- fs/ocfs2/super.c | 2 +- include/linux/slab.h | 1 - 15 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index bbc9cd34b6e..8355daff504 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -153,7 +153,7 @@ cifs_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | SLAB_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS); /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ @@ -192,7 +192,7 @@ cifs_small_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | SLAB_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS); if (ret_buf) { /* No need to clear memory here, cleared in header assemble */ /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 48d47b46b1f..7514237cf31 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) } temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, - SLAB_KERNEL | SLAB_NOFS); + SLAB_KERNEL | GFP_NOFS); if (temp == NULL) return temp; else { diff --git a/fs/dquot.c b/fs/dquot.c index 9af789567e5..c6ae6c0e0cf 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -600,7 +600,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; - dquot = kmem_cache_alloc(dquot_cachep, SLAB_NOFS); + dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS); if(!dquot) return NODQUOT; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index afc2d4f42d7..0cf633f0cfa 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -445,7 +445,7 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) { struct ext3_inode_info *ei; - ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); if (!ei) return NULL; #ifdef CONFIG_EXT3_FS_POSIX_ACL diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b4b022aa2bc..c730cbc8403 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -495,7 +495,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) { struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 450b5e0b478..46ceadd6f16 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -165,7 +165,7 @@ static kmem_cache_t * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, SLAB_NOFS); + ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c2e49c397a2..56f66f0ccb6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -46,7 +46,7 @@ static mempool_t *nfs_rdata_mempool; struct nfs_read_data *nfs_readdata_alloc(size_t len) { unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 883dd4a1c15..f7dd0d00595 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -93,7 +93,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); struct nfs_write_data *nfs_commit_alloc(void) { - struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); @@ -112,7 +112,7 @@ void nfs_commit_free(struct nfs_write_data *p) struct nfs_write_data *nfs_writedata_alloc(size_t len) { unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; - struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); + struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); if (p) { memset(p, 0, sizeof(*p)); diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 9f08e851cfb..c577d8e1bd9 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1272,7 +1272,7 @@ ntfs_attr_search_ctx *ntfs_attr_get_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec) { ntfs_attr_search_ctx *ctx; - ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS); + ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, GFP_NOFS); if (ctx) ntfs_attr_init_search_ctx(ctx, ni, mrec); return ctx; diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c index e32cde48636..2194eff4974 100644 --- a/fs/ntfs/index.c +++ b/fs/ntfs/index.c @@ -38,7 +38,7 @@ ntfs_index_context *ntfs_index_ctx_get(ntfs_inode *idx_ni) { ntfs_index_context *ictx; - ictx = kmem_cache_alloc(ntfs_index_ctx_cache, SLAB_NOFS); + ictx = kmem_cache_alloc(ntfs_index_ctx_cache, GFP_NOFS); if (ictx) *ictx = (ntfs_index_context){ .idx_ni = idx_ni }; return ictx; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 2d3de9c8981..247989891b4 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -324,7 +324,7 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_big_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_big_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return VFS_I(ni); @@ -349,7 +349,7 @@ static inline ntfs_inode *ntfs_alloc_extent_inode(void) ntfs_inode *ni; ntfs_debug("Entering."); - ni = kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); + ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS); if (likely(ni != NULL)) { ni->state = 0; return ni; diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c index 6a495f7369f..005ca4b0f13 100644 --- a/fs/ntfs/unistr.c +++ b/fs/ntfs/unistr.c @@ -266,7 +266,7 @@ int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, /* We do not trust outside sources. */ if (likely(ins)) { - ucs = kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS); + ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); if (likely(ucs)) { for (i = o = 0; i < ins_len; i += wc_len) { wc_len = nls->char2uni(ins + i, ins_len - i, diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 16b8d1ba706..01f91501f70 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -276,7 +276,7 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb) { struct dlmfs_inode_private *ip; - ip = kmem_cache_alloc(dlmfs_inode_cache, SLAB_NOFS); + ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); if (!ip) return NULL; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index d9b4214a12d..7574d26ee0f 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -303,7 +303,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) { struct ocfs2_inode_info *oi; - oi = kmem_cache_alloc(ocfs2_inode_cachep, SLAB_NOFS); + oi = kmem_cache_alloc(ocfs2_inode_cachep, GFP_NOFS); if (!oi) return NULL; diff --git a/include/linux/slab.h b/include/linux/slab.h index 43ced80c327..5b70d52b4f8 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -19,7 +19,6 @@ typedef struct kmem_cache kmem_cache_t; #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ /* flags for kmem_cache_alloc() */ -#define SLAB_NOFS GFP_NOFS #define SLAB_ATOMIC GFP_ATOMIC #define SLAB_USER GFP_USER #define SLAB_KERNEL GFP_KERNEL -- cgit v1.2.3-70-g09d2 From f7267c0c0721fd02ad3dc37c3d6dd24ccd81d4d6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:15 -0800 Subject: [PATCH] slab: remove SLAB_USER SLAB_USER is an alias of GFP_USER Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/crypto.c | 4 ++-- fs/ecryptfs/inode.c | 2 +- include/linux/slab.h | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f63a7755fe8..776b2eed371 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1334,7 +1334,7 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry, goto out; } /* Released in this function */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, SLAB_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER); if (!page_virt) { ecryptfs_printk(KERN_ERR, "Out of memory\n"); rc = -ENOMEM; @@ -1493,7 +1493,7 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry, &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat; /* Read the first page from the underlying file */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, SLAB_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); if (!page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n"); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index dfcc68484f4..70911412044 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -404,7 +404,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, /* Released in this function */ page_virt = (char *)kmem_cache_alloc(ecryptfs_header_cache_2, - SLAB_USER); + GFP_USER); if (!page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, diff --git a/include/linux/slab.h b/include/linux/slab.h index 5b70d52b4f8..d7ee28e5133 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,7 +20,6 @@ typedef struct kmem_cache kmem_cache_t; /* flags for kmem_cache_alloc() */ #define SLAB_ATOMIC GFP_ATOMIC -#define SLAB_USER GFP_USER #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA -- cgit v1.2.3-70-g09d2 From 54e6ecb23951b195d02433a741c7f7cb0b796c78 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:16 -0800 Subject: [PATCH] slab: remove SLAB_ATOMIC SLAB_ATOMIC is an alias of GFP_ATOMIC Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/atm/he.c | 2 +- drivers/base/dmapool.c | 2 +- drivers/block/DAC960.c | 4 ++-- drivers/char/watchdog/pcwd_usb.c | 2 +- drivers/ieee1394/raw1394.c | 30 +++++++++++++++--------------- drivers/infiniband/hw/amso1100/c2_vq.c | 2 +- drivers/infiniband/hw/mthca/mthca_av.c | 2 +- drivers/isdn/gigaset/bas-gigaset.c | 20 ++++++++++---------- drivers/isdn/gigaset/usb-gigaset.c | 6 +++--- drivers/media/dvb/dvb-usb/usb-urb.c | 2 +- drivers/media/dvb/ttusb-dec/ttusb_dec.c | 2 +- drivers/s390/scsi/zfcp_fsf.c | 2 +- drivers/usb/core/hub.c | 2 +- drivers/usb/core/message.c | 2 +- drivers/usb/host/ehci-dbg.c | 2 +- drivers/usb/host/hc_crisv10.c | 10 +++++----- drivers/usb/host/ohci-dbg.c | 2 +- drivers/usb/host/uhci-q.c | 2 +- drivers/usb/input/aiptek.c | 2 +- drivers/usb/input/ati_remote.c | 6 +++--- drivers/usb/input/hid-core.c | 10 +++++----- drivers/usb/input/keyspan_remote.c | 2 +- drivers/usb/input/mtouchusb.c | 2 +- drivers/usb/input/powermate.c | 4 ++-- drivers/usb/input/touchkitusb.c | 2 +- drivers/usb/input/usbkbd.c | 8 ++++---- drivers/usb/input/usbmouse.c | 4 ++-- drivers/usb/input/xpad.c | 2 +- drivers/usb/input/yealink.c | 6 +++--- drivers/usb/misc/phidgetkit.c | 4 ++-- drivers/usb/misc/phidgetmotorcontrol.c | 4 ++-- drivers/usb/misc/usbtest.c | 8 ++++---- drivers/usb/mon/mon_text.c | 4 ++-- drivers/usb/net/catc.c | 2 +- drivers/usb/net/net1080.c | 2 +- drivers/usb/net/pegasus.c | 2 +- drivers/usb/net/rtl8150.c | 2 +- drivers/usb/serial/mos7720.c | 2 +- drivers/usb/serial/mos7840.c | 4 ++-- drivers/usb/storage/onetouch.c | 4 ++-- include/linux/slab.h | 1 - include/net/request_sock.h | 2 +- net/core/dst.c | 2 +- net/core/flow.c | 2 +- net/core/neighbour.c | 2 +- net/dccp/ccids/ccid3.c | 6 +++--- net/dccp/ccids/lib/loss_interval.c | 2 +- net/ipv4/inet_hashtables.c | 2 +- net/ipv4/inet_timewait_sock.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/sctp/sm_make_chunk.c | 2 +- net/sctp/socket.c | 2 +- net/xfrm/xfrm_input.c | 2 +- security/selinux/avc.c | 2 +- 55 files changed, 107 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/drivers/atm/he.c b/drivers/atm/he.c index c7314a79da0..2a2f0fc2288 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1724,7 +1724,7 @@ __alloc_tpd(struct he_dev *he_dev) struct he_tpd *tpd; dma_addr_t dma_handle; - tpd = pci_pool_alloc(he_dev->tpd_pool, SLAB_ATOMIC|SLAB_DMA, &dma_handle); + tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|SLAB_DMA, &dma_handle); if (tpd == NULL) return NULL; diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index b2efbd4cf71..fa4675254f6 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -297,7 +297,7 @@ restart: } } } - if (!(page = pool_alloc_page (pool, SLAB_ATOMIC))) { + if (!(page = pool_alloc_page (pool, GFP_ATOMIC))) { if (mem_flags & __GFP_WAIT) { DECLARE_WAITQUEUE (wait, current); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 742d0740310..8d81a3a64c0 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -324,13 +324,13 @@ static boolean DAC960_CreateAuxiliaryStructures(DAC960_Controller_T *Controller) Command->Next = Controller->FreeCommands; Controller->FreeCommands = Command; Controller->Commands[CommandIdentifier-1] = Command; - ScatterGatherCPU = pci_pool_alloc(ScatterGatherPool, SLAB_ATOMIC, + ScatterGatherCPU = pci_pool_alloc(ScatterGatherPool, GFP_ATOMIC, &ScatterGatherDMA); if (ScatterGatherCPU == NULL) return DAC960_Failure(Controller, "AUXILIARY STRUCTURE CREATION"); if (RequestSensePool != NULL) { - RequestSenseCPU = pci_pool_alloc(RequestSensePool, SLAB_ATOMIC, + RequestSenseCPU = pci_pool_alloc(RequestSensePool, GFP_ATOMIC, &RequestSenseDMA); if (RequestSenseCPU == NULL) { pci_pool_free(ScatterGatherPool, ScatterGatherCPU, diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c index e275dd4a705..61138726b50 100644 --- a/drivers/char/watchdog/pcwd_usb.c +++ b/drivers/char/watchdog/pcwd_usb.c @@ -634,7 +634,7 @@ static int usb_pcwd_probe(struct usb_interface *interface, const struct usb_devi usb_pcwd->intr_size = (le16_to_cpu(endpoint->wMaxPacketSize) > 8 ? le16_to_cpu(endpoint->wMaxPacketSize) : 8); /* set up the memory buffer's */ - if (!(usb_pcwd->intr_buffer = usb_buffer_alloc(udev, usb_pcwd->intr_size, SLAB_ATOMIC, &usb_pcwd->intr_dma))) { + if (!(usb_pcwd->intr_buffer = usb_buffer_alloc(udev, usb_pcwd->intr_size, GFP_ATOMIC, &usb_pcwd->intr_dma))) { printk(KERN_ERR PFX "Out of memory\n"); goto error; } diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 5ec4f5eb6b1..47f6a4e29b4 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -259,7 +259,7 @@ static void host_reset(struct hpsb_host *host) if (hi != NULL) { list_for_each_entry(fi, &hi->file_info_list, list) { if (fi->notification == RAW1394_NOTIFY_ON) { - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (req != NULL) { req->file_info = fi; @@ -306,13 +306,13 @@ static void iso_receive(struct hpsb_host *host, int channel, quadlet_t * data, if (!(fi->listen_channels & (1ULL << channel))) continue; - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) break; if (!ibs) { ibs = kmalloc(sizeof(*ibs) + length, - SLAB_ATOMIC); + GFP_ATOMIC); if (!ibs) { kfree(req); break; @@ -367,13 +367,13 @@ static void fcp_request(struct hpsb_host *host, int nodeid, int direction, if (!fi->fcp_buffer) continue; - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) break; if (!ibs) { ibs = kmalloc(sizeof(*ibs) + length, - SLAB_ATOMIC); + GFP_ATOMIC); if (!ibs) { kfree(req); break; @@ -593,7 +593,7 @@ static int state_initialized(struct file_info *fi, struct pending_request *req) switch (req->req.type) { case RAW1394_REQ_LIST_CARDS: spin_lock_irqsave(&host_info_lock, flags); - khl = kmalloc(sizeof(*khl) * host_count, SLAB_ATOMIC); + khl = kmalloc(sizeof(*khl) * host_count, GFP_ATOMIC); if (khl) { req->req.misc = host_count; @@ -1045,7 +1045,7 @@ static int arm_read(struct hpsb_host *host, int nodeid, quadlet_t * buffer, } if (arm_addr->notification_options & ARM_READ) { DBGMSG("arm_read -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_read -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1064,7 +1064,7 @@ static int arm_read(struct hpsb_host *host, int nodeid, quadlet_t * buffer, sizeof(struct arm_response) + sizeof(struct arm_request_response); } - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_read -> rcode_conflict_error"); @@ -1198,7 +1198,7 @@ static int arm_write(struct hpsb_host *host, int nodeid, int destid, } if (arm_addr->notification_options & ARM_WRITE) { DBGMSG("arm_write -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_write -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1209,7 +1209,7 @@ static int arm_write(struct hpsb_host *host, int nodeid, int destid, sizeof(struct arm_request) + sizeof(struct arm_response) + (length) * sizeof(byte_t) + sizeof(struct arm_request_response); - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_write -> rcode_conflict_error"); @@ -1400,7 +1400,7 @@ static int arm_lock(struct hpsb_host *host, int nodeid, quadlet_t * store, if (arm_addr->notification_options & ARM_LOCK) { byte_t *buf1, *buf2; DBGMSG("arm_lock -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { DBGMSG("arm_lock -> rcode_conflict_error"); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -1408,7 +1408,7 @@ static int arm_lock(struct hpsb_host *host, int nodeid, quadlet_t * store, The request may be retried */ } size = sizeof(struct arm_request) + sizeof(struct arm_response) + 3 * sizeof(*store) + sizeof(struct arm_request_response); /* maximum */ - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_lock -> rcode_conflict_error"); @@ -1628,7 +1628,7 @@ static int arm_lock64(struct hpsb_host *host, int nodeid, octlet_t * store, if (arm_addr->notification_options & ARM_LOCK) { byte_t *buf1, *buf2; DBGMSG("arm_lock64 -> entering notification-section"); - req = __alloc_pending_request(SLAB_ATOMIC); + req = __alloc_pending_request(GFP_ATOMIC); if (!req) { spin_unlock_irqrestore(&host_info_lock, irqflags); DBGMSG("arm_lock64 -> rcode_conflict_error"); @@ -1636,7 +1636,7 @@ static int arm_lock64(struct hpsb_host *host, int nodeid, octlet_t * store, The request may be retried */ } size = sizeof(struct arm_request) + sizeof(struct arm_response) + 3 * sizeof(*store) + sizeof(struct arm_request_response); /* maximum */ - req->data = kmalloc(size, SLAB_ATOMIC); + req->data = kmalloc(size, GFP_ATOMIC); if (!(req->data)) { free_pending_request(req); spin_unlock_irqrestore(&host_info_lock, irqflags); @@ -2443,7 +2443,7 @@ static void queue_rawiso_event(struct file_info *fi) /* only one ISO activity event may be in the queue */ if (!__rawiso_event_in_queue(fi)) { struct pending_request *req = - __alloc_pending_request(SLAB_ATOMIC); + __alloc_pending_request(GFP_ATOMIC); if (req) { req->file_info = fi; diff --git a/drivers/infiniband/hw/amso1100/c2_vq.c b/drivers/infiniband/hw/amso1100/c2_vq.c index 40caeb5f41b..36620a22413 100644 --- a/drivers/infiniband/hw/amso1100/c2_vq.c +++ b/drivers/infiniband/hw/amso1100/c2_vq.c @@ -164,7 +164,7 @@ void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) */ void *vq_repbuf_alloc(struct c2_dev *c2dev) { - return kmem_cache_alloc(c2dev->host_msg_cache, SLAB_ATOMIC); + return kmem_cache_alloc(c2dev->host_msg_cache, GFP_ATOMIC); } /* diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 57cdc1bc5f5..27caf3b0648 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -189,7 +189,7 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { ah->av = pci_pool_alloc(dev->av_table.pool, - SLAB_ATOMIC, &ah->avdma); + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 0c937325a1b..5857e7e23f8 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -572,7 +572,7 @@ static int atread_submit(struct cardstate *cs, int timeout) ucs->rcvbuf, ucs->rcvbuf_size, read_ctrl_callback, cs->inbuf); - if ((ret = usb_submit_urb(ucs->urb_cmd_in, SLAB_ATOMIC)) != 0) { + if ((ret = usb_submit_urb(ucs->urb_cmd_in, GFP_ATOMIC)) != 0) { update_basstate(ucs, 0, BS_ATRDPEND); dev_err(cs->dev, "could not submit HD_READ_ATMESSAGE: %s\n", get_usb_rcmsg(ret)); @@ -747,7 +747,7 @@ static void read_int_callback(struct urb *urb) check_pending(ucs); resubmit: - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(cs->dev, "could not resubmit interrupt URB: %s\n", get_usb_rcmsg(rc)); @@ -807,7 +807,7 @@ static void read_iso_callback(struct urb *urb) urb->number_of_packets = BAS_NUMFRAMES; gig_dbg(DEBUG_ISO, "%s: isoc read overrun/resubmit", __func__); - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(bcs->cs->dev, "could not resubmit isochronous read " @@ -900,7 +900,7 @@ static int starturbs(struct bc_state *bcs) } dump_urb(DEBUG_ISO, "Initial isoc read", urb); - if ((rc = usb_submit_urb(urb, SLAB_ATOMIC)) != 0) + if ((rc = usb_submit_urb(urb, GFP_ATOMIC)) != 0) goto error; } @@ -935,7 +935,7 @@ static int starturbs(struct bc_state *bcs) /* submit two URBs, keep third one */ for (k = 0; k < 2; ++k) { dump_urb(DEBUG_ISO, "Initial isoc write", urb); - rc = usb_submit_urb(ubc->isoouturbs[k].urb, SLAB_ATOMIC); + rc = usb_submit_urb(ubc->isoouturbs[k].urb, GFP_ATOMIC); if (rc != 0) goto error; } @@ -1042,7 +1042,7 @@ static int submit_iso_write_urb(struct isow_urbctx_t *ucx) return 0; /* no data to send */ urb->number_of_packets = nframe; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc)) { if (rc == -ENODEV) /* device removed - give up silently */ @@ -1341,7 +1341,7 @@ static void read_iso_tasklet(unsigned long data) urb->dev = bcs->cs->hw.bas->udev; urb->transfer_flags = URB_ISO_ASAP; urb->number_of_packets = BAS_NUMFRAMES; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc != 0 && rc != -ENODEV)) { dev_err(cs->dev, "could not resubmit isochronous read URB: %s\n", @@ -1458,7 +1458,7 @@ static void write_ctrl_callback(struct urb *urb) ucs->retry_ctrl); /* urb->dev is clobbered by USB subsystem */ urb->dev = ucs->udev; - rc = usb_submit_urb(urb, SLAB_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(rc)) { dev_err(&ucs->interface->dev, "could not resubmit request 0x%02x: %s\n", @@ -1517,7 +1517,7 @@ static int req_submit(struct bc_state *bcs, int req, int val, int timeout) (unsigned char*) &ucs->dr_ctrl, NULL, 0, write_ctrl_callback, ucs); ucs->retry_ctrl = 0; - ret = usb_submit_urb(ucs->urb_ctrl, SLAB_ATOMIC); + ret = usb_submit_urb(ucs->urb_ctrl, GFP_ATOMIC); if (unlikely(ret)) { dev_err(bcs->cs->dev, "could not submit request 0x%02x: %s\n", req, get_usb_rcmsg(ret)); @@ -1763,7 +1763,7 @@ static int atwrite_submit(struct cardstate *cs, unsigned char *buf, int len) usb_sndctrlpipe(ucs->udev, 0), (unsigned char*) &ucs->dr_cmd_out, buf, len, write_command_callback, cs); - rc = usb_submit_urb(ucs->urb_cmd_out, SLAB_ATOMIC); + rc = usb_submit_urb(ucs->urb_cmd_out, GFP_ATOMIC); if (unlikely(rc)) { update_basstate(ucs, 0, BS_ATWRPEND); dev_err(cs->dev, "could not submit HD_WRITE_ATMESSAGE: %s\n", diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index 5ebf49ac9b2..af89ce188f2 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -410,7 +410,7 @@ static void gigaset_read_int_callback(struct urb *urb) if (resubmit) { spin_lock_irqsave(&cs->lock, flags); - r = cs->connected ? usb_submit_urb(urb, SLAB_ATOMIC) : -ENODEV; + r = cs->connected ? usb_submit_urb(urb, GFP_ATOMIC) : -ENODEV; spin_unlock_irqrestore(&cs->lock, flags); if (r) dev_err(cs->dev, "error %d when resubmitting urb.\n", @@ -486,7 +486,7 @@ static int send_cb(struct cardstate *cs, struct cmdbuf_t *cb) atomic_set(&ucs->busy, 1); spin_lock_irqsave(&cs->lock, flags); - status = cs->connected ? usb_submit_urb(ucs->bulk_out_urb, SLAB_ATOMIC) : -ENODEV; + status = cs->connected ? usb_submit_urb(ucs->bulk_out_urb, GFP_ATOMIC) : -ENODEV; spin_unlock_irqrestore(&cs->lock, flags); if (status) { @@ -664,7 +664,7 @@ static int write_modem(struct cardstate *cs) ucs->bulk_out_endpointAddr & 0x0f), ucs->bulk_out_buffer, count, gigaset_write_bulk_callback, cs); - ret = usb_submit_urb(ucs->bulk_out_urb, SLAB_ATOMIC); + ret = usb_submit_urb(ucs->bulk_out_urb, GFP_ATOMIC); } else { ret = -ENODEV; } diff --git a/drivers/media/dvb/dvb-usb/usb-urb.c b/drivers/media/dvb/dvb-usb/usb-urb.c index 78035ee824c..397f51a7b2a 100644 --- a/drivers/media/dvb/dvb-usb/usb-urb.c +++ b/drivers/media/dvb/dvb-usb/usb-urb.c @@ -116,7 +116,7 @@ static int usb_allocate_stream_buffers(struct usb_data_stream *stream, int num, for (stream->buf_num = 0; stream->buf_num < num; stream->buf_num++) { deb_mem("allocating buffer %d\n",stream->buf_num); if (( stream->buf_list[stream->buf_num] = - usb_buffer_alloc(stream->udev, size, SLAB_ATOMIC, + usb_buffer_alloc(stream->udev, size, GFP_ATOMIC, &stream->dma_addr[stream->buf_num]) ) == NULL) { deb_mem("not enough memory for urb-buffer allocation.\n"); usb_free_stream_buffers(stream); diff --git a/drivers/media/dvb/ttusb-dec/ttusb_dec.c b/drivers/media/dvb/ttusb-dec/ttusb_dec.c index 8135f3e76ae..10b121ada83 100644 --- a/drivers/media/dvb/ttusb-dec/ttusb_dec.c +++ b/drivers/media/dvb/ttusb-dec/ttusb_dec.c @@ -1244,7 +1244,7 @@ static int ttusb_dec_init_usb(struct ttusb_dec *dec) return -ENOMEM; } dec->irq_buffer = usb_buffer_alloc(dec->udev,IRQ_PACKET_SIZE, - SLAB_ATOMIC, &dec->irq_dma_handle); + GFP_ATOMIC, &dec->irq_dma_handle); if(!dec->irq_buffer) { return -ENOMEM; } diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 277826cdd0c..067f1519eb0 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -109,7 +109,7 @@ zfcp_fsf_req_alloc(mempool_t *pool, int req_flags) ptr = kmalloc(size, GFP_ATOMIC); else ptr = kmem_cache_alloc(zfcp_data.fsf_req_qtcb_cache, - SLAB_ATOMIC); + GFP_ATOMIC); } if (unlikely(!ptr)) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 9be41ed1f9a..0a46acf557a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -460,7 +460,7 @@ void usb_hub_tt_clear_buffer (struct usb_device *udev, int pipe) * since each TT has "at least two" buffers that can need it (and * there can be many TTs per hub). even if they're uncommon. */ - if ((clear = kmalloc (sizeof *clear, SLAB_ATOMIC)) == NULL) { + if ((clear = kmalloc (sizeof *clear, GFP_ATOMIC)) == NULL) { dev_err (&udev->dev, "can't save CLEAR_TT_BUFFER state\n"); /* FIXME recover somehow ... RESET_TT? */ return; diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 7390b67c609..149aa8bfb1f 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -488,7 +488,7 @@ void usb_sg_wait (struct usb_sg_request *io) int retval; io->urbs [i]->dev = io->dev; - retval = usb_submit_urb (io->urbs [i], SLAB_ATOMIC); + retval = usb_submit_urb (io->urbs [i], GFP_ATOMIC); /* after we submit, let completions or cancelations fire; * we handshake using io->status. diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index 34b7a31cd85..56349d21e6e 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -492,7 +492,7 @@ show_periodic (struct class_device *class_dev, char *buf) unsigned i; __le32 tag; - if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, SLAB_ATOMIC))) + if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, GFP_ATOMIC))) return 0; seen_count = 0; diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index 87eca6aeacf..396dc69d4b4 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ -188,7 +188,7 @@ static DEFINE_TIMER(bulk_eot_timer, NULL, 0, 0); #define CHECK_ALIGN(x) if (((__u32)(x)) & 0x00000003) \ {panic("Alignment check (DWORD) failed at %s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);} -#define SLAB_FLAG (in_interrupt() ? SLAB_ATOMIC : SLAB_KERNEL) +#define SLAB_FLAG (in_interrupt() ? GFP_ATOMIC : SLAB_KERNEL) #define KMALLOC_FLAG (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) /* Most helpful debugging aid */ @@ -1743,7 +1743,7 @@ static irqreturn_t etrax_usb_tx_interrupt(int irq, void *vhc) *R_DMA_CH8_SUB3_CLR_INTR = IO_STATE(R_DMA_CH8_SUB3_CLR_INTR, clr_descr, do); - comp_data = (usb_isoc_complete_data_t*)kmem_cache_alloc(isoc_compl_cache, SLAB_ATOMIC); + comp_data = (usb_isoc_complete_data_t*)kmem_cache_alloc(isoc_compl_cache, GFP_ATOMIC); assert(comp_data != NULL); INIT_WORK(&comp_data->usb_bh, etrax_usb_isoc_descr_interrupt_bottom_half, comp_data); @@ -3010,7 +3010,7 @@ static void etrax_usb_add_to_isoc_sb_list(struct urb *urb, int epid) if (!urb->iso_frame_desc[i].length) continue; - next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, SLAB_ATOMIC); + next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, GFP_ATOMIC); assert(next_sb_desc != NULL); if (urb->iso_frame_desc[i].length > 0) { @@ -3063,7 +3063,7 @@ static void etrax_usb_add_to_isoc_sb_list(struct urb *urb, int epid) if (TxIsocEPList[epid].sub == 0) { dbg_isoc("Isoc traffic not already running, allocating SB"); - next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, SLAB_ATOMIC); + next_sb_desc = (USB_SB_Desc_t*)kmem_cache_alloc(usb_desc_cache, GFP_ATOMIC); assert(next_sb_desc != NULL); next_sb_desc->command = (IO_STATE(USB_SB_command, tt, in) | @@ -3317,7 +3317,7 @@ static irqreturn_t etrax_usb_hc_interrupt_top_half(int irq, void *vhc) restore_flags(flags); - reg = (usb_interrupt_registers_t *)kmem_cache_alloc(top_half_reg_cache, SLAB_ATOMIC); + reg = (usb_interrupt_registers_t *)kmem_cache_alloc(top_half_reg_cache, GFP_ATOMIC); assert(reg != NULL); diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index 8293c1d4be3..0f47a57dac2 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -505,7 +505,7 @@ show_periodic (struct class_device *class_dev, char *buf) char *next; unsigned i; - if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, SLAB_ATOMIC))) + if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, GFP_ATOMIC))) return 0; seen_count = 0; diff --git a/drivers/usb/host/uhci-q.c b/drivers/usb/host/uhci-q.c index 06115f22a4f..30b88459ac7 100644 --- a/drivers/usb/host/uhci-q.c +++ b/drivers/usb/host/uhci-q.c @@ -498,7 +498,7 @@ static inline struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci, { struct urb_priv *urbp; - urbp = kmem_cache_alloc(uhci_up_cachep, SLAB_ATOMIC); + urbp = kmem_cache_alloc(uhci_up_cachep, GFP_ATOMIC); if (!urbp) return NULL; diff --git a/drivers/usb/input/aiptek.c b/drivers/usb/input/aiptek.c index bf428184608..9f52429ce65 100644 --- a/drivers/usb/input/aiptek.c +++ b/drivers/usb/input/aiptek.c @@ -1988,7 +1988,7 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) goto fail1; aiptek->data = usb_buffer_alloc(usbdev, AIPTEK_PACKET_LENGTH, - SLAB_ATOMIC, &aiptek->data_dma); + GFP_ATOMIC, &aiptek->data_dma); if (!aiptek->data) goto fail1; diff --git a/drivers/usb/input/ati_remote.c b/drivers/usb/input/ati_remote.c index ff23318dc30..b724e36f7b9 100644 --- a/drivers/usb/input/ati_remote.c +++ b/drivers/usb/input/ati_remote.c @@ -592,7 +592,7 @@ static void ati_remote_irq_in(struct urb *urb) __FUNCTION__, urb->status); } - retval = usb_submit_urb(urb, SLAB_ATOMIC); + retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&ati_remote->interface->dev, "%s: usb_submit_urb()=%d\n", __FUNCTION__, retval); @@ -604,12 +604,12 @@ static void ati_remote_irq_in(struct urb *urb) static int ati_remote_alloc_buffers(struct usb_device *udev, struct ati_remote *ati_remote) { - ati_remote->inbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, SLAB_ATOMIC, + ati_remote->inbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, GFP_ATOMIC, &ati_remote->inbuf_dma); if (!ati_remote->inbuf) return -1; - ati_remote->outbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, SLAB_ATOMIC, + ati_remote->outbuf = usb_buffer_alloc(udev, DATA_BUFSIZE, GFP_ATOMIC, &ati_remote->outbuf_dma); if (!ati_remote->outbuf) return -1; diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index 4295bab4f1e..f1d0e1d6982 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1079,7 +1079,7 @@ static void hid_irq_in(struct urb *urb) warn("input irq status %d received", urb->status); } - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) { clear_bit(HID_IN_RUNNING, &hid->iofl); if (status != -EPERM) { @@ -1864,13 +1864,13 @@ static void hid_find_max_report(struct hid_device *hid, unsigned int type, int * static int hid_alloc_buffers(struct usb_device *dev, struct hid_device *hid) { - if (!(hid->inbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->inbuf_dma))) + if (!(hid->inbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->inbuf_dma))) return -1; - if (!(hid->outbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->outbuf_dma))) + if (!(hid->outbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->outbuf_dma))) return -1; - if (!(hid->cr = usb_buffer_alloc(dev, sizeof(*(hid->cr)), SLAB_ATOMIC, &hid->cr_dma))) + if (!(hid->cr = usb_buffer_alloc(dev, sizeof(*(hid->cr)), GFP_ATOMIC, &hid->cr_dma))) return -1; - if (!(hid->ctrlbuf = usb_buffer_alloc(dev, hid->bufsize, SLAB_ATOMIC, &hid->ctrlbuf_dma))) + if (!(hid->ctrlbuf = usb_buffer_alloc(dev, hid->bufsize, GFP_ATOMIC, &hid->ctrlbuf_dma))) return -1; return 0; diff --git a/drivers/usb/input/keyspan_remote.c b/drivers/usb/input/keyspan_remote.c index 50aa8108a50..98bd323369c 100644 --- a/drivers/usb/input/keyspan_remote.c +++ b/drivers/usb/input/keyspan_remote.c @@ -456,7 +456,7 @@ static int keyspan_probe(struct usb_interface *interface, const struct usb_devic remote->in_endpoint = endpoint; remote->toggle = -1; /* Set to -1 so we will always not match the toggle from the first remote message. */ - remote->in_buffer = usb_buffer_alloc(udev, RECV_SIZE, SLAB_ATOMIC, &remote->in_dma); + remote->in_buffer = usb_buffer_alloc(udev, RECV_SIZE, GFP_ATOMIC, &remote->in_dma); if (!remote->in_buffer) { retval = -ENOMEM; goto fail1; diff --git a/drivers/usb/input/mtouchusb.c b/drivers/usb/input/mtouchusb.c index 79a85d46cb1..92c4e07da4c 100644 --- a/drivers/usb/input/mtouchusb.c +++ b/drivers/usb/input/mtouchusb.c @@ -164,7 +164,7 @@ static int mtouchusb_alloc_buffers(struct usb_device *udev, struct mtouch_usb *m dbg("%s - called", __FUNCTION__); mtouch->data = usb_buffer_alloc(udev, MTOUCHUSB_REPORT_DATA_SIZE, - SLAB_ATOMIC, &mtouch->data_dma); + GFP_ATOMIC, &mtouch->data_dma); if (!mtouch->data) return -1; diff --git a/drivers/usb/input/powermate.c b/drivers/usb/input/powermate.c index 0bf91778c40..fea97e5437f 100644 --- a/drivers/usb/input/powermate.c +++ b/drivers/usb/input/powermate.c @@ -277,12 +277,12 @@ static int powermate_input_event(struct input_dev *dev, unsigned int type, unsig static int powermate_alloc_buffers(struct usb_device *udev, struct powermate_device *pm) { pm->data = usb_buffer_alloc(udev, POWERMATE_PAYLOAD_SIZE_MAX, - SLAB_ATOMIC, &pm->data_dma); + GFP_ATOMIC, &pm->data_dma); if (!pm->data) return -1; pm->configcr = usb_buffer_alloc(udev, sizeof(*(pm->configcr)), - SLAB_ATOMIC, &pm->configcr_dma); + GFP_ATOMIC, &pm->configcr_dma); if (!pm->configcr) return -1; diff --git a/drivers/usb/input/touchkitusb.c b/drivers/usb/input/touchkitusb.c index 05c0d1ca39a..2a314b06592 100644 --- a/drivers/usb/input/touchkitusb.c +++ b/drivers/usb/input/touchkitusb.c @@ -248,7 +248,7 @@ static int touchkit_alloc_buffers(struct usb_device *udev, struct touchkit_usb *touchkit) { touchkit->data = usb_buffer_alloc(udev, TOUCHKIT_REPORT_DATA_SIZE, - SLAB_ATOMIC, &touchkit->data_dma); + GFP_ATOMIC, &touchkit->data_dma); if (!touchkit->data) return -1; diff --git a/drivers/usb/input/usbkbd.c b/drivers/usb/input/usbkbd.c index dac88640eab..8505824848f 100644 --- a/drivers/usb/input/usbkbd.c +++ b/drivers/usb/input/usbkbd.c @@ -122,7 +122,7 @@ static void usb_kbd_irq(struct urb *urb) memcpy(kbd->old, kbd->new, 8); resubmit: - i = usb_submit_urb (urb, SLAB_ATOMIC); + i = usb_submit_urb (urb, GFP_ATOMIC); if (i) err ("can't resubmit intr, %s-%s/input0, status %d", kbd->usbdev->bus->bus_name, @@ -196,11 +196,11 @@ static int usb_kbd_alloc_mem(struct usb_device *dev, struct usb_kbd *kbd) return -1; if (!(kbd->led = usb_alloc_urb(0, GFP_KERNEL))) return -1; - if (!(kbd->new = usb_buffer_alloc(dev, 8, SLAB_ATOMIC, &kbd->new_dma))) + if (!(kbd->new = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &kbd->new_dma))) return -1; - if (!(kbd->cr = usb_buffer_alloc(dev, sizeof(struct usb_ctrlrequest), SLAB_ATOMIC, &kbd->cr_dma))) + if (!(kbd->cr = usb_buffer_alloc(dev, sizeof(struct usb_ctrlrequest), GFP_ATOMIC, &kbd->cr_dma))) return -1; - if (!(kbd->leds = usb_buffer_alloc(dev, 1, SLAB_ATOMIC, &kbd->leds_dma))) + if (!(kbd->leds = usb_buffer_alloc(dev, 1, GFP_ATOMIC, &kbd->leds_dma))) return -1; return 0; diff --git a/drivers/usb/input/usbmouse.c b/drivers/usb/input/usbmouse.c index 68a55642c08..64a33e420cf 100644 --- a/drivers/usb/input/usbmouse.c +++ b/drivers/usb/input/usbmouse.c @@ -86,7 +86,7 @@ static void usb_mouse_irq(struct urb *urb) input_sync(dev); resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s/input0, status %d", mouse->usbdev->bus->bus_name, @@ -137,7 +137,7 @@ static int usb_mouse_probe(struct usb_interface *intf, const struct usb_device_i if (!mouse || !input_dev) goto fail1; - mouse->data = usb_buffer_alloc(dev, 8, SLAB_ATOMIC, &mouse->data_dma); + mouse->data = usb_buffer_alloc(dev, 8, GFP_ATOMIC, &mouse->data_dma); if (!mouse->data) goto fail1; diff --git a/drivers/usb/input/xpad.c b/drivers/usb/input/xpad.c index df97e5c803f..e4bc76ebc83 100644 --- a/drivers/usb/input/xpad.c +++ b/drivers/usb/input/xpad.c @@ -325,7 +325,7 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id goto fail1; xpad->idata = usb_buffer_alloc(udev, XPAD_PKT_LEN, - SLAB_ATOMIC, &xpad->idata_dma); + GFP_ATOMIC, &xpad->idata_dma); if (!xpad->idata) goto fail1; diff --git a/drivers/usb/input/yealink.c b/drivers/usb/input/yealink.c index 2268ca311ad..caff8e6d744 100644 --- a/drivers/usb/input/yealink.c +++ b/drivers/usb/input/yealink.c @@ -874,17 +874,17 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id) /* allocate usb buffers */ yld->irq_data = usb_buffer_alloc(udev, USB_PKT_LEN, - SLAB_ATOMIC, &yld->irq_dma); + GFP_ATOMIC, &yld->irq_dma); if (yld->irq_data == NULL) return usb_cleanup(yld, -ENOMEM); yld->ctl_data = usb_buffer_alloc(udev, USB_PKT_LEN, - SLAB_ATOMIC, &yld->ctl_dma); + GFP_ATOMIC, &yld->ctl_dma); if (!yld->ctl_data) return usb_cleanup(yld, -ENOMEM); yld->ctl_req = usb_buffer_alloc(udev, sizeof(*(yld->ctl_req)), - SLAB_ATOMIC, &yld->ctl_req_dma); + GFP_ATOMIC, &yld->ctl_req_dma); if (yld->ctl_req == NULL) return usb_cleanup(yld, -ENOMEM); diff --git a/drivers/usb/misc/phidgetkit.c b/drivers/usb/misc/phidgetkit.c index 9659c79e187..371bf2b1197 100644 --- a/drivers/usb/misc/phidgetkit.c +++ b/drivers/usb/misc/phidgetkit.c @@ -377,7 +377,7 @@ static void interfacekit_irq(struct urb *urb) schedule_delayed_work(&kit->do_notify, 0); resubmit: - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) err("can't resubmit intr, %s-%s/interfacekit0, status %d", kit->udev->bus->bus_name, @@ -568,7 +568,7 @@ static int interfacekit_probe(struct usb_interface *intf, const struct usb_devic kit->dev_no = -1; kit->ifkit = ifkit; - kit->data = usb_buffer_alloc(dev, URB_INT_SIZE, SLAB_ATOMIC, &kit->data_dma); + kit->data = usb_buffer_alloc(dev, URB_INT_SIZE, GFP_ATOMIC, &kit->data_dma); if (!kit->data) goto out; diff --git a/drivers/usb/misc/phidgetmotorcontrol.c b/drivers/usb/misc/phidgetmotorcontrol.c index 2bb4fa572bb..5727e1ea2f9 100644 --- a/drivers/usb/misc/phidgetmotorcontrol.c +++ b/drivers/usb/misc/phidgetmotorcontrol.c @@ -151,7 +151,7 @@ static void motorcontrol_irq(struct urb *urb) schedule_delayed_work(&mc->do_notify, 0); resubmit: - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status) dev_err(&mc->intf->dev, "can't resubmit intr, %s-%s/motorcontrol0, status %d", @@ -338,7 +338,7 @@ static int motorcontrol_probe(struct usb_interface *intf, const struct usb_devic goto out; mc->dev_no = -1; - mc->data = usb_buffer_alloc(dev, URB_INT_SIZE, SLAB_ATOMIC, &mc->data_dma); + mc->data = usb_buffer_alloc(dev, URB_INT_SIZE, GFP_ATOMIC, &mc->data_dma); if (!mc->data) goto out; diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 194065dbb51..ea04dccdc65 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -819,7 +819,7 @@ error: /* resubmit if we need to, else mark this as done */ if ((status == 0) && (ctx->pending < ctx->count)) { - if ((status = usb_submit_urb (urb, SLAB_ATOMIC)) != 0) { + if ((status = usb_submit_urb (urb, GFP_ATOMIC)) != 0) { dbg ("can't resubmit ctrl %02x.%02x, err %d", reqp->bRequestType, reqp->bRequest, status); urb->dev = NULL; @@ -999,7 +999,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) context.urb = urb; spin_lock_irq (&context.lock); for (i = 0; i < param->sglen; i++) { - context.status = usb_submit_urb (urb [i], SLAB_ATOMIC); + context.status = usb_submit_urb (urb [i], GFP_ATOMIC); if (context.status != 0) { dbg ("can't submit urb[%d], status %d", i, context.status); @@ -1041,7 +1041,7 @@ static void unlink1_callback (struct urb *urb) // we "know" -EPIPE (stall) never happens if (!status) - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) { urb->status = status; complete ((struct completion *) urb->context); @@ -1481,7 +1481,7 @@ test_iso_queue (struct usbtest_dev *dev, struct usbtest_param *param, spin_lock_irq (&context.lock); for (i = 0; i < param->sglen; i++) { ++context.pending; - status = usb_submit_urb (urbs [i], SLAB_ATOMIC); + status = usb_submit_urb (urbs [i], GFP_ATOMIC); if (status < 0) { ERROR (dev, "submit iso[%d], error %d\n", i, status); if (i == 0) { diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 7a2346c5328..46aecc8754f 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -147,7 +147,7 @@ static void mon_text_event(struct mon_reader_text *rp, struct urb *urb, stamp = mon_get_timestamp(); if (rp->nevents >= EVENT_MAX || - (ep = kmem_cache_alloc(rp->e_slab, SLAB_ATOMIC)) == NULL) { + (ep = kmem_cache_alloc(rp->e_slab, GFP_ATOMIC)) == NULL) { rp->r.m_bus->cnt_text_lost++; return; } @@ -188,7 +188,7 @@ static void mon_text_error(void *data, struct urb *urb, int error) struct mon_event_text *ep; if (rp->nevents >= EVENT_MAX || - (ep = kmem_cache_alloc(rp->e_slab, SLAB_ATOMIC)) == NULL) { + (ep = kmem_cache_alloc(rp->e_slab, GFP_ATOMIC)) == NULL) { rp->r.m_bus->cnt_text_lost++; return; } diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c index 907b820a5fa..4852012735f 100644 --- a/drivers/usb/net/catc.c +++ b/drivers/usb/net/catc.c @@ -345,7 +345,7 @@ static void catc_irq_done(struct urb *urb) } } resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s, status %d", catc->usbdev->bus->bus_name, diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c index a77410562e1..49363595451 100644 --- a/drivers/usb/net/net1080.c +++ b/drivers/usb/net/net1080.c @@ -383,7 +383,7 @@ static void nc_ensure_sync(struct usbnet *dev) int status; /* Send a flush */ - urb = usb_alloc_urb(0, SLAB_ATOMIC); + urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return; diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c index b5690b3834e..d48c024cff5 100644 --- a/drivers/usb/net/pegasus.c +++ b/drivers/usb/net/pegasus.c @@ -856,7 +856,7 @@ static void intr_callback(struct urb *urb) pegasus->stats.rx_missed_errors += ((d[3] & 0x7f) << 8) | d[4]; } - status = usb_submit_urb(urb, SLAB_ATOMIC); + status = usb_submit_urb(urb, GFP_ATOMIC); if (status == -ENODEV) netif_device_detach(pegasus->net); if (status && netif_msg_timer(pegasus)) diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c index 72171f94ded..c54235f73cb 100644 --- a/drivers/usb/net/rtl8150.c +++ b/drivers/usb/net/rtl8150.c @@ -587,7 +587,7 @@ static void intr_callback(struct urb *urb) } resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status == -ENODEV) netif_device_detach(dev->netdev); else if (status) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 82cd15b894b..70f93b18292 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -363,7 +363,7 @@ static int mos7720_open(struct usb_serial_port *port, struct file * filp) /* Initialising the write urb pool */ for (j = 0; j < NUM_URBS; ++j) { - urb = usb_alloc_urb(0,SLAB_ATOMIC); + urb = usb_alloc_urb(0,GFP_ATOMIC); mos7720_port->write_urb_pool[j] = urb; if (urb == NULL) { diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 02c89e10b2c..5432c634008 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -826,7 +826,7 @@ static int mos7840_open(struct usb_serial_port *port, struct file *filp) /* Initialising the write urb pool */ for (j = 0; j < NUM_URBS; ++j) { - urb = usb_alloc_urb(0, SLAB_ATOMIC); + urb = usb_alloc_urb(0, GFP_ATOMIC); mos7840_port->write_urb_pool[j] = urb; if (urb == NULL) { @@ -2786,7 +2786,7 @@ static int mos7840_startup(struct usb_serial *serial) i + 1, status); } - mos7840_port->control_urb = usb_alloc_urb(0, SLAB_ATOMIC); + mos7840_port->control_urb = usb_alloc_urb(0, GFP_ATOMIC); mos7840_port->ctrl_buf = kmalloc(16, GFP_KERNEL); } diff --git a/drivers/usb/storage/onetouch.c b/drivers/usb/storage/onetouch.c index 3a158d58441..e565d3d2ab2 100644 --- a/drivers/usb/storage/onetouch.c +++ b/drivers/usb/storage/onetouch.c @@ -76,7 +76,7 @@ static void usb_onetouch_irq(struct urb *urb) input_sync(dev); resubmit: - status = usb_submit_urb (urb, SLAB_ATOMIC); + status = usb_submit_urb (urb, GFP_ATOMIC); if (status) err ("can't resubmit intr, %s-%s/input0, status %d", onetouch->udev->bus->bus_name, @@ -154,7 +154,7 @@ int onetouch_connect_input(struct us_data *ss) goto fail1; onetouch->data = usb_buffer_alloc(udev, ONETOUCH_PKT_LEN, - SLAB_ATOMIC, &onetouch->data_dma); + GFP_ATOMIC, &onetouch->data_dma); if (!onetouch->data) goto fail1; diff --git a/include/linux/slab.h b/include/linux/slab.h index d7ee28e5133..34b046ea88f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -19,7 +19,6 @@ typedef struct kmem_cache kmem_cache_t; #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ /* flags for kmem_cache_alloc() */ -#define SLAB_ATOMIC GFP_ATOMIC #define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA diff --git a/include/net/request_sock.h b/include/net/request_sock.h index e37baaf2080..426f0fe774e 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -60,7 +60,7 @@ struct request_sock { static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops) { - struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC); + struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC); if (req != NULL) req->rsk_ops = ops; diff --git a/net/core/dst.c b/net/core/dst.c index 1a5e49da0e7..836ec660692 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -125,7 +125,7 @@ void * dst_alloc(struct dst_ops * ops) if (ops->gc()) return NULL; } - dst = kmem_cache_alloc(ops->kmem_cachep, SLAB_ATOMIC); + dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC); if (!dst) return NULL; memset(dst, 0, ops->entry_size); diff --git a/net/core/flow.c b/net/core/flow.c index b16d31ae5e5..5df3e297f81 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -211,7 +211,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, if (flow_count(cpu) > flow_hwm) flow_cache_shrink(cpu); - fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC); + fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { fle->next = *head; *head = fle; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ba509a4a8e9..0ab1987b934 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -251,7 +251,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) goto out_entries; } - n = kmem_cache_alloc(tbl->kmem_cachep, SLAB_ATOMIC); + n = kmem_cache_alloc(tbl->kmem_cachep, GFP_ATOMIC); if (!n) goto out_entries; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index cf8c07b2704..66a27b9688c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -295,7 +295,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); if (new_packet == NULL || new_packet->dccphtx_sent) { new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, - SLAB_ATOMIC); + GFP_ATOMIC); if (unlikely(new_packet == NULL)) { DCCP_WARN("%s, sk=%p, not enough mem to add to history," @@ -889,7 +889,7 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) /* new loss event detected */ /* calculate last interval length */ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); - entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); if (entry == NULL) { DCCP_BUG("out of memory - can not allocate entry"); @@ -1011,7 +1011,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, - skb, SLAB_ATOMIC); + skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s, sk=%p, Not enough mem to add rx packet " "to history, consider it lost!\n", dccp_role(sk), sk); diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 48b9b93f8ac..0a0baef16b3 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -125,7 +125,7 @@ int dccp_li_hist_interval_new(struct dccp_li_hist *hist, int i; for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { - entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); + entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); DCCP_BUG("loss interval list entry is NULL"); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 244c4f445c7..bd6c9bc4189 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -31,7 +31,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, struct inet_bind_hashbucket *head, const unsigned short snum) { - struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { tb->port = snum; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 8c74f9168b7..e28330aa413 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -91,7 +91,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat { struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - SLAB_ATOMIC); + GFP_ATOMIC); if (tw != NULL) { const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bf526115e51..97a8cfbb61a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -150,7 +150,7 @@ static __inline__ struct fib6_node * node_alloc(void) { struct fib6_node *fn; - if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL) + if ((fn = kmem_cache_alloc(fib6_node_kmem, GFP_ATOMIC)) != NULL) memset(fn, 0, sizeof(struct fib6_node)); return fn; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 01a5c52a2be..d4f68b0f27d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -180,7 +180,7 @@ try_next_2:; spi = 0; goto out; alloc_spi: - x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC); + x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 04954e5f684..8d55d10041f 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -979,7 +979,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, { struct sctp_chunk *retval; - retval = kmem_cache_alloc(sctp_chunk_cachep, SLAB_ATOMIC); + retval = kmem_cache_alloc(sctp_chunk_cachep, GFP_ATOMIC); if (!retval) goto nodata; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 02b27145b27..49607792cbd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4989,7 +4989,7 @@ static struct sctp_bind_bucket *sctp_bucket_create( { struct sctp_bind_bucket *pp; - pp = kmem_cache_alloc(sctp_bucket_cachep, SLAB_ATOMIC); + pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC); SCTP_DBG_OBJCNT_INC(bind_bucket); if (pp) { pp->port = snum; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e8198a2c785..a898a6a83a5 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -27,7 +27,7 @@ struct sec_path *secpath_dup(struct sec_path *src) { struct sec_path *sp; - sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC); + sp = kmem_cache_alloc(secpath_cachep, GFP_ATOMIC); if (!sp) return NULL; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index e73ac1ab7cf..65b4ec9c699 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -332,7 +332,7 @@ static struct avc_node *avc_alloc_node(void) { struct avc_node *node; - node = kmem_cache_alloc(avc_node_cachep, SLAB_ATOMIC); + node = kmem_cache_alloc(avc_node_cachep, GFP_ATOMIC); if (!node) goto out; -- cgit v1.2.3-70-g09d2 From e94b1766097d53e6f3ccfb36c8baa562ffeda3fc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:17 -0800 Subject: [PATCH] slab: remove SLAB_KERNEL SLAB_KERNEL is an alias of GFP_KERNEL. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/sysenter.c | 2 +- arch/ia64/ia32/binfmt_elf32.c | 8 ++++---- arch/ia64/kernel/perfmon.c | 2 +- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/kernel/vdso.c | 2 +- arch/powerpc/platforms/cell/spufs/inode.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86_64/ia32/ia32_binfmt.c | 2 +- arch/x86_64/ia32/syscall32.c | 2 +- drivers/atm/he.c | 4 ++-- drivers/base/dmapool.c | 2 +- drivers/dma/ioatdma.c | 4 ++-- drivers/ieee1394/hosts.c | 2 +- drivers/ieee1394/ohci1394.c | 8 ++++---- drivers/ieee1394/pcilynx.c | 2 +- drivers/ieee1394/raw1394.c | 10 +++++----- drivers/infiniband/hw/ehca/ehca_av.c | 2 +- drivers/infiniband/hw/ehca/ehca_cq.c | 2 +- drivers/infiniband/hw/ehca/ehca_main.c | 2 +- drivers/infiniband/hw/ehca/ehca_mrmw.c | 4 ++-- drivers/infiniband/hw/ehca/ehca_pd.c | 2 +- drivers/infiniband/hw/ehca/ehca_qp.c | 2 +- drivers/input/touchscreen/ads7846.c | 2 +- drivers/isdn/gigaset/bas-gigaset.c | 14 +++++++------- drivers/isdn/gigaset/usb-gigaset.c | 6 +++--- drivers/media/dvb/cinergyT2/cinergyT2.c | 2 +- drivers/mtd/devices/m25p80.c | 2 +- drivers/scsi/ipr.c | 2 +- drivers/spi/spi.c | 4 ++-- drivers/spi/spi_bitbang.c | 2 +- drivers/usb/core/hub.c | 4 ++-- drivers/usb/gadget/gmidi.c | 2 +- drivers/usb/gadget/goku_udc.c | 2 +- drivers/usb/gadget/inode.c | 6 +++--- drivers/usb/gadget/net2280.c | 2 +- drivers/usb/gadget/omap_udc.c | 2 +- drivers/usb/gadget/zero.c | 2 +- drivers/usb/host/hc_crisv10.c | 2 +- drivers/usb/host/ohci-pnx4008.c | 2 +- drivers/usb/input/acecad.c | 2 +- drivers/usb/input/usbtouchscreen.c | 2 +- drivers/usb/misc/usbtest.c | 28 ++++++++++++++-------------- drivers/usb/net/rndis_host.c | 2 +- drivers/usb/net/usbnet.c | 4 ++-- fs/adfs/super.c | 2 +- fs/affs/super.c | 2 +- fs/afs/super.c | 2 +- fs/befs/linuxvfs.c | 2 +- fs/bfs/inode.c | 2 +- fs/block_dev.c | 2 +- fs/cifs/cifsfs.c | 2 +- fs/cifs/misc.c | 4 ++-- fs/cifs/transport.c | 4 ++-- fs/coda/inode.c | 2 +- fs/dnotify.c | 2 +- fs/ecryptfs/crypto.c | 2 +- fs/ecryptfs/file.c | 2 +- fs/ecryptfs/inode.c | 4 ++-- fs/ecryptfs/keystore.c | 2 +- fs/ecryptfs/main.c | 4 ++-- fs/ecryptfs/super.c | 2 +- fs/efs/super.c | 2 +- fs/eventpoll.c | 4 ++-- fs/exec.c | 2 +- fs/ext2/super.c | 2 +- fs/fat/cache.c | 2 +- fs/fat/inode.c | 2 +- fs/fcntl.c | 2 +- fs/freevxfs/vxfs_inode.c | 4 ++-- fs/fuse/dev.c | 2 +- fs/fuse/inode.c | 2 +- fs/hfs/super.c | 2 +- fs/hfsplus/super.c | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/inode.c | 2 +- fs/isofs/inode.c | 2 +- fs/jffs2/super.c | 2 +- fs/locks.c | 2 +- fs/minix/inode.c | 2 +- fs/ncpfs/inode.c | 2 +- fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 2 +- fs/nfs/pagelist.c | 2 +- fs/openpromfs/inode.c | 2 +- fs/proc/inode.c | 2 +- fs/qnx4/inode.c | 2 +- fs/reiserfs/super.c | 2 +- fs/romfs/inode.c | 2 +- fs/smbfs/inode.c | 2 +- fs/smbfs/request.c | 2 +- fs/sysv/inode.c | 2 +- fs/udf/super.c | 2 +- fs/ufs/super.c | 2 +- include/linux/fs.h | 2 +- include/linux/rmap.h | 2 +- include/linux/slab.h | 1 - include/linux/taskstats_kern.h | 2 +- ipc/mqueue.c | 2 +- kernel/delayacct.c | 2 +- kernel/fork.c | 6 +++--- kernel/taskstats.c | 2 +- kernel/user.c | 2 +- mm/mempolicy.c | 2 +- mm/mmap.c | 4 ++-- mm/shmem.c | 2 +- mm/slab.c | 2 +- net/decnet/dn_table.c | 2 +- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_trie.c | 4 ++-- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- security/keys/key.c | 2 +- security/selinux/hooks.c | 2 +- security/selinux/ss/avtab.c | 2 +- 114 files changed, 164 insertions(+), 165 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 713ba39d32c..0bbacd0ec17 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c @@ -132,7 +132,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { ret = -ENOMEM; goto up_fail; diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c index daa6b91bc92..578737ec762 100644 --- a/arch/ia64/ia32/binfmt_elf32.c +++ b/arch/ia64/ia32/binfmt_elf32.c @@ -91,7 +91,7 @@ ia64_elf32_init (struct pt_regs *regs) * it with privilege level 3 because the IVE uses non-privileged accesses to these * tables. IA-32 segmentation is used to protect against IA-32 accesses to them. */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -117,7 +117,7 @@ ia64_elf32_init (struct pt_regs *regs) * code is locked in specific gate page, which is pointed by pretcode * when setup_frame_ia32 */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -142,7 +142,7 @@ ia64_elf32_init (struct pt_regs *regs) * Install LDT as anonymous memory. This gives us all-zero segment descriptors * until a task modifies them via modify_ldt(). */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -214,7 +214,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 3aaede0d698..e2321536ee4 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2302,7 +2302,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon DPRINT(("smpl_buf @%p\n", smpl_buf)); /* allocate vma */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) { DPRINT(("Cannot allocate vma\n")); goto error_kmem; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index ff87a5cba39..56dc2024220 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -156,7 +156,7 @@ ia64_init_addr_space (void) * the problem. When the process attempts to write to the register backing store * for the first time, it will get a SEGFAULT in this case. */ - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; @@ -175,7 +175,7 @@ ia64_init_addr_space (void) /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ if (!(current->personality & MMAP_PAGE_ZERO)) { - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index c913ad5cad2..a4b28c73bba 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -264,7 +264,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, /* Allocate a VMA structure and fill it up */ - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (vma == NULL) { rc = -ENOMEM; goto fail_mmapsem; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index c7d010749a1..7edfcc9d285 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -48,7 +48,7 @@ spufs_alloc_inode(struct super_block *sb) { struct spufs_inode_info *ei; - ei = kmem_cache_alloc(spufs_inode_cache, SLAB_KERNEL); + ei = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL); if (!ei) return NULL; diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 075d6cc1a2d..deb46941f31 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -97,7 +97,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, goto up_fail; } - vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); if (!vma) { ret = -ENOMEM; goto up_fail; diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 82ef182de6a..932a62ad6c8 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -351,7 +351,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 3a01329473a..3e5ed20cba4 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c @@ -49,7 +49,7 @@ int syscall32_setup_pages(struct linux_binprm *bprm, int exstack) struct mm_struct *mm = current->mm; int ret; - vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!vma) return -ENOMEM; diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 2a2f0fc2288..ec8a7a633e6 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -820,7 +820,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPS_POOL - cpuaddr = pci_pool_alloc(he_dev->rbps_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|SLAB_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else @@ -884,7 +884,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPL_POOL - cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, SLAB_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|SLAB_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else diff --git a/drivers/base/dmapool.c b/drivers/base/dmapool.c index fa4675254f6..dbe0735f8c9 100644 --- a/drivers/base/dmapool.c +++ b/drivers/base/dmapool.c @@ -126,7 +126,7 @@ dma_pool_create (const char *name, struct device *dev, } else if (allocation < size) return NULL; - if (!(retval = kmalloc (sizeof *retval, SLAB_KERNEL))) + if (!(retval = kmalloc (sizeof *retval, GFP_KERNEL))) return retval; strlcpy (retval->name, name, sizeof retval->name); diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c index 0358419a0e4..8e872610461 100644 --- a/drivers/dma/ioatdma.c +++ b/drivers/dma/ioatdma.c @@ -636,10 +636,10 @@ static int ioat_self_test(struct ioat_device *device) dma_cookie_t cookie; int err = 0; - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL); + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!src) return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, SLAB_KERNEL); + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); if (!dest) { kfree(src); return -ENOMEM; diff --git a/drivers/ieee1394/hosts.c b/drivers/ieee1394/hosts.c index 8f4378a1631..b935e08695a 100644 --- a/drivers/ieee1394/hosts.c +++ b/drivers/ieee1394/hosts.c @@ -123,7 +123,7 @@ struct hpsb_host *hpsb_alloc_host(struct hpsb_host_driver *drv, size_t extra, int i; int hostnum = 0; - h = kzalloc(sizeof(*h) + extra, SLAB_KERNEL); + h = kzalloc(sizeof(*h) + extra, GFP_KERNEL); if (!h) return NULL; diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c index 6e8ea9110c4..eae97d8dcf0 100644 --- a/drivers/ieee1394/ohci1394.c +++ b/drivers/ieee1394/ohci1394.c @@ -1225,7 +1225,7 @@ static int ohci_iso_recv_init(struct hpsb_iso *iso) int ctx; int ret = -ENOMEM; - recv = kmalloc(sizeof(*recv), SLAB_KERNEL); + recv = kmalloc(sizeof(*recv), GFP_KERNEL); if (!recv) return -ENOMEM; @@ -1918,7 +1918,7 @@ static int ohci_iso_xmit_init(struct hpsb_iso *iso) int ctx; int ret = -ENOMEM; - xmit = kmalloc(sizeof(*xmit), SLAB_KERNEL); + xmit = kmalloc(sizeof(*xmit), GFP_KERNEL); if (!xmit) return -ENOMEM; @@ -3021,7 +3021,7 @@ alloc_dma_rcv_ctx(struct ti_ohci *ohci, struct dma_rcv_ctx *d, return -ENOMEM; } - d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i); + d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i); OHCI_DMA_ALLOC("pool dma_rcv prg[%d]", i); if (d->prg_cpu[i] != NULL) { @@ -3117,7 +3117,7 @@ alloc_dma_trm_ctx(struct ti_ohci *ohci, struct dma_trm_ctx *d, OHCI_DMA_ALLOC("dma_rcv prg pool"); for (i = 0; i < d->num_desc; i++) { - d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, SLAB_KERNEL, d->prg_bus+i); + d->prg_cpu[i] = pci_pool_alloc(d->prg_pool, GFP_KERNEL, d->prg_bus+i); OHCI_DMA_ALLOC("pool dma_trm prg[%d]", i); if (d->prg_cpu[i] != NULL) { diff --git a/drivers/ieee1394/pcilynx.c b/drivers/ieee1394/pcilynx.c index 0a7412e27eb..9cab1d66147 100644 --- a/drivers/ieee1394/pcilynx.c +++ b/drivers/ieee1394/pcilynx.c @@ -1428,7 +1428,7 @@ static int __devinit add_card(struct pci_dev *dev, struct i2c_algo_bit_data i2c_adapter_data; error = -ENOMEM; - i2c_ad = kmalloc(sizeof(*i2c_ad), SLAB_KERNEL); + i2c_ad = kmalloc(sizeof(*i2c_ad), GFP_KERNEL); if (!i2c_ad) FAIL("failed to allocate I2C adapter memory"); memcpy(i2c_ad, &bit_ops, sizeof(struct i2c_adapter)); diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c index 47f6a4e29b4..bf71e069eaf 100644 --- a/drivers/ieee1394/raw1394.c +++ b/drivers/ieee1394/raw1394.c @@ -112,7 +112,7 @@ static struct pending_request *__alloc_pending_request(gfp_t flags) static inline struct pending_request *alloc_pending_request(void) { - return __alloc_pending_request(SLAB_KERNEL); + return __alloc_pending_request(GFP_KERNEL); } static void free_pending_request(struct pending_request *req) @@ -1737,7 +1737,7 @@ static int arm_register(struct file_info *fi, struct pending_request *req) return (-EINVAL); } /* addr-list-entry for fileinfo */ - addr = kmalloc(sizeof(*addr), SLAB_KERNEL); + addr = kmalloc(sizeof(*addr), GFP_KERNEL); if (!addr) { req->req.length = 0; return (-ENOMEM); @@ -2103,7 +2103,7 @@ static int write_phypacket(struct file_info *fi, struct pending_request *req) static int get_config_rom(struct file_info *fi, struct pending_request *req) { int ret = sizeof(struct raw1394_request); - quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL); + quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL); int status; if (!data) @@ -2133,7 +2133,7 @@ static int get_config_rom(struct file_info *fi, struct pending_request *req) static int update_config_rom(struct file_info *fi, struct pending_request *req) { int ret = sizeof(struct raw1394_request); - quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL); + quadlet_t *data = kmalloc(req->req.length, GFP_KERNEL); if (!data) return -ENOMEM; if (copy_from_user(data, int2ptr(req->req.sendb), req->req.length)) { @@ -2779,7 +2779,7 @@ static int raw1394_open(struct inode *inode, struct file *file) { struct file_info *fi; - fi = kzalloc(sizeof(*fi), SLAB_KERNEL); + fi = kzalloc(sizeof(*fi), GFP_KERNEL); if (!fi) return -ENOMEM; diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c index 214e2fdddee..0d6e2c4bb24 100644 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ b/drivers/infiniband/hw/ehca/ehca_av.c @@ -57,7 +57,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); - av = kmem_cache_alloc(av_cache, SLAB_KERNEL); + av = kmem_cache_alloc(av_cache, GFP_KERNEL); if (!av) { ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", pd, ah_attr); diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 458fe19648a..93995b658d9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -134,7 +134,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) return ERR_PTR(-EINVAL); - my_cq = kmem_cache_alloc(cq_cache, SLAB_KERNEL); + my_cq = kmem_cache_alloc(cq_cache, GFP_KERNEL); if (!my_cq) { ehca_err(device, "Out of memory for ehca_cq struct device=%p", device); diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index 3d1c1c53503..cc47e4c13a1 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -108,7 +108,7 @@ static struct kmem_cache *ctblk_cache = NULL; void *ehca_alloc_fw_ctrlblock(void) { - void *ret = kmem_cache_zalloc(ctblk_cache, SLAB_KERNEL); + void *ret = kmem_cache_zalloc(ctblk_cache, GFP_KERNEL); if (!ret) ehca_gen_err("Out of memory for ctblk"); return ret; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index abce676c0ae..0a5e2214cc5 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -53,7 +53,7 @@ static struct ehca_mr *ehca_mr_new(void) { struct ehca_mr *me; - me = kmem_cache_alloc(mr_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mr_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mr)); spin_lock_init(&me->mrlock); @@ -72,7 +72,7 @@ static struct ehca_mw *ehca_mw_new(void) { struct ehca_mw *me; - me = kmem_cache_alloc(mw_cache, SLAB_KERNEL); + me = kmem_cache_alloc(mw_cache, GFP_KERNEL); if (me) { memset(me, 0, sizeof(struct ehca_mw)); spin_lock_init(&me->mwlock); diff --git a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c index 2c3cdc6f7b3..d5345e5b3cd 100644 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ b/drivers/infiniband/hw/ehca/ehca_pd.c @@ -50,7 +50,7 @@ struct ib_pd *ehca_alloc_pd(struct ib_device *device, { struct ehca_pd *pd; - pd = kmem_cache_alloc(pd_cache, SLAB_KERNEL); + pd = kmem_cache_alloc(pd_cache, GFP_KERNEL); if (!pd) { ehca_err(device, "device=%p context=%p out of memory", device, context); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index 8682aa50c70..c6c9cef203e 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -450,7 +450,7 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd, if (pd->uobject && udata) context = pd->uobject->context; - my_qp = kmem_cache_alloc(qp_cache, SLAB_KERNEL); + my_qp = kmem_cache_alloc(qp_cache, GFP_KERNEL); if (!my_qp) { ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); return ERR_PTR(-ENOMEM); diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index f56d6a0f062..0517c7387d6 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -189,7 +189,7 @@ static int ads7846_read12_ser(struct device *dev, unsigned command) { struct spi_device *spi = to_spi_device(dev); struct ads7846 *ts = dev_get_drvdata(dev); - struct ser_req *req = kzalloc(sizeof *req, SLAB_KERNEL); + struct ser_req *req = kzalloc(sizeof *req, GFP_KERNEL); int status; int sample; int i; diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 5857e7e23f8..63b629b1cdb 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2218,21 +2218,21 @@ static int gigaset_probe(struct usb_interface *interface, * - three for the different uses of the default control pipe * - three for each isochronous pipe */ - if (!(ucs->urb_int_in = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_cmd_in = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_cmd_out = usb_alloc_urb(0, SLAB_KERNEL)) || - !(ucs->urb_ctrl = usb_alloc_urb(0, SLAB_KERNEL))) + if (!(ucs->urb_int_in = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_cmd_in = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_cmd_out = usb_alloc_urb(0, GFP_KERNEL)) || + !(ucs->urb_ctrl = usb_alloc_urb(0, GFP_KERNEL))) goto allocerr; for (j = 0; j < 2; ++j) { ubc = cs->bcs[j].hw.bas; for (i = 0; i < BAS_OUTURBS; ++i) if (!(ubc->isoouturbs[i].urb = - usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL))) + usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL))) goto allocerr; for (i = 0; i < BAS_INURBS; ++i) if (!(ubc->isoinurbs[i] = - usb_alloc_urb(BAS_NUMFRAMES, SLAB_KERNEL))) + usb_alloc_urb(BAS_NUMFRAMES, GFP_KERNEL))) goto allocerr; } @@ -2246,7 +2246,7 @@ static int gigaset_probe(struct usb_interface *interface, (endpoint->bEndpointAddress) & 0x0f), ucs->int_in_buf, 3, read_int_callback, cs, endpoint->bInterval); - if ((rc = usb_submit_urb(ucs->urb_int_in, SLAB_KERNEL)) != 0) { + if ((rc = usb_submit_urb(ucs->urb_int_in, GFP_KERNEL)) != 0) { dev_err(cs->dev, "could not submit interrupt URB: %s\n", get_usb_rcmsg(rc)); goto error; diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index af89ce188f2..04f2ad7ba8b 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -763,7 +763,7 @@ static int gigaset_probe(struct usb_interface *interface, goto error; } - ucs->bulk_out_urb = usb_alloc_urb(0, SLAB_KERNEL); + ucs->bulk_out_urb = usb_alloc_urb(0, GFP_KERNEL); if (!ucs->bulk_out_urb) { dev_err(cs->dev, "Couldn't allocate bulk_out_urb\n"); retval = -ENOMEM; @@ -774,7 +774,7 @@ static int gigaset_probe(struct usb_interface *interface, atomic_set(&ucs->busy, 0); - ucs->read_urb = usb_alloc_urb(0, SLAB_KERNEL); + ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL); if (!ucs->read_urb) { dev_err(cs->dev, "No free urbs available\n"); retval = -ENOMEM; @@ -797,7 +797,7 @@ static int gigaset_probe(struct usb_interface *interface, gigaset_read_int_callback, cs->inbuf + 0, endpoint->bInterval); - retval = usb_submit_urb(ucs->read_urb, SLAB_KERNEL); + retval = usb_submit_urb(ucs->read_urb, GFP_KERNEL); if (retval) { dev_err(cs->dev, "Could not submit URB (error %d)\n", -retval); goto error; diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 206c13e47a0..9123147e376 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -287,7 +287,7 @@ static int cinergyt2_alloc_stream_urbs (struct cinergyt2 *cinergyt2) int i; cinergyt2->streambuf = usb_buffer_alloc(cinergyt2->udev, STREAM_URB_COUNT*STREAM_BUF_SIZE, - SLAB_KERNEL, &cinergyt2->streambuf_dmahandle); + GFP_KERNEL, &cinergyt2->streambuf_dmahandle); if (!cinergyt2->streambuf) { dprintk(1, "failed to alloc consistent stream memory area, bailing out!\n"); return -ENOMEM; diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index ef4a731ca5c..334e078ffaf 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -451,7 +451,7 @@ static int __devinit m25p_probe(struct spi_device *spi) return -ENODEV; } - flash = kzalloc(sizeof *flash, SLAB_KERNEL); + flash = kzalloc(sizeof *flash, GFP_KERNEL); if (!flash) return -ENOMEM; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index ccd4dafce8e..b318500785e 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6940,7 +6940,7 @@ static int __devinit ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { - ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, SLAB_KERNEL, &dma_addr); + ipr_cmd = pci_pool_alloc (ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); if (!ipr_cmd) { ipr_free_cmd_blks(ioa_cfg); diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c3c0626f550..09f2c74a40c 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -360,7 +360,7 @@ spi_alloc_master(struct device *dev, unsigned size) if (!dev) return NULL; - master = kzalloc(size + sizeof *master, SLAB_KERNEL); + master = kzalloc(size + sizeof *master, GFP_KERNEL); if (!master) return NULL; @@ -607,7 +607,7 @@ static int __init spi_init(void) { int status; - buf = kmalloc(SPI_BUFSIZ, SLAB_KERNEL); + buf = kmalloc(SPI_BUFSIZ, GFP_KERNEL); if (!buf) { status = -ENOMEM; goto err0; diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c index 08c1c57c612..57289b61d0b 100644 --- a/drivers/spi/spi_bitbang.c +++ b/drivers/spi/spi_bitbang.c @@ -196,7 +196,7 @@ int spi_bitbang_setup(struct spi_device *spi) return -EINVAL; if (!cs) { - cs = kzalloc(sizeof *cs, SLAB_KERNEL); + cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) return -ENOMEM; spi->controller_state = cs; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0a46acf557a..77c05be5241 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2371,7 +2371,7 @@ check_highspeed (struct usb_hub *hub, struct usb_device *udev, int port1) struct usb_qualifier_descriptor *qual; int status; - qual = kmalloc (sizeof *qual, SLAB_KERNEL); + qual = kmalloc (sizeof *qual, GFP_KERNEL); if (qual == NULL) return; @@ -2922,7 +2922,7 @@ static int config_descriptors_changed(struct usb_device *udev) if (len < le16_to_cpu(udev->config[index].desc.wTotalLength)) len = le16_to_cpu(udev->config[index].desc.wTotalLength); } - buf = kmalloc (len, SLAB_KERNEL); + buf = kmalloc (len, GFP_KERNEL); if (buf == NULL) { dev_err(&udev->dev, "no mem to re-read configs after reset\n"); /* assume the worst */ diff --git a/drivers/usb/gadget/gmidi.c b/drivers/usb/gadget/gmidi.c index 64554acad63..31351826f2b 100644 --- a/drivers/usb/gadget/gmidi.c +++ b/drivers/usb/gadget/gmidi.c @@ -1236,7 +1236,7 @@ autoconf_fail: /* ok, we made sense of the hardware ... */ - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { return -ENOMEM; } diff --git a/drivers/usb/gadget/goku_udc.c b/drivers/usb/gadget/goku_udc.c index a3076da3f4e..805a9826842 100644 --- a/drivers/usb/gadget/goku_udc.c +++ b/drivers/usb/gadget/goku_udc.c @@ -1864,7 +1864,7 @@ static int goku_probe(struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kmalloc (sizeof *dev, SLAB_KERNEL); + dev = kmalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ pr_debug("enomem %s\n", pci_name(pdev)); retval = -ENOMEM; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 86924f9cdd7..3fb1044a4db 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -412,7 +412,7 @@ ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) /* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */ value = -ENOMEM; - kbuf = kmalloc (len, SLAB_KERNEL); + kbuf = kmalloc (len, GFP_KERNEL); if (unlikely (!kbuf)) goto free1; @@ -456,7 +456,7 @@ ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) /* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */ value = -ENOMEM; - kbuf = kmalloc (len, SLAB_KERNEL); + kbuf = kmalloc (len, GFP_KERNEL); if (!kbuf) goto free1; if (copy_from_user (kbuf, buf, len)) { @@ -1898,7 +1898,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) buf += 4; length -= 4; - kbuf = kmalloc (length, SLAB_KERNEL); + kbuf = kmalloc (length, GFP_KERNEL); if (!kbuf) return -ENOMEM; if (copy_from_user (kbuf, buf, length)) { diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c index 0b590831582..3024c679e38 100644 --- a/drivers/usb/gadget/net2280.c +++ b/drivers/usb/gadget/net2280.c @@ -2861,7 +2861,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id) } /* alloc, and start init */ - dev = kzalloc (sizeof *dev, SLAB_KERNEL); + dev = kzalloc (sizeof *dev, GFP_KERNEL); if (dev == NULL){ retval = -ENOMEM; goto done; diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c index 48a09fd89d1..030d87c28c2 100644 --- a/drivers/usb/gadget/omap_udc.c +++ b/drivers/usb/gadget/omap_udc.c @@ -2581,7 +2581,7 @@ omap_udc_setup(struct platform_device *odev, struct otg_transceiver *xceiv) /* UDC_PULLUP_EN gates the chip clock */ // OTG_SYSCON_1_REG |= DEV_IDLE_EN; - udc = kzalloc(sizeof(*udc), SLAB_KERNEL); + udc = kzalloc(sizeof(*udc), GFP_KERNEL); if (!udc) return -ENOMEM; diff --git a/drivers/usb/gadget/zero.c b/drivers/usb/gadget/zero.c index 0f809dd6849..40710ea1b49 100644 --- a/drivers/usb/gadget/zero.c +++ b/drivers/usb/gadget/zero.c @@ -1190,7 +1190,7 @@ autoconf_fail: /* ok, we made sense of the hardware ... */ - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; spin_lock_init (&dev->lock); diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index 396dc69d4b4..7fd872aa654 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ -188,7 +188,7 @@ static DEFINE_TIMER(bulk_eot_timer, NULL, 0, 0); #define CHECK_ALIGN(x) if (((__u32)(x)) & 0x00000003) \ {panic("Alignment check (DWORD) failed at %s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);} -#define SLAB_FLAG (in_interrupt() ? GFP_ATOMIC : SLAB_KERNEL) +#define SLAB_FLAG (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) #define KMALLOC_FLAG (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) /* Most helpful debugging aid */ diff --git a/drivers/usb/host/ohci-pnx4008.c b/drivers/usb/host/ohci-pnx4008.c index 2dbb7741490..7f26f9bdbaf 100644 --- a/drivers/usb/host/ohci-pnx4008.c +++ b/drivers/usb/host/ohci-pnx4008.c @@ -134,7 +134,7 @@ static int isp1301_attach(struct i2c_adapter *adap, int addr, int kind) { struct i2c_client *c; - c = (struct i2c_client *)kzalloc(sizeof(*c), SLAB_KERNEL); + c = (struct i2c_client *)kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return -ENOMEM; diff --git a/drivers/usb/input/acecad.c b/drivers/usb/input/acecad.c index 0096373b5f9..909138e5aa0 100644 --- a/drivers/usb/input/acecad.c +++ b/drivers/usb/input/acecad.c @@ -152,7 +152,7 @@ static int usb_acecad_probe(struct usb_interface *intf, const struct usb_device_ if (!acecad || !input_dev) goto fail1; - acecad->data = usb_buffer_alloc(dev, 8, SLAB_KERNEL, &acecad->data_dma); + acecad->data = usb_buffer_alloc(dev, 8, GFP_KERNEL, &acecad->data_dma); if (!acecad->data) goto fail1; diff --git a/drivers/usb/input/usbtouchscreen.c b/drivers/usb/input/usbtouchscreen.c index 49704d4ed0e..7f3c57da9bc 100644 --- a/drivers/usb/input/usbtouchscreen.c +++ b/drivers/usb/input/usbtouchscreen.c @@ -680,7 +680,7 @@ static int usbtouch_probe(struct usb_interface *intf, type->process_pkt = usbtouch_process_pkt; usbtouch->data = usb_buffer_alloc(udev, type->rept_size, - SLAB_KERNEL, &usbtouch->data_dma); + GFP_KERNEL, &usbtouch->data_dma); if (!usbtouch->data) goto out_free; diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index ea04dccdc65..fb321864a92 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -213,7 +213,7 @@ static struct urb *simple_alloc_urb ( if (bytes < 0) return NULL; - urb = usb_alloc_urb (0, SLAB_KERNEL); + urb = usb_alloc_urb (0, GFP_KERNEL); if (!urb) return urb; usb_fill_bulk_urb (urb, udev, pipe, NULL, bytes, simple_callback, NULL); @@ -223,7 +223,7 @@ static struct urb *simple_alloc_urb ( urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; if (usb_pipein (pipe)) urb->transfer_flags |= URB_SHORT_NOT_OK; - urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL, + urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb (urb); @@ -315,7 +315,7 @@ static int simple_io ( init_completion (&completion); if (usb_pipeout (urb->pipe)) simple_fill_buf (urb); - if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0) + if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0) break; /* NOTE: no timeouts; can't be broken out of by interrupt */ @@ -374,7 +374,7 @@ alloc_sglist (int nents, int max, int vary) unsigned i; unsigned size = max; - sg = kmalloc (nents * sizeof *sg, SLAB_KERNEL); + sg = kmalloc (nents * sizeof *sg, GFP_KERNEL); if (!sg) return NULL; @@ -382,7 +382,7 @@ alloc_sglist (int nents, int max, int vary) char *buf; unsigned j; - buf = kzalloc (size, SLAB_KERNEL); + buf = kzalloc (size, GFP_KERNEL); if (!buf) { free_sglist (sg, i); return NULL; @@ -428,7 +428,7 @@ static int perform_sglist ( (udev->speed == USB_SPEED_HIGH) ? (INTERRUPT_RATE << 3) : INTERRUPT_RATE, - sg, nents, 0, SLAB_KERNEL); + sg, nents, 0, GFP_KERNEL); if (retval) break; @@ -855,7 +855,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) * as with bulk/intr sglists, sglen is the queue depth; it also * controls which subtests run (more tests than sglen) or rerun. */ - urb = kcalloc(param->sglen, sizeof(struct urb *), SLAB_KERNEL); + urb = kcalloc(param->sglen, sizeof(struct urb *), GFP_KERNEL); if (!urb) return -ENOMEM; for (i = 0; i < param->sglen; i++) { @@ -981,7 +981,7 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) if (!u) goto cleanup; - reqp = usb_buffer_alloc (udev, sizeof *reqp, SLAB_KERNEL, + reqp = usb_buffer_alloc (udev, sizeof *reqp, GFP_KERNEL, &u->setup_dma); if (!reqp) goto cleanup; @@ -1067,7 +1067,7 @@ static int unlink1 (struct usbtest_dev *dev, int pipe, int size, int async) * FIXME want additional tests for when endpoint is STALLing * due to errors, or is just NAKing requests. */ - if ((retval = usb_submit_urb (urb, SLAB_KERNEL)) != 0) { + if ((retval = usb_submit_urb (urb, GFP_KERNEL)) != 0) { dev_dbg (&dev->intf->dev, "submit fail %d\n", retval); return retval; } @@ -1251,7 +1251,7 @@ static int ctrl_out (struct usbtest_dev *dev, if (length < 1 || length > 0xffff || vary >= length) return -EINVAL; - buf = kmalloc(length, SLAB_KERNEL); + buf = kmalloc(length, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1403,7 +1403,7 @@ static struct urb *iso_alloc_urb ( maxp *= 1 + (0x3 & (le16_to_cpu(desc->wMaxPacketSize) >> 11)); packets = (bytes + maxp - 1) / maxp; - urb = usb_alloc_urb (packets, SLAB_KERNEL); + urb = usb_alloc_urb (packets, GFP_KERNEL); if (!urb) return urb; urb->dev = udev; @@ -1411,7 +1411,7 @@ static struct urb *iso_alloc_urb ( urb->number_of_packets = packets; urb->transfer_buffer_length = bytes; - urb->transfer_buffer = usb_buffer_alloc (udev, bytes, SLAB_KERNEL, + urb->transfer_buffer = usb_buffer_alloc (udev, bytes, GFP_KERNEL, &urb->transfer_dma); if (!urb->transfer_buffer) { usb_free_urb (urb); @@ -1900,7 +1900,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) } #endif - dev = kzalloc(sizeof(*dev), SLAB_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; info = (struct usbtest_info *) id->driver_info; @@ -1910,7 +1910,7 @@ usbtest_probe (struct usb_interface *intf, const struct usb_device_id *id) dev->intf = intf; /* cacheline-aligned scratch for i/o */ - if ((dev->buf = kmalloc (TBUF_SIZE, SLAB_KERNEL)) == NULL) { + if ((dev->buf = kmalloc (TBUF_SIZE, GFP_KERNEL)) == NULL) { kfree (dev); return -ENOMEM; } diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c index c2a28d88ef3..99f26b3e502 100644 --- a/drivers/usb/net/rndis_host.c +++ b/drivers/usb/net/rndis_host.c @@ -469,7 +469,7 @@ static void rndis_unbind(struct usbnet *dev, struct usb_interface *intf) struct rndis_halt *halt; /* try to clear any rndis state/activity (no i/o from stack!) */ - halt = kcalloc(1, sizeof *halt, SLAB_KERNEL); + halt = kcalloc(1, sizeof *halt, GFP_KERNEL); if (halt) { halt->msg_type = RNDIS_MSG_HALT; halt->msg_len = ccpu2(sizeof *halt); diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 327f9755567..6e39e998825 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -179,9 +179,9 @@ static int init_status (struct usbnet *dev, struct usb_interface *intf) period = max ((int) dev->status->desc.bInterval, (dev->udev->speed == USB_SPEED_HIGH) ? 7 : 3); - buf = kmalloc (maxp, SLAB_KERNEL); + buf = kmalloc (maxp, GFP_KERNEL); if (buf) { - dev->interrupt = usb_alloc_urb (0, SLAB_KERNEL); + dev->interrupt = usb_alloc_urb (0, GFP_KERNEL); if (!dev->interrupt) { kfree (buf); return -ENOMEM; diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 9ade139086f..52eb10ca654 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -217,7 +217,7 @@ static kmem_cache_t *adfs_inode_cachep; static struct inode *adfs_alloc_inode(struct super_block *sb) { struct adfs_inode_info *ei; - ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, SLAB_KERNEL); + ei = (struct adfs_inode_info *)kmem_cache_alloc(adfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/affs/super.c b/fs/affs/super.c index 5ea72c3a16c..81c73ec09f6 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -71,7 +71,7 @@ static kmem_cache_t * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) { struct affs_inode_info *ei; - ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, SLAB_KERNEL); + ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->vfs_inode.i_version = 1; diff --git a/fs/afs/super.c b/fs/afs/super.c index 67d1f5c819e..c6ead009bf7 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -412,7 +412,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb) struct afs_vnode *vnode; vnode = (struct afs_vnode *) - kmem_cache_alloc(afs_inode_cachep, SLAB_KERNEL); + kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 07f7144f0e2..995348df94a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -277,7 +277,7 @@ befs_alloc_inode(struct super_block *sb) { struct befs_inode_info *bi; bi = (struct befs_inode_info *)kmem_cache_alloc(befs_inode_cachep, - SLAB_KERNEL); + GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index ed27ffb3459..2e45123c8f7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -233,7 +233,7 @@ static kmem_cache_t * bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) { struct bfs_inode_info *bi; - bi = kmem_cache_alloc(bfs_inode_cachep, SLAB_KERNEL); + bi = kmem_cache_alloc(bfs_inode_cachep, GFP_KERNEL); if (!bi) return NULL; return &bi->vfs_inode; diff --git a/fs/block_dev.c b/fs/block_dev.c index 36c0e7af9d0..063506705f2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -239,7 +239,7 @@ static kmem_cache_t * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { - struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL); + struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 84976cdbe71..84168629cea 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -245,7 +245,7 @@ static struct inode * cifs_alloc_inode(struct super_block *sb) { struct cifsInodeInfo *cifs_inode; - cifs_inode = kmem_cache_alloc(cifs_inode_cachep, SLAB_KERNEL); + cifs_inode = kmem_cache_alloc(cifs_inode_cachep, GFP_KERNEL); if (!cifs_inode) return NULL; cifs_inode->cifsAttrs = 0x20; /* default */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 8355daff504..aedf683f011 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -153,7 +153,7 @@ cifs_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_req_poolp, SLAB_KERNEL | GFP_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_req_poolp, GFP_KERNEL | GFP_NOFS); /* clear the first few header bytes */ /* for most paths, more is cleared in header_assemble */ @@ -192,7 +192,7 @@ cifs_small_buf_get(void) albeit slightly larger than necessary and maxbuffersize defaults to this and can not be bigger */ ret_buf = - (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, SLAB_KERNEL | GFP_NOFS); + (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, GFP_KERNEL | GFP_NOFS); if (ret_buf) { /* No need to clear memory here, cleared in header assemble */ /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 7514237cf31..1f727765a8e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -51,7 +51,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) } temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, - SLAB_KERNEL | GFP_NOFS); + GFP_KERNEL | GFP_NOFS); if (temp == NULL) return temp; else { @@ -118,7 +118,7 @@ AllocOplockQEntry(struct inode * pinode, __u16 fid, struct cifsTconInfo * tcon) return NULL; } temp = (struct oplock_q_entry *) kmem_cache_alloc(cifs_oplock_cachep, - SLAB_KERNEL); + GFP_KERNEL); if (temp == NULL) return temp; else { diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 88d12332116..50cedd2617d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -43,7 +43,7 @@ static kmem_cache_t * coda_inode_cachep; static struct inode *coda_alloc_inode(struct super_block *sb) { struct coda_inode_info *ei; - ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, SLAB_KERNEL); + ei = (struct coda_inode_info *)kmem_cache_alloc(coda_inode_cachep, GFP_KERNEL); if (!ei) return NULL; memset(&ei->c_fid, 0, sizeof(struct CodaFid)); diff --git a/fs/dnotify.c b/fs/dnotify.c index 2b0442db67e..e778b1737b7 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -77,7 +77,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) inode = filp->f_dentry->d_inode; if (!S_ISDIR(inode->i_mode)) return -ENOTDIR; - dn = kmem_cache_alloc(dn_cache, SLAB_KERNEL); + dn = kmem_cache_alloc(dn_cache, GFP_KERNEL); if (dn == NULL) return -ENOMEM; spin_lock(&inode->i_lock); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 776b2eed371..7196f50fe15 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -628,7 +628,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page) num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size; base_extent = (page->index * num_extents_per_page); lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache, - SLAB_KERNEL); + GFP_KERNEL); if (!lower_page_virt) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error getting page for encrypted " diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a92ef05eff8..42099e779a5 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -250,7 +250,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) int lower_flags; /* Released in ecryptfs_release or end of function if failure */ - file_info = kmem_cache_alloc(ecryptfs_file_info_cache, SLAB_KERNEL); + file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL); ecryptfs_set_file_private(file, file_info); if (!file_info) { ecryptfs_printk(KERN_ERR, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 70911412044..8a1945a84c3 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -369,7 +369,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, BUG_ON(!atomic_read(&lower_dentry->d_count)); ecryptfs_set_dentry_private(dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_dentry_to_private(dentry)) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Out of memory whilst attempting " @@ -795,7 +795,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) /* Released at out_free: label */ ecryptfs_set_file_private(&fake_ecryptfs_file, kmem_cache_alloc(ecryptfs_file_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (unlikely(!ecryptfs_file_to_private(&fake_ecryptfs_file))) { rc = -ENOMEM; goto out; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index c3746f56d16..745c0f1bfbb 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -207,7 +207,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, /* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or * at end of function upon failure */ auth_tok_list_item = - kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, SLAB_KERNEL); + kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL); if (!auth_tok_list_item) { ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n"); rc = -ENOMEM; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a78d87d14ba..a2c6ccbce30 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -378,7 +378,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) /* Released in ecryptfs_put_super() */ ecryptfs_set_superblock_private(sb, kmem_cache_alloc(ecryptfs_sb_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_superblock_to_private(sb)) { ecryptfs_printk(KERN_WARNING, "Out of memory\n"); rc = -ENOMEM; @@ -402,7 +402,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent) /* through deactivate_super(sb) from get_sb_nodev() */ ecryptfs_set_dentry_private(sb->s_root, kmem_cache_alloc(ecryptfs_dentry_info_cache, - SLAB_KERNEL)); + GFP_KERNEL)); if (!ecryptfs_dentry_to_private(sb->s_root)) { ecryptfs_printk(KERN_ERR, "dentry_info_cache alloc failed\n"); diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 825757ae486..eaa5daaf106 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -50,7 +50,7 @@ static struct inode *ecryptfs_alloc_inode(struct super_block *sb) struct inode *inode = NULL; ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache, - SLAB_KERNEL); + GFP_KERNEL); if (unlikely(!ecryptfs_inode)) goto out; ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat); diff --git a/fs/efs/super.c b/fs/efs/super.c index b3f50651eb6..69b15a996cf 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -57,7 +57,7 @@ static kmem_cache_t * efs_inode_cachep; static struct inode *efs_alloc_inode(struct super_block *sb) { struct efs_inode_info *ei; - ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, SLAB_KERNEL); + ei = (struct efs_inode_info *)kmem_cache_alloc(efs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ae228ec54e9..f5c88435c6b 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -961,7 +961,7 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; - if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) { + if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; @@ -1004,7 +1004,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct ep_pqueue epq; error = -ENOMEM; - if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL))) + if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) goto eexit_1; /* Item initialization follow here ... */ diff --git a/fs/exec.c b/fs/exec.c index d993ea1a81a..2092bd20746 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -404,7 +404,7 @@ int setup_arg_pages(struct linux_binprm *bprm, bprm->loader += stack_base; bprm->exec += stack_base; - mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!mpnt) return -ENOMEM; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index d8b9abd95d0..85c237e7385 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -140,7 +140,7 @@ static kmem_cache_t * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); + ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; #ifdef CONFIG_EXT2_FS_POSIX_ACL diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 82cc4f59e3b..8c272278455 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -63,7 +63,7 @@ void fat_cache_destroy(void) static inline struct fat_cache *fat_cache_alloc(struct inode *inode) { - return kmem_cache_alloc(fat_cache_cachep, SLAB_KERNEL); + return kmem_cache_alloc(fat_cache_cachep, GFP_KERNEL); } static inline void fat_cache_free(struct fat_cache *cache) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 78945b53b0f..b58fd0c9f3c 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -482,7 +482,7 @@ static kmem_cache_t *fat_inode_cachep; static struct inode *fat_alloc_inode(struct super_block *sb) { struct msdos_inode_info *ei; - ei = kmem_cache_alloc(fat_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(fat_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/fcntl.c b/fs/fcntl.c index e4f26165f12..c03dc9cb21c 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -567,7 +567,7 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap int result = 0; if (on) { - new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL); + new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); if (!new) return -ENOMEM; } diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 4786d51ad3b..d2dd0d70007 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -103,7 +103,7 @@ vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino) struct vxfs_inode_info *vip; struct vxfs_dinode *dip; - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL))) + if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) goto fail; dip = (struct vxfs_dinode *)(bp->b_data + offset); memcpy(vip, dip, sizeof(*vip)); @@ -145,7 +145,7 @@ __vxfs_iget(ino_t ino, struct inode *ilistp) struct vxfs_dinode *dip; caddr_t kaddr = (char *)page_address(pp); - if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, SLAB_KERNEL))) + if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL))) goto fail; dip = (struct vxfs_dinode *)(kaddr + offset); memcpy(vip, dip, sizeof(*vip)); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 66571eafbb1..8c15139f275 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -41,7 +41,7 @@ static void fuse_request_init(struct fuse_req *req) struct fuse_req *fuse_request_alloc(void) { - struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL); + struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL); if (req) fuse_request_init(req); return req; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fc420357037..e039e2047cc 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -46,7 +46,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) struct inode *inode; struct fuse_inode *fi; - inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL); + inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); if (!inode) return NULL; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 85b17b3fa4a..ffc6409132c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -145,7 +145,7 @@ static struct inode *hfs_alloc_inode(struct super_block *sb) { struct hfs_inode_info *i; - i = kmem_cache_alloc(hfs_inode_cachep, SLAB_KERNEL); + i = kmem_cache_alloc(hfs_inode_cachep, GFP_KERNEL); return i ? &i->vfs_inode : NULL; } diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 194eede52fa..4a0c70c76c8 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -440,7 +440,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) { struct hfsplus_inode_info *i; - i = kmem_cache_alloc(hfsplus_inode_cachep, SLAB_KERNEL); + i = kmem_cache_alloc(hfsplus_inode_cachep, GFP_KERNEL); return i ? &i->vfs_inode : NULL; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7f4756963d0..36e52173a54 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -522,7 +522,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) return NULL; - p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); if (unlikely(!p)) { hugetlbfs_inc_free_inodes(sbinfo); return NULL; diff --git a/fs/inode.c b/fs/inode.c index 26cdb115ce6..dd15984d51a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -109,7 +109,7 @@ static struct inode *alloc_inode(struct super_block *sb) if (sb->s_op->alloc_inode) inode = sb->s_op->alloc_inode(sb); else - inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL); + inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); if (inode) { struct address_space * const mapping = &inode->i_data; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index c34b862cdbf..4b6381cd2cf 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -62,7 +62,7 @@ static kmem_cache_t *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { struct iso_inode_info *ei; - ei = kmem_cache_alloc(isofs_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(isofs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index bc4b8106a49..77be534ce42 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -33,7 +33,7 @@ static kmem_cache_t *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { struct jffs2_inode_info *ei; - ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, SLAB_KERNEL); + ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/locks.c b/fs/locks.c index e0b6a80649a..a7b97d50c1e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -147,7 +147,7 @@ static kmem_cache_t *filelock_cache __read_mostly; /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) { - return kmem_cache_alloc(filelock_cache, SLAB_KERNEL); + return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } static void locks_release_private(struct file_lock *fl) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 1e36bae4d0e..ce532c2deda 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -56,7 +56,7 @@ static kmem_cache_t * minix_inode_cachep; static struct inode *minix_alloc_inode(struct super_block *sb) { struct minix_inode_info *ei; - ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, SLAB_KERNEL); + ei = (struct minix_inode_info *)kmem_cache_alloc(minix_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 72dad552aa0..ed84d899220 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -45,7 +45,7 @@ static kmem_cache_t * ncp_inode_cachep; static struct inode *ncp_alloc_inode(struct super_block *sb) { struct ncp_inode_info *ei; - ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, SLAB_KERNEL); + ei = (struct ncp_inode_info *)kmem_cache_alloc(ncp_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bdfabf854a5..769fd0a0c77 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -143,7 +143,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; - dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL); + dreq = kmem_cache_alloc(nfs_direct_cachep, GFP_KERNEL); if (!dreq) return NULL; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 08cc4c5919a..6b53aae4ed2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1080,7 +1080,7 @@ void nfs4_clear_inode(struct inode *inode) struct inode *nfs_alloc_inode(struct super_block *sb) { struct nfs_inode *nfsi; - nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); + nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL); if (!nfsi) return NULL; nfsi->flags = 0UL; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 829af323f28..a1561a820ab 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,7 +26,7 @@ static inline struct nfs_page * nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); + p = kmem_cache_alloc(nfs_page_cachep, GFP_KERNEL); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_list); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 592a6402e85..911d1bcfc56 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -336,7 +336,7 @@ static struct inode *openprom_alloc_inode(struct super_block *sb) { struct op_inode_info *oi; - oi = kmem_cache_alloc(op_inode_cachep, SLAB_KERNEL); + oi = kmem_cache_alloc(op_inode_cachep, GFP_KERNEL); if (!oi) return NULL; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 49dfb2ab783..b24cdb2f17c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -88,7 +88,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) struct proc_inode *ei; struct inode *inode; - ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); + ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->pid = NULL; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 5a41db2a218..5b943eb11d7 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -520,7 +520,7 @@ static kmem_cache_t *qnx4_inode_cachep; static struct inode *qnx4_alloc_inode(struct super_block *sb) { struct qnx4_inode_info *ei; - ei = kmem_cache_alloc(qnx4_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(qnx4_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 17249994110..32332516d65 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -496,7 +496,7 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb) { struct reiserfs_inode_info *ei; ei = (struct reiserfs_inode_info *) - kmem_cache_alloc(reiserfs_inode_cachep, SLAB_KERNEL); + kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index ddcd9e1ef28..d1b455f9b66 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -555,7 +555,7 @@ static kmem_cache_t * romfs_inode_cachep; static struct inode *romfs_alloc_inode(struct super_block *sb) { struct romfs_inode_info *ei; - ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL); + ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 2c122ee83ad..22161710348 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -55,7 +55,7 @@ static kmem_cache_t *smb_inode_cachep; static struct inode *smb_alloc_inode(struct super_block *sb) { struct smb_inode_info *ei; - ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, SLAB_KERNEL); + ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 0fb74697abc..3eb1402191b 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server, struct smb_request *req; unsigned char *buf = NULL; - req = kmem_cache_alloc(req_cachep, SLAB_KERNEL); + req = kmem_cache_alloc(req_cachep, GFP_KERNEL); VERBOSE("allocating request: %p\n", req); if (!req) goto out; diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index d63c5e48b05..a6ca12b747c 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -307,7 +307,7 @@ static struct inode *sysv_alloc_inode(struct super_block *sb) { struct sysv_inode_info *si; - si = kmem_cache_alloc(sysv_inode_cachep, SLAB_KERNEL); + si = kmem_cache_alloc(sysv_inode_cachep, GFP_KERNEL); if (!si) return NULL; return &si->vfs_inode; diff --git a/fs/udf/super.c b/fs/udf/super.c index 1aea6a4f9a4..e50f24221de 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -112,7 +112,7 @@ static kmem_cache_t * udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { struct udf_inode_info *ei; - ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL); + ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, GFP_KERNEL); if (!ei) return NULL; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ec79e3091d1..85a88c0c5e6 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1209,7 +1209,7 @@ static kmem_cache_t * ufs_inode_cachep; static struct inode *ufs_alloc_inode(struct super_block *sb) { struct ufs_inode_info *ei; - ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, SLAB_KERNEL); + ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL); if (!ei) return NULL; ei->vfs_inode.i_version = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index a8039c8d8cb..94b831b8157 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1483,7 +1483,7 @@ extern void __init vfs_caches_init(unsigned long); extern struct kmem_cache *names_cachep; -#define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) +#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL #define putname(name) __putname(name) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index db2c1df4fef..61c2ab634b0 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -34,7 +34,7 @@ extern kmem_cache_t *anon_vma_cachep; static inline struct anon_vma *anon_vma_alloc(void) { - return kmem_cache_alloc(anon_vma_cachep, SLAB_KERNEL); + return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL); } static inline void anon_vma_free(struct anon_vma *anon_vma) diff --git a/include/linux/slab.h b/include/linux/slab.h index 34b046ea88f..639f65efa46 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -19,7 +19,6 @@ typedef struct kmem_cache kmem_cache_t; #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ /* flags for kmem_cache_alloc() */ -#define SLAB_KERNEL GFP_KERNEL #define SLAB_DMA GFP_DMA /* flags to pass to kmem_cache_create(). diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 6562a2050a2..f81a5af8a4f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -35,7 +35,7 @@ static inline void taskstats_tgid_alloc(struct task_struct *tsk) return; /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); + stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); if (!sig->stats) { diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 7c274002c9f..813bb941342 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -224,7 +224,7 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb) { struct mqueue_inode_info *ei; - ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(mqueue_inode_cachep, GFP_KERNEL); if (!ei) return NULL; return &ei->vfs_inode; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 66a0ea48751..70e9ec60308 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -41,7 +41,7 @@ void delayacct_init(void) void __delayacct_tsk_init(struct task_struct *tsk) { - tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL); + tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); if (tsk->delays) spin_lock_init(&tsk->delays->lock); } diff --git a/kernel/fork.c b/kernel/fork.c index 5678e6c61ef..711aa5f10da 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -237,7 +237,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem; charge = len; } - tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!tmp) goto fail_nomem; *tmp = *mpnt; @@ -319,7 +319,7 @@ static inline void mm_free_pgd(struct mm_struct * mm) __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock); -#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) +#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) #include @@ -621,7 +621,7 @@ static struct files_struct *alloc_files(void) struct files_struct *newf; struct fdtable *fdt; - newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d3d28919d4b..1b2b326cf70 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -425,7 +425,7 @@ void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) *mycpu = raw_smp_processor_id(); *ptidstats = NULL; - tmp = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); + tmp = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); if (!tmp) return; diff --git a/kernel/user.c b/kernel/user.c index 220e586127a..c1f93c164c9 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -132,7 +132,7 @@ struct user_struct * alloc_uid(uid_t uid) if (!up) { struct user_struct *new; - new = kmem_cache_alloc(uid_cachep, SLAB_KERNEL); + new = kmem_cache_alloc(uid_cachep, GFP_KERNEL); if (!new) return NULL; new->uid = uid; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e7b69c90cfd..ad864f8708b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1326,7 +1326,7 @@ struct mempolicy *__mpol_copy(struct mempolicy *old) atomic_set(&new->refcnt, 1); if (new->policy == MPOL_BIND) { int sz = ksize(old->v.zonelist); - new->v.zonelist = kmemdup(old->v.zonelist, sz, SLAB_KERNEL); + new->v.zonelist = kmemdup(old->v.zonelist, sz, GFP_KERNEL); if (!new->v.zonelist) { kmem_cache_free(policy_cache, new); return ERR_PTR(-ENOMEM); diff --git a/mm/mmap.c b/mm/mmap.c index 7b40abd7cba..7be110e98d4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1736,7 +1736,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; - new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (!new) return -ENOMEM; @@ -2057,7 +2057,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, vma_start < new_vma->vm_end) *vmap = new_vma; } else { - new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); + new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); if (new_vma) { *new_vma = *vma; pol = mpol_copy(vma_policy(vma)); diff --git a/mm/shmem.c b/mm/shmem.c index 4959535fc14..bdaecfdaabd 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2263,7 +2263,7 @@ static struct kmem_cache *shmem_inode_cachep; static struct inode *shmem_alloc_inode(struct super_block *sb) { struct shmem_inode_info *p; - p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, SLAB_KERNEL); + p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); if (!p) return NULL; return &p->vfs_inode; diff --git a/mm/slab.c b/mm/slab.c index 9f34b4946fb..1f374c1df01 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2237,7 +2237,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, align = ralign; /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL); + cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); if (!cachep) goto oops; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index bdbc3f43166..101e5ccaf09 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -590,7 +590,7 @@ create: replace: err = -ENOBUFS; - new_f = kmem_cache_alloc(dn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(dn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 107bb6cbb0b..4463443e42c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -485,13 +485,13 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; new_f = NULL; if (!f) { - new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL); + new_f = kmem_cache_alloc(fn_hash_kmem, GFP_KERNEL); if (new_f == NULL) goto out_free_new_fa; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d17990ec724..6be6caf1af3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1187,7 +1187,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) u8 state; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; @@ -1232,7 +1232,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) goto out; err = -ENOBUFS; - new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL); + new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (new_fa == NULL) goto out; diff --git a/net/socket.c b/net/socket.c index e8db54702a6..4f417c2ddc1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -236,7 +236,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 49dba5febbb..df753d0a884 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -143,7 +143,7 @@ static struct inode * rpc_alloc_inode(struct super_block *sb) { struct rpc_inode *rpci; - rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, SLAB_KERNEL); + rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); if (!rpci) return NULL; return &rpci->vfs_inode; diff --git a/security/keys/key.c b/security/keys/key.c index 70eacbe5abd..157bac658bf 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -285,7 +285,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, } /* allocate and initialise the key and its description */ - key = kmem_cache_alloc(key_jar, SLAB_KERNEL); + key = kmem_cache_alloc(key_jar, GFP_KERNEL); if (!key) goto no_memory_2; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 78f98fe084e..ac1aeed0b28 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -181,7 +181,7 @@ static int inode_alloc_security(struct inode *inode) struct task_security_struct *tsec = current->security; struct inode_security_struct *isec; - isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL); + isec = kmem_cache_alloc(sel_inode_cache, GFP_KERNEL); if (!isec) return -ENOMEM; diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index d049c7acbc8..2dfc6134c2c 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -36,7 +36,7 @@ avtab_insert_node(struct avtab *h, int hvalue, struct avtab_key *key, struct avtab_datum *datum) { struct avtab_node * newnode; - newnode = kmem_cache_alloc(avtab_node_cachep, SLAB_KERNEL); + newnode = kmem_cache_alloc(avtab_node_cachep, GFP_KERNEL); if (newnode == NULL) return NULL; memset(newnode, 0, sizeof(struct avtab_node)); -- cgit v1.2.3-70-g09d2 From 441e143e95f5aa1e04026cb0aa71c801ba53982f Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:19 -0800 Subject: [PATCH] slab: remove SLAB_DMA SLAB_DMA is an alias of GFP_DMA. This is the last one so we remove the leftover comment too. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/atm/he.c | 6 +++--- drivers/s390/block/dasd_eckd.c | 2 +- drivers/s390/block/dasd_fba.c | 2 +- drivers/usb/core/buffer.c | 2 +- include/linux/slab.h | 3 --- mm/slab.c | 4 ++-- 6 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/atm/he.c b/drivers/atm/he.c index ec8a7a633e6..7d9b4e52f0b 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -820,7 +820,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPS_POOL - cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbps_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else @@ -884,7 +884,7 @@ he_init_group(struct he_dev *he_dev, int group) void *cpuaddr; #ifdef USE_RBPL_POOL - cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|SLAB_DMA, &dma_handle); + cpuaddr = pci_pool_alloc(he_dev->rbpl_pool, GFP_KERNEL|GFP_DMA, &dma_handle); if (cpuaddr == NULL) return -ENOMEM; #else @@ -1724,7 +1724,7 @@ __alloc_tpd(struct he_dev *he_dev) struct he_tpd *tpd; dma_addr_t dma_handle; - tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|SLAB_DMA, &dma_handle); + tpd = pci_pool_alloc(he_dev->tpd_pool, GFP_ATOMIC|GFP_DMA, &dma_handle); if (tpd == NULL) return NULL; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 5ecea3e4fde..fdaa471e845 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1215,7 +1215,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req) dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, - SLAB_DMA | __GFP_NOWARN); + GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) memcpy(copy + bv->bv_offset, dst, bv->bv_len); if (copy) diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index 80926c54822..b857fd5893f 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -308,7 +308,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req) dst = page_address(bv->bv_page) + bv->bv_offset; if (dasd_page_cache) { char *copy = kmem_cache_alloc(dasd_page_cache, - SLAB_DMA | __GFP_NOWARN); + GFP_DMA | __GFP_NOWARN); if (copy && rq_data_dir(req) == WRITE) memcpy(copy + bv->bv_offset, dst, bv->bv_len); if (copy) diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c index 840442a25b6..c3915dc2860 100644 --- a/drivers/usb/core/buffer.c +++ b/drivers/usb/core/buffer.c @@ -93,7 +93,7 @@ void hcd_buffer_destroy (struct usb_hcd *hcd) } -/* sometimes alloc/free could use kmalloc with SLAB_DMA, for +/* sometimes alloc/free could use kmalloc with GFP_DMA, for * better sharing and to leverage mm/slab.c intelligence. */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 639f65efa46..fbcfc208f52 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -18,9 +18,6 @@ typedef struct kmem_cache kmem_cache_t; #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ -/* flags for kmem_cache_alloc() */ -#define SLAB_DMA GFP_DMA - /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build * SLAB_DEBUG_SUPPORT. diff --git a/mm/slab.c b/mm/slab.c index 1f374c1df01..bb831ba63e1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2637,7 +2637,7 @@ static void cache_init_objs(struct kmem_cache *cachep, static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) { - if (flags & SLAB_DMA) + if (flags & GFP_DMA) BUG_ON(!(cachep->gfpflags & GFP_DMA)); else BUG_ON(cachep->gfpflags & GFP_DMA); @@ -2721,7 +2721,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) * Be lazy and only check for valid flags here, keeping it out of the * critical path in kmem_cache_alloc(). */ - BUG_ON(flags & ~(SLAB_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW)); + BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW)); if (flags & __GFP_NO_GROW) return 0; -- cgit v1.2.3-70-g09d2 From e18b890bb0881bbab6f4f1a6cd20d9c60d66b003 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 6 Dec 2006 20:33:20 -0800 Subject: [PATCH] slab: remove kmem_cache_t Replace all uses of kmem_cache_t with struct kmem_cache. The patch was generated using the following script: #!/bin/sh # # Replace one string by another in all the kernel sources. # set -e for file in `find * -name "*.c" -o -name "*.h"|xargs grep -l $1`; do quilt add $file sed -e "1,\$s/$1/$2/g" $file >/tmp/$$ mv /tmp/$$ $file quilt refresh done The script was run like this sh replace kmem_cache_t "struct kmem_cache" Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 4 ++-- arch/arm/mach-s3c2410/dma.c | 4 ++-- arch/arm26/mm/memc.c | 6 +++--- arch/frv/mm/pgalloc.c | 6 +++--- arch/i386/mm/init.c | 4 ++-- arch/i386/mm/pgtable.c | 6 +++--- arch/ia64/ia32/ia32_support.c | 2 +- arch/ia64/ia32/sys_ia32.c | 2 +- arch/powerpc/kernel/rtas_flash.c | 4 ++-- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/init_64.c | 6 +++--- arch/powerpc/platforms/cell/spufs/inode.c | 4 ++-- arch/sh/kernel/cpu/sh4/sq.c | 2 +- arch/sh/mm/pmb.c | 6 +++--- arch/sparc64/mm/init.c | 4 ++-- arch/sparc64/mm/tsb.c | 2 +- block/cfq-iosched.c | 4 ++-- block/ll_rw_blk.c | 6 +++--- drivers/block/aoe/aoeblk.c | 2 +- drivers/ieee1394/eth1394.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-snap.c | 6 +++--- drivers/md/dm.c | 4 ++-- drivers/md/kcopyd.c | 2 +- drivers/md/raid5.c | 4 ++-- drivers/message/i2o/i2o_block.h | 2 +- drivers/pci/msi.c | 2 +- drivers/s390/block/dasd_devmap.c | 2 +- drivers/s390/block/dasd_int.h | 2 +- drivers/s390/scsi/zfcp_def.h | 6 +++--- drivers/scsi/aic94xx/aic94xx.h | 4 ++-- drivers/scsi/aic94xx/aic94xx_hwi.c | 2 +- drivers/scsi/aic94xx/aic94xx_init.c | 4 ++-- drivers/scsi/libsas/sas_init.c | 2 +- drivers/scsi/qla2xxx/qla_os.c | 2 +- drivers/scsi/qla4xxx/ql4_os.c | 2 +- drivers/scsi/scsi.c | 2 +- drivers/scsi/scsi_lib.c | 4 ++-- drivers/scsi/scsi_tgt_lib.c | 2 +- drivers/usb/host/hc_crisv10.c | 6 +++--- drivers/usb/host/uhci-hcd.c | 2 +- drivers/usb/mon/mon_text.c | 6 +++--- fs/adfs/super.c | 4 ++-- fs/affs/super.c | 4 ++-- fs/afs/super.c | 6 +++--- fs/aio.c | 4 ++-- fs/befs/linuxvfs.c | 4 ++-- fs/bfs/inode.c | 4 ++-- fs/bio.c | 4 ++-- fs/block_dev.c | 4 ++-- fs/buffer.c | 4 ++-- fs/cifs/cifsfs.c | 14 +++++++------- fs/cifs/transport.c | 2 +- fs/coda/inode.c | 4 ++-- fs/configfs/configfs_internal.h | 2 +- fs/configfs/mount.c | 2 +- fs/dcache.c | 6 +++--- fs/dcookies.c | 2 +- fs/dlm/memory.c | 2 +- fs/dnotify.c | 2 +- fs/dquot.c | 2 +- fs/ecryptfs/main.c | 2 +- fs/efs/super.c | 4 ++-- fs/eventpoll.c | 4 ++-- fs/ext2/super.c | 4 ++-- fs/ext3/super.c | 4 ++-- fs/ext4/super.c | 4 ++-- fs/fat/cache.c | 4 ++-- fs/fat/inode.c | 4 ++-- fs/fcntl.c | 2 +- fs/freevxfs/vxfs_inode.c | 2 +- fs/fuse/dev.c | 2 +- fs/fuse/inode.c | 4 ++-- fs/gfs2/main.c | 4 ++-- fs/gfs2/util.c | 6 +++--- fs/gfs2/util.h | 6 +++--- fs/hfs/super.c | 4 ++-- fs/hfsplus/super.c | 4 ++-- fs/hpfs/super.c | 4 ++-- fs/hugetlbfs/inode.c | 4 ++-- fs/inode.c | 4 ++-- fs/inotify_user.c | 4 ++-- fs/isofs/inode.c | 4 ++-- fs/jbd/journal.c | 6 +++--- fs/jbd/revoke.c | 4 ++-- fs/jbd2/journal.c | 6 +++--- fs/jbd2/revoke.c | 4 ++-- fs/jffs/inode-v23.c | 4 ++-- fs/jffs/jffs_fm.c | 4 ++-- fs/jffs2/malloc.c | 18 +++++++++--------- fs/jffs2/super.c | 4 ++-- fs/jfs/jfs_metapage.c | 4 ++-- fs/jfs/super.c | 4 ++-- fs/locks.c | 4 ++-- fs/mbcache.c | 2 +- fs/minix/inode.c | 4 ++-- fs/namespace.c | 2 +- fs/ncpfs/inode.c | 4 ++-- fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/pagelist.c | 2 +- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- fs/nfsd/nfs4state.c | 10 +++++----- fs/ocfs2/dlm/dlmfs.c | 4 ++-- fs/ocfs2/dlm/dlmmaster.c | 2 +- fs/ocfs2/extent_map.c | 2 +- fs/ocfs2/inode.h | 2 +- fs/ocfs2/super.c | 4 ++-- fs/ocfs2/uptodate.c | 2 +- fs/openpromfs/inode.c | 4 ++-- fs/proc/inode.c | 4 ++-- fs/qnx4/inode.c | 4 ++-- fs/reiserfs/super.c | 4 ++-- fs/romfs/inode.c | 4 ++-- fs/smbfs/inode.c | 4 ++-- fs/smbfs/request.c | 2 +- fs/sysfs/mount.c | 2 +- fs/sysfs/sysfs.h | 2 +- fs/sysv/inode.c | 4 ++-- fs/udf/super.c | 4 ++-- fs/ufs/super.c | 4 ++-- include/acpi/platform/aclinux.h | 2 +- include/asm-arm26/pgalloc.h | 2 +- include/asm-i386/pgtable.h | 10 +++++----- include/asm-powerpc/pgalloc.h | 2 +- include/asm-sparc64/pgalloc.h | 2 +- include/linux/delayacct.h | 2 +- include/linux/i2o.h | 2 +- include/linux/jbd.h | 2 +- include/linux/jbd2.h | 2 +- include/linux/raid/raid5.h | 2 +- include/linux/rmap.h | 2 +- include/linux/skbuff.h | 2 +- include/linux/taskstats_kern.h | 2 +- include/net/dst.h | 2 +- include/net/inet_hashtables.h | 6 +++--- include/net/neighbour.h | 2 +- include/net/netfilter/nf_conntrack_expect.h | 2 +- include/net/request_sock.h | 2 +- include/net/sock.h | 2 +- include/net/timewait_sock.h | 2 +- include/scsi/libsas.h | 4 ++-- ipc/mqueue.c | 4 ++-- kernel/delayacct.c | 2 +- kernel/fork.c | 16 ++++++++-------- kernel/pid.c | 2 +- kernel/posix-timers.c | 2 +- kernel/signal.c | 2 +- kernel/taskstats.c | 2 +- kernel/user.c | 2 +- lib/idr.c | 4 ++-- lib/radix-tree.c | 4 ++-- net/bridge/br_fdb.c | 2 +- net/core/flow.c | 2 +- net/core/skbuff.c | 8 ++++---- net/core/sock.c | 2 +- net/dccp/ackvec.c | 4 ++-- net/dccp/ccid.c | 6 +++--- net/dccp/ccid.h | 4 ++-- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.h | 4 ++-- net/decnet/dn_table.c | 2 +- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv4/inet_hashtables.c | 4 ++-- net/ipv4/inetpeer.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/ipvs/ip_vs_conn.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 4 ++-- net/ipv6/ip6_fib.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/netfilter/nf_conntrack_core.c | 6 +++--- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/xt_hashlimit.c | 2 +- net/sctp/protocol.c | 4 ++-- net/sctp/sm_make_chunk.c | 2 +- net/sctp/socket.c | 2 +- net/socket.c | 4 ++-- net/sunrpc/rpc_pipe.c | 4 ++-- net/sunrpc/sched.c | 4 ++-- net/tipc/handler.c | 2 +- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- security/keys/key.c | 2 +- security/selinux/avc.c | 2 +- security/selinux/hooks.c | 2 +- security/selinux/ss/avtab.c | 2 +- 189 files changed, 332 insertions(+), 332 deletions(-) (limited to 'include') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 05431621c86..8621a064f7e 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -77,7 +77,7 @@ To get this part of the dma_ API, you must #include Many drivers need lots of small dma-coherent memory regions for DMA descriptors or I/O buffers. Rather than allocating in units of a page or more using dma_alloc_coherent(), you can use DMA pools. These work -much like a kmem_cache_t, except that they use the dma-coherent allocator +much like a struct kmem_cache, except that they use the dma-coherent allocator not __get_free_pages(). Also, they understand common hardware constraints for alignment, like queue heads needing to be aligned on N byte boundaries. @@ -94,7 +94,7 @@ The pool create() routines initialize a pool of dma-coherent buffers for use with a given device. It must be called in a context which can sleep. -The "name" is for diagnostics (like a kmem_cache_t name); dev and size +The "name" is for diagnostics (like a struct kmem_cache name); dev and size are like what you'd pass to dma_alloc_coherent(). The device's hardware alignment requirement for this type of data is "align" (which is expressed in bytes, and must be a power of two). If your device has no boundary diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index 3d211dc2f2f..01abb0ace23 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -40,7 +40,7 @@ /* io map for dma */ static void __iomem *dma_base; -static kmem_cache_t *dma_kmem; +static struct kmem_cache *dma_kmem; struct s3c24xx_dma_selection dma_sel; @@ -1271,7 +1271,7 @@ struct sysdev_class dma_sysclass = { /* kmem cache implementation */ -static void s3c2410_dma_cache_ctor(void *p, kmem_cache_t *c, unsigned long f) +static void s3c2410_dma_cache_ctor(void *p, struct kmem_cache *c, unsigned long f) { memset(p, 0, sizeof(struct s3c2410_dma_buf)); } diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c index 34def6397c3..f2901581d4d 100644 --- a/arch/arm26/mm/memc.c +++ b/arch/arm26/mm/memc.c @@ -24,7 +24,7 @@ #define MEMC_TABLE_SIZE (256*sizeof(unsigned long)) -kmem_cache_t *pte_cache, *pgd_cache; +struct kmem_cache *pte_cache, *pgd_cache; int page_nr; /* @@ -162,12 +162,12 @@ void __init create_memmap_holes(struct meminfo *mi) { } -static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +static void pte_cache_ctor(void *pte, struct kmem_cache *cache, unsigned long flags) { memzero(pte, sizeof(pte_t) * PTRS_PER_PTE); } -static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags) +static void pgd_cache_ctor(void *pgd, struct kmem_cache *cache, unsigned long flags) { memzero(pgd + MEMC_TABLE_SIZE, USER_PTRS_PER_PGD * sizeof(pgd_t)); } diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index f76dd03ddd9..19b13be114a 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -18,7 +18,7 @@ #include pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE))); -kmem_cache_t *pgd_cache; +struct kmem_cache *pgd_cache; pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { @@ -100,7 +100,7 @@ static inline void pgd_list_del(pgd_t *pgd) set_page_private(next, (unsigned long) pprev); } -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; @@ -120,7 +120,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) } /* never called when PTRS_PER_PMD > 1 */ -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 167416155ee..a6a8e397dd8 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -699,8 +699,8 @@ int remove_memory(u64 start, u64 size) #endif #endif -kmem_cache_t *pgd_cache; -kmem_cache_t *pmd_cache; +struct kmem_cache *pgd_cache; +struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 10126e3f817..33be236fc6a 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -193,7 +193,7 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) return pte; } -void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) +void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } @@ -233,7 +233,7 @@ static inline void pgd_list_del(pgd_t *pgd) set_page_private(next, (unsigned long)pprev); } -void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; @@ -253,7 +253,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) } /* never called when PTRS_PER_PMD > 1 */ -void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused) +void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) { unsigned long flags; /* can be called from interrupt context */ diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c index c187743965a..6af400a12ca 100644 --- a/arch/ia64/ia32/ia32_support.c +++ b/arch/ia64/ia32/ia32_support.c @@ -249,7 +249,7 @@ ia32_init (void) #if PAGE_SHIFT > IA32_PAGE_SHIFT { - extern kmem_cache_t *partial_page_cachep; + extern struct kmem_cache *partial_page_cachep; partial_page_cachep = kmem_cache_create("partial_page_cache", sizeof(struct partial_page), 0, 0, diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 9d6a3f21014..a4a6e1463af 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -254,7 +254,7 @@ mmap_subpage (struct file *file, unsigned long start, unsigned long end, int pro } /* SLAB cache for partial_page structures */ -kmem_cache_t *partial_page_cachep; +struct kmem_cache *partial_page_cachep; /* * init partial_page_list. diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index b9561d30051..7d0f13fecc0 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -101,7 +101,7 @@ struct flash_block_list_header { /* just the header of flash_block_list */ static struct flash_block_list_header rtas_firmware_flash_list = {0, NULL}; /* Use slab cache to guarantee 4k alignment */ -static kmem_cache_t *flash_block_cache = NULL; +static struct kmem_cache *flash_block_cache = NULL; #define FLASH_BLOCK_LIST_VERSION (1UL) @@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf, } /* constructor for flash_block_cache */ -void rtas_block_ctor(void *ptr, kmem_cache_t *cache, unsigned long flags) +void rtas_block_ctor(void *ptr, struct kmem_cache *cache, unsigned long flags) { memset(ptr, 0, RTAS_BLK_SIZE); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 424a8f57e15..89c836d5480 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1047,7 +1047,7 @@ repeat: return err; } -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { memset(addr, 0, kmem_cache_size(cache)); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9a178549cbc..d12a87ec5ae 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -141,7 +141,7 @@ static int __init setup_kcore(void) } module_init(setup_kcore); -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { memset(addr, 0, kmem_cache_size(cache)); } @@ -166,9 +166,9 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { /* Hugepages need one extra cache, initialized in hugetlbpage.c. We * can't put into the tables above, because HPAGE_SHIFT is not compile * time constant. */ -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; #else -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif void pgtable_cache_init(void) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7edfcc9d285..e3af9112c02 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -40,7 +40,7 @@ #include "spufs.h" -static kmem_cache_t *spufs_inode_cache; +static struct kmem_cache *spufs_inode_cache; char *isolated_loader; static struct inode * @@ -65,7 +65,7 @@ spufs_destroy_inode(struct inode *inode) } static void -spufs_init_once(void *p, kmem_cache_t * cachep, unsigned long flags) +spufs_init_once(void *p, struct kmem_cache * cachep, unsigned long flags) { struct spufs_inode_info *ei = p; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 55f43506995..0c9ea38d2ca 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -38,7 +38,7 @@ struct sq_mapping { static struct sq_mapping *sq_mapping_list; static DEFINE_SPINLOCK(sq_mapping_lock); -static kmem_cache_t *sq_cache; +static struct kmem_cache *sq_cache; static unsigned long *sq_bitmap; #define store_queue_barrier() \ diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 92e745341e4..b60ad83a763 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -30,7 +30,7 @@ #define NR_PMB_ENTRIES 16 -static kmem_cache_t *pmb_cache; +static struct kmem_cache *pmb_cache; static unsigned long pmb_map; static struct pmb_entry pmb_init_map[] = { @@ -283,7 +283,7 @@ void pmb_unmap(unsigned long addr) } while (pmbe); } -static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags) +static void pmb_cache_ctor(void *pmb, struct kmem_cache *cachep, unsigned long flags) { struct pmb_entry *pmbe = pmb; @@ -297,7 +297,7 @@ static void pmb_cache_ctor(void *pmb, kmem_cache_t *cachep, unsigned long flags) spin_unlock_irq(&pmb_list_lock); } -static void pmb_cache_dtor(void *pmb, kmem_cache_t *cachep, unsigned long flags) +static void pmb_cache_dtor(void *pmb, struct kmem_cache *cachep, unsigned long flags) { spin_lock_irq(&pmb_list_lock); pmb_list_del(pmb); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 09cb7fccc03..a8e8802eed4 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -176,9 +176,9 @@ unsigned long sparc64_kern_sec_context __read_mostly; int bigkernel = 0; -kmem_cache_t *pgtable_cache __read_mostly; +struct kmem_cache *pgtable_cache __read_mostly; -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { clear_page(addr); } diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index beaa02810f0..236d02f41a0 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -239,7 +239,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign } } -static kmem_cache_t *tsb_caches[8] __read_mostly; +static struct kmem_cache *tsb_caches[8] __read_mostly; static const char *tsb_cache_names[8] = { "tsb_8KB", diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 84e9be07318..78c6b312bd3 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -43,8 +43,8 @@ static int cfq_slice_idle = HZ / 125; #define RQ_CIC(rq) ((struct cfq_io_context*)(rq)->elevator_private) #define RQ_CFQQ(rq) ((rq)->elevator_private2) -static kmem_cache_t *cfq_pool; -static kmem_cache_t *cfq_ioc_pool; +static struct kmem_cache *cfq_pool; +static struct kmem_cache *cfq_ioc_pool; static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index cc6e95f8e5d..a4ff3271d4a 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -44,17 +44,17 @@ static struct io_context *current_io_context(gfp_t gfp_flags, int node); /* * For the allocated request tables */ -static kmem_cache_t *request_cachep; +static struct kmem_cache *request_cachep; /* * For queue allocation */ -static kmem_cache_t *requestq_cachep; +static struct kmem_cache *requestq_cachep; /* * For io context allocations */ -static kmem_cache_t *iocontext_cachep; +static struct kmem_cache *iocontext_cachep; /* * Controlling structure to kblockd diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index aa25f8b09fe..478489c568a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -12,7 +12,7 @@ #include #include "aoe.h" -static kmem_cache_t *buf_pool_cache; +static struct kmem_cache *buf_pool_cache; static ssize_t aoedisk_show_state(struct gendisk * disk, char *page) { diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c index 31e5cc49d61..27d6c642415 100644 --- a/drivers/ieee1394/eth1394.c +++ b/drivers/ieee1394/eth1394.c @@ -133,7 +133,7 @@ struct eth1394_node_info { #define ETH1394_DRIVER_NAME "eth1394" static const char driver_name[] = ETH1394_DRIVER_NAME; -static kmem_cache_t *packet_task_cache; +static struct kmem_cache *packet_task_cache; static struct hpsb_highlevel eth1394_highlevel; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ed2d4ef27fd..c7bee4f2eed 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -101,7 +101,7 @@ struct crypt_config { #define MIN_POOL_PAGES 32 #define MIN_BIO_PAGES 8 -static kmem_cache_t *_crypt_io_pool; +static struct kmem_cache *_crypt_io_pool; /* * Different IV generation algorithms: diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e77ee6fd104..cf8bf052138 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -101,7 +101,7 @@ typedef int (*action_fn) (struct pgpath *pgpath); #define MIN_IOS 256 /* Mempool size */ -static kmem_cache_t *_mpio_cache; +static struct kmem_cache *_mpio_cache; struct workqueue_struct *kmultipathd; static void process_queued_ios(struct work_struct *work); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 91c7aa1fed0..b0ce2ce8227 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -88,8 +88,8 @@ struct pending_exception { * Hash table mapping origin volumes to lists of snapshots and * a lock to protect it */ -static kmem_cache_t *exception_cache; -static kmem_cache_t *pending_cache; +static struct kmem_cache *exception_cache; +static struct kmem_cache *pending_cache; static mempool_t *pending_pool; /* @@ -228,7 +228,7 @@ static int init_exception_table(struct exception_table *et, uint32_t size) return 0; } -static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) +static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem) { struct list_head *slot; struct exception *ex, *next; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index fc4f743f3b5..7ec1b112a6d 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -121,8 +121,8 @@ struct mapped_device { }; #define MIN_IOS 256 -static kmem_cache_t *_io_cache; -static kmem_cache_t *_tio_cache; +static struct kmem_cache *_io_cache; +static struct kmem_cache *_tio_cache; static int __init local_init(void) { diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index b3c01496c73..b46f6c575f7 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -203,7 +203,7 @@ struct kcopyd_job { /* FIXME: this should scale with the number of pages */ #define MIN_JOBS 512 -static kmem_cache_t *_job_cache; +static struct kmem_cache *_job_cache; static mempool_t *_job_pool; /* diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69c3e201fa3..52914d5cec7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -348,7 +348,7 @@ static int grow_one_stripe(raid5_conf_t *conf) static int grow_stripes(raid5_conf_t *conf, int num) { - kmem_cache_t *sc; + struct kmem_cache *sc; int devs = conf->raid_disks; sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); @@ -397,7 +397,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) LIST_HEAD(newstripes); struct disk_info *ndisks; int err = 0; - kmem_cache_t *sc; + struct kmem_cache *sc; int i; if (newsize <= conf->pool_size) diff --git a/drivers/message/i2o/i2o_block.h b/drivers/message/i2o/i2o_block.h index d9fdc95b440..67f921b4419 100644 --- a/drivers/message/i2o/i2o_block.h +++ b/drivers/message/i2o/i2o_block.h @@ -64,7 +64,7 @@ /* I2O Block OSM mempool struct */ struct i2o_block_mempool { - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *pool; }; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9fc9a34ef24..9168401401b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(msi_lock); static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL }; -static kmem_cache_t* msi_cachep; +static struct kmem_cache* msi_cachep; static int pci_msi_enable = 1; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 17fdd8c9f74..cf28ccc5794 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -25,7 +25,7 @@ #include "dasd_int.h" -kmem_cache_t *dasd_page_cache; +struct kmem_cache *dasd_page_cache; EXPORT_SYMBOL_GPL(dasd_page_cache); /* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 9f52004f6fc..dc5dd509434 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -474,7 +474,7 @@ extern struct dasd_profile_info_t dasd_global_profile; extern unsigned int dasd_profile_level; extern struct block_device_operations dasd_device_operations; -extern kmem_cache_t *dasd_page_cache; +extern struct kmem_cache *dasd_page_cache; struct dasd_ccw_req * dasd_kmalloc_request(char *, int, int, struct dasd_device *); diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 74c0eac083e..32933ed54b8 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -1032,9 +1032,9 @@ struct zfcp_data { wwn_t init_wwpn; fcp_lun_t init_fcp_lun; char *driver_version; - kmem_cache_t *fsf_req_qtcb_cache; - kmem_cache_t *sr_buffer_cache; - kmem_cache_t *gid_pn_cache; + struct kmem_cache *fsf_req_qtcb_cache; + struct kmem_cache *sr_buffer_cache; + struct kmem_cache *gid_pn_cache; }; /** diff --git a/drivers/scsi/aic94xx/aic94xx.h b/drivers/scsi/aic94xx/aic94xx.h index 71a031df7a3..32f513b1b78 100644 --- a/drivers/scsi/aic94xx/aic94xx.h +++ b/drivers/scsi/aic94xx/aic94xx.h @@ -56,8 +56,8 @@ /* 2*ITNL timeout + 1 second */ #define AIC94XX_SCB_TIMEOUT (5*HZ) -extern kmem_cache_t *asd_dma_token_cache; -extern kmem_cache_t *asd_ascb_cache; +extern struct kmem_cache *asd_dma_token_cache; +extern struct kmem_cache *asd_ascb_cache; extern char sas_addr_str[2*SAS_ADDR_SIZE + 1]; static inline void asd_stringify_sas_addr(char *p, const u8 *sas_addr) diff --git a/drivers/scsi/aic94xx/aic94xx_hwi.c b/drivers/scsi/aic94xx/aic94xx_hwi.c index af7e0113436..da94e126ca8 100644 --- a/drivers/scsi/aic94xx/aic94xx_hwi.c +++ b/drivers/scsi/aic94xx/aic94xx_hwi.c @@ -1047,7 +1047,7 @@ irqreturn_t asd_hw_isr(int irq, void *dev_id) static inline struct asd_ascb *asd_ascb_alloc(struct asd_ha_struct *asd_ha, gfp_t gfp_flags) { - extern kmem_cache_t *asd_ascb_cache; + extern struct kmem_cache *asd_ascb_cache; struct asd_seq_data *seq = &asd_ha->seq; struct asd_ascb *ascb; unsigned long flags; diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c index 42302ef05ee..fbc82b00a41 100644 --- a/drivers/scsi/aic94xx/aic94xx_init.c +++ b/drivers/scsi/aic94xx/aic94xx_init.c @@ -450,8 +450,8 @@ static inline void asd_destroy_ha_caches(struct asd_ha_struct *asd_ha) asd_ha->scb_pool = NULL; } -kmem_cache_t *asd_dma_token_cache; -kmem_cache_t *asd_ascb_cache; +struct kmem_cache *asd_dma_token_cache; +struct kmem_cache *asd_ascb_cache; static int asd_create_global_caches(void) { diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index d65bc4e0f21..2f0c07fc3f4 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -36,7 +36,7 @@ #include "../scsi_sas_internal.h" -kmem_cache_t *sas_task_cache; +struct kmem_cache *sas_task_cache; /*------------ SAS addr hash -----------*/ void sas_hash_addr(u8 *hashed, const u8 *sas_addr) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index cbe0cad83b6..d03523d3bf3 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -24,7 +24,7 @@ char qla2x00_version_str[40]; /* * SRB allocation cache */ -static kmem_cache_t *srb_cachep; +static struct kmem_cache *srb_cachep; /* * Ioctl related information. diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 969c9e43102..9ef693c8809 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -19,7 +19,7 @@ char qla4xxx_version_str[40]; /* * SRB allocation cache */ -static kmem_cache_t *srb_cachep; +static struct kmem_cache *srb_cachep; /* * Module parameter information and variables diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fafc00deaad..24cffd98ee6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -136,7 +136,7 @@ const char * scsi_device_type(unsigned type) EXPORT_SYMBOL(scsi_device_type); struct scsi_host_cmd_pool { - kmem_cache_t *slab; + struct kmem_cache *slab; unsigned int users; char *name; unsigned int slab_flags; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index fb616c69151..1748e27501c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -36,7 +36,7 @@ struct scsi_host_sg_pool { size_t size; char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *pool; }; @@ -241,7 +241,7 @@ struct scsi_io_context { char sense[SCSI_SENSE_BUFFERSIZE]; }; -static kmem_cache_t *scsi_io_context_cache; +static struct kmem_cache *scsi_io_context_cache; static void scsi_end_async(struct request *req, int uptodate) { diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index 386dbae17b4..d402aff5f31 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -33,7 +33,7 @@ #include "scsi_tgt_priv.h" static struct workqueue_struct *scsi_tgtd; -static kmem_cache_t *scsi_tgt_cmd_cache; +static struct kmem_cache *scsi_tgt_cmd_cache; /* * TODO: this struct will be killed when the block layer supports large bios diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c index 7fd872aa654..9325e46a68c 100644 --- a/drivers/usb/host/hc_crisv10.c +++ b/drivers/usb/host/hc_crisv10.c @@ -275,13 +275,13 @@ static volatile USB_SB_Desc_t TxIntrSB_zout __attribute__ ((aligned (4))); static int zout_buffer[4] __attribute__ ((aligned (4))); /* Cache for allocating new EP and SB descriptors. */ -static kmem_cache_t *usb_desc_cache; +static struct kmem_cache *usb_desc_cache; /* Cache for the registers allocated in the top half. */ -static kmem_cache_t *top_half_reg_cache; +static struct kmem_cache *top_half_reg_cache; /* Cache for the data allocated in the isoc descr top half. */ -static kmem_cache_t *isoc_compl_cache; +static struct kmem_cache *isoc_compl_cache; static struct usb_bus *etrax_usb_bus; diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c index 226bf3de8ed..e87692c31be 100644 --- a/drivers/usb/host/uhci-hcd.c +++ b/drivers/usb/host/uhci-hcd.c @@ -81,7 +81,7 @@ MODULE_PARM_DESC(debug, "Debug level"); static char *errbuf; #define ERRBUF_LEN (32 * 1024) -static kmem_cache_t *uhci_up_cachep; /* urb_priv */ +static struct kmem_cache *uhci_up_cachep; /* urb_priv */ static void suspend_rh(struct uhci_hcd *uhci, enum uhci_rh_state new_state); static void wakeup_rh(struct uhci_hcd *uhci); diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index 46aecc8754f..05cf2c9a8f8 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -50,7 +50,7 @@ struct mon_event_text { #define SLAB_NAME_SZ 30 struct mon_reader_text { - kmem_cache_t *e_slab; + struct kmem_cache *e_slab; int nevents; struct list_head e_list; struct mon_reader r; /* In C, parent class can be placed anywhere */ @@ -63,7 +63,7 @@ struct mon_reader_text { char slab_name[SLAB_NAME_SZ]; }; -static void mon_text_ctor(void *, kmem_cache_t *, unsigned long); +static void mon_text_ctor(void *, struct kmem_cache *, unsigned long); /* * mon_text_submit @@ -450,7 +450,7 @@ const struct file_operations mon_fops_text = { /* * Slab interface: constructor. */ -static void mon_text_ctor(void *mem, kmem_cache_t *slab, unsigned long sflags) +static void mon_text_ctor(void *mem, struct kmem_cache *slab, unsigned long sflags) { /* * Nothing to initialize. No, really! diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 52eb10ca654..e5a205c9f42 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -212,7 +212,7 @@ static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t *adfs_inode_cachep; +static struct kmem_cache *adfs_inode_cachep; static struct inode *adfs_alloc_inode(struct super_block *sb) { @@ -228,7 +228,7 @@ static void adfs_destroy_inode(struct inode *inode) kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/super.c b/fs/affs/super.c index 81c73ec09f6..3de93e79994 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -66,7 +66,7 @@ affs_write_super(struct super_block *sb) pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); } -static kmem_cache_t * affs_inode_cachep; +static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) { @@ -83,7 +83,7 @@ static void affs_destroy_inode(struct inode *inode) kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; diff --git a/fs/afs/super.c b/fs/afs/super.c index c6ead009bf7..9a351c4c756 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -35,7 +35,7 @@ struct afs_mount_params { struct afs_volume *volume; }; -static void afs_i_init_once(void *foo, kmem_cache_t *cachep, +static void afs_i_init_once(void *foo, struct kmem_cache *cachep, unsigned long flags); static int afs_get_sb(struct file_system_type *fs_type, @@ -65,7 +65,7 @@ static struct super_operations afs_super_ops = { .put_super = afs_put_super, }; -static kmem_cache_t *afs_inode_cachep; +static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; /*****************************************************************************/ @@ -384,7 +384,7 @@ static void afs_put_super(struct super_block *sb) /* * initialise an inode cache slab element prior to any use */ -static void afs_i_init_once(void *_vnode, kmem_cache_t *cachep, +static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep, unsigned long flags) { struct afs_vnode *vnode = (struct afs_vnode *) _vnode; diff --git a/fs/aio.c b/fs/aio.c index 287a1bc7a18..13aa9298abf 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -47,8 +47,8 @@ unsigned long aio_nr; /* current system wide number of aio requests */ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ /*----end sysctl variables---*/ -static kmem_cache_t *kiocb_cachep; -static kmem_cache_t *kioctx_cachep; +static struct kmem_cache *kiocb_cachep; +static struct kmem_cache *kioctx_cachep; static struct workqueue_struct *aio_wq; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 995348df94a..bce402eee55 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -61,7 +61,7 @@ static const struct super_operations befs_sops = { }; /* slab cache for befs_inode_info objects */ -static kmem_cache_t *befs_inode_cachep; +static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, @@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode) kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 2e45123c8f7..eac175ed9f4 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -228,7 +228,7 @@ static void bfs_write_super(struct super_block *s) unlock_kernel(); } -static kmem_cache_t * bfs_inode_cachep; +static struct kmem_cache * bfs_inode_cachep; static struct inode *bfs_alloc_inode(struct super_block *sb) { @@ -244,7 +244,7 @@ static void bfs_destroy_inode(struct inode *inode) kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct bfs_inode_info *bi = foo; diff --git a/fs/bio.c b/fs/bio.c index 50c40ce2cea..7ec737eda72 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -30,7 +30,7 @@ #define BIO_POOL_SIZE 256 -static kmem_cache_t *bio_slab __read_mostly; +static struct kmem_cache *bio_slab __read_mostly; #define BIOVEC_NR_POOLS 6 @@ -44,7 +44,7 @@ mempool_t *bio_split_pool __read_mostly; struct biovec_slab { int nr_vecs; char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; }; /* diff --git a/fs/block_dev.c b/fs/block_dev.c index 063506705f2..13816b4d76f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -235,7 +235,7 @@ static int block_fsync(struct file *filp, struct dentry *dentry, int datasync) */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock); -static kmem_cache_t * bdev_cachep __read_mostly; +static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { @@ -253,7 +253,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; diff --git a/fs/buffer.c b/fs/buffer.c index 35527dca1db..a8ca0ac2148 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2908,7 +2908,7 @@ asmlinkage long sys_bdflush(int func, long data) /* * Buffer-head allocation */ -static kmem_cache_t *bh_cachep; +static struct kmem_cache *bh_cachep; /* * Once the number of bh's in the machine exceeds this level, we start @@ -2961,7 +2961,7 @@ void free_buffer_head(struct buffer_head *bh) EXPORT_SYMBOL(free_buffer_head); static void -init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags) +init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) { if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 84168629cea..e6b5866e500 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -81,7 +81,7 @@ extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; -extern kmem_cache_t *cifs_oplock_cachep; +extern struct kmem_cache *cifs_oplock_cachep; static int cifs_read_super(struct super_block *sb, void *data, @@ -232,11 +232,11 @@ static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) return generic_permission(inode, mask, NULL); } -static kmem_cache_t *cifs_inode_cachep; -static kmem_cache_t *cifs_req_cachep; -static kmem_cache_t *cifs_mid_cachep; -kmem_cache_t *cifs_oplock_cachep; -static kmem_cache_t *cifs_sm_req_cachep; +static struct kmem_cache *cifs_inode_cachep; +static struct kmem_cache *cifs_req_cachep; +static struct kmem_cache *cifs_mid_cachep; +struct kmem_cache *cifs_oplock_cachep; +static struct kmem_cache *cifs_sm_req_cachep; mempool_t *cifs_sm_req_poolp; mempool_t *cifs_req_poolp; mempool_t *cifs_mid_poolp; @@ -668,7 +668,7 @@ const struct file_operations cifs_dir_ops = { }; static void -cifs_init_once(void *inode, kmem_cache_t * cachep, unsigned long flags) +cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags) { struct cifsInodeInfo *cifsi = inode; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1f727765a8e..f80007eaebf 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -34,7 +34,7 @@ #include "cifs_debug.h" extern mempool_t *cifs_mid_poolp; -extern kmem_cache_t *cifs_oplock_cachep; +extern struct kmem_cache *cifs_oplock_cachep; static struct mid_q_entry * AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 50cedd2617d..b64659fa82d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -38,7 +38,7 @@ static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); -static kmem_cache_t * coda_inode_cachep; +static struct kmem_cache * coda_inode_cachep; static struct inode *coda_alloc_inode(struct super_block *sb) { @@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode) kmem_cache_free(coda_inode_cachep, ITOC(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 3f4ff7a242b..f92cd303d2c 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -49,7 +49,7 @@ struct configfs_dirent { #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) extern struct vfsmount * configfs_mount; -extern kmem_cache_t *configfs_dir_cachep; +extern struct kmem_cache *configfs_dir_cachep; extern int configfs_is_root(struct config_item *item); diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 68bd5c93ca5..ed678529ebb 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -38,7 +38,7 @@ struct vfsmount * configfs_mount = NULL; struct super_block * configfs_sb = NULL; -kmem_cache_t *configfs_dir_cachep; +struct kmem_cache *configfs_dir_cachep; static int configfs_mnt_count = 0; static struct super_operations configfs_ops = { diff --git a/fs/dcache.c b/fs/dcache.c index fd4a428998e..a7c67cee5d8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -43,7 +43,7 @@ static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); -static kmem_cache_t *dentry_cache __read_mostly; +static struct kmem_cache *dentry_cache __read_mostly; #define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname)) @@ -2072,10 +2072,10 @@ static void __init dcache_init(unsigned long mempages) } /* SLAB cache for __getname() consumers */ -kmem_cache_t *names_cachep __read_mostly; +struct kmem_cache *names_cachep __read_mostly; /* SLAB cache for file structures */ -kmem_cache_t *filp_cachep __read_mostly; +struct kmem_cache *filp_cachep __read_mostly; EXPORT_SYMBOL(d_genocide); diff --git a/fs/dcookies.c b/fs/dcookies.c index 0c4b0674854..21af1629f9b 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -37,7 +37,7 @@ struct dcookie_struct { static LIST_HEAD(dcookie_users); static DEFINE_MUTEX(dcookie_mutex); -static kmem_cache_t *dcookie_cache __read_mostly; +static struct kmem_cache *dcookie_cache __read_mostly; static struct list_head *dcookie_hashtable __read_mostly; static size_t hash_size __read_mostly; diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 989b608fd83..5352b03ff5a 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -15,7 +15,7 @@ #include "config.h" #include "memory.h" -static kmem_cache_t *lkb_cache; +static struct kmem_cache *lkb_cache; int dlm_memory_init(void) diff --git a/fs/dnotify.c b/fs/dnotify.c index e778b1737b7..1f26a2b9eee 100644 --- a/fs/dnotify.c +++ b/fs/dnotify.c @@ -23,7 +23,7 @@ int dir_notify_enable __read_mostly = 1; -static kmem_cache_t *dn_cache __read_mostly; +static struct kmem_cache *dn_cache __read_mostly; static void redo_inode_mask(struct inode *inode) { diff --git a/fs/dquot.c b/fs/dquot.c index c6ae6c0e0cf..f9cd5e23ebd 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -131,7 +131,7 @@ static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; /* SLAB cache for dquot structures */ -static kmem_cache_t *dquot_cachep; +static struct kmem_cache *dquot_cachep; int register_quota_format(struct quota_format_type *fmt) { diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index a2c6ccbce30..306f8fbd1a0 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -546,7 +546,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags) } static struct ecryptfs_cache_info { - kmem_cache_t **cache; + struct kmem_cache **cache; const char *name; size_t size; void (*ctor)(void*, struct kmem_cache *, unsigned long); diff --git a/fs/efs/super.c b/fs/efs/super.c index 69b15a996cf..dfebf21289f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -52,7 +52,7 @@ static struct pt_types sgi_pt_types[] = { }; -static kmem_cache_t * efs_inode_cachep; +static struct kmem_cache * efs_inode_cachep; static struct inode *efs_alloc_inode(struct super_block *sb) { @@ -68,7 +68,7 @@ static void efs_destroy_inode(struct inode *inode) kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f5c88435c6b..88a6f8d0b88 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -283,10 +283,10 @@ static struct mutex epmutex; static struct poll_safewake psw; /* Slab cache used to allocate "struct epitem" */ -static kmem_cache_t *epi_cache __read_mostly; +static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ -static kmem_cache_t *pwq_cache __read_mostly; +static struct kmem_cache *pwq_cache __read_mostly; /* Virtual fs used to allocate inodes for eventpoll files */ static struct vfsmount *eventpoll_mnt __read_mostly; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 85c237e7385..3aafb1dd917 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -135,7 +135,7 @@ static void ext2_put_super (struct super_block * sb) return; } -static kmem_cache_t * ext2_inode_cachep; +static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { @@ -156,7 +156,7 @@ static void ext2_destroy_inode(struct inode *inode) kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 0cf633f0cfa..9856565d4dd 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -436,7 +436,7 @@ static void ext3_put_super (struct super_block * sb) return; } -static kmem_cache_t *ext3_inode_cachep; +static struct kmem_cache *ext3_inode_cachep; /* * Called inside transaction, so use GFP_NOFS @@ -462,7 +462,7 @@ static void ext3_destroy_inode(struct inode *inode) kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c730cbc8403..f2e8c4aa0fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -486,7 +486,7 @@ static void ext4_put_super (struct super_block * sb) return; } -static kmem_cache_t *ext4_inode_cachep; +static struct kmem_cache *ext4_inode_cachep; /* * Called inside transaction, so use GFP_NOFS @@ -513,7 +513,7 @@ static void ext4_destroy_inode(struct inode *inode) kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 8c272278455..05c2941c74f 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -34,9 +34,9 @@ static inline int fat_max_cache(struct inode *inode) return FAT_MAX_CACHE; } -static kmem_cache_t *fat_cache_cachep; +static struct kmem_cache *fat_cache_cachep; -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct fat_cache *cache = (struct fat_cache *)foo; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index b58fd0c9f3c..a9e4688582a 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -477,7 +477,7 @@ static void fat_put_super(struct super_block *sb) kfree(sbi); } -static kmem_cache_t *fat_inode_cachep; +static struct kmem_cache *fat_inode_cachep; static struct inode *fat_alloc_inode(struct super_block *sb) { @@ -493,7 +493,7 @@ static void fat_destroy_inode(struct inode *inode) kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/fcntl.c b/fs/fcntl.c index c03dc9cb21c..4740d35e52c 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -553,7 +553,7 @@ int send_sigurg(struct fown_struct *fown) } static DEFINE_RWLOCK(fasync_lock); -static kmem_cache_t *fasync_cache __read_mostly; +static struct kmem_cache *fasync_cache __read_mostly; /* * fasync_helper() is used by some character device drivers (mainly mice) diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index d2dd0d70007..0b7ae897cb7 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -46,7 +46,7 @@ extern const struct address_space_operations vxfs_immed_aops; extern struct inode_operations vxfs_immed_symlink_iops; -kmem_cache_t *vxfs_inode_cachep; +struct kmem_cache *vxfs_inode_cachep; #ifdef DIAGNOSTIC diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8c15139f275..357764d85ff 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -19,7 +19,7 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); -static kmem_cache_t *fuse_req_cachep; +static struct kmem_cache *fuse_req_cachep; static struct fuse_conn *fuse_get_conn(struct file *file) { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e039e2047cc..2bdc652b8b4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -22,7 +22,7 @@ MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); -static kmem_cache_t *fuse_inode_cachep; +static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); @@ -601,7 +601,7 @@ static struct file_system_type fuse_fs_type = { static decl_subsys(fuse, NULL, NULL); static decl_subsys(connections, NULL, NULL); -static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, +static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct inode * inode = foo; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 9889c1eacec..7c1a9e22a52 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -25,7 +25,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_inode *ip = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == @@ -37,7 +37,7 @@ static void gfs2_init_inode_once(void *foo, kmem_cache_t *cachep, unsigned long } } -static void gfs2_init_glock_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct gfs2_glock *gl = foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 196c604faad..e5707a9f78c 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -23,9 +23,9 @@ #include "lm.h" #include "util.h" -kmem_cache_t *gfs2_glock_cachep __read_mostly; -kmem_cache_t *gfs2_inode_cachep __read_mostly; -kmem_cache_t *gfs2_bufdata_cachep __read_mostly; +struct kmem_cache *gfs2_glock_cachep __read_mostly; +struct kmem_cache *gfs2_inode_cachep __read_mostly; +struct kmem_cache *gfs2_bufdata_cachep __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 76a50899fe9..7984dcf89ad 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -146,9 +146,9 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); -extern kmem_cache_t *gfs2_glock_cachep; -extern kmem_cache_t *gfs2_inode_cachep; -extern kmem_cache_t *gfs2_bufdata_cachep; +extern struct kmem_cache *gfs2_glock_cachep; +extern struct kmem_cache *gfs2_inode_cachep; +extern struct kmem_cache *gfs2_bufdata_cachep; static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, unsigned int *p) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ffc6409132c..a3698796600 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -24,7 +24,7 @@ #include "hfs_fs.h" #include "btree.h" -static kmem_cache_t *hfs_inode_cachep; +static struct kmem_cache *hfs_inode_cachep; MODULE_LICENSE("GPL"); @@ -430,7 +430,7 @@ static struct file_system_type hfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfs_init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct hfs_inode_info *i = p; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4a0c70c76c8..0f513c6bf84 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -434,7 +434,7 @@ MODULE_AUTHOR("Brad Boyer"); MODULE_DESCRIPTION("Extended Macintosh Filesystem"); MODULE_LICENSE("GPL"); -static kmem_cache_t *hfsplus_inode_cachep; +static struct kmem_cache *hfsplus_inode_cachep; static struct inode *hfsplus_alloc_inode(struct super_block *sb) { @@ -467,7 +467,7 @@ static struct file_system_type hfsplus_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfsplus_init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct hfsplus_inode_info *i = p; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 46ceadd6f16..34d68e21171 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -160,7 +160,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t * hpfs_inode_cachep; +static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { @@ -177,7 +177,7 @@ static void hpfs_destroy_inode(struct inode *inode) kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 36e52173a54..0706f5aac6a 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -513,7 +513,7 @@ static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) } -static kmem_cache_t *hugetlbfs_inode_cachep; +static struct kmem_cache *hugetlbfs_inode_cachep; static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) { @@ -545,7 +545,7 @@ static const struct address_space_operations hugetlbfs_aops = { }; -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; diff --git a/fs/inode.c b/fs/inode.c index dd15984d51a..699aa4f7aa7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -97,7 +97,7 @@ static DEFINE_MUTEX(iprune_mutex); */ struct inodes_stat_t inodes_stat; -static kmem_cache_t * inode_cachep __read_mostly; +static struct kmem_cache * inode_cachep __read_mostly; static struct inode *alloc_inode(struct super_block *sb) { @@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode) EXPORT_SYMBOL(inode_init_once); -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct inode * inode = (struct inode *) foo; diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 017cb0f134d..e1956e6f116 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -34,8 +34,8 @@ #include -static kmem_cache_t *watch_cachep __read_mostly; -static kmem_cache_t *event_cachep __read_mostly; +static struct kmem_cache *watch_cachep __read_mostly; +static struct kmem_cache *event_cachep __read_mostly; static struct vfsmount *inotify_mnt __read_mostly; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4b6381cd2cf..ea55b6c469e 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -57,7 +57,7 @@ static void isofs_put_super(struct super_block *sb) static void isofs_read_inode(struct inode *); static int isofs_statfs (struct dentry *, struct kstatfs *); -static kmem_cache_t *isofs_inode_cachep; +static struct kmem_cache *isofs_inode_cachep; static struct inode *isofs_alloc_inode(struct super_block *sb) { @@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct iso_inode_info *ei = foo; diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b85c686b60d..a8774bed20b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1630,7 +1630,7 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) #define JBD_MAX_SLABS 5 #define JBD_SLAB_INDEX(size) (size >> 11) -static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; static const char *jbd_slab_names[JBD_MAX_SLABS] = { "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" }; @@ -1693,7 +1693,7 @@ void jbd_slab_free(void *ptr, size_t size) /* * Journal_head storage management */ -static kmem_cache_t *journal_head_cache; +static struct kmem_cache *journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1996,7 +1996,7 @@ static void __exit remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd_handle_cache; +struct kmem_cache *jbd_handle_cache; static int __init journal_init_handle_cache(void) { diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c532429d8d9..d204ab394f3 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -70,8 +70,8 @@ #include #endif -static kmem_cache_t *revoke_record_cache; -static kmem_cache_t *revoke_table_cache; +static struct kmem_cache *revoke_record_cache; +static struct kmem_cache *revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c60f378b0f7..50356019ae3 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1641,7 +1641,7 @@ void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) #define JBD_MAX_SLABS 5 #define JBD_SLAB_INDEX(size) (size >> 11) -static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; static const char *jbd_slab_names[JBD_MAX_SLABS] = { "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" }; @@ -1704,7 +1704,7 @@ void jbd2_slab_free(void *ptr, size_t size) /* * Journal_head storage management */ -static kmem_cache_t *jbd2_journal_head_cache; +static struct kmem_cache *jbd2_journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -2007,7 +2007,7 @@ static void __exit jbd2_remove_jbd_proc_entry(void) #endif -kmem_cache_t *jbd2_handle_cache; +struct kmem_cache *jbd2_handle_cache; static int __init journal_init_handle_cache(void) { diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 380d19917f3..f506646ad0f 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -70,8 +70,8 @@ #include #endif -static kmem_cache_t *jbd2_revoke_record_cache; -static kmem_cache_t *jbd2_revoke_table_cache; +static struct kmem_cache *jbd2_revoke_record_cache; +static struct kmem_cache *jbd2_revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 3f7899ea4cb..9f15bce9202 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -61,8 +61,8 @@ static const struct file_operations jffs_dir_operations; static struct inode_operations jffs_dir_inode_operations; static const struct address_space_operations jffs_address_operations; -kmem_cache_t *node_cache = NULL; -kmem_cache_t *fm_cache = NULL; +struct kmem_cache *node_cache = NULL; +struct kmem_cache *fm_cache = NULL; /* Called by the VFS at mount time to initialize the whole file system. */ static int jffs_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c index 29b68d939bd..077258b2103 100644 --- a/fs/jffs/jffs_fm.c +++ b/fs/jffs/jffs_fm.c @@ -29,8 +29,8 @@ static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset); static struct jffs_fm *jffs_alloc_fm(void); static void jffs_free_fm(struct jffs_fm *n); -extern kmem_cache_t *fm_cache; -extern kmem_cache_t *node_cache; +extern struct kmem_cache *fm_cache; +extern struct kmem_cache *node_cache; #if CONFIG_JFFS_FS_VERBOSE > 0 void diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 33f29100501..83f9881ec4c 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -19,16 +19,16 @@ /* These are initialised to NULL in the kernel startup code. If you're porting to other operating systems, beware */ -static kmem_cache_t *full_dnode_slab; -static kmem_cache_t *raw_dirent_slab; -static kmem_cache_t *raw_inode_slab; -static kmem_cache_t *tmp_dnode_info_slab; -static kmem_cache_t *raw_node_ref_slab; -static kmem_cache_t *node_frag_slab; -static kmem_cache_t *inode_cache_slab; +static struct kmem_cache *full_dnode_slab; +static struct kmem_cache *raw_dirent_slab; +static struct kmem_cache *raw_inode_slab; +static struct kmem_cache *tmp_dnode_info_slab; +static struct kmem_cache *raw_node_ref_slab; +static struct kmem_cache *node_frag_slab; +static struct kmem_cache *inode_cache_slab; #ifdef CONFIG_JFFS2_FS_XATTR -static kmem_cache_t *xattr_datum_cache; -static kmem_cache_t *xattr_ref_cache; +static struct kmem_cache *xattr_datum_cache; +static struct kmem_cache *xattr_ref_cache; #endif int __init jffs2_create_slab_caches(void) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 77be534ce42..7deb7825402 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -28,7 +28,7 @@ static void jffs2_put_super(struct super_block *); -static kmem_cache_t *jffs2_inode_cachep; +static struct kmem_cache *jffs2_inode_cachep; static struct inode *jffs2_alloc_inode(struct super_block *sb) { @@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode) kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } -static void jffs2_i_init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 0cccd1c39d7..b1a1c729601 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -74,7 +74,7 @@ static inline void lock_metapage(struct metapage *mp) } #define METAPOOL_MIN_PAGES 32 -static kmem_cache_t *metapage_cache; +static struct kmem_cache *metapage_cache; static mempool_t *metapage_mempool; #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) @@ -180,7 +180,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct metapage *mp = (struct metapage *)foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 9c1c6e0e633..ca3e1912853 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -44,7 +44,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); -static kmem_cache_t * jfs_inode_cachep; +static struct kmem_cache * jfs_inode_cachep; static struct super_operations jfs_super_operations; static struct export_operations jfs_export_operations; @@ -748,7 +748,7 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/locks.c b/fs/locks.c index a7b97d50c1e..1cb0c57fedb 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -142,7 +142,7 @@ int lease_break_time = 45; static LIST_HEAD(file_lock_list); static LIST_HEAD(blocked_list); -static kmem_cache_t *filelock_cache __read_mostly; +static struct kmem_cache *filelock_cache __read_mostly; /* Allocate an empty lock structure. */ static struct file_lock *locks_alloc_lock(void) @@ -199,7 +199,7 @@ EXPORT_SYMBOL(locks_init_lock); * Initialises the fields of the file lock which are invariant for * free file_locks. */ -static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags) { struct file_lock *lock = (struct file_lock *) foo; diff --git a/fs/mbcache.c b/fs/mbcache.c index 0ff71256e65..deeb9dc062d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -85,7 +85,7 @@ struct mb_cache { #ifndef MB_CACHE_INDEXES_COUNT int c_indexes_count; #endif - kmem_cache_t *c_entry_cache; + struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; struct list_head *c_indexes_hash[0]; }; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index ce532c2deda..629e09b38c5 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -51,7 +51,7 @@ static void minix_put_super(struct super_block *sb) return; } -static kmem_cache_t * minix_inode_cachep; +static struct kmem_cache * minix_inode_cachep; static struct inode *minix_alloc_inode(struct super_block *sb) { @@ -67,7 +67,7 @@ static void minix_destroy_inode(struct inode *inode) kmem_cache_free(minix_inode_cachep, minix_i(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/namespace.c b/fs/namespace.c index 55442a6cf22..b00ac84ebbd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -36,7 +36,7 @@ static int event; static struct list_head *mount_hashtable __read_mostly; static int hash_mask __read_mostly, hash_bits __read_mostly; -static kmem_cache_t *mnt_cache __read_mostly; +static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; /* /sys/fs */ diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index ed84d899220..fae53243bb9 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -40,7 +40,7 @@ static void ncp_delete_inode(struct inode *); static void ncp_put_super(struct super_block *); static int ncp_statfs(struct dentry *, struct kstatfs *); -static kmem_cache_t * ncp_inode_cachep; +static struct kmem_cache * ncp_inode_cachep; static struct inode *ncp_alloc_inode(struct super_block *sb) { @@ -56,7 +56,7 @@ static void ncp_destroy_inode(struct inode *inode) kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 769fd0a0c77..2f488e1d9b6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -58,7 +58,7 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static kmem_cache_t *nfs_direct_cachep; +static struct kmem_cache *nfs_direct_cachep; /* * This represents a set of asynchronous requests that we're waiting on diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6b53aae4ed2..15afa460e62 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -55,7 +55,7 @@ static int nfs_update_inode(struct inode *, struct nfs_fattr *); static void nfs_zap_acl_cache(struct inode *); -static kmem_cache_t * nfs_inode_cachep; +static struct kmem_cache * nfs_inode_cachep; static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) @@ -1111,7 +1111,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) #endif } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a1561a820ab..3fbfc2f0330 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -20,7 +20,7 @@ #define NFS_PARANOIA 1 -static kmem_cache_t *nfs_page_cachep; +static struct kmem_cache *nfs_page_cachep; static inline struct nfs_page * nfs_page_alloc(void) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 56f66f0ccb6..244a8c45b68 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -38,7 +38,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; -static kmem_cache_t *nfs_rdata_cachep; +static struct kmem_cache *nfs_rdata_cachep; static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f7dd0d00595..41b07288f99 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -85,7 +85,7 @@ static const struct rpc_call_ops nfs_write_partial_ops; static const struct rpc_call_ops nfs_write_full_ops; static const struct rpc_call_ops nfs_commit_ops; -static kmem_cache_t *nfs_wdata_cachep; +static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e431e93ab50..640c92b2a9f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -84,10 +84,10 @@ static void nfs4_set_recdir(char *recdir); */ static DEFINE_MUTEX(client_mutex); -static kmem_cache_t *stateowner_slab = NULL; -static kmem_cache_t *file_slab = NULL; -static kmem_cache_t *stateid_slab = NULL; -static kmem_cache_t *deleg_slab = NULL; +static struct kmem_cache *stateowner_slab = NULL; +static struct kmem_cache *file_slab = NULL; +static struct kmem_cache *stateid_slab = NULL; +static struct kmem_cache *deleg_slab = NULL; void nfs4_lock_state(void) @@ -1003,7 +1003,7 @@ alloc_init_file(struct inode *ino) } static void -nfsd4_free_slab(kmem_cache_t **slab) +nfsd4_free_slab(struct kmem_cache **slab) { if (*slab == NULL) return; diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 01f91501f70..941acf14e61 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -66,7 +66,7 @@ static struct file_operations dlmfs_file_operations; static struct inode_operations dlmfs_dir_inode_operations; static struct inode_operations dlmfs_root_inode_operations; static struct inode_operations dlmfs_file_inode_operations; -static kmem_cache_t *dlmfs_inode_cache; +static struct kmem_cache *dlmfs_inode_cache; struct workqueue_struct *user_dlm_worker; @@ -257,7 +257,7 @@ static ssize_t dlmfs_file_write(struct file *filp, } static void dlmfs_init_once(void *foo, - kmem_cache_t *cachep, + struct kmem_cache *cachep, unsigned long flags) { struct dlmfs_inode_private *ip = diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index f784177b624..856012b4fa4 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL_GPL(dlm_dump_all_mles); #endif /* 0 */ -static kmem_cache_t *dlm_mle_cache = NULL; +static struct kmem_cache *dlm_mle_cache = NULL; static void dlm_mle_release(struct kref *kref); diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index fcd4475d1f8..80ac69f11d9 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -61,7 +61,7 @@ struct ocfs2_em_insert_context { struct ocfs2_extent_map_entry *right_ent; }; -static kmem_cache_t *ocfs2_em_ent_cachep = NULL; +static struct kmem_cache *ocfs2_em_ent_cachep = NULL; static struct ocfs2_extent_map_entry * diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 46a378fbc40..1a7dd2945b3 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -106,7 +106,7 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) #define INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags & OCFS2_INODE_JOURNAL) #define SET_INODE_JOURNAL(i) (OCFS2_I(i)->ip_flags |= OCFS2_INODE_JOURNAL) -extern kmem_cache_t *ocfs2_inode_cache; +extern struct kmem_cache *ocfs2_inode_cache; extern const struct address_space_operations ocfs2_aops; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 7574d26ee0f..0524f8a04ad 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -68,7 +68,7 @@ #include "buffer_head_io.h" -static kmem_cache_t *ocfs2_inode_cachep = NULL; +static struct kmem_cache *ocfs2_inode_cachep = NULL; /* OCFS2 needs to schedule several differnt types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these @@ -914,7 +914,7 @@ bail: } static void ocfs2_inode_init_once(void *data, - kmem_cache_t *cachep, + struct kmem_cache *cachep, unsigned long flags) { struct ocfs2_inode_info *oi = data; diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 9707ed7a320..39814b900fc 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -69,7 +69,7 @@ struct ocfs2_meta_cache_item { sector_t c_block; }; -static kmem_cache_t *ocfs2_uptodate_cachep = NULL; +static struct kmem_cache *ocfs2_uptodate_cachep = NULL; void ocfs2_metadata_cache_init(struct inode *inode) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 911d1bcfc56..26f44e0074e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -330,7 +330,7 @@ out: return 0; } -static kmem_cache_t *op_inode_cachep; +static struct kmem_cache *op_inode_cachep; static struct inode *openprom_alloc_inode(struct super_block *sb) { @@ -415,7 +415,7 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; -static void op_inode_init_once(void *data, kmem_cache_t * cachep, unsigned long flags) +static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned long flags) { struct op_inode_info *oi = (struct op_inode_info *) data; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index b24cdb2f17c..e26945ba685 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -81,7 +81,7 @@ static void proc_read_inode(struct inode * inode) inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; } -static kmem_cache_t * proc_inode_cachep; +static struct kmem_cache * proc_inode_cachep; static struct inode *proc_alloc_inode(struct super_block *sb) { @@ -105,7 +105,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 5b943eb11d7..c047dc654d5 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -515,7 +515,7 @@ static void qnx4_read_inode(struct inode *inode) brelse(bh); } -static kmem_cache_t *qnx4_inode_cachep; +static struct kmem_cache *qnx4_inode_cachep; static struct inode *qnx4_alloc_inode(struct super_block *sb) { @@ -531,7 +531,7 @@ static void qnx4_destroy_inode(struct inode *inode) kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 32332516d65..745bc714ba9 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -490,7 +490,7 @@ static void reiserfs_put_super(struct super_block *s) return; } -static kmem_cache_t *reiserfs_inode_cachep; +static struct kmem_cache *reiserfs_inode_cachep; static struct inode *reiserfs_alloc_inode(struct super_block *sb) { @@ -507,7 +507,7 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index d1b455f9b66..c5af088d4a4 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -550,7 +550,7 @@ romfs_read_inode(struct inode *i) } } -static kmem_cache_t * romfs_inode_cachep; +static struct kmem_cache * romfs_inode_cachep; static struct inode *romfs_alloc_inode(struct super_block *sb) { @@ -566,7 +566,7 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 22161710348..4af4cd729a5 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -50,7 +50,7 @@ static void smb_put_super(struct super_block *); static int smb_statfs(struct dentry *, struct kstatfs *); static int smb_show_options(struct seq_file *, struct vfsmount *); -static kmem_cache_t *smb_inode_cachep; +static struct kmem_cache *smb_inode_cachep; static struct inode *smb_alloc_inode(struct super_block *sb) { @@ -66,7 +66,7 @@ static void smb_destroy_inode(struct inode *inode) kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; unsigned long flagmask = SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR; diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c index 3eb1402191b..a4bcae8a9af 100644 --- a/fs/smbfs/request.c +++ b/fs/smbfs/request.c @@ -25,7 +25,7 @@ #define ROUND_UP(x) (((x)+3) & ~3) /* cache for request structures */ -static kmem_cache_t *req_cachep; +static struct kmem_cache *req_cachep; static int smb_request_send_req(struct smb_request *req); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 20551a1b8a5..e503f858fba 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -16,7 +16,7 @@ struct vfsmount *sysfs_mount; struct super_block * sysfs_sb = NULL; -kmem_cache_t *sysfs_dir_cachep; +struct kmem_cache *sysfs_dir_cachep; static struct super_operations sysfs_ops = { .statfs = simple_statfs, diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 6f3d6bd5288..bd7cec295da 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -1,6 +1,6 @@ extern struct vfsmount * sysfs_mount; -extern kmem_cache_t *sysfs_dir_cachep; +extern struct kmem_cache *sysfs_dir_cachep; extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index a6ca12b747c..ead9864567e 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -301,7 +301,7 @@ static void sysv_delete_inode(struct inode *inode) unlock_kernel(); } -static kmem_cache_t *sysv_inode_cachep; +static struct kmem_cache *sysv_inode_cachep; static struct inode *sysv_alloc_inode(struct super_block *sb) { @@ -318,7 +318,7 @@ static void sysv_destroy_inode(struct inode *inode) kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } -static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/udf/super.c b/fs/udf/super.c index e50f24221de..397f54aaf66 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -107,7 +107,7 @@ static struct file_system_type udf_fstype = { .fs_flags = FS_REQUIRES_DEV, }; -static kmem_cache_t * udf_inode_cachep; +static struct kmem_cache * udf_inode_cachep; static struct inode *udf_alloc_inode(struct super_block *sb) { @@ -130,7 +130,7 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct udf_inode_info *ei = (struct udf_inode_info *) foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 85a88c0c5e6..0b18d2cc2ef 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1204,7 +1204,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static kmem_cache_t * ufs_inode_cachep; +static struct kmem_cache * ufs_inode_cachep; static struct inode *ufs_alloc_inode(struct super_block *sb) { @@ -1221,7 +1221,7 @@ static void ufs_destroy_inode(struct inode *inode) kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 47faf27913a..7f1e92930b6 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -64,7 +64,7 @@ /* Host-dependent types and defines */ #define ACPI_MACHINE_WIDTH BITS_PER_LONG -#define acpi_cache_t kmem_cache_t +#define acpi_cache_t struct kmem_cache #define acpi_spinlock spinlock_t * #define ACPI_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL(symbol); #define strtoul simple_strtoul diff --git a/include/asm-arm26/pgalloc.h b/include/asm-arm26/pgalloc.h index 6437167b1ff..7725af3ddb4 100644 --- a/include/asm-arm26/pgalloc.h +++ b/include/asm-arm26/pgalloc.h @@ -15,7 +15,7 @@ #include #include -extern kmem_cache_t *pte_cache; +extern struct kmem_cache *pte_cache; static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr){ return kmem_cache_alloc(pte_cache, GFP_KERNEL); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 7d398f493dd..bfee7ddfff5 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -34,14 +34,14 @@ struct vm_area_struct; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) extern unsigned long empty_zero_page[1024]; extern pgd_t swapper_pg_dir[1024]; -extern kmem_cache_t *pgd_cache; -extern kmem_cache_t *pmd_cache; +extern struct kmem_cache *pgd_cache; +extern struct kmem_cache *pmd_cache; extern spinlock_t pgd_lock; extern struct page *pgd_list; -void pmd_ctor(void *, kmem_cache_t *, unsigned long); -void pgd_ctor(void *, kmem_cache_t *, unsigned long); -void pgd_dtor(void *, kmem_cache_t *, unsigned long); +void pmd_ctor(void *, struct kmem_cache *, unsigned long); +void pgd_ctor(void *, struct kmem_cache *, unsigned long); +void pgd_dtor(void *, struct kmem_cache *, unsigned long); void pgtable_cache_init(void); void paging_init(void); diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h index ae63db7b3e7..b0830db68f8 100644 --- a/include/asm-powerpc/pgalloc.h +++ b/include/asm-powerpc/pgalloc.h @@ -11,7 +11,7 @@ #include #include -extern kmem_cache_t *pgtable_cache[]; +extern struct kmem_cache *pgtable_cache[]; #ifdef CONFIG_PPC_64K_PAGES #define PTE_CACHE_NUM 0 diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index 010f9cd0a67..5891ff7ba76 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -13,7 +13,7 @@ #include /* Page table allocation/freeing. */ -extern kmem_cache_t *pgtable_cache; +extern struct kmem_cache *pgtable_cache; static inline pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 561e2a77805..55d1ca5e60f 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -30,7 +30,7 @@ #ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ -extern kmem_cache_t *delayacct_cache; +extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 1fb02e17f6f..2514f4e286b 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -490,7 +490,7 @@ struct i2o_dma { */ struct i2o_pool { char *name; - kmem_cache_t *slab; + struct kmem_cache *slab; mempool_t *mempool; }; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index fe89444b1c6..dacd566c9f6 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -949,7 +949,7 @@ void journal_put_journal_head(struct journal_head *jh); /* * handle management */ -extern kmem_cache_t *jbd_handle_cache; +extern struct kmem_cache *jbd_handle_cache; static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ddb12879578..98a0ae52660 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -958,7 +958,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh); /* * handle management */ -extern kmem_cache_t *jbd2_handle_cache; +extern struct kmem_cache *jbd2_handle_cache; static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index f13299a1559..03636d7918f 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -235,7 +235,7 @@ struct raid5_private_data { */ int active_name; char cache_name[2][20]; - kmem_cache_t *slab_cache; /* for allocating stripes */ + struct kmem_cache *slab_cache; /* for allocating stripes */ int seq_flush, seq_write; int quiesce; diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 61c2ab634b0..36f850373d2 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -30,7 +30,7 @@ struct anon_vma { #ifdef CONFIG_MMU -extern kmem_cache_t *anon_vma_cachep; +extern struct kmem_cache *anon_vma_cachep; static inline struct anon_vma *anon_vma_alloc(void) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1d649f3eb00..4ff3940210d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,7 +345,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } -extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +extern struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t priority); extern void kfree_skbmem(struct sk_buff *skb); diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index f81a5af8a4f..ce8a912e542 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -12,7 +12,7 @@ #include #ifdef CONFIG_TASKSTATS -extern kmem_cache_t *taskstats_cache; +extern struct kmem_cache *taskstats_cache; extern struct mutex taskstats_exit_mutex; static inline void taskstats_exit_free(struct taskstats *tidstats) diff --git a/include/net/dst.h b/include/net/dst.h index e156e38e4ac..62b7e7598e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -98,7 +98,7 @@ struct dst_ops int entry_size; atomic_t entries; - kmem_cache_t *kmem_cachep; + struct kmem_cache *kmem_cachep; }; #ifdef __KERNEL__ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a9eb2eaf094..34cc76e3ddb 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -125,7 +125,7 @@ struct inet_hashinfo { rwlock_t lhash_lock ____cacheline_aligned; atomic_t lhash_users; wait_queue_head_t lhash_wait; - kmem_cache_t *bind_bucket_cachep; + struct kmem_cache *bind_bucket_cachep; }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -136,10 +136,10 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( } extern struct inet_bind_bucket * - inet_bind_bucket_create(kmem_cache_t *cachep, + inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, const unsigned short snum); -extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, +extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); static inline int inet_bhashfn(const __u16 lport, const int bhash_size) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index c8aacbd2e33..23967031ddb 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -160,7 +160,7 @@ struct neigh_table atomic_t entries; rwlock_t lock; unsigned long last_rand; - kmem_cache_t *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics *stats; struct neighbour **hash_buckets; unsigned int hash_mask; diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index cef3136e22a..41bcc9eb420 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -7,7 +7,7 @@ #include extern struct list_head nf_conntrack_expect_list; -extern kmem_cache_t *nf_conntrack_expect_cachep; +extern struct kmem_cache *nf_conntrack_expect_cachep; extern struct file_operations exp_file_ops; struct nf_conntrack_expect diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 426f0fe774e..7aed02ce2b6 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -29,7 +29,7 @@ struct proto; struct request_sock_ops { int family; int obj_size; - kmem_cache_t *slab; + struct kmem_cache *slab; int (*rtx_syn_ack)(struct sock *sk, struct request_sock *req, struct dst_entry *dst); diff --git a/include/net/sock.h b/include/net/sock.h index fe3a33fad03..730899ce516 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -571,7 +571,7 @@ struct proto { int *sysctl_rmem; int max_header; - kmem_cache_t *slab; + struct kmem_cache *slab; unsigned int obj_size; atomic_t *orphan_count; diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index d7a306ea560..1e1ee3253fd 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -15,7 +15,7 @@ #include struct timewait_sock_ops { - kmem_cache_t *twsk_slab; + struct kmem_cache *twsk_slab; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 9233ed5de66..0c775fceb67 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -557,7 +557,7 @@ struct sas_task { static inline struct sas_task *sas_alloc_task(gfp_t flags) { - extern kmem_cache_t *sas_task_cache; + extern struct kmem_cache *sas_task_cache; struct sas_task *task = kmem_cache_alloc(sas_task_cache, flags); if (task) { @@ -575,7 +575,7 @@ static inline struct sas_task *sas_alloc_task(gfp_t flags) static inline void sas_free_task(struct sas_task *task) { if (task) { - extern kmem_cache_t *sas_task_cache; + extern struct kmem_cache *sas_task_cache; BUG_ON(!list_empty(&task->list)); kmem_cache_free(sas_task_cache, task); } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 813bb941342..3acc1661e51 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -90,7 +90,7 @@ static struct super_operations mqueue_super_ops; static void remove_notification(struct mqueue_inode_info *info); static spinlock_t mq_lock; -static kmem_cache_t *mqueue_inode_cachep; +static struct kmem_cache *mqueue_inode_cachep; static struct vfsmount *mqueue_mnt; static unsigned int queues_count; @@ -211,7 +211,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type, return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags) { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 70e9ec60308..766d5912b26 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -20,7 +20,7 @@ #include int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ -kmem_cache_t *delayacct_cache; +struct kmem_cache *delayacct_cache; static int __init delayacct_setup_disable(char *str) { diff --git a/kernel/fork.c b/kernel/fork.c index 711aa5f10da..2cf74edd329 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -82,26 +82,26 @@ int nr_processes(void) #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR # define alloc_task_struct() kmem_cache_alloc(task_struct_cachep, GFP_KERNEL) # define free_task_struct(tsk) kmem_cache_free(task_struct_cachep, (tsk)) -static kmem_cache_t *task_struct_cachep; +static struct kmem_cache *task_struct_cachep; #endif /* SLAB cache for signal_struct structures (tsk->signal) */ -static kmem_cache_t *signal_cachep; +static struct kmem_cache *signal_cachep; /* SLAB cache for sighand_struct structures (tsk->sighand) */ -kmem_cache_t *sighand_cachep; +struct kmem_cache *sighand_cachep; /* SLAB cache for files_struct structures (tsk->files) */ -kmem_cache_t *files_cachep; +struct kmem_cache *files_cachep; /* SLAB cache for fs_struct structures (tsk->fs) */ -kmem_cache_t *fs_cachep; +struct kmem_cache *fs_cachep; /* SLAB cache for vm_area_struct structures */ -kmem_cache_t *vm_area_cachep; +struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ -static kmem_cache_t *mm_cachep; +static struct kmem_cache *mm_cachep; void free_task(struct task_struct *tsk) { @@ -1421,7 +1421,7 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +static void sighand_ctor(void *data, struct kmem_cache *cachep, unsigned long flags) { struct sighand_struct *sighand = data; diff --git a/kernel/pid.c b/kernel/pid.c index b914392085f..a48879b0b92 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -31,7 +31,7 @@ #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) static struct hlist_head *pid_hash; static int pidhash_shift; -static kmem_cache_t *pid_cachep; +static struct kmem_cache *pid_cachep; int pid_max = PID_MAX_DEFAULT; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 9cbb5d1be06..5fe87de10ff 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -70,7 +70,7 @@ /* * Lets keep our timers in a slab cache :-) */ -static kmem_cache_t *posix_timers_cache; +static struct kmem_cache *posix_timers_cache; static struct idr posix_timers_id; static DEFINE_SPINLOCK(idr_lock); diff --git a/kernel/signal.c b/kernel/signal.c index df18c167a2a..8e19d278548 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -33,7 +33,7 @@ * SLAB caches for signal bits. */ -static kmem_cache_t *sigqueue_cachep; +static struct kmem_cache *sigqueue_cachep; /* * In POSIX a signal is sent either to a specific thread (Linux task) diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 1b2b326cf70..f5f92014ae9 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -34,7 +34,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 }; static int family_registered; -kmem_cache_t *taskstats_cache; +struct kmem_cache *taskstats_cache; static struct genl_family family = { .id = GENL_ID_GENERATE, diff --git a/kernel/user.c b/kernel/user.c index c1f93c164c9..4869563080e 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -26,7 +26,7 @@ #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) #define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) -static kmem_cache_t *uid_cachep; +static struct kmem_cache *uid_cachep; static struct list_head uidhash_table[UIDHASH_SZ]; /* diff --git a/lib/idr.c b/lib/idr.c index 16d2143fea4..71853531d3b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -33,7 +33,7 @@ #include #include -static kmem_cache_t *idr_layer_cache; +static struct kmem_cache *idr_layer_cache; static struct idr_layer *alloc_layer(struct idr *idp) { @@ -445,7 +445,7 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache, +static void idr_cache_ctor(void * idr_layer, struct kmem_cache *idr_layer_cache, unsigned long flags) { memset(idr_layer, 0, sizeof(struct idr_layer)); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index aa9bfd0bdbd..9eb25955019 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -63,7 +63,7 @@ static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly; /* * Radix tree node cache. */ -static kmem_cache_t *radix_tree_node_cachep; +static struct kmem_cache *radix_tree_node_cachep; /* * Per-cpu pool of preloaded nodes @@ -846,7 +846,7 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags) +radix_tree_node_ctor(void *node, struct kmem_cache *cachep, unsigned long flags) { memset(node, 0, sizeof(struct radix_tree_node)); } diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d9f04864d15..8ca448db7a0 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -23,7 +23,7 @@ #include #include "br_private.h" -static kmem_cache_t *br_fdb_cache __read_mostly; +static struct kmem_cache *br_fdb_cache __read_mostly; static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr); diff --git a/net/core/flow.c b/net/core/flow.c index 5df3e297f81..104c25d00a1 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -44,7 +44,7 @@ static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; #define flow_table(cpu) (per_cpu(flow_tables, cpu)) -static kmem_cache_t *flow_cachep __read_mostly; +static struct kmem_cache *flow_cachep __read_mostly; static int flow_lwm, flow_hwm; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7217fb8928f..de7801d589e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -68,8 +68,8 @@ #include "kmap_skb.h" -static kmem_cache_t *skbuff_head_cache __read_mostly; -static kmem_cache_t *skbuff_fclone_cache __read_mostly; +static struct kmem_cache *skbuff_head_cache __read_mostly; +static struct kmem_cache *skbuff_fclone_cache __read_mostly; /* * Keep out-of-line to prevent kernel bloat. @@ -144,7 +144,7 @@ EXPORT_SYMBOL(skb_truesize_bug); struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, int fclone, int node) { - kmem_cache_t *cache; + struct kmem_cache *cache; struct skb_shared_info *shinfo; struct sk_buff *skb; u8 *data; @@ -211,7 +211,7 @@ nodata: * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. */ -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, +struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp, unsigned int size, gfp_t gfp_mask) { diff --git a/net/core/sock.c b/net/core/sock.c index 419c7d3289c..4a432da441e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -841,7 +841,7 @@ struct sock *sk_alloc(int family, gfp_t priority, struct proto *prot, int zero_it) { struct sock *sk = NULL; - kmem_cache_t *slab = prot->slab; + struct kmem_cache *slab = prot->slab; if (slab != NULL) sk = kmem_cache_alloc(slab, priority); diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index bdf1bb7a82c..1f4727ddbdb 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -21,8 +21,8 @@ #include -static kmem_cache_t *dccp_ackvec_slab; -static kmem_cache_t *dccp_ackvec_record_slab; +static struct kmem_cache *dccp_ackvec_slab; +static struct kmem_cache *dccp_ackvec_record_slab; static struct dccp_ackvec_record *dccp_ackvec_record_new(void) { diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index ff05e59043c..d8cf92f09e6 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -55,9 +55,9 @@ static inline void ccids_read_unlock(void) #define ccids_read_unlock() do { } while(0) #endif -static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) +static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { - kmem_cache_t *slab; + struct kmem_cache *slab; char slab_name_fmt[32], *slab_name; va_list args; @@ -75,7 +75,7 @@ static kmem_cache_t *ccid_kmem_cache_create(int obj_size, const char *fmt,...) return slab; } -static void ccid_kmem_cache_destroy(kmem_cache_t *slab) +static void ccid_kmem_cache_destroy(struct kmem_cache *slab) { if (slab != NULL) { const char *name = kmem_cache_name(slab); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index c7c29514dce..bcc2d12ae81 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -27,9 +27,9 @@ struct ccid_operations { unsigned char ccid_id; const char *ccid_name; struct module *ccid_owner; - kmem_cache_t *ccid_hc_rx_slab; + struct kmem_cache *ccid_hc_rx_slab; __u32 ccid_hc_rx_obj_size; - kmem_cache_t *ccid_hc_tx_slab; + struct kmem_cache *ccid_hc_tx_slab; __u32 ccid_hc_tx_obj_size; int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk); int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 0ae85f0340b..eb257014dd7 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -20,7 +20,7 @@ #define DCCP_LI_HIST_IVAL_F_LENGTH 8 struct dccp_li_hist { - kmem_cache_t *dccplih_slab; + struct kmem_cache *dccplih_slab; }; extern struct dccp_li_hist *dccp_li_hist_new(const char *name); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 067cf1c85a3..9a8bcf224aa 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -68,14 +68,14 @@ struct dccp_rx_hist_entry { }; struct dccp_tx_hist { - kmem_cache_t *dccptxh_slab; + struct kmem_cache *dccptxh_slab; }; extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); struct dccp_rx_hist { - kmem_cache_t *dccprxh_slab; + struct kmem_cache *dccprxh_slab; }; extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 101e5ccaf09..13b2421991b 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -79,7 +79,7 @@ for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_n static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -static kmem_cache_t *dn_hash_kmem __read_mostly; +static struct kmem_cache *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4463443e42c..648f47c1c39 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -45,8 +45,8 @@ #include "fib_lookup.h" -static kmem_cache_t *fn_hash_kmem __read_mostly; -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_hash_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; struct fib_node { struct hlist_node fn_hash; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6be6caf1af3..cfb249cc0a5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -172,7 +172,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); static void tnode_free(struct tnode *tn); -static kmem_cache_t *fn_alias_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bd6c9bc4189..8c79c8a4ea5 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -27,7 +27,7 @@ * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. */ -struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, +struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, const unsigned short snum) { @@ -45,7 +45,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, /* * Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) +void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f072f3875af..711eb6d0285 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -73,7 +73,7 @@ /* Exported for inet_getid inline function. */ DEFINE_SPINLOCK(inet_peer_idlock); -static kmem_cache_t *peer_cachep __read_mostly; +static struct kmem_cache *peer_cachep __read_mostly; #define node_height(x) x->avl_height static struct inet_peer peer_fake_node = { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index efcf45ecc81..ecb5422ea23 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -105,7 +105,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); In this case data path is free of exclusive locks at all. */ -static kmem_cache_t *mrt_cachep __read_mostly; +static struct kmem_cache *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 8832eb517d5..8086787a2c5 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -44,7 +44,7 @@ static struct list_head *ip_vs_conn_tab; /* SLAB cache for IPVS connections */ -static kmem_cache_t *ip_vs_conn_cachep __read_mostly; +static struct kmem_cache *ip_vs_conn_cachep __read_mostly; /* counter for current IPVS connections */ static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index f4b0e68a16d..8556a4f4f60 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -65,8 +65,8 @@ static LIST_HEAD(helpers); unsigned int ip_conntrack_htable_size __read_mostly = 0; int ip_conntrack_max __read_mostly; struct list_head *ip_conntrack_hash __read_mostly; -static kmem_cache_t *ip_conntrack_cachep __read_mostly; -static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_cachep __read_mostly; +static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; unsigned int ip_ct_log_invalid __read_mostly; static LIST_HEAD(unconfirmed); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 97a8cfbb61a..96d8310ae9c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -50,7 +50,7 @@ struct rt6_statistics rt6_stats; -static kmem_cache_t * fib6_node_kmem __read_mostly; +static struct kmem_cache * fib6_node_kmem __read_mostly; enum fib_walk_state_t { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index d4f68b0f27d..12e426b9aac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -50,7 +50,7 @@ static u32 xfrm6_tunnel_spi; #define XFRM6_TUNNEL_SPI_MIN 1 #define XFRM6_TUNNEL_SPI_MAX 0xffffffff -static kmem_cache_t *xfrm6_tunnel_spi_kmem __read_mostly; +static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; #define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 #define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index eaa0f8a1adb..a9638ff52a7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -108,7 +108,7 @@ static struct { size_t size; /* slab cache pointer */ - kmem_cache_t *cachep; + struct kmem_cache *cachep; /* allocated slab cache + modules which uses this slab cache */ int use; @@ -147,7 +147,7 @@ int nf_conntrack_register_cache(u_int32_t features, const char *name, { int ret = 0; char *cache_name; - kmem_cache_t *cachep; + struct kmem_cache *cachep; DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", features, name, size); @@ -226,7 +226,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_cache); /* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ void nf_conntrack_unregister_cache(u_int32_t features) { - kmem_cache_t *cachep; + struct kmem_cache *cachep; char *name; /* diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 588d3793704..c20f901fa17 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -29,7 +29,7 @@ LIST_HEAD(nf_conntrack_expect_list); EXPORT_SYMBOL_GPL(nf_conntrack_expect_list); -kmem_cache_t *nf_conntrack_expect_cachep __read_mostly; +struct kmem_cache *nf_conntrack_expect_cachep __read_mostly; static unsigned int nf_conntrack_expect_next_id; /* nf_conntrack_expect helper functions */ diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a98de0b54d6..a5a6e192ac2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -92,7 +92,7 @@ struct xt_hashlimit_htable { static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep __read_mostly; +static struct kmem_cache *hashlimit_cachep __read_mostly; static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 11f3b549f4a..f2ba8615895 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -79,8 +79,8 @@ static struct sctp_pf *sctp_pf_inet_specific; static struct sctp_af *sctp_af_v4_specific; static struct sctp_af *sctp_af_v6_specific; -kmem_cache_t *sctp_chunk_cachep __read_mostly; -kmem_cache_t *sctp_bucket_cachep __read_mostly; +struct kmem_cache *sctp_chunk_cachep __read_mostly; +struct kmem_cache *sctp_bucket_cachep __read_mostly; /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8d55d10041f..30927d3a597 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -65,7 +65,7 @@ #include #include -extern kmem_cache_t *sctp_chunk_cachep; +extern struct kmem_cache *sctp_chunk_cachep; SCTP_STATIC struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 49607792cbd..1e8132b8c4d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -107,7 +107,7 @@ static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; -extern kmem_cache_t *sctp_bucket_cachep; +extern struct kmem_cache *sctp_bucket_cachep; /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) diff --git a/net/socket.c b/net/socket.c index 4f417c2ddc1..43eff489c87 100644 --- a/net/socket.c +++ b/net/socket.c @@ -230,7 +230,7 @@ int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t *sock_inode_cachep __read_mostly; +static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { @@ -257,7 +257,7 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) { struct socket_alloc *ei = (struct socket_alloc *)foo; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index df753d0a884..19703aa9659 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -33,7 +33,7 @@ static int rpc_mount_count; static struct file_system_type rpc_pipe_fs_type; -static kmem_cache_t *rpc_inode_cachep __read_mostly; +static struct kmem_cache *rpc_inode_cachep __read_mostly; #define RPC_UPCALL_TIMEOUT (30*HZ) @@ -824,7 +824,7 @@ static struct file_system_type rpc_pipe_fs_type = { }; static void -init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) { struct rpc_inode *rpci = (struct rpc_inode *) foo; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index eff44bcdc95..225e6510b52 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -34,8 +34,8 @@ static int rpc_task_id; #define RPC_BUFFER_MAXSIZE (2048) #define RPC_BUFFER_POOLSIZE (8) #define RPC_TASK_POOLSIZE (8) -static kmem_cache_t *rpc_task_slabp __read_mostly; -static kmem_cache_t *rpc_buffer_slabp __read_mostly; +static struct kmem_cache *rpc_task_slabp __read_mostly; +static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; diff --git a/net/tipc/handler.c b/net/tipc/handler.c index ae6ddf00a1a..eb80778d6d9 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -42,7 +42,7 @@ struct queue_item { unsigned long data; }; -static kmem_cache_t *tipc_queue_item_cache; +static struct kmem_cache *tipc_queue_item_cache; static struct list_head signal_queue_head; static DEFINE_SPINLOCK(qitem_lock); static int handler_enabled = 0; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a898a6a83a5..414f8907038 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -12,7 +12,7 @@ #include #include -static kmem_cache_t *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __read_mostly; void __secpath_destroy(struct sec_path *sp) { diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f6c77bd36fd..3f3f563eb4a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; -static kmem_cache_t *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; static HLIST_HEAD(xfrm_policy_gc_list); diff --git a/security/keys/key.c b/security/keys/key.c index 157bac658bf..0db816f10f8 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -20,7 +20,7 @@ #include #include "internal.h" -static kmem_cache_t *key_jar; +static struct kmem_cache *key_jar; struct rb_root key_serial_tree; /* tree of keys indexed by serial */ DEFINE_SPINLOCK(key_serial_lock); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 65b4ec9c699..e7c0b5e2066 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -124,7 +124,7 @@ DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; static struct avc_cache avc_cache; static struct avc_callback_node *avc_callbacks; -static kmem_cache_t *avc_node_cachep; +static struct kmem_cache *avc_node_cachep; static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ac1aeed0b28..44e9cd47054 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -124,7 +124,7 @@ static struct security_operations *secondary_ops = NULL; static LIST_HEAD(superblock_security_head); static DEFINE_SPINLOCK(sb_security_lock); -static kmem_cache_t *sel_inode_cache; +static struct kmem_cache *sel_inode_cache; /* Return security context for a given sid or just the context length if the buffer is null or length is 0 */ diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 2dfc6134c2c..ebb993c5c24 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -28,7 +28,7 @@ (keyp->source_type << 9)) & \ AVTAB_HASH_MASK) -static kmem_cache_t *avtab_node_cachep; +static struct kmem_cache *avtab_node_cachep; static struct avtab_node* avtab_insert_node(struct avtab *h, int hvalue, -- cgit v1.2.3-70-g09d2 From 1b1cec4bbc59feac89670d5d6d222a02545bac94 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:33:22 -0800 Subject: [PATCH] slab: deprecate kmem_cache_t Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index fbcfc208f52..2271886744f 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -7,16 +7,17 @@ #ifndef _LINUX_SLAB_H #define _LINUX_SLAB_H -#if defined(__KERNEL__) +#ifdef __KERNEL__ -/* kmem_cache_t exists for legacy reasons and is not used by code in mm */ -typedef struct kmem_cache kmem_cache_t; +#include +#include +#include +#include /* kmalloc_sizes.h needs PAGE_SIZE */ +#include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ +#include -#include -#include -#include -#include /* kmalloc_sizes.h needs PAGE_SIZE */ -#include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ +/* kmem_cache_t exists for legacy reasons and is not used by code in mm */ +typedef struct kmem_cache kmem_cache_t __deprecated; /* flags to pass to kmem_cache_create(). * The first 3 are only valid when the allocator as been build -- cgit v1.2.3-70-g09d2 From 33f2ef89f8e181486b63fdbdc97c6afa6ca9f34b Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 6 Dec 2006 20:33:32 -0800 Subject: [PATCH] mm: make compound page destructor handling explicit Currently we we use the lru head link of the second page of a compound page to hold its destructor. This was ok when it was purely an internal implmentation detail. However, hugetlbfs overrides this destructor violating the layering. Abstract this out as explicit calls, also introduce a type for the callback function allowing them to be type checked. For each callback we pre-declare the function, causing a type error on definition rather than on use elsewhere. [akpm@osdl.org: cleanups] Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 18 ++++++++++++++++++ mm/hugetlb.c | 2 +- mm/page_alloc.c | 2 +- mm/swap.c | 4 ++-- 4 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e266fe1b4c..a17b147c61e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -295,6 +295,24 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); +/* + * Compound pages have a destructor function. Provide a + * prototype for that function and accessor functions. + * These are _only_ valid on the head of a PG_compound page. + */ +typedef void compound_page_dtor(struct page *); + +static inline void set_compound_page_dtor(struct page *page, + compound_page_dtor *dtor) +{ + page[1].lru.next = (void *)dtor; +} + +static inline compound_page_dtor *get_compound_page_dtor(struct page *page) +{ + return (compound_page_dtor *)page[1].lru.next; +} + /* * Multiple processes may "see" the same page. E.g. for untouched * mappings of /dev/null, all processes see the same page full of diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2911a364481..0ccc7f23025 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -109,7 +109,7 @@ static int alloc_fresh_huge_page(void) if (nid == MAX_NUMNODES) nid = first_node(node_online_map); if (page) { - page[1].lru.next = (void *)free_huge_page; /* dtor */ + set_compound_page_dtor(page, free_huge_page); spin_lock(&hugetlb_lock); nr_huge_pages++; nr_huge_pages_node[page_to_nid(page)]++; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dc8753bdd47..d539f83c62b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -230,7 +230,7 @@ static void prep_compound_page(struct page *page, unsigned long order) int i; int nr_pages = 1 << order; - page[1].lru.next = (void *)free_compound_page; /* set dtor */ + set_compound_page_dtor(page, free_compound_page); page[1].lru.prev = (void *)order; for (i = 0; i < nr_pages; i++) { struct page *p = page + i; diff --git a/mm/swap.c b/mm/swap.c index d9a3770d8f3..017e72ca9bb 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -57,9 +57,9 @@ static void put_compound_page(struct page *page) { page = (struct page *)page_private(page); if (put_page_testzero(page)) { - void (*dtor)(struct page *page); + compound_page_dtor *dtor; - dtor = (void (*)(struct page *))page[1].lru.next; + dtor = get_compound_page_dtor(page); (*dtor)(page); } } -- cgit v1.2.3-70-g09d2 From 36de6437866bbb1d37e2312ff4f95ee4ed6d2b61 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Dec 2006 20:33:42 -0800 Subject: [PATCH] Save some bytes in struct mm_struct Before: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ unsigned int dumpable:2; /* 432 4 */ cpumask_t cpu_vm_mask; /* 436 4 */ mm_context_t context; /* 440 68 */ long unsigned int swap_token_time; /* 508 4 */ /* ---------- cacheline 16 boundary ---------- */ char recent_pagein; /* 512 1 */ /* XXX 3 bytes hole, try to pack */ int core_waiters; /* 516 4 */ struct completion * core_startup_done; /* 520 4 */ struct completion core_done; /* 524 52 */ rwlock_t ioctx_list_lock; /* 576 36 */ struct kioctx * ioctx_list; /* 612 4 */ }; /* size: 616, sum members: 613, holes: 1, sum holes: 3, cachelines: 20, last cacheline: 8 bytes */ After: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ cpumask_t cpu_vm_mask; /* 432 4 */ mm_context_t context; /* 436 68 */ long unsigned int swap_token_time; /* 504 4 */ char recent_pagein; /* 508 1 */ unsigned char dumpable:2; /* 509 1 */ /* XXX 2 bytes hole, try to pack */ int core_waiters; /* 512 4 */ struct completion * core_startup_done; /* 516 4 */ struct completion core_done; /* 520 52 */ rwlock_t ioctx_list_lock; /* 572 36 */ struct kioctx * ioctx_list; /* 608 4 */ }; /* size: 612, sum members: 610, holes: 1, sum holes: 2, cachelines: 20, last cacheline: 4 bytes */ [acme@newtoy net-2.6.20]$ codiff -V /tmp/sched.o.before kernel/sched.o /pub/scm/linux/kernel/git/acme/net-2.6.20/kernel/sched.c: struct mm_struct | -4 dumpable:2; from: unsigned int /* 432(30) 4(2) */ to: unsigned char /* 509(6) 1(2) */ < SNIP other offset changes > 1 struct changed [acme@newtoy net-2.6.20]$ I'm not aware of any problem about using 2 byte wide bitfields where previously a 4 byte wide one was, holler if there is any, I wouldn't be surprised, bitfields are things from hell. For the curious, 432(30) means: at offset 432 from the struct start, at offset 30 in the bitfield (yeah, it comes backwards, hellish, huh?) ditto for 509(6), while 4(2) and 1(2) means "struct field size(bitfield size)". Now we have a 2 bytes hole and are using only 4 bytes of the last 32 bytes cacheline, any takers? :-) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index cad6a16260f..acfd2e15c5f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -338,7 +338,6 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - unsigned dumpable:2; cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ @@ -355,6 +354,8 @@ struct mm_struct { unsigned int token_priority; unsigned int last_interval; + unsigned char dumpable:2; + /* coredumping support */ int core_waiters; struct completion *core_startup_done, core_done; -- cgit v1.2.3-70-g09d2 From 7cf9c2c76c1a17b32f2da85b50cd4fe468ed44b5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 6 Dec 2006 20:33:44 -0800 Subject: [PATCH] radix-tree: RCU lockless readside Make radix tree lookups safe to be performed without locks. Readers are protected against nodes being deleted by using RCU based freeing. Readers are protected against new node insertion by using memory barriers to ensure the node itself will be properly written before it is visible in the radix tree. Each radix tree node keeps a record of their height (above leaf nodes). This height does not change after insertion -- when the radix tree is extended, higher nodes are only inserted in the top. So a lookup can take the pointer to what is *now* the root node, and traverse down it even if the tree is concurrently extended and this node becomes a subtree of a new root. "Direct" pointers (tree height of 0, where root->rnode points directly to the data item) are handled by using the low bit of the pointer to signal whether rnode is a direct pointer or a pointer to a radix tree node. When a reader wants to traverse the next branch, they will take a copy of the pointer. This pointer will be either NULL (and the branch is empty) or non-NULL (and will point to a valid node). [akpm@osdl.org: cleanups] [Lee.Schermerhorn@hp.com: bugfixes, comments, simplifications] [clameter@sgi.com: build fix] Signed-off-by: Nick Piggin Cc: "Paul E. McKenney" Signed-off-by: Lee Schermerhorn Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 101 ++++++++++++++ lib/radix-tree.c | 327 +++++++++++++++++++++++++++++++-------------- mm/migrate.c | 19 ++- 3 files changed, 340 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index cbfa1153742..0deb842541a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -21,6 +22,35 @@ #include #include +#include +#include + +/* + * A direct pointer (root->rnode pointing directly to a data item, + * rather than another radix_tree_node) is signalled by the low bit + * set in the root->rnode pointer. + * + * In this case root->height is also NULL, but the direct pointer tests are + * needed for RCU lookups when root->height is unreliable. + */ +#define RADIX_TREE_DIRECT_PTR 1 + +static inline void *radix_tree_ptr_to_direct(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_DIRECT_PTR); +} + +static inline void *radix_tree_direct_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_DIRECT_PTR); +} + +static inline int radix_tree_is_direct_ptr(void *ptr) +{ + return (int)((unsigned long)ptr & RADIX_TREE_DIRECT_PTR); +} + +/*** radix-tree API starts here ***/ #define RADIX_TREE_MAX_TAGS 2 @@ -47,6 +77,77 @@ do { \ (root)->rnode = NULL; \ } while (0) +/** + * Radix-tree synchronization + * + * The radix-tree API requires that users provide all synchronisation (with + * specific exceptions, noted below). + * + * Synchronization of access to the data items being stored in the tree, and + * management of their lifetimes must be completely managed by API users. + * + * For API usage, in general, + * - any function _modifying_ the the tree or tags (inserting or deleting + * items, setting or clearing tags must exclude other modifications, and + * exclude any functions reading the tree. + * - any function _reading_ the the tree or tags (looking up items or tags, + * gang lookups) must exclude modifications to the tree, but may occur + * concurrently with other readers. + * + * The notable exceptions to this rule are the following functions: + * radix_tree_lookup + * radix_tree_tag_get + * radix_tree_gang_lookup + * radix_tree_gang_lookup_tag + * radix_tree_tagged + * + * The first 4 functions are able to be called locklessly, using RCU. The + * caller must ensure calls to these functions are made within rcu_read_lock() + * regions. Other readers (lock-free or otherwise) and modifications may be + * running concurrently. + * + * It is still required that the caller manage the synchronization and lifetimes + * of the items. So if RCU lock-free lookups are used, typically this would mean + * that the items have their own locks, or are amenable to lock-free access; and + * that the items are freed by RCU (or only freed after having been deleted from + * the radix tree *and* a synchronize_rcu() grace period). + * + * (Note, rcu_assign_pointer and rcu_dereference are not needed to control + * access to data items when inserting into or looking up from the radix tree) + * + * radix_tree_tagged is able to be called without locking or RCU. + */ + +/** + * radix_tree_deref_slot - dereference a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * Returns: item that was stored in that slot with any direct pointer flag + * removed. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree at least read + * locked across slot lookup and dereference. More likely, will be used with + * radix_tree_replace_slot(), as well, so caller will hold tree write locked. + */ +static inline void *radix_tree_deref_slot(void **pslot) +{ + return radix_tree_direct_to_ptr(*pslot); +} +/** + * radix_tree_replace_slot - replace item in a slot + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * @item: new item to store in the slot. + * + * For use with radix_tree_lookup_slot(). Caller must hold tree write locked + * across slot lookup and replacement. + */ +static inline void radix_tree_replace_slot(void **pslot, void *item) +{ + BUG_ON(radix_tree_is_direct_ptr(item)); + rcu_assign_pointer(*pslot, + (void *)((unsigned long)item | + ((unsigned long)*pslot & RADIX_TREE_DIRECT_PTR))); +} + int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); void *radix_tree_lookup(struct radix_tree_root *, unsigned long); void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9eb25955019..e2cefabb5aa 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2,6 +2,7 @@ * Copyright (C) 2001 Momchil Velikov * Portions Copyright (C) 2001 Christoph Hellwig * Copyright (C) 2005 SGI, Christoph Lameter + * Copyright (C) 2006 Nick Piggin * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -30,6 +31,7 @@ #include #include #include +#include #ifdef __KERNEL__ @@ -45,7 +47,9 @@ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) struct radix_tree_node { + unsigned int height; /* Height from the bottom */ unsigned int count; + struct rcu_head rcu_head; void *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; @@ -100,13 +104,21 @@ radix_tree_node_alloc(struct radix_tree_root *root) rtp->nr--; } } + BUG_ON(radix_tree_is_direct_ptr(ret)); return ret; } +static void radix_tree_node_rcu_free(struct rcu_head *head) +{ + struct radix_tree_node *node = + container_of(head, struct radix_tree_node, rcu_head); + kmem_cache_free(radix_tree_node_cachep, node); +} + static inline void radix_tree_node_free(struct radix_tree_node *node) { - kmem_cache_free(radix_tree_node_cachep, node); + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); } /* @@ -222,11 +234,12 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) } do { + unsigned int newheight; if (!(node = radix_tree_node_alloc(root))) return -ENOMEM; /* Increase the height. */ - node->slots[0] = root->rnode; + node->slots[0] = radix_tree_direct_to_ptr(root->rnode); /* Propagate the aggregated tag info into the new root */ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { @@ -234,9 +247,11 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) tag_set(node, tag, 0); } + newheight = root->height+1; + node->height = newheight; node->count = 1; - root->rnode = node; - root->height++; + rcu_assign_pointer(root->rnode, node); + root->height = newheight; } while (height > root->height); out: return 0; @@ -258,6 +273,8 @@ int radix_tree_insert(struct radix_tree_root *root, int offset; int error; + BUG_ON(radix_tree_is_direct_ptr(item)); + /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -275,11 +292,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; + slot->height = height; if (node) { - node->slots[offset] = slot; + rcu_assign_pointer(node->slots[offset], slot); node->count++; } else - root->rnode = slot; + rcu_assign_pointer(root->rnode, slot); } /* Go a level down */ @@ -295,11 +313,11 @@ int radix_tree_insert(struct radix_tree_root *root, if (node) { node->count++; - node->slots[offset] = item; + rcu_assign_pointer(node->slots[offset], item); BUG_ON(tag_get(node, 0, offset)); BUG_ON(tag_get(node, 1, offset)); } else { - root->rnode = item; + rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item)); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -308,49 +326,54 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -static inline void **__lookup_slot(struct radix_tree_root *root, - unsigned long index) +/** + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. + * + * This function cannot be called under rcu_read_lock, it must be + * excluded from writers, as must the returned slot for subsequent + * use by radix_tree_deref_slot() and radix_tree_replace slot. + * Caller must hold tree write locked across slot lookup and + * replace. + */ +void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { unsigned int height, shift; - struct radix_tree_node **slot; - - height = root->height; + struct radix_tree_node *node, **slot; - if (index > radix_tree_maxindex(height)) + node = root->rnode; + if (node == NULL) return NULL; - if (height == 0 && root->rnode) + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; return (void **)&root->rnode; + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = &root->rnode; - while (height > 0) { - if (*slot == NULL) + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = *slot; + if (node == NULL) return NULL; - slot = (struct radix_tree_node **) - ((*slot)->slots + - ((index >> shift) & RADIX_TREE_MAP_MASK)); shift -= RADIX_TREE_MAP_SHIFT; height--; - } + } while (height > 0); return (void **)slot; } - -/** - * radix_tree_lookup_slot - lookup a slot in a radix tree - * @root: radix tree root - * @index: index key - * - * Lookup the slot corresponding to the position @index in the radix tree - * @root. This is useful for update-if-exists operations. - */ -void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) -{ - return __lookup_slot(root, index); -} EXPORT_SYMBOL(radix_tree_lookup_slot); /** @@ -359,13 +382,45 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); * @index: index key * * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - void **slot; + unsigned int height, shift; + struct radix_tree_node *node, **slot; + + node = rcu_dereference(root->rnode); + if (node == NULL) + return NULL; + + if (radix_tree_is_direct_ptr(node)) { + if (index > 0) + return NULL; + return radix_tree_direct_to_ptr(node); + } + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = __lookup_slot(root, index); - return slot != NULL ? *slot : NULL; + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference(*slot); + if (node == NULL) + return NULL; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return node; } EXPORT_SYMBOL(radix_tree_lookup); @@ -495,27 +550,30 @@ int radix_tree_tag_get(struct radix_tree_root *root, unsigned long index, unsigned int tag) { unsigned int height, shift; - struct radix_tree_node *slot; + struct radix_tree_node *node; int saw_unset_tag = 0; - height = root->height; - if (index > radix_tree_maxindex(height)) - return 0; - /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; - if (height == 0) - return 1; + node = rcu_dereference(root->rnode); + if (node == NULL) + return 0; + + if (radix_tree_is_direct_ptr(node)) + return (index == 0); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return 0; shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; ; ) { int offset; - if (slot == NULL) + if (node == NULL) return 0; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -524,15 +582,15 @@ int radix_tree_tag_get(struct radix_tree_root *root, * This is just a debug check. Later, we can bale as soon as * we see an unset tag. */ - if (!tag_get(slot, tag, offset)) + if (!tag_get(node, tag, offset)) saw_unset_tag = 1; if (height == 1) { - int ret = tag_get(slot, tag, offset); + int ret = tag_get(node, tag, offset); BUG_ON(ret && saw_unset_tag); return !!ret; } - slot = slot->slots[offset]; + node = rcu_dereference(node->slots[offset]); shift -= RADIX_TREE_MAP_SHIFT; height--; } @@ -541,47 +599,45 @@ EXPORT_SYMBOL(radix_tree_tag_get); #endif static unsigned int -__lookup(struct radix_tree_root *root, void **results, unsigned long index, +__lookup(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index) { unsigned int nr_found = 0; unsigned int shift, height; - struct radix_tree_node *slot; unsigned long i; - height = root->height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - shift = (height-1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; for ( ; height > 1; height--) { - - for (i = (index >> shift) & RADIX_TREE_MAP_MASK ; - i < RADIX_TREE_MAP_SIZE; i++) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { if (slot->slots[i] != NULL) break; index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + goto out; } /* Bottom level: grab some items */ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + struct radix_tree_node *node; index++; - if (slot->slots[i]) { - results[nr_found++] = slot->slots[i]; + node = slot->slots[i]; + if (node) { + results[nr_found++] = rcu_dereference(node); if (nr_found == max_items) goto out; } @@ -603,28 +659,51 @@ out: * *@results. * * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. */ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items) { - const unsigned long max_index = radix_tree_maxindex(root->height); + unsigned long max_index; + struct radix_tree_node *node; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; + + node = rcu_dereference(root->rnode); + if (!node) + return 0; + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup(root, results + ret, cur_index, + nr_found = __lookup(node, results + ret, cur_index, max_items - ret, &next_index); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup); @@ -634,55 +713,64 @@ EXPORT_SYMBOL(radix_tree_gang_lookup); * open-coding the search. */ static unsigned int -__lookup_tag(struct radix_tree_root *root, void **results, unsigned long index, +__lookup_tag(struct radix_tree_node *slot, void **results, unsigned long index, unsigned int max_items, unsigned long *next_index, unsigned int tag) { unsigned int nr_found = 0; - unsigned int shift; - unsigned int height = root->height; - struct radix_tree_node *slot; + unsigned int shift, height; - if (height == 0) { - if (root->rnode && index == 0) - results[nr_found++] = root->rnode; + height = slot->height; + if (height == 0) goto out; - } - - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = root->rnode; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; - do { - unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK; + while (height > 0) { + unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; - for ( ; i < RADIX_TREE_MAP_SIZE; i++) { - if (tag_get(slot, tag, i)) { - BUG_ON(slot->slots[i] == NULL); + for (;;) { + if (tag_get(slot, tag, i)) break; - } index &= ~((1UL << shift) - 1); index += 1UL << shift; if (index == 0) goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; } - if (i == RADIX_TREE_MAP_SIZE) - goto out; height--; if (height == 0) { /* Bottom level: grab some items */ unsigned long j = index & RADIX_TREE_MAP_MASK; for ( ; j < RADIX_TREE_MAP_SIZE; j++) { + struct radix_tree_node *node; index++; - if (tag_get(slot, tag, j)) { - BUG_ON(slot->slots[j] == NULL); - results[nr_found++] = slot->slots[j]; + if (!tag_get(slot, tag, j)) + continue; + node = slot->slots[j]; + /* + * Even though the tag was found set, we need to + * recheck that we have a non-NULL node, because + * if this lookup is lockless, it may have been + * subsequently deleted. + * + * Similar care must be taken in any place that + * lookup ->slots[x] without a lock (ie. can't + * rely on its value remaining the same). + */ + if (node) { + node = rcu_dereference(node); + results[nr_found++] = node; if (nr_found == max_items) goto out; } } } shift -= RADIX_TREE_MAP_SHIFT; - slot = slot->slots[i]; - } while (height > 0); + slot = rcu_dereference(slot->slots[i]); + if (slot == NULL) + break; + } out: *next_index = index; return nr_found; @@ -706,27 +794,44 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items, unsigned int tag) { - const unsigned long max_index = radix_tree_maxindex(root->height); + struct radix_tree_node *node; + unsigned long max_index; unsigned long cur_index = first_index; - unsigned int ret = 0; + unsigned int ret; /* check the root's tag bit */ if (!root_tag_get(root, tag)) return 0; + node = rcu_dereference(root->rnode); + if (!node) + return 0; + + if (radix_tree_is_direct_ptr(node)) { + if (first_index > 0) + return 0; + node = radix_tree_direct_to_ptr(node); + results[0] = rcu_dereference(node); + return 1; + } + + max_index = radix_tree_maxindex(node->height); + + ret = 0; while (ret < max_items) { unsigned int nr_found; unsigned long next_index; /* Index of next search */ if (cur_index > max_index) break; - nr_found = __lookup_tag(root, results + ret, cur_index, + nr_found = __lookup_tag(node, results + ret, cur_index, max_items - ret, &next_index, tag); ret += nr_found; if (next_index == 0) break; cur_index = next_index; } + return ret; } EXPORT_SYMBOL(radix_tree_gang_lookup_tag); @@ -742,8 +847,19 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) root->rnode->count == 1 && root->rnode->slots[0]) { struct radix_tree_node *to_free = root->rnode; + void *newptr; - root->rnode = to_free->slots[0]; + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another. If + * it was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode). + */ + newptr = to_free->slots[0]; + if (root->height == 1) + newptr = radix_tree_ptr_to_direct(newptr); + root->rnode = newptr; root->height--; /* must only free zeroed nodes into the slab */ tag_clear(to_free, 0, 0); @@ -767,6 +883,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; unsigned int height, shift; int tag; int offset; @@ -777,6 +894,7 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) slot = root->rnode; if (height == 0 && root->rnode) { + slot = radix_tree_direct_to_ptr(slot); root_tag_clear_all(root); root->rnode = NULL; goto out; @@ -809,10 +927,17 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } + to_free = NULL; /* Now free the nodes we do not need anymore */ while (pathp->node) { pathp->node->slots[pathp->offset] = NULL; pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(to_free); if (pathp->node->count) { if (pathp->node == root->rnode) @@ -821,13 +946,15 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } /* Node with zero slots in use so free it */ - radix_tree_node_free(pathp->node); - + to_free = pathp->node; pathp--; + } root_tag_clear_all(root); root->height = 0; root->rnode = NULL; + if (to_free) + radix_tree_node_free(to_free); out: return slot; diff --git a/mm/migrate.c b/mm/migrate.c index b4979d423d2..e9b161bde95 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -294,7 +294,7 @@ out: static int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page) { - struct page **radix_pointer; + void **pslot; if (!mapping) { /* Anonymous page */ @@ -305,12 +305,11 @@ static int migrate_page_move_mapping(struct address_space *mapping, write_lock_irq(&mapping->tree_lock); - radix_pointer = (struct page **)radix_tree_lookup_slot( - &mapping->page_tree, - page_index(page)); + pslot = radix_tree_lookup_slot(&mapping->page_tree, + page_index(page)); if (page_count(page) != 2 + !!PagePrivate(page) || - *radix_pointer != page) { + (struct page *)radix_tree_deref_slot(pslot) != page) { write_unlock_irq(&mapping->tree_lock); return -EAGAIN; } @@ -318,7 +317,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, /* * Now we know that no one else is looking at the page. */ - get_page(newpage); + get_page(newpage); /* add cache reference */ #ifdef CONFIG_SWAP if (PageSwapCache(page)) { SetPageSwapCache(newpage); @@ -326,8 +325,14 @@ static int migrate_page_move_mapping(struct address_space *mapping, } #endif - *radix_pointer = newpage; + radix_tree_replace_slot(pslot, newpage); + + /* + * Drop cache reference from old page. + * We know this isn't the last reference. + */ __put_page(page); + write_unlock_irq(&mapping->tree_lock); return 0; -- cgit v1.2.3-70-g09d2 From 915bae9ebe41e52d71ad8b06d50e4ab26189f964 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:07 -0800 Subject: [PATCH] swsusp: use partition device and offset to identify swap areas The Linux kernel handles swap files almost in the same way as it handles swap partitions and there are only two differences between these two types of swap areas: (1) swap files need not be contiguous, (2) the header of a swap file is not in the first block of the partition that holds it. From the swsusp's point of view (1) is not a problem, because it is already taken care of by the swap-handling code, but (2) has to be taken into consideration. In principle the location of a swap file's header may be determined with the help of appropriate filesystem driver. Unfortunately, however, it requires the filesystem holding the swap file to be mounted, and if this filesystem is journaled, it cannot be mounted during a resume from disk. For this reason we need some other means by which swap areas can be identified. For example, to identify a swap area we can use the partition that holds the area and the offset from the beginning of this partition at which the swap header is located. The following patch allows swsusp to identify swap areas this way. It changes swap_type_of() so that it takes an additional argument representing an offset of the swap header within the partition represented by its first argument. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- kernel/power/swap.c | 2 +- kernel/power/user.c | 5 +++-- mm/swapfile.c | 38 ++++++++++++++++++++++++++------------ 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 89f8a39773b..d51e35e4e16 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -247,7 +247,7 @@ extern int swap_duplicate(swp_entry_t); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern void swap_free(swp_entry_t); extern void free_swap_and_cache(swp_entry_t); -extern int swap_type_of(dev_t); +extern int swap_type_of(dev_t, sector_t); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 1a3b0dd2c3f..7b10da16389 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -74,7 +74,7 @@ static int mark_swapfiles(swp_entry_t start) static int swsusp_swap_check(void) /* This is called before saving image */ { - int res = swap_type_of(swsusp_resume_device); + int res = swap_type_of(swsusp_resume_device, 0); if (res >= 0) { root_swap = res; diff --git a/kernel/power/user.c b/kernel/power/user.c index 4c24ca5d62e..a327b18a5ff 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -55,7 +55,8 @@ static int snapshot_open(struct inode *inode, struct file *filp) filp->private_data = data; memset(&data->handle, 0, sizeof(struct snapshot_handle)); if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { - data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device) : -1; + data->swap = swsusp_resume_device ? + swap_type_of(swsusp_resume_device, 0) : -1; data->mode = O_RDONLY; } else { data->swap = -1; @@ -265,7 +266,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, * so we need to recode them */ if (old_decode_dev(arg)) { - data->swap = swap_type_of(old_decode_dev(arg)); + data->swap = swap_type_of(old_decode_dev(arg), 0); if (data->swap < 0) error = -ENODEV; } else { diff --git a/mm/swapfile.c b/mm/swapfile.c index f315131db00..2bfacbac0f4 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -427,34 +427,48 @@ void free_swap_and_cache(swp_entry_t entry) #ifdef CONFIG_SOFTWARE_SUSPEND /* - * Find the swap type that corresponds to given device (if any) + * Find the swap type that corresponds to given device (if any). * - * This is needed for software suspend and is done in such a way that inode - * aliasing is allowed. + * @offset - number of the PAGE_SIZE-sized block of the device, starting + * from 0, in which the swap header is expected to be located. + * + * This is needed for the suspend to disk (aka swsusp). */ -int swap_type_of(dev_t device) +int swap_type_of(dev_t device, sector_t offset) { + struct block_device *bdev = NULL; int i; + if (device) + bdev = bdget(device); + spin_lock(&swap_lock); for (i = 0; i < nr_swapfiles; i++) { - struct inode *inode; + struct swap_info_struct *sis = swap_info + i; - if (!(swap_info[i].flags & SWP_WRITEOK)) + if (!(sis->flags & SWP_WRITEOK)) continue; - if (!device) { + if (!bdev) { spin_unlock(&swap_lock); return i; } - inode = swap_info[i].swap_file->f_dentry->d_inode; - if (S_ISBLK(inode->i_mode) && - device == MKDEV(imajor(inode), iminor(inode))) { - spin_unlock(&swap_lock); - return i; + if (bdev == sis->bdev) { + struct swap_extent *se; + + se = list_entry(sis->extent_list.next, + struct swap_extent, list); + if (se->start_block == offset) { + spin_unlock(&swap_lock); + bdput(bdev); + return i; + } } } spin_unlock(&swap_lock); + if (bdev) + bdput(bdev); + return -ENODEV; } -- cgit v1.2.3-70-g09d2 From 3aef83e0ef1ffb8ea3bea97be46821a45c952173 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:10 -0800 Subject: [PATCH] swsusp: use block device offsets to identify swap locations Make swsusp use block device offsets instead of swap offsets to identify swap locations and make it use the same code paths for writing as well as for reading data. This allows us to use the same code for handling swap files and swap partitions and to simplify the code, eg. by dropping rw_swap_page_sync(). Signed-off-by: Rafael J. Wysocki Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 +- kernel/power/power.h | 2 +- kernel/power/swap.c | 133 ++++++++++++++++++++++++++------------------------ kernel/power/swsusp.c | 10 ++-- kernel/power/user.c | 7 +-- mm/page_io.c | 45 ----------------- mm/swapfile.c | 17 +++++++ 7 files changed, 98 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index d51e35e4e16..add51cebc8d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -218,8 +218,6 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, - struct bio **bio_chain); extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ @@ -250,6 +248,7 @@ extern void free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t); extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); +extern sector_t swapdev_block(int, pgoff_t); extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); diff --git a/kernel/power/power.h b/kernel/power/power.h index 87ecb1856ee..210ebba2602 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -146,7 +146,7 @@ struct bitmap_page { extern void free_bitmap(struct bitmap_page *bitmap); extern struct bitmap_page *alloc_bitmap(unsigned int nr_bits); -extern unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap); +extern sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap); extern void free_all_swap_pages(int swap, struct bitmap_page *bitmap); extern int swsusp_check(void); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 7768c2ba755..1b08f46bbb7 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -34,8 +34,8 @@ extern char resume_file[]; #define SWSUSP_SIG "S1SUSPEND" static struct swsusp_header { - char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)]; - swp_entry_t image; + char reserved[PAGE_SIZE - 20 - sizeof(sector_t)]; + sector_t image; char orig_sig[10]; char sig[10]; } __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header; @@ -100,9 +100,9 @@ static int bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) return submit(READ, page_off, virt_to_page(addr), bio_chain); } -static int bio_write_page(pgoff_t page_off, void *addr) +static int bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) { - return submit(WRITE, page_off, virt_to_page(addr), NULL); + return submit(WRITE, page_off, virt_to_page(addr), bio_chain); } static int wait_on_bio_chain(struct bio **bio_chain) @@ -157,22 +157,19 @@ static void show_speed(struct timeval *start, struct timeval *stop, * Saving part */ -static int mark_swapfiles(swp_entry_t start) +static int mark_swapfiles(sector_t start) { int error; - rw_swap_page_sync(READ, swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header), NULL); + bio_read_page(0, &swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) || !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) { memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10); memcpy(swsusp_header.sig,SWSUSP_SIG, 10); swsusp_header.image = start; - error = rw_swap_page_sync(WRITE, swp_entry(root_swap, 0), - virt_to_page((unsigned long)&swsusp_header), - NULL); + error = bio_write_page(0, &swsusp_header, NULL); } else { - pr_debug("swsusp: Partition is not swap space.\n"); + printk(KERN_ERR "swsusp: Swap header not found!\n"); error = -ENODEV; } return error; @@ -185,12 +182,21 @@ static int mark_swapfiles(swp_entry_t start) static int swsusp_swap_check(void) /* This is called before saving image */ { - int res = swap_type_of(swsusp_resume_device, 0); + int res; + + res = swap_type_of(swsusp_resume_device, 0); + if (res < 0) + return res; + + root_swap = res; + resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE); + if (IS_ERR(resume_bdev)) + return PTR_ERR(resume_bdev); + + res = set_blocksize(resume_bdev, PAGE_SIZE); + if (res < 0) + blkdev_put(resume_bdev); - if (res >= 0) { - root_swap = res; - return 0; - } return res; } @@ -201,36 +207,26 @@ static int swsusp_swap_check(void) /* This is called before saving image */ * @bio_chain: Link the next write BIO here */ -static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) +static int write_page(void *buf, sector_t offset, struct bio **bio_chain) { - swp_entry_t entry; - int error = -ENOSPC; - - if (offset) { - struct page *page = virt_to_page(buf); - - if (bio_chain) { - /* - * Whether or not we successfully allocated a copy page, - * we take a ref on the page here. It gets undone in - * wait_on_bio_chain(). - */ - struct page *page_copy; - page_copy = alloc_page(GFP_ATOMIC); - if (page_copy == NULL) { - WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ - get_page(page); - } else { - memcpy(page_address(page_copy), - page_address(page), PAGE_SIZE); - page = page_copy; - } + void *src; + + if (!offset) + return -ENOSPC; + + if (bio_chain) { + src = (void *)__get_free_page(GFP_ATOMIC); + if (src) { + memcpy(src, buf, PAGE_SIZE); + } else { + WARN_ON_ONCE(1); + bio_chain = NULL; /* Go synchronous */ + src = buf; } - entry = swp_entry(root_swap, offset); - error = rw_swap_page_sync(WRITE, entry, page, bio_chain); + } else { + src = buf; } - return error; + return bio_write_page(offset, src, bio_chain); } /* @@ -248,11 +244,11 @@ static int write_page(void *buf, unsigned long offset, struct bio **bio_chain) * at a time. */ -#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(long) - 1) +#define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) struct swap_map_page { - unsigned long entries[MAP_PAGE_ENTRIES]; - unsigned long next_swap; + sector_t entries[MAP_PAGE_ENTRIES]; + sector_t next_swap; }; /** @@ -262,7 +258,7 @@ struct swap_map_page { struct swap_map_handle { struct swap_map_page *cur; - unsigned long cur_swap; + sector_t cur_swap; struct bitmap_page *bitmap; unsigned int k; }; @@ -287,7 +283,7 @@ static int get_swap_writer(struct swap_map_handle *handle) release_swap_writer(handle); return -ENOMEM; } - handle->cur_swap = alloc_swap_page(root_swap, handle->bitmap); + handle->cur_swap = alloc_swapdev_block(root_swap, handle->bitmap); if (!handle->cur_swap) { release_swap_writer(handle); return -ENOSPC; @@ -300,11 +296,11 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, struct bio **bio_chain) { int error = 0; - unsigned long offset; + sector_t offset; if (!handle->cur) return -EINVAL; - offset = alloc_swap_page(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap, handle->bitmap); error = write_page(buf, offset, bio_chain); if (error) return error; @@ -313,7 +309,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, error = wait_on_bio_chain(bio_chain); if (error) goto out; - offset = alloc_swap_page(root_swap, handle->bitmap); + offset = alloc_swapdev_block(root_swap, handle->bitmap); if (!offset) return -ENOSPC; handle->cur->next_swap = offset; @@ -413,37 +409,47 @@ int swsusp_write(void) struct swsusp_info *header; int error; - if ((error = swsusp_swap_check())) { + error = swsusp_swap_check(); + if (error) { printk(KERN_ERR "swsusp: Cannot find swap device, try " "swapon -a.\n"); return error; } memset(&snapshot, 0, sizeof(struct snapshot_handle)); error = snapshot_read_next(&snapshot, PAGE_SIZE); - if (error < PAGE_SIZE) - return error < 0 ? error : -EFAULT; + if (error < PAGE_SIZE) { + if (error >= 0) + error = -EFAULT; + + goto out; + } header = (struct swsusp_info *)data_of(snapshot); if (!enough_swap(header->pages)) { printk(KERN_ERR "swsusp: Not enough free swap\n"); - return -ENOSPC; + error = -ENOSPC; + goto out; } error = get_swap_writer(&handle); if (!error) { - unsigned long start = handle.cur_swap; + sector_t start = handle.cur_swap; + error = swap_write_page(&handle, header, NULL); if (!error) error = save_image(&handle, &snapshot, header->pages - 1); + if (!error) { flush_swap_writer(&handle); printk("S"); - error = mark_swapfiles(swp_entry(root_swap, start)); + error = mark_swapfiles(start); printk("|\n"); } } if (error) free_all_swap_pages(root_swap, handle.bitmap); release_swap_writer(&handle); +out: + swsusp_close(); return error; } @@ -459,17 +465,18 @@ static void release_swap_reader(struct swap_map_handle *handle) handle->cur = NULL; } -static int get_swap_reader(struct swap_map_handle *handle, - swp_entry_t start) +static int get_swap_reader(struct swap_map_handle *handle, sector_t start) { int error; - if (!swp_offset(start)) + if (!start) return -EINVAL; + handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC); if (!handle->cur) return -ENOMEM; - error = bio_read_page(swp_offset(start), handle->cur, NULL); + + error = bio_read_page(start, handle->cur, NULL); if (error) { release_swap_reader(handle); return error; @@ -481,7 +488,7 @@ static int get_swap_reader(struct swap_map_handle *handle, static int swap_read_page(struct swap_map_handle *handle, void *buf, struct bio **bio_chain) { - unsigned long offset; + sector_t offset; int error; if (!handle->cur) @@ -608,7 +615,7 @@ int swsusp_check(void) if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) { memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10); /* Reset swap signature now */ - error = bio_write_page(0, &swsusp_header); + error = bio_write_page(0, &swsusp_header, NULL); } else { return -EINVAL; } diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 0b66659dc51..4147a756a8c 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -134,18 +134,18 @@ static int bitmap_set(struct bitmap_page *bitmap, unsigned long bit) return 0; } -unsigned long alloc_swap_page(int swap, struct bitmap_page *bitmap) +sector_t alloc_swapdev_block(int swap, struct bitmap_page *bitmap) { unsigned long offset; offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { - if (bitmap_set(bitmap, offset)) { + if (bitmap_set(bitmap, offset)) swap_free(swp_entry(swap, offset)); - offset = 0; - } + else + return swapdev_block(swap, offset); } - return offset; + return 0; } void free_all_swap_pages(int swap, struct bitmap_page *bitmap) diff --git a/kernel/power/user.c b/kernel/power/user.c index a327b18a5ff..f0b7ef8bdd7 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -126,7 +126,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, { int error = 0; struct snapshot_data *data; - loff_t offset, avail; + loff_t avail; + sector_t offset; if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) return -ENOTTY; @@ -240,10 +241,10 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; } } - offset = alloc_swap_page(data->swap, data->bitmap); + offset = alloc_swapdev_block(data->swap, data->bitmap); if (offset) { offset <<= PAGE_SHIFT; - error = put_user(offset, (loff_t __user *)arg); + error = put_user(offset, (sector_t __user *)arg); } else { error = -ENOSPC; } diff --git a/mm/page_io.c b/mm/page_io.c index d4840ecbf8f..dbffec0d78c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -147,48 +147,3 @@ int swap_readpage(struct file *file, struct page *page) out: return ret; } - -#ifdef CONFIG_SOFTWARE_SUSPEND -/* - * A scruffy utility function to read or write an arbitrary swap page - * and wait on the I/O. The caller must have a ref on the page. - * - * We use end_swap_bio_read() even for writes, because it happens to do what - * we want. - */ -int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, - struct bio **bio_chain) -{ - struct bio *bio; - int ret = 0; - int bio_rw; - - lock_page(page); - - bio = get_swap_bio(GFP_KERNEL, entry.val, page, end_swap_bio_read); - if (bio == NULL) { - unlock_page(page); - ret = -ENOMEM; - goto out; - } - - bio_rw = rw; - if (!bio_chain) - bio_rw |= (1 << BIO_RW_SYNC); - if (bio_chain) - bio_get(bio); - submit_bio(bio_rw, bio); - if (bio_chain == NULL) { - wait_on_page_locked(page); - - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - } - if (bio_chain) { - bio->bi_private = *bio_chain; - *bio_chain = bio; - } -out: - return ret; -} -#endif diff --git a/mm/swapfile.c b/mm/swapfile.c index 2bfacbac0f4..55242363de6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -945,6 +945,23 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset) } } +#ifdef CONFIG_SOFTWARE_SUSPEND +/* + * Get the (PAGE_SIZE) block corresponding to given offset on the swapdev + * corresponding to given index in swap_info (swap type). + */ +sector_t swapdev_block(int swap_type, pgoff_t offset) +{ + struct swap_info_struct *sis; + + if (swap_type >= nr_swapfiles) + return 0; + + sis = swap_info + swap_type; + return (sis->flags & SWP_WRITEOK) ? map_swap_page(sis, offset) : 0; +} +#endif /* CONFIG_SOFTWARE_SUSPEND */ + /* * Free all of a swapdev's extent information */ -- cgit v1.2.3-70-g09d2 From 8357376d3df21b7d6f857931a57ac50da9c66e26 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:18 -0800 Subject: [PATCH] swsusp: Improve handling of highmem Currently swsusp saves the contents of highmem pages by copying them to the normal zone which is quite inefficient (eg. it requires two normal pages to be used for saving one highmem page). This may be improved by using highmem for saving the contents of saveable highmem pages. Namely, during the suspend phase of the suspend-resume cycle we try to allocate as many free highmem pages as there are saveable highmem pages. If there are not enough highmem image pages to store the contents of all of the saveable highmem pages, some of them will be stored in the "normal" memory. Next, we allocate as many free "normal" pages as needed to store the (remaining) image data. We use a memory bitmap to mark the allocated free pages (ie. highmem as well as "normal" image pages). Now, we use another memory bitmap to mark all of the saveable pages (highmem as well as "normal") and the contents of the saveable pages are copied into the image pages. Then, the second bitmap is used to save the pfns corresponding to the saveable pages and the first one is used to save their data. During the resume phase the pfns of the pages that were saveable during the suspend are loaded from the image and used to mark the "unsafe" page frames. Next, we try to allocate as many free highmem page frames as to load all of the image data that had been in the highmem before the suspend and we allocate so many free "normal" page frames that the total number of allocated free pages (highmem and "normal") is equal to the size of the image. While doing this we have to make sure that there will be some extra free "normal" and "safe" page frames for two lists of PBEs constructed later. Now, the image data are loaded, if possible, into their "original" page frames. The image data that cannot be written into their "original" page frames are loaded into "safe" page frames and their "original" kernel virtual addresses, as well as the addresses of the "safe" pages containing their copies, are stored in one of two lists of PBEs. One list of PBEs is for the copies of "normal" suspend pages (ie. "normal" pages that were saveable during the suspend) and it is used in the same way as previously (ie. by the architecture-dependent parts of swsusp). The other list of PBEs is for the copies of highmem suspend pages. The pages in this list are restored (in a reversible way) right before the arch-dependent code is called. Signed-off-by: Rafael J. Wysocki Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/suspend.h | 9 +- kernel/power/power.h | 2 +- kernel/power/snapshot.c | 851 ++++++++++++++++++++++++++++++++++++------------ kernel/power/swap.c | 2 +- kernel/power/swsusp.c | 53 +-- kernel/power/user.c | 2 +- mm/vmscan.c | 3 + 7 files changed, 680 insertions(+), 242 deletions(-) (limited to 'include') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1237f16ecd..bf99bd49f8e 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -9,10 +9,13 @@ #include #include -/* page backup entry */ +/* struct pbe is used for creating lists of pages that should be restored + * atomically during the resume from disk, because the page frames they have + * occupied before the suspend are in use. + */ struct pbe { - unsigned long address; /* address of the copy */ - unsigned long orig_address; /* original address of page */ + void *address; /* address of the copy */ + void *orig_address; /* original address of a page */ struct pbe *next; }; diff --git a/kernel/power/power.h b/kernel/power/power.h index 7dbfd9f67e1..3763343bde2 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -103,8 +103,8 @@ struct snapshot_handle { extern unsigned int snapshot_additional_pages(struct zone *zone); extern int snapshot_read_next(struct snapshot_handle *handle, size_t count); extern int snapshot_write_next(struct snapshot_handle *handle, size_t count); +extern void snapshot_write_finalize(struct snapshot_handle *handle); extern int snapshot_image_loaded(struct snapshot_handle *handle); -extern void snapshot_free_unused_memory(struct snapshot_handle *handle); /* * This structure is used to pass the values needed for the identification diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 99f9b7d177d..fd8251d40eb 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1,15 +1,15 @@ /* * linux/kernel/power/snapshot.c * - * This file provide system snapshot/restore functionality. + * This file provides system snapshot/restore functionality for swsusp. * * Copyright (C) 1998-2005 Pavel Machek + * Copyright (C) 2006 Rafael J. Wysocki * - * This file is released under the GPLv2, and is based on swsusp.c. + * This file is released under the GPLv2. * */ - #include #include #include @@ -34,137 +34,24 @@ #include "power.h" -/* List of PBEs used for creating and restoring the suspend image */ +/* List of PBEs needed for restoring the pages that were allocated before + * the suspend and included in the suspend image, but have also been + * allocated by the "resume" kernel, so their contents cannot be written + * directly to their "original" page frames. + */ struct pbe *restore_pblist; -static unsigned int nr_copy_pages; -static unsigned int nr_meta_pages; +/* Pointer to an auxiliary buffer (1 page) */ static void *buffer; -#ifdef CONFIG_HIGHMEM -unsigned int count_highmem_pages(void) -{ - struct zone *zone; - unsigned long zone_pfn; - unsigned int n = 0; - - for_each_zone (zone) - if (is_highmem(zone)) { - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) { - struct page *page; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - if (PageReserved(page)) - continue; - if (PageNosaveFree(page)) - continue; - n++; - } - } - return n; -} - -struct highmem_page { - char *data; - struct page *page; - struct highmem_page *next; -}; - -static struct highmem_page *highmem_copy; - -static int save_highmem_zone(struct zone *zone) -{ - unsigned long zone_pfn; - mark_free_pages(zone); - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) { - struct page *page; - struct highmem_page *save; - void *kaddr; - unsigned long pfn = zone_pfn + zone->zone_start_pfn; - - if (!(pfn%10000)) - printk("."); - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - /* - * This condition results from rvmalloc() sans vmalloc_32() - * and architectural memory reservations. This should be - * corrected eventually when the cases giving rise to this - * are better understood. - */ - if (PageReserved(page)) - continue; - BUG_ON(PageNosave(page)); - if (PageNosaveFree(page)) - continue; - save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC); - if (!save) - return -ENOMEM; - save->next = highmem_copy; - save->page = page; - save->data = (void *) get_zeroed_page(GFP_ATOMIC); - if (!save->data) { - kfree(save); - return -ENOMEM; - } - kaddr = kmap_atomic(page, KM_USER0); - memcpy(save->data, kaddr, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - highmem_copy = save; - } - return 0; -} - -int save_highmem(void) -{ - struct zone *zone; - int res = 0; - - pr_debug("swsusp: Saving Highmem"); - drain_local_pages(); - for_each_zone (zone) { - if (is_highmem(zone)) - res = save_highmem_zone(zone); - if (res) - return res; - } - printk("\n"); - return 0; -} - -int restore_highmem(void) -{ - printk("swsusp: Restoring Highmem\n"); - while (highmem_copy) { - struct highmem_page *save = highmem_copy; - void *kaddr; - highmem_copy = save->next; - - kaddr = kmap_atomic(save->page, KM_USER0); - memcpy(kaddr, save->data, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - free_page((long) save->data); - kfree(save); - } - return 0; -} -#else -static inline unsigned int count_highmem_pages(void) {return 0;} -static inline int save_highmem(void) {return 0;} -static inline int restore_highmem(void) {return 0;} -#endif - /** * @safe_needed - on resume, for storing the PBE list and the image, * we can only use memory pages that do not conflict with the pages - * used before suspend. + * used before suspend. The unsafe pages have PageNosaveFree set + * and we count them using unsafe_pages. * - * The unsafe pages are marked with the PG_nosave_free flag - * and we count them using unsafe_pages + * Each allocated image page is marked as PageNosave and PageNosaveFree + * so that swsusp_free() can release it. */ #define PG_ANY 0 @@ -174,7 +61,7 @@ static inline int restore_highmem(void) {return 0;} static unsigned int allocated_unsafe_pages; -static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) +static void *get_image_page(gfp_t gfp_mask, int safe_needed) { void *res; @@ -195,20 +82,38 @@ static void *alloc_image_page(gfp_t gfp_mask, int safe_needed) unsigned long get_safe_page(gfp_t gfp_mask) { - return (unsigned long)alloc_image_page(gfp_mask, PG_SAFE); + return (unsigned long)get_image_page(gfp_mask, PG_SAFE); +} + +static struct page *alloc_image_page(gfp_t gfp_mask) { + struct page *page; + + page = alloc_page(gfp_mask); + if (page) { + SetPageNosave(page); + SetPageNosaveFree(page); + } + return page; } /** * free_image_page - free page represented by @addr, allocated with - * alloc_image_page (page flags set by it must be cleared) + * get_image_page (page flags set by it must be cleared) */ static inline void free_image_page(void *addr, int clear_nosave_free) { - ClearPageNosave(virt_to_page(addr)); + struct page *page; + + BUG_ON(!virt_addr_valid(addr)); + + page = virt_to_page(addr); + + ClearPageNosave(page); if (clear_nosave_free) - ClearPageNosaveFree(virt_to_page(addr)); - free_page((unsigned long)addr); + ClearPageNosaveFree(page); + + __free_page(page); } /* struct linked_page is used to build chains of pages */ @@ -269,7 +174,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { struct linked_page *lp; - lp = alloc_image_page(ca->gfp_mask, ca->safe_needed); + lp = get_image_page(ca->gfp_mask, ca->safe_needed); if (!lp) return NULL; @@ -446,8 +351,8 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) /* Compute the number of zones */ nr = 0; - for_each_zone (zone) - if (populated_zone(zone) && !is_highmem(zone)) + for_each_zone(zone) + if (populated_zone(zone)) nr++; /* Allocate the list of zones bitmap objects */ @@ -459,10 +364,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) } /* Initialize the zone bitmap objects */ - for_each_zone (zone) { + for_each_zone(zone) { unsigned long pfn; - if (!populated_zone(zone) || is_highmem(zone)) + if (!populated_zone(zone)) continue; zone_bm->start_pfn = zone->zone_start_pfn; @@ -481,7 +386,7 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) while (bb) { unsigned long *ptr; - ptr = alloc_image_page(gfp_mask, safe_needed); + ptr = get_image_page(gfp_mask, safe_needed); bb->data = ptr; if (!ptr) goto Free; @@ -669,9 +574,81 @@ unsigned int snapshot_additional_pages(struct zone *zone) res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); - return res; + return 2 * res; } +#ifdef CONFIG_HIGHMEM +/** + * count_free_highmem_pages - compute the total number of free highmem + * pages, system-wide. + */ + +static unsigned int count_free_highmem_pages(void) +{ + struct zone *zone; + unsigned int cnt = 0; + + for_each_zone(zone) + if (populated_zone(zone) && is_highmem(zone)) + cnt += zone->free_pages; + + return cnt; +} + +/** + * saveable_highmem_page - Determine whether a highmem page should be + * included in the suspend image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't a part of a free chunk of pages. + */ + +static struct page *saveable_highmem_page(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid(pfn)) + return NULL; + + page = pfn_to_page(pfn); + + BUG_ON(!PageHighMem(page)); + + if (PageNosave(page) || PageReserved(page) || PageNosaveFree(page)) + return NULL; + + return page; +} + +/** + * count_highmem_pages - compute the total number of saveable highmem + * pages. + */ + +unsigned int count_highmem_pages(void) +{ + struct zone *zone; + unsigned int n = 0; + + for_each_zone(zone) { + unsigned long pfn, max_zone_pfn; + + if (!is_highmem(zone)) + continue; + + mark_free_pages(zone); + max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) + if (saveable_highmem_page(pfn)) + n++; + } + return n; +} +#else +static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; } +static inline unsigned int count_highmem_pages(void) { return 0; } +#endif /* CONFIG_HIGHMEM */ + /** * pfn_is_nosave - check if given pfn is in the 'nosave' section */ @@ -684,12 +661,12 @@ static inline int pfn_is_nosave(unsigned long pfn) } /** - * saveable - Determine whether a page should be cloned or not. - * @pfn: The page + * saveable - Determine whether a non-highmem page should be included in + * the suspend image. * - * We save a page if it isn't Nosave, and is not in the range of pages - * statically defined as 'unsaveable', and it - * isn't a part of a free chunk of pages. + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't a part of + * a free chunk of pages. */ static struct page *saveable_page(unsigned long pfn) @@ -701,76 +678,130 @@ static struct page *saveable_page(unsigned long pfn) page = pfn_to_page(pfn); - if (PageNosave(page)) + BUG_ON(PageHighMem(page)); + + if (PageNosave(page) || PageNosaveFree(page)) return NULL; + if (PageReserved(page) && pfn_is_nosave(pfn)) return NULL; - if (PageNosaveFree(page)) - return NULL; return page; } +/** + * count_data_pages - compute the total number of saveable non-highmem + * pages. + */ + unsigned int count_data_pages(void) { struct zone *zone; unsigned long pfn, max_zone_pfn; unsigned int n = 0; - for_each_zone (zone) { + for_each_zone(zone) { if (is_highmem(zone)) continue; + mark_free_pages(zone); max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - n += !!saveable_page(pfn); + if(saveable_page(pfn)) + n++; } return n; } -static inline void copy_data_page(long *dst, long *src) +/* This is needed, because copy_page and memcpy are not usable for copying + * task structs. + */ +static inline void do_copy_page(long *dst, long *src) { int n; - /* copy_page and memcpy are not usable for copying task structs. */ for (n = PAGE_SIZE / sizeof(long); n; n--) *dst++ = *src++; } +#ifdef CONFIG_HIGHMEM +static inline struct page * +page_is_saveable(struct zone *zone, unsigned long pfn) +{ + return is_highmem(zone) ? + saveable_highmem_page(pfn) : saveable_page(pfn); +} + +static inline void +copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + struct page *s_page, *d_page; + void *src, *dst; + + s_page = pfn_to_page(src_pfn); + d_page = pfn_to_page(dst_pfn); + if (PageHighMem(s_page)) { + src = kmap_atomic(s_page, KM_USER0); + dst = kmap_atomic(d_page, KM_USER1); + do_copy_page(dst, src); + kunmap_atomic(src, KM_USER0); + kunmap_atomic(dst, KM_USER1); + } else { + src = page_address(s_page); + if (PageHighMem(d_page)) { + /* Page pointed to by src may contain some kernel + * data modified by kmap_atomic() + */ + do_copy_page(buffer, src); + dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0); + memcpy(dst, buffer, PAGE_SIZE); + kunmap_atomic(dst, KM_USER0); + } else { + dst = page_address(d_page); + do_copy_page(dst, src); + } + } +} +#else +#define page_is_saveable(zone, pfn) saveable_page(pfn) + +static inline void +copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) +{ + do_copy_page(page_address(pfn_to_page(dst_pfn)), + page_address(pfn_to_page(src_pfn))); +} +#endif /* CONFIG_HIGHMEM */ + static void copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) { struct zone *zone; unsigned long pfn; - for_each_zone (zone) { + for_each_zone(zone) { unsigned long max_zone_pfn; - if (is_highmem(zone)) - continue; - mark_free_pages(zone); max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (saveable_page(pfn)) + if (page_is_saveable(zone, pfn)) memory_bm_set_bit(orig_bm, pfn); } memory_bm_position_reset(orig_bm); memory_bm_position_reset(copy_bm); do { pfn = memory_bm_next_pfn(orig_bm); - if (likely(pfn != BM_END_OF_MAP)) { - struct page *page; - void *src; - - page = pfn_to_page(pfn); - src = page_address(page); - page = pfn_to_page(memory_bm_next_pfn(copy_bm)); - copy_data_page(page_address(page), src); - } + if (likely(pfn != BM_END_OF_MAP)) + copy_data_page(memory_bm_next_pfn(copy_bm), pfn); } while (pfn != BM_END_OF_MAP); } +/* Total number of image pages */ +static unsigned int nr_copy_pages; +/* Number of pages needed for saving the original pfns of the image pages */ +static unsigned int nr_meta_pages; + /** * swsusp_free - free pages allocated for the suspend. * @@ -792,7 +823,7 @@ void swsusp_free(void) if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); ClearPageNosaveFree(page); - free_page((long) page_address(page)); + __free_page(page); } } } @@ -802,34 +833,108 @@ void swsusp_free(void) buffer = NULL; } +#ifdef CONFIG_HIGHMEM +/** + * count_pages_for_highmem - compute the number of non-highmem pages + * that will be necessary for creating copies of highmem pages. + */ + +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) +{ + unsigned int free_highmem = count_free_highmem_pages(); + + if (free_highmem >= nr_highmem) + nr_highmem = 0; + else + nr_highmem -= free_highmem; + + return nr_highmem; +} +#else +static unsigned int +count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +#endif /* CONFIG_HIGHMEM */ /** - * enough_free_mem - Make sure we enough free memory to snapshot. - * - * Returns TRUE or FALSE after checking the number of available - * free pages. + * enough_free_mem - Make sure we have enough free memory for the + * snapshot image. */ -static int enough_free_mem(unsigned int nr_pages) +static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; unsigned int free = 0, meta = 0; - for_each_zone (zone) - if (!is_highmem(zone)) { + for_each_zone(zone) { + meta += snapshot_additional_pages(zone); + if (!is_highmem(zone)) free += zone->free_pages; - meta += snapshot_additional_pages(zone); - } + } - pr_debug("swsusp: pages needed: %u + %u + %u, available pages: %u\n", + nr_pages += count_pages_for_highmem(nr_highmem); + pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n", nr_pages, PAGES_FOR_IO, meta, free); return free > nr_pages + PAGES_FOR_IO + meta; } +#ifdef CONFIG_HIGHMEM +/** + * get_highmem_buffer - if there are some highmem pages in the suspend + * image, we may need the buffer to copy them and/or load their data. + */ + +static inline int get_highmem_buffer(int safe_needed) +{ + buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); + return buffer ? 0 : -ENOMEM; +} + +/** + * alloc_highmem_image_pages - allocate some highmem pages for the image. + * Try to allocate as many pages as needed, but if the number of free + * highmem pages is lesser than that, allocate them all. + */ + +static inline unsigned int +alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +{ + unsigned int to_alloc = count_free_highmem_pages(); + + if (to_alloc > nr_highmem) + to_alloc = nr_highmem; + + nr_highmem -= to_alloc; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_image_page(__GFP_HIGHMEM); + memory_bm_set_bit(bm, page_to_pfn(page)); + } + return nr_highmem; +} +#else +static inline int get_highmem_buffer(int safe_needed) { return 0; } + +static inline unsigned int +alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +#endif /* CONFIG_HIGHMEM */ + +/** + * swsusp_alloc - allocate memory for the suspend image + * + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. + * + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. + */ + static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, - unsigned int nr_pages) + unsigned int nr_pages, unsigned int nr_highmem) { int error; @@ -841,13 +946,19 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, if (error) goto Free; + if (nr_highmem > 0) { + error = get_highmem_buffer(PG_ANY); + if (error) + goto Free; + + nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); + } while (nr_pages-- > 0) { - struct page *page = alloc_page(GFP_ATOMIC | __GFP_COLD); + struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); + if (!page) goto Free; - SetPageNosave(page); - SetPageNosaveFree(page); memory_bm_set_bit(copy_bm, page_to_pfn(page)); } return 0; @@ -857,30 +968,39 @@ Free: return -ENOMEM; } -/* Memory bitmap used for marking saveable pages */ +/* Memory bitmap used for marking saveable pages (during suspend) or the + * suspend image pages (during resume) + */ static struct memory_bitmap orig_bm; -/* Memory bitmap used for marking allocated pages that will contain the copies - * of saveable pages +/* Memory bitmap used on suspend for marking allocated pages that will contain + * the copies of saveable pages. During resume it is initially used for + * marking the suspend image pages, but then its set bits are duplicated in + * @orig_bm and it is released. Next, on systems with high memory, it may be + * used for marking "safe" highmem pages, but it has to be reinitialized for + * this purpose. */ static struct memory_bitmap copy_bm; asmlinkage int swsusp_save(void) { - unsigned int nr_pages; + unsigned int nr_pages, nr_highmem; - pr_debug("swsusp: critical section: \n"); + printk("swsusp: critical section: \n"); drain_local_pages(); nr_pages = count_data_pages(); - printk("swsusp: Need to copy %u pages\n", nr_pages); + nr_highmem = count_highmem_pages(); + printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem); - if (!enough_free_mem(nr_pages)) { + if (!enough_free_mem(nr_pages, nr_highmem)) { printk(KERN_ERR "swsusp: Not enough free memory\n"); return -ENOMEM; } - if (swsusp_alloc(&orig_bm, ©_bm, nr_pages)) + if (swsusp_alloc(&orig_bm, ©_bm, nr_pages, nr_highmem)) { + printk(KERN_ERR "swsusp: Memory allocation failed\n"); return -ENOMEM; + } /* During allocating of suspend pagedir, new cold pages may appear. * Kill them. @@ -894,10 +1014,12 @@ asmlinkage int swsusp_save(void) * touch swap space! Except we must write out our image of course. */ + nr_pages += nr_highmem; nr_copy_pages = nr_pages; - nr_meta_pages = (nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT; + nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE); printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages); + return 0; } @@ -960,7 +1082,7 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) if (!buffer) { /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); + buffer = get_image_page(GFP_ATOMIC, PG_ANY); if (!buffer) return -ENOMEM; } @@ -975,9 +1097,23 @@ int snapshot_read_next(struct snapshot_handle *handle, size_t count) memset(buffer, 0, PAGE_SIZE); pack_pfns(buffer, &orig_bm); } else { - unsigned long pfn = memory_bm_next_pfn(©_bm); + struct page *page; - handle->buffer = page_address(pfn_to_page(pfn)); + page = pfn_to_page(memory_bm_next_pfn(©_bm)); + if (PageHighMem(page)) { + /* Highmem pages are copied to the buffer, + * because we can't return with a kmapped + * highmem page (we may not be called again). + */ + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + memcpy(buffer, kaddr, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + handle->buffer = buffer; + } else { + handle->buffer = page_address(page); + } } handle->prev = handle->cur; } @@ -1005,7 +1141,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) unsigned long pfn, max_zone_pfn; /* Clear page flags */ - for_each_zone (zone) { + for_each_zone(zone) { max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) if (pfn_valid(pfn)) @@ -1101,6 +1237,218 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) } } +/* List of "safe" pages that may be used to store data loaded from the suspend + * image + */ +static struct linked_page *safe_pages_list; + +#ifdef CONFIG_HIGHMEM +/* struct highmem_pbe is used for creating the list of highmem pages that + * should be restored atomically during the resume from disk, because the page + * frames they have occupied before the suspend are in use. + */ +struct highmem_pbe { + struct page *copy_page; /* data is here now */ + struct page *orig_page; /* data was here before the suspend */ + struct highmem_pbe *next; +}; + +/* List of highmem PBEs needed for restoring the highmem pages that were + * allocated before the suspend and included in the suspend image, but have + * also been allocated by the "resume" kernel, so their contents cannot be + * written directly to their "original" page frames. + */ +static struct highmem_pbe *highmem_pblist; + +/** + * count_highmem_image_pages - compute the number of highmem pages in the + * suspend image. The bits in the memory bitmap @bm that correspond to the + * image pages are assumed to be set. + */ + +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) +{ + unsigned long pfn; + unsigned int cnt = 0; + + memory_bm_position_reset(bm); + pfn = memory_bm_next_pfn(bm); + while (pfn != BM_END_OF_MAP) { + if (PageHighMem(pfn_to_page(pfn))) + cnt++; + + pfn = memory_bm_next_pfn(bm); + } + return cnt; +} + +/** + * prepare_highmem_image - try to allocate as many highmem pages as + * there are highmem image pages (@nr_highmem_p points to the variable + * containing the number of highmem image pages). The pages that are + * "safe" (ie. will not be overwritten when the suspend image is + * restored) have the corresponding bits set in @bm (it must be + * unitialized). + * + * NOTE: This function should not be called if there are no highmem + * image pages. + */ + +static unsigned int safe_highmem_pages; + +static struct memory_bitmap *safe_highmem_bm; + +static int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + unsigned int to_alloc; + + if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) + return -ENOMEM; + + if (get_highmem_buffer(PG_SAFE)) + return -ENOMEM; + + to_alloc = count_free_highmem_pages(); + if (to_alloc > *nr_highmem_p) + to_alloc = *nr_highmem_p; + else + *nr_highmem_p = to_alloc; + + safe_highmem_pages = 0; + while (to_alloc-- > 0) { + struct page *page; + + page = alloc_page(__GFP_HIGHMEM); + if (!PageNosaveFree(page)) { + /* The page is "safe", set its bit the bitmap */ + memory_bm_set_bit(bm, page_to_pfn(page)); + safe_highmem_pages++; + } + /* Mark the page as allocated */ + SetPageNosave(page); + SetPageNosaveFree(page); + } + memory_bm_position_reset(bm); + safe_highmem_bm = bm; + return 0; +} + +/** + * get_highmem_page_buffer - for given highmem image page find the buffer + * that suspend_write_next() should set for its caller to write to. + * + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem page is set to the page to which + * the data will have to be copied from @buffer. + */ + +static struct page *last_highmem_page; + +static void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + struct highmem_pbe *pbe; + void *kaddr; + + if (PageNosave(page) && PageNosaveFree(page)) { + /* We have allocated the "original" page frame and we can + * use it directly to store the loaded page. + */ + last_highmem_page = page; + return buffer; + } + /* The "original" page frame has not been allocated and we have to + * use a "safe" page frame to store the loaded page. + */ + pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); + if (!pbe) { + swsusp_free(); + return NULL; + } + pbe->orig_page = page; + if (safe_highmem_pages > 0) { + struct page *tmp; + + /* Copy of the page will be stored in high memory */ + kaddr = buffer; + tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); + safe_highmem_pages--; + last_highmem_page = tmp; + pbe->copy_page = tmp; + } else { + /* Copy of the page will be stored in normal memory */ + kaddr = safe_pages_list; + safe_pages_list = safe_pages_list->next; + pbe->copy_page = virt_to_page(kaddr); + } + pbe->next = highmem_pblist; + highmem_pblist = pbe; + return kaddr; +} + +/** + * copy_last_highmem_page - copy the contents of a highmem image from + * @buffer, where the caller of snapshot_write_next() has place them, + * to the right location represented by @last_highmem_page . + */ + +static void copy_last_highmem_page(void) +{ + if (last_highmem_page) { + void *dst; + + dst = kmap_atomic(last_highmem_page, KM_USER0); + memcpy(dst, buffer, PAGE_SIZE); + kunmap_atomic(dst, KM_USER0); + last_highmem_page = NULL; + } +} + +static inline int last_highmem_page_copied(void) +{ + return !last_highmem_page; +} + +static inline void free_highmem_data(void) +{ + if (safe_highmem_bm) + memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); + + if (buffer) + free_image_page(buffer, PG_UNSAFE_CLEAR); +} +#else +static inline int get_safe_write_buffer(void) { return 0; } + +static unsigned int +count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } + +static inline int +prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +{ + return 0; +} + +static inline void * +get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +{ + return NULL; +} + +static inline void copy_last_highmem_page(void) {} +static inline int last_highmem_page_copied(void) { return 1; } +static inline void free_highmem_data(void) {} +#endif /* CONFIG_HIGHMEM */ + /** * prepare_image - use the memory bitmap @bm to mark the pages that will * be overwritten in the process of restoring the system memory state @@ -1110,20 +1458,25 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) * The idea is to allocate a new memory bitmap first and then allocate * as many pages as needed for the image data, but not to assign these * pages to specific tasks initially. Instead, we just mark them as - * allocated and create a list of "safe" pages that will be used later. + * allocated and create a lists of "safe" pages that will be used + * later. On systems with high memory a list of "safe" highmem pages is + * also created. */ #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) -static struct linked_page *safe_pages_list; - static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { - unsigned int nr_pages; + unsigned int nr_pages, nr_highmem; struct linked_page *sp_list, *lp; int error; + /* If there is no highmem, the buffer will not be necessary */ + free_image_page(buffer, PG_UNSAFE_CLEAR); + buffer = NULL; + + nr_highmem = count_highmem_image_pages(bm); error = mark_unsafe_pages(bm); if (error) goto Free; @@ -1134,6 +1487,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) duplicate_memory_bitmap(new_bm, bm); memory_bm_free(bm, PG_UNSAFE_KEEP); + if (nr_highmem > 0) { + error = prepare_highmem_image(bm, &nr_highmem); + if (error) + goto Free; + } /* Reserve some safe pages for potential later use. * * NOTE: This way we make sure there will be enough safe pages for the @@ -1142,10 +1500,10 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) */ sp_list = NULL; /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ - nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { - lp = alloc_image_page(GFP_ATOMIC, PG_SAFE); + lp = get_image_page(GFP_ATOMIC, PG_SAFE); if (!lp) { error = -ENOMEM; goto Free; @@ -1156,7 +1514,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) } /* Preallocate memory for the image */ safe_pages_list = NULL; - nr_pages = nr_copy_pages - allocated_unsafe_pages; + nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); if (!lp) { @@ -1196,6 +1554,9 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) struct pbe *pbe; struct page *page = pfn_to_page(memory_bm_next_pfn(bm)); + if (PageHighMem(page)) + return get_highmem_page_buffer(page, ca); + if (PageNosave(page) && PageNosaveFree(page)) /* We have allocated the "original" page frame and we can * use it directly to store the loaded page. @@ -1210,12 +1571,12 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) swsusp_free(); return NULL; } - pbe->orig_address = (unsigned long)page_address(page); - pbe->address = (unsigned long)safe_pages_list; + pbe->orig_address = page_address(page); + pbe->address = safe_pages_list; safe_pages_list = safe_pages_list->next; pbe->next = restore_pblist; restore_pblist = pbe; - return (void *)pbe->address; + return pbe->address; } /** @@ -1249,14 +1610,16 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) return 0; - if (!buffer) { - /* This makes the buffer be freed by swsusp_free() */ - buffer = alloc_image_page(GFP_ATOMIC, PG_ANY); + if (handle->offset == 0) { + if (!buffer) + /* This makes the buffer be freed by swsusp_free() */ + buffer = get_image_page(GFP_ATOMIC, PG_ANY); + if (!buffer) return -ENOMEM; - } - if (!handle->offset) + handle->buffer = buffer; + } handle->sync_read = 1; if (handle->prev < handle->cur) { if (handle->prev == 0) { @@ -1284,8 +1647,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) return -ENOMEM; } } else { + copy_last_highmem_page(); handle->buffer = get_buffer(&orig_bm, &ca); - handle->sync_read = 0; + if (handle->buffer != buffer) + handle->sync_read = 0; } handle->prev = handle->cur; } @@ -1301,15 +1666,73 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count) return count; } +/** + * snapshot_write_finalize - must be called after the last call to + * snapshot_write_next() in case the last page in the image happens + * to be a highmem page and its contents should be stored in the + * highmem. Additionally, it releases the memory that will not be + * used any more. + */ + +void snapshot_write_finalize(struct snapshot_handle *handle) +{ + copy_last_highmem_page(); + /* Free only if we have loaded the image entirely */ + if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { + memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + free_highmem_data(); + } +} + int snapshot_image_loaded(struct snapshot_handle *handle) { - return !(!nr_copy_pages || + return !(!nr_copy_pages || !last_highmem_page_copied() || handle->cur <= nr_meta_pages + nr_copy_pages); } -void snapshot_free_unused_memory(struct snapshot_handle *handle) +#ifdef CONFIG_HIGHMEM +/* Assumes that @buf is ready and points to a "safe" page */ +static inline void +swap_two_pages_data(struct page *p1, struct page *p2, void *buf) { - /* Free only if we have loaded the image entirely */ - if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) - memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + void *kaddr1, *kaddr2; + + kaddr1 = kmap_atomic(p1, KM_USER0); + kaddr2 = kmap_atomic(p2, KM_USER1); + memcpy(buf, kaddr1, PAGE_SIZE); + memcpy(kaddr1, kaddr2, PAGE_SIZE); + memcpy(kaddr2, buf, PAGE_SIZE); + kunmap_atomic(kaddr1, KM_USER0); + kunmap_atomic(kaddr2, KM_USER1); +} + +/** + * restore_highmem - for each highmem page that was allocated before + * the suspend and included in the suspend image, and also has been + * allocated by the "resume" kernel swap its current (ie. "before + * resume") contents with the previous (ie. "before suspend") one. + * + * If the resume eventually fails, we can call this function once + * again and restore the "before resume" highmem state. + */ + +int restore_highmem(void) +{ + struct highmem_pbe *pbe = highmem_pblist; + void *buf; + + if (!pbe) + return 0; + + buf = get_image_page(GFP_ATOMIC, PG_SAFE); + if (!buf) + return -ENOMEM; + + while (pbe) { + swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); + pbe = pbe->next; + } + free_image_page(buf, PG_UNSAFE_CLEAR); + return 0; } +#endif /* CONFIG_HIGHMEM */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index aa5a9bff01f..cbd187e9041 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -558,7 +558,7 @@ static int load_image(struct swap_map_handle *handle, error = err2; if (!error) { printk("\b\b\b\bdone\n"); - snapshot_free_unused_memory(snapshot); + snapshot_write_finalize(snapshot); if (!snapshot_image_loaded(snapshot)) error = -ENODATA; } diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 4147a756a8c..68de5c1dbd7 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -64,10 +64,8 @@ int in_suspend __nosavedata = 0; #ifdef CONFIG_HIGHMEM unsigned int count_highmem_pages(void); -int save_highmem(void); int restore_highmem(void); #else -static inline int save_highmem(void) { return 0; } static inline int restore_highmem(void) { return 0; } static inline unsigned int count_highmem_pages(void) { return 0; } #endif @@ -184,7 +182,7 @@ static inline unsigned long __shrink_memory(long tmp) int swsusp_shrink_memory(void) { - long size, tmp; + long tmp; struct zone *zone; unsigned long pages = 0; unsigned int i = 0; @@ -192,15 +190,27 @@ int swsusp_shrink_memory(void) printk("Shrinking memory... "); do { - size = 2 * count_highmem_pages(); - size += size / 50 + count_data_pages() + PAGES_FOR_IO; + long size, highmem_size; + + highmem_size = count_highmem_pages(); + size = count_data_pages() + PAGES_FOR_IO; tmp = size; + size += highmem_size; for_each_zone (zone) - if (!is_highmem(zone) && populated_zone(zone)) { - tmp -= zone->free_pages; - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - tmp += snapshot_additional_pages(zone); + if (populated_zone(zone)) { + if (is_highmem(zone)) { + highmem_size -= zone->free_pages; + } else { + tmp -= zone->free_pages; + tmp += zone->lowmem_reserve[ZONE_NORMAL]; + tmp += snapshot_additional_pages(zone); + } } + + if (highmem_size < 0) + highmem_size = 0; + + tmp += highmem_size; if (tmp > 0) { tmp = __shrink_memory(tmp); if (!tmp) @@ -223,6 +233,7 @@ int swsusp_suspend(void) if ((error = arch_prepare_suspend())) return error; + local_irq_disable(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* device_power_down() now. @@ -235,18 +246,11 @@ int swsusp_suspend(void) goto Enable_irqs; } - if ((error = save_highmem())) { - printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); - goto Restore_highmem; - } - save_processor_state(); if ((error = swsusp_arch_suspend())) printk(KERN_ERR "Error %d suspending\n", error); /* Restore control flow magically appears here */ restore_processor_state(); -Restore_highmem: - restore_highmem(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -268,18 +272,23 @@ int swsusp_resume(void) printk(KERN_ERR "Some devices failed to power down, very bad\n"); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); - error = swsusp_arch_resume(); - /* Code below is only ever reached in case of failure. Otherwise - * execution continues at place where swsusp_arch_suspend was called - */ - BUG_ON(!error); + error = restore_highmem(); + if (!error) { + error = swsusp_arch_resume(); + /* The code below is only ever reached in case of a failure. + * Otherwise execution continues at place where + * swsusp_arch_suspend() was called + */ + BUG_ON(!error); + /* This call to restore_highmem() undos the previous one */ + restore_highmem(); + } /* The only reason why swsusp_arch_resume() can fail is memory being * very tight, so we have to free it as soon as we can to avoid * subsequent failures */ swsusp_free(); restore_processor_state(); - restore_highmem(); touch_softlockup_watchdog(); device_power_up(); local_irq_enable(); diff --git a/kernel/power/user.c b/kernel/power/user.c index 05c58a2c0dd..a63b25c63b4 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -194,12 +194,12 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, break; case SNAPSHOT_ATOMIC_RESTORE: + snapshot_write_finalize(&data->handle); if (data->mode != O_WRONLY || !data->frozen || !snapshot_image_loaded(&data->handle)) { error = -EPERM; break; } - snapshot_free_unused_memory(&data->handle); down(&pm_sem); pm_prepare_console(); suspend_console(); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2e97baa3b2a..2a6a79f6813 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1260,6 +1260,9 @@ out: } if (!all_zones_ok) { cond_resched(); + + try_to_freeze(); + goto loop_again; } -- cgit v1.2.3-70-g09d2 From 7dfb71030f7636a0d65200158113c37764552f93 Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Wed, 6 Dec 2006 20:34:23 -0800 Subject: [PATCH] Add include/linux/freezer.h and move definitions from sched.h Move process freezing functions from include/linux/sched.h to freezer.h, so that modifications to the freezer or the kernel configuration don't require recompiling just about everything. [akpm@osdl.org: fix ueagle driver] Signed-off-by: Nigel Cunningham Cc: "Rafael J. Wysocki" Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/signal.c | 1 + arch/avr32/kernel/signal.c | 2 +- arch/frv/kernel/signal.c | 2 +- arch/h8300/kernel/signal.c | 2 +- arch/i386/kernel/io_apic.c | 1 + arch/m32r/kernel/signal.c | 2 +- arch/powerpc/kernel/signal_32.c | 2 +- arch/sh/kernel/signal.c | 1 + arch/sh64/kernel/signal.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/char/hvc_console.c | 1 + drivers/edac/edac_mc.c | 1 + drivers/ieee1394/nodemgr.c | 1 + drivers/input/gameport/gameport.c | 1 + drivers/input/serio/serio.c | 1 + drivers/macintosh/therm_adt746x.c | 1 + drivers/macintosh/via-pmu.c | 2 +- drivers/macintosh/windfarm_core.c | 1 + drivers/md/md.c | 2 +- drivers/media/dvb/dvb-core/dvb_frontend.c | 2 +- drivers/media/video/msp3400-driver.c | 2 +- drivers/media/video/tvaudio.c | 1 + drivers/media/video/video-buf-dvb.c | 2 +- drivers/media/video/vivi.c | 1 + drivers/mfd/ucb1x00-ts.c | 2 +- drivers/net/irda/stir4200.c | 1 + drivers/net/wireless/airo.c | 1 + drivers/pcmcia/cs.c | 1 + drivers/pnp/pnpbios/core.c | 1 + drivers/usb/atm/ueagle-atm.c | 2 + drivers/usb/core/hub.c | 1 + drivers/usb/gadget/file_storage.c | 2 +- drivers/usb/storage/usb.c | 2 +- drivers/w1/w1.c | 1 + fs/afs/kafsasyncd.c | 1 + fs/afs/kafstimod.c | 1 + fs/cifs/cifsfs.c | 1 + fs/cifs/connect.c | 1 + fs/jbd/journal.c | 2 +- fs/jbd2/journal.c | 2 +- fs/jffs/intrep.c | 1 + fs/jffs2/background.c | 1 + fs/jfs/jfs_logmgr.c | 2 +- fs/jfs/jfs_txnmgr.c | 2 +- fs/lockd/clntproc.c | 1 + fs/xfs/linux-2.6/xfs_buf.c | 1 + fs/xfs/linux-2.6/xfs_super.c | 1 + include/linux/freezer.h | 84 +++++++++++++++++++++++++++++++ include/linux/sched.h | 81 ----------------------------- init/do_mounts_initrd.c | 1 + kernel/audit.c | 1 + kernel/power/disk.c | 1 + kernel/power/main.c | 1 + kernel/power/process.c | 1 + kernel/power/user.c | 1 + kernel/rtmutex-tester.c | 1 + kernel/sched.c | 2 +- kernel/signal.c | 1 + mm/pdflush.c | 1 + mm/vmscan.c | 1 + net/rxrpc/krxiod.c | 1 + net/rxrpc/krxsecd.c | 1 + net/rxrpc/krxtimod.c | 1 + net/sunrpc/svcsock.c | 1 + 64 files changed, 147 insertions(+), 101 deletions(-) create mode 100644 include/linux/freezer.h (limited to 'include') diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 48cf7fffddf..f38a60a03b8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 33096651c24..0ec14854a20 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index b8a5882b862..85baeae9666 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 7787f70a05b..02955604d76 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 3b7a63e0ed1..44c5a3206b2 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index b60cea4aeba..092ea86bb07 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 320353f0926..e4ebe1a6228 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #endif #include diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c index 50d7c4993be..bb1c480a59c 100644 --- a/arch/sh/kernel/signal.c +++ b/arch/sh/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh64/kernel/signal.c b/arch/sh64/kernel/signal.c index 9e2ffc45c0e..1666d3efb52 100644 --- a/arch/sh64/kernel/signal.c +++ b/arch/sh64/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index f2904f67af4..e45eaa26411 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 9902ffad3b1..cc2cd46bedc 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -38,6 +38,7 @@ #include #include #include +#include #include diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 75e9e38330f..1b4fc922180 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c index 8e7b83f8448..e829c9336b3 100644 --- a/drivers/ieee1394/nodemgr.c +++ b/drivers/ieee1394/nodemgr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "csr.h" diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index a0af97efe6a..79dfb4b25c9 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -23,6 +23,7 @@ #include #include /* HZ */ #include +#include /*#include */ diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 211943f85cb..5f1d4032fd5 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -35,6 +35,7 @@ #include #include #include +#include MODULE_AUTHOR("Vojtech Pavlik "); MODULE_DESCRIPTION("Serio abstraction core"); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index 13b953ae8eb..3d3bf1643e7 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index e63ea1c1f3c..c8558d4ed50 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index ab3faa702d5..e947af982f9 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -34,6 +34,7 @@ #include #include #include +#include #include diff --git a/drivers/md/md.c b/drivers/md/md.c index 8cbf9c9df1c..6c4345bde07 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -39,10 +39,10 @@ #include #include #include /* for invalidate_bdev */ -#include #include #include #include +#include #include diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index a2ab2eebfc6..e85972222ab 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/video/msp3400-driver.c b/drivers/media/video/msp3400-driver.c index cf43df3fe70..e1b56dc13c3 100644 --- a/drivers/media/video/msp3400-driver.c +++ b/drivers/media/video/msp3400-driver.c @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include "msp3400-driver.h" /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index fcaef4bf828..d506dfaa45a 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c index f53edf1923b..fcc5467e763 100644 --- a/drivers/media/video/video-buf-dvb.c +++ b/drivers/media/video/video-buf-dvb.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/video/vivi.c b/drivers/media/video/vivi.c index 3c8dc72dc8e..9986de5cb3d 100644 --- a/drivers/media/video/vivi.c +++ b/drivers/media/video/vivi.c @@ -36,6 +36,7 @@ #include #include #include +#include /* Wake up at about 30 fps */ #define WAKE_NUMERATOR 30 diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 82938ad6ddb..ce1a4810821 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 3b4c4787593..c14a74634fd 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index efcdaf1c5f7..44a22701da9 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "airo.h" diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index f9cd831a3f3..606a4674033 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 81a6c83d89a..81186f479a3 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index f2d196fa1e8..dae4ef1e8fe 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -64,6 +64,8 @@ #include #include #include +#include + #include #include "usbatm.h" diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 77c05be5241..2651c2e2a89 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/gadget/file_storage.c b/drivers/usb/gadget/file_storage.c index 8b975d15538..c98316ce838 100644 --- a/drivers/usb/gadget/file_storage.c +++ b/drivers/usb/gadget/file_storage.c @@ -250,7 +250,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index b401084b3d2..70644506651 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index de3e9791f80..63c07243993 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -31,6 +31,7 @@ #include #include #include +#include #include diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c index f09a794f248..615df2407cb 100644 --- a/fs/afs/kafsasyncd.c +++ b/fs/afs/kafsasyncd.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "cell.h" #include "server.h" #include "volume.h" diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c index 65bc05ab818..694344e4d3c 100644 --- a/fs/afs/kafstimod.c +++ b/fs/afs/kafstimod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cell.h" #include "volume.h" #include "kafstimod.h" diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e6b5866e500..71bc87a37fc 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "cifsfs.h" #include "cifspdu.h" #define DECLARE_GLOBALS_HERE diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71f77914ce9..2caca06b4ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include "cifspdu.h" diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index a8774bed20b..10fff944393 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 50356019ae3..44fc32bfd7f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index 4a543e11497..478a74e2e9d 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "intrep.h" #include "jffs_fm.h" diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index ff2a872e80e..6eb3daebd56 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "nodelist.h" diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index b89c9aba046..5065baa530b 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -67,7 +67,7 @@ #include #include /* for sync_blockdev() */ #include -#include +#include #include #include #include "jfs_incore.h" diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 81f6f04af19..d558e51b0df 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 3d84f600b63..50643b6a555 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index eef4a0ba11e..b971237c5a9 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -32,6 +32,7 @@ #include #include #include +#include STATIC kmem_zone_t *xfs_buf_zone; STATIC kmem_shaker_t xfs_buf_shake; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index de05abbbe7f..b93265b7c79 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -56,6 +56,7 @@ #include #include #include +#include STATIC struct quotactl_ops xfs_quotactl_operations; STATIC struct super_operations xfs_super_operations; diff --git a/include/linux/freezer.h b/include/linux/freezer.h new file mode 100644 index 00000000000..266373f7444 --- /dev/null +++ b/include/linux/freezer.h @@ -0,0 +1,84 @@ +/* Freezer declarations */ + +#ifdef CONFIG_PM +/* + * Check if a process has been frozen + */ +static inline int frozen(struct task_struct *p) +{ + return p->flags & PF_FROZEN; +} + +/* + * Check if there is a request to freeze a process + */ +static inline int freezing(struct task_struct *p) +{ + return p->flags & PF_FREEZE; +} + +/* + * Request that a process be frozen + * FIXME: SMP problem. We may not modify other process' flags! + */ +static inline void freeze(struct task_struct *p) +{ + p->flags |= PF_FREEZE; +} + +/* + * Sometimes we may need to cancel the previous 'freeze' request + */ +static inline void do_not_freeze(struct task_struct *p) +{ + p->flags &= ~PF_FREEZE; +} + +/* + * Wake up a frozen process + */ +static inline int thaw_process(struct task_struct *p) +{ + if (frozen(p)) { + p->flags &= ~PF_FROZEN; + wake_up_process(p); + return 1; + } + return 0; +} + +/* + * freezing is complete, mark process as frozen + */ +static inline void frozen_process(struct task_struct *p) +{ + p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; +} + +extern void refrigerator(void); +extern int freeze_processes(void); +extern void thaw_processes(void); + +static inline int try_to_freeze(void) +{ + if (freezing(current)) { + refrigerator(); + return 1; + } else + return 0; +} +#else +static inline int frozen(struct task_struct *p) { return 0; } +static inline int freezing(struct task_struct *p) { return 0; } +static inline void freeze(struct task_struct *p) { BUG(); } +static inline int thaw_process(struct task_struct *p) { return 1; } +static inline void frozen_process(struct task_struct *p) { BUG(); } + +static inline void refrigerator(void) {} +static inline int freeze_processes(void) { BUG(); return 0; } +static inline void thaw_processes(void) {} + +static inline int try_to_freeze(void) { return 0; } + + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index acfd2e15c5f..837a012f573 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1618,87 +1618,6 @@ extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); extern void normalize_rt_tasks(void); -#ifdef CONFIG_PM -/* - * Check if a process has been frozen - */ -static inline int frozen(struct task_struct *p) -{ - return p->flags & PF_FROZEN; -} - -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return p->flags & PF_FREEZE; -} - -/* - * Request that a process be frozen - * FIXME: SMP problem. We may not modify other process' flags! - */ -static inline void freeze(struct task_struct *p) -{ - p->flags |= PF_FREEZE; -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request - */ -static inline void do_not_freeze(struct task_struct *p) -{ - p->flags &= ~PF_FREEZE; -} - -/* - * Wake up a frozen process - */ -static inline int thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - wake_up_process(p); - return 1; - } - return 0; -} - -/* - * freezing is complete, mark process as frozen - */ -static inline void frozen_process(struct task_struct *p) -{ - p->flags = (p->flags & ~PF_FREEZE) | PF_FROZEN; -} - -extern void refrigerator(void); -extern int freeze_processes(void); -extern void thaw_processes(void); - -static inline int try_to_freeze(void) -{ - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; -} -#else -static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void freeze(struct task_struct *p) { BUG(); } -static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void frozen_process(struct task_struct *p) { BUG(); } - -static inline void refrigerator(void) {} -static inline int freeze_processes(void) { BUG(); return 0; } -static inline void thaw_processes(void) {} - -static inline int try_to_freeze(void) { return 0; } - -#endif /* CONFIG_PM */ #endif /* __KERNEL__ */ #endif diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 919a80cb322..2cfd7cb36e7 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "do_mounts.h" diff --git a/kernel/audit.c b/kernel/audit.c index 98106f6078b..d9b690ac684 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f5079231383..53b3b57c022 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "power.h" diff --git a/kernel/power/main.c b/kernel/power/main.c index 873228c71da..6096c71b182 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "power.h" diff --git a/kernel/power/process.c b/kernel/power/process.c index 72e72d2c61e..29be608e834 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Timeout for stopping processes diff --git a/kernel/power/user.c b/kernel/power/user.c index a63b25c63b4..26c66941c00 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -22,6 +22,7 @@ #include #include #include +#include #include diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 6dcea9dd8c9..015fc633c96 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "rtmutex.h" diff --git a/kernel/sched.c b/kernel/sched.c index 3399701c680..12fdbef1d9b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 8e19d278548..bc972d7e631 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/pdflush.c b/mm/pdflush.c index b02102feeb4..8ce0900dc95 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -21,6 +21,7 @@ #include // Prototypes pdflush_operation() #include #include +#include /* diff --git a/mm/vmscan.c b/mm/vmscan.c index 2a6a79f6813..f6616e81fac 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c index dada34a77b2..49effd92144 100644 --- a/net/rxrpc/krxiod.c +++ b/net/rxrpc/krxiod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c index cea4eb5e249..3ab0f77409f 100644 --- a/net/rxrpc/krxsecd.c +++ b/net/rxrpc/krxsecd.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c index 3e7466900bd..9a9b6132dba 100644 --- a/net/rxrpc/krxtimod.c +++ b/net/rxrpc/krxtimod.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 64ca1f61dd9..1c68956824e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From ff39593ad0ff7a79a3717edac6634407aa8200c2 Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Wed, 6 Dec 2006 20:34:28 -0800 Subject: [PATCH] swsusp: thaw userspace and kernel space separately Modify process thawing so that we can thaw kernel space without thawing userspace, and thaw kernelspace first. This will be useful in later patches, where I intend to get swsusp thawing kernel threads only before seeking to free memory. Signed-off-by: Nigel Cunningham Cc: Pavel Machek Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 9 ++++++++- kernel/power/process.c | 25 ++++++++++++++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 266373f7444..294ebea859c 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -1,5 +1,8 @@ /* Freezer declarations */ +#define FREEZER_KERNEL_THREADS 0 +#define FREEZER_ALL_THREADS 1 + #ifdef CONFIG_PM /* * Check if a process has been frozen @@ -57,7 +60,8 @@ static inline void frozen_process(struct task_struct *p) extern void refrigerator(void); extern int freeze_processes(void); -extern void thaw_processes(void); +#define thaw_processes() do { thaw_some_processes(FREEZER_ALL_THREADS); } while(0) +#define thaw_kernel_threads() do { thaw_some_processes(FREEZER_KERNEL_THREADS); } while(0) static inline int try_to_freeze(void) { @@ -67,6 +71,9 @@ static inline int try_to_freeze(void) } else return 0; } + +extern void thaw_some_processes(int all); + #else static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } diff --git a/kernel/power/process.c b/kernel/power/process.c index fedabad5a18..cba8a5890ed 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -153,18 +153,29 @@ int freeze_processes(void) return 0; } -void thaw_processes(void) +void thaw_some_processes(int all) { struct task_struct *g, *p; + int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */ printk("Restarting tasks... "); read_lock(&tasklist_lock); - do_each_thread(g, p) { - if (!freezeable(p)) - continue; - if (!thaw_process(p)) - printk(KERN_INFO "Strange, %s not stopped\n", p->comm); - } while_each_thread(g, p); + do { + do_each_thread(g, p) { + /* + * is_user = 0 if kernel thread or borrowed mm, + * 1 otherwise. + */ + int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM)); + if (!freezeable(p) || (is_user != pass)) + continue; + if (!thaw_process(p)) + printk(KERN_INFO + "Strange, %s not stopped\n", p->comm); + } while_each_thread(g, p); + + pass++; + } while (pass < 2 && all); read_unlock(&tasklist_lock); schedule(); -- cgit v1.2.3-70-g09d2 From 2d4a34c9365c6e3f94a5b26ce296e1fce9b66c8b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:29 -0800 Subject: [PATCH] swsusp: Support i386 systems with PAE or without PSE Make swsusp support i386 systems with PAE or without PSE. This is done by creating temporary page tables located in resume-safe page frames before the suspend image is restored in the same way as x86_64 does it. Signed-off-by: Rafael J. Wysocki Cc: Andi Kleen Cc: Dave Jones Cc: Nigel Cunningham Cc: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/power/Makefile | 2 +- arch/i386/power/suspend.c | 158 +++++++++++++++++++++++++++++++++++++++++++++ arch/i386/power/swsusp.S | 9 ++- include/asm-i386/suspend.h | 13 +--- kernel/power/Kconfig | 2 +- 5 files changed, 168 insertions(+), 16 deletions(-) create mode 100644 arch/i386/power/suspend.c (limited to 'include') diff --git a/arch/i386/power/Makefile b/arch/i386/power/Makefile index 8cfa4e8a719..2de7bbf03cd 100644 --- a/arch/i386/power/Makefile +++ b/arch/i386/power/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_PM) += cpu.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o suspend.o diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c new file mode 100644 index 00000000000..db5e98d2eb7 --- /dev/null +++ b/arch/i386/power/suspend.c @@ -0,0 +1,158 @@ +/* + * Suspend support specific for i386 - temporary page tables + * + * Distribute under GPLv2 + * + * Copyright (c) 2006 Rafael J. Wysocki + */ + +#include +#include + +#include +#include +#include + +/* Defined in arch/i386/power/swsusp.S */ +extern int restore_image(void); + +/* Pointer to the temporary resume page tables */ +pgd_t *resume_pg_dir; + +/* The following three functions are based on the analogous code in + * arch/i386/mm/init.c + */ + +/* + * Create a middle page table on a resume-safe page and put a pointer to it in + * the given global directory entry. This only returns the gd entry + * in non-PAE compilation mode, since the middle layer is folded. + */ +static pmd_t *resume_one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + +#ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd_table) + return NULL; + + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + + BUG_ON(pmd_table != pmd_offset(pud, 0)); +#else + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); +#endif + + return pmd_table; +} + +/* + * Create a page table on a resume-safe page and place a pointer to it in + * a middle page directory entry. + */ +static pte_t *resume_one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!page_table) + return NULL; + + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + + return page_table; + } + + return pte_offset_kernel(pmd, 0); +} + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. The page tables are allocated out of resume-safe pages. + */ +static int resume_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int pgd_idx, pmd_idx; + + pgd_idx = pgd_index(PAGE_OFFSET); + pgd = pgd_base + pgd_idx; + pfn = 0; + + for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (pfn >= max_low_pfn) + continue; + + for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { + if (pfn >= max_low_pfn) + break; + + /* Map with big pages if possible, otherwise create + * normal page tables. + * NOTE: We can mark everything as executable here + */ + if (cpu_has_pse) { + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + pfn += PTRS_PER_PTE; + } else { + pte_t *max_pte; + + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + + max_pte = pte + PTRS_PER_PTE; + for (; pte < max_pte; pte++, pfn++) { + if (pfn >= max_low_pfn) + break; + + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + } + } + } + } + return 0; +} + +static inline void resume_init_first_level_page_table(pgd_t *pg_dir) +{ +#ifdef CONFIG_X86_PAE + int i; + + /* Init entries of the first-level page table to the zero page */ + for (i = 0; i < PTRS_PER_PGD; i++) + set_pgd(pg_dir + i, + __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); +#endif +} + +int swsusp_arch_resume(void) +{ + int error; + + resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!resume_pg_dir) + return -ENOMEM; + + resume_init_first_level_page_table(resume_pg_dir); + error = resume_physical_mapping_init(resume_pg_dir); + if (error) + return error; + + /* We have got enough memory and from now on we cannot recover */ + restore_image(); + return 0; +} diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index 8a2b50a0aaa..53662e05b39 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -28,8 +28,9 @@ ENTRY(swsusp_arch_suspend) call swsusp_save ret -ENTRY(swsusp_arch_resume) - movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx +ENTRY(restore_image) + movl resume_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx movl %ecx, %cr3 movl restore_pblist, %edx @@ -51,6 +52,10 @@ copy_loop: .p2align 4,,7 done: + /* go back to the original page tables */ + movl $swapper_pg_dir, %ecx + subl $__PAGE_OFFSET, %ecx + movl %ecx, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movl mmu_cr4_features, %eax movl %eax, %edx diff --git a/include/asm-i386/suspend.h b/include/asm-i386/suspend.h index 08be1e5009d..c1da5caafaf 100644 --- a/include/asm-i386/suspend.h +++ b/include/asm-i386/suspend.h @@ -6,18 +6,7 @@ #include #include -static inline int -arch_prepare_suspend(void) -{ - /* If you want to make non-PSE machine work, turn off paging - in swsusp_arch_suspend. swsusp_pg_dir should have identity mapping, so - it could work... */ - if (!cpu_has_pse) { - printk(KERN_ERR "PSE is required for swsusp.\n"); - return -EPERM; - } - return 0; -} +static inline int arch_prepare_suspend(void) { return 0; } /* image of the saved processor state */ struct saved_context { diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 825068ca347..710ed084e7c 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -78,7 +78,7 @@ config PM_SYSFS_DEPRECATED config SOFTWARE_SUSPEND bool "Software Suspend" - depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP)) + depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP)) ---help--- Enable the possibility of suspending the machine. It doesn't need ACPI or APM. -- cgit v1.2.3-70-g09d2 From a9b6f562f14dc28fb4b2415f0f275cede0abe9b5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:37 -0800 Subject: [PATCH] swsusp: Untangle thaw_processes Move the loop from thaw_processes() to a separate function and call it independently for kernel threads and user space processes so that the order of thawing tasks is clearly visible. Drop thaw_kernel_threads() which is never used. Signed-off-by: Rafael J. Wysocki Cc: Pavel Machek Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/freezer.h | 6 +----- kernel/power/process.c | 49 +++++++++++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 294ebea859c..6e05e3e7ce3 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -1,8 +1,5 @@ /* Freezer declarations */ -#define FREEZER_KERNEL_THREADS 0 -#define FREEZER_ALL_THREADS 1 - #ifdef CONFIG_PM /* * Check if a process has been frozen @@ -60,8 +57,7 @@ static inline void frozen_process(struct task_struct *p) extern void refrigerator(void); extern int freeze_processes(void); -#define thaw_processes() do { thaw_some_processes(FREEZER_ALL_THREADS); } while(0) -#define thaw_kernel_threads() do { thaw_some_processes(FREEZER_KERNEL_THREADS); } while(0) +extern void thaw_processes(void); static inline int try_to_freeze(void) { diff --git a/kernel/power/process.c b/kernel/power/process.c index 1badb9a89ad..fd0ebb942f5 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -20,6 +20,8 @@ */ #define TIMEOUT (20 * HZ) +#define FREEZER_KERNEL_THREADS 0 +#define FREEZER_USER_SPACE 1 static inline int freezeable(struct task_struct * p) { @@ -79,6 +81,11 @@ static void cancel_freezing(struct task_struct *p) } } +static inline int is_user_space(struct task_struct *p) +{ + return p->mm && !(p->flags & PF_BORROWED_MM); +} + /* 0 = success, else # of processes that we failed to stop */ int freeze_processes(void) { @@ -103,10 +110,9 @@ int freeze_processes(void) cancel_freezing(p); continue; } - if (p->mm && !(p->flags & PF_BORROWED_MM)) { - /* The task is a user-space one. - * Freeze it unless there's a vfork completion - * pending + if (is_user_space(p)) { + /* Freeze the task unless there is a vfork + * completion pending */ if (!p->vfork_done) freeze_process(p); @@ -155,31 +161,30 @@ int freeze_processes(void) return 0; } -void thaw_some_processes(int all) +static void thaw_tasks(int thaw_user_space) { struct task_struct *g, *p; - int pass = 0; /* Pass 0 = Kernel space, 1 = Userspace */ - printk("Restarting tasks... "); read_lock(&tasklist_lock); - do { - do_each_thread(g, p) { - /* - * is_user = 0 if kernel thread or borrowed mm, - * 1 otherwise. - */ - int is_user = !!(p->mm && !(p->flags & PF_BORROWED_MM)); - if (!freezeable(p) || (is_user != pass)) - continue; - if (!thaw_process(p)) - printk(KERN_INFO - "Strange, %s not stopped\n", p->comm); - } while_each_thread(g, p); + do_each_thread(g, p) { + if (!freezeable(p)) + continue; - pass++; - } while (pass < 2 && all); + if (is_user_space(p) == !thaw_user_space) + continue; + if (!thaw_process(p)) + printk(KERN_WARNING " Strange, %s not stopped\n", + p->comm ); + } while_each_thread(g, p); read_unlock(&tasklist_lock); +} + +void thaw_processes(void) +{ + printk("Restarting tasks ... "); + thaw_tasks(FREEZER_KERNEL_THREADS); + thaw_tasks(FREEZER_USER_SPACE); schedule(); printk("done.\n"); } -- cgit v1.2.3-70-g09d2 From 341a595850dac1b0503df34260257d71b4fdf72c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:49 -0800 Subject: [PATCH] Support for freezeable workqueues Make it possible to create a workqueue the worker thread of which will be frozen during suspend, along with other kernel threads. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Cc: David Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 8 +++++--- kernel/workqueue.c | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4a3ea83c6d1..f0cb1df7b47 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -147,9 +147,11 @@ struct execute_work { extern struct workqueue_struct *__create_workqueue(const char *name, - int singlethread); -#define create_workqueue(name) __create_workqueue((name), 0) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1) + int singlethread, + int freezeable); +#define create_workqueue(name) __create_workqueue((name), 0, 0) +#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8d1e7cb8a51..2945b094d87 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -55,6 +56,8 @@ struct cpu_workqueue_struct { struct task_struct *thread; int run_depth; /* Detect run_workqueue() recursion depth */ + + int freezeable; /* Freeze the thread during suspend */ } ____cacheline_aligned; /* @@ -265,7 +268,8 @@ static int worker_thread(void *__cwq) struct k_sigaction sa; sigset_t blocked; - current->flags |= PF_NOFREEZE; + if (!cwq->freezeable) + current->flags |= PF_NOFREEZE; set_user_nice(current, -5); @@ -288,6 +292,9 @@ static int worker_thread(void *__cwq) set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { + if (cwq->freezeable) + try_to_freeze(); + add_wait_queue(&cwq->more_work, &wait); if (list_empty(&cwq->worklist)) schedule(); @@ -364,7 +371,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) EXPORT_SYMBOL_GPL(flush_workqueue); static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, - int cpu) + int cpu, int freezeable) { struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); struct task_struct *p; @@ -374,6 +381,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, cwq->thread = NULL; cwq->insert_sequence = 0; cwq->remove_sequence = 0; + cwq->freezeable = freezeable; INIT_LIST_HEAD(&cwq->worklist); init_waitqueue_head(&cwq->more_work); init_waitqueue_head(&cwq->work_done); @@ -389,7 +397,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, } struct workqueue_struct *__create_workqueue(const char *name, - int singlethread) + int singlethread, int freezeable) { int cpu, destroy = 0; struct workqueue_struct *wq; @@ -409,7 +417,7 @@ struct workqueue_struct *__create_workqueue(const char *name, mutex_lock(&workqueue_mutex); if (singlethread) { INIT_LIST_HEAD(&wq->list); - p = create_workqueue_thread(wq, singlethread_cpu); + p = create_workqueue_thread(wq, singlethread_cpu, freezeable); if (!p) destroy = 1; else @@ -417,7 +425,7 @@ struct workqueue_struct *__create_workqueue(const char *name, } else { list_add(&wq->list, &workqueues); for_each_online_cpu(cpu) { - p = create_workqueue_thread(wq, cpu); + p = create_workqueue_thread(wq, cpu, freezeable); if (p) { kthread_bind(p, cpu); wake_up_process(p); @@ -667,7 +675,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, mutex_lock(&workqueue_mutex); /* Create a new workqueue thread for it. */ list_for_each_entry(wq, &workqueues, list) { - if (!create_workqueue_thread(wq, hotcpu)) { + if (!create_workqueue_thread(wq, hotcpu, 0)) { printk("workqueue for %i failed\n", hotcpu); return NOTIFY_BAD; } -- cgit v1.2.3-70-g09d2 From eef88d16a2cb641d9915bfdf6377e70fccec9fde Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:34:55 -0800 Subject: [PATCH] fix v850 compilation More fallout of the post 2.6.19-rc1 IRQ changes... CC init/main.o In file included from /home/bunk/linux/kernel-2.6/linux-2.6.19-rc6-mm2/include/linux/rtc.h:102, from /home/bunk/linux/kernel-2.6/linux-2.6.19-rc6-mm2/include/linux/efi.h:19, from /home/bunk/linux/kernel-2.6/linux-2.6.19-rc6-mm2/init/main.c:43: /home/bunk/linux/kernel-2.6/linux-2.6.19-rc6-mm2/include/linux/interrupt.h:67: error: conflicting types for 'irq_handler_t' include2/asm/irq.h:49: error: previous declaration of 'irq_handler_t' was here Signed-off-by: Adrian Bunk Cc: Miles Bader Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-v850/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-v850/irq.h b/include/asm-v850/irq.h index 1bf096db8f4..88687c181f0 100644 --- a/include/asm-v850/irq.h +++ b/include/asm-v850/irq.h @@ -46,8 +46,6 @@ extern void init_irq_handlers (int base_irq, int num, int interval, struct hw_interrupt_type *irq_type); -typedef void (*irq_handler_t)(int irq, void *data, struct pt_regs *regs); - /* Handle interrupt IRQ. REGS are the registers at the time of ther interrupt. */ extern unsigned int handle_irq (int irq, struct pt_regs *regs); -- cgit v1.2.3-70-g09d2 From 799202cbd0ef6a201446d99fcbd78b9f0bda6ae5 Mon Sep 17 00:00:00 2001 From: Mike Miller Date: Wed, 6 Dec 2006 20:35:12 -0800 Subject: [PATCH] cciss: add support for 1024 logical volumes Add the support for a large number of logical volumes. We will soon have hardware that support up to 1024 logical volumes. Signed-off-by: Mike Miller Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/cciss.c | 114 ++++++++++++++++++++++++++++++-------------- drivers/block/cciss.h | 3 +- drivers/block/cciss_cmd.h | 2 +- include/linux/cciss_ioctl.h | 2 +- 4 files changed, 80 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 70cf9321ef5..c99cb7ed57f 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -1315,6 +1315,11 @@ static void cciss_update_drive_info(int ctlr, int drv_index) /* if it's the controller it's already added */ if (drv_index) { disk->queue = blk_init_queue(do_cciss_request, &h->lock); + sprintf(disk->disk_name, "cciss/c%dd%d", ctlr, drv_index); + disk->major = h->major; + disk->first_minor = drv_index << NWD_SHIFT; + disk->fops = &cciss_fops; + disk->private_data = &h->drv[drv_index]; /* Set up queue information */ disk->queue->backing_dev_info.ra_pages = READ_AHEAD; @@ -1393,11 +1398,6 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) /* Set busy_configuring flag for this operation */ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); - if (h->num_luns >= CISS_MAX_LUN) { - spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); - return -EINVAL; - } - if (h->busy_configuring) { spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return -EBUSY; @@ -1430,17 +1430,8 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) 0, 0, TYPE_CMD); if (return_code == IO_OK) { - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[0])) - << 24; - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[1])) - << 16; - listlength |= - (0xff & (unsigned int)(ld_buff->LUNListLength[2])) - << 8; - listlength |= - 0xff & (unsigned int)(ld_buff->LUNListLength[3]); + listlength = + be32_to_cpu(*(__u32 *) ld_buff->LUNListLength); } else { /* reading number of logical volumes failed */ printk(KERN_WARNING "cciss: report logical volume" " command failed\n"); @@ -1491,6 +1482,14 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) if (drv_index == -1) goto freeret; + /*Check if the gendisk needs to be allocated */ + if (!h->gendisk[drv_index]){ + h->gendisk[drv_index] = alloc_disk(1 << NWD_SHIFT); + if (!h->gendisk[drv_index]){ + printk(KERN_ERR "cciss: could not allocate new disk %d\n", drv_index); + goto mem_msg; + } + } } h->drv[drv_index].LunID = lunid; cciss_update_drive_info(ctlr, drv_index); @@ -1528,6 +1527,7 @@ static int rebuild_lun_table(ctlr_info_t *h, struct gendisk *del_disk) static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, int clear_all) { + int i; ctlr_info_t *h = get_host(disk); if (!capable(CAP_SYS_RAWIO)) @@ -1551,9 +1551,35 @@ static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, del_gendisk(disk); if (q) { blk_cleanup_queue(q); + /* Set drv->queue to NULL so that we do not try + * to call blk_start_queue on this queue in the + * interrupt handler + */ drv->queue = NULL; } + /* If clear_all is set then we are deleting the logical + * drive, not just refreshing its info. For drives + * other than disk 0 we will call put_disk. We do not + * do this for disk 0 as we need it to be able to + * configure the controller. + */ + if (clear_all){ + /* This isn't pretty, but we need to find the + * disk in our array and NULL our the pointer. + * This is so that we will call alloc_disk if + * this index is used again later. + */ + for (i=0; i < CISS_MAX_LUN; i++){ + if(h->gendisk[i] == disk){ + h->gendisk[i] = NULL; + break; + } + } + put_disk(disk); + } } + } else { + set_capacity(disk, 0); } --h->num_luns; @@ -3119,13 +3145,7 @@ geo_inq: /* Returns -1 if no free entries are left. */ static int alloc_cciss_hba(void) { - struct gendisk *disk[NWD]; - int i, n; - for (n = 0; n < NWD; n++) { - disk[n] = alloc_disk(1 << NWD_SHIFT); - if (!disk[n]) - goto out; - } + int i; for (i = 0; i < MAX_CTLR; i++) { if (!hba[i]) { @@ -3133,20 +3153,18 @@ static int alloc_cciss_hba(void) p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL); if (!p) goto Enomem; - for (n = 0; n < NWD; n++) - p->gendisk[n] = disk[n]; + p->gendisk[0] = alloc_disk(1 << NWD_SHIFT); + if (!p->gendisk[0]) + goto Enomem; hba[i] = p; return i; } } printk(KERN_WARNING "cciss: This driver supports a maximum" " of %d controllers.\n", MAX_CTLR); - goto out; - Enomem: + return -1; +Enomem: printk(KERN_ERR "cciss: out of memory.\n"); - out: - while (n--) - put_disk(disk[n]); return -1; } @@ -3156,7 +3174,7 @@ static void free_hba(int i) int n; hba[i] = NULL; - for (n = 0; n < NWD; n++) + for (n = 0; n < CISS_MAX_LUN; n++) put_disk(p->gendisk[n]); kfree(p); } @@ -3169,9 +3187,8 @@ static void free_hba(int i) static int __devinit cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - request_queue_t *q; int i; - int j; + int j = 0; int rc; int dac; @@ -3283,16 +3300,29 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->busy_initializing = 0; - for (j = 0; j < NWD; j++) { /* mfm */ + do { drive_info_struct *drv = &(hba[i]->drv[j]); struct gendisk *disk = hba[i]->gendisk[j]; + request_queue_t *q; + + /* Check if the disk was allocated already */ + if (!disk){ + hba[i]->gendisk[j] = alloc_disk(1 << NWD_SHIFT); + disk = hba[i]->gendisk[j]; + } + + /* Check that the disk was able to be allocated */ + if (!disk) { + printk(KERN_ERR "cciss: unable to allocate memory for disk %d\n", j); + goto clean4; + } q = blk_init_queue(do_cciss_request, &hba[i]->lock); if (!q) { printk(KERN_ERR "cciss: unable to allocate queue for disk %d\n", j); - break; + goto clean4; } drv->queue = q; @@ -3324,7 +3354,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, blk_queue_hardsect_size(q, drv->block_size); set_capacity(disk, drv->nr_blocks); add_disk(disk); - } + j++; + } while (j <= hba[i]->highest_lun); return 1; @@ -3347,6 +3378,15 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, unregister_blkdev(hba[i]->major, hba[i]->devname); clean1: hba[i]->busy_initializing = 0; + /* cleanup any queues that may have been initialized */ + for (j=0; j <= hba[i]->highest_lun; j++){ + drive_info_struct *drv = &(hba[i]->drv[j]); + if (drv->queue) + blk_cleanup_queue(drv->queue); + } + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); free_hba(i); return -1; } @@ -3394,7 +3434,7 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev) remove_proc_entry(hba[i]->devname, proc_cciss); /* remove it from the disk list */ - for (j = 0; j < NWD; j++) { + for (j = 0; j < CISS_MAX_LUN; j++) { struct gendisk *disk = hba[i]->gendisk[j]; if (disk) { request_queue_t *q = disk->queue; diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h index c3df673f860..b70988dd33e 100644 --- a/drivers/block/cciss.h +++ b/drivers/block/cciss.h @@ -6,7 +6,6 @@ #include "cciss_cmd.h" -#define NWD 16 #define NWD_SHIFT 4 #define MAX_PART (1 << NWD_SHIFT) @@ -112,7 +111,7 @@ struct ctlr_info int next_to_run; // Disk structures we need to pass back - struct gendisk *gendisk[NWD]; + struct gendisk *gendisk[CISS_MAX_LUN]; #ifdef CONFIG_CISS_SCSI_TAPE void *scsi_ctlr; /* ptr to structure containing scsi related stuff */ /* list of block side commands the scsi error handling sucked up */ diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h index b2147cca5ac..43bf5593b59 100644 --- a/drivers/block/cciss_cmd.h +++ b/drivers/block/cciss_cmd.h @@ -89,7 +89,7 @@ typedef union _u64bit //########################################################################### //STRUCTURES //########################################################################### -#define CISS_MAX_LUN 16 +#define CISS_MAX_LUN 1024 #define CISS_MAX_PHYS_LUN 1024 // SCSI-3 Cmmands diff --git a/include/linux/cciss_ioctl.h b/include/linux/cciss_ioctl.h index 6e27f42e3a5..cb57c30081a 100644 --- a/include/linux/cciss_ioctl.h +++ b/include/linux/cciss_ioctl.h @@ -80,7 +80,7 @@ typedef __u32 DriverVer_type; #define HWORD __u16 #define DWORD __u32 -#define CISS_MAX_LUN 16 +#define CISS_MAX_LUN 1024 #define LEVEL2LUN 1 // index into Target(x) structure, due to byte swapping #define LEVEL3LUN 0 -- cgit v1.2.3-70-g09d2 From 238b8721a554a33a451a3f13bdb5be8fe5cfc927 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Wed, 6 Dec 2006 20:35:17 -0800 Subject: [PATCH] serial uartlite driver Add a driver for the Xilinx uartlite serial controller used in boards with the PPC405 core in the Xilinx V2P/V4 fpgas. The hardware is very simple (baudrate/start/stopbits fixed and no break support). See the datasheet for details: http://www.xilinx.com/bvdocs/ipcenter/data_sheet/opb_uartlite.pdf See http://thread.gmane.org/gmane.linux.serial/1237/ for the email thread. Signed-off-by: Peter Korsgaard Acked-by: Olof Johansson Cc: Russell King Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 6 + drivers/serial/Kconfig | 19 ++ drivers/serial/Makefile | 1 + drivers/serial/uartlite.c | 505 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/serial_core.h | 2 + 5 files changed, 533 insertions(+) create mode 100644 drivers/serial/uartlite.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 5dff26814a0..fa1bba8de1f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3454,6 +3454,12 @@ W: http://oss.sgi.com/projects/xfs T: git git://oss.sgi.com:8090/xfs/xfs-2.6 S: Supported +XILINX UARTLITE SERIAL DRIVER +P: Peter Korsgaard +M: jacmet@sunsite.dk +L: linux-serial@vger.kernel.org +S: Maintained + X86 3-LEVEL PAGING (PAE) SUPPORT P: Ingo Molnar M: mingo@redhat.com diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 0b71e7d1890..e936c91640b 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -511,6 +511,25 @@ config SERIAL_IMX_CONSOLE your boot loader (lilo or loadlin) about how to pass options to the kernel at boot time.) +config SERIAL_UARTLITE + tristate "Xilinx uartlite serial port support" + depends on PPC32 + select SERIAL_CORE + help + Say Y here if you want to use the Xilinx uartlite serial controller. + + To compile this driver as a module, choose M here: the + module will be called uartlite.ko. + +config SERIAL_UARTLITE_CONSOLE + bool "Support for console on Xilinx uartlite serial port" + depends on SERIAL_UARTLITE=y + select SERIAL_CORE_CONSOLE + help + Say Y here if you wish to use a Xilinx uartlite as the system + console (the system console is the device which receives all kernel + messages and warnings and which allows logins in single user mode). + config SERIAL_SUNCORE bool depends on SPARC diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index b4d8a7c182e..0dba0017a00 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -55,4 +55,5 @@ obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o +obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o obj-$(CONFIG_SERIAL_NETX) += netx-serial.o diff --git a/drivers/serial/uartlite.c b/drivers/serial/uartlite.c new file mode 100644 index 00000000000..83690653b78 --- /dev/null +++ b/drivers/serial/uartlite.c @@ -0,0 +1,505 @@ +/* + * uartlite.c: Serial driver for Xilinx uartlite serial controller + * + * Peter Korsgaard + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ULITE_MAJOR 204 +#define ULITE_MINOR 187 +#define ULITE_NR_UARTS 4 + +/* For register details see datasheet: + http://www.xilinx.com/bvdocs/ipcenter/data_sheet/opb_uartlite.pdf +*/ +#define ULITE_RX 0x00 +#define ULITE_TX 0x04 +#define ULITE_STATUS 0x08 +#define ULITE_CONTROL 0x0c + +#define ULITE_REGION 16 + +#define ULITE_STATUS_RXVALID 0x01 +#define ULITE_STATUS_RXFULL 0x02 +#define ULITE_STATUS_TXEMPTY 0x04 +#define ULITE_STATUS_TXFULL 0x08 +#define ULITE_STATUS_IE 0x10 +#define ULITE_STATUS_OVERRUN 0x20 +#define ULITE_STATUS_FRAME 0x40 +#define ULITE_STATUS_PARITY 0x80 + +#define ULITE_CONTROL_RST_TX 0x01 +#define ULITE_CONTROL_RST_RX 0x02 +#define ULITE_CONTROL_IE 0x10 + + +static struct uart_port ports[ULITE_NR_UARTS]; + +static int ulite_receive(struct uart_port *port, int stat) +{ + struct tty_struct *tty = port->info->tty; + unsigned char ch = 0; + char flag = TTY_NORMAL; + + if ((stat & (ULITE_STATUS_RXVALID | ULITE_STATUS_OVERRUN + | ULITE_STATUS_FRAME)) == 0) + return 0; + + /* stats */ + if (stat & ULITE_STATUS_RXVALID) { + port->icount.rx++; + ch = readb(port->membase + ULITE_RX); + + if (stat & ULITE_STATUS_PARITY) + port->icount.parity++; + } + + if (stat & ULITE_STATUS_OVERRUN) + port->icount.overrun++; + + if (stat & ULITE_STATUS_FRAME) + port->icount.frame++; + + + /* drop byte with parity error if IGNPAR specificed */ + if (stat & port->ignore_status_mask & ULITE_STATUS_PARITY) + stat &= ~ULITE_STATUS_RXVALID; + + stat &= port->read_status_mask; + + if (stat & ULITE_STATUS_PARITY) + flag = TTY_PARITY; + + + stat &= ~port->ignore_status_mask; + + if (stat & ULITE_STATUS_RXVALID) + tty_insert_flip_char(tty, ch, flag); + + if (stat & ULITE_STATUS_FRAME) + tty_insert_flip_char(tty, 0, TTY_FRAME); + + if (stat & ULITE_STATUS_OVERRUN) + tty_insert_flip_char(tty, 0, TTY_OVERRUN); + + return 1; +} + +static int ulite_transmit(struct uart_port *port, int stat) +{ + struct circ_buf *xmit = &port->info->xmit; + + if (stat & ULITE_STATUS_TXFULL) + return 0; + + if (port->x_char) { + writeb(port->x_char, port->membase + ULITE_TX); + port->x_char = 0; + port->icount.tx++; + return 1; + } + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return 0; + + writeb(xmit->buf[xmit->tail], port->membase + ULITE_TX); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1); + port->icount.tx++; + + /* wake up */ + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + return 1; +} + +static irqreturn_t ulite_isr(int irq, void *dev_id) +{ + struct uart_port *port = (struct uart_port *)dev_id; + int busy; + + do { + int stat = readb(port->membase + ULITE_STATUS); + busy = ulite_receive(port, stat); + busy |= ulite_transmit(port, stat); + } while (busy); + + tty_flip_buffer_push(port->info->tty); + + return IRQ_HANDLED; +} + +static unsigned int ulite_tx_empty(struct uart_port *port) +{ + unsigned long flags; + unsigned int ret; + + spin_lock_irqsave(&port->lock, flags); + ret = readb(port->membase + ULITE_STATUS); + spin_unlock_irqrestore(&port->lock, flags); + + return ret & ULITE_STATUS_TXEMPTY ? TIOCSER_TEMT : 0; +} + +static unsigned int ulite_get_mctrl(struct uart_port *port) +{ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void ulite_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* N/A */ +} + +static void ulite_stop_tx(struct uart_port *port) +{ + /* N/A */ +} + +static void ulite_start_tx(struct uart_port *port) +{ + ulite_transmit(port, readb(port->membase + ULITE_STATUS)); +} + +static void ulite_stop_rx(struct uart_port *port) +{ + /* don't forward any more data (like !CREAD) */ + port->ignore_status_mask = ULITE_STATUS_RXVALID | ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; +} + +static void ulite_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +static void ulite_break_ctl(struct uart_port *port, int ctl) +{ + /* N/A */ +} + +static int ulite_startup(struct uart_port *port) +{ + int ret; + + ret = request_irq(port->irq, ulite_isr, + IRQF_DISABLED | IRQF_SAMPLE_RANDOM, "uartlite", port); + if (ret) + return ret; + + writeb(ULITE_CONTROL_RST_RX | ULITE_CONTROL_RST_TX, + port->membase + ULITE_CONTROL); + writeb(ULITE_CONTROL_IE, port->membase + ULITE_CONTROL); + + return 0; +} + +static void ulite_shutdown(struct uart_port *port) +{ + writeb(0, port->membase + ULITE_CONTROL); + readb(port->membase + ULITE_CONTROL); /* dummy */ + free_irq(port->irq, port); +} + +static void ulite_set_termios(struct uart_port *port, struct termios *termios, + struct termios *old) +{ + unsigned long flags; + unsigned int baud; + + spin_lock_irqsave(&port->lock, flags); + + port->read_status_mask = ULITE_STATUS_RXVALID | ULITE_STATUS_OVERRUN + | ULITE_STATUS_TXFULL; + + if (termios->c_iflag & INPCK) + port->read_status_mask |= + ULITE_STATUS_PARITY | ULITE_STATUS_FRAME; + + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; + + /* ignore all characters if CREAD is not set */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= + ULITE_STATUS_RXVALID | ULITE_STATUS_PARITY + | ULITE_STATUS_FRAME | ULITE_STATUS_OVERRUN; + + /* update timeout */ + baud = uart_get_baud_rate(port, termios, old, 0, 460800); + uart_update_timeout(port, termios->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *ulite_type(struct uart_port *port) +{ + return port->type == PORT_UARTLITE ? "uartlite" : NULL; +} + +static void ulite_release_port(struct uart_port *port) +{ + release_mem_region(port->mapbase, ULITE_REGION); + iounmap(port->membase); + port->membase = 0; +} + +static int ulite_request_port(struct uart_port *port) +{ + if (!request_mem_region(port->mapbase, ULITE_REGION, "uartlite")) { + dev_err(port->dev, "Memory region busy\n"); + return -EBUSY; + } + + port->membase = ioremap(port->mapbase, ULITE_REGION); + if (!port->membase) { + dev_err(port->dev, "Unable to map registers\n"); + release_mem_region(port->mapbase, ULITE_REGION); + return -EBUSY; + } + + return 0; +} + +static void ulite_config_port(struct uart_port *port, int flags) +{ + ulite_request_port(port); + port->type = PORT_UARTLITE; +} + +static int ulite_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + /* we don't want the core code to modify any port params */ + return -EINVAL; +} + +static struct uart_ops ulite_ops = { + .tx_empty = ulite_tx_empty, + .set_mctrl = ulite_set_mctrl, + .get_mctrl = ulite_get_mctrl, + .stop_tx = ulite_stop_tx, + .start_tx = ulite_start_tx, + .stop_rx = ulite_stop_rx, + .enable_ms = ulite_enable_ms, + .break_ctl = ulite_break_ctl, + .startup = ulite_startup, + .shutdown = ulite_shutdown, + .set_termios = ulite_set_termios, + .type = ulite_type, + .release_port = ulite_release_port, + .request_port = ulite_request_port, + .config_port = ulite_config_port, + .verify_port = ulite_verify_port +}; + +#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE +static void ulite_console_wait_tx(struct uart_port *port) +{ + int i; + + /* wait up to 10ms for the character(s) to be sent */ + for (i = 0; i < 10000; i++) { + if (readb(port->membase + ULITE_STATUS) & ULITE_STATUS_TXEMPTY) + break; + udelay(1); + } +} + +static void ulite_console_putchar(struct uart_port *port, int ch) +{ + ulite_console_wait_tx(port); + writeb(ch, port->membase + ULITE_TX); +} + +static void ulite_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &ports[co->index]; + unsigned long flags; + unsigned int ier; + int locked = 1; + + if (oops_in_progress) { + locked = spin_trylock_irqsave(&port->lock, flags); + } else + spin_lock_irqsave(&port->lock, flags); + + /* save and disable interrupt */ + ier = readb(port->membase + ULITE_STATUS) & ULITE_STATUS_IE; + writeb(0, port->membase + ULITE_CONTROL); + + uart_console_write(port, s, count, ulite_console_putchar); + + ulite_console_wait_tx(port); + + /* restore interrupt state */ + if (ier) + writeb(ULITE_CONTROL_IE, port->membase + ULITE_CONTROL); + + if (locked) + spin_unlock_irqrestore(&port->lock, flags); +} + +static int __init ulite_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= ULITE_NR_UARTS) + return -EINVAL; + + port = &ports[co->index]; + + /* not initialized yet? */ + if (!port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static struct uart_driver ulite_uart_driver; + +static struct console ulite_console = { + .name = "ttyUL", + .write = ulite_console_write, + .device = uart_console_device, + .setup = ulite_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, /* Specified on the cmdline (e.g. console=ttyUL0 ) */ + .data = &ulite_uart_driver, +}; + +static int __init ulite_console_init(void) +{ + register_console(&ulite_console); + return 0; +} + +console_initcall(ulite_console_init); + +#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */ + +static struct uart_driver ulite_uart_driver = { + .owner = THIS_MODULE, + .driver_name = "uartlite", + .dev_name = "ttyUL", + .major = ULITE_MAJOR, + .minor = ULITE_MINOR, + .nr = ULITE_NR_UARTS, +#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE + .cons = &ulite_console, +#endif +}; + +static int __devinit ulite_probe(struct platform_device *pdev) +{ + struct resource *res, *res2; + struct uart_port *port; + + if (pdev->id < 0 || pdev->id >= ULITE_NR_UARTS) + return -EINVAL; + + if (ports[pdev->id].membase) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + port = &ports[pdev->id]; + + port->fifosize = 16; + port->regshift = 2; + port->iotype = UPIO_MEM; + port->iobase = 1; /* mark port in use */ + port->mapbase = res->start; + port->membase = 0; + port->ops = &ulite_ops; + port->irq = res2->start; + port->flags = UPF_BOOT_AUTOCONF; + port->dev = &pdev->dev; + port->type = PORT_UNKNOWN; + port->line = pdev->id; + + uart_add_one_port(&ulite_uart_driver, port); + platform_set_drvdata(pdev, port); + + return 0; +} + +static int ulite_remove(struct platform_device *pdev) +{ + struct uart_port *port = platform_get_drvdata(pdev); + + platform_set_drvdata(pdev, NULL); + + if (port) + uart_remove_one_port(&ulite_uart_driver, port); + + /* mark port as free */ + port->membase = 0; + + return 0; +} + +static struct platform_driver ulite_platform_driver = { + .probe = ulite_probe, + .remove = ulite_remove, + .driver = { + .owner = THIS_MODULE, + .name = "uartlite", + }, +}; + +int __init ulite_init(void) +{ + int ret; + + ret = uart_register_driver(&ulite_uart_driver); + if (ret) + return ret; + + ret = platform_driver_register(&ulite_platform_driver); + if (ret) + uart_unregister_driver(&ulite_uart_driver); + + return ret; +} + +void __exit ulite_exit(void) +{ + platform_driver_unregister(&ulite_platform_driver); + uart_unregister_driver(&ulite_uart_driver); +} + +module_init(ulite_init); +module_exit(ulite_exit); + +MODULE_AUTHOR("Peter Korsgaard "); +MODULE_DESCRIPTION("Xilinx uartlite serial driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 463ab953b09..82767213664 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -132,6 +132,8 @@ #define PORT_S3C2412 73 +/* Xilinx uartlite */ +#define PORT_UARTLITE 74 #ifdef __KERNEL__ -- cgit v1.2.3-70-g09d2 From ed07536ed6731775219c1df7fa26a7588753e693 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:35:24 -0800 Subject: [PATCH] lockdep: annotate nfs/nfsd in-kernel sockets Stick NFS sockets in their own class to avoid some lockdep warnings. NFS sockets are never exposed to user-space, and will hence not trigger certain code paths that would otherwise pose deadlock scenarios. [akpm@osdl.org: cleanups] Signed-off-by: Peter Zijlstra Signed-off-by: Steven Dickson Acked-by: Ingo Molnar Cc: Trond Myklebust Acked-by: Neil Brown Cc: "David S. Miller" Signed-off-by: Andrew Morton [ Fixed patch corruption by quilt, pointed out by Peter Zijlstra ] Signed-off-by: Linus Torvalds --- include/net/sock.h | 19 +++++++++++++++++++ kernel/lockdep.c | 1 + net/core/sock.c | 23 +++++------------------ net/sunrpc/svcsock.c | 31 +++++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 730899ce516..03684e702d1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -746,6 +746,25 @@ static inline int sk_stream_wmem_schedule(struct sock *sk, int size) */ #define sock_owned_by_user(sk) ((sk)->sk_lock.owner) +/* + * Macro so as to not evaluate some arguments when + * lockdep is not enabled. + * + * Mark both the sk_lock and the sk_lock.slock as a + * per-address-family lock class. + */ +#define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ +do { \ + sk->sk_lock.owner = NULL; \ + init_waitqueue_head(&sk->sk_lock.wq); \ + spin_lock_init(&(sk)->sk_lock.slock); \ + debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ + sizeof((sk)->sk_lock)); \ + lockdep_set_class_and_name(&(sk)->sk_lock.slock, \ + (skey), (sname)); \ + lockdep_init_map(&(sk)->sk_lock.dep_map, (name), (key), 0); \ +} while (0) + extern void FASTCALL(lock_sock_nested(struct sock *sk, int subclass)); static inline void lock_sock(struct sock *sk) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c9fefdb1a7d..e33f6207f5b 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2645,6 +2645,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) } local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); static void print_held_locks_bug(struct task_struct *curr) { diff --git a/net/core/sock.c b/net/core/sock.c index 4a432da441e..0ed5b4f0bc4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -810,24 +810,11 @@ lenout: */ static void inline sock_lock_init(struct sock *sk) { - spin_lock_init(&sk->sk_lock.slock); - sk->sk_lock.owner = NULL; - init_waitqueue_head(&sk->sk_lock.wq); - /* - * Make sure we are not reinitializing a held lock: - */ - debug_check_no_locks_freed((void *)&sk->sk_lock, sizeof(sk->sk_lock)); - - /* - * Mark both the sk_lock and the sk_lock.slock as a - * per-address-family lock class: - */ - lockdep_set_class_and_name(&sk->sk_lock.slock, - af_family_slock_keys + sk->sk_family, - af_family_slock_key_strings[sk->sk_family]); - lockdep_init_map(&sk->sk_lock.dep_map, - af_family_key_strings[sk->sk_family], - af_family_keys + sk->sk_family, 0); + sock_lock_init_class_and_name(sk, + af_family_slock_key_strings[sk->sk_family], + af_family_slock_keys + sk->sk_family, + af_family_key_strings[sk->sk_family], + af_family_keys + sk->sk_family); } /** diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1c68956824e..99f54fb6d66 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -85,6 +85,35 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req); */ static int svc_conn_age_period = 6*60; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key svc_key[2]; +static struct lock_class_key svc_slock_key[2]; + +static inline void svc_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", + &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", + &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void svc_reclassify_socket(struct socket *sock) +{ +} +#endif + /* * Queue up an idle server thread. Must have pool->sp_lock held. * Note: this is really a stack rather than a queue, so that we only @@ -1557,6 +1586,8 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) return error; + svc_reclassify_socket(sock); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, (struct sockaddr *) sin, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cfe3c15be94..2fc4a312326 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1058,6 +1058,35 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) return err; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key xs_key[2]; +static struct lock_class_key xs_slock_key[2]; + +static inline void xs_reclassify_socket(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", + &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); + break; + + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", + &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); + break; + + default: + BUG(); + } +} +#else +static inline void xs_reclassify_socket(struct socket *sock) +{ +} +#endif + /** * xs_udp_connect_worker - set up a UDP socket * @work: RPC transport to connect @@ -1081,6 +1110,7 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { sock_release(sock); @@ -1165,6 +1195,7 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } + xs_reclassify_socket(sock); if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { sock_release(sock); -- cgit v1.2.3-70-g09d2 From 48ed214d10ae3c3999af938970f7b5b58df77be3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 6 Dec 2006 20:35:37 -0800 Subject: [PATCH] constify inode accessors Change the signature of i_size_read(), IMINOR() and IMAJOR() because they, or the functions they call, will never modify the argument. Signed-off-by: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 94b831b8157..d791bae9de9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -636,7 +636,7 @@ extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. */ -static inline loff_t i_size_read(struct inode *inode) +static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; @@ -679,12 +679,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } -static inline unsigned iminor(struct inode *inode) +static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); } -static inline unsigned imajor(struct inode *inode) +static inline unsigned imajor(const struct inode *inode) { return MAJOR(inode->i_rdev); } -- cgit v1.2.3-70-g09d2 From e9168c189fd54171124b5d25644024d99869e6a8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 6 Dec 2006 20:35:38 -0800 Subject: [PATCH] fuse: update userspace interface to version 7.8 Add a flag to the RELEASE message which specifies that a FLUSH operation should be performed as well. This interface update is needed for the FreeBSD port, and doesn't actually touch the Linux implementation at all. Also rename the unused 'flush_flags' in the FLUSH message to 'unused'. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fuse.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 9fc48a674b8..76336327a94 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -15,7 +15,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 7 +#define FUSE_KERNEL_MINOR_VERSION 8 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -92,6 +92,11 @@ struct fuse_file_lock { #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) +/** + * Release flags + */ +#define FUSE_RELEASE_FLUSH (1 << 0) + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -205,12 +210,13 @@ struct fuse_open_out { struct fuse_release_in { __u64 fh; __u32 flags; - __u32 padding; + __u32 release_flags; + __u64 lock_owner; }; struct fuse_flush_in { __u64 fh; - __u32 flush_flags; + __u32 unused; __u32 padding; __u64 lock_owner; }; -- cgit v1.2.3-70-g09d2 From b2d2272fae1e1df26ec8f93a6d5baea891dcce37 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 6 Dec 2006 20:35:51 -0800 Subject: [PATCH] fuse: add bmap support Add support for the BMAP operation for block device based filesystems. This is needed to support swap-files and lilo. Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/dir.c | 2 ++ fs/fuse/file.c | 37 +++++++++++++++++++++++++++++++++++++ fs/fuse/fuse_i.h | 3 +++ include/linux/fuse.h | 11 +++++++++++ 4 files changed, 53 insertions(+) (limited to 'include') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 677f3ed7f98..1cabdb229ad 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1024,6 +1024,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) if (attr->ia_valid & ATTR_SIZE) { unsigned long limit; is_truncate = 1; + if (IS_SWAPFILE(inode)) + return -ETXTBSY; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) { send_sig(SIGXFSZ, current, 0); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 763a50daf1c..128f79c4080 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -754,6 +754,42 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) return err; } +static sector_t fuse_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_bmap_in inarg; + struct fuse_bmap_out outarg; + int err; + + if (!inode->i_sb->s_bdev || fc->no_bmap) + return 0; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return 0; + + memset(&inarg, 0, sizeof(inarg)); + inarg.block = block; + inarg.blocksize = inode->i_sb->s_blocksize; + req->in.h.opcode = FUSE_BMAP; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (err == -ENOSYS) + fc->no_bmap = 1; + + return err ? 0 : outarg.block; +} + static const struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -787,6 +823,7 @@ static const struct address_space_operations fuse_file_aops = { .commit_write = fuse_commit_write, .readpages = fuse_readpages, .set_page_dirty = fuse_set_page_dirty, + .bmap = fuse_bmap, }; void fuse_init_file_inode(struct inode *inode) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 91edb8932d9..58d482d9f6b 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -339,6 +339,9 @@ struct fuse_conn { /** Is interrupt not implemented by fs? */ unsigned no_interrupt : 1; + /** Is bmap not implemented by fs? */ + unsigned no_bmap : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 76336327a94..162a754f4db 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -132,6 +132,7 @@ enum fuse_opcode { FUSE_ACCESS = 34, FUSE_CREATE = 35, FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -302,6 +303,16 @@ struct fuse_interrupt_in { __u64 unique; }; +struct fuse_bmap_in { + __u64 block; + __u32 blocksize; + __u32 padding; +}; + +struct fuse_bmap_out { + __u64 block; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3-70-g09d2 From 0ec7ca41f6f0f74a394a7d686bc0ee8afef84887 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 6 Dec 2006 20:35:52 -0800 Subject: [PATCH] fuse: add DESTROY operation Add a DESTROY operation for block device based filesystems. With the help of this operation, such a filesystem can flush dirty data to the device synchronously before the umount returns. This is needed in situations where the filesystem is assumed to be clean immediately after unmount (e.g. ejecting removable media). Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/fuse_i.h | 6 ++++++ fs/fuse/inode.c | 22 ++++++++++++++++++++++ include/linux/fuse.h | 1 + 3 files changed, 29 insertions(+) (limited to 'include') diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 58d482d9f6b..b98b20de740 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -298,6 +298,9 @@ struct fuse_conn { reply, before any other request, and never cleared */ unsigned conn_error : 1; + /** Connection successful. Only set in INIT */ + unsigned conn_init : 1; + /** Do readpages asynchronously? Only set in INIT */ unsigned async_read : 1; @@ -368,6 +371,9 @@ struct fuse_conn { /** Key for lock owner ID scrambling */ u32 scramble_key[4]; + + /** Reserved request for the DESTROY message */ + struct fuse_req *destroy_req; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1baaaeb2e85..437d61c6526 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,10 +206,23 @@ static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) fuse_abort_conn(get_fuse_conn_super(vfsmnt->mnt_sb)); } +static void fuse_send_destroy(struct fuse_conn *fc) +{ + struct fuse_req *req = fc->destroy_req; + if (req && fc->conn_init) { + fc->destroy_req = NULL; + req->in.h.opcode = FUSE_DESTROY; + req->force = 1; + request_send(fc, req); + fuse_put_request(fc, req); + } +} + static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); + fuse_send_destroy(fc); spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; @@ -410,6 +423,8 @@ static struct fuse_conn *new_conn(void) void fuse_conn_put(struct fuse_conn *fc) { if (atomic_dec_and_test(&fc->count)) { + if (fc->destroy_req) + fuse_request_free(fc->destroy_req); mutex_destroy(&fc->inst_mutex); kfree(fc); } @@ -466,6 +481,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); fc->minor = arg->minor; fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + fc->conn_init = 1; } fuse_put_request(fc, req); fc->blocked = 0; @@ -563,6 +579,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!init_req) goto err_put_root; + if (is_bdev) { + fc->destroy_req = fuse_request_alloc(); + if (!fc->destroy_req) + goto err_put_root; + } + mutex_lock(&fuse_mutex); err = -EINVAL; if (file->private_data) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 162a754f4db..534744efe30 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -133,6 +133,7 @@ enum fuse_opcode { FUSE_CREATE = 35, FUSE_INTERRUPT = 36, FUSE_BMAP = 37, + FUSE_DESTROY = 38, }; /* The read buffer is required to be at least 8k, but may be much larger */ -- cgit v1.2.3-70-g09d2 From e59e2ae2c29700117a54e85c106017c24837119f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 Dec 2006 20:35:59 -0800 Subject: [PATCH] SysRq-X: show blocked tasks Add SysRq-X support: show blocked (TASK_UNINTERRUPTIBLE) tasks only. Useful for debugging IO stalls. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 14 +++++++++++++- include/linux/sched.h | 11 ++++++++++- kernel/sched.c | 11 ++++++++--- 3 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index c64f5bcff94..05810c8d20b 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -182,6 +182,18 @@ static struct sysrq_key_op sysrq_showstate_op = { .enable_mask = SYSRQ_ENABLE_DUMP, }; +static void sysrq_handle_showstate_blocked(int key, struct tty_struct *tty) +{ + show_state_filter(TASK_UNINTERRUPTIBLE); +} +static struct sysrq_key_op sysrq_showstate_blocked_op = { + .handler = sysrq_handle_showstate_blocked, + .help_msg = "showBlockedTasks", + .action_msg = "Show Blocked State", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + + static void sysrq_handle_showmem(int key, struct tty_struct *tty) { show_mem(); @@ -304,7 +316,7 @@ static struct sysrq_key_op *sysrq_key_table[36] = { /* May be assigned at init time by SMP VOYAGER */ NULL, /* v */ NULL, /* w */ - NULL, /* x */ + &sysrq_showstate_blocked_op, /* x */ NULL, /* y */ NULL /* z */ }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 837a012f573..0a90cefb0b0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -194,7 +194,16 @@ extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; -extern void show_state(void); +/* + * Only dump TASK_* tasks. (-1 for all tasks) + */ +extern void show_state_filter(unsigned long state_filter); + +static inline void show_state(void) +{ + show_state_filter(-1); +} + extern void show_regs(struct pt_regs *); /* diff --git a/kernel/sched.c b/kernel/sched.c index 12fdbef1d9b..1848e280504 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4804,7 +4804,7 @@ static void show_task(struct task_struct *p) show_stack(p, NULL); } -void show_state(void) +void show_state_filter(unsigned long state_filter) { struct task_struct *g, *p; @@ -4824,11 +4824,16 @@ void show_state(void) * console might take alot of time: */ touch_nmi_watchdog(); - show_task(p); + if (p->state & state_filter) + show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); - debug_show_all_locks(); + /* + * Only show locks if all tasks are dumped: + */ + if (state_filter == -1) + debug_show_all_locks(); } /** -- cgit v1.2.3-70-g09d2 From 5ec68b2e310437e99c297ba04e1afc5297aa6de1 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 6 Dec 2006 20:36:14 -0800 Subject: [PATCH] pull in necessary header files for cdev.h linux/cdev.h uses struct kobject and other structs and should therefore include them. Currently, a module either needs to add the missing includes itself, or, in case a module includes other headers already, needs to put last, which goes against a alphabetically-sorted include list. Signed-off-by: Jan Engelhardt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/cdev.h b/include/linux/cdev.h index ee5f53f2ca1..f309b00e986 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -2,6 +2,10 @@ #define _LINUX_CDEV_H #ifdef __KERNEL__ +#include +#include +#include + struct cdev { struct kobject kobj; struct module *owner; -- cgit v1.2.3-70-g09d2 From c140e110019f25ffa1c6f3f365b0c9103d0b8475 Mon Sep 17 00:00:00 2001 From: Ryan Underwood Date: Wed, 6 Dec 2006 20:36:38 -0800 Subject: [PATCH] parport_pc: Add support for OX16PCI952 parallel port Add support for the parallel port (implemented as separate PCI function) on the Oxford Semiconductor OX16PCI952. Signed-off-by: Ryan Underwood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/parport_pc.c | 4 ++++ include/linux/pci_ids.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 39c96641bc7..5749500f45f 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2747,6 +2747,7 @@ enum parport_pc_pci_cards { titan_1284p2, avlab_1p, avlab_2p, + oxsemi_952, oxsemi_954, oxsemi_840, aks_0100, @@ -2822,6 +2823,7 @@ static struct parport_pc_pci { /* avlab_2p */ { 2, { { 0, 1}, { 2, 3 },} }, /* The Oxford Semi cards are unusual: 954 doesn't support ECP, * and 840 locks up if you write 1 to bit 2! */ + /* oxsemi_952 */ { 1, { { 0, 1 }, } }, /* oxsemi_954 */ { 1, { { 0, -1 }, } }, /* oxsemi_840 */ { 1, { { 0, -1 }, } }, /* aks_0100 */ { 1, { { 0, -1 }, } }, @@ -2895,6 +2897,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */ { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p}, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954PP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_954 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_12PCI840, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c09da1e30c5..dcdb90f06d7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1864,6 +1864,7 @@ #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 +#define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 #define PCI_VENDOR_ID_SAMSUNG 0x144d -- cgit v1.2.3-70-g09d2 From 20aa7b21b1cbd1aa3fbf5fc14da5f7484a61a824 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:36:40 -0800 Subject: [PATCH] probe_kernel_address() needs to do set_fs() probe_kernel_address() purports to be generic, only it forgot to select KERNEL_DS, so it presently won't work right on all architectures. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 67918c22339..76c3fe32510 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -65,14 +65,22 @@ static inline unsigned long __copy_from_user_nocache(void *to, * do_page_fault() doesn't attempt to take mmap_sem. This makes * probe_kernel_address() suitable for use within regions where the caller * already holds mmap_sem, or other locks which nest inside mmap_sem. + * This must be a macro because __get_user() needs to know the types of the + * args. + * + * We don't include enough header files to be able to do the set_fs(). We + * require that the probe_kernel_address() caller will do that. */ #define probe_kernel_address(addr, retval) \ ({ \ long ret; \ + mm_segment_t old_fs = get_fs(); \ \ + set_fs(KERNEL_DS); \ pagefault_disable(); \ ret = __get_user(retval, addr); \ pagefault_enable(); \ + set_fs(old_fs); \ ret; \ }) -- cgit v1.2.3-70-g09d2 From 115085ea0794c0f339be8f9d25505c7f9861d824 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 6 Dec 2006 20:36:51 -0800 Subject: [PATCH] taskstats: cleanup do_exit() path do_exit: taskstats_exit_alloc() ... taskstats_exit_send() taskstats_exit_free() I think this is not good, let it be a single function exported to the core kernel, taskstats_exit(), which does alloc + send + free itself. Signed-off-by: Oleg Nesterov Cc: Balbir Singh Cc: Shailabh Nagar Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats_kern.h | 17 ++--------------- kernel/exit.c | 8 ++------ kernel/taskstats.c | 41 +++++++++++++++-------------------------- 3 files changed, 19 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index ce8a912e542..f1261a53249 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -15,12 +15,6 @@ extern struct kmem_cache *taskstats_cache; extern struct mutex taskstats_exit_mutex; -static inline void taskstats_exit_free(struct taskstats *tidstats) -{ - if (tidstats) - kmem_cache_free(taskstats_cache, tidstats); -} - static inline void taskstats_tgid_init(struct signal_struct *sig) { sig->stats = NULL; @@ -54,17 +48,10 @@ static inline void taskstats_tgid_free(struct signal_struct *sig) kmem_cache_free(taskstats_cache, sig->stats); } -extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); -extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int); +extern void taskstats_exit(struct task_struct *, int group_dead); extern void taskstats_init_early(void); #else -static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) -{} -static inline void taskstats_exit_free(struct taskstats *ptidstats) -{} -static inline void taskstats_exit_send(struct task_struct *tsk, - struct taskstats *tidstats, - int group_dead, unsigned int cpu) +static inline void taskstats_exit(struct task_struct *tsk, int group_dead) {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} diff --git a/kernel/exit.c b/kernel/exit.c index 06de6c4e8ca..4e3f919edc4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -850,9 +850,7 @@ static void exit_notify(struct task_struct *tsk) fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; - struct taskstats *tidstats; int group_dead; - unsigned int mycpu; profile_task_exit(tsk); @@ -890,8 +888,6 @@ fastcall NORET_TYPE void do_exit(long code) current->comm, current->pid, preempt_count()); - taskstats_exit_alloc(&tidstats, &mycpu); - acct_update_integrals(tsk); if (tsk->mm) { update_hiwater_rss(tsk->mm); @@ -911,8 +907,8 @@ fastcall NORET_TYPE void do_exit(long code) #endif if (unlikely(tsk->audit_context)) audit_free(tsk); - taskstats_exit_send(tsk, tidstats, group_dead, mycpu); - taskstats_exit_free(tidstats); + + taskstats_exit(tsk, group_dead); exit_mm(tsk); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d9d7c357623..2654886fe05 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -119,10 +119,10 @@ static int send_reply(struct sk_buff *skb, pid_t pid) /* * Send taskstats data in @skb to listeners registered for @cpu's exit data */ -static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) +static void send_cpu_listeners(struct sk_buff *skb, + struct listener_list *listeners) { struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); - struct listener_list *listeners; struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); @@ -135,7 +135,6 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) } rc = 0; - listeners = &per_cpu(listener_array, cpu); down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { skb_next = NULL; @@ -413,28 +412,12 @@ err: return rc; } -void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) -{ - struct listener_list *listeners; - /* - * This is the cpu on which the task is exiting currently and will - * be the one for which the exit event is sent, even if the cpu - * on which this function is running changes later. - */ - *mycpu = raw_smp_processor_id(); - - listeners = &per_cpu(listener_array, *mycpu); - - *ptidstats = NULL; - if (!list_empty(&listeners->list)) - *ptidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); -} - /* Send pid data out on exit */ -void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, - int group_dead, unsigned int mycpu) +void taskstats_exit(struct task_struct *tsk, int group_dead) { int rc; + struct listener_list *listeners; + struct taskstats *tidstats; struct sk_buff *rep_skb; void *reply; size_t size; @@ -458,12 +441,17 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, fill_tgid_exit(tsk); } + listeners = &__raw_get_cpu_var(listener_array); + if (list_empty(&listeners->list)) + return; + + tidstats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); if (!tidstats) return; rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); if (rc < 0) - goto ret; + goto free_stats; rc = fill_pid(tsk->pid, tsk, tidstats); if (rc < 0) @@ -492,15 +480,16 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, nla_nest_end(rep_skb, na); send: - send_cpu_listeners(rep_skb, mycpu); + send_cpu_listeners(rep_skb, listeners); +free_stats: + kmem_cache_free(taskstats_cache, tidstats); return; nla_put_failure: genlmsg_cancel(rep_skb, reply); err_skb: nlmsg_free(rep_skb); -ret: - return; + goto free_stats; } static struct genl_ops taskstats_ops = { -- cgit v1.2.3-70-g09d2 From 34ec12349c8a9505adc59d72f92b4595bc2483ff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 6 Dec 2006 20:36:52 -0800 Subject: [PATCH] taskstats: cleanup ->signal->stats allocation Allocate ->signal->stats on demand in taskstats_exit(), this allows us to remove taskstats_tgid_alloc() (the last non-trivial inline) from taskstat's public interface. Signed-off-by: Oleg Nesterov Cc: Balbir Singh Cc: Shailabh Nagar Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats_kern.h | 24 ------------------------ kernel/fork.c | 1 - kernel/taskstats.c | 26 +++++++++++++++++++++++++- 3 files changed, 25 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index f1261a53249..7e9680f4afd 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -20,28 +20,6 @@ static inline void taskstats_tgid_init(struct signal_struct *sig) sig->stats = NULL; } -static inline void taskstats_tgid_alloc(struct task_struct *tsk) -{ - struct signal_struct *sig = tsk->signal; - struct taskstats *stats; - - if (sig->stats != NULL) - return; - - /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); - - spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; - } - spin_unlock_irq(&tsk->sighand->siglock); - - if (stats) - kmem_cache_free(taskstats_cache, stats); -} - static inline void taskstats_tgid_free(struct signal_struct *sig) { if (sig->stats) @@ -55,8 +33,6 @@ static inline void taskstats_exit(struct task_struct *tsk, int group_dead) {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} -static inline void taskstats_tgid_alloc(struct task_struct *tsk) -{} static inline void taskstats_tgid_free(struct signal_struct *sig) {} static inline void taskstats_init_early(void) diff --git a/kernel/fork.c b/kernel/fork.c index f37980df1d5..65883814864 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -847,7 +847,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); - taskstats_tgid_alloc(current); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 2654886fe05..7d793d6b1e9 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -412,6 +412,30 @@ err: return rc; } +static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + struct taskstats *stats; + + if (sig->stats || thread_group_empty(tsk)) + goto ret; + + /* No problem if kmem_cache_zalloc() fails */ + stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + + spin_lock_irq(&tsk->sighand->siglock); + if (!sig->stats) { + sig->stats = stats; + stats = NULL; + } + spin_unlock_irq(&tsk->sighand->siglock); + + if (stats) + kmem_cache_free(taskstats_cache, stats); +ret: + return sig->stats; +} + /* Send pid data out on exit */ void taskstats_exit(struct task_struct *tsk, int group_dead) { @@ -433,7 +457,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); - is_thread_group = (tsk->signal->stats != NULL); + is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { /* PID + STATS + TGID + STATS */ size = 2 * size; -- cgit v1.2.3-70-g09d2 From 9774a1f54f173ad18e816496c8979f1bf8ef666a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 6 Dec 2006 20:36:56 -0800 Subject: [PATCH] Compile-time check re world-writeable module params One of the mistakes a module_param() user can make is to supply default value of module parameter as the last argument. module_param() accepts permissions instead. If default value is, say, 3 (-------wx), parameter becomes world-writeable. So far, the only remedy was to apply grep(1) and read drivers submitted to -mm. BTDT. With this patch applied, compiler will finally do some job. *) bounds checking on permissions *) world-writeable bit checking on permissions *) compile breakage if checks trigger First version of this check (only "& 2" part) directly caught 4 out of 7 places during my last grep. Subject: Neverending module_param() bugs [X] drivers/acpi/sbs.c:101:module_param(capacity_mode, int, CAPACITY_UNIT); [X] drivers/acpi/sbs.c:102:module_param(update_mode, int, UPDATE_MODE); [ ] drivers/acpi/sbs.c:103:module_param(update_info_mode, int, UPDATE_INFO_MODE); [ ] drivers/acpi/sbs.c:104:module_param(update_time, int, UPDATE_TIME); [ ] drivers/acpi/sbs.c:105:module_param(update_time2, int, UPDATE_TIME2); [X] drivers/char/watchdog/sbc8360.c:203:module_param(timeout, int, 27); [X] drivers/media/video/tuner-simple.c:13:module_param(offset, int, 0666); Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 7c0c2c198f1..4a189dadb16 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -63,6 +63,9 @@ struct kparam_array not there, read bits mean it's readable, write bits mean it's writable. */ #define __module_param_call(prefix, name, set, get, arg, perm) \ + /* Default value instead of permissions? */ \ + static int __param_perm_check_##name __attribute__((unused)) = \ + BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)); \ static char __param_str_##name[] = prefix #name; \ static struct kernel_param const __param_##name \ __attribute_used__ \ -- cgit v1.2.3-70-g09d2 From e0980dafa329d33bb88edc8a3ef9fab4e070590c Mon Sep 17 00:00:00 2001 From: Paul B Schroeder Date: Wed, 6 Dec 2006 20:37:03 -0800 Subject: [PATCH] Exar quad port serial This is on our "Envoy" boxes which we have, according to the documentation, an "Exar ST16C554/554D Quad UART with 16-byte Fifo's". The box also has two other "on-board" serial ports and a modem chip. The two on-board serial UARTs were being detected along with the first two Exar UARTs. The last two Exar UARTs were not showing up and neither was the modem. This patch was the only way I could the kernel to see beyond the standard four serial ports and get all four of the Exar UARTs to show up. [akpm@osdl.org: build fix] Signed-off-by: Paul B Schroeder Cc: Lennart Sorensen Acked-by: Alan Cox Cc: Russell King Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250_exar_st16c554.c | 52 +++++++++++++++++++++++++++++++++++++ drivers/serial/Kconfig | 11 ++++++++ drivers/serial/Makefile | 1 + include/linux/serial_8250.h | 1 + 4 files changed, 65 insertions(+) create mode 100644 drivers/serial/8250_exar_st16c554.c (limited to 'include') diff --git a/drivers/serial/8250_exar_st16c554.c b/drivers/serial/8250_exar_st16c554.c new file mode 100644 index 00000000000..567143ace15 --- /dev/null +++ b/drivers/serial/8250_exar_st16c554.c @@ -0,0 +1,52 @@ +/* + * linux/drivers/serial/8250_exar.c + * + * Written by Paul B Schroeder < pschroeder "at" uplogix "dot" com > + * Based on 8250_boca. + * + * Copyright (C) 2005 Russell King. + * Data taken from include/asm-i386/serial.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include + +#define PORT(_base,_irq) \ + { \ + .iobase = _base, \ + .irq = _irq, \ + .uartclk = 1843200, \ + .iotype = UPIO_PORT, \ + .flags = UPF_BOOT_AUTOCONF, \ + } + +static struct plat_serial8250_port exar_data[] = { + PORT(0x100, 5), + PORT(0x108, 5), + PORT(0x110, 5), + PORT(0x118, 5), + { }, +}; + +static struct platform_device exar_device = { + .name = "serial8250", + .id = PLAT8250_DEV_EXAR_ST16C554, + .dev = { + .platform_data = exar_data, + }, +}; + +static int __init exar_init(void) +{ + return platform_device_register(&exar_device); +} + +module_init(exar_init); + +MODULE_AUTHOR("Paul B Schroeder"); +MODULE_DESCRIPTION("8250 serial probe module for Exar cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index e936c91640b..fc12d5df10e 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -210,6 +210,17 @@ config SERIAL_8250_BOCA To compile this driver as a module, choose M here: the module will be called 8250_boca. +config SERIAL_8250_EXAR_ST16C554 + tristate "Support Exar ST16C554/554D Quad UART" + depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS + help + The Uplogix Envoy TU301 uses this Exar Quad UART. If you are + tinkering with your Envoy TU301, or have a machine with this UART, + say Y here. + + To compile this driver as a module, choose M here: the module + will be called 8250_exar_st16c554. + config SERIAL_8250_HUB6 tristate "Support Hub6 cards" depends on SERIAL_8250 != n && ISA && SERIAL_8250_MANY_PORTS diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 0dba0017a00..df3632cd7df 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_SERIAL_8250_CONSOLE) += 8250_early.o obj-$(CONFIG_SERIAL_8250_FOURPORT) += 8250_fourport.o obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o +obj-$(CONFIG_SERIAL_8250_EXAR_ST16C554) += 8250_exar_st16c554.o obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 8e968141372..71310d80c09 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -41,6 +41,7 @@ enum { PLAT8250_DEV_FOURPORT, PLAT8250_DEV_ACCENT, PLAT8250_DEV_BOCA, + PLAT8250_DEV_EXAR_ST16C554, PLAT8250_DEV_HUB6, PLAT8250_DEV_MCA, PLAT8250_DEV_AU1X00, -- cgit v1.2.3-70-g09d2 From 3a229b39eb8497ae5f8077f81f7c8c3e1aacd624 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:37:14 -0800 Subject: [PATCH] ext3: uninline large functions Saves nearly 4kbytes on x86. Cc: Arnaldo Carvalho de Melo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/Makefile | 2 +- fs/ext3/ext3_jbd.c | 59 +++++++++++++++++++++++++++++++++++++ include/linux/ext3_jbd.h | 76 +++++++++++------------------------------------- 3 files changed, 77 insertions(+), 60 deletions(-) create mode 100644 fs/ext3/ext3_jbd.c (limited to 'include') diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile index 704cd44a40c..e77766a8b3f 100644 --- a/fs/ext3/Makefile +++ b/fs/ext3/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o + ioctl.o namei.o super.o symlink.o hash.o resize.o ext3_jbd.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c new file mode 100644 index 00000000000..e1f91fd26a9 --- /dev/null +++ b/fs/ext3/ext3_jbd.c @@ -0,0 +1,59 @@ +/* + * Interface between ext3 and JBD + */ + +#include + +int __ext3_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_undo_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_write_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_forget(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_revoke(const char *where, handle_t *handle, + unsigned long blocknr, struct buffer_head *bh) +{ + int err = journal_revoke(handle, blocknr, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_get_create_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext3_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_dirty_metadata(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h index ce0e6109aff..8c43b13a02f 100644 --- a/include/linux/ext3_jbd.h +++ b/include/linux/ext3_jbd.h @@ -109,74 +109,32 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); * been done yet. */ -void ext3_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext3_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) +static inline void ext3_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) { - int err = journal_get_undo_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; + journal_release_buffer(handle, bh); } -static inline int -__ext3_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_get_write_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +void ext3_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); -static inline void -ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - journal_release_buffer(handle, bh); -} +int __ext3_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = journal_forget(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh) -{ - int err = journal_revoke(handle, blocknr, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext3_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_get_create_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_revoke(const char *where, handle_t *handle, + unsigned long blocknr, struct buffer_head *bh); -static inline int -__ext3_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_dirty_metadata(handle, bh); - if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext3_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); +int __ext3_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); #define ext3_journal_get_undo_access(handle, bh) \ __ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -- cgit v1.2.3-70-g09d2 From 8984d137df669a6e94dbce7b87095e4ce80b9e67 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:37:15 -0800 Subject: [PATCH] ext4: uninline large functions Saves nearly 4kbytes on x86. Cc: Arnaldo Carvalho de Melo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/Makefile | 3 +- fs/ext4/ext4_jbd2.c | 59 ++++++++++++++++++++++++++++++++++++ include/linux/ext4_jbd2.h | 76 +++++++++++------------------------------------ 3 files changed, 78 insertions(+), 60 deletions(-) create mode 100644 fs/ext4/ext4_jbd2.c (limited to 'include') diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a6acb96ebeb..ae6e7e502ac 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ + ext4_jbd2.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c new file mode 100644 index 00000000000..d6afe4e2734 --- /dev/null +++ b/fs/ext4/ext4_jbd2.c @@ -0,0 +1,59 @@ +/* + * Interface between ext4 and JBD + */ + +#include + +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_undo_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_get_write_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh) +{ + int err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_get_create_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = jbd2_journal_dirty_metadata(handle, bh); + if (err) + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index 72dd631912e..d716e6392cf 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -114,74 +114,32 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); * been done yet. */ -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext4_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) { - int err = jbd2_journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; + jbd2_journal_release_buffer(handle, bh); } -static inline int -__ext4_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = jbd2_journal_get_write_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +void ext4_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); -static inline void -ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - jbd2_journal_release_buffer(handle, bh); -} +int __ext4_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_revoke(const char *where, handle_t *handle, - ext4_fsblk_t blocknr, struct buffer_head *bh) -{ - int err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_forget(const char *where, handle_t *handle, + struct buffer_head *bh); -static inline int -__ext4_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_get_create_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_revoke(const char *where, handle_t *handle, + ext4_fsblk_t blocknr, struct buffer_head *bh); -static inline int -__ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} +int __ext4_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh); +int __ext4_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh); #define ext4_journal_get_undo_access(handle, bh) \ __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -- cgit v1.2.3-70-g09d2 From 6cfd76a26d9fe2ba54b9d496a48c1d9285e5c5ed Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:37:22 -0800 Subject: [PATCH] lockdep: name some old style locks Name some of the remaning 'old_style_spin_init' locks Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 2 +- include/asm-i386/rwsem.h | 4 ++-- include/linux/init_task.h | 2 +- include/linux/mutex.h | 2 +- include/linux/rtmutex.h | 2 +- include/linux/rwsem-spinlock.h | 3 ++- include/linux/sunrpc/sched.h | 4 ++-- kernel/acct.c | 3 ++- kernel/irq/handle.c | 2 +- net/sunrpc/svcauth.c | 3 ++- security/keys/process_keys.c | 2 +- 11 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index fe9c5e8e7e6..3124f1b04d6 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -452,7 +452,7 @@ void die(const char * str, struct pt_regs * regs, long err) u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(die.lock), .lock_owner = -1, .lock_owner_depth = 0 }; diff --git a/include/asm-i386/rwsem.h b/include/asm-i386/rwsem.h index bc598d6388e..041906f3c6d 100644 --- a/include/asm-i386/rwsem.h +++ b/include/asm-i386/rwsem.h @@ -75,8 +75,8 @@ struct rw_semaphore { #define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } +{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 33c5daacc74..733790d4f7d 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -73,7 +73,7 @@ extern struct nsproxy init_nsproxy; #define INIT_NSPROXY(nsproxy) { \ .count = ATOMIC_INIT(1), \ - .nslock = SPIN_LOCK_UNLOCKED, \ + .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ .uts_ns = &init_uts_ns, \ .namespace = NULL, \ INIT_IPC_NS(ipc_ns) \ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 27c48daa318..b2b91c47756 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -94,7 +94,7 @@ do { \ #define __MUTEX_INITIALIZER(lockname) \ { .count = ATOMIC_INIT(1) \ - , .wait_lock = SPIN_LOCK_UNLOCKED \ + , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 5d41dee82f8..b0090e9f788 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -63,7 +63,7 @@ struct hrtimer_sleeper; #endif #define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = SPIN_LOCK_UNLOCKED \ + { .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index ae1fcadd598..813cee13da0 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -44,7 +44,8 @@ struct rw_semaphore { #endif #define __RWSEM_INITIALIZER(name) \ -{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } +{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ + __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f399c138f79..0746c3b16f3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -222,7 +222,7 @@ struct rpc_wait_queue { #ifndef RPC_DEBUG # define RPC_WAITQ_INIT(var,qname) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -231,7 +231,7 @@ struct rpc_wait_queue { } #else # define RPC_WAITQ_INIT(var,qname) { \ - .lock = SPIN_LOCK_UNLOCKED, \ + .lock = __SPIN_LOCK_UNLOCKED(var.lock), \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ diff --git a/kernel/acct.c b/kernel/acct.c index 0aad5ca36a8..dc12db8600e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -89,7 +89,8 @@ struct acct_glbs { struct timer_list timer; }; -static struct acct_glbs acct_globals __cacheline_aligned = {SPIN_LOCK_UNLOCKED}; +static struct acct_glbs acct_globals __cacheline_aligned = + {__SPIN_LOCK_UNLOCKED(acct_globals.lock)}; /* * Called whenever the timer says to check the free space. diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a681912bc89..aff1f0fabb0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -54,7 +54,7 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = { .chip = &no_irq_chip, .handle_irq = handle_bad_irq, .depth = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), #ifdef CONFIG_SMP .affinity = CPU_MASK_ALL #endif diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index ee9bb1522d5..c7bb5f7f21a 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -119,7 +119,8 @@ EXPORT_SYMBOL(svc_auth_unregister); #define DN_HASHMASK (DN_HASHMAX-1) static struct hlist_head auth_domain_table[DN_HASHMAX]; -static spinlock_t auth_domain_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t auth_domain_lock = + __SPIN_LOCK_UNLOCKED(auth_domain_lock); void auth_domain_put(struct auth_domain *dom) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 32150cf7c37..b6f86808475 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(key_session_mutex); struct key_user root_key_user = { .usage = ATOMIC_INIT(3), .consq = LIST_HEAD_INIT(root_key_user.consq), - .lock = SPIN_LOCK_UNLOCKED, + .lock = __SPIN_LOCK_UNLOCKED(root_key_user.lock), .nkeys = ATOMIC_INIT(2), .nikeys = ATOMIC_INIT(2), .uid = 0, -- cgit v1.2.3-70-g09d2 From ece8a684c75df215320b4155944979e3f78c5c93 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 Dec 2006 20:37:24 -0800 Subject: [PATCH] sleep profiling Implement prof=sleep profiling. TASK_UNINTERRUPTIBLE sleeps will be taken as a profile hit, and every millisecond spent sleeping causes a profile-hit for the call site that initiated the sleep. Sample readprofile output on i386: 306 ps2_sendbyte 1.3973 432 call_usermodehelper_keys 1.9548 484 ps2_command 0.6453 790 __driver_attach 4.7879 1593 msleep 44.2500 3976 sync_buffer 64.1290 4076 do_lookup 12.4648 8587 sync_page 122.6714 20820 total 0.0067 (NOTE: architectures need to check whether get_wchan() can be called from deep within the wakeup path.) akpm: we need to mark more functions __sched. lock_sock(), msleep(), others.. akpm: the contention in do_lookup() is a surprise. Presumably doing disk reads for directory contents while holding i_mutex. [akpm@osdl.org: various fixes] Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 1 + include/linux/profile.h | 24 ++++++++++++++++++++++- kernel/profile.c | 39 ++++++++++++++++++++++++++++--------- kernel/sched.c | 11 +++++++++++ 4 files changed, 65 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8fe6b834ef2..2a40d9f6ffa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1294,6 +1294,7 @@ and is between 256 and 4096 characters. It is defined in the file Param: "schedule" - profile schedule points. Param: - step/bucket size as a power of 2 for statistical time based profiling. + Param: "sleep" - profile D-state sleeping (millisecs) processor.max_cstate= [HW,ACPI] Limit processor to maximum C-state diff --git a/include/linux/profile.h b/include/linux/profile.h index acce53fd38b..5670b340c4e 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -6,10 +6,15 @@ #include #include #include +#include + #include +extern int prof_on __read_mostly; + #define CPU_PROFILING 1 #define SCHED_PROFILING 2 +#define SLEEP_PROFILING 3 struct proc_dir_entry; struct pt_regs; @@ -18,7 +23,24 @@ struct notifier_block; /* init basic kernel profiler */ void __init profile_init(void); void profile_tick(int); -void profile_hit(int, void *); + +/* + * Add multiple profiler hits to a given address: + */ +void profile_hits(int, void *ip, unsigned int nr_hits); + +/* + * Single profiler hit: + */ +static inline void profile_hit(int type, void *ip) +{ + /* + * Speedup for the common (no profiling enabled) case: + */ + if (unlikely(prof_on == type)) + profile_hits(type, ip, 1); +} + #ifdef CONFIG_PROC_FS void create_prof_cpu_mask(struct proc_dir_entry *); #else diff --git a/kernel/profile.c b/kernel/profile.c index 15b012df4ff..04fd84e8cdb 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -40,7 +40,7 @@ int (*timer_hook)(struct pt_regs *) __read_mostly; static atomic_t *prof_buffer; static unsigned long prof_len, prof_shift; -static int prof_on __read_mostly; +int prof_on __read_mostly; static cpumask_t prof_cpu_mask = CPU_MASK_ALL; #ifdef CONFIG_SMP static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); @@ -51,9 +51,19 @@ static DEFINE_MUTEX(profile_flip_mutex); static int __init profile_setup(char * str) { static char __initdata schedstr[] = "schedule"; + static char __initdata sleepstr[] = "sleep"; int par; - if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, sleepstr, strlen(sleepstr))) { + prof_on = SLEEP_PROFILING; + if (str[strlen(sleepstr)] == ',') + str += strlen(sleepstr) + 1; + if (get_option(&str, &par)) + prof_shift = par; + printk(KERN_INFO + "kernel sleep profiling enabled (shift: %ld)\n", + prof_shift); + } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { prof_on = SCHED_PROFILING; if (str[strlen(schedstr)] == ',') str += strlen(schedstr) + 1; @@ -204,7 +214,8 @@ EXPORT_SYMBOL_GPL(profile_event_unregister); * positions to which hits are accounted during short intervals (e.g. * several seconds) is usually very small. Exclusion from buffer * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING profile_hit() may be called from process context). + * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from + * process context). * The hash function is meant to be lightweight as opposed to strong, * and was vaguely inspired by ppc64 firmware-supported inverted * pagetable hash functions, but uses a full hashtable full of finite @@ -257,7 +268,7 @@ static void profile_discard_flip_buffers(void) mutex_unlock(&profile_flip_mutex); } -void profile_hit(int type, void *__pc) +void profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long primary, secondary, flags, pc = (unsigned long)__pc; int i, j, cpu; @@ -274,21 +285,31 @@ void profile_hit(int type, void *__pc) put_cpu(); return; } + /* + * We buffer the global profiler buffer into a per-CPU + * queue and thus reduce the number of global (and possibly + * NUMA-alien) accesses. The write-queue is self-coalescing: + */ local_irq_save(flags); do { for (j = 0; j < PROFILE_GRPSZ; ++j) { if (hits[i + j].pc == pc) { - hits[i + j].hits++; + hits[i + j].hits += nr_hits; goto out; } else if (!hits[i + j].hits) { hits[i + j].pc = pc; - hits[i + j].hits = 1; + hits[i + j].hits = nr_hits; goto out; } } i = (i + secondary) & (NR_PROFILE_HIT - 1); } while (i != primary); - atomic_inc(&prof_buffer[pc]); + + /* + * Add the current hit(s) and flush the write-queue out + * to the global buffer: + */ + atomic_add(nr_hits, &prof_buffer[pc]); for (i = 0; i < NR_PROFILE_HIT; ++i) { atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); hits[i].pc = hits[i].hits = 0; @@ -356,14 +377,14 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) -void profile_hit(int type, void *__pc) +void profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; if (prof_on != type || !prof_buffer) return; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_inc(&prof_buffer[min(pc, prof_len - 1)]); + atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); } #endif /* !CONFIG_SMP */ diff --git a/kernel/sched.c b/kernel/sched.c index 343e1794233..75a005ed4ed 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -948,6 +948,17 @@ static void activate_task(struct task_struct *p, struct rq *rq, int local) } #endif + /* + * Sleep time is in units of nanosecs, so shift by 20 to get a + * milliseconds-range estimation of the amount of time that the task + * spent sleeping: + */ + if (unlikely(prof_on == SLEEP_PROFILING)) { + if (p->state == TASK_UNINTERRUPTIBLE) + profile_hits(SLEEP_PROFILING, (void *)get_wchan(p), + (now - p->timestamp) >> 20); + } + if (!rt_task(p)) p->prio = recalc_task_prio(p, now); -- cgit v1.2.3-70-g09d2 From d5abe669172f20a4129a711de0f250a4e07db298 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Dec 2006 20:37:26 -0800 Subject: [PATCH] debug: workqueue locking sanity Workqueue functions should not leak locks, assert so, printing the last function ran. Use macros in lockdep.h to avoid include dependency pains. [akpm@osdl.org: build fix] Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 5 +++++ kernel/workqueue.c | 13 +++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 819f08f1310..da19aeb0dbe 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -243,6 +243,8 @@ extern void lock_release(struct lockdep_map *lock, int nested, # define INIT_LOCKDEP .lockdep_recursion = 0, +#define lockdep_depth(tsk) ((tsk)->lockdep_depth) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -277,6 +279,9 @@ static inline int lockdep_internal(void) * The class key takes no space if lockdep is disabled: */ struct lock_class_key { }; + +#define lockdep_depth(tsk) (0) + #endif /* !LOCKDEP */ #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2945b094d87..5484d6e045c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -30,6 +30,8 @@ #include #include #include +#include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -253,6 +255,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) work_release(work); f(work); + if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { + printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " + "%s/0x%08x/%d\n", + current->comm, preempt_count(), + current->pid); + printk(KERN_ERR " last function: "); + print_symbol("%s\n", (unsigned long)f); + debug_show_held_locks(current); + dump_stack(); + } + spin_lock_irqsave(&cwq->lock, flags); cwq->remove_sequence++; wake_up(&cwq->work_done); -- cgit v1.2.3-70-g09d2 From f5738ceed46782aea7663d62cb6398eb05fc4ce0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Dec 2006 20:37:29 -0800 Subject: [PATCH] remove kernel syscalls The last thing we agreed on was to remove the macros entirely for 2.6.19, on all architectures. Unfortunately, I think nobody actually _did_ that, so they are still there. [akpm@osdl.org: x86_64 fix] Cc: David Woodhouse Cc: Greg Schafer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/vsyscall.c | 1 + include/asm-alpha/unistd.h | 182 ---------------------------- include/asm-arm/unistd.h | 150 ----------------------- include/asm-arm26/unistd.h | 133 --------------------- include/asm-frv/unistd.h | 119 ------------------- include/asm-h8300/unistd.h | 166 -------------------------- include/asm-i386/unistd.h | 98 --------------- include/asm-m32r/unistd.h | 111 ----------------- include/asm-m68k/unistd.h | 97 --------------- include/asm-m68knommu/unistd.h | 150 ----------------------- include/asm-mips/unistd.h | 262 ----------------------------------------- include/asm-powerpc/unistd.h | 109 ----------------- include/asm-s390/unistd.h | 154 ------------------------ include/asm-sh/unistd.h | 137 --------------------- include/asm-sh64/unistd.h | 142 ---------------------- include/asm-sparc/unistd.h | 130 -------------------- include/asm-sparc64/unistd.h | 118 ------------------- include/asm-v850/unistd.h | 160 ------------------------- include/asm-x86_64/unistd.h | 99 ---------------- include/asm-xtensa/unistd.h | 184 ----------------------------- 20 files changed, 1 insertion(+), 2701 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 92546c1526f..630036c06c7 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -42,6 +42,7 @@ #include #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) +#define __syscall_clobber "r11","rcx","memory" int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; diff --git a/include/asm-alpha/unistd.h b/include/asm-alpha/unistd.h index 2cabbd465c0..84313d14e78 100644 --- a/include/asm-alpha/unistd.h +++ b/include/asm-alpha/unistd.h @@ -387,188 +387,6 @@ #define NR_SYSCALLS 447 -#if defined(__GNUC__) - -#define _syscall_return(type) \ - return (_sc_err ? errno = _sc_ret, _sc_ret = -1L : 0), (type) _sc_ret - -#define _syscall_clobbers \ - "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", \ - "$22", "$23", "$24", "$25", "$27", "$28" \ - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - __asm__("callsys # %0 %1 %2" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - __asm__("callsys # %0 %1 %2 %3" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - __asm__("callsys # %0 %1 %2 %3 %4" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - register long _sc_20 __asm__("$20"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - _sc_20 = (long) (arg5); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6 %7" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19), "r"(_sc_20) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, type6 arg6)\ -{ \ - long _sc_ret, _sc_err; \ - { \ - register long _sc_0 __asm__("$0"); \ - register long _sc_16 __asm__("$16"); \ - register long _sc_17 __asm__("$17"); \ - register long _sc_18 __asm__("$18"); \ - register long _sc_19 __asm__("$19"); \ - register long _sc_20 __asm__("$20"); \ - register long _sc_21 __asm__("$21"); \ - \ - _sc_0 = __NR_##name; \ - _sc_16 = (long) (arg1); \ - _sc_17 = (long) (arg2); \ - _sc_18 = (long) (arg3); \ - _sc_19 = (long) (arg4); \ - _sc_20 = (long) (arg5); \ - _sc_21 = (long) (arg6); \ - __asm__("callsys # %0 %1 %2 %3 %4 %5 %6 %7 %8" \ - : "=r"(_sc_0), "=r"(_sc_19) \ - : "0"(_sc_0), "r"(_sc_16), "r"(_sc_17), \ - "r"(_sc_18), "1"(_sc_19), "r"(_sc_20), "r"(_sc_21) \ - : _syscall_clobbers); \ - _sc_ret = _sc_0, _sc_err = _sc_19; \ - } \ - _syscall_return(type); \ -} - -#endif /* __GNUC__ */ - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 14a87eec5a2..d44c629d842 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -377,156 +377,6 @@ #endif #ifdef __KERNEL__ -#include -#include - -#define __sys2(x) #x -#define __sys1(x) __sys2(x) - -#ifndef __syscall -#if defined(__thumb__) || defined(__ARM_EABI__) -#define __SYS_REG(name) register long __sysreg __asm__("r7") = __NR_##name; -#define __SYS_REG_LIST(regs...) "r" (__sysreg) , ##regs -#define __syscall(name) "swi\t0" -#else -#define __SYS_REG(name) -#define __SYS_REG_LIST(regs...) regs -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#endif -#endif - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) { \ - __SYS_REG(name) \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST() \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)\ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), "r" (__r3) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ - __SYS_REG(name) \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __r5 __asm__("r5") = (long)arg6; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : __SYS_REG_LIST( "0" (__r0), "r" (__r1), "r" (__r2), \ - "r" (__r3), "r" (__r4), "r" (__r5) ) \ - : "memory" ); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 diff --git a/include/asm-arm26/unistd.h b/include/asm-arm26/unistd.h index 25a5eead85b..4c3b919177e 100644 --- a/include/asm-arm26/unistd.h +++ b/include/asm-arm26/unistd.h @@ -311,139 +311,6 @@ #define __ARM_NR_usr26 (__ARM_NR_BASE+3) #ifdef __KERNEL__ -#include -#include - -#define __sys2(x) #x -#define __sys1(x) __sys2(x) - -#ifndef __syscall -#define __syscall(name) "swi\t" __sys1(__NR_##name) "" -#endif - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)-MAX_ERRNO) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) { \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)\ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3),"r" (__r4) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) { \ - register long __r0 __asm__("r0") = (long)arg1; \ - register long __r1 __asm__("r1") = (long)arg2; \ - register long __r2 __asm__("r2") = (long)arg3; \ - register long __r3 __asm__("r3") = (long)arg4; \ - register long __r4 __asm__("r4") = (long)arg5; \ - register long __r5 __asm__("r5") = (long)arg6; \ - register long __res_r0 __asm__("r0"); \ - long __res; \ - __asm__ __volatile__ ( \ - __syscall(name) \ - : "=r" (__res_r0) \ - : "r" (__r0),"r" (__r1),"r" (__r2),"r" (__r3), "r" (__r4),"r" (__r5) \ - : "lr"); \ - __res = __res_r0; \ - __syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h index 725e854928c..584c0417ae4 100644 --- a/include/asm-frv/unistd.h +++ b/include/asm-frv/unistd.h @@ -320,125 +320,6 @@ #ifdef __KERNEL__ #define NR_syscalls 310 -#include - -/* - * process the return value of a syscall, consigning it to one of two possible fates - * - user-visible error numbers are in the range -1 - -4095: see - */ -#undef __syscall_return -#define __syscall_return(type, res) \ -do { \ - unsigned long __sr2 = (res); \ - if (__builtin_expect(__sr2 >= (unsigned long)(-MAX_ERRNO), 0)) { \ - errno = (-__sr2); \ - __sr2 = ~0UL; \ - } \ - return (type) __sr2; \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ - -#undef _syscall0 -#define _syscall0(type,name) \ -type name(void) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8"); \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "=r" (__sc0) \ - : "r" (__scnum)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall1 -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall2 -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall3 -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall4 -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), "r" (__sc3)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall5 -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - register unsigned long __sc4 __asm__ ("gr12") = (unsigned long) arg5; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), \ - "r" (__sc3), "r" (__sc4)); \ - __syscall_return(type, __sc0); \ -} - -#undef _syscall6 -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5, type6, arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ - register unsigned long __scnum __asm__ ("gr7") = (__NR_##name); \ - register unsigned long __sc0 __asm__ ("gr8") = (unsigned long) arg1; \ - register unsigned long __sc1 __asm__ ("gr9") = (unsigned long) arg2; \ - register unsigned long __sc2 __asm__ ("gr10") = (unsigned long) arg3; \ - register unsigned long __sc3 __asm__ ("gr11") = (unsigned long) arg4; \ - register unsigned long __sc4 __asm__ ("gr12") = (unsigned long) arg5; \ - register unsigned long __sc5 __asm__ ("gr13") = (unsigned long) arg6; \ - __asm__ __volatile__ ("tira gr0,#0" \ - : "+r" (__sc0) \ - : "r" (__scnum), "r" (__sc1), "r" (__sc2), \ - "r" (__sc3), "r" (__sc4), "r" (__sc5)); \ - __syscall_return(type, __sc0); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION /* #define __ARCH_WANT_OLD_READDIR */ diff --git a/include/asm-h8300/unistd.h b/include/asm-h8300/unistd.h index 747788d629a..7ddd414f8d1 100644 --- a/include/asm-h8300/unistd.h +++ b/include/asm-h8300/unistd.h @@ -295,172 +295,6 @@ #ifdef __KERNEL__ #define NR_syscalls 289 -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - register long __res __asm__("er0"); \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall1(type, name, atype, a) \ -type name(atype a) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - _a = (long)a; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name(atype a, btype b) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - _a = (long)a; \ - _b = (long)b; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name(atype a, btype b, ctype c) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall4(type, name, atype, a, btype, b, \ - ctype, c, dtype, d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall5(type, name, atype, a, btype, b, \ - ctype, c, dtype, d, etype, e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - register long _e __asm__("er5"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - _e = (long)e; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d), \ - "g" (_e) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} - -#define _syscall6(type, name, atype, a, btype, b, \ - ctype, c, dtype, d, etype, e, ftype, f) \ -type name(atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register long __res __asm__("er0"); \ - register long _a __asm__("er1"); \ - register long _b __asm__("er2"); \ - register long _c __asm__("er3"); \ - register long _d __asm__("er4"); \ - register long _e __asm__("er5"); \ - register long _f __asm__("er6"); \ - _a = (long)a; \ - _b = (long)b; \ - _c = (long)c; \ - _d = (long)d; \ - _e = (long)e; \ - _f = (long)f; \ - __asm__ __volatile__ ("mov.l %1,er0\n\t" \ - "trapa #0\n\t" \ - : "=r" (__res) \ - : "g" (__NR_##name), \ - "g" (_a), \ - "g" (_b), \ - "g" (_c), \ - "g" (_d), \ - "g" (_e) \ - "g" (_f) \ - : "cc", "memory"); \ - __syscall_return(type, __res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index beeeaf6b054..833fa1704ff 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -329,104 +329,6 @@ #ifdef __KERNEL__ #define NR_syscalls 320 -#include - -/* - * user-visible error numbers are in the range -1 - -MAX_ERRNO: see - * - */ -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ volatile ("int $0x80" \ - : "=a" (__res) \ - : "0" (__NR_##name)); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "0" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)) : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ volatile ("push %%ebx ; movl %2,%%ebx ; movl %1,%%eax ; " \ - "int $0x80 ; pop %%ebx" \ - : "=a" (__res) \ - : "i" (__NR_##name),"ri" ((long)(arg1)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ -{ \ -long __res; \ - struct { long __a1; long __a6; } __s = { (long)arg1, (long)arg6 }; \ -__asm__ volatile ("push %%ebp ; push %%ebx ; movl 4(%2),%%ebp ; " \ - "movl 0(%2),%%ebx ; movl %1,%%eax ; int $0x80 ; " \ - "pop %%ebx ; pop %%ebp" \ - : "=a" (__res) \ - : "i" (__NR_##name),"0" ((long)(&__s)),"c" ((long)(arg2)), \ - "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)) \ - : "memory"); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-m32r/unistd.h b/include/asm-m32r/unistd.h index 95aa34298d8..5b66bd3c6ed 100644 --- a/include/asm-m32r/unistd.h +++ b/include/asm-m32r/unistd.h @@ -296,117 +296,6 @@ #ifdef __KERNEL__ #define NR_syscalls 285 -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - * - */ - -#include /* SYSCALL_* */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* Avoid using "res" which is declared to be in register r0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __res __asm__("r0"); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type1 arg1,type2 arg2,type3 arg3,type4 arg4) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg4 __asm__ ("r3") = (long)(arg4); \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3), "r" (__arg4) \ - : "memory"); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name(type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -register long __scno __asm__ ("r7") = __NR_##name; \ -register long __arg5 __asm__ ("r4") = (long)(arg5); \ -register long __arg4 __asm__ ("r3") = (long)(arg4); \ -register long __arg3 __asm__ ("r2") = (long)(arg3); \ -register long __arg2 __asm__ ("r1") = (long)(arg2); \ -register long __res __asm__ ("r0") = (long)(arg1); \ -__asm__ __volatile__ (\ - "trap #" SYSCALL_VECTOR "|| nop"\ - : "=r" (__res) \ - : "r" (__scno), "0" (__res), "r" (__arg2), \ - "r" (__arg3), "r" (__arg4), "r" (__arg5) \ - : "memory"); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index ad4348058c6..fdbb60e6a0d 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -317,103 +317,6 @@ #ifdef __KERNEL__ #define NR_syscalls 311 -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,atype,a) \ -type name(atype a) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a) ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,atype,a,btype,b) \ -type name(atype a,btype b) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,atype,a,btype,b,ctype,c) \ -type name(atype a,btype b,ctype c) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,atype,a,btype,b,ctype,c,dtype,d) \ -type name (atype a, btype b, ctype c, dtype d) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -register long __d __asm__ ("%d4") = (long)(d); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c), "d" (__d) \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name (atype a,btype b,ctype c,dtype d,etype e) \ -{ \ -register long __res __asm__ ("%d0") = __NR_##name; \ -register long __a __asm__ ("%d1") = (long)(a); \ -register long __b __asm__ ("%d2") = (long)(b); \ -register long __c __asm__ ("%d3") = (long)(c); \ -register long __d __asm__ ("%d4") = (long)(d); \ -register long __e __asm__ ("%d5") = (long)(e); \ -__asm__ __volatile__ ("trap #0" \ - : "+d" (__res) \ - : "d" (__a), "d" (__b), \ - "d" (__c), "d" (__d), "d" (__e) \ - ); \ -__syscall_return(type,__res); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index ebaf0319711..82e03195f32 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -318,156 +318,6 @@ #ifdef __KERNEL__ #define NR_syscalls 311 -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: see - */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* avoid using res which is declared to be in register d0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _syscall0(type, name) \ -type name(void) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name) \ - : "cc", "%d0"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall1(type, name, atype, a) \ -type name(atype a) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "g" ((long)a) \ - : "cc", "%d0", "%d1"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name(atype a, btype b) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "g" ((long)b) \ - : "cc", "%d0", "%d1", "%d2"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name(atype a, btype b, ctype c) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "g" ((long)c) \ - : "cc", "%d0", "%d1", "%d2", "%d3"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall4(type, name, atype, a, btype, b, ctype, c, dtype, d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %5, %%d4\n\t" \ - "movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "a" ((long)c), \ - "g" ((long)d) \ - : "cc", "%d0", "%d1", "%d2", "%d3", \ - "%d4"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} - -#define _syscall5(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - long __res; \ - __asm__ __volatile__ ("movel %6, %%d5\n\t" \ - "movel %5, %%d4\n\t" \ - "movel %4, %%d3\n\t" \ - "movel %3, %%d2\n\t" \ - "movel %2, %%d1\n\t" \ - "movel %1, %%d0\n\t" \ - "trap #0\n\t" \ - "movel %%d0, %0" \ - : "=g" (__res) \ - : "i" (__NR_##name), \ - "a" ((long)a), \ - "a" ((long)b), \ - "a" ((long)c), \ - "a" ((long)d), \ - "g" ((long)e) \ - : "cc", "%d0", "%d1", "%d2", "%d3", \ - "%d4", "%d5"); \ - if ((unsigned long)(__res) >= (unsigned long)(-125)) { \ - errno = -__res; \ - __res = -1; \ - } \ - return (type)__res; \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index ec56aa52f66..696cff39a1d 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -933,268 +933,6 @@ #ifndef __ASSEMBLY__ -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %2\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -/* - * DANGER: This macro isn't usable for the pipe(2) call - * which has a unusual return convention. - */ -#define _syscall1(type,name,atype,a) \ -type name(atype a) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %3\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall2(type,name,atype,a,btype,b) \ -type name(atype a, btype b) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %4\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "r" (__a1), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall3(type,name,atype,a,btype,b,ctype,c) \ -type name(atype a, btype b, ctype c) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7"); \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "=r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall4(type,name,atype,a,btype,b,ctype,c,dtype,d) \ -type name(atype a, btype b, ctype c, dtype d) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#if (_MIPS_SIM == _MIPS_SIM_ABI32) - -/* - * Using those means your brain needs more than an oil change ;-) - */ - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name(atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "lw\t$2, %6\n\t" \ - "subu\t$29, 32\n\t" \ - "sw\t$2, 16($29)\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - "addiu\t$29, 32\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name), \ - "m" ((unsigned long)e) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall6(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e,ftype,f) \ -type name(atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "lw\t$2, %6\n\t" \ - "lw\t$8, %7\n\t" \ - "subu\t$29, 32\n\t" \ - "sw\t$2, 16($29)\n\t" \ - "sw\t$8, 20($29)\n\t" \ - "li\t$2, %5\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - "addiu\t$29, 32\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_##name), \ - "m" ((unsigned long)e), "m" ((unsigned long)f) \ - : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#endif /* (_MIPS_SIM == _MIPS_SIM_ABI32) */ - -#if (_MIPS_SIM == _MIPS_SIM_NABI32) || (_MIPS_SIM == _MIPS_SIM_ABI64) - -#define _syscall5(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e) \ -type name (atype a,btype b,ctype c,dtype d,etype e) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - register unsigned long __a4 asm("$8") = (unsigned long) e; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %6\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4), "i" (__NR_##name) \ - : "$2", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#define _syscall6(type,name,atype,a,btype,b,ctype,c,dtype,d,etype,e,ftype,f) \ -type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \ -{ \ - register unsigned long __a0 asm("$4") = (unsigned long) a; \ - register unsigned long __a1 asm("$5") = (unsigned long) b; \ - register unsigned long __a2 asm("$6") = (unsigned long) c; \ - register unsigned long __a3 asm("$7") = (unsigned long) d; \ - register unsigned long __a4 asm("$8") = (unsigned long) e; \ - register unsigned long __a5 asm("$9") = (unsigned long) f; \ - unsigned long __v0; \ - \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - "li\t$2, %7\t\t\t# " #name "\n\t" \ - "syscall\n\t" \ - "move\t%0, $2\n\t" \ - ".set\treorder" \ - : "=&r" (__v0), "+r" (__a3) \ - : "r" (__a0), "r" (__a1), "r" (__a2), "r" (__a4), "r" (__a5), \ - "i" (__NR_##name) \ - : "$2", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24", \ - "memory"); \ - \ - if (__a3 == 0) \ - return (type) __v0; \ - errno = __v0; \ - return (type) -1; \ -} - -#endif /* (_MIPS_SIM == _MIPS_SIM_NABI32) || (_MIPS_SIM == _MIPS_SIM_ABI64) */ - - #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 04b6c17cc59..0ae954e3d25 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -334,115 +334,6 @@ #ifndef __ASSEMBLY__ -/* On powerpc a system call basically clobbers the same registers like a - * function call, with the exception of LR (which is needed for the - * "sc; bnslr" sequence) and CR (where only CR0.SO is clobbered to signal - * an error return status). - */ - -#define __syscall_nr(nr, type, name, args...) \ - unsigned long __sc_ret, __sc_err; \ - { \ - register unsigned long __sc_0 __asm__ ("r0"); \ - register unsigned long __sc_3 __asm__ ("r3"); \ - register unsigned long __sc_4 __asm__ ("r4"); \ - register unsigned long __sc_5 __asm__ ("r5"); \ - register unsigned long __sc_6 __asm__ ("r6"); \ - register unsigned long __sc_7 __asm__ ("r7"); \ - register unsigned long __sc_8 __asm__ ("r8"); \ - \ - __sc_loadargs_##nr(name, args); \ - __asm__ __volatile__ \ - ("sc \n\t" \ - "mfcr %0 " \ - : "=&r" (__sc_0), \ - "=&r" (__sc_3), "=&r" (__sc_4), \ - "=&r" (__sc_5), "=&r" (__sc_6), \ - "=&r" (__sc_7), "=&r" (__sc_8) \ - : __sc_asm_input_##nr \ - : "cr0", "ctr", "memory", \ - "r9", "r10","r11", "r12"); \ - __sc_ret = __sc_3; \ - __sc_err = __sc_0; \ - } \ - if (__sc_err & 0x10000000) \ - { \ - errno = __sc_ret; \ - __sc_ret = -1; \ - } \ - return (type) __sc_ret - -#define __sc_loadargs_0(name, dummy...) \ - __sc_0 = __NR_##name -#define __sc_loadargs_1(name, arg1) \ - __sc_loadargs_0(name); \ - __sc_3 = (unsigned long) (arg1) -#define __sc_loadargs_2(name, arg1, arg2) \ - __sc_loadargs_1(name, arg1); \ - __sc_4 = (unsigned long) (arg2) -#define __sc_loadargs_3(name, arg1, arg2, arg3) \ - __sc_loadargs_2(name, arg1, arg2); \ - __sc_5 = (unsigned long) (arg3) -#define __sc_loadargs_4(name, arg1, arg2, arg3, arg4) \ - __sc_loadargs_3(name, arg1, arg2, arg3); \ - __sc_6 = (unsigned long) (arg4) -#define __sc_loadargs_5(name, arg1, arg2, arg3, arg4, arg5) \ - __sc_loadargs_4(name, arg1, arg2, arg3, arg4); \ - __sc_7 = (unsigned long) (arg5) -#define __sc_loadargs_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ - __sc_loadargs_5(name, arg1, arg2, arg3, arg4, arg5); \ - __sc_8 = (unsigned long) (arg6) - -#define __sc_asm_input_0 "0" (__sc_0) -#define __sc_asm_input_1 __sc_asm_input_0, "1" (__sc_3) -#define __sc_asm_input_2 __sc_asm_input_1, "2" (__sc_4) -#define __sc_asm_input_3 __sc_asm_input_2, "3" (__sc_5) -#define __sc_asm_input_4 __sc_asm_input_3, "4" (__sc_6) -#define __sc_asm_input_5 __sc_asm_input_4, "5" (__sc_7) -#define __sc_asm_input_6 __sc_asm_input_5, "6" (__sc_8) - -#define _syscall0(type,name) \ -type name(void) \ -{ \ - __syscall_nr(0, type, name); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ - __syscall_nr(1, type, name, arg1); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1, type2 arg2) \ -{ \ - __syscall_nr(2, type, name, arg1, arg2); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1, type2 arg2, type3 arg3) \ -{ \ - __syscall_nr(3, type, name, arg1, arg2, arg3); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ - __syscall_nr(4, type, name, arg1, arg2, arg3, arg4); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ - __syscall_nr(5, type, name, arg1, arg2, arg3, arg4, arg5); \ -} -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ - __syscall_nr(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ -} - - #include #include #include diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index 71d3c21b84f..fb6fef97d73 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -345,160 +345,6 @@ #ifdef __KERNEL__ -#include - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#define _svc_clobber "1", "cc", "memory" - -#define _syscall0(type,name) \ -type name(void) { \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) { \ - register type1 __arg1 asm("2") = arg1; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1, type2 arg2) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2) \ - : _svc_clobber ); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1, type2 arg2, type3 arg3) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3, \ - type4,name4) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register type4 __arg4 asm("5") = arg4; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3), \ - "d" (__arg4) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3, \ - type4,name4,type5,name5) \ -type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ - type5 arg5) { \ - register type1 __arg1 asm("2") = arg1; \ - register type2 __arg2 asm("3") = arg2; \ - register type3 __arg3 asm("4") = arg3; \ - register type4 __arg4 asm("5") = arg4; \ - register type5 __arg5 asm("6") = arg5; \ - register long __svcres asm("2"); \ - long __res; \ - asm volatile( \ - " .if %1 < 256\n" \ - " svc %b1\n" \ - " .else\n" \ - " la %%r1,%1\n" \ - " svc 0\n" \ - " .endif" \ - : "=d" (__svcres) \ - : "i" (__NR_##name), \ - "0" (__arg1), \ - "d" (__arg2), \ - "d" (__arg3), \ - "d" (__arg4), \ - "d" (__arg5) \ - : _svc_clobber); \ - __res = __svcres; \ - __syscall_return(type,__res); \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h index 0cae1d24876..f982073dc6c 100644 --- a/include/asm-sh/unistd.h +++ b/include/asm-sh/unistd.h @@ -332,143 +332,6 @@ #ifdef __KERNEL__ -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: - * see */ - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - /* Avoid using "res" which is declared to be in register r0; \ - errno might expand to a function call and clobber it. */ \ - int __err = -(res); \ - errno = __err; \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - -#if defined(__sh2__) || defined(__SH2E__) || defined(__SH2A__) -#define SYSCALL_ARG0 "trapa #0x20" -#define SYSCALL_ARG1 "trapa #0x21" -#define SYSCALL_ARG2 "trapa #0x22" -#define SYSCALL_ARG3 "trapa #0x23" -#define SYSCALL_ARG4 "trapa #0x24" -#define SYSCALL_ARG5 "trapa #0x25" -#define SYSCALL_ARG6 "trapa #0x26" -#else -#define SYSCALL_ARG0 "trapa #0x10" -#define SYSCALL_ARG1 "trapa #0x11" -#define SYSCALL_ARG2 "trapa #0x12" -#define SYSCALL_ARG3 "trapa #0x13" -#define SYSCALL_ARG4 "trapa #0x14" -#define SYSCALL_ARG5 "trapa #0x15" -#define SYSCALL_ARG6 "trapa #0x16" -#endif - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -__asm__ __volatile__ (SYSCALL_ARG0 \ - : "=z" (__sc0) \ - : "0" (__sc0) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -__asm__ __volatile__ (SYSCALL_ARG1 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -__asm__ __volatile__ (SYSCALL_ARG2 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -__asm__ __volatile__ (SYSCALL_ARG3 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6) \ - : "memory"); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -register long __sc0 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -__asm__ __volatile__ (SYSCALL_ARG4 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), \ - "r" (__sc7) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ -register long __sc3 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -register long __sc0 __asm__ ("r0") = (long) arg5; \ -__asm__ __volatile__ (SYSCALL_ARG5 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ - "r" (__sc3) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ -register long __sc3 __asm__ ("r3") = __NR_##name; \ -register long __sc4 __asm__ ("r4") = (long) arg1; \ -register long __sc5 __asm__ ("r5") = (long) arg2; \ -register long __sc6 __asm__ ("r6") = (long) arg3; \ -register long __sc7 __asm__ ("r7") = (long) arg4; \ -register long __sc0 __asm__ ("r0") = (long) arg5; \ -register long __sc1 __asm__ ("r1") = (long) arg6; \ -__asm__ __volatile__ (SYSCALL_ARG6 \ - : "=z" (__sc0) \ - : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ - "r" (__sc3), "r" (__sc1) \ - : "memory" ); \ -__syscall_return(type,__sc0); \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/include/asm-sh64/unistd.h b/include/asm-sh64/unistd.h index ee7828b27ad..1f38a7aacaa 100644 --- a/include/asm-sh64/unistd.h +++ b/include/asm-sh64/unistd.h @@ -347,148 +347,6 @@ #ifdef __KERNEL__ #define NR_syscalls 321 -#include - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO: - * see */ - -#define __syscall_return(type, res) \ -do { \ - /* Note: when returning from kernel the return value is in r9 \ - ** This prevents conflicts between return value and arg1 \ - ** when dispatching signal handler, in other words makes \ - ** life easier in the system call epilogue (see entry.S) \ - */ \ - register unsigned long __sr2 __asm__ ("r2") = res; \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - __sr2 = -1; \ - } \ - return (type) (__sr2); \ -} while (0) - -/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */ - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x10 << 16) | __NR_##name); \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "()" \ - : "=r" (__sc0) \ - : "r" (__sc0) ); \ -__syscall_return(type,__sc0); \ -} - - /* - * The apparent spurious "dummy" assembler comment is *needed*, - * as without it, the compiler treats the arg variables - * as no longer live just before the asm. The compiler can - * then optimize the storage into any registers it wishes. - * The additional dummy statement forces the compiler to put - * the arguments into the correct registers before the TRAPA. - */ -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x11 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2)); \ -__asm__ __volatile__ ("!dummy %0 %1" \ - : \ - : "r" (__sc0), "r" (__sc2)); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x12 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3) ); \ -__asm__ __volatile__ ("!dummy %0 %1 %2" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3) ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) ); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) ); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x14 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5) );\ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5) );\ -__syscall_return(type,__sc0); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x15 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -register unsigned long __sc6 __asm__ ("r6") = (unsigned long) arg5; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5,%6)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6)); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4 %5" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6)); \ -__syscall_return(type,__sc0); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5, type6, arg6) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ -{ \ -register unsigned long __sc0 __asm__ ("r9") = ((0x16 << 16) | __NR_##name); \ -register unsigned long __sc2 __asm__ ("r2") = (unsigned long) arg1; \ -register unsigned long __sc3 __asm__ ("r3") = (unsigned long) arg2; \ -register unsigned long __sc4 __asm__ ("r4") = (unsigned long) arg3; \ -register unsigned long __sc5 __asm__ ("r5") = (unsigned long) arg4; \ -register unsigned long __sc6 __asm__ ("r6") = (unsigned long) arg5; \ -register unsigned long __sc7 __asm__ ("r7") = (unsigned long) arg6; \ -__asm__ __volatile__ ("trapa %1 !\t\t\t" #name "(%2,%3,%4,%5,%6,%7)" \ - : "=r" (__sc0) \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6), "r" (__sc7)); \ -__asm__ __volatile__ ("!dummy %0 %1 %2 %3 %4 %5 %6" \ - : \ - : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4), "r" (__sc5), \ - "r" (__sc6), "r" (__sc7)); \ -__syscall_return(type,__sc0); \ -} #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index f7827fa4cd5..d5b2f8053b3 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -329,136 +329,6 @@ * find a free slot in the 0-302 range. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res)\ - : "r" (__g1) \ - : "o0", "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -register long __o4 __asm__ ("o4") = (long)(arg5); \ -__asm__ __volatile__ ("t 0x10\n\t" \ - "bcc 1f\n\t" \ - "mov %%o0, %0\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "1:\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__o4), "r" (__g1) \ - : "cc"); \ -if (__res < -255 || __res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index 63669dad0d7..47047536f26 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -332,124 +332,6 @@ * find a free slot in the 0-302 range. */ -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res)\ - : "r" (__g1) \ - : "o0", "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__g1) \ - : "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__g1) \ - : "cc"); \ -if (__res >= 0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -register long __g1 __asm__ ("g1") = __NR_##name; \ -register long __o0 __asm__ ("o0") = (long)(arg1); \ -register long __o1 __asm__ ("o1") = (long)(arg2); \ -register long __o2 __asm__ ("o2") = (long)(arg3); \ -register long __o3 __asm__ ("o3") = (long)(arg4); \ -register long __o4 __asm__ ("o4") = (long)(arg5); \ -__asm__ __volatile__ ("t 0x6d\n\t" \ - "sub %%g0, %%o0, %0\n\t" \ - "movcc %%xcc, %%o0, %0\n\t" \ - : "=r" (__res), "=&r" (__o0) \ - : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__o3), "r" (__o4), "r" (__g1) \ - : "cc"); \ -if (__res>=0) \ - return (type) __res; \ -errno = -__res; \ -return -1; \ -} - /* sysconf options, for SunOS compatibility */ #define _SC_ARG_MAX 1 #define _SC_CHILD_MAX 2 diff --git a/include/asm-v850/unistd.h b/include/asm-v850/unistd.h index 737401e7d3a..2241ed45ecf 100644 --- a/include/asm-v850/unistd.h +++ b/include/asm-v850/unistd.h @@ -204,168 +204,8 @@ #define __NR_gettid 201 #define __NR_tkill 202 - -/* Syscall protocol: - Syscall number in r12, args in r6-r9, r13-r14 - Return value in r10 - Trap 0 for `short' syscalls, where all the args can fit in function - call argument registers, and trap 1 when there are additional args in - r13-r14. */ - -#define SYSCALL_NUM "r12" -#define SYSCALL_ARG0 "r6" -#define SYSCALL_ARG1 "r7" -#define SYSCALL_ARG2 "r8" -#define SYSCALL_ARG3 "r9" -#define SYSCALL_ARG4 "r13" -#define SYSCALL_ARG5 "r14" -#define SYSCALL_RET "r10" - -#define SYSCALL_SHORT_TRAP "0" -#define SYSCALL_LONG_TRAP "1" - -/* Registers clobbered by any syscall. This _doesn't_ include the syscall - number (r12) or the `extended arg' registers (r13, r14), even though - they are actually clobbered too (this is because gcc's `asm' statement - doesn't allow a clobber to be used as an input or output). */ -#define SYSCALL_CLOBBERS "r1", "r5", "r11", "r15", "r16", \ - "r17", "r18", "r19" - -/* Registers clobbered by a `short' syscall. This includes all clobbers - except the syscall number (r12). */ -#define SYSCALL_SHORT_CLOBBERS SYSCALL_CLOBBERS, "r13", "r14" - #ifdef __KERNEL__ -#include -#include - -#define __syscall_return(type, res) \ - do { \ - /* user-visible error numbers are in the range -1 - -MAX_ERRNO: \ - see */ \ - if (__builtin_expect ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO), 0)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ - } while (0) - - -#define _syscall0(type, name) \ -type name (void) \ -{ \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall1(type, name, atype, a) \ -type name (atype a) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall2(type, name, atype, a, btype, b) \ -type name (atype a, btype b) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a), "r" (__b) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall3(type, name, atype, a, btype, b, ctype, c) \ -type name (atype a, btype b, ctype c) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), "r" (__a), "r" (__b), "r" (__c) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall4(type, name, atype, a, btype, b, ctype, c, dtype, d) \ -type name (atype a, btype b, ctype c, dtype d) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_SHORT_TRAP \ - : "=r" (__ret), "=r" (__syscall) \ - : "1" (__syscall), \ - "r" (__a), "r" (__b), "r" (__c), "r" (__d) \ - : SYSCALL_SHORT_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define _syscall5(type, name, atype, a, btype, b, ctype, c, dtype, d, etype,e)\ -type name (atype a, btype b, ctype c, dtype d, etype e) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register etype __e __asm__ (SYSCALL_ARG4) = e; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (__ret), "=r" (__syscall), "=r" (__e) \ - : "1" (__syscall), \ - "r" (__a), "r" (__b), "r" (__c), "r" (__d), "2" (__e) \ - : SYSCALL_CLOBBERS); \ - __syscall_return (type, __ret); \ -} - -#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f) \ - __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP \ - : "=r" (ret), "=r" (syscall), \ - "=r" (e), "=r" (f) \ - : "1" (syscall), \ - "r" (a), "r" (b), "r" (c), "r" (d), \ - "2" (e), "3" (f) \ - : SYSCALL_CLOBBERS); - -#define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \ -type name (atype a, btype b, ctype c, dtype d, etype e, ftype f) \ -{ \ - register atype __a __asm__ (SYSCALL_ARG0) = a; \ - register btype __b __asm__ (SYSCALL_ARG1) = b; \ - register ctype __c __asm__ (SYSCALL_ARG2) = c; \ - register dtype __d __asm__ (SYSCALL_ARG3) = d; \ - register etype __e __asm__ (SYSCALL_ARG4) = e; \ - register etype __f __asm__ (SYSCALL_ARG5) = f; \ - register unsigned long __syscall __asm__ (SYSCALL_NUM) = __NR_##name; \ - register unsigned long __ret __asm__ (SYSCALL_RET); \ - __SYSCALL6_TRAP(__syscall, __ret, __a, __b, __c, __d, __e, __f); \ - __syscall_return (type, __ret); \ -} - - #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_STAT64 diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 777288eb7e7..c5f596e71fa 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -622,25 +622,7 @@ __SYSCALL(__NR_move_pages, sys_move_pages) #define __NR_syscall_max __NR_move_pages -#ifdef __KERNEL__ -#include -#endif - #ifndef __NO_STUBS - -/* user-visible error numbers are in the range -1 - -MAX_ERRNO */ - -#define __syscall_clobber "r11","rcx","memory" - -#define __syscall_return(type, res) \ -do { \ - if ((unsigned long)(res) >= (unsigned long)(-MAX_ERRNO)) { \ - errno = -(res); \ - res = -1; \ - } \ - return (type) (res); \ -} while (0) - #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT #define __ARCH_WANT_SYS_ALARM @@ -664,87 +646,6 @@ do { \ #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_TIME -#define __syscall "syscall" - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type1,arg1) \ -type name(type1 arg1) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type1,arg1,type2,arg2) \ -type name(type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)) : __syscall_clobber ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ -type name(type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ volatile (__syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)) : __syscall_clobber); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ;" __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)),"g" ((long)(arg4)) : __syscall_clobber,"r10" ); \ -__syscall_return(type,__res); \ -} - -#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5)) : \ - __syscall_clobber,"r8","r10" ); \ -__syscall_return(type,__res); \ -} - -#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ - type5,arg5,type6,arg6) \ -type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ -{ \ -long __res; \ -__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; " __syscall \ - : "=a" (__res) \ - : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \ - "d" ((long)(arg3)), "g" ((long)(arg4)), "g" ((long)(arg5)), \ - "g" ((long)(arg6)) : \ - __syscall_clobber,"r8","r10","r9" ); \ -__syscall_return(type,__res); \ -} - #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/include/asm-xtensa/unistd.h b/include/asm-xtensa/unistd.h index 411f810a55c..2e1a1b997e7 100644 --- a/include/asm-xtensa/unistd.h +++ b/include/asm-xtensa/unistd.h @@ -218,190 +218,6 @@ #define SYSXTENSA_COUNT 5 /* count of syscall0 functions*/ -#ifdef __KERNEL__ -#include - -#define __syscall_return(type, res) return ((type)(res)) - -/* Tensilica's xt-xcc compiler is much more agressive at code - * optimization than gcc. Multiple __asm__ statements are - * insufficient for xt-xcc because subsequent optimization passes - * (beyond the front-end that knows of __asm__ statements and other - * such GNU Extensions to C) can modify the register selection for - * containment of C variables. - * - * xt-xcc cannot modify the contents of a single __asm__ statement, so - * we create single-asm versions of the syscall macros that are - * suitable and optimal for both xt-xcc and gcc. - * - * Linux takes system-call arguments in registers. The following - * design is optimized for user-land apps (e.g., glibc) which - * typically have a function wrapper around the "syscall" assembly - * instruction. It satisfies the Xtensa ABI while minizing argument - * shifting. - * - * The Xtensa ABI and software conventions require the system-call - * number in a2. If an argument exists in a2, we move it to the next - * available register. Note that for improved efficiency, we do NOT - * shift all parameters down one register to maintain the original - * order. - * - * At best case (zero arguments), we just write the syscall number to - * a2. At worst case (1 to 6 arguments), we move the argument in a2 - * to the next available register, then write the syscall number to - * a2. - * - * For clarity, the following truth table enumerates all possibilities. - * - * arguments syscall number arg0, arg1, arg2, arg3, arg4, arg5 - * --------- -------------- ---------------------------------- - * 0 a2 - * 1 a2 a3 - * 2 a2 a4, a3 - * 3 a2 a5, a3, a4 - * 4 a2 a6, a3, a4, a5 - * 5 a2 a7, a3, a4, a5, a6 - * 6 a2 a8, a3, a4, a5, a6, a7 - */ - -#define _syscall0(type,name) \ -type name(void) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name) \ - : "a2" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall1(type,name,type0,arg0) \ -type name(type0 arg0) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a3, %2 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0) \ - : "a2", "a3" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall2(type,name,type0,arg0,type1,arg1) \ -type name(type0 arg0,type1 arg1) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a4, %2 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1) \ - : "a2", "a3", "a4" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall3(type,name,type0,arg0,type1,arg1,type2,arg2) \ -type name(type0 arg0,type1 arg1,type2 arg2) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a5, %2 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2) \ - : "a2", "a3", "a4", "a5" \ - ); \ -__syscall_return(type,__res); \ -} - -#define _syscall4(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a6, %2 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), "a" (arg3) \ - : "a2", "a3", "a4", "a5", "a6" \ - ); \ -__syscall_return(type,__res); \ -} - -/* Note that we save and restore the a7 frame pointer. - * Including a7 in the clobber list doesn't do what you'd expect. - */ -#define _syscall5(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3,type4 arg4) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a9, a7 \n" \ - " mov a7, %2 \n" \ - " mov a6, %6 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov a7, a9 \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), \ - "a" (arg3), "a" (arg4) \ - : "a2", "a3", "a4", "a5", "a6", "a9" \ - ); \ -__syscall_return(type,__res); \ -} - -/* Note that we save and restore the a7 frame pointer. - * Including a7 in the clobber list doesn't do what you'd expect. - */ -#define _syscall6(type,name,type0,arg0,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ -type name(type0 arg0,type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \ -{ \ -long __res; \ -__asm__ __volatile__ ( \ - " mov a9, a7 \n" \ - " mov a8, %2 \n" \ - " mov a7, %7 \n" \ - " mov a6, %6 \n" \ - " mov a5, %5 \n" \ - " mov a4, %4 \n" \ - " mov a3, %3 \n" \ - " movi a2, %1 \n" \ - " syscall \n" \ - " mov a7, a9 \n" \ - " mov %0, a2 \n" \ - : "=a" (__res) \ - : "i" (__NR_##name), "a" (arg0), "a" (arg1), "a" (arg2), \ - "a" (arg3), "a" (arg4), "a" (arg5) \ - : "a2", "a3", "a4", "a5", "a6", "a8", "a9" \ - ); \ -__syscall_return(type,__res); \ -} - /* * "Conditional" syscalls * -- cgit v1.2.3-70-g09d2 From 04903664325acb3f199dd8a4b8f1aa437e9fd6b2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:37:33 -0800 Subject: [PATCH] remove HASH_HIGHMEM It has no users and it's doubtful that we'll need it again. Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 3 +-- mm/page_alloc.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 31e9abb6d97..2275f274870 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -119,8 +119,7 @@ extern void *alloc_large_system_hash(const char *tablename, unsigned int *_hash_mask, unsigned long limit); -#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ -#define HASH_EARLY 0x00000002 /* Allocating during early boot? */ +#define HASH_EARLY 0x00000001 /* Allocating during early boot? */ /* Only NUMA needs hash distribution. * IA64 is known to have sufficient vmalloc space. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d539f83c62b..2273952300d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3226,7 +3226,7 @@ void *__init alloc_large_system_hash(const char *tablename, /* allow the kernel cmdline to have a say */ if (!numentries) { /* round applicable memory size up to nearest megabyte */ - numentries = (flags & HASH_HIGHMEM) ? nr_all_pages : nr_kernel_pages; + numentries = nr_kernel_pages; numentries += (1UL << (20 - PAGE_SHIFT)) - 1; numentries >>= 20 - PAGE_SHIFT; numentries <<= 20 - PAGE_SHIFT; -- cgit v1.2.3-70-g09d2 From cfd1893477fa94bb0915e39afa2f044ac978b5c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Dec 2006 20:37:38 -0800 Subject: [PATCH] ktime: Fix signed / unsigned mismatch in ktime_to_ns The 32 bit implementation of ktime_to_ns returns unsigned value, while the 64 bit version correctly returns an signed value. There is no current user affected by this, but it has to be fixed, as ktime values can be negative. Pointed-out-by: Helmut Duregger Signed-off-by: Thomas Gleixner Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 84eeecd60a0..611f17f79ee 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -248,9 +248,9 @@ static inline struct timeval ktime_to_timeval(const ktime_t kt) * * Returns the scalar nanoseconds representation of kt */ -static inline u64 ktime_to_ns(const ktime_t kt) +static inline s64 ktime_to_ns(const ktime_t kt) { - return (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; + return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; } #endif -- cgit v1.2.3-70-g09d2 From 651971cb7242e8f6d7ebd153e69bd271cb731223 Mon Sep 17 00:00:00 2001 From: suzuki Date: Wed, 6 Dec 2006 20:37:48 -0800 Subject: [PATCH] Fix the size limit of compat space msgsize Currently we allocate 64k space on the user stack and use it the msgbuf for sys_{msgrcv,msgsnd} for compat and the results are later copied in user [ by copy_in_user]. This patch introduces helper routines for sys_{msgrcv,msgsnd} as below: do_msgsnd() : Accepts the mtype and user space ptr to the buffer along with the msqid and msgflg. do_msgrcv() : Accepts a kernel space ptr to mtype and a userspace ptr to the buffer. The mtype has to be copied back the user space msgbuf by the caller. These changes avoid the need to allocate the msgsize on the userspace ( thus removing the size limt ) and the overhead of an extra copy_in_user(). Signed-off-by: Suzuki K P Cc: Arnd Bergmann Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 6 ++++++ ipc/compat.c | 23 +++++++---------------- ipc/msg.c | 44 +++++++++++++++++++++++++++++++++----------- 3 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/linux/msg.h b/include/linux/msg.h index acc7c174ff0..f1b60740d64 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -92,6 +92,12 @@ struct msg_queue { struct list_head q_senders; }; +/* Helper routines for sys_msgsnd and sys_msgrcv */ +extern long do_msgsnd(int msqid, long mtype, void __user *mtext, + size_t msgsz, int msgflg); +extern long do_msgrcv(int msqid, long *pmtype, void __user *mtext, + size_t msgsz, long msgtyp, int msgflg); + #endif /* __KERNEL__ */ #endif /* _LINUX_MSG_H */ diff --git a/ipc/compat.c b/ipc/compat.c index 4d20cfd38f0..fa18141539f 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -115,7 +115,6 @@ struct compat_shm_info { extern int sem_ctls[]; #define sc_semopm (sem_ctls[2]) -#define MAXBUF (64*1024) static inline int compat_ipc_parse_version(int *cmd) { @@ -307,35 +306,30 @@ long compat_sys_semctl(int first, int second, int third, void __user *uptr) long compat_sys_msgsnd(int first, int second, int third, void __user *uptr) { - struct msgbuf __user *p; struct compat_msgbuf __user *up = uptr; long type; if (first < 0) return -EINVAL; - if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf))) + if (second < 0) return -EINVAL; - p = compat_alloc_user_space(second + sizeof(struct msgbuf)); - if (get_user(type, &up->mtype) || - put_user(type, &p->mtype) || - copy_in_user(p->mtext, up->mtext, second)) + if (get_user(type, &up->mtype)) return -EFAULT; - return sys_msgsnd(first, p, second, third); + return do_msgsnd(first, type, up->mtext, second, third); } long compat_sys_msgrcv(int first, int second, int msgtyp, int third, int version, void __user *uptr) { - struct msgbuf __user *p; struct compat_msgbuf __user *up; long type; int err; if (first < 0) return -EINVAL; - if (second < 0 || (second >= MAXBUF - sizeof(struct msgbuf))) + if (second < 0) return -EINVAL; if (!version) { @@ -349,14 +343,11 @@ long compat_sys_msgrcv(int first, int second, int msgtyp, int third, uptr = compat_ptr(ipck.msgp); msgtyp = ipck.msgtyp; } - p = compat_alloc_user_space(second + sizeof(struct msgbuf)); - err = sys_msgrcv(first, p, second, msgtyp, third); + up = uptr; + err = do_msgrcv(first, &type, up->mtext, second, msgtyp, third); if (err < 0) goto out; - up = uptr; - if (get_user(type, &p->mtype) || - put_user(type, &up->mtype) || - copy_in_user(up->mtext, p->mtext, err)) + if (put_user(type, &up->mtype)) err = -EFAULT; out: return err; diff --git a/ipc/msg.c b/ipc/msg.c index 1266b1d0c8e..a388824740e 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -626,12 +626,11 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) return 0; } -asmlinkage long -sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) +long do_msgsnd(int msqid, long mtype, void __user *mtext, + size_t msgsz, int msgflg) { struct msg_queue *msq; struct msg_msg *msg; - long mtype; int err; struct ipc_namespace *ns; @@ -639,12 +638,10 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0) return -EINVAL; - if (get_user(mtype, &msgp->mtype)) - return -EFAULT; if (mtype < 1) return -EINVAL; - msg = load_msg(msgp->mtext, msgsz); + msg = load_msg(mtext, msgsz); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -723,6 +720,16 @@ out_free: return err; } +asmlinkage long +sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) +{ + long mtype; + + if (get_user(mtype, &msgp->mtype)) + return -EFAULT; + return do_msgsnd(msqid, mtype, msgp->mtext, msgsz, msgflg); +} + static inline int convert_mode(long *msgtyp, int msgflg) { /* @@ -742,8 +749,8 @@ static inline int convert_mode(long *msgtyp, int msgflg) return SEARCH_EQUAL; } -asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, - long msgtyp, int msgflg) +long do_msgrcv(int msqid, long *pmtype, void __user *mtext, + size_t msgsz, long msgtyp, int msgflg) { struct msg_queue *msq; struct msg_msg *msg; @@ -889,15 +896,30 @@ out_unlock: return PTR_ERR(msg); msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz; - if (put_user (msg->m_type, &msgp->mtype) || - store_msg(msgp->mtext, msg, msgsz)) { + *pmtype = msg->m_type; + if (store_msg(mtext, msg, msgsz)) msgsz = -EFAULT; - } + free_msg(msg); return msgsz; } +asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, + long msgtyp, int msgflg) +{ + long err, mtype; + + err = do_msgrcv(msqid, &mtype, msgp->mtext, msgsz, msgtyp, msgflg); + if (err < 0) + goto out; + + if (put_user(mtype, &msgp->mtype)) + err = -EFAULT; +out: + return err; +} + #ifdef CONFIG_PROC_FS static int sysvipc_msg_proc_show(struct seq_file *s, void *it) { -- cgit v1.2.3-70-g09d2 From 386d9a7edd9f3492c99124b0a659e9ed7abb30f9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 6 Dec 2006 20:37:53 -0800 Subject: [PATCH] elf: Always define elf_addr_t in linux/elf.h Define elf_addr_t in linux/elf.h. The size of the type is determined using ELF_CLASS. This allows us to remove the defines that today are spread all over .c and .h files. Signed-off-by: Magnus Damm Cc: Daniel Jacobowitz Cc: Roland McGrath Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/ia32priv.h | 2 -- arch/mips/kernel/binfmt_elfn32.c | 1 - arch/mips/kernel/binfmt_elfo32.c | 1 - arch/mips/kernel/irixelf.c | 4 ---- arch/parisc/kernel/binfmt_elf32.c | 1 - arch/s390/kernel/binfmt_elf32.c | 1 - arch/sparc64/kernel/binfmt_elf32.c | 1 - arch/x86_64/ia32/ia32_binfmt.c | 2 -- fs/binfmt_elf.c | 4 ---- fs/binfmt_elf_fdpic.c | 3 --- include/asm-powerpc/elf.h | 2 -- include/linux/elf.h | 2 ++ 12 files changed, 2 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index 703a67c934f..cfa0bc0026b 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -330,8 +330,6 @@ struct old_linux32_dirent { void ia64_elf32_init(struct pt_regs *regs); #define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r) -#define elf_addr_t u32 - /* This macro yields a bitmask that programs can use to figure out what instruction set this CPU supports. */ #define ELF_HWCAP 0 diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 4a9f1ecefaf..9b34238d41c 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -90,7 +90,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t u32 #define elf_caddr_t u32 #define init_elf_binfmt init_elfn32_binfmt diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index e3181377989..993f7ec70f3 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -92,7 +92,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t u32 #define elf_caddr_t u32 #define init_elf_binfmt init_elf32_binfmt diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c index ab12c8f0151..a82fae221ce 100644 --- a/arch/mips/kernel/irixelf.c +++ b/arch/mips/kernel/irixelf.c @@ -52,10 +52,6 @@ static struct linux_binfmt irix_format = { irix_core_dump, PAGE_SIZE }; -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif - #ifdef DEBUG /* Debugging routines. */ static char *get_elf_p_type(Elf32_Word p_type) diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c index 1e64e7b8811..ecb10a4f63c 100644 --- a/arch/parisc/kernel/binfmt_elf32.c +++ b/arch/parisc/kernel/binfmt_elf32.c @@ -75,7 +75,6 @@ struct elf_prpsinfo32 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -#define elf_addr_t unsigned int #define init_elf_binfmt init_elf32_binfmt #define ELF_PLATFORM ("PARISC32\0") diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c index 9565a2dcfad..5c46054195c 100644 --- a/arch/s390/kernel/binfmt_elf32.c +++ b/arch/s390/kernel/binfmt_elf32.c @@ -176,7 +176,6 @@ struct elf_prpsinfo32 #include -#define elf_addr_t u32 /* #define init_elf_binfmt init_elf32_binfmt */ diff --git a/arch/sparc64/kernel/binfmt_elf32.c b/arch/sparc64/kernel/binfmt_elf32.c index a98f3ae175a..9ad84ff10a1 100644 --- a/arch/sparc64/kernel/binfmt_elf32.c +++ b/arch/sparc64/kernel/binfmt_elf32.c @@ -141,7 +141,6 @@ cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) value->tv_sec = jiffies / HZ; } -#define elf_addr_t u32 #undef start_thread #define start_thread start_thread32 #define init_elf_binfmt init_elf32_binfmt diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 932a62ad6c8..543ef4f405e 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -305,8 +305,6 @@ MODULE_AUTHOR("Eric Youngdale, Andi Kleen"); #undef MODULE_DESCRIPTION #undef MODULE_AUTHOR -#define elf_addr_t __u32 - static void elf32_init(struct pt_regs *); #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 68e20d5bfe1..14ea630a857 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,10 +47,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif - /* * If we don't support core dumping, then supply a NULL so we * don't even try. diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f86d5c9ce5e..ed9a61c6beb 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -40,9 +40,6 @@ #include typedef char *elf_caddr_t; -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#endif #if 0 #define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ ) diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index b5436642a10..d36426c01b6 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -124,12 +124,10 @@ typedef elf_greg_t32 elf_gregset_t32[ELF_NGREG]; # define ELF_DATA ELFDATA2MSB typedef elf_greg_t64 elf_greg_t; typedef elf_gregset_t64 elf_gregset_t; -# define elf_addr_t unsigned long #else /* Assumption: ELF_ARCH == EM_PPC and ELF_CLASS == ELFCLASS32 */ typedef elf_greg_t32 elf_greg_t; typedef elf_gregset_t32 elf_gregset_t; -# define elf_addr_t __u32 #endif /* ELF_ARCH */ /* Floating point registers */ diff --git a/include/linux/elf.h b/include/linux/elf.h index 743d5c8e6d3..b403516d5c3 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -358,6 +358,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elfhdr elf32_hdr #define elf_phdr elf32_phdr #define elf_note elf32_note +#define elf_addr_t Elf32_Off #else @@ -365,6 +366,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elfhdr elf64_hdr #define elf_phdr elf64_phdr #define elf_note elf64_note +#define elf_addr_t Elf64_Off #endif -- cgit v1.2.3-70-g09d2 From b4c6c34a530b4d1c626f4ac0a884e0a9b849378c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 6 Dec 2006 20:38:11 -0800 Subject: [PATCH] kprobes: enable booster on the preemptible kernel When we are unregistering a kprobe-booster, we can't release its instruction buffer immediately on the preemptive kernel, because some processes might be preempted on the buffer. The freeze_processes() and thaw_processes() functions can clean most of processes up from the buffer. There are still some non-frozen threads who have the PF_NOFREEZE flag. If those threads are sleeping (not preempted) at the known place outside the buffer, we can ensure safety of freeing. However, the processing of this check routine takes a long time. So, this patch introduces the garbage collection mechanism of insn_slot. It also introduces the "dirty" flag to free_insn_slot because of efficiency. The "clean" instruction slots (dirty flag is cleared) are released immediately. But the "dirty" slots which are used by boosted kprobes, are marked as garbages. collect_garbage_slots() will be invoked to release "dirty" slots if there are more than INSNS_PER_PAGE garbage slots or if there are no unused slots. Cc: "Keshavamurthy, Anil S" Cc: Ananth N Mavinakayanahalli Cc: "bibo,mao" Cc: Prasanna S Panchamukhi Cc: Yumiko Sugita Cc: Satoshi Oshima Cc: Hideo Aoki Signed-off-by: Masami Hiramatsu Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 4 +- arch/ia64/kernel/kprobes.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/s390/kernel/kprobes.c | 2 +- arch/x86_64/kernel/kprobes.c | 2 +- include/linux/kprobes.h | 2 +- kernel/kprobes.c | 117 ++++++++++++++++++++++++++++++++++-------- 7 files changed, 103 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index fc79e1e859c..af1d5334499 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -184,7 +184,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); mutex_unlock(&kprobe_mutex); } @@ -333,7 +333,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 1; ss_probe: -#ifndef CONFIG_PREEMPT +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) if (p->ainsn.boostable == 1 && !p->post_handler){ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 51217d63285..4d592ee9300 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -481,7 +481,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } /* diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7b8d12b9026..4657563f881 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -85,7 +85,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 67914fe7f31..576368c4f60 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -200,7 +200,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index ac241567e68..209c8c0bec7 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -224,7 +224,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { mutex_lock(&kprobe_mutex); - free_insn_slot(p->ainsn.insn); + free_insn_slot(p->ainsn.insn, 0); mutex_unlock(&kprobe_mutex); } diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ac4c0559f75..769be39b968 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -165,7 +165,7 @@ extern void arch_disarm_kprobe(struct kprobe *p); extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); -extern void free_insn_slot(kprobe_opcode_t *slot); +extern void free_insn_slot(kprobe_opcode_t *slot, int dirty); extern void kprobes_inc_nmissed_count(struct kprobe *p); /* Get the kprobe at this addr (if any) - called with preemption disabled */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 610c837ad9e..17ec4afb099 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -83,9 +84,36 @@ struct kprobe_insn_page { kprobe_opcode_t *insns; /* Page of instruction slots */ char slot_used[INSNS_PER_PAGE]; int nused; + int ngarbage; }; static struct hlist_head kprobe_insn_pages; +static int kprobe_garbage_slots; +static int collect_garbage_slots(void); + +static int __kprobes check_safety(void) +{ + int ret = 0; +#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) + ret = freeze_processes(); + if (ret == 0) { + struct task_struct *p, *q; + do_each_thread(p, q) { + if (p != current && p->state == TASK_RUNNING && + p->pid != 0) { + printk("Check failed: %s is running\n",p->comm); + ret = -1; + goto loop_end; + } + } while_each_thread(p, q); + } +loop_end: + thaw_processes(); +#else + synchronize_sched(); +#endif + return ret; +} /** * get_insn_slot() - Find a slot on an executable page for an instruction. @@ -96,6 +124,7 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) struct kprobe_insn_page *kip; struct hlist_node *pos; + retry: hlist_for_each(pos, &kprobe_insn_pages) { kip = hlist_entry(pos, struct kprobe_insn_page, hlist); if (kip->nused < INSNS_PER_PAGE) { @@ -112,7 +141,11 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) } } - /* All out of space. Need to allocate a new page. Use slot 0.*/ + /* If there are any garbage slots, collect it and try again. */ + if (kprobe_garbage_slots && collect_garbage_slots() == 0) { + goto retry; + } + /* All out of space. Need to allocate a new page. Use slot 0. */ kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL); if (!kip) { return NULL; @@ -133,10 +166,62 @@ kprobe_opcode_t __kprobes *get_insn_slot(void) memset(kip->slot_used, 0, INSNS_PER_PAGE); kip->slot_used[0] = 1; kip->nused = 1; + kip->ngarbage = 0; return kip->insns; } -void __kprobes free_insn_slot(kprobe_opcode_t *slot) +/* Return 1 if all garbages are collected, otherwise 0. */ +static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) +{ + kip->slot_used[idx] = 0; + kip->nused--; + if (kip->nused == 0) { + /* + * Page is no longer in use. Free it unless + * it's the last one. We keep the last one + * so as not to have to set it up again the + * next time somebody inserts a probe. + */ + hlist_del(&kip->hlist); + if (hlist_empty(&kprobe_insn_pages)) { + INIT_HLIST_NODE(&kip->hlist); + hlist_add_head(&kip->hlist, + &kprobe_insn_pages); + } else { + module_free(NULL, kip->insns); + kfree(kip); + } + return 1; + } + return 0; +} + +static int __kprobes collect_garbage_slots(void) +{ + struct kprobe_insn_page *kip; + struct hlist_node *pos, *next; + + /* Ensure no-one is preepmted on the garbages */ + if (check_safety() != 0) + return -EAGAIN; + + hlist_for_each_safe(pos, next, &kprobe_insn_pages) { + int i; + kip = hlist_entry(pos, struct kprobe_insn_page, hlist); + if (kip->ngarbage == 0) + continue; + kip->ngarbage = 0; /* we will collect all garbages */ + for (i = 0; i < INSNS_PER_PAGE; i++) { + if (kip->slot_used[i] == -1 && + collect_one_slot(kip, i)) + break; + } + } + kprobe_garbage_slots = 0; + return 0; +} + +void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -146,28 +231,18 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) if (kip->insns <= slot && slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) { int i = (slot - kip->insns) / MAX_INSN_SIZE; - kip->slot_used[i] = 0; - kip->nused--; - if (kip->nused == 0) { - /* - * Page is no longer in use. Free it unless - * it's the last one. We keep the last one - * so as not to have to set it up again the - * next time somebody inserts a probe. - */ - hlist_del(&kip->hlist); - if (hlist_empty(&kprobe_insn_pages)) { - INIT_HLIST_NODE(&kip->hlist); - hlist_add_head(&kip->hlist, - &kprobe_insn_pages); - } else { - module_free(NULL, kip->insns); - kfree(kip); - } + if (dirty) { + kip->slot_used[i] = -1; + kip->ngarbage++; + } else { + collect_one_slot(kip, i); } - return; + break; } } + if (dirty && (++kprobe_garbage_slots > INSNS_PER_PAGE)) { + collect_garbage_slots(); + } } #endif -- cgit v1.2.3-70-g09d2 From 83df8db9e62129975fab6d800fb381faf0dfee74 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Dec 2006 20:38:14 -0800 Subject: [PATCH] declare smp_call_function_single in generic code smp_call_function_single() needs to be visible in non-SMP builds, to fix: arch/x86_64/kernel/vsyscall.c:283: warning: implicit declaration of function 'smp_call_function_single' Signed-off-by: Randy Dunlap Cc: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-x86_64/smp.h | 7 ------- include/linux/smp.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index d6b7c057edb..f1bdd500d7a 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -118,13 +118,6 @@ static __inline int logical_smp_processor_id(void) #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] #else #define cpu_physical_id(cpu) boot_cpu_id -static inline int smp_call_function_single(int cpuid, void (*func) (void *info), - void *info, int retry, int wait) -{ - /* Disable interrupts here? */ - func(info); - return 0; -} #endif /* !CONFIG_SMP */ #endif diff --git a/include/linux/smp.h b/include/linux/smp.h index 51649987f69..7ba23ec8211 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -99,6 +99,13 @@ static inline int up_smp_call_function(void) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) +static inline int smp_call_function_single(int cpuid, void (*func) (void *info), + void *info, int retry, int wait) +{ + /* Disable interrupts here? */ + func(info); + return 0; +} #endif /* !SMP */ -- cgit v1.2.3-70-g09d2 From 02316067852187b8bec781bec07410e91af79627 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 Dec 2006 20:38:17 -0800 Subject: [PATCH] hotplug CPU: clean up hotcpu_notifier() use There was lots of #ifdef noise in the kernel due to hotcpu_notifier(fn, prio) not correctly marking 'fn' as used in the !HOTPLUG_CPU case, and thus generating compiler warnings of unused symbols, hence forcing people to add #ifdefs. the compiler can skip truly unused functions just fine: text data bss dec hex filename 1624412 728710 3674856 6027978 5bfaca vmlinux.before 1624412 728710 3674856 6027978 5bfaca vmlinux.after [akpm@osdl.org: topology.c fix] Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mcheck/therm_throt.c | 2 -- arch/i386/kernel/cpuid.c | 2 -- arch/i386/kernel/microcode.c | 2 -- arch/i386/kernel/msr.c | 2 -- arch/ia64/kernel/palinfo.c | 2 -- arch/ia64/kernel/salinfo.c | 2 -- arch/s390/appldata/appldata_base.c | 2 -- arch/x86_64/kernel/mce.c | 2 -- arch/x86_64/kernel/mce_amd.c | 4 ++-- arch/x86_64/kernel/vsyscall.c | 2 -- block/ll_rw_blk.c | 4 ---- drivers/base/topology.c | 2 -- drivers/cpufreq/cpufreq.c | 2 -- fs/buffer.c | 2 -- include/linux/cpu.h | 6 +++--- kernel/cpuset.c | 4 ---- kernel/profile.c | 3 +-- kernel/sched.c | 3 --- kernel/workqueue.c | 2 -- lib/radix-tree.c | 2 -- mm/page_alloc.c | 4 ---- mm/swap.c | 2 ++ mm/vmscan.c | 2 -- net/core/dev.c | 2 -- net/core/flow.c | 2 -- 25 files changed, 8 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index bad8b442070..065005c3f16 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -116,7 +116,6 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); } -#ifdef CONFIG_HOTPLUG_CPU static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) { return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); @@ -153,7 +152,6 @@ static struct notifier_block thermal_throttle_cpu_notifier = { .notifier_call = thermal_throttle_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ static __init int thermal_throttle_init_device(void) { diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index ab0c327e79d..23b2cc748d4 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -167,7 +167,6 @@ static int cpuid_device_create(int i) return err; } -#ifdef CONFIG_HOTPLUG_CPU static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -187,7 +186,6 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; -#endif /* !CONFIG_HOTPLUG_CPU */ static int __init cpuid_init(void) { diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 23f5984d065..972346604f9 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -703,7 +703,6 @@ static struct sysdev_driver mc_sysdev_driver = { .resume = mc_sysdev_resume, }; -#ifdef CONFIG_HOTPLUG_CPU static __cpuinit int mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -726,7 +725,6 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) static struct notifier_block mc_cpu_notifier = { .notifier_call = mc_cpu_callback, }; -#endif static int __init microcode_init (void) { diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index a773f776c9e..7763c67ca28 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -250,7 +250,6 @@ static int msr_device_create(int i) return err; } -#ifdef CONFIG_HOTPLUG_CPU static int msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -271,7 +270,6 @@ static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; -#endif static int __init msr_init(void) { diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index 0b546e2b36a..c4c10a0b99d 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -952,7 +952,6 @@ remove_palinfo_proc_entries(unsigned int hcpu) } } -#ifdef CONFIG_HOTPLUG_CPU static int palinfo_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -974,7 +973,6 @@ static struct notifier_block palinfo_cpu_notifier = .notifier_call = palinfo_cpu_callback, .priority = 0, }; -#endif static int __init palinfo_init(void) diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index e63b8ca5344..fd607ca51a8 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -575,7 +575,6 @@ static struct file_operations salinfo_data_fops = { .write = salinfo_log_write, }; -#ifdef CONFIG_HOTPLUG_CPU static int __devinit salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { @@ -620,7 +619,6 @@ static struct notifier_block salinfo_cpu_notifier = .notifier_call = salinfo_cpu_callback, .priority = 0, }; -#endif /* CONFIG_HOTPLUG_CPU */ static int __init salinfo_init(void) diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 67d5cf9cba8..b8c23729026 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -561,7 +561,6 @@ appldata_offline_cpu(int cpu) spin_unlock(&appldata_timer_lock); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit appldata_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -582,7 +581,6 @@ appldata_cpu_notify(struct notifier_block *self, static struct notifier_block appldata_nb = { .notifier_call = appldata_cpu_notify, }; -#endif /* * appldata_init() diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index c7587fc3901..bc863c464a1 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -641,7 +641,6 @@ static __cpuinit int mce_create_device(unsigned int cpu) return err; } -#ifdef CONFIG_HOTPLUG_CPU static void mce_remove_device(unsigned int cpu) { int i; @@ -674,7 +673,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) static struct notifier_block mce_cpu_notifier = { .notifier_call = mce_cpu_callback, }; -#endif static __init int mce_init_device(void) { diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index 883fe747f64..fa09debad4b 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -551,7 +551,6 @@ out: return err; } -#ifdef CONFIG_HOTPLUG_CPU /* * let's be hotplug friendly. * in case of multiple core processors, the first core always takes ownership @@ -594,12 +593,14 @@ static void threshold_remove_bank(unsigned int cpu, int bank) sprintf(name, "threshold_bank%i", bank); +#ifdef CONFIG_SMP /* sibling symlink */ if (shared_bank[bank] && b->blocks->cpu != cpu) { sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); per_cpu(threshold_banks, cpu)[bank] = NULL; return; } +#endif /* remove all sibling symlinks before unregistering */ for_each_cpu_mask(i, b->cpus) { @@ -656,7 +657,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb, static struct notifier_block threshold_cpu_notifier = { .notifier_call = threshold_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ static __init int threshold_init_device(void) { diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 630036c06c7..3785e495473 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -275,7 +275,6 @@ static void __cpuinit cpu_vsyscall_init(void *arg) vsyscall_set_cpu(raw_smp_processor_id()); } -#ifdef CONFIG_HOTPLUG_CPU static int __cpuinit cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { @@ -284,7 +283,6 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } -#endif static void __init map_vsyscall(void) { diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index a4ff3271d4a..31512cd9f3a 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3459,8 +3459,6 @@ static void blk_done_softirq(struct softirq_action *h) } } -#ifdef CONFIG_HOTPLUG_CPU - static int blk_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -3486,8 +3484,6 @@ static struct notifier_block __devinitdata blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; -#endif /* CONFIG_HOTPLUG_CPU */ - /** * blk_complete_request - end I/O on a request * @req: the request being processed diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 3d12b85b096..067a9e8bc37 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -108,7 +108,6 @@ static int __cpuinit topology_add_dev(unsigned int cpu) return rc; } -#ifdef CONFIG_HOTPLUG_CPU static void __cpuinit topology_remove_dev(unsigned int cpu) { struct sys_device *sys_dev = get_cpu_sysdev(cpu); @@ -136,7 +135,6 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, } return rc ? NOTIFY_BAD : NOTIFY_OK; } -#endif static int __cpuinit topology_sysfs_init(void) { diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7a7c6e6dfe4..47ab42db122 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1537,7 +1537,6 @@ int cpufreq_update_policy(unsigned int cpu) } EXPORT_SYMBOL(cpufreq_update_policy); -#ifdef CONFIG_HOTPLUG_CPU static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { @@ -1577,7 +1576,6 @@ static struct notifier_block __cpuinitdata cpufreq_cpu_notifier = { .notifier_call = cpufreq_cpu_callback, }; -#endif /* CONFIG_HOTPLUG_CPU */ /********************************************************************* * REGISTER / UNREGISTER CPUFREQ DRIVER * diff --git a/fs/buffer.c b/fs/buffer.c index a8ca0ac2148..517860f2d75 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2972,7 +2972,6 @@ init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags) } } -#ifdef CONFIG_HOTPLUG_CPU static void buffer_exit_cpu(int cpu) { int i; @@ -2994,7 +2993,6 @@ static int buffer_cpu_notify(struct notifier_block *self, buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init buffer_init(void) { diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f02d71bf689..71dc6ba4f73 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,9 +89,9 @@ int cpu_down(unsigned int cpu); #define lock_cpu_hotplug() do { } while (0) #define unlock_cpu_hotplug() do { } while (0) #define lock_cpu_hotplug_interruptible() 0 -#define hotcpu_notifier(fn, pri) do { } while (0) -#define register_hotcpu_notifier(nb) do { } while (0) -#define unregister_hotcpu_notifier(nb) do { } while (0) +#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define register_hotcpu_notifier(nb) do { (void)(nb); } while (0) +#define unregister_hotcpu_notifier(nb) do { (void)(nb); } while (0) /* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */ static inline int cpu_is_offline(int cpu) { return 0; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index bd1e89c4c96..9b62b4c03ad 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2044,7 +2044,6 @@ out: return err; } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMORY_HOTPLUG) /* * If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs * or memory nodes, we need to walk over the cpuset hierarchy, @@ -2108,9 +2107,7 @@ static void common_cpu_mem_hotplug_unplug(void) mutex_unlock(&callback_mutex); mutex_unlock(&manage_mutex); } -#endif -#ifdef CONFIG_HOTPLUG_CPU /* * The top_cpuset tracks what CPUs and Memory Nodes are online, * period. This is necessary in order to make cpusets transparent @@ -2127,7 +2124,6 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb, common_cpu_mem_hotplug_unplug(); return 0; } -#endif #ifdef CONFIG_MEMORY_HOTPLUG /* diff --git a/kernel/profile.c b/kernel/profile.c index 04fd84e8cdb..0961d93e1d9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -319,7 +319,6 @@ out: put_cpu(); } -#ifdef CONFIG_HOTPLUG_CPU static int __devinit profile_cpu_callback(struct notifier_block *info, unsigned long action, void *__cpu) { @@ -372,10 +371,10 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #else /* !CONFIG_SMP */ #define profile_flip_buffers() do { } while (0) #define profile_discard_flip_buffers() do { } while (0) +#define profile_cpu_callback NULL void profile_hits(int type, void *__pc, unsigned int nr_hits) { diff --git a/kernel/sched.c b/kernel/sched.c index 75a005ed4ed..c83f531c288 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6740,8 +6740,6 @@ SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, sched_smt_power_savings_store); #endif - -#ifdef CONFIG_HOTPLUG_CPU /* * Force a reinitialization of the sched domains hierarchy. The domains * and groups cannot be updated in place without racing with the balancing @@ -6774,7 +6772,6 @@ static int update_sched_domains(struct notifier_block *nfb, return NOTIFY_OK; } -#endif void __init sched_init_smp(void) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5484d6e045c..c5257316f4b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -655,7 +655,6 @@ int current_is_keventd(void) } -#ifdef CONFIG_HOTPLUG_CPU /* Take the work from this (downed) CPU. */ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) { @@ -738,7 +737,6 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif void init_workqueues(void) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e2cefabb5aa..d69ddbe4386 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -996,7 +996,6 @@ static __init void radix_tree_init_maxindex(void) height_to_maxindex[i] = __maxindex(i); } -#ifdef CONFIG_HOTPLUG_CPU static int radix_tree_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -1016,7 +1015,6 @@ static int radix_tree_callback(struct notifier_block *nfb, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init radix_tree_init(void) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2273952300d..27ec7a1b802 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -701,7 +701,6 @@ void drain_node_pages(int nodeid) } #endif -#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) static void __drain_pages(unsigned int cpu) { unsigned long flags; @@ -723,7 +722,6 @@ static void __drain_pages(unsigned int cpu) } } } -#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM @@ -2907,7 +2905,6 @@ void __init free_area_init(unsigned long *zones_size) __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } -#ifdef CONFIG_HOTPLUG_CPU static int page_alloc_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -2922,7 +2919,6 @@ static int page_alloc_cpu_notify(struct notifier_block *self, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ void __init page_alloc_init(void) { diff --git a/mm/swap.c b/mm/swap.c index 017e72ca9bb..2ed7be39795 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -514,5 +514,7 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ +#ifdef CONFIG_HOTPLUG_CPU hotcpu_notifier(cpu_swap_callback, 0); +#endif } diff --git a/mm/vmscan.c b/mm/vmscan.c index f6616e81fac..093f5fe6dd7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1513,7 +1513,6 @@ out: } #endif -#ifdef CONFIG_HOTPLUG_CPU /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes away, we get changed to run anywhere: as the first one comes back, @@ -1534,7 +1533,6 @@ static int __devinit cpu_callback(struct notifier_block *nfb, } return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ /* * This kswapd start function will be called by init and node-hot-add. diff --git a/net/core/dev.c b/net/core/dev.c index 59d058a3b50..e660cb57e42 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3340,7 +3340,6 @@ void unregister_netdev(struct net_device *dev) EXPORT_SYMBOL(unregister_netdev); -#ifdef CONFIG_HOTPLUG_CPU static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, void *ocpu) @@ -3384,7 +3383,6 @@ static int dev_cpu_callback(struct notifier_block *nfb, return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_NET_DMA /** diff --git a/net/core/flow.c b/net/core/flow.c index 104c25d00a1..d137f971f97 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -340,7 +340,6 @@ static void __devinit flow_cache_cpu_prepare(int cpu) tasklet_init(tasklet, flow_cache_flush_tasklet, 0); } -#ifdef CONFIG_HOTPLUG_CPU static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) @@ -349,7 +348,6 @@ static int flow_cache_cpu(struct notifier_block *nfb, __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } -#endif /* CONFIG_HOTPLUG_CPU */ static int __init flow_cache_init(void) { -- cgit v1.2.3-70-g09d2 From ebe7e5fe4b41deeb2731c5b52d8c8e6ac08b1f74 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:21 -0800 Subject: [PATCH] remove kernel/lockdep.c:lockdep_internal Remove the no longer used lockdep_internal(). Signed-off-by: Adrian Bunk Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 6 ------ kernel/lockdep.c | 7 ------- 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index da19aeb0dbe..498bfbd3b4e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -193,7 +193,6 @@ extern void lockdep_free_key_range(void *start, unsigned long size); extern void lockdep_off(void); extern void lockdep_on(void); -extern int lockdep_internal(void); /* * These methods are used by specific locking variants (spinlocks, @@ -255,11 +254,6 @@ static inline void lockdep_on(void) { } -static inline int lockdep_internal(void) -{ - return 0; -} - # define lock_acquire(l, s, t, r, c, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) # define lockdep_init() do { } while (0) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 300d61bb314..3926c367435 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -140,13 +140,6 @@ void lockdep_on(void) EXPORT_SYMBOL(lockdep_on); -int lockdep_internal(void) -{ - return current->lockdep_recursion != 0; -} - -EXPORT_SYMBOL(lockdep_internal); - /* * Debugging switches: */ -- cgit v1.2.3-70-g09d2 From d3228a887cae75ef2b8b1211c31c539bef5a5698 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:22 -0800 Subject: [PATCH] make kernel/signal.c:kill_proc_info() static Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/signal.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0a90cefb0b0..3a767242e72 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1305,7 +1305,6 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp); extern int kill_pg_info(int, struct siginfo *, pid_t); -extern int kill_proc_info(int, struct siginfo *, pid_t); extern void do_notify_parent(struct task_struct *, int); extern void force_sig(int, struct task_struct *); extern void force_sig_specific(int, struct task_struct *); diff --git a/kernel/signal.c b/kernel/signal.c index bc972d7e631..ec81defde33 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1134,8 +1134,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) return error; } -int -kill_proc_info(int sig, struct siginfo *info, pid_t pid) +static int kill_proc_info(int sig, struct siginfo *info, pid_t pid) { int error; rcu_read_lock(); -- cgit v1.2.3-70-g09d2 From d394e122bc1adba0f3eb1ebec1cedb8a8c524741 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:26 -0800 Subject: [PATCH] make fs/jbd/transaction.c:__journal_temp_unlink_buffer() static Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/transaction.c | 4 +++- include/linux/jbd.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 4f82bcd63e4..d38e0d575e4 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -27,6 +27,8 @@ #include #include +static void __journal_temp_unlink_buffer(struct journal_head *jh); + /* * get_transaction: obtain a new transaction_t object. * @@ -1499,7 +1501,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * * Called under j_list_lock. The journal may not be locked. */ -void __journal_temp_unlink_buffer(struct journal_head *jh) +static void __journal_temp_unlink_buffer(struct journal_head *jh) { struct journal_head **list = NULL; transaction_t *transaction; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index dacd566c9f6..45273755126 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -839,7 +839,6 @@ struct journal_s */ /* Filing buffers */ -extern void __journal_temp_unlink_buffer(struct journal_head *jh); extern void journal_unfile_buffer(journal_t *, struct journal_head *); extern void __journal_unfile_buffer(struct journal_head *); extern void __journal_refile_buffer(struct journal_head *); -- cgit v1.2.3-70-g09d2 From 7ddae86095794cce4364740edd8463c77654a265 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:27 -0800 Subject: [PATCH] make fs/jbd2/transaction.c:__kbd2_journal_temp_unlink_buffer() static Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/transaction.c | 2 ++ include/linux/jbd2.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index c051a94c8a9..3a8700153cb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -27,6 +27,8 @@ #include #include +static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); + /* * jbd2_get_transaction: obtain a new transaction_t object. * diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 98a0ae52660..0e0fedd2039 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -848,7 +848,6 @@ struct journal_s */ /* Filing buffers */ -extern void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_unfile_buffer(struct journal_head *); extern void __jbd2_journal_refile_buffer(struct journal_head *); -- cgit v1.2.3-70-g09d2 From c585646dd1d98caf0a5f2e85c794c1441df6fac1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:38:29 -0800 Subject: [PATCH] fs/lockd/host.c: make 2 functions static Make the following needlessly global functions static: - nlm_lookup_host() - nsm_find() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/lockd/host.c | 53 ++++++++++++++++++++++++--------------------- include/linux/lockd/lockd.h | 2 -- 2 files changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index fb24a973034..3d4610c2a26 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -36,34 +36,14 @@ static DEFINE_MUTEX(nlm_host_mutex); static void nlm_gc_hosts(void); static struct nsm_handle * __nsm_find(const struct sockaddr_in *, const char *, int, int); - -/* - * Find an NLM server handle in the cache. If there is none, create it. - */ -struct nlm_host * -nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, - const char *hostname, int hostname_len) -{ - return nlm_lookup_host(0, sin, proto, version, - hostname, hostname_len); -} - -/* - * Find an NLM client handle in the cache. If there is none, create it. - */ -struct nlm_host * -nlmsvc_lookup_host(struct svc_rqst *rqstp, - const char *hostname, int hostname_len) -{ - return nlm_lookup_host(1, &rqstp->rq_addr, - rqstp->rq_prot, rqstp->rq_vers, - hostname, hostname_len); -} +static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, + const char *hostname, + int hostname_len); /* * Common host lookup routine for server & client */ -struct nlm_host * +static struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *sin, int proto, int version, const char *hostname, @@ -194,6 +174,29 @@ nlm_destroy_host(struct nlm_host *host) kfree(host); } +/* + * Find an NLM server handle in the cache. If there is none, create it. + */ +struct nlm_host * +nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, + const char *hostname, int hostname_len) +{ + return nlm_lookup_host(0, sin, proto, version, + hostname, hostname_len); +} + +/* + * Find an NLM client handle in the cache. If there is none, create it. + */ +struct nlm_host * +nlmsvc_lookup_host(struct svc_rqst *rqstp, + const char *hostname, int hostname_len) +{ + return nlm_lookup_host(1, &rqstp->rq_addr, + rqstp->rq_prot, rqstp->rq_vers, + hostname, hostname_len); +} + /* * Create the NLM RPC client for an NLM peer */ @@ -495,7 +498,7 @@ out: return nsm; } -struct nsm_handle * +static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) { return __nsm_find(sin, hostname, hostname_len, 1); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 862d9730a60..8c39654549d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -164,14 +164,12 @@ void nlmclnt_next_cookie(struct nlm_cookie *); */ struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int); struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int); -struct nlm_host * nlm_lookup_host(int server, const struct sockaddr_in *, int, int, const char *, int); struct rpc_clnt * nlm_bind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_release_host(struct nlm_host *); void nlm_shutdown_hosts(void); extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32); -struct nsm_handle *nsm_find(const struct sockaddr_in *, const char *, int); void nsm_release(struct nsm_handle *); -- cgit v1.2.3-70-g09d2 From d9489fb60614794cbca4b6b173c60ed9388974c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Dec 2006 20:38:43 -0800 Subject: [PATCH] kernel-doc: fix fusion and i2o docs Correct lots of typos, kernel-doc warnings, & kernel-doc usage in fusion and i2o drivers. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/message/fusion/mptbase.c | 255 ++++++++++++++++++++------------------ drivers/message/fusion/mptfc.c | 5 +- drivers/message/fusion/mptscsih.c | 24 ++-- drivers/message/fusion/mptspi.c | 4 +- drivers/message/i2o/bus-osm.c | 3 + drivers/message/i2o/device.c | 8 +- drivers/message/i2o/driver.c | 20 ++- drivers/message/i2o/exec-osm.c | 10 +- drivers/message/i2o/i2o_block.c | 15 ++- drivers/message/i2o/i2o_proc.c | 2 +- drivers/message/i2o/i2o_scsi.c | 11 +- drivers/message/i2o/pci.c | 5 +- include/linux/i2o.h | 18 +-- 13 files changed, 203 insertions(+), 177 deletions(-) (limited to 'include') diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 051b7c5b8f0..6e068cf1049 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -347,7 +347,7 @@ mpt_reply(MPT_ADAPTER *ioc, u32 pa) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_interrupt - MPT adapter (IOC) specific interrupt handler. * @irq: irq number (not used) * @bus_id: bus identifier cookie == pointer to MPT_ADAPTER structure @@ -387,14 +387,16 @@ mpt_interrupt(int irq, void *bus_id) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_base_reply - MPT base driver's callback routine; all base driver - * "internal" request/reply processing is routed here. - * Currently used for EventNotification and EventAck handling. +/** + * mpt_base_reply - MPT base driver's callback routine * @ioc: Pointer to MPT_ADAPTER structure * @mf: Pointer to original MPT request frame * @reply: Pointer to MPT reply frame (NULL if TurboReply) * + * MPT base driver's callback routine; all base driver + * "internal" request/reply processing is routed here. + * Currently used for EventNotification and EventAck handling. + * * Returns 1 indicating original alloc'd request frame ptr * should be freed, or 0 if it shouldn't. */ @@ -530,7 +532,7 @@ mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply) * @dclass: Protocol driver's class (%MPT_DRIVER_CLASS enum value) * * This routine is called by a protocol-specific driver (SCSI host, - * LAN, SCSI target) to register it's reply callback routine. Each + * LAN, SCSI target) to register its reply callback routine. Each * protocol-specific driver must do this before it will be able to * use any IOC resources, such as obtaining request frames. * @@ -572,7 +574,7 @@ mpt_register(MPT_CALLBACK cbfunc, MPT_DRIVER_CLASS dclass) * mpt_deregister - Deregister a protocol drivers resources. * @cb_idx: previously registered callback handle * - * Each protocol-specific driver should call this routine when it's + * Each protocol-specific driver should call this routine when its * module is unloaded. */ void @@ -617,7 +619,7 @@ mpt_event_register(int cb_idx, MPT_EVHANDLER ev_cbfunc) * * Each protocol-specific driver should call this routine * when it does not (or can no longer) handle events, - * or when it's module is unloaded. + * or when its module is unloaded. */ void mpt_event_deregister(int cb_idx) @@ -656,7 +658,7 @@ mpt_reset_register(int cb_idx, MPT_RESETHANDLER reset_func) * * Each protocol-specific driver should call this routine * when it does not (or can no longer) handle IOC reset handling, - * or when it's module is unloaded. + * or when its module is unloaded. */ void mpt_reset_deregister(int cb_idx) @@ -670,6 +672,8 @@ mpt_reset_deregister(int cb_idx) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_device_driver_register - Register device driver hooks + * @dd_cbfunc: driver callbacks struct + * @cb_idx: MPT protocol driver index */ int mpt_device_driver_register(struct mpt_pci_driver * dd_cbfunc, int cb_idx) @@ -696,6 +700,7 @@ mpt_device_driver_register(struct mpt_pci_driver * dd_cbfunc, int cb_idx) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_device_driver_deregister - DeRegister device driver hooks + * @cb_idx: MPT protocol driver index */ void mpt_device_driver_deregister(int cb_idx) @@ -887,8 +892,7 @@ mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_send_handshake_request - Send MPT request via doorbell - * handshake method. + * mpt_send_handshake_request - Send MPT request via doorbell handshake method. * @handle: Handle of registered MPT protocol driver * @ioc: Pointer to MPT adapter structure * @reqBytes: Size of the request in bytes @@ -981,10 +985,13 @@ mpt_send_handshake_request(int handle, MPT_ADAPTER *ioc, int reqBytes, u32 *req, /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_host_page_access_control - provides mechanism for the host - * driver to control the IOC's Host Page Buffer access. + * mpt_host_page_access_control - control the IOC's Host Page Buffer access * @ioc: Pointer to MPT adapter structure * @access_control_value: define bits below + * @sleepFlag: Specifies whether the process can sleep + * + * Provides mechanism for the host driver to control the IOC's + * Host Page Buffer access. * * Access Control Value - bits[15:12] * 0h Reserved @@ -1022,10 +1029,10 @@ mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int slee /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_host_page_alloc - allocate system memory for the fw - * If we already allocated memory in past, then resend the same pointer. - * ioc@: Pointer to pointer to IOC adapter - * ioc_init@: Pointer to ioc init config page + * @ioc: Pointer to pointer to IOC adapter + * @ioc_init: Pointer to ioc init config page * + * If we already allocated memory in past, then resend the same pointer. * Returns 0 for success, non-zero for failure. */ static int @@ -1091,12 +1098,15 @@ return 0; /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_verify_adapter - Given a unique IOC identifier, set pointer to - * the associated MPT adapter structure. + * mpt_verify_adapter - Given IOC identifier, set pointer to its adapter structure. * @iocid: IOC unique identifier (integer) * @iocpp: Pointer to pointer to IOC adapter * - * Returns iocid and sets iocpp. + * Given a unique IOC identifier, set pointer to the associated MPT + * adapter structure. + * + * Returns iocid and sets iocpp if iocid is found. + * Returns -1 if iocid is not found. */ int mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp) @@ -1115,9 +1125,10 @@ mpt_verify_adapter(int iocid, MPT_ADAPTER **iocpp) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_attach - Install a PCI intelligent MPT adapter. * @pdev: Pointer to pci_dev structure + * @id: PCI device ID information * * This routine performs all the steps necessary to bring the IOC of * a MPT adapter to a OPERATIONAL state. This includes registering @@ -1417,10 +1428,9 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_detach - Remove a PCI intelligent MPT adapter. * @pdev: Pointer to pci_dev structure - * */ void @@ -1466,10 +1476,10 @@ mpt_detach(struct pci_dev *pdev) */ #ifdef CONFIG_PM /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_suspend - Fusion MPT base driver suspend routine. - * - * + * @pdev: Pointer to pci_dev structure + * @state: new state to enter */ int mpt_suspend(struct pci_dev *pdev, pm_message_t state) @@ -1505,10 +1515,9 @@ mpt_suspend(struct pci_dev *pdev, pm_message_t state) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_resume - Fusion MPT base driver resume routine. - * - * + * @pdev: Pointer to pci_dev structure */ int mpt_resume(struct pci_dev *pdev) @@ -1566,7 +1575,7 @@ mpt_signal_reset(int index, MPT_ADAPTER *ioc, int reset_phase) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_do_ioc_recovery - Initialize or recover MPT adapter. * @ioc: Pointer to MPT adapter structure * @reason: Event word / reason @@ -1892,13 +1901,15 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_detect_bound_ports - Search for PCI bus/dev_function - * which matches PCI bus/dev_function (+/-1) for newly discovered 929, - * 929X, 1030 or 1035. +/** + * mpt_detect_bound_ports - Search for matching PCI bus/dev_function * @ioc: Pointer to MPT adapter structure * @pdev: Pointer to (struct pci_dev) structure * + * Search for PCI bus/dev_function which matches + * PCI bus/dev_function (+/-1) for newly discovered 929, + * 929X, 1030 or 1035. + * * If match on PCI dev_function +/-1 is found, bind the two MPT adapters * using alt_ioc pointer fields in their %MPT_ADAPTER structures. */ @@ -1945,9 +1956,9 @@ mpt_detect_bound_ports(MPT_ADAPTER *ioc, struct pci_dev *pdev) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_adapter_disable - Disable misbehaving MPT adapter. - * @this: Pointer to MPT adapter structure + * @ioc: Pointer to MPT adapter structure */ static void mpt_adapter_disable(MPT_ADAPTER *ioc) @@ -2046,9 +2057,8 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_adapter_dispose - Free all resources associated with a MPT - * adapter. +/** + * mpt_adapter_dispose - Free all resources associated with an MPT adapter * @ioc: Pointer to MPT adapter structure * * This routine unregisters h/w resources and frees all alloc'd memory @@ -2099,8 +2109,8 @@ mpt_adapter_dispose(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * MptDisplayIocCapabilities - Disply IOC's capacilities. +/** + * MptDisplayIocCapabilities - Disply IOC's capabilities. * @ioc: Pointer to MPT adapter structure */ static void @@ -2142,7 +2152,7 @@ MptDisplayIocCapabilities(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * MakeIocReady - Get IOC to a READY state, using KickStart if needed. * @ioc: Pointer to MPT_ADAPTER structure * @force: Force hard KickStart of IOC @@ -2279,7 +2289,7 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_GetIocState - Get the current state of a MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @cooked: Request raw or cooked IOC state @@ -2304,7 +2314,7 @@ mpt_GetIocState(MPT_ADAPTER *ioc, int cooked) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetIocFacts - Send IOCFacts request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2478,7 +2488,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetPortFacts - Send PortFacts request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @portnum: Port number @@ -2545,7 +2555,7 @@ GetPortFacts(MPT_ADAPTER *ioc, int portnum, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendIocInit - Send IOCInit request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2630,7 +2640,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) } /* No need to byte swap the multibyte fields in the reply - * since we don't even look at it's contents. + * since we don't even look at its contents. */ dhsprintk((MYIOC_s_INFO_FMT "Sending PortEnable (req @ %p)\n", @@ -2672,7 +2682,7 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendPortEnable - Send PortEnable request to MPT adapter port. * @ioc: Pointer to MPT_ADAPTER structure * @portnum: Port number to enable @@ -2723,9 +2733,13 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag) return rc; } -/* - * ioc: Pointer to MPT_ADAPTER structure - * size - total FW bytes +/** + * mpt_alloc_fw_memory - allocate firmware memory + * @ioc: Pointer to MPT_ADAPTER structure + * @size: total FW bytes + * + * If memory has already been allocated, the same (cached) value + * is returned. */ void mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size) @@ -2742,9 +2756,12 @@ mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size) ioc->alloc_total += size; } } -/* - * If alt_img is NULL, delete from ioc structure. - * Else, delete a secondary image in same format. +/** + * mpt_free_fw_memory - free firmware memory + * @ioc: Pointer to MPT_ADAPTER structure + * + * If alt_img is NULL, delete from ioc structure. + * Else, delete a secondary image in same format. */ void mpt_free_fw_memory(MPT_ADAPTER *ioc) @@ -2763,7 +2780,7 @@ mpt_free_fw_memory(MPT_ADAPTER *ioc) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_do_upload - Construct and Send FWUpload request to MPT adapter port. * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Specifies whether the process can sleep @@ -2865,10 +2882,10 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_downloadboot - DownloadBoot code * @ioc: Pointer to MPT_ADAPTER structure - * @flag: Specify which part of IOC memory is to be uploaded. + * @pFwHeader: Pointer to firmware header info * @sleepFlag: Specifies whether the process can sleep * * FwDownloadBoot requires Programmed IO access. @@ -3071,7 +3088,7 @@ mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * KickStart - Perform hard reset of MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @force: Force hard reset @@ -3145,12 +3162,12 @@ KickStart(MPT_ADAPTER *ioc, int force, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_diag_reset - Perform hard reset of the adapter. * @ioc: Pointer to MPT_ADAPTER structure * @ignore: Set if to honor and clear to ignore * the reset history bit - * @sleepflag: CAN_SLEEP if called in a non-interrupt thread, + * @sleepFlag: CAN_SLEEP if called in a non-interrupt thread, * else set to NO_SLEEP (use mdelay instead) * * This routine places the adapter in diagnostic mode via the @@ -3436,11 +3453,12 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * SendIocReset - Send IOCReset request to MPT adapter. * @ioc: Pointer to MPT_ADAPTER structure * @reset_type: reset type, expected values are * %MPI_FUNCTION_IOC_MESSAGE_UNIT_RESET or %MPI_FUNCTION_IO_UNIT_RESET + * @sleepFlag: Specifies whether the process can sleep * * Send IOCReset request to the MPT adapter. * @@ -3494,11 +3512,12 @@ SendIocReset(MPT_ADAPTER *ioc, u8 reset_type, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * initChainBuffers - Allocate memory for and initialize - * chain buffers, chain buffer control arrays and spinlock. - * @hd: Pointer to MPT_SCSI_HOST structure - * @init: If set, initialize the spin lock. +/** + * initChainBuffers - Allocate memory for and initialize chain buffers + * @ioc: Pointer to MPT_ADAPTER structure + * + * Allocates memory for and initializes chain buffers, + * chain buffer control arrays and spinlock. */ static int initChainBuffers(MPT_ADAPTER *ioc) @@ -3594,7 +3613,7 @@ initChainBuffers(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * PrimeIocFifos - Initialize IOC request and reply FIFOs. * @ioc: Pointer to MPT_ADAPTER structure * @@ -3891,15 +3910,15 @@ mpt_handshake_req_reply_wait(MPT_ADAPTER *ioc, int reqBytes, u32 *req, } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellAck - Wait for IOC to clear the IOP_DOORBELL_STATUS bit - * in it's IntStatus register. +/** + * WaitForDoorbellAck - Wait for IOC doorbell handshake acknowledge * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep * * This routine waits (up to ~2 seconds max) for IOC doorbell - * handshake ACKnowledge. + * handshake ACKnowledge, indicated by the IOP_DOORBELL_STATUS + * bit in its IntStatus register being clear. * * Returns a negative value on failure, else wait loop count. */ @@ -3942,14 +3961,14 @@ WaitForDoorbellAck(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellInt - Wait for IOC to set the HIS_DOORBELL_INTERRUPT bit - * in it's IntStatus register. +/** + * WaitForDoorbellInt - Wait for IOC to set its doorbell interrupt bit * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep * - * This routine waits (up to ~2 seconds max) for IOC doorbell interrupt. + * This routine waits (up to ~2 seconds max) for IOC doorbell interrupt + * (MPI_HIS_DOORBELL_INTERRUPT) to be set in the IntStatus register. * * Returns a negative value on failure, else wait loop count. */ @@ -3991,8 +4010,8 @@ WaitForDoorbellInt(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * WaitForDoorbellReply - Wait for and capture a IOC handshake reply. +/** + * WaitForDoorbellReply - Wait for and capture an IOC handshake reply. * @ioc: Pointer to MPT_ADAPTER structure * @howlong: How long to wait (in seconds) * @sleepFlag: Specifies whether the process can sleep @@ -4077,7 +4096,7 @@ WaitForDoorbellReply(MPT_ADAPTER *ioc, int howlong, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetLanConfigPages - Fetch LANConfig pages. * @ioc: Pointer to MPT_ADAPTER structure * @@ -4188,12 +4207,9 @@ GetLanConfigPages(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mptbase_sas_persist_operation - Perform operation on SAS Persitent Table +/** + * mptbase_sas_persist_operation - Perform operation on SAS Persistent Table * @ioc: Pointer to MPT_ADAPTER structure - * @sas_address: 64bit SAS Address for operation. - * @target_id: specified target for operation - * @bus: specified bus for operation * @persist_opcode: see below * * MPI_SAS_OP_CLEAR_NOT_PRESENT - Free all persist TargetID mappings for @@ -4202,7 +4218,7 @@ GetLanConfigPages(MPT_ADAPTER *ioc) * * NOTE: Don't use not this function during interrupt time. * - * Returns: 0 for success, non-zero error + * Returns 0 for success, non-zero error */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -4399,7 +4415,7 @@ mptbase_raid_process_event_data(MPT_ADAPTER *ioc, } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * GetIoUnitPage2 - Retrieve BIOS version and boot order information. * @ioc: Pointer to MPT_ADAPTER structure * @@ -4457,7 +4473,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* mpt_GetScsiPortSettings - read SCSI Port Page 0 and 2 +/** + * mpt_GetScsiPortSettings - read SCSI Port Page 0 and 2 * @ioc: Pointer to a Adapter Strucutre * @portnum: IOC port number * @@ -4644,7 +4661,8 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* mpt_readScsiDevicePageHeaders - save version and length of SDP1 +/** + * mpt_readScsiDevicePageHeaders - save version and length of SDP1 * @ioc: Pointer to a Adapter Strucutre * @portnum: IOC port number * @@ -4996,9 +5014,8 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * SendEventNotification - Send EventNotification (on or off) request - * to MPT adapter. +/** + * SendEventNotification - Send EventNotification (on or off) request to adapter * @ioc: Pointer to MPT_ADAPTER structure * @EvSwitch: Event switch flags */ @@ -5062,8 +5079,8 @@ SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mpt_config - Generic function to issue config message - * @ioc - Pointer to an adapter structure - * @cfg - Pointer to a configuration structure. Struct contains + * @ioc: Pointer to an adapter structure + * @pCfg: Pointer to a configuration structure. Struct contains * action, page address, direction, physical address * and pointer to a configuration page header * Page header is updated. @@ -5188,8 +5205,8 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * mpt_timer_expired - Call back for timer process. +/** + * mpt_timer_expired - Callback for timer process. * Used only internal config functionality. * @data: Pointer to MPT_SCSI_HOST recast as an unsigned long */ @@ -5214,12 +5231,12 @@ mpt_timer_expired(unsigned long data) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_ioc_reset - Base cleanup for hard reset * @ioc: Pointer to the adapter structure * @reset_phase: Indicates pre- or post-reset functionality * - * Remark: Free's resources with internally generated commands. + * Remark: Frees resources with internally generated commands. */ static int mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) @@ -5271,7 +5288,7 @@ mpt_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) * procfs (%MPT_PROCFS_MPTBASEDIR/...) support stuff... */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_create - Create %MPT_PROCFS_MPTBASEDIR entries. * * Returns 0 for success, non-zero for failure. @@ -5297,7 +5314,7 @@ procmpt_create(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_destroy - Tear down %MPT_PROCFS_MPTBASEDIR entries. * * Returns 0 for success, non-zero for failure. @@ -5311,16 +5328,16 @@ procmpt_destroy(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * procmpt_summary_read - Handle read request from /proc/mpt/summary - * or from /proc/mpt/iocN/summary. +/** + * procmpt_summary_read - Handle read request of a summary file * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * + * Handles read request from /proc/mpt/summary or /proc/mpt/iocN/summary. * Returns number of characters written to process performing the read. */ static int @@ -5355,12 +5372,12 @@ procmpt_summary_read(char *buf, char **start, off_t offset, int request, int *eo } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_version_read - Handle read request from /proc/mpt/version. * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * @@ -5411,12 +5428,12 @@ procmpt_version_read(char *buf, char **start, off_t offset, int request, int *eo } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * procmpt_iocinfo_read - Handle read request from /proc/mpt/iocN/info. * @buf: Pointer to area to write information * @start: Pointer to start pointer * @offset: Offset to start writing - * @request: + * @request: Amount of read data requested * @eof: Pointer to EOF integer * @data: Pointer * @@ -5577,16 +5594,17 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mpt_HardResetHandler - Generic reset handler, issue SCSI Task - * Management call based on input arg values. If TaskMgmt fails, - * return associated SCSI request. + * mpt_HardResetHandler - Generic reset handler * @ioc: Pointer to MPT_ADAPTER structure * @sleepFlag: Indicates if sleep or schedule must be called. * + * Issues SCSI Task Management call based on input arg values. + * If TaskMgmt fails, returns associated SCSI request. + * * Remark: _HardResetHandler can be invoked from an interrupt thread (timer) * or a non-interrupt thread. In the former, must not call schedule(). * - * Remark: A return of -1 is a FATAL error case, as it means a + * Note: A return of -1 is a FATAL error case, as it means a * FW reload/initialization failed. * * Returns 0 for SUCCESS or -1 if FAILED. @@ -5935,13 +5953,14 @@ EventDescriptionStr(u8 event, u32 evData0, char *evStr) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* - * ProcessEventNotification - Route a received EventNotificationReply to - * all currently regeistered event handlers. +/** + * ProcessEventNotification - Route EventNotificationReply to all event handlers * @ioc: Pointer to MPT_ADAPTER structure * @pEventReply: Pointer to EventNotification reply frame * @evHandlers: Pointer to integer, number of event handlers * + * Routes a received EventNotificationReply to all currently registered + * event handlers. * Returns sum of event handlers return values. */ static int @@ -6056,7 +6075,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_fc_log_info - Log information returned from Fibre Channel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @log_info: U32 LogInfo reply word from the IOC @@ -6077,7 +6096,7 @@ mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_spi_log_info - Log information returned from SCSI Parallel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @mr: Pointer to MPT reply frame @@ -6200,7 +6219,7 @@ mpt_spi_log_info(MPT_ADAPTER *ioc, u32 log_info) }; /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_sas_log_info - Log information returned from SAS IOC. * @ioc: Pointer to MPT_ADAPTER structure * @log_info: U32 LogInfo reply word from the IOC @@ -6255,7 +6274,7 @@ union loginfo_type { } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * mpt_sp_ioc_info - IOC information returned from SCSI Parallel IOC. * @ioc: Pointer to MPT_ADAPTER structure * @ioc_status: U32 IOCStatus word from IOC @@ -6416,7 +6435,7 @@ EXPORT_SYMBOL(mpt_free_fw_memory); EXPORT_SYMBOL(mptbase_sas_persist_operation); /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * fusion_init - Fusion MPT base driver initialization routine. * * Returns 0 for success, non-zero for failure. @@ -6456,7 +6475,7 @@ fusion_init(void) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -/* +/** * fusion_exit - Perform driver unload cleanup. * * This routine frees all resources associated with each MPT adapter diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index ef2b55e1991..ca2f9107f14 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -1395,8 +1395,7 @@ mptfc_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptfc_init - Register MPT adapter(s) as SCSI host(s) with - * linux scsi mid-layer. + * mptfc_init - Register MPT adapter(s) as SCSI host(s) with SCSI mid-layer. * * Returns 0 for success, non-zero for failure. */ @@ -1440,7 +1439,7 @@ mptfc_init(void) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptfc_remove - Removed fc infrastructure for devices + * mptfc_remove - Remove fc infrastructure for devices * @pdev: Pointer to pci_dev structure * */ diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 30524dc54b1..2c72c36b817 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1230,15 +1230,15 @@ mptscsih_host_info(MPT_ADAPTER *ioc, char *pbuf, off_t offset, int len) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mptscsih_proc_info - Return information about MPT adapter + * @host: scsi host struct + * @buffer: if write, user data; if read, buffer for user + * @start: returns the buffer address + * @offset: if write, 0; if read, the current offset into the buffer from + * the previous read. + * @length: if write, return length; + * @func: write = 1; read = 0 * * (linux scsi_host_template.info routine) - * - * buffer: if write, user data; if read, buffer for user - * length: if write, return length; - * offset: if write, 0; if read, the current offset into the buffer from - * the previous read. - * hostno: scsi host number - * func: if write = 1; if read = 0 */ int mptscsih_proc_info(struct Scsi_Host *host, char *buffer, char **start, off_t offset, @@ -1902,8 +1902,7 @@ mptscsih_bus_reset(struct scsi_cmnd * SCpnt) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptscsih_host_reset - Perform a SCSI host adapter RESET! - * new_eh variant + * mptscsih_host_reset - Perform a SCSI host adapter RESET (new_eh variant) * @SCpnt: Pointer to scsi_cmnd structure, IO which reset is due to * * (linux scsi_host_template.eh_host_reset_handler routine) @@ -1949,8 +1948,7 @@ mptscsih_host_reset(struct scsi_cmnd *SCpnt) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptscsih_tm_pending_wait - wait for pending task management request to - * complete. + * mptscsih_tm_pending_wait - wait for pending task management request to complete * @hd: Pointer to MPT host structure. * * Returns {SUCCESS,FAILED}. @@ -1982,6 +1980,7 @@ mptscsih_tm_pending_wait(MPT_SCSI_HOST * hd) /** * mptscsih_tm_wait_for_completion - wait for completion of TM task * @hd: Pointer to MPT host structure. + * @timeout: timeout in seconds * * Returns {SUCCESS,FAILED}. */ @@ -3429,8 +3428,7 @@ mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *io) /** * mptscsih_synchronize_cache - Send SYNCHRONIZE_CACHE to all disks. * @hd: Pointer to a SCSI HOST structure - * @vtarget: per device private data - * @lun: lun + * @vdevice: virtual target device * * Uses the ISR, but with special processing. * MUST be single-threaded. diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index f422c0d0621..36641da5928 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1100,8 +1100,7 @@ static struct pci_driver mptspi_driver = { /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** - * mptspi_init - Register MPT adapter(s) as SCSI host(s) with - * linux scsi mid-layer. + * mptspi_init - Register MPT adapter(s) as SCSI host(s) with SCSI mid-layer. * * Returns 0 for success, non-zero for failure. */ @@ -1135,7 +1134,6 @@ mptspi_init(void) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** * mptspi_exit - Unregisters MPT adapter(s) - * */ static void __exit mptspi_exit(void) diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c index d96c687aee9..c463dc2efc0 100644 --- a/drivers/message/i2o/bus-osm.c +++ b/drivers/message/i2o/bus-osm.c @@ -56,6 +56,9 @@ static int i2o_bus_scan(struct i2o_device *dev) /** * i2o_bus_store_scan - Scan the I2O Bus Adapter * @d: device which should be scanned + * @attr: device_attribute + * @buf: output buffer + * @count: buffer size * * Returns count. */ diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c index 55757af4e3b..b9df143e4ff 100644 --- a/drivers/message/i2o/device.c +++ b/drivers/message/i2o/device.c @@ -54,8 +54,8 @@ static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd, * @dev: I2O device to claim * @drv: I2O driver which wants to claim the device * - * Do the leg work to assign a device to a given OSM. If the claim succeed - * the owner of the rimary. If the attempt fails a negative errno code + * Do the leg work to assign a device to a given OSM. If the claim succeeds, + * the owner is the primary. If the attempt fails a negative errno code * is returned. On success zero is returned. */ int i2o_device_claim(struct i2o_device *dev) @@ -208,7 +208,7 @@ static struct i2o_device *i2o_device_alloc(void) /** * i2o_device_add - allocate a new I2O device and add it to the IOP - * @iop: I2O controller where the device is on + * @c: I2O controller that the device is on * @entry: LCT entry of the I2O device * * Allocate a new I2O device and initialize it with the LCT entry. The @@ -280,7 +280,7 @@ err: /** * i2o_device_remove - remove an I2O device from the I2O core - * @dev: I2O device which should be released + * @i2o_dev: I2O device which should be released * * Is used on I2O controller removal or LCT modification, when the device * is removed from the system. Note that the device could still hang diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c index 7fc7399bd2e..9104b65ff70 100644 --- a/drivers/message/i2o/driver.c +++ b/drivers/message/i2o/driver.c @@ -34,9 +34,7 @@ static spinlock_t i2o_drivers_lock; static struct i2o_driver **i2o_drivers; /** - * i2o_bus_match - Tell if a I2O device class id match the class ids of - * the I2O driver (OSM) - * + * i2o_bus_match - Tell if I2O device class id matches the class ids of the I2O driver (OSM) * @dev: device which should be verified * @drv: the driver to match against * @@ -248,7 +246,7 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m) /** * i2o_driver_notify_controller_add_all - Send notify of added controller - * to all I2O drivers + * @c: newly added controller * * Send notifications to all registered drivers that a new controller was * added. @@ -267,8 +265,8 @@ void i2o_driver_notify_controller_add_all(struct i2o_controller *c) } /** - * i2o_driver_notify_controller_remove_all - Send notify of removed - * controller to all I2O drivers + * i2o_driver_notify_controller_remove_all - Send notify of removed controller + * @c: controller that is being removed * * Send notifications to all registered drivers that a controller was * removed. @@ -287,8 +285,8 @@ void i2o_driver_notify_controller_remove_all(struct i2o_controller *c) } /** - * i2o_driver_notify_device_add_all - Send notify of added device to all - * I2O drivers + * i2o_driver_notify_device_add_all - Send notify of added device + * @i2o_dev: newly added I2O device * * Send notifications to all registered drivers that a device was added. */ @@ -306,8 +304,8 @@ void i2o_driver_notify_device_add_all(struct i2o_device *i2o_dev) } /** - * i2o_driver_notify_device_remove_all - Send notify of removed device to - * all I2O drivers + * i2o_driver_notify_device_remove_all - Send notify of removed device + * @i2o_dev: device that is being removed * * Send notifications to all registered drivers that a device was removed. */ @@ -362,7 +360,7 @@ int __init i2o_driver_init(void) /** * i2o_driver_exit - clean up I2O drivers (OSMs) * - * Unregisters the I2O bus and free driver array. + * Unregisters the I2O bus and frees driver array. */ void __exit i2o_driver_exit(void) { diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index 9e529d8dd5c..902753b2c66 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -94,8 +94,8 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void) }; /** - * i2o_exec_wait_free - Free a i2o_exec_wait struct - * @i2o_exec_wait: I2O wait data which should be cleaned up + * i2o_exec_wait_free - Free an i2o_exec_wait struct + * @wait: I2O wait data which should be cleaned up */ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) { @@ -105,7 +105,7 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) /** * i2o_msg_post_wait_mem - Post and wait a message with DMA buffers * @c: controller - * @m: message to post + * @msg: message to post * @timeout: time in seconds to wait * @dma: i2o_dma struct of the DMA buffer to free on failure * @@ -269,6 +269,7 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m, /** * i2o_exec_show_vendor_id - Displays Vendor ID of controller * @d: device of which the Vendor ID should be displayed + * @attr: device_attribute to display * @buf: buffer into which the Vendor ID should be printed * * Returns number of bytes printed into buffer. @@ -290,6 +291,7 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, /** * i2o_exec_show_product_id - Displays Product ID of controller * @d: device of which the Product ID should be displayed + * @attr: device_attribute to display * @buf: buffer into which the Product ID should be printed * * Returns number of bytes printed into buffer. @@ -365,7 +367,7 @@ static int i2o_exec_remove(struct device *dev) /** * i2o_exec_lct_modified - Called on LCT NOTIFY reply - * @c: I2O controller on which the LCT has modified + * @work: work struct for a specific controller * * This function handles asynchronus LCT NOTIFY replies. It parses the * new LCT and if the buffer for the LCT was to small sends a LCT NOTIFY diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index 70ae0025332..da9859f2caf 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -259,7 +259,7 @@ static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id) /** * i2o_block_device_power - Power management for device dev * @dev: I2O device which should receive the power management request - * @operation: Operation which should be send + * @op: Operation to send * * Send a power management request to the device dev. * @@ -315,7 +315,7 @@ static inline struct i2o_block_request *i2o_block_request_alloc(void) * i2o_block_request_free - Frees a I2O block request * @ireq: I2O block request which should be freed * - * Fres the allocated memory (give it back to the request mempool). + * Frees the allocated memory (give it back to the request mempool). */ static inline void i2o_block_request_free(struct i2o_block_request *ireq) { @@ -326,6 +326,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq) * i2o_block_sglist_alloc - Allocate the SG list and map it * @c: I2O controller to which the request belongs * @ireq: I2O block request + * @mptr: message body pointer * * Builds the SG list and map it to be accessable by the controller. * @@ -490,7 +491,7 @@ static void i2o_block_end_request(struct request *req, int uptodate, * i2o_block_reply - Block OSM reply handler. * @c: I2O controller from which the message arrives * @m: message id of reply - * qmsg: the actuall I2O message reply + * @msg: the actual I2O message reply * * This function gets all the message replies. * @@ -602,6 +603,8 @@ static void i2o_block_biosparam(unsigned long capacity, unsigned short *cyls, /** * i2o_block_open - Open the block device + * @inode: inode for block device being opened + * @file: file to open * * Power up the device, mount and lock the media. This function is called, * if the block device is opened for access. @@ -629,6 +632,8 @@ static int i2o_block_open(struct inode *inode, struct file *file) /** * i2o_block_release - Release the I2O block device + * @inode: inode for block device being released + * @file: file to close * * Unlock and unmount the media, and power down the device. Gets called if * the block device is closed. @@ -675,6 +680,8 @@ static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo) /** * i2o_block_ioctl - Issue device specific ioctl calls. + * @inode: inode for block device ioctl + * @file: file for ioctl * @cmd: ioctl command * @arg: arg * @@ -902,7 +909,7 @@ static int i2o_block_transfer(struct request *req) /** * i2o_block_request_fn - request queue handling function - * q: request queue from which the request could be fetched + * @q: request queue from which the request could be fetched * * Takes the next request from the queue, transfers it and if no error * occurs dequeue it from the queue. On arrival of the reply the message diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c index 3d2e76eea93..a61cb17c5c1 100644 --- a/drivers/message/i2o/i2o_proc.c +++ b/drivers/message/i2o/i2o_proc.c @@ -163,7 +163,7 @@ static int print_serial_number(struct seq_file *seq, u8 * serialno, int max_len) * i2o_get_class_name - do i2o class name lookup * @class: class number * - * Return a descriptive string for an i2o class + * Return a descriptive string for an i2o class. */ static const char *i2o_get_class_name(int class) { diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index d5f93b12b6f..1045c8a518b 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -411,8 +411,7 @@ static void i2o_scsi_notify_device_add(struct i2o_device *i2o_dev) }; /** - * i2o_scsi_notify_device_remove - Retrieve notifications of removed - * devices + * i2o_scsi_notify_device_remove - Retrieve notifications of removed devices * @i2o_dev: the I2O device which was removed * * If a I2O device is removed, we catch the notification to remove the @@ -432,8 +431,7 @@ static void i2o_scsi_notify_device_remove(struct i2o_device *i2o_dev) }; /** - * i2o_scsi_notify_controller_add - Retrieve notifications of added - * controllers + * i2o_scsi_notify_controller_add - Retrieve notifications of added controllers * @c: the controller which was added * * If a I2O controller is added, we catch the notification to add a @@ -463,8 +461,7 @@ static void i2o_scsi_notify_controller_add(struct i2o_controller *c) }; /** - * i2o_scsi_notify_controller_remove - Retrieve notifications of removed - * controllers + * i2o_scsi_notify_controller_remove - Retrieve notifications of removed controllers * @c: the controller which was removed * * If a I2O controller is removed, we catch the notification to remove the @@ -751,7 +748,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt) * @capacity: size in sectors * @ip: geometry array * - * This is anyones guess quite frankly. We use the same rules everyone + * This is anyone's guess quite frankly. We use the same rules everyone * else appears to and hope. It seems to work. */ diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c index 8287f95c8c4..3661e6e065d 100644 --- a/drivers/message/i2o/pci.c +++ b/drivers/message/i2o/pci.c @@ -259,6 +259,7 @@ static irqreturn_t i2o_pci_interrupt(int irq, void *dev_id) /** * i2o_pci_irq_enable - Allocate interrupt for I2O controller + * @c: i2o_controller that the request is for * * Allocate an interrupt for the I2O controller, and activate interrupts * on the I2O controller. @@ -305,7 +306,7 @@ static void i2o_pci_irq_disable(struct i2o_controller *c) /** * i2o_pci_probe - Probe the PCI device for an I2O controller - * @dev: PCI device to test + * @pdev: PCI device to test * @id: id which matched with the PCI device id table * * Probe the PCI device for any device which is a memory of the @@ -447,7 +448,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev, /** * i2o_pci_remove - Removes a I2O controller from the system - * pdev: I2O controller which should be removed + * @pdev: I2O controller which should be removed * * Reset the I2O controller, disable interrupts and remove all allocated * resources. diff --git a/include/linux/i2o.h b/include/linux/i2o.h index 2514f4e286b..52f53e2e70c 100644 --- a/include/linux/i2o.h +++ b/include/linux/i2o.h @@ -986,7 +986,8 @@ extern void i2o_driver_unregister(struct i2o_driver *); /** * i2o_driver_notify_controller_add - Send notification of added controller - * to a single I2O driver + * @drv: I2O driver + * @c: I2O controller * * Send notification of added controller to a single registered driver. */ @@ -998,8 +999,9 @@ static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv, }; /** - * i2o_driver_notify_controller_remove - Send notification of removed - * controller to a single I2O driver + * i2o_driver_notify_controller_remove - Send notification of removed controller + * @drv: I2O driver + * @c: I2O controller * * Send notification of removed controller to a single registered driver. */ @@ -1011,8 +1013,9 @@ static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv, }; /** - * i2o_driver_notify_device_add - Send notification of added device to a - * single I2O driver + * i2o_driver_notify_device_add - Send notification of added device + * @drv: I2O driver + * @i2o_dev: the added i2o_device * * Send notification of added device to a single registered driver. */ @@ -1025,7 +1028,8 @@ static inline void i2o_driver_notify_device_add(struct i2o_driver *drv, /** * i2o_driver_notify_device_remove - Send notification of removed device - * to a single I2O driver + * @drv: I2O driver + * @i2o_dev: the added i2o_device * * Send notification of removed device to a single registered driver. */ @@ -1148,7 +1152,7 @@ static inline void i2o_msg_post(struct i2o_controller *c, /** * i2o_msg_post_wait - Post and wait a message and wait until return * @c: controller - * @m: message to post + * @msg: message to post * @timeout: time in seconds to wait * * This API allows an OSM to post a message and then be told whether or -- cgit v1.2.3-70-g09d2 From 83b7b44e1c1e9e493ccd4146558481ab5af0116a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Dec 2006 20:38:53 -0800 Subject: [PATCH] fs: reorder some 'struct inode' fields to speedup i_size manipulations On 32bits SMP platforms, 64bits i_size is protected by a seqcount (i_size_seqcount). When i_size is read or written, i_size_seqcount is read/written as well, so it make sense to group these two fields together in the same cache line. This patch moves i_size_seqcount next to i_size, and also moves i_version to let offsetof(struct inode, i_size) being 0x40 instead of 0x3c (for 32bits platforms). For 64 bits platforms, i_size_seqcount doesnt exist, and the move of a 'long i_version' should not introduce a new hole because of padding. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d791bae9de9..3a1927e516d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -548,12 +548,15 @@ struct inode { uid_t i_uid; gid_t i_gid; dev_t i_rdev; + unsigned long i_version; loff_t i_size; +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; unsigned int i_blkbits; - unsigned long i_version; blkcnt_t i_blocks; unsigned short i_bytes; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ @@ -598,9 +601,6 @@ struct inode { void *i_security; #endif void *i_private; /* fs or device private pointer */ -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; -#endif }; /* -- cgit v1.2.3-70-g09d2 From f67637ee4b5d90d41160d755b9a8cca18c394586 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 6 Dec 2006 20:38:54 -0800 Subject: [PATCH] Add struct dev pointer to dma_is_consistent() dma_is_consistent() is ill-designed in that it does not have a struct device pointer argument which makes proper support for systems that consist of a mix of coherent and non-coherent DMA devices hard. Change dma_is_consistent to take a struct device pointer as first argument and fix the sole caller to pass it. Signed-off-by: Ralf Baechle Cc: James Bottomley Cc: "David S. Miller" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 6 +++--- arch/mips/mm/dma-coherent.c | 2 +- arch/mips/mm/dma-ip27.c | 2 +- arch/mips/mm/dma-ip32.c | 2 +- arch/mips/mm/dma-noncoherent.c | 2 +- drivers/scsi/53c700.c | 2 +- include/asm-alpha/dma-mapping.h | 2 +- include/asm-arm/dma-mapping.h | 2 +- include/asm-avr32/dma-mapping.h | 2 +- include/asm-cris/dma-mapping.h | 2 +- include/asm-frv/dma-mapping.h | 2 +- include/asm-generic/dma-mapping.h | 2 +- include/asm-i386/dma-mapping.h | 2 +- include/asm-ia64/dma-mapping.h | 2 +- include/asm-m68k/dma-mapping.h | 2 +- include/asm-mips/dma-mapping.h | 2 +- include/asm-parisc/dma-mapping.h | 2 +- include/asm-powerpc/dma-mapping.h | 4 ++-- include/asm-sparc64/dma-mapping.h | 2 +- include/asm-um/dma-mapping.h | 2 +- include/asm-x86_64/dma-mapping.h | 2 +- include/asm-xtensa/dma-mapping.h | 2 +- 22 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 8621a064f7e..3dc1f9125ca 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -431,10 +431,10 @@ be identical to those passed in (and returned by dma_alloc_noncoherent()). int -dma_is_consistent(dma_addr_t dma_handle) +dma_is_consistent(struct device *dev, dma_addr_t dma_handle) -returns true if the memory pointed to by the dma_handle is actually -consistent. +returns true if the device dev is performing consistent DMA on the memory +area pointed to by the dma_handle. int dma_get_cache_alignment(void) diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c index 7fa5fd16e46..18bc83e577c 100644 --- a/arch/mips/mm/dma-coherent.c +++ b/arch/mips/mm/dma-coherent.c @@ -190,7 +190,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c index 8da19fd22ac..8e9a5a8f5d6 100644 --- a/arch/mips/mm/dma-ip27.c +++ b/arch/mips/mm/dma-ip27.c @@ -197,7 +197,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c index ec54ed0d26f..08720a42100 100644 --- a/arch/mips/mm/dma-ip32.c +++ b/arch/mips/mm/dma-ip32.c @@ -363,7 +363,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 2eeffe5c2a3..4a3efc63337 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -299,7 +299,7 @@ int dma_supported(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_supported); -int dma_is_consistent(dma_addr_t dma_addr) +int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 335a25540c0..acee062cd6f 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -313,7 +313,7 @@ NCR_700_detect(struct scsi_host_template *tpnt, hostdata->status = memory + STATUS_OFFSET; /* all of these offsets are L1_CACHE_BYTES separated. It is fatal * if this isn't sufficient separation to avoid dma flushing issues */ - BUG_ON(!dma_is_consistent(pScript) && L1_CACHE_BYTES < dma_get_cache_alignment()); + BUG_ON(!dma_is_consistent(hostdata->dev, pScript) && L1_CACHE_BYTES < dma_get_cache_alignment()); hostdata->slots = (struct NCR_700_command_slot *)(memory + SLOTS_OFFSET); hostdata->dev = dev; diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index b9ff4d8cb33..b274bf6317c 100644 --- a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -51,7 +51,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(dev) (1) +#define dma_is_consistent(d, h) (1) int dma_set_mask(struct device *dev, u64 mask); diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index 666617711c8..9bc46b486af 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -48,7 +48,7 @@ static inline int dma_get_cache_alignment(void) return 32; } -static inline int dma_is_consistent(dma_addr_t handle) +static inline int dma_is_consistent(struct device *dev, dma_addr_t handle) { return !!arch_is_coherent(); } diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 4c40cb41cdf..44630be2ee2 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -307,7 +307,7 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -static inline int dma_is_consistent(dma_addr_t dma_addr) +static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 1; } diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index cbf1a98f012..af704fdd3d0 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -156,7 +156,7 @@ dma_get_cache_alignment(void) return (1 << INTERNODE_CACHE_SHIFT); } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void dma_cache_sync(void *vaddr, size_t size, diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index e9fc1d47797..7b97fc785f7 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -172,7 +172,7 @@ int dma_get_cache_alignment(void) return 1 << L1_CACHE_SHIFT; } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void dma_cache_sync(void *vaddr, size_t size, diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index b541e48cc54..b9be3fc344c 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -266,7 +266,7 @@ dma_error(dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 81999a3ebe7..7da64c9f173 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -156,7 +156,7 @@ dma_get_cache_alignment(void) return (1 << INTERNODE_CACHE_SHIFT); } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void dma_cache_sync(void *vaddr, size_t size, diff --git a/include/asm-ia64/dma-mapping.h b/include/asm-ia64/dma-mapping.h index 99a8f8e1218..4b075bc032e 100644 --- a/include/asm-ia64/dma-mapping.h +++ b/include/asm-ia64/dma-mapping.h @@ -59,6 +59,6 @@ dma_cache_sync (void *vaddr, size_t size, enum dma_data_direction dir) mb(); } -#define dma_is_consistent(dma_handle) (1) /* all we do is coherent memory... */ +#define dma_is_consistent(d, h) (1) /* all we do is coherent memory... */ #endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index d90d841d3df..efc89c12f83 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -21,7 +21,7 @@ static inline int dma_get_cache_alignment(void) return 1 << L1_CACHE_SHIFT; } -static inline int dma_is_consistent(dma_addr_t dma_addr) +static inline int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return 0; } diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index 43288634c38..e17f70d7b70 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -63,7 +63,7 @@ dma_get_cache_alignment(void) return 128; } -extern int dma_is_consistent(dma_addr_t dma_addr); +extern int dma_is_consistent(struct device *dev, dma_addr_t dma_addr); extern void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction); diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index 1e387e1dad3..c40d48afdc5 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -191,7 +191,7 @@ dma_get_cache_alignment(void) } static inline int -dma_is_consistent(dma_addr_t dma_addr) +dma_is_consistent(struct device *dev, dma_addr_t dma_addr) { return (hppa_dma_ops->dma_sync_single_for_cpu == NULL); } diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index 7e38b5fddad..3cf635b53b8 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -342,9 +342,9 @@ static inline int dma_mapping_error(dma_addr_t dma_addr) #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE -#define dma_is_consistent(d) (0) +#define dma_is_consistent(d, h) (0) #else -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) #endif static inline int dma_get_cache_alignment(void) diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index 27c46fbeebd..5fe0072f3f8 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -181,7 +181,7 @@ dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t siz #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h index babd2989511..defb5b8307d 100644 --- a/include/asm-um/dma-mapping.h +++ b/include/asm-um/dma-mapping.h @@ -94,7 +94,7 @@ dma_sync_sg(struct device *dev, struct scatterlist *sg, int nelems, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline int dma_get_cache_alignment(void) diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index 10174b110a5..c8cc4887fba 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -180,7 +180,7 @@ static inline int dma_get_cache_alignment(void) return boot_cpu_data.x86_clflush_size; } -#define dma_is_consistent(h) 1 +#define dma_is_consistent(d, h) 1 extern int dma_set_mask(struct device *dev, u64 mask); diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index c39c91dfcc6..827d1dfd9e1 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -170,7 +170,7 @@ dma_get_cache_alignment(void) return L1_CACHE_BYTES; } -#define dma_is_consistent(d) (1) +#define dma_is_consistent(d, h) (1) static inline void dma_cache_sync(void *vaddr, size_t size, -- cgit v1.2.3-70-g09d2 From d3fa72e4556ec1f04e46a0d561d9e785ecaa173d Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 6 Dec 2006 20:38:56 -0800 Subject: [PATCH] Pass struct dev pointer to dma_cache_sync() Pass struct dev pointer to dma_cache_sync() dma_cache_sync() is ill-designed in that it does not have a struct device pointer argument which makes proper support for systems that consist of a mix of coherent and non-coherent DMA devices hard. Change dma_cache_sync to take a struct device pointer as first argument and fix all its callers to pass it. Signed-off-by: Ralf Baechle Cc: James Bottomley Cc: "David S. Miller" Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DMA-API.txt | 2 +- arch/avr32/mm/dma-coherent.c | 2 +- arch/mips/mm/dma-coherent.c | 2 +- arch/mips/mm/dma-ip27.c | 2 +- arch/mips/mm/dma-ip32.c | 3 +- arch/mips/mm/dma-noncoherent.c | 3 +- drivers/net/lasi_82596.c | 94 +++++++++++++++++++-------------------- drivers/scsi/53c700.c | 80 ++++++++++++++++++--------------- drivers/scsi/53c700.h | 16 +++---- drivers/serial/mpsc.c | 22 ++++----- include/asm-alpha/dma-mapping.h | 2 +- include/asm-avr32/dma-mapping.h | 3 +- include/asm-cris/dma-mapping.h | 2 +- include/asm-frv/dma-mapping.h | 2 +- include/asm-generic/dma-mapping.h | 2 +- include/asm-i386/dma-mapping.h | 2 +- include/asm-ia64/dma-mapping.h | 3 +- include/asm-m68k/dma-mapping.h | 2 +- include/asm-mips/dma-mapping.h | 2 +- include/asm-parisc/dma-mapping.h | 2 +- include/asm-powerpc/dma-mapping.h | 2 +- include/asm-sh/dma-mapping.h | 2 +- include/asm-sh64/dma-mapping.h | 2 +- include/asm-sparc64/dma-mapping.h | 2 +- include/asm-um/dma-mapping.h | 2 +- include/asm-x86_64/dma-mapping.h | 3 +- include/asm-xtensa/dma-mapping.h | 2 +- 27 files changed, 137 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index 3dc1f9125ca..805db4b2cba 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -459,7 +459,7 @@ anything like this. You must also be extra careful about accessing memory you intend to sync partially. void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) Do a partial sync of memory that was allocated by diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c index 44ab8a7bdae..b68d669f823 100644 --- a/arch/avr32/mm/dma-coherent.c +++ b/arch/avr32/mm/dma-coherent.c @@ -11,7 +11,7 @@ #include #include -void dma_cache_sync(void *vaddr, size_t size, int direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, int direction) { /* * No need to sync an uncached area diff --git a/arch/mips/mm/dma-coherent.c b/arch/mips/mm/dma-coherent.c index 18bc83e577c..5697c6e250a 100644 --- a/arch/mips/mm/dma-coherent.c +++ b/arch/mips/mm/dma-coherent.c @@ -197,7 +197,7 @@ int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/arch/mips/mm/dma-ip27.c b/arch/mips/mm/dma-ip27.c index 8e9a5a8f5d6..f088344db46 100644 --- a/arch/mips/mm/dma-ip27.c +++ b/arch/mips/mm/dma-ip27.c @@ -204,7 +204,7 @@ int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/arch/mips/mm/dma-ip32.c b/arch/mips/mm/dma-ip32.c index 08720a42100..b42b6f7456e 100644 --- a/arch/mips/mm/dma-ip32.c +++ b/arch/mips/mm/dma-ip32.c @@ -370,7 +370,8 @@ int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) { if (direction == DMA_NONE) return; diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 4a3efc63337..8cecef0957c 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -306,7 +306,8 @@ int dma_is_consistent(struct device *dev, dma_addr_t dma_addr) EXPORT_SYMBOL(dma_is_consistent); -void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction direction) +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) { if (direction == DMA_NONE) return; diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index f4d815bca64..ea392f2a5aa 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -119,14 +119,14 @@ #define DEB(x,y) if (i596_debug & (x)) { y; } -#define CHECK_WBACK(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_TO_DEVICE); } while (0) +#define CHECK_WBACK(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_TO_DEVICE); } while (0) -#define CHECK_INV(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_FROM_DEVICE); } while(0) +#define CHECK_INV(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_FROM_DEVICE); } while(0) -#define CHECK_WBACK_INV(addr,len) \ - do { dma_cache_sync((void *)addr, len, DMA_BIDIRECTIONAL); } while (0) +#define CHECK_WBACK_INV(priv, addr,len) \ + do { dma_cache_sync((priv)->dev, (void *)addr, len, DMA_BIDIRECTIONAL); } while (0) #define PA_I82596_RESET 0 /* Offsets relative to LASI-LAN-Addr.*/ @@ -449,10 +449,10 @@ static inline void MPU_PORT(struct net_device *dev, int c, dma_addr_t x) static inline int wait_istat(struct net_device *dev, struct i596_private *lp, int delcnt, char *str) { - CHECK_INV(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_INV(lp, &(lp->iscp), sizeof(struct i596_iscp)); while (--delcnt && lp->iscp.stat) { udelay(10); - CHECK_INV(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_INV(lp, &(lp->iscp), sizeof(struct i596_iscp)); } if (!delcnt) { printk("%s: %s, iscp.stat %04x, didn't clear\n", @@ -466,10 +466,10 @@ static inline int wait_istat(struct net_device *dev, struct i596_private *lp, in static inline int wait_cmd(struct net_device *dev, struct i596_private *lp, int delcnt, char *str) { - CHECK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); while (--delcnt && lp->scb.command) { udelay(10); - CHECK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); } if (!delcnt) { printk("%s: %s, status %4.4x, cmd %4.4x.\n", @@ -522,7 +522,7 @@ static void i596_display_data(struct net_device *dev) rbd, rbd->count, rbd->b_next, rbd->b_data, rbd->size); rbd = rbd->v_next; } while (rbd != lp->rbd_head); - CHECK_INV(lp, sizeof(struct i596_private)); + CHECK_INV(lp, lp, sizeof(struct i596_private)); } @@ -592,7 +592,7 @@ static inline void init_rx_bufs(struct net_device *dev) rfd->b_next = WSWAPrfd(virt_to_dma(lp,lp->rfds)); rfd->cmd = CMD_EOL|CMD_FLEX; - CHECK_WBACK_INV(lp, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, lp, sizeof(struct i596_private)); } static inline void remove_rx_bufs(struct net_device *dev) @@ -629,7 +629,7 @@ static void rebuild_rx_bufs(struct net_device *dev) lp->rbd_head = lp->rbds; lp->rfds[0].rbd = WSWAPrbd(virt_to_dma(lp,lp->rbds)); - CHECK_WBACK_INV(lp, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, lp, sizeof(struct i596_private)); } @@ -663,8 +663,8 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk("%s: starting i82596.\n", dev->name)); - CHECK_WBACK(&(lp->scp), sizeof(struct i596_scp)); - CHECK_WBACK(&(lp->iscp), sizeof(struct i596_iscp)); + CHECK_WBACK(lp, &(lp->scp), sizeof(struct i596_scp)); + CHECK_WBACK(lp, &(lp->iscp), sizeof(struct i596_iscp)); MPU_PORT(dev, PORT_ALTSCP, virt_to_dma(lp,&lp->scp)); @@ -678,25 +678,25 @@ static int init_i596_mem(struct net_device *dev) rebuild_rx_bufs(dev); lp->scb.command = 0; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); enable_irq(dev->irq); /* enable IRQs from LAN */ DEB(DEB_INIT, printk("%s: queuing CmdConfigure\n", dev->name)); memcpy(lp->cf_cmd.i596_config, init_setup, 14); lp->cf_cmd.cmd.command = CmdConfigure; - CHECK_WBACK(&(lp->cf_cmd), sizeof(struct cf_cmd)); + CHECK_WBACK(lp, &(lp->cf_cmd), sizeof(struct cf_cmd)); i596_add_cmd(dev, &lp->cf_cmd.cmd); DEB(DEB_INIT, printk("%s: queuing CmdSASetup\n", dev->name)); memcpy(lp->sa_cmd.eth_addr, dev->dev_addr, 6); lp->sa_cmd.cmd.command = CmdSASetup; - CHECK_WBACK(&(lp->sa_cmd), sizeof(struct sa_cmd)); + CHECK_WBACK(lp, &(lp->sa_cmd), sizeof(struct sa_cmd)); i596_add_cmd(dev, &lp->sa_cmd.cmd); DEB(DEB_INIT, printk("%s: queuing CmdTDR\n", dev->name)); lp->tdr_cmd.cmd.command = CmdTDR; - CHECK_WBACK(&(lp->tdr_cmd), sizeof(struct tdr_cmd)); + CHECK_WBACK(lp, &(lp->tdr_cmd), sizeof(struct tdr_cmd)); i596_add_cmd(dev, &lp->tdr_cmd.cmd); spin_lock_irqsave (&lp->lock, flags); @@ -708,7 +708,7 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk("%s: Issuing RX_START\n", dev->name)); lp->scb.command = RX_START; lp->scb.rfd = WSWAPrfd(virt_to_dma(lp,lp->rfds)); - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); @@ -740,13 +740,13 @@ static inline int i596_rx(struct net_device *dev) rfd = lp->rfd_head; /* Ref next frame to check */ - CHECK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_INV(lp, rfd, sizeof(struct i596_rfd)); while ((rfd->stat) & STAT_C) { /* Loop while complete frames */ if (rfd->rbd == I596_NULL) rbd = NULL; else if (rfd->rbd == lp->rbd_head->b_addr) { rbd = lp->rbd_head; - CHECK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_INV(lp, rbd, sizeof(struct i596_rbd)); } else { printk("%s: rbd chain broken!\n", dev->name); @@ -790,7 +790,7 @@ static inline int i596_rx(struct net_device *dev) dma_addr = dma_map_single(lp->dev, newskb->data, PKT_BUF_SZ, DMA_FROM_DEVICE); rbd->v_data = newskb->data; rbd->b_data = WSWAPchar(dma_addr); - CHECK_WBACK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_WBACK_INV(lp, rbd, sizeof(struct i596_rbd)); } else skb = dev_alloc_skb(pkt_len + 2); @@ -842,7 +842,7 @@ memory_squeeze: if (rbd != NULL && (rbd->count & 0x4000)) { rbd->count = 0; lp->rbd_head = rbd->v_next; - CHECK_WBACK_INV(rbd, sizeof(struct i596_rbd)); + CHECK_WBACK_INV(lp, rbd, sizeof(struct i596_rbd)); } /* Tidy the frame descriptor, marking it as end of list */ @@ -860,10 +860,10 @@ memory_squeeze: lp->scb.rfd = rfd->b_next; lp->rfd_head = rfd->v_next; - CHECK_WBACK_INV(rfd->v_prev, sizeof(struct i596_rfd)); - CHECK_WBACK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_WBACK_INV(lp, rfd->v_prev, sizeof(struct i596_rfd)); + CHECK_WBACK_INV(lp, rfd, sizeof(struct i596_rfd)); rfd = lp->rfd_head; - CHECK_INV(rfd, sizeof(struct i596_rfd)); + CHECK_INV(lp, rfd, sizeof(struct i596_rfd)); } DEB(DEB_RXFRAME, printk("frames %d\n", frames)); @@ -902,12 +902,12 @@ static inline void i596_cleanup_cmd(struct net_device *dev, struct i596_private ptr->v_next = NULL; ptr->b_next = I596_NULL; } - CHECK_WBACK_INV(ptr, sizeof(struct i596_cmd)); + CHECK_WBACK_INV(lp, ptr, sizeof(struct i596_cmd)); } wait_cmd(dev, lp, 100, "i596_cleanup_cmd timed out"); lp->scb.cmd = I596_NULL; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); } @@ -925,7 +925,7 @@ static inline void i596_reset(struct net_device *dev, struct i596_private *lp) /* FIXME: this command might cause an lpmc */ lp->scb.command = CUC_ABORT | RX_ABORT; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); /* wait for shutdown */ @@ -951,20 +951,20 @@ static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd) cmd->command |= (CMD_EOL | CMD_INTR); cmd->v_next = NULL; cmd->b_next = I596_NULL; - CHECK_WBACK(cmd, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, cmd, sizeof(struct i596_cmd)); spin_lock_irqsave (&lp->lock, flags); if (lp->cmd_head != NULL) { lp->cmd_tail->v_next = cmd; lp->cmd_tail->b_next = WSWAPcmd(virt_to_dma(lp,&cmd->status)); - CHECK_WBACK(lp->cmd_tail, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, lp->cmd_tail, sizeof(struct i596_cmd)); } else { lp->cmd_head = cmd; wait_cmd(dev, lp, 100, "i596_add_cmd timed out"); lp->scb.cmd = WSWAPcmd(virt_to_dma(lp,&cmd->status)); lp->scb.command = CUC_START; - CHECK_WBACK(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK(lp, &(lp->scb), sizeof(struct i596_scb)); CA(dev); } lp->cmd_tail = cmd; @@ -998,12 +998,12 @@ static int i596_test(struct net_device *dev) data = virt_to_dma(lp,tint); tint[1] = -1; - CHECK_WBACK(tint,PAGE_SIZE); + CHECK_WBACK(lp, tint, PAGE_SIZE); MPU_PORT(dev, 1, data); for(data = 1000000; data; data--) { - CHECK_INV(tint,PAGE_SIZE); + CHECK_INV(lp, tint, PAGE_SIZE); if(tint[1] != -1) break; @@ -1061,7 +1061,7 @@ static void i596_tx_timeout (struct net_device *dev) /* Issue a channel attention signal */ DEB(DEB_ERRORS, printk("Kicking board.\n")); lp->scb.command = CUC_START | RX_START; - CHECK_WBACK_INV(&(lp->scb), sizeof(struct i596_scb)); + CHECK_WBACK_INV(lp, &(lp->scb), sizeof(struct i596_scb)); CA (dev); lp->last_restart = lp->stats.tx_packets; } @@ -1118,8 +1118,8 @@ static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) tbd->data = WSWAPchar(tx_cmd->dma_addr); DEB(DEB_TXADDR,print_eth(skb->data, "tx-queued")); - CHECK_WBACK_INV(tx_cmd, sizeof(struct tx_cmd)); - CHECK_WBACK_INV(tbd, sizeof(struct i596_tbd)); + CHECK_WBACK_INV(lp, tx_cmd, sizeof(struct tx_cmd)); + CHECK_WBACK_INV(lp, tbd, sizeof(struct i596_tbd)); i596_add_cmd(dev, &tx_cmd->cmd); lp->stats.tx_packets++; @@ -1228,7 +1228,7 @@ static int __devinit i82596_probe(struct net_device *dev, lp->dma_addr = dma_addr; lp->dev = gen_dev; - CHECK_WBACK_INV(dev->mem_start, sizeof(struct i596_private)); + CHECK_WBACK_INV(lp, dev->mem_start, sizeof(struct i596_private)); i = register_netdev(dev); if (i) { @@ -1295,7 +1295,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) DEB(DEB_INTS, printk("%s: i596 interrupt command unit inactive %x.\n", dev->name, status & 0x0700)); while (lp->cmd_head != NULL) { - CHECK_INV(lp->cmd_head, sizeof(struct i596_cmd)); + CHECK_INV(lp, lp->cmd_head, sizeof(struct i596_cmd)); if (!(lp->cmd_head->status & STAT_C)) break; @@ -1358,7 +1358,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } ptr->v_next = NULL; ptr->b_next = I596_NULL; - CHECK_WBACK(ptr, sizeof(struct i596_cmd)); + CHECK_WBACK(lp, ptr, sizeof(struct i596_cmd)); lp->last_cmd = jiffies; } @@ -1372,13 +1372,13 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) ptr->command &= 0x1fff; ptr = ptr->v_next; - CHECK_WBACK_INV(prev, sizeof(struct i596_cmd)); + CHECK_WBACK_INV(lp, prev, sizeof(struct i596_cmd)); } if ((lp->cmd_head != NULL)) ack_cmd |= CUC_START; lp->scb.cmd = WSWAPcmd(virt_to_dma(lp,&lp->cmd_head->status)); - CHECK_WBACK_INV(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK_INV(lp, &lp->scb, sizeof(struct i596_scb)); } if ((status & 0x1000) || (status & 0x4000)) { if ((status & 0x4000)) @@ -1397,7 +1397,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } wait_cmd(dev, lp, 100, "i596 interrupt, timeout"); lp->scb.command = ack_cmd; - CHECK_WBACK(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK(lp, &lp->scb, sizeof(struct i596_scb)); /* DANGER: I suspect that some kind of interrupt acknowledgement aside from acking the 82596 might be needed @@ -1426,7 +1426,7 @@ static int i596_close(struct net_device *dev) wait_cmd(dev, lp, 100, "close1 timed out"); lp->scb.command = CUC_ABORT | RX_ABORT; - CHECK_WBACK(&lp->scb, sizeof(struct i596_scb)); + CHECK_WBACK(lp, &lp->scb, sizeof(struct i596_scb)); CA(dev); @@ -1486,7 +1486,7 @@ static void set_multicast_list(struct net_device *dev) dev->name); else { lp->cf_cmd.cmd.command = CmdConfigure; - CHECK_WBACK_INV(&lp->cf_cmd, sizeof(struct cf_cmd)); + CHECK_WBACK_INV(lp, &lp->cf_cmd, sizeof(struct cf_cmd)); i596_add_cmd(dev, &lp->cf_cmd.cmd); } } @@ -1514,7 +1514,7 @@ static void set_multicast_list(struct net_device *dev) DEB(DEB_MULTI, printk("%s: Adding address %02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, cp[0],cp[1],cp[2],cp[3],cp[4],cp[5])); } - CHECK_WBACK_INV(&lp->mc_cmd, sizeof(struct mc_cmd)); + CHECK_WBACK_INV(lp, &lp->mc_cmd, sizeof(struct mc_cmd)); i596_add_cmd(dev, &cmd->cmd); } } diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index acee062cd6f..68103e508db 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -362,11 +362,11 @@ NCR_700_detect(struct scsi_host_template *tpnt, for (j = 0; j < PATCHES; j++) script[LABELPATCHES[j]] = bS_to_host(pScript + SCRIPT[LABELPATCHES[j]]); /* now patch up fixed addresses. */ - script_patch_32(script, MessageLocation, + script_patch_32(hostdata->dev, script, MessageLocation, pScript + MSGOUT_OFFSET); - script_patch_32(script, StatusAddress, + script_patch_32(hostdata->dev, script, StatusAddress, pScript + STATUS_OFFSET); - script_patch_32(script, ReceiveMsgAddress, + script_patch_32(hostdata->dev, script, ReceiveMsgAddress, pScript + MSGIN_OFFSET); hostdata->script = script; @@ -821,8 +821,9 @@ process_extended_message(struct Scsi_Host *host, shost_printk(KERN_WARNING, host, "Unexpected SDTR msg\n"); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, + MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -833,8 +834,9 @@ process_extended_message(struct Scsi_Host *host, printk(KERN_INFO "scsi%d: (%d:%d), Unsolicited WDTR after CMD, Rejecting\n", host->host_no, pun, lun); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); resume_offset = hostdata->pScript + Ent_SendMessageWithATN; break; @@ -847,8 +849,9 @@ process_extended_message(struct Scsi_Host *host, printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -929,8 +932,9 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->script, MessageCount, 1); + dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, + 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -939,7 +943,7 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata } NCR_700_writel(temp, host, TEMP_REG); /* set us up to receive another message */ - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); return resume_offset; } @@ -1019,9 +1023,9 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, slot->SG[1].ins = bS_to_host(SCRIPT_RETURN); slot->SG[1].pAddr = 0; slot->resume_offset = hostdata->pScript; - dma_cache_sync(slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); - dma_cache_sync(SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE); - + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, SCp->sense_buffer, sizeof(SCp->sense_buffer), DMA_FROM_DEVICE); + /* queue the command for reissue */ slot->state = NCR_700_SLOT_QUEUED; slot->flags = NCR_700_FLAG_AUTOSENSE; @@ -1136,11 +1140,12 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->cmd = slot->cmnd; /* re-patch for this command */ - script_patch_32_abs(hostdata->script, CommandAddress, - slot->pCmd); - script_patch_16(hostdata->script, + script_patch_32_abs(hostdata->dev, hostdata->script, + CommandAddress, slot->pCmd); + script_patch_16(hostdata->dev, hostdata->script, CommandCount, slot->cmnd->cmd_len); - script_patch_32_abs(hostdata->script, SGScriptStartAddress, + script_patch_32_abs(hostdata->dev, hostdata->script, + SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); /* Note: setting SXFER only works if we're @@ -1150,13 +1155,13 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, * should therefore always clear ACK */ NCR_700_writeb(NCR_700_get_SXFER(hostdata->cmd->device), host, SXFER_REG); - dma_cache_sync(hostdata->msgin, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(hostdata->msgout, + dma_cache_sync(hostdata->dev, hostdata->msgout, MSG_ARRAY_SIZE, DMA_TO_DEVICE); /* I'm just being paranoid here, the command should * already have been flushed from the cache */ - dma_cache_sync(slot->cmnd->cmnd, + dma_cache_sync(hostdata->dev, slot->cmnd->cmnd, slot->cmnd->cmd_len, DMA_TO_DEVICE); @@ -1220,7 +1225,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->reselection_id = reselection_id; /* just in case we have a stale simple tag message, clear it */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->msgin, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); if(hostdata->tag_negotiated & (1<pScript + Ent_GetReselectionWithTag; @@ -1336,7 +1341,7 @@ process_selection(struct Scsi_Host *host, __u32 dsp) hostdata->cmd = NULL; /* clear any stale simple tag message */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); if(id == 0xff) { @@ -1433,29 +1438,30 @@ NCR_700_start_command(struct scsi_cmnd *SCp) NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); } - script_patch_16(hostdata->script, MessageCount, count); + script_patch_16(hostdata->dev, hostdata->script, MessageCount, count); - script_patch_ID(hostdata->script, + script_patch_ID(hostdata->dev, hostdata->script, Device_ID, 1<script, CommandAddress, + script_patch_32_abs(hostdata->dev, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->script, CommandCount, SCp->cmd_len); + script_patch_16(hostdata->dev, hostdata->script, CommandCount, + SCp->cmd_len); /* finally plumb the beginning of the SG list into the script * */ - script_patch_32_abs(hostdata->script, SGScriptStartAddress, - to32bit(&slot->pSG[0].ins)); + script_patch_32_abs(hostdata->dev, hostdata->script, + SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); NCR_700_clear_fifo(SCp->device->host); if(slot->resume_offset == 0) slot->resume_offset = hostdata->pScript; /* now perform all the writebacks and invalidates */ - dma_cache_sync(hostdata->msgout, count, DMA_TO_DEVICE); - dma_cache_sync(hostdata->msgin, MSG_ARRAY_SIZE, + dma_cache_sync(hostdata->dev, hostdata->msgout, count, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); - dma_cache_sync(hostdata->status, 1, DMA_FROM_DEVICE); + dma_cache_sync(hostdata->dev, SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, hostdata->status, 1, DMA_FROM_DEVICE); /* set the synchronous period/offset */ NCR_700_writeb(NCR_700_get_SXFER(SCp->device), @@ -1631,7 +1637,7 @@ NCR_700_intr(int irq, void *dev_id) slot->SG[i].ins = bS_to_host(SCRIPT_NOP); slot->SG[i].pAddr = 0; } - dma_cache_sync(slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); /* and pretend we disconnected after * the command phase */ resume_offset = hostdata->pScript + Ent_MsgInDuringData; @@ -1897,9 +1903,9 @@ NCR_700_queuecommand(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)) } slot->SG[i].ins = bS_to_host(SCRIPT_RETURN); slot->SG[i].pAddr = 0; - dma_cache_sync(slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); DEBUG((" SETTING %08lx to %x\n", - (&slot->pSG[i].ins), + (&slot->pSG[i].ins), slot->SG[i].ins)); } slot->resume_offset = 0; diff --git a/drivers/scsi/53c700.h b/drivers/scsi/53c700.h index f5c3caf344a..f38822db421 100644 --- a/drivers/scsi/53c700.h +++ b/drivers/scsi/53c700.h @@ -415,31 +415,31 @@ struct NCR_700_Host_Parameters { #define NCR_710_MIN_XFERP 0 #define NCR_700_MIN_PERIOD 25 /* for SDTR message, 100ns */ -#define script_patch_32(script, symbol, value) \ +#define script_patch_32(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ __u32 val = bS_to_cpu((script)[A_##symbol##_used[i]]) + value; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching %s at %d to 0x%lx\n", \ #symbol, A_##symbol##_used[i], (value))); \ } \ } -#define script_patch_32_abs(script, symbol, value) \ +#define script_patch_32_abs(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ (script)[A_##symbol##_used[i]] = bS_to_host(value); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching %s at %d to 0x%lx\n", \ #symbol, A_##symbol##_used[i], (value))); \ } \ } /* Used for patching the SCSI ID in the SELECT instruction */ -#define script_patch_ID(script, symbol, value) \ +#define script_patch_ID(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -447,13 +447,13 @@ struct NCR_700_Host_Parameters { val &= 0xff00ffff; \ val |= ((value) & 0xff) << 16; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching ID field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ } -#define script_patch_16(script, symbol, value) \ +#define script_patch_16(dev, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -461,7 +461,7 @@ struct NCR_700_Host_Parameters { val &= 0xffff0000; \ val |= ((value) & 0xffff); \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync(&(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ DEBUG((" script, patching short field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ diff --git a/drivers/serial/mpsc.c b/drivers/serial/mpsc.c index 8eea69f2998..29823bd60fb 100644 --- a/drivers/serial/mpsc.c +++ b/drivers/serial/mpsc.c @@ -555,7 +555,7 @@ mpsc_sdma_start_tx(struct mpsc_port_info *pi) if (!mpsc_sdma_tx_active(pi)) { txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)txre, @@ -931,7 +931,7 @@ mpsc_init_rings(struct mpsc_port_info *pi) } txre->link = cpu_to_be32(pi->txr_p); /* Wrap last back to first */ - dma_cache_sync((void *) pi->dma_region, MPSC_DMA_ALLOC_SIZE, + dma_cache_sync(pi->port.dev, (void *) pi->dma_region, MPSC_DMA_ALLOC_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ @@ -1005,7 +1005,7 @@ mpsc_rx_intr(struct mpsc_port_info *pi) rxre = (struct mpsc_rx_desc *)(pi->rxr + (pi->rxr_posn*MPSC_RXRE_SIZE)); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)rxre, @@ -1029,7 +1029,7 @@ mpsc_rx_intr(struct mpsc_port_info *pi) } bp = pi->rxb + (pi->rxr_posn * MPSC_RXBE_SIZE); - dma_cache_sync((void *) bp, MPSC_RXBE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_RXBE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)bp, @@ -1098,7 +1098,7 @@ next_frame: SDMA_DESC_CMDSTAT_F | SDMA_DESC_CMDSTAT_L); wmb(); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)rxre, @@ -1109,7 +1109,7 @@ next_frame: pi->rxr_posn = (pi->rxr_posn + 1) & (MPSC_RXR_ENTRIES - 1); rxre = (struct mpsc_rx_desc *)(pi->rxr + (pi->rxr_posn * MPSC_RXRE_SIZE)); - dma_cache_sync((void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *)rxre, MPSC_RXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)rxre, @@ -1143,7 +1143,7 @@ mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr) SDMA_DESC_CMDSTAT_EI : 0)); wmb(); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)txre, @@ -1192,7 +1192,7 @@ mpsc_copy_tx_data(struct mpsc_port_info *pi) else /* All tx data copied into ring bufs */ return; - dma_cache_sync((void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)bp, @@ -1217,7 +1217,7 @@ mpsc_tx_intr(struct mpsc_port_info *pi) txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ invalidate_dcache_range((ulong)txre, @@ -1235,7 +1235,7 @@ mpsc_tx_intr(struct mpsc_port_info *pi) txre = (struct mpsc_tx_desc *)(pi->txr + (pi->txr_tail * MPSC_TXRE_SIZE)); - dma_cache_sync((void *) txre, MPSC_TXRE_SIZE, + dma_cache_sync(pi->port.dev, (void *) txre, MPSC_TXRE_SIZE, DMA_FROM_DEVICE); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ @@ -1652,7 +1652,7 @@ mpsc_console_write(struct console *co, const char *s, uint count) count--; } - dma_cache_sync((void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); + dma_cache_sync(pi->port.dev, (void *) bp, MPSC_TXBE_SIZE, DMA_BIDIRECTIONAL); #if defined(CONFIG_PPC32) && !defined(CONFIG_NOT_COHERENT_CACHE) if (pi->cache_mgmt) /* GT642[46]0 Res #COMM-2 */ flush_dcache_range((ulong)bp, diff --git a/include/asm-alpha/dma-mapping.h b/include/asm-alpha/dma-mapping.h index b274bf6317c..57e09f5e342 100644 --- a/include/asm-alpha/dma-mapping.h +++ b/include/asm-alpha/dma-mapping.h @@ -60,7 +60,7 @@ int dma_set_mask(struct device *dev, u64 mask); #define dma_sync_single_range(dev, addr, off, size, dir) do { } while (0) #define dma_sync_sg_for_cpu(dev, sg, nents, dir) do { } while (0) #define dma_sync_sg_for_device(dev, sg, nents, dir) do { } while (0) -#define dma_cache_sync(va, size, dir) do { } while (0) +#define dma_cache_sync(dev, va, size, dir) do { } while (0) #define dma_get_cache_alignment() L1_CACHE_BYTES diff --git a/include/asm-avr32/dma-mapping.h b/include/asm-avr32/dma-mapping.h index 44630be2ee2..0580b5d62bb 100644 --- a/include/asm-avr32/dma-mapping.h +++ b/include/asm-avr32/dma-mapping.h @@ -8,7 +8,8 @@ #include #include -extern void dma_cache_sync(void *vaddr, size_t size, int direction); +extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, + int direction); /* * Return whether the given device DMA address mask can be supported diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index af704fdd3d0..662cea70152 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -159,7 +159,7 @@ dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { } diff --git a/include/asm-frv/dma-mapping.h b/include/asm-frv/dma-mapping.h index 7b97fc785f7..bcb2df68496 100644 --- a/include/asm-frv/dma-mapping.h +++ b/include/asm-frv/dma-mapping.h @@ -175,7 +175,7 @@ int dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) static inline -void dma_cache_sync(void *vaddr, size_t size, +void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { flush_write_buffers(); diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h index b9be3fc344c..783ab9944d7 100644 --- a/include/asm-generic/dma-mapping.h +++ b/include/asm-generic/dma-mapping.h @@ -295,7 +295,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { /* could define this in terms of the dma_cache ... operations, diff --git a/include/asm-i386/dma-mapping.h b/include/asm-i386/dma-mapping.h index 7da64c9f173..183eebeebbd 100644 --- a/include/asm-i386/dma-mapping.h +++ b/include/asm-i386/dma-mapping.h @@ -159,7 +159,7 @@ dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { flush_write_buffers(); diff --git a/include/asm-ia64/dma-mapping.h b/include/asm-ia64/dma-mapping.h index 4b075bc032e..ebd5887f4b1 100644 --- a/include/asm-ia64/dma-mapping.h +++ b/include/asm-ia64/dma-mapping.h @@ -50,7 +50,8 @@ dma_set_mask (struct device *dev, u64 mask) extern int dma_get_cache_alignment(void); static inline void -dma_cache_sync (void *vaddr, size_t size, enum dma_data_direction dir) +dma_cache_sync (struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) { /* * IA-64 is cache-coherent, so this is mostly a no-op. However, we do need to diff --git a/include/asm-m68k/dma-mapping.h b/include/asm-m68k/dma-mapping.h index efc89c12f83..00259ed6fc9 100644 --- a/include/asm-m68k/dma-mapping.h +++ b/include/asm-m68k/dma-mapping.h @@ -41,7 +41,7 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, { dma_free_coherent(dev, size, addr, handle); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { /* we use coherent allocation, so not much to do here. */ diff --git a/include/asm-mips/dma-mapping.h b/include/asm-mips/dma-mapping.h index e17f70d7b70..236d1a467cc 100644 --- a/include/asm-mips/dma-mapping.h +++ b/include/asm-mips/dma-mapping.h @@ -65,7 +65,7 @@ dma_get_cache_alignment(void) extern int dma_is_consistent(struct device *dev, dma_addr_t dma_addr); -extern void dma_cache_sync(void *vaddr, size_t size, +extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); #define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY diff --git a/include/asm-parisc/dma-mapping.h b/include/asm-parisc/dma-mapping.h index c40d48afdc5..66f0b408c66 100644 --- a/include/asm-parisc/dma-mapping.h +++ b/include/asm-parisc/dma-mapping.h @@ -197,7 +197,7 @@ dma_is_consistent(struct device *dev, dma_addr_t dma_addr) } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { if(hppa_dma_ops->dma_sync_single_for_cpu) diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h index 3cf635b53b8..7c7de87bd8a 100644 --- a/include/asm-powerpc/dma-mapping.h +++ b/include/asm-powerpc/dma-mapping.h @@ -378,7 +378,7 @@ static inline void dma_sync_single_range_for_device(struct device *dev, dma_sync_single_for_device(dev, dma_handle, offset + size, direction); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 56cd4b97723..37ab0c131a4 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -53,7 +53,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, consistent_free(vaddr, size); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { consistent_sync(vaddr, size, (int)dir); diff --git a/include/asm-sh64/dma-mapping.h b/include/asm-sh64/dma-mapping.h index 68e27a8fca3..5efe906c59f 100644 --- a/include/asm-sh64/dma-mapping.h +++ b/include/asm-sh64/dma-mapping.h @@ -35,7 +35,7 @@ static inline void dma_free_coherent(struct device *dev, size_t size, consistent_free(NULL, size, vaddr, dma_handle); } -static inline void dma_cache_sync(void *vaddr, size_t size, +static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) { dma_cache_wback_inv((unsigned long)vaddr, size); diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index 5fe0072f3f8..2f858a2df94 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -210,7 +210,7 @@ dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { /* could define this in terms of the dma_cache ... operations, diff --git a/include/asm-um/dma-mapping.h b/include/asm-um/dma-mapping.h index defb5b8307d..f0ee4fb5591 100644 --- a/include/asm-um/dma-mapping.h +++ b/include/asm-um/dma-mapping.h @@ -112,7 +112,7 @@ dma_sync_single_range(struct device *dev, dma_addr_t dma_handle, } static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { BUG(); diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index c8cc4887fba..be9ec689072 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h @@ -185,7 +185,8 @@ static inline int dma_get_cache_alignment(void) extern int dma_set_mask(struct device *dev, u64 mask); static inline void -dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) { flush_write_buffers(); } diff --git a/include/asm-xtensa/dma-mapping.h b/include/asm-xtensa/dma-mapping.h index 827d1dfd9e1..82b03b3a2ee 100644 --- a/include/asm-xtensa/dma-mapping.h +++ b/include/asm-xtensa/dma-mapping.h @@ -173,7 +173,7 @@ dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) static inline void -dma_cache_sync(void *vaddr, size_t size, +dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { consistent_sync(vaddr, size, direction); -- cgit v1.2.3-70-g09d2 From f7dff2b12654149c9cac8d8c79b6588759edd5a9 Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Wed, 6 Dec 2006 20:38:58 -0800 Subject: [PATCH] Handle per-subsystem mutexes for CONFIG_HOTPLUG_CPU not set Provide a common interface for all the subsystems to lock and unlock their per-subsystem hotcpu mutexes. When CONFIG_HOTPLUG_CPU is not set, these operations would be no-ops. [akpm@osdl.org: macros -> inlines] Signed-off-by: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 71dc6ba4f73..bf00ce6ecad 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -24,6 +24,7 @@ #include #include #include +#include struct cpu { int node_id; /* The node which contains the CPU */ @@ -74,6 +75,17 @@ extern struct sysdev_class cpu_sysdev_class; #ifdef CONFIG_HOTPLUG_CPU /* Stop CPUs going up and down. */ + +static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) +{ + mutex_lock(cpu_hp_mutex); +} + +static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) +{ + mutex_unlock(cpu_hp_mutex); +} + extern void lock_cpu_hotplug(void); extern void unlock_cpu_hotplug(void); #define hotcpu_notifier(fn, pri) { \ @@ -85,7 +97,14 @@ extern void unlock_cpu_hotplug(void); #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) int cpu_down(unsigned int cpu); #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) -#else + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpuhotplug_mutex_lock(struct mutex *cpu_hp_mutex) +{ } +static inline void cpuhotplug_mutex_unlock(struct mutex *cpu_hp_mutex) +{ } + #define lock_cpu_hotplug() do { } while (0) #define unlock_cpu_hotplug() do { } while (0) #define lock_cpu_hotplug_interruptible() 0 @@ -95,7 +114,7 @@ int cpu_down(unsigned int cpu); /* CPUs don't go offline once they're online w/o CONFIG_HOTPLUG_CPU */ static inline int cpu_is_offline(int cpu) { return 0; } -#endif +#endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_SUSPEND_SMP extern int disable_nonboot_cpus(void); -- cgit v1.2.3-70-g09d2 From 0b71c8e76d20d0329bf7e54f172389f3c343dc41 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 6 Dec 2006 20:39:15 -0800 Subject: [PATCH] remove useless carta_random32.h Remove the carta_random32.h header file. The carta_random32() function was was put in and removed in favor of random32(). In the removal process, the header file was forgotten. Signed-off-by: Stephane Eranian Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/carta_random32.h | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/carta_random32.h (limited to 'include') diff --git a/include/linux/carta_random32.h b/include/linux/carta_random32.h deleted file mode 100644 index f6f3bd9f20b..00000000000 --- a/include/linux/carta_random32.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Fast, simple, yet decent quality random number generator based on - * a paper by David G. Carta ("Two Fast Implementations of the - * `Minimal Standard' Random Number Generator," Communications of the - * ACM, January, 1990). - * - * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. - * Contributed by Stephane Eranian - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - * 02111-1307 USA - */ -#ifndef _LINUX_CARTA_RANDOM32_H_ -#define _LINUX_CARTA_RANDOM32_H_ - -u64 carta_random32(u64 seed); - -#endif /* _LINUX_CARTA_RANDOM32_H_ */ -- cgit v1.2.3-70-g09d2 From 2ee91f197c0bc654b24eed5831fd12aa0d566a7d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 Dec 2006 20:39:32 -0800 Subject: [PATCH] lockdep: show more details about self-test failures Make the locking self-test failures (of 'FAILURE' type) easier to debug by printing more information. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/debug_locks.h | 2 +- kernel/mutex-debug.c | 3 +++ lib/locking-selftest.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 952bee79a8f..a1c10b0c4cf 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -24,7 +24,7 @@ extern int debug_locks_off(void); int __ret = 0; \ \ if (unlikely(c)) { \ - if (debug_locks_off()) \ + if (debug_locks_silent || debug_locks_off()) \ WARN_ON(1); \ __ret = 1; \ } \ diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 18651641a7b..841539d72c5 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -77,6 +77,9 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { + if (unlikely(!debug_locks)) + return; + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(lock->magic != lock); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 7945787f439..280332c1827 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -963,7 +963,9 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) printk("failed|"); } else { unexpected_testcase_failures++; + printk("FAILED|"); + dump_stack(); } } else { testcase_successes++; -- cgit v1.2.3-70-g09d2 From 12d40e43d251de4fa1f982567fc8b4ee5e858367 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 6 Dec 2006 20:39:53 -0800 Subject: [PATCH] Save some bytes in struct inode [acme@newtoy net-2.6.20]$ pahole --cacheline 64 fs/inode.o inode /* /pub/scm/linux/kernel/git/acme/net-2.6.20/include/linux/dcache.h:86 */ struct inode { struct hlist_node i_hash; /* 0 8 */ struct list_head i_list; /* 8 8 */ struct list_head i_sb_list; /* 16 8 */ struct list_head i_dentry; /* 24 8 */ long unsigned int i_ino; /* 32 4 */ atomic_t i_count; /* 36 4 */ umode_t i_mode; /* 40 2 */ /* XXX 2 bytes hole, try to pack */ unsigned int i_nlink; /* 44 4 */ uid_t i_uid; /* 48 4 */ gid_t i_gid; /* 52 4 */ dev_t i_rdev; /* 56 4 */ loff_t i_size; /* 60 8 */ struct timespec i_atime; /* 68 8 */ struct timespec i_mtime; /* 76 8 */ struct timespec i_ctime; /* 84 8 */ unsigned int i_blkbits; /* 92 4 */ long unsigned int i_version; /* 96 4 */ blkcnt_t i_blocks; /* 100 4 */ short unsigned int i_bytes; /* 104 2 */ /* XXX 2 bytes hole, try to pack */ spinlock_t i_lock; /* 108 40 */ struct mutex i_mutex; /* 148 76 */ struct rw_semaphore i_alloc_sem; /* 224 64 */ struct inode_operations * i_op; /* 288 4 */ const struct file_operations * i_fop; /* 292 4 */ struct super_block * i_sb; /* 296 4 */ struct file_lock * i_flock; /* 300 4 */ struct address_space * i_mapping; /* 304 4 */ struct address_space i_data; /* 308 188 */ struct list_head i_devices; /* 496 8 */ union ; /* 504 4 */ int i_cindex; /* 508 4 */ __u32 i_generation; /* 512 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int i_dnotify_mask; /* 516 4 */ struct dnotify_struct * i_dnotify; /* 520 4 */ struct list_head inotify_watches; /* 524 8 */ struct mutex inotify_mutex; /* 532 76 */ long unsigned int i_state; /* 608 4 */ long unsigned int dirtied_when; /* 612 4 */ unsigned int i_flags; /* 616 4 */ atomic_t i_writecount; /* 620 4 */ void * i_security; /* 624 4 */ void * i_private; /* 628 4 */ }; /* size: 632, sum members: 628, holes: 2, sum holes: 4 */ [acme@newtoy net-2.6.20]$ So just moving i_mode to after i_bytes we save 4 bytes by nuking both holes: [acme@newtoy net-2.6.20]$ codiff -V /tmp/inode.o.before fs/inode.o /pub/scm/linux/kernel/git/acme/net-2.6.20/fs/inode.c: struct inode | -4 i_mode; from: umode_t /* 40(0) 2(0) */ to: umode_t /* 102(0) 2(0) */ 1 struct changed [acme@newtoy net-2.6.20]$ I've prunned all the other offset changes, only this one is of interest here. So now we have: [acme@newtoy net-2.6.20]$ pahole --cacheline 64 ../OUTPUT/qemu/net-2.6.20/fs/inode.o inode /* /pub/scm/linux/kernel/git/acme/net-2.6.20/include/linux/dcache.h:86 */ struct inode { struct hlist_node i_hash; /* 0 8 */ struct list_head i_list; /* 8 8 */ struct list_head i_sb_list; /* 16 8 */ struct list_head i_dentry; /* 24 8 */ long unsigned int i_ino; /* 32 4 */ atomic_t i_count; /* 36 4 */ unsigned int i_nlink; /* 40 4 */ uid_t i_uid; /* 44 4 */ gid_t i_gid; /* 48 4 */ dev_t i_rdev; /* 52 4 */ loff_t i_size; /* 56 8 */ /* ---------- cacheline 1 boundary ---------- */ struct timespec i_atime; /* 64 8 */ struct timespec i_mtime; /* 72 8 */ struct timespec i_ctime; /* 80 8 */ unsigned int i_blkbits; /* 88 4 */ long unsigned int i_version; /* 92 4 */ blkcnt_t i_blocks; /* 96 4 */ short unsigned int i_bytes; /* 100 2 */ umode_t i_mode; /* 102 2 */ spinlock_t i_lock; /* 104 40 */ struct mutex i_mutex; /* 144 76 */ struct rw_semaphore i_alloc_sem; /* 220 64 */ struct inode_operations * i_op; /* 284 4 */ const struct file_operations * i_fop; /* 288 4 */ struct super_block * i_sb; /* 292 4 */ struct file_lock * i_flock; /* 296 4 */ struct address_space * i_mapping; /* 300 4 */ struct address_space i_data; /* 304 188 */ struct list_head i_devices; /* 492 8 */ union ; /* 500 4 */ int i_cindex; /* 504 4 */ __u32 i_generation; /* 508 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int i_dnotify_mask; /* 512 4 */ struct dnotify_struct * i_dnotify; /* 516 4 */ struct list_head inotify_watches; /* 520 8 */ struct mutex inotify_mutex; /* 528 76 */ long unsigned int i_state; /* 604 4 */ long unsigned int dirtied_when; /* 608 4 */ unsigned int i_flags; /* 612 4 */ atomic_t i_writecount; /* 616 4 */ void * i_security; /* 620 4 */ void * i_private; /* 624 4 */ }; /* size: 628 */ [acme@newtoy net-2.6.20]$ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3a1927e516d..70b99fbb560 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -543,7 +543,6 @@ struct inode { struct list_head i_dentry; unsigned long i_ino; atomic_t i_count; - umode_t i_mode; unsigned int i_nlink; uid_t i_uid; gid_t i_gid; @@ -559,6 +558,7 @@ struct inode { unsigned int i_blkbits; blkcnt_t i_blocks; unsigned short i_bytes; + umode_t i_mode; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ struct mutex i_mutex; struct rw_semaphore i_alloc_sem; -- cgit v1.2.3-70-g09d2 From 0da1480ec33d4bac8c32051c1d33202be6dc439f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:40:03 -0800 Subject: [PATCH] proper prototype for remove_inode_dquot_ref() Add a proper prototype for remove_inode_dquot_ref() in include/linux/quotaops.h Signed-off-by: Adrian Bunk Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 3 --- include/linux/quotaops.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 699aa4f7aa7..9ecccab7326 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1242,9 +1242,6 @@ EXPORT_SYMBOL(inode_needs_sync); */ #ifdef CONFIG_QUOTA -/* Function back in dquot.c */ -int remove_inode_dquot_ref(struct inode *, int, struct list_head *); - void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 5110201a415..90c23f690c0 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -37,6 +37,9 @@ extern int dquot_release(struct dquot *dquot); extern int dquot_commit_info(struct super_block *sb, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); +int remove_inode_dquot_ref(struct inode *inode, int type, + struct list_head *tofree_head); + extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); -- cgit v1.2.3-70-g09d2 From 65867beb0de4d055637476327b533e5ffbec2b97 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 6 Dec 2006 20:40:07 -0800 Subject: [PATCH] Trivial cleanup in the PCI IDs for the CS5535 Rename a poorly worded PCI ID for the Geode GX and CS5535 companion chips. The graphics processor and host bridge actually live in the northbridge on the integrated processor, not in the companion chip. Signed-off-by: Jordan Crouse Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/geode/gxfb_core.c | 2 +- include/linux/pci_ids.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/video/geode/gxfb_core.c b/drivers/video/geode/gxfb_core.c index 0d3643fc629..a454dcb8e21 100644 --- a/drivers/video/geode/gxfb_core.c +++ b/drivers/video/geode/gxfb_core.c @@ -380,7 +380,7 @@ static void gxfb_remove(struct pci_dev *pdev) } static struct pci_device_id gxfb_id_table[] = { - { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_VIDEO, + { PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_GX_VIDEO, PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16, 0xff0000, 0 }, { 0, } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index dcdb90f06d7..ff2dcb436cd 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -390,7 +390,7 @@ #define PCI_DEVICE_ID_NS_CS5535_IDE 0x002d #define PCI_DEVICE_ID_NS_CS5535_AUDIO 0x002e #define PCI_DEVICE_ID_NS_CS5535_USB 0x002f -#define PCI_DEVICE_ID_NS_CS5535_VIDEO 0x0030 +#define PCI_DEVICE_ID_NS_GX_VIDEO 0x0030 #define PCI_DEVICE_ID_NS_SATURN 0x0035 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE 0x0500 #define PCI_DEVICE_ID_NS_SCx200_SMI 0x0501 @@ -403,8 +403,7 @@ #define PCI_DEVICE_ID_NS_SC1100_XBUS 0x0515 #define PCI_DEVICE_ID_NS_87410 0xd001 -#define PCI_DEVICE_ID_NS_CS5535_HOST_BRIDGE 0x0028 -#define PCI_DEVICE_ID_NS_CS5535_ISA_BRIDGE 0x002b +#define PCI_DEVICE_ID_NS_GX_HOST_BRIDGE 0x0028 #define PCI_VENDOR_ID_TSENG 0x100c #define PCI_DEVICE_ID_TSENG_W32P_2 0x3202 -- cgit v1.2.3-70-g09d2 From 5296c7bec8c85aa0a6964eca9ff4a1a8847fba8a Mon Sep 17 00:00:00 2001 From: Mariusz Kozlowski Date: Wed, 6 Dec 2006 20:40:10 -0800 Subject: [PATCH] fs: reiserfs add missing brackets Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reiserfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 7bc6bfb8625..d0e4dce33ad 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -739,7 +739,7 @@ struct block_head { #define PUT_B_FREE_SPACE(p_s_bh,val) do { set_blkh_free_space(B_BLK_HEAD(p_s_bh),val); } while (0) /* Get right delimiting key. -- little endian */ -#define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)) +#define B_PRIGHT_DELIM_KEY(p_s_bh) (&(blk_right_delim_key(B_BLK_HEAD(p_s_bh)))) /* Does the buffer contain a disk leaf. */ #define B_IS_ITEMS_LEVEL(p_s_bh) (B_LEVEL(p_s_bh) == DISK_LEAF_NODE_LEVEL) -- cgit v1.2.3-70-g09d2 From a0e7688df1484fbf4d6d61c31f7d61a5d8cacf3c Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 6 Dec 2006 20:40:12 -0800 Subject: [PATCH] Kbuild: add 3 more header files to get properly "unifdef"ed Add 3 more files to get "unifdef"ed when creating sanitized headers with "make headers_install". Signed-off-by: Robert P. J. Day Acked-by: David Woodhouse Acked-by: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index ff433126361..e618b25b5ad 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -221,6 +221,7 @@ unifdef-y += if_bridge.h unifdef-y += if_ec.h unifdef-y += if_eql.h unifdef-y += if_ether.h +unifdef-y += if_fddi.h unifdef-y += if_frad.h unifdef-y += if_ltalk.h unifdef-y += if_pppox.h @@ -282,6 +283,7 @@ unifdef-y += nvram.h unifdef-y += parport.h unifdef-y += patchkey.h unifdef-y += pci.h +unifdef-y += personality.h unifdef-y += pktcdvd.h unifdef-y += pmu.h unifdef-y += poll.h @@ -337,6 +339,7 @@ unifdef-y += videodev.h unifdef-y += wait.h unifdef-y += wanrouter.h unifdef-y += watchdog.h +unifdef-y += wireless.h unifdef-y += xfrm.h objhdr-y += version.h -- cgit v1.2.3-70-g09d2 From bb8cc641653d785e3f3b8d3b0182a69edf825802 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:40:21 -0800 Subject: [PATCH] include/asm-cris/: "extern inline" -> "static inline" "extern inline" generates a warning with -Wmissing-prototypes and I'm currently working on getting the kernel cleaned up for adding this to the CFLAGS since it will help us to avoid a nasty class of runtime errors. If there are places that really need a forced inline, __always_inline would be the correct solution. Signed-off-by: Adrian Bunk Acked-by: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/arch-v10/bitops.h | 10 +++++----- include/asm-cris/semaphore-helper.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-cris/arch-v10/bitops.h b/include/asm-cris/arch-v10/bitops.h index b73f5396e5a..be85f6de25d 100644 --- a/include/asm-cris/arch-v10/bitops.h +++ b/include/asm-cris/arch-v10/bitops.h @@ -10,7 +10,7 @@ * number. They differ in that the first function also inverts all bits * in the input. */ -extern inline unsigned long cris_swapnwbrlz(unsigned long w) +static inline unsigned long cris_swapnwbrlz(unsigned long w) { /* Let's just say we return the result in the same register as the input. Saying we clobber the input but can return the result @@ -26,7 +26,7 @@ extern inline unsigned long cris_swapnwbrlz(unsigned long w) return res; } -extern inline unsigned long cris_swapwbrlz(unsigned long w) +static inline unsigned long cris_swapwbrlz(unsigned long w) { unsigned res; __asm__ ("swapwbr %0 \n\t" @@ -40,7 +40,7 @@ extern inline unsigned long cris_swapwbrlz(unsigned long w) * ffz = Find First Zero in word. Undefined if no zero exists, * so code should check against ~0UL first.. */ -extern inline unsigned long ffz(unsigned long w) +static inline unsigned long ffz(unsigned long w) { return cris_swapnwbrlz(w); } @@ -51,7 +51,7 @@ extern inline unsigned long ffz(unsigned long w) * * Undefined if no bit exists, so code should check against 0 first. */ -extern inline unsigned long __ffs(unsigned long word) +static inline unsigned long __ffs(unsigned long word) { return cris_swapnwbrlz(~word); } @@ -65,7 +65,7 @@ extern inline unsigned long __ffs(unsigned long word) * differs in spirit from the above ffz (man ffs). */ -extern inline unsigned long kernel_ffs(unsigned long w) +static inline unsigned long kernel_ffs(unsigned long w) { return w ? cris_swapwbrlz (w) + 1 : 0; } diff --git a/include/asm-cris/semaphore-helper.h b/include/asm-cris/semaphore-helper.h index dbd0f30b85b..a8e1e6cb7cd 100644 --- a/include/asm-cris/semaphore-helper.h +++ b/include/asm-cris/semaphore-helper.h @@ -20,12 +20,12 @@ /* * These two _must_ execute atomically wrt each other. */ -extern inline void wake_one_more(struct semaphore * sem) +static inline void wake_one_more(struct semaphore * sem) { atomic_inc(&sem->waking); } -extern inline int waking_non_zero(struct semaphore *sem) +static inline int waking_non_zero(struct semaphore *sem) { unsigned long flags; int ret = 0; @@ -40,7 +40,7 @@ extern inline int waking_non_zero(struct semaphore *sem) return ret; } -extern inline int waking_non_zero_interruptible(struct semaphore *sem, +static inline int waking_non_zero_interruptible(struct semaphore *sem, struct task_struct *tsk) { int ret = 0; @@ -59,7 +59,7 @@ extern inline int waking_non_zero_interruptible(struct semaphore *sem, return ret; } -extern inline int waking_non_zero_trylock(struct semaphore *sem) +static inline int waking_non_zero_trylock(struct semaphore *sem) { int ret = 1; unsigned long flags; -- cgit v1.2.3-70-g09d2 From 219576e127c4f7770aa985f719e815936bc54367 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:40:22 -0800 Subject: [PATCH] include/asm-h8300/: "extern inline" -> "static inline" "extern inline" generates a warning with -Wmissing-prototypes and I'm currently working on getting the kernel cleaned up for adding this to the CFLAGS since it will help us to avoid a nasty class of runtime errors. If there are places that really need a forced inline, __always_inline would be the correct solution. Signed-off-by: Adrian Bunk Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-h8300/delay.h | 4 ++-- include/asm-h8300/mmu_context.h | 4 ++-- include/asm-h8300/pci.h | 4 ++-- include/asm-h8300/tlbflush.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-h8300/delay.h b/include/asm-h8300/delay.h index cbccbbdd640..743beba70f8 100644 --- a/include/asm-h8300/delay.h +++ b/include/asm-h8300/delay.h @@ -9,7 +9,7 @@ * Delay routines, using a pre-computed "loops_per_second" value. */ -extern __inline__ void __delay(unsigned long loops) +static inline void __delay(unsigned long loops) { __asm__ __volatile__ ("1:\n\t" "dec.l #1,%0\n\t" @@ -27,7 +27,7 @@ extern __inline__ void __delay(unsigned long loops) extern unsigned long loops_per_jiffy; -extern __inline__ void udelay(unsigned long usecs) +static inline void udelay(unsigned long usecs) { usecs *= 4295; /* 2**32 / 1000000 */ usecs /= (loops_per_jiffy*HZ); diff --git a/include/asm-h8300/mmu_context.h b/include/asm-h8300/mmu_context.h index 855721a5dcc..5c165f7bee0 100644 --- a/include/asm-h8300/mmu_context.h +++ b/include/asm-h8300/mmu_context.h @@ -9,7 +9,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -extern inline int +static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { // mm->context = virt_to_phys(mm->pgd); @@ -23,7 +23,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, str { } -extern inline void activate_mm(struct mm_struct *prev_mm, +static inline void activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) { } diff --git a/include/asm-h8300/pci.h b/include/asm-h8300/pci.h index 5edad5b70fd..0c771b05fdd 100644 --- a/include/asm-h8300/pci.h +++ b/include/asm-h8300/pci.h @@ -10,12 +10,12 @@ #define pcibios_assign_all_busses() 0 #define pcibios_scan_all_fns(a, b) 0 -extern inline void pcibios_set_master(struct pci_dev *dev) +static inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ } -extern inline void pcibios_penalize_isa_irq(int irq, int active) +static inline void pcibios_penalize_isa_irq(int irq, int active) { /* We don't do dynamic PCI IRQ allocation */ } diff --git a/include/asm-h8300/tlbflush.h b/include/asm-h8300/tlbflush.h index bbdffbeeede..9a2c5c9fd70 100644 --- a/include/asm-h8300/tlbflush.h +++ b/include/asm-h8300/tlbflush.h @@ -47,12 +47,12 @@ static inline void flush_tlb_range(struct mm_struct *mm, BUG(); } -extern inline void flush_tlb_kernel_page(unsigned long addr) +static inline void flush_tlb_kernel_page(unsigned long addr) { BUG(); } -extern inline void flush_tlb_pgtables(struct mm_struct *mm, +static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) { BUG(); -- cgit v1.2.3-70-g09d2 From 4b358e22064b4551aa8b4dcfe3efe70a13548676 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:40:28 -0800 Subject: [PATCH] cleanup include/asm-generic/atomic.h cleanup asm-generic/atomic.h - no longer a userspace header - remove the unneeded #include - #else/#endif comments [akpm@osdl.org: fix arm build] Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/Kbuild | 1 - include/asm-generic/atomic.h | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 3c06be38170..fa14f8cd30c 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -1,4 +1,3 @@ -header-y += atomic.h header-y += errno-base.h header-y += errno.h header-y += fcntl.h diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 42a95d9a064..b7e4a0467cb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -66,7 +66,7 @@ static inline void atomic_long_sub(long i, atomic_long_t *l) atomic64_sub(i, v); } -#else +#else /* BITS_PER_LONG == 64 */ typedef atomic_t atomic_long_t; @@ -113,5 +113,6 @@ static inline void atomic_long_sub(long i, atomic_long_t *l) atomic_sub(i, v); } -#endif -#endif +#endif /* BITS_PER_LONG == 64 */ + +#endif /* _ASM_GENERIC_ATOMIC_H */ -- cgit v1.2.3-70-g09d2 From 15ad7cdcfd76450d4beebc789ec646664238184d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 6 Dec 2006 20:40:36 -0800 Subject: [PATCH] struct seq_operations and struct file_operations constification - move some file_operations structs into the .rodata section - move static strings from policy_types[] array into the .rodata section - fix generic seq_operations usages, so that those structs may be defined as "const" as well [akpm@osdl.org: couple of fixes] Signed-off-by: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 4 ++-- include/linux/cpuset.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/relay.h | 2 +- include/linux/sched.h | 2 +- include/linux/seq_file.h | 4 ++-- kernel/configs.c | 2 +- kernel/cpuset.c | 4 ++-- kernel/dma.c | 2 +- kernel/futex.c | 2 +- kernel/kallsyms.c | 4 ++-- kernel/lockdep_proc.c | 6 +++--- kernel/module.c | 2 +- kernel/power/user.c | 2 +- kernel/profile.c | 2 +- kernel/relay.c | 2 +- kernel/resource.c | 6 +++--- kernel/sched.c | 2 +- kernel/sysctl.c | 2 +- mm/mempolicy.c | 4 ++-- mm/page_alloc.c | 2 +- mm/shmem.c | 4 ++-- mm/slab.c | 4 ++-- mm/swapfile.c | 4 ++-- mm/vmstat.c | 8 ++++---- 25 files changed, 40 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/fs/seq_file.c b/fs/seq_file.c index 555b9ac04c2..10690aa401c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -26,7 +26,7 @@ * ERR_PTR(error). In the end of sequence they return %NULL. ->show() * returns 0 in case of success and negative number in case of error. */ -int seq_open(struct file *file, struct seq_operations *op) +int seq_open(struct file *file, const struct seq_operations *op) { struct seq_file *p = file->private_data; @@ -408,7 +408,7 @@ EXPORT_SYMBOL(single_open); int single_release(struct inode *inode, struct file *file) { - struct seq_operations *op = ((struct seq_file *)file->private_data)->op; + const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; int res = seq_release(inode, file); kfree(op); return res; diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 748d2c99663..8821e1f75b4 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -46,7 +46,7 @@ extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern int cpuset_memory_pressure_enabled; extern void __cpuset_memory_pressure_bump(void); -extern struct file_operations proc_cpuset_operations; +extern const struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); extern void cpuset_lock(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index da6002dec20..e339a7345f2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -278,7 +278,7 @@ struct zone { /* * rarely used fields: */ - char *name; + const char *name; } ____cacheline_internodealigned_in_smp; /* diff --git a/include/linux/relay.h b/include/linux/relay.h index 0e3d91b7699..c6a48bfc8b1 100644 --- a/include/linux/relay.h +++ b/include/linux/relay.h @@ -274,7 +274,7 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf, /* * exported relay file operations, kernel/relay.c */ -extern struct file_operations relay_file_operations; +extern const struct file_operations relay_file_operations; #endif /* _LINUX_RELAY_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3a767242e72..dede82c6344 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -573,7 +573,7 @@ struct sched_info { #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ #ifdef CONFIG_SCHEDSTATS -extern struct file_operations proc_schedstat_operations; +extern const struct file_operations proc_schedstat_operations; #endif /* CONFIG_SCHEDSTATS */ #ifdef CONFIG_TASK_DELAY_ACCT diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b95f6eb7254..3e3cccbb1ca 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -20,7 +20,7 @@ struct seq_file { loff_t index; loff_t version; struct mutex lock; - struct seq_operations *op; + const struct seq_operations *op; void *private; }; @@ -31,7 +31,7 @@ struct seq_operations { int (*show) (struct seq_file *m, void *v); }; -int seq_open(struct file *, struct seq_operations *); +int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); diff --git a/kernel/configs.c b/kernel/configs.c index f9e31974f4a..8fa1fb28f8a 100644 --- a/kernel/configs.c +++ b/kernel/configs.c @@ -75,7 +75,7 @@ ikconfig_read_current(struct file *file, char __user *buf, return count; } -static struct file_operations ikconfig_file_ops = { +static const struct file_operations ikconfig_file_ops = { .owner = THIS_MODULE, .read = ikconfig_read_current, }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 9b62b4c03ad..4ef1d29297c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1531,7 +1531,7 @@ static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, return simple_rename(old_dir, old_dentry, new_dir, new_dentry); } -static struct file_operations cpuset_file_operations = { +static const struct file_operations cpuset_file_operations = { .read = cpuset_file_read, .write = cpuset_file_write, .llseek = generic_file_llseek, @@ -2605,7 +2605,7 @@ static int cpuset_open(struct inode *inode, struct file *file) return single_open(file, proc_cpuset_show, pid); } -struct file_operations proc_cpuset_operations = { +const struct file_operations proc_cpuset_operations = { .open = cpuset_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/dma.c b/kernel/dma.c index 2020644c938..937b13ca33b 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -140,7 +140,7 @@ static int proc_dma_open(struct inode *inode, struct file *file) return single_open(file, proc_dma_show, NULL); } -static struct file_operations proc_dma_operations = { +static const struct file_operations proc_dma_operations = { .open = proc_dma_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/futex.c b/kernel/futex.c index a8302a1620e..95989a3b416 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1492,7 +1492,7 @@ static unsigned int futex_poll(struct file *filp, return ret; } -static struct file_operations futex_fops = { +static const struct file_operations futex_fops = { .release = futex_close, .poll = futex_poll, }; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 54befe36ee0..ab63cfc4299 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -398,7 +398,7 @@ static int s_show(struct seq_file *m, void *p) return 0; } -static struct seq_operations kallsyms_op = { +static const struct seq_operations kallsyms_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -433,7 +433,7 @@ static int kallsyms_release(struct inode *inode, struct file *file) return seq_release(inode, file); } -static struct file_operations kallsyms_operations = { +static const struct file_operations kallsyms_operations = { .open = kallsyms_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index f6e72eaab3f..b554b40a4aa 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -113,7 +113,7 @@ static int l_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations lockdep_ops = { +static const struct seq_operations lockdep_ops = { .start = l_start, .next = l_next, .stop = l_stop, @@ -135,7 +135,7 @@ static int lockdep_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_lockdep_operations = { +static const struct file_operations proc_lockdep_operations = { .open = lockdep_open, .read = seq_read, .llseek = seq_lseek, @@ -319,7 +319,7 @@ static int lockdep_stats_open(struct inode *inode, struct file *file) return single_open(file, lockdep_stats_show, NULL); } -static struct file_operations proc_lockdep_stats_operations = { +static const struct file_operations proc_lockdep_stats_operations = { .open = lockdep_stats_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/module.c b/kernel/module.c index e2d09d604ca..d9eae45d014 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2209,7 +2209,7 @@ static int m_show(struct seq_file *m, void *p) Where refcount is a number or -, and deps is a comma-separated list of depends or -. */ -struct seq_operations modules_op = { +const struct seq_operations modules_op = { .start = m_start, .next = m_next, .stop = m_stop, diff --git a/kernel/power/user.c b/kernel/power/user.c index 069732e5695..89443b85163 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -383,7 +383,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp, return error; } -static struct file_operations snapshot_fops = { +static const struct file_operations snapshot_fops = { .open = snapshot_open, .release = snapshot_release, .read = snapshot_read, diff --git a/kernel/profile.c b/kernel/profile.c index 0961d93e1d9..fb5e03d57e9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -501,7 +501,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return count; } -static struct file_operations proc_profile_operations = { +static const struct file_operations proc_profile_operations = { .read = read_profile, .write = write_profile, }; diff --git a/kernel/relay.c b/kernel/relay.c index 2b92e8ece85..75a3a9a7efc 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1013,7 +1013,7 @@ static ssize_t relay_file_sendfile(struct file *filp, actor, &desc); } -struct file_operations relay_file_operations = { +const struct file_operations relay_file_operations = { .open = relay_file_open, .poll = relay_file_poll, .mmap = relay_file_mmap, diff --git a/kernel/resource.c b/kernel/resource.c index 6de60c12143..7b9a497419d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -88,7 +88,7 @@ static int r_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations resource_op = { +static const struct seq_operations resource_op = { .start = r_start, .next = r_next, .stop = r_stop, @@ -115,14 +115,14 @@ static int iomem_open(struct inode *inode, struct file *file) return res; } -static struct file_operations proc_ioports_operations = { +static const struct file_operations proc_ioports_operations = { .open = ioports_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; -static struct file_operations proc_iomem_operations = { +static const struct file_operations proc_iomem_operations = { .open = iomem_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/sched.c b/kernel/sched.c index b43cef02ce5..f385eff4682 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -505,7 +505,7 @@ static int schedstat_open(struct inode *inode, struct file *file) return res; } -struct file_operations proc_schedstat_operations = { +const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6d7147cf66b..758dbbf972a 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,7 @@ static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); static int proc_opensys(struct inode *, struct file *); -struct file_operations proc_sys_file_operations = { +const struct file_operations proc_sys_file_operations = { .open = proc_opensys, .read = proc_readsys, .write = proc_writesys, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ad864f8708b..b917d6fdc1b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1707,8 +1707,8 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) * Display pages allocated per node and memory policy via /proc. */ -static const char *policy_types[] = { "default", "prefer", "bind", - "interleave" }; +static const char * const policy_types[] = + { "default", "prefer", "bind", "interleave" }; /* * Convert a mempolicy into a string. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 27ec7a1b802..cace22b3ac2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -83,7 +83,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { EXPORT_SYMBOL(totalram_pages); -static char *zone_names[MAX_NR_ZONES] = { +static char * const zone_names[MAX_NR_ZONES] = { "DMA", #ifdef CONFIG_ZONE_DMA32 "DMA32", diff --git a/mm/shmem.c b/mm/shmem.c index 007653680a7..c820b4f77b8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -177,7 +177,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages) static struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; -static struct file_operations shmem_file_operations; +static const struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; static struct inode_operations shmem_special_inode_operations; @@ -2319,7 +2319,7 @@ static const struct address_space_operations shmem_aops = { .migratepage = migrate_page, }; -static struct file_operations shmem_file_operations = { +static const struct file_operations shmem_file_operations = { .mmap = shmem_mmap, #ifdef CONFIG_TMPFS .llseek = generic_file_llseek, diff --git a/mm/slab.c b/mm/slab.c index 86f5d6e995b..068cb4503c1 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -4142,7 +4142,7 @@ static int s_show(struct seq_file *m, void *p) * + further values on SMP and with statistics enabled */ -struct seq_operations slabinfo_op = { +const struct seq_operations slabinfo_op = { .start = s_start, .next = s_next, .stop = s_stop, @@ -4340,7 +4340,7 @@ static int leaks_show(struct seq_file *m, void *p) return 0; } -struct seq_operations slabstats_op = { +const struct seq_operations slabstats_op = { .start = leaks_start, .next = s_next, .stop = s_stop, diff --git a/mm/swapfile.c b/mm/swapfile.c index 55242363de6..c5431072f42 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1368,7 +1368,7 @@ static int swap_show(struct seq_file *swap, void *v) return 0; } -static struct seq_operations swaps_op = { +static const struct seq_operations swaps_op = { .start = swap_start, .next = swap_next, .stop = swap_stop, @@ -1380,7 +1380,7 @@ static int swaps_open(struct inode *inode, struct file *file) return seq_open(file, &swaps_op); } -static struct file_operations proc_swaps_operations = { +static const struct file_operations proc_swaps_operations = { .open = swaps_open, .read = seq_read, .llseek = seq_lseek, diff --git a/mm/vmstat.c b/mm/vmstat.c index ef4176843d9..dc005a0c96a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -430,7 +430,7 @@ static int frag_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations fragmentation_op = { +const struct seq_operations fragmentation_op = { .start = frag_start, .next = frag_next, .stop = frag_stop, @@ -452,7 +452,7 @@ struct seq_operations fragmentation_op = { #define TEXTS_FOR_ZONES(xx) xx "_dma", TEXT_FOR_DMA32(xx) xx "_normal", \ TEXT_FOR_HIGHMEM(xx) -static char *vmstat_text[] = { +static const char * const vmstat_text[] = { /* Zoned VM counters */ "nr_anon_pages", "nr_mapped", @@ -597,7 +597,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg) return 0; } -struct seq_operations zoneinfo_op = { +const struct seq_operations zoneinfo_op = { .start = frag_start, /* iterate over all zones. The same as in * fragmentation. */ .next = frag_next, @@ -660,7 +660,7 @@ static void vmstat_stop(struct seq_file *m, void *arg) m->private = NULL; } -struct seq_operations vmstat_op = { +const struct seq_operations vmstat_op = { .start = vmstat_start, .next = vmstat_next, .stop = vmstat_stop, -- cgit v1.2.3-70-g09d2 From 7d1362c0d05b8543807ab403ac8ce813cab41fa4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 6 Dec 2006 20:40:38 -0800 Subject: [PATCH] cleanup asm/setup.h userspace visibility Make the contents of the userspace asm/setup.h header consistent on all architectures: - export setup.h to userspace on all architectures - export only COMMAND_LINE_SIZE to userspace - frv: move COMMAND_LINE_SIZE from param.h - i386: remove duplicate COMMAND_LINE_SIZE from param.h - arm: - export ATAGs to userspace - change u8/u16/u32 to __u8/__u16/__u32 Signed-off-by: Adrian Bunk Acked-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-arm/setup.h | 104 ++++++++++++++++++++++------------------- include/asm-arm26/setup.h | 4 ++ include/asm-avr32/setup.h | 4 ++ include/asm-frv/param.h | 1 - include/asm-frv/setup.h | 6 +++ include/asm-generic/Kbuild.asm | 1 + include/asm-i386/Kbuild | 1 - include/asm-i386/param.h | 1 - include/asm-i386/setup.h | 6 ++- include/asm-ia64/Kbuild | 1 - include/asm-m32r/setup.h | 9 +++- include/asm-m68k/setup.h | 6 ++- include/asm-m68knommu/setup.h | 5 ++ include/asm-mips/setup.h | 2 - include/asm-powerpc/setup.h | 3 -- include/asm-s390/setup.h | 3 +- include/asm-sh/setup.h | 6 ++- include/asm-sh64/setup.h | 6 +++ include/asm-x86_64/Kbuild | 1 - 19 files changed, 102 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h index aa4b5782f0c..e5407392afc 100644 --- a/include/asm-arm/setup.h +++ b/include/asm-arm/setup.h @@ -14,55 +14,57 @@ #ifndef __ASMARM_SETUP_H #define __ASMARM_SETUP_H +#include + #define COMMAND_LINE_SIZE 1024 /* The list ends with an ATAG_NONE node. */ #define ATAG_NONE 0x00000000 struct tag_header { - u32 size; - u32 tag; + __u32 size; + __u32 tag; }; /* The list must start with an ATAG_CORE node */ #define ATAG_CORE 0x54410001 struct tag_core { - u32 flags; /* bit 0 = read-only */ - u32 pagesize; - u32 rootdev; + __u32 flags; /* bit 0 = read-only */ + __u32 pagesize; + __u32 rootdev; }; /* it is allowed to have multiple ATAG_MEM nodes */ #define ATAG_MEM 0x54410002 struct tag_mem32 { - u32 size; - u32 start; /* physical start address */ + __u32 size; + __u32 start; /* physical start address */ }; /* VGA text type displays */ #define ATAG_VIDEOTEXT 0x54410003 struct tag_videotext { - u8 x; - u8 y; - u16 video_page; - u8 video_mode; - u8 video_cols; - u16 video_ega_bx; - u8 video_lines; - u8 video_isvga; - u16 video_points; + __u8 x; + __u8 y; + __u16 video_page; + __u8 video_mode; + __u8 video_cols; + __u16 video_ega_bx; + __u8 video_lines; + __u8 video_isvga; + __u16 video_points; }; /* describes how the ramdisk will be used in kernel */ #define ATAG_RAMDISK 0x54410004 struct tag_ramdisk { - u32 flags; /* bit 0 = load, bit 1 = prompt */ - u32 size; /* decompressed ramdisk size in _kilo_ bytes */ - u32 start; /* starting block of floppy-based RAM disk image */ + __u32 flags; /* bit 0 = load, bit 1 = prompt */ + __u32 size; /* decompressed ramdisk size in _kilo_ bytes */ + __u32 start; /* starting block of floppy-based RAM disk image */ }; /* describes where the compressed ramdisk image lives (virtual address) */ @@ -76,23 +78,23 @@ struct tag_ramdisk { #define ATAG_INITRD2 0x54420005 struct tag_initrd { - u32 start; /* physical start address */ - u32 size; /* size of compressed ramdisk image in bytes */ + __u32 start; /* physical start address */ + __u32 size; /* size of compressed ramdisk image in bytes */ }; /* board serial number. "64 bits should be enough for everybody" */ #define ATAG_SERIAL 0x54410006 struct tag_serialnr { - u32 low; - u32 high; + __u32 low; + __u32 high; }; /* board revision */ #define ATAG_REVISION 0x54410007 struct tag_revision { - u32 rev; + __u32 rev; }; /* initial values for vesafb-type framebuffers. see struct screen_info @@ -101,20 +103,20 @@ struct tag_revision { #define ATAG_VIDEOLFB 0x54410008 struct tag_videolfb { - u16 lfb_width; - u16 lfb_height; - u16 lfb_depth; - u16 lfb_linelength; - u32 lfb_base; - u32 lfb_size; - u8 red_size; - u8 red_pos; - u8 green_size; - u8 green_pos; - u8 blue_size; - u8 blue_pos; - u8 rsvd_size; - u8 rsvd_pos; + __u16 lfb_width; + __u16 lfb_height; + __u16 lfb_depth; + __u16 lfb_linelength; + __u32 lfb_base; + __u32 lfb_size; + __u8 red_size; + __u8 red_pos; + __u8 green_size; + __u8 green_pos; + __u8 blue_size; + __u8 blue_pos; + __u8 rsvd_size; + __u8 rsvd_pos; }; /* command line: \0 terminated string */ @@ -128,17 +130,17 @@ struct tag_cmdline { #define ATAG_ACORN 0x41000101 struct tag_acorn { - u32 memc_control_reg; - u32 vram_pages; - u8 sounddefault; - u8 adfsdrives; + __u32 memc_control_reg; + __u32 vram_pages; + __u8 sounddefault; + __u8 adfsdrives; }; /* footbridge memory clock, see arch/arm/mach-footbridge/arch.c */ #define ATAG_MEMCLK 0x41000402 struct tag_memclk { - u32 fmemclk; + __u32 fmemclk; }; struct tag { @@ -167,24 +169,26 @@ struct tag { }; struct tagtable { - u32 tag; + __u32 tag; int (*parse)(const struct tag *); }; -#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) -#define __tagtable(tag, fn) \ -static struct tagtable __tagtable_##fn __tag = { tag, fn } - #define tag_member_present(tag,member) \ ((unsigned long)(&((struct tag *)0L)->member + 1) \ <= (tag)->hdr.size * 4) -#define tag_next(t) ((struct tag *)((u32 *)(t) + (t)->hdr.size)) +#define tag_next(t) ((struct tag *)((__u32 *)(t) + (t)->hdr.size)) #define tag_size(type) ((sizeof(struct tag_header) + sizeof(struct type)) >> 2) #define for_each_tag(t,base) \ for (t = base; t->hdr.size; t = tag_next(t)) +#ifdef __KERNEL__ + +#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) +#define __tagtable(tag, fn) \ +static struct tagtable __tagtable_##fn __tag = { tag, fn } + /* * Memory map description */ @@ -217,4 +221,6 @@ struct early_params { static struct early_params __early_##fn __attribute_used__ \ __attribute__((__section__(".early_param.init"))) = { name, fn } +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-arm26/setup.h b/include/asm-arm26/setup.h index 6348931be65..1a867b4e8d5 100644 --- a/include/asm-arm26/setup.h +++ b/include/asm-arm26/setup.h @@ -16,6 +16,8 @@ #define COMMAND_LINE_SIZE 1024 +#ifdef __KERNEL__ + /* The list ends with an ATAG_NONE node. */ #define ATAG_NONE 0x00000000 @@ -202,4 +204,6 @@ struct meminfo { extern struct meminfo meminfo; +#endif /* __KERNEL__ */ + #endif diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h index 10193da4113..0a5224245e4 100644 --- a/include/asm-avr32/setup.h +++ b/include/asm-avr32/setup.h @@ -13,6 +13,8 @@ #define COMMAND_LINE_SIZE 256 +#ifdef __KERNEL__ + /* Magic number indicating that a tag table is present */ #define ATAG_MAGIC 0xa2a25441 @@ -138,4 +140,6 @@ void chip_enable_sdram(void); #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* __ASM_AVR32_SETUP_H__ */ diff --git a/include/asm-frv/param.h b/include/asm-frv/param.h index 168381ebb41..365653b1726 100644 --- a/include/asm-frv/param.h +++ b/include/asm-frv/param.h @@ -18,6 +18,5 @@ #endif #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#define COMMAND_LINE_SIZE 512 #endif /* _ASM_PARAM_H */ diff --git a/include/asm-frv/setup.h b/include/asm-frv/setup.h index 0d293b9a585..afd787ceede 100644 --- a/include/asm-frv/setup.h +++ b/include/asm-frv/setup.h @@ -12,6 +12,10 @@ #ifndef _ASM_SETUP_H #define _ASM_SETUP_H +#define COMMAND_LINE_SIZE 512 + +#ifdef __KERNEL__ + #include #ifndef __ASSEMBLY__ @@ -22,4 +26,6 @@ extern unsigned long __initdata num_mappedpages; #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* _ASM_SETUP_H */ diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index a84c3d88a18..a37e95fe58d 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -14,6 +14,7 @@ unifdef-y += posix_types.h unifdef-y += ptrace.h unifdef-y += resource.h unifdef-y += sembuf.h +unifdef-y += setup.h unifdef-y += shmbuf.h unifdef-y += sigcontext.h unifdef-y += siginfo.h diff --git a/include/asm-i386/Kbuild b/include/asm-i386/Kbuild index 147e4ac1ebf..5ae93afc67e 100644 --- a/include/asm-i386/Kbuild +++ b/include/asm-i386/Kbuild @@ -7,5 +7,4 @@ header-y += ptrace-abi.h header-y += ucontext.h unifdef-y += mtrr.h -unifdef-y += setup.h unifdef-y += vm86.h diff --git a/include/asm-i386/param.h b/include/asm-i386/param.h index 745dc5bd0fb..21b32466fcd 100644 --- a/include/asm-i386/param.h +++ b/include/asm-i386/param.h @@ -18,6 +18,5 @@ #endif #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#define COMMAND_LINE_SIZE 256 #endif diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 2734909eff8..c5b504bfbaa 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -6,6 +6,8 @@ #ifndef _i386_SETUP_H #define _i386_SETUP_H +#define COMMAND_LINE_SIZE 256 + #ifdef __KERNEL__ #include @@ -14,10 +16,8 @@ */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) #define MAX_NONPAE_PFN (1 << 20) -#endif #define PARAM_SIZE 4096 -#define COMMAND_LINE_SIZE 256 #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -78,4 +78,6 @@ void __init add_memory_region(unsigned long long start, #endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ + #endif /* _i386_SETUP_H */ diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index 15818a18bc5..4a1e48b9f40 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -10,7 +10,6 @@ header-y += intrinsics.h header-y += perfmon_default_smpl.h header-y += ptrace_offsets.h header-y += rse.h -header-y += setup.h header-y += ucontext.h unifdef-y += perfmon.h diff --git a/include/asm-m32r/setup.h b/include/asm-m32r/setup.h index 52f4fa29abf..6a0b32202d4 100644 --- a/include/asm-m32r/setup.h +++ b/include/asm-m32r/setup.h @@ -1,6 +1,11 @@ /* * This is set up by the setup-routine at boot-time */ + +#define COMMAND_LINE_SIZE 512 + +#ifdef __KERNEL__ + #define PARAM ((unsigned char *)empty_zero_page) #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000)) @@ -18,8 +23,6 @@ #define SCREEN_INFO (*(struct screen_info *) (PARAM+0x200)) -#define COMMAND_LINE_SIZE (512) - #define RAMDISK_IMAGE_START_MASK (0x07FF) #define RAMDISK_PROMPT_FLAG (0x8000) #define RAMDISK_LOAD_FLAG (0x4000) @@ -27,3 +30,5 @@ extern unsigned long memory_start; extern unsigned long memory_end; +#endif /* __KERNEL__ */ + diff --git a/include/asm-m68k/setup.h b/include/asm-m68k/setup.h index 7facc9a46e7..2a8853cd655 100644 --- a/include/asm-m68k/setup.h +++ b/include/asm-m68k/setup.h @@ -41,8 +41,12 @@ #define MACH_Q40 10 #define MACH_SUN3X 11 +#define COMMAND_LINE_SIZE 256 + #ifdef __KERNEL__ +#define CL_SIZE COMMAND_LINE_SIZE + #ifndef __ASSEMBLY__ extern unsigned long m68k_machtype; #endif /* !__ASSEMBLY__ */ @@ -355,8 +359,6 @@ extern int m68k_is040or060; */ #define NUM_MEMINFO 4 -#define CL_SIZE 256 -#define COMMAND_LINE_SIZE CL_SIZE #ifndef __ASSEMBLY__ struct mem_info { diff --git a/include/asm-m68knommu/setup.h b/include/asm-m68knommu/setup.h index d2b0fcce41b..fb86bb2a607 100644 --- a/include/asm-m68knommu/setup.h +++ b/include/asm-m68knommu/setup.h @@ -1,5 +1,10 @@ +#ifdef __KERNEL__ + #include /* We have a bigger command line buffer. */ #undef COMMAND_LINE_SIZE + +#endif /* __KERNEL__ */ + #define COMMAND_LINE_SIZE 512 diff --git a/include/asm-mips/setup.h b/include/asm-mips/setup.h index 737fa4a6912..70009a90263 100644 --- a/include/asm-mips/setup.h +++ b/include/asm-mips/setup.h @@ -1,8 +1,6 @@ -#ifdef __KERNEL__ #ifndef _MIPS_SETUP_H #define _MIPS_SETUP_H #define COMMAND_LINE_SIZE 256 #endif /* __SETUP_H */ -#endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/setup.h b/include/asm-powerpc/setup.h index 3d9740aae01..817fac0a071 100644 --- a/include/asm-powerpc/setup.h +++ b/include/asm-powerpc/setup.h @@ -1,9 +1,6 @@ #ifndef _ASM_POWERPC_SETUP_H #define _ASM_POWERPC_SETUP_H -#ifdef __KERNEL__ - #define COMMAND_LINE_SIZE 512 -#endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/include/asm-s390/setup.h b/include/asm-s390/setup.h index 7664bacdd83..9574fe80a04 100644 --- a/include/asm-s390/setup.h +++ b/include/asm-s390/setup.h @@ -8,12 +8,13 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H +#define COMMAND_LINE_SIZE 896 + #ifdef __KERNEL__ #include #define PARMAREA 0x10400 -#define COMMAND_LINE_SIZE 896 #define MEMORY_CHUNKS 16 /* max 0x7fff */ #define IPL_PARMBLOCK_ORIGIN 0x2000 diff --git a/include/asm-sh/setup.h b/include/asm-sh/setup.h index 34ca8a7f06b..1583c6b7bda 100644 --- a/include/asm-sh/setup.h +++ b/include/asm-sh/setup.h @@ -1,10 +1,12 @@ -#ifdef __KERNEL__ #ifndef _SH_SETUP_H #define _SH_SETUP_H #define COMMAND_LINE_SIZE 256 +#ifdef __KERNEL__ + int setup_early_printk(char *); -#endif /* _SH_SETUP_H */ #endif /* __KERNEL__ */ + +#endif /* _SH_SETUP_H */ diff --git a/include/asm-sh64/setup.h b/include/asm-sh64/setup.h index ebd42eb1b70..5b07b14c292 100644 --- a/include/asm-sh64/setup.h +++ b/include/asm-sh64/setup.h @@ -1,6 +1,10 @@ #ifndef __ASM_SH64_SETUP_H #define __ASM_SH64_SETUP_H +#define COMMAND_LINE_SIZE 256 + +#ifdef __KERNEL__ + #define PARAM ((unsigned char *)empty_zero_page) #define MOUNT_ROOT_RDONLY (*(unsigned long *) (PARAM+0x000)) #define RAMDISK_FLAGS (*(unsigned long *) (PARAM+0x004)) @@ -12,5 +16,7 @@ #define COMMAND_LINE ((char *) (PARAM+256)) #define COMMAND_LINE_SIZE 256 +#endif /* __KERNEL__ */ + #endif /* __ASM_SH64_SETUP_H */ diff --git a/include/asm-x86_64/Kbuild b/include/asm-x86_64/Kbuild index 1ee9b07f3fe..763521358fb 100644 --- a/include/asm-x86_64/Kbuild +++ b/include/asm-x86_64/Kbuild @@ -12,7 +12,6 @@ header-y += ldt.h header-y += msr.h header-y += prctl.h header-y += ptrace-abi.h -header-y += setup.h header-y += sigcontext32.h header-y += ucontext.h header-y += vsyscall32.h -- cgit v1.2.3-70-g09d2 From 85916f8166b59eeac63d2b4f7f1df8de849334b4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 6 Dec 2006 20:40:41 -0800 Subject: [PATCH] Kexec / Kdump: Unify elf note code The elf note saving code is currently duplicated over several architectures. This cleanup patch simply adds code to a common file and then replaces the arch-specific code with calls to the newly added code. The only drawback with this approach is that s390 doesn't fully support kexec-on-panic which for that arch leads to introduction of unused code. Signed-off-by: Magnus Damm Cc: Vivek Goyal Cc: Andi Kleen Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 66 ++----------------------------------------- arch/powerpc/kernel/crash.c | 59 ++------------------------------------ arch/x86_64/kernel/crash.c | 69 ++------------------------------------------- include/linux/kexec.h | 1 + kernel/kexec.c | 56 ++++++++++++++++++++++++++++++++++++ 5 files changed, 63 insertions(+), 188 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 144b4328896..a5e0e990ea9 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -31,68 +31,6 @@ /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, so there is no need to invent something new. - */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - -static void crash_save_self(struct pt_regs *regs) -{ - int cpu; - - cpu = safe_smp_processor_id(); - crash_save_this_cpu(regs, cpu); -} - #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; @@ -121,7 +59,7 @@ static int crash_nmi_callback(struct notifier_block *self, crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -195,5 +133,5 @@ void machine_crash_shutdown(struct pt_regs *regs) #if defined(CONFIG_X86_IO_APIC) disable_IO_APIC(); #endif - crash_save_self(regs); + crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 89b03c8da9d..d3f2080d2ee 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -46,61 +46,6 @@ int crashing_cpu = -1; static cpumask_t cpus_in_crash = CPU_MASK_NONE; cpumask_t cpus_in_sr = CPU_MASK_NONE; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, - size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that that no need to invent something new. - */ - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - #ifdef CONFIG_SMP static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); @@ -113,7 +58,7 @@ void crash_ipi_callback(struct pt_regs *regs) hard_irq_disable(); if (!cpu_isset(cpu, cpus_in_crash)) - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); cpu_set(cpu, cpus_in_crash); /* @@ -306,7 +251,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) * such that another IPI will not be sent. */ crashing_cpu = smp_processor_id(); - crash_save_this_cpu(regs, crashing_cpu); + crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); if (ppc_md.kexec_cpu_down) diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 3525f884af8..95a7a2c1313 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c @@ -28,71 +28,6 @@ /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu; -static u32 *append_elf_note(u32 *buf, char *name, unsigned type, - void *data, size_t data_len) -{ - struct elf_note note; - - note.n_namesz = strlen(name) + 1; - note.n_descsz = data_len; - note.n_type = type; - memcpy(buf, ¬e, sizeof(note)); - buf += (sizeof(note) +3)/4; - memcpy(buf, name, note.n_namesz); - buf += (note.n_namesz + 3)/4; - memcpy(buf, data, note.n_descsz); - buf += (note.n_descsz + 3)/4; - - return buf; -} - -static void final_note(u32 *buf) -{ - struct elf_note note; - - note.n_namesz = 0; - note.n_descsz = 0; - note.n_type = 0; - memcpy(buf, ¬e, sizeof(note)); -} - -static void crash_save_this_cpu(struct pt_regs *regs, int cpu) -{ - struct elf_prstatus prstatus; - u32 *buf; - - if ((cpu < 0) || (cpu >= NR_CPUS)) - return; - - /* Using ELF notes here is opportunistic. - * I need a well defined structure format - * for the data I pass, and I need tags - * on the data to indicate what information I have - * squirrelled away. ELF notes happen to provide - * all of that, no need to invent something new. - */ - - buf = (u32*)per_cpu_ptr(crash_notes, cpu); - - if (!buf) - return; - - memset(&prstatus, 0, sizeof(prstatus)); - prstatus.pr_pid = current->pid; - elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, - sizeof(prstatus)); - final_note(buf); -} - -static void crash_save_self(struct pt_regs *regs) -{ - int cpu; - - cpu = smp_processor_id(); - crash_save_this_cpu(regs, cpu); -} - #ifdef CONFIG_SMP static atomic_t waiting_for_crash_ipi; @@ -117,7 +52,7 @@ static int crash_nmi_callback(struct notifier_block *self, return NOTIFY_STOP; local_irq_disable(); - crash_save_this_cpu(regs, cpu); + crash_save_cpu(regs, cpu); disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ @@ -196,5 +131,5 @@ void machine_crash_shutdown(struct pt_regs *regs) disable_IO_APIC(); - crash_save_self(regs); + crash_save_cpu(regs, smp_processor_id()); } diff --git a/include/linux/kexec.h b/include/linux/kexec.h index a4ede62b339..e3abcec6c51 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -105,6 +105,7 @@ extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); +void crash_save_cpu(struct pt_regs *regs, int cpu); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; diff --git a/kernel/kexec.c b/kernel/kexec.c index d43692cf232..afbbbe981be 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -1066,6 +1068,60 @@ void crash_kexec(struct pt_regs *regs) } } +static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data, + size_t data_len) +{ + struct elf_note note; + + note.n_namesz = strlen(name) + 1; + note.n_descsz = data_len; + note.n_type = type; + memcpy(buf, ¬e, sizeof(note)); + buf += (sizeof(note) + 3)/4; + memcpy(buf, name, note.n_namesz); + buf += (note.n_namesz + 3)/4; + memcpy(buf, data, note.n_descsz); + buf += (note.n_descsz + 3)/4; + + return buf; +} + +static void final_note(u32 *buf) +{ + struct elf_note note; + + note.n_namesz = 0; + note.n_descsz = 0; + note.n_type = 0; + memcpy(buf, ¬e, sizeof(note)); +} + +void crash_save_cpu(struct pt_regs *regs, int cpu) +{ + struct elf_prstatus prstatus; + u32 *buf; + + if ((cpu < 0) || (cpu >= NR_CPUS)) + return; + + /* Using ELF notes here is opportunistic. + * I need a well defined structure format + * for the data I pass, and I need tags + * on the data to indicate what information I have + * squirrelled away. ELF notes happen to provide + * all of that, so there is no need to invent something new. + */ + buf = (u32*)per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + elf_core_copy_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + static int __init crash_notes_memory_init(void) { /* Allocate memory for saving cpu registers. */ -- cgit v1.2.3-70-g09d2 From 97d2a80584b30b5cd32da411deca1986ef61877a Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 6 Dec 2006 20:40:45 -0800 Subject: [PATCH] aio: remove ki_retried debugging member Remove the ki_retried member from struct kiocb. I think the idea was bounced around a while back, but Arnaldo pointed out another reason that we should dig it up when he pointed out that the last cacheline of struct kiocb only contains 4 bytes. By removing the debugging member, we save more than the 8 byte on 64 bit machines. Signed-off-by: Benjamin LaHaise Acked-by: Ken Chen Acked-by: Zach Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 15 --------------- include/linux/aio.h | 2 -- 2 files changed, 17 deletions(-) (limited to 'include') diff --git a/fs/aio.c b/fs/aio.c index f02ad2677fc..d3a6ec2c962 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -666,17 +666,6 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) ssize_t (*retry)(struct kiocb *); ssize_t ret; - if (iocb->ki_retried++ > 1024*1024) { - printk("Maximal retry count. Bytes done %Zd\n", - iocb->ki_nbytes - iocb->ki_left); - return -EAGAIN; - } - - if (!(iocb->ki_retried & 0xff)) { - pr_debug("%ld retry: %zd of %zd\n", iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); - } - if (!(retry = iocb->ki_retry)) { printk("aio_run_iocb: iocb->ki_retry = NULL\n"); return 0; @@ -1005,9 +994,6 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2) kunmap_atomic(ring, KM_IRQ1); pr_debug("added to ring %p at [%lu]\n", iocb, tail); - - pr_debug("%ld retries: %zd of %zd\n", iocb->ki_retried, - iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes); put_rq: /* everything turned out well, dispose of the aiocb. */ ret = __aio_put_req(ctx, iocb); @@ -1590,7 +1576,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_opcode = iocb->aio_lio_opcode; init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); INIT_LIST_HEAD(&req->ki_wait.task_list); - req->ki_retried = 0; ret = aio_setup_iocb(req); diff --git a/include/linux/aio.h b/include/linux/aio.h index 9e350fd44d7..3372ec6bf53 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -111,7 +111,6 @@ struct kiocb { size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ - long ki_retried; /* just for testing */ struct iovec ki_inline_vec; /* inline vector */ struct iovec *ki_iovec; unsigned long ki_nr_segs; @@ -238,7 +237,6 @@ do { \ } while (0) #define io_wait_to_kiocb(wait) container_of(wait, struct kiocb, ki_wait) -#define is_retried_kiocb(iocb) ((iocb)->ki_retried > 1) #include -- cgit v1.2.3-70-g09d2 From 6b39bb6548d60b9a18826134b5ccd5c3cef85fe2 Mon Sep 17 00:00:00 2001 From: Paul Clements Date: Wed, 6 Dec 2006 20:40:53 -0800 Subject: [PATCH] nbd: show nbd client pid in sysfs Allow nbd to expose the nbd-client daemon's PID in /sys/block/nbd/pid. This is helpful for tracking connection status of a device and for determining which nbd devices are currently in use. Signed-off-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 16 ++++++++++++++++ include/linux/nbd.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9d1035e8d9d..7bf2cfbd628 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -355,14 +355,30 @@ harderror: return NULL; } +static ssize_t pid_show(struct gendisk *disk, char *page) +{ + return sprintf(page, "%ld\n", + (long) ((struct nbd_device *)disk->private_data)->pid); +} + +static struct disk_attribute pid_attr = { + .attr = { .name = "pid", .mode = S_IRUGO }, + .show = pid_show, +}; + static void nbd_do_it(struct nbd_device *lo) { struct request *req; BUG_ON(lo->magic != LO_MAGIC); + lo->pid = current->pid; + sysfs_create_file(&lo->disk->kobj, &pid_attr.attr); + while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); + + sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr); return; } diff --git a/include/linux/nbd.h b/include/linux/nbd.h index d6b6dc09ad9..0f3e6930254 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -64,6 +64,7 @@ struct nbd_device { struct gendisk *disk; int blksize; u64 bytesize; + pid_t pid; /* pid of nbd-client, if attached */ }; #endif -- cgit v1.2.3-70-g09d2 From 759643b874907e76ae81e34df62f41ab6683f5c2 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 6 Dec 2006 20:40:59 -0800 Subject: [PATCH] IPMI: pass sysfs name from lower level driver Pass in the sysfs name from the lower-level IPMI driver, as the coming IPMI serial driver will need that to link properly from the serial device sysfs directory. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 34 ++++++++++++++++++++++++++++------ drivers/char/ipmi/ipmi_si_intf.c | 1 + include/linux/ipmi_smi.h | 1 + 3 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 761ed269920..6a77b264eb2 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -205,6 +205,7 @@ struct ipmi_smi struct bmc_device *bmc; char *my_dev_name; + char *sysfs_name; /* This is the lower-layer's sender routine. */ struct ipmi_smi_handlers *handlers; @@ -2004,7 +2005,11 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf) { struct bmc_device *bmc = intf->bmc; - sysfs_remove_link(&intf->si_dev->kobj, "bmc"); + if (intf->sysfs_name) { + sysfs_remove_link(&intf->si_dev->kobj, intf->sysfs_name); + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; + } if (intf->my_dev_name) { sysfs_remove_link(&bmc->dev->dev.kobj, intf->my_dev_name); kfree(intf->my_dev_name); @@ -2140,7 +2145,8 @@ out: return err; } -static int ipmi_bmc_register(ipmi_smi_t intf) +static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, + const char *sysfs_name) { int rv; struct bmc_device *bmc = intf->bmc; @@ -2257,29 +2263,44 @@ static int ipmi_bmc_register(ipmi_smi_t intf) * create symlink from system interface device to bmc device * and back. */ + intf->sysfs_name = kstrdup(sysfs_name, GFP_KERNEL); + if (!intf->sysfs_name) { + rv = -ENOMEM; + printk(KERN_ERR + "ipmi_msghandler: allocate link to BMC: %d\n", + rv); + goto out_err; + } + rv = sysfs_create_link(&intf->si_dev->kobj, - &bmc->dev->dev.kobj, "bmc"); + &bmc->dev->dev.kobj, intf->sysfs_name); if (rv) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; printk(KERN_ERR "ipmi_msghandler: Unable to create bmc symlink: %d\n", rv); goto out_err; } - size = snprintf(dummy, 0, "ipmi%d", intf->intf_num); + size = snprintf(dummy, 0, "ipmi%d", ifnum); intf->my_dev_name = kmalloc(size+1, GFP_KERNEL); if (!intf->my_dev_name) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; rv = -ENOMEM; printk(KERN_ERR "ipmi_msghandler: allocate link from BMC: %d\n", rv); goto out_err; } - snprintf(intf->my_dev_name, size+1, "ipmi%d", intf->intf_num); + snprintf(intf->my_dev_name, size+1, "ipmi%d", ifnum); rv = sysfs_create_link(&bmc->dev->dev.kobj, &intf->si_dev->kobj, intf->my_dev_name); if (rv) { + kfree(intf->sysfs_name); + intf->sysfs_name = NULL; kfree(intf->my_dev_name); intf->my_dev_name = NULL; printk(KERN_ERR @@ -2464,6 +2485,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *si_dev, + const char *sysfs_name, unsigned char slave_addr) { int i, j; @@ -2579,7 +2601,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, if (rv == 0) rv = add_proc_entries(intf, i); - rv = ipmi_bmc_register(intf); + rv = ipmi_bmc_register(intf, i, sysfs_name); out: if (rv) { diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index bb1fac104fd..89cdb928061 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2362,6 +2362,7 @@ static int try_smi_init(struct smi_info *new_smi) new_smi, &new_smi->device_id, new_smi->dev, + "bmc", new_smi->slave_addr); if (rv) { printk(KERN_ERR diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 6d9c7e4da47..2cc960da417 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -173,6 +173,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, void *send_info, struct ipmi_device_id *device_id, struct device *dev, + const char *sysfs_name, unsigned char slave_addr); /* -- cgit v1.2.3-70-g09d2 From b9675136e2ad95156fb93be6155f17590bb26fd7 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 6 Dec 2006 20:41:02 -0800 Subject: [PATCH] IPMI: Add maintenance mode Some commands and operations on a BMC can cause the BMC to "go away" for a while. This can cause the automatic flag processing and other things of that nature to timeout and generate annoying logs, or possibly cause other bad things to happen when in firmware update mode. Add detection of those commands (cold reset, warm reset, and any firmware command) and turns off automatic processing for 30 seconds. It also add a manual override either way. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_devintf.c | 25 ++++++++ drivers/char/ipmi/ipmi_msghandler.c | 117 +++++++++++++++++++++++++++++++++++- drivers/char/ipmi/ipmi_si_intf.c | 9 +++ include/linux/ipmi.h | 45 ++++++++++++++ include/linux/ipmi_msgdefs.h | 5 ++ include/linux/ipmi_smi.h | 7 +++ 6 files changed, 207 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index 81fcf0ce21d..375d3378eec 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -596,6 +596,31 @@ static int ipmi_ioctl(struct inode *inode, rv = 0; break; } + + case IPMICTL_GET_MAINTENANCE_MODE_CMD: + { + int mode; + + mode = ipmi_get_maintenance_mode(priv->user); + if (copy_to_user(arg, &mode, sizeof(mode))) { + rv = -EFAULT; + break; + } + rv = 0; + break; + } + + case IPMICTL_SET_MAINTENANCE_MODE_CMD: + { + int mode; + + if (copy_from_user(&mode, arg, sizeof(mode))) { + rv = -EFAULT; + break; + } + rv = ipmi_set_maintenance_mode(priv->user, mode); + break; + } } return rv; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 03f32611831..ff0e68f0386 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -48,7 +48,7 @@ #define PFX "IPMI message handler: " -#define IPMI_DRIVER_VERSION "39.0" +#define IPMI_DRIVER_VERSION "39.1" static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void); static int ipmi_init_msghandler(void); @@ -59,6 +59,9 @@ static int initialized = 0; static struct proc_dir_entry *proc_ipmi_root = NULL; #endif /* CONFIG_PROC_FS */ +/* Remain in auto-maintenance mode for this amount of time (in ms). */ +#define IPMI_MAINTENANCE_MODE_TIMEOUT 30000 + #define MAX_EVENTS_IN_QUEUE 25 /* Don't let a message sit in a queue forever, always time it with at lest @@ -262,6 +265,12 @@ struct ipmi_smi unsigned char local_sel_device; unsigned char local_event_generator; + /* For handling of maintenance mode. */ + int maintenance_mode; + int maintenance_mode_enable; + int auto_maintenance_timeout; + spinlock_t maintenance_mode_lock; /* Used in a timer... */ + /* A cheap hack, if this is non-null and a message to an interface comes in with a NULL user, call this routine with it. Note that the message will still be freed by the @@ -985,6 +994,65 @@ int ipmi_get_my_LUN(ipmi_user_t user, return 0; } +int ipmi_get_maintenance_mode(ipmi_user_t user) +{ + int mode; + unsigned long flags; + + spin_lock_irqsave(&user->intf->maintenance_mode_lock, flags); + mode = user->intf->maintenance_mode; + spin_unlock_irqrestore(&user->intf->maintenance_mode_lock, flags); + + return mode; +} +EXPORT_SYMBOL(ipmi_get_maintenance_mode); + +static void maintenance_mode_update(ipmi_smi_t intf) +{ + if (intf->handlers->set_maintenance_mode) + intf->handlers->set_maintenance_mode( + intf->send_info, intf->maintenance_mode_enable); +} + +int ipmi_set_maintenance_mode(ipmi_user_t user, int mode) +{ + int rv = 0; + unsigned long flags; + ipmi_smi_t intf = user->intf; + + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + if (intf->maintenance_mode != mode) { + switch (mode) { + case IPMI_MAINTENANCE_MODE_AUTO: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable + = (intf->auto_maintenance_timeout > 0); + break; + + case IPMI_MAINTENANCE_MODE_OFF: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable = 0; + break; + + case IPMI_MAINTENANCE_MODE_ON: + intf->maintenance_mode = mode; + intf->maintenance_mode_enable = 1; + break; + + default: + rv = -EINVAL; + goto out_unlock; + } + + maintenance_mode_update(intf); + } + out_unlock: + spin_unlock_irqrestore(&intf->maintenance_mode_lock, flags); + + return rv; +} +EXPORT_SYMBOL(ipmi_set_maintenance_mode); + int ipmi_set_gets_events(ipmi_user_t user, int val) { unsigned long flags; @@ -1322,6 +1390,24 @@ static int i_ipmi_request(ipmi_user_t user, goto out_err; } + if (((msg->netfn == IPMI_NETFN_APP_REQUEST) + && ((msg->cmd == IPMI_COLD_RESET_CMD) + || (msg->cmd == IPMI_WARM_RESET_CMD))) + || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) + { + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + intf->auto_maintenance_timeout + = IPMI_MAINTENANCE_MODE_TIMEOUT; + if (!intf->maintenance_mode + && !intf->maintenance_mode_enable) + { + intf->maintenance_mode_enable = 1; + maintenance_mode_update(intf); + } + spin_unlock_irqrestore(&intf->maintenance_mode_lock, + flags); + } + if ((msg->data_len + 2) > IPMI_MAX_MSG_LENGTH) { spin_lock_irqsave(&intf->counter_lock, flags); intf->sent_invalid_commands++; @@ -2605,6 +2691,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, INIT_LIST_HEAD(&intf->waiting_events); intf->waiting_events_count = 0; mutex_init(&intf->cmd_rcvrs_mutex); + spin_lock_init(&intf->maintenance_mode_lock); INIT_LIST_HEAD(&intf->cmd_rcvrs); init_waitqueue_head(&intf->waitq); @@ -3593,6 +3680,30 @@ static void ipmi_timeout_handler(long timeout_period) list_for_each_entry_safe(msg, msg2, &timeouts, link) deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE); + + /* + * Maintenance mode handling. Check the timeout + * optimistically before we claim the lock. It may + * mean a timeout gets missed occasionally, but that + * only means the timeout gets extended by one period + * in that case. No big deal, and it avoids the lock + * most of the time. + */ + if (intf->auto_maintenance_timeout > 0) { + spin_lock_irqsave(&intf->maintenance_mode_lock, flags); + if (intf->auto_maintenance_timeout > 0) { + intf->auto_maintenance_timeout + -= timeout_period; + if (!intf->maintenance_mode + && (intf->auto_maintenance_timeout <= 0)) + { + intf->maintenance_mode_enable = 0; + maintenance_mode_update(intf); + } + } + spin_unlock_irqrestore(&intf->maintenance_mode_lock, + flags); + } } rcu_read_unlock(); } @@ -3606,6 +3717,10 @@ static void ipmi_request_event(void) /* Called from the timer, no need to check if handlers is * valid. */ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { + /* No event requests when in maintenance mode. */ + if (intf->maintenance_mode_enable) + continue; + handlers = intf->handlers; if (handlers) handlers->request_events(intf->send_info); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 89cdb928061..f04bee76ba2 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -949,12 +949,21 @@ static int smi_start_processing(void *send_info, return 0; } +static void set_maintenance_mode(void *send_info, int enable) +{ + struct smi_info *smi_info = send_info; + + if (!enable) + atomic_set(&smi_info->req_events, 0); +} + static struct ipmi_smi_handlers handlers = { .owner = THIS_MODULE, .start_processing = smi_start_processing, .sender = sender, .request_events = request_events, + .set_maintenance_mode = set_maintenance_mode, .set_run_to_completion = set_run_to_completion, .poll = poll, }; diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 796ca009fd4..7a9db390c56 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -208,6 +208,15 @@ struct kernel_ipmi_msg code as the first byte of the incoming data, unlike a response. */ +/* + * Modes for ipmi_set_maint_mode() and the userland IOCTL. The AUTO + * setting is the default and means it will be set on certain + * commands. Hard setting it on and off will override automatic + * operation. + */ +#define IPMI_MAINTENANCE_MODE_AUTO 0 +#define IPMI_MAINTENANCE_MODE_OFF 1 +#define IPMI_MAINTENANCE_MODE_ON 2 #ifdef __KERNEL__ @@ -373,6 +382,35 @@ int ipmi_unregister_for_cmd(ipmi_user_t user, unsigned char cmd, unsigned int chans); +/* + * Go into a mode where the driver will not autonomously attempt to do + * things with the interface. It will still respond to attentions and + * interrupts, and it will expect that commands will complete. It + * will not automatcially check for flags, events, or things of that + * nature. + * + * This is primarily used for firmware upgrades. The idea is that + * when you go into firmware upgrade mode, you do this operation + * and the driver will not attempt to do anything but what you tell + * it or what the BMC asks for. + * + * Note that if you send a command that resets the BMC, the driver + * will still expect a response from that command. So the BMC should + * reset itself *after* the response is sent. Resetting before the + * response is just silly. + * + * If in auto maintenance mode, the driver will automatically go into + * maintenance mode for 30 seconds if it sees a cold reset, a warm + * reset, or a firmware NetFN. This means that code that uses only + * firmware NetFN commands to do upgrades will work automatically + * without change, assuming it sends a message every 30 seconds or + * less. + * + * See the IPMI_MAINTENANCE_MODE_xxx defines for what the mode means. + */ +int ipmi_get_maintenance_mode(ipmi_user_t user); +int ipmi_set_maintenance_mode(ipmi_user_t user, int mode); + /* * Allow run-to-completion mode to be set for the interface of * a specific user. @@ -656,4 +694,11 @@ struct ipmi_timing_parms #define IPMICTL_GET_TIMING_PARMS_CMD _IOR(IPMI_IOC_MAGIC, 23, \ struct ipmi_timing_parms) +/* + * Set the maintenance mode. See ipmi_set_maintenance_mode() above + * for a description of what this does. + */ +#define IPMICTL_GET_MAINTENANCE_MODE_CMD _IOR(IPMI_IOC_MAGIC, 30, int) +#define IPMICTL_SET_MAINTENANCE_MODE_CMD _IOW(IPMI_IOC_MAGIC, 31, int) + #endif /* __LINUX_IPMI_H */ diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 4d04d8b58a0..8d6759cc1a7 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -46,6 +46,8 @@ #define IPMI_NETFN_APP_REQUEST 0x06 #define IPMI_NETFN_APP_RESPONSE 0x07 #define IPMI_GET_DEVICE_ID_CMD 0x01 +#define IPMI_COLD_RESET_CMD 0x02 +#define IPMI_WARM_RESET_CMD 0x03 #define IPMI_CLEAR_MSG_FLAGS_CMD 0x30 #define IPMI_GET_DEVICE_GUID_CMD 0x08 #define IPMI_GET_MSG_FLAGS_CMD 0x31 @@ -60,6 +62,9 @@ #define IPMI_NETFN_STORAGE_RESPONSE 0x0b #define IPMI_ADD_SEL_ENTRY_CMD 0x44 +#define IPMI_NETFN_FIRMWARE_REQUEST 0x08 +#define IPMI_NETFN_FIRMWARE_RESPONSE 0x09 + /* The default slave address */ #define IPMI_BMC_SLAVE_ADDR 0x20 diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 2cc960da417..c0633108d05 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -115,6 +115,13 @@ struct ipmi_smi_handlers poll for operations during things like crash dumps. */ void (*poll)(void *send_info); + /* Enable/disable firmware maintenance mode. Note that this + is *not* the modes defined, this is simply an on/off + setting. The message handler does the mode handling. Note + that this is called from interupt context, so it cannot + block. */ + void (*set_maintenance_mode)(void *send_info, int enable); + /* Tell the handler that we are using it/not using it. The message handler get the modules that this handler belongs to; this function lets the SMI claim any modules that it -- cgit v1.2.3-70-g09d2 From 4d7cbac7c870ca66d8fb27d68188efbb5de2dffa Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 6 Dec 2006 20:41:14 -0800 Subject: [PATCH] IPMI: Fix BT long busy The IPMI BT subdriver has been patched to survive "long busy" timeouts seen during firmware upgrades and resets. The patch never returns the HOSED state, synthesizes response messages with meaningful completion codes, and recovers gracefully when the hardware finishes the long busy. The subdriver now issues a "Get BT Capabilities" command and properly uses those results. More informative completion codes are returned on error from transaction starts; this logic was propogated to the KCS and SMIC subdrivers. Finally, indent and other style quirks were normalized. Signed-off-by: Rocky Craig Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_bt_sm.c | 641 ++++++++++++++++++++++++--------------- drivers/char/ipmi/ipmi_kcs_sm.c | 14 +- drivers/char/ipmi/ipmi_si_intf.c | 12 +- drivers/char/ipmi/ipmi_smic_sm.c | 14 +- include/linux/ipmi_msgdefs.h | 8 +- 5 files changed, 423 insertions(+), 266 deletions(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index 0030cd8e2e9..6c59baa887a 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -33,11 +33,13 @@ #include /* for completion codes */ #include "ipmi_si_sm.h" -static int bt_debug = 0x00; /* Production value 0, see following flags */ +#define BT_DEBUG_OFF 0 /* Used in production */ +#define BT_DEBUG_ENABLE 1 /* Generic messages */ +#define BT_DEBUG_MSG 2 /* Prints all request/response buffers */ +#define BT_DEBUG_STATES 4 /* Verbose look at state changes */ + +static int bt_debug = BT_DEBUG_OFF; -#define BT_DEBUG_ENABLE 1 -#define BT_DEBUG_MSG 2 -#define BT_DEBUG_STATES 4 module_param(bt_debug, int, 0644); MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); @@ -47,38 +49,54 @@ MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); Since the Open IPMI architecture is single-message oriented at this stage, the queue depth of BT is of no concern. */ -#define BT_NORMAL_TIMEOUT 5000000 /* seconds in microseconds */ -#define BT_RETRY_LIMIT 2 -#define BT_RESET_DELAY 6000000 /* 6 seconds after warm reset */ +#define BT_NORMAL_TIMEOUT 5 /* seconds */ +#define BT_NORMAL_RETRY_LIMIT 2 +#define BT_RESET_DELAY 6 /* seconds after warm reset */ + +/* States are written in chronological order and usually cover + multiple rows of the state table discussion in the IPMI spec. */ enum bt_states { - BT_STATE_IDLE, + BT_STATE_IDLE = 0, /* Order is critical in this list */ BT_STATE_XACTION_START, BT_STATE_WRITE_BYTES, - BT_STATE_WRITE_END, BT_STATE_WRITE_CONSUME, - BT_STATE_B2H_WAIT, - BT_STATE_READ_END, - BT_STATE_RESET1, /* These must come last */ + BT_STATE_READ_WAIT, + BT_STATE_CLEAR_B2H, + BT_STATE_READ_BYTES, + BT_STATE_RESET1, /* These must come last */ BT_STATE_RESET2, BT_STATE_RESET3, BT_STATE_RESTART, - BT_STATE_HOSED + BT_STATE_PRINTME, + BT_STATE_CAPABILITIES_BEGIN, + BT_STATE_CAPABILITIES_END, + BT_STATE_LONG_BUSY /* BT doesn't get hosed :-) */ }; +/* Macros seen at the end of state "case" blocks. They help with legibility + and debugging. */ + +#define BT_STATE_CHANGE(X,Y) { bt->state = X; return Y; } + +#define BT_SI_SM_RETURN(Y) { last_printed = BT_STATE_PRINTME; return Y; } + struct si_sm_data { enum bt_states state; - enum bt_states last_state; /* assist printing and resets */ unsigned char seq; /* BT sequence number */ struct si_sm_io *io; - unsigned char write_data[IPMI_MAX_MSG_LENGTH]; - int write_count; - unsigned char read_data[IPMI_MAX_MSG_LENGTH]; - int read_count; - int truncated; - long timeout; - unsigned int error_retries; /* end of "common" fields */ + unsigned char write_data[IPMI_MAX_MSG_LENGTH]; + int write_count; + unsigned char read_data[IPMI_MAX_MSG_LENGTH]; + int read_count; + int truncated; + long timeout; /* microseconds countdown */ + int error_retries; /* end of "common" fields */ int nonzero_status; /* hung BMCs stay all 0 */ + enum bt_states complete; /* to divert the state machine */ + int BT_CAP_outreqs; + long BT_CAP_req2rsp; + int BT_CAP_retries; /* Recommended retries */ }; #define BT_CLR_WR_PTR 0x01 /* See IPMI 1.5 table 11.6.4 */ @@ -111,86 +129,118 @@ struct si_sm_data { static char *state2txt(unsigned char state) { switch (state) { - case BT_STATE_IDLE: return("IDLE"); - case BT_STATE_XACTION_START: return("XACTION"); - case BT_STATE_WRITE_BYTES: return("WR_BYTES"); - case BT_STATE_WRITE_END: return("WR_END"); - case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); - case BT_STATE_B2H_WAIT: return("B2H_WAIT"); - case BT_STATE_READ_END: return("RD_END"); - case BT_STATE_RESET1: return("RESET1"); - case BT_STATE_RESET2: return("RESET2"); - case BT_STATE_RESET3: return("RESET3"); - case BT_STATE_RESTART: return("RESTART"); - case BT_STATE_HOSED: return("HOSED"); + case BT_STATE_IDLE: return("IDLE"); + case BT_STATE_XACTION_START: return("XACTION"); + case BT_STATE_WRITE_BYTES: return("WR_BYTES"); + case BT_STATE_WRITE_CONSUME: return("WR_CONSUME"); + case BT_STATE_READ_WAIT: return("RD_WAIT"); + case BT_STATE_CLEAR_B2H: return("CLEAR_B2H"); + case BT_STATE_READ_BYTES: return("RD_BYTES"); + case BT_STATE_RESET1: return("RESET1"); + case BT_STATE_RESET2: return("RESET2"); + case BT_STATE_RESET3: return("RESET3"); + case BT_STATE_RESTART: return("RESTART"); + case BT_STATE_LONG_BUSY: return("LONG_BUSY"); + case BT_STATE_CAPABILITIES_BEGIN: return("CAP_BEGIN"); + case BT_STATE_CAPABILITIES_END: return("CAP_END"); } return("BAD STATE"); } #define STATE2TXT state2txt(bt->state) -static char *status2txt(unsigned char status, char *buf) +static char *status2txt(unsigned char status) { + /* + * This cannot be called by two threads at the same time and + * the buffer is always consumed immediately, so the static is + * safe to use. + */ + static char buf[40]; + strcpy(buf, "[ "); - if (status & BT_B_BUSY) strcat(buf, "B_BUSY "); - if (status & BT_H_BUSY) strcat(buf, "H_BUSY "); - if (status & BT_OEM0) strcat(buf, "OEM0 "); - if (status & BT_SMS_ATN) strcat(buf, "SMS "); - if (status & BT_B2H_ATN) strcat(buf, "B2H "); - if (status & BT_H2B_ATN) strcat(buf, "H2B "); + if (status & BT_B_BUSY) + strcat(buf, "B_BUSY "); + if (status & BT_H_BUSY) + strcat(buf, "H_BUSY "); + if (status & BT_OEM0) + strcat(buf, "OEM0 "); + if (status & BT_SMS_ATN) + strcat(buf, "SMS "); + if (status & BT_B2H_ATN) + strcat(buf, "B2H "); + if (status & BT_H2B_ATN) + strcat(buf, "H2B "); strcat(buf, "]"); return buf; } -#define STATUS2TXT(buf) status2txt(status, buf) +#define STATUS2TXT status2txt(status) + +/* called externally at insmod time, and internally on cleanup */ -/* This will be called from within this module on a hosed condition */ -#define FIRST_SEQ 0 static unsigned int bt_init_data(struct si_sm_data *bt, struct si_sm_io *io) { - bt->state = BT_STATE_IDLE; - bt->last_state = BT_STATE_IDLE; - bt->seq = FIRST_SEQ; - bt->io = io; - bt->write_count = 0; - bt->read_count = 0; - bt->error_retries = 0; - bt->nonzero_status = 0; - bt->truncated = 0; - bt->timeout = BT_NORMAL_TIMEOUT; + memset(bt, 0, sizeof(struct si_sm_data)); + if (bt->io != io) { /* external: one-time only things */ + bt->io = io; + bt->seq = 0; + } + bt->state = BT_STATE_IDLE; /* start here */ + bt->complete = BT_STATE_IDLE; /* end here */ + bt->BT_CAP_req2rsp = BT_NORMAL_TIMEOUT * 1000000; + bt->BT_CAP_retries = BT_NORMAL_RETRY_LIMIT; + /* BT_CAP_outreqs == zero is a flag to read BT Capabilities */ return 3; /* We claim 3 bytes of space; ought to check SPMI table */ } +/* Jam a completion code (probably an error) into a response */ + +static void force_result(struct si_sm_data *bt, unsigned char completion_code) +{ + bt->read_data[0] = 4; /* # following bytes */ + bt->read_data[1] = bt->write_data[1] | 4; /* Odd NetFn/LUN */ + bt->read_data[2] = bt->write_data[2]; /* seq (ignored) */ + bt->read_data[3] = bt->write_data[3]; /* Command */ + bt->read_data[4] = completion_code; + bt->read_count = 5; +} + +/* The upper state machine starts here */ + static int bt_start_transaction(struct si_sm_data *bt, unsigned char *data, unsigned int size) { unsigned int i; - if ((size < 2) || (size > (IPMI_MAX_MSG_LENGTH - 2))) - return -1; + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > IPMI_MAX_MSG_LENGTH) + return IPMI_REQ_LEN_EXCEEDED_ERR; - if ((bt->state != BT_STATE_IDLE) && (bt->state != BT_STATE_HOSED)) - return -2; + if (bt->state == BT_STATE_LONG_BUSY) + return IPMI_NODE_BUSY_ERR; + + if (bt->state != BT_STATE_IDLE) + return IPMI_NOT_IN_MY_STATE_ERR; if (bt_debug & BT_DEBUG_MSG) { - printk(KERN_WARNING "+++++++++++++++++++++++++++++++++++++\n"); - printk(KERN_WARNING "BT: write seq=0x%02X:", bt->seq); + printk(KERN_WARNING "BT: +++++++++++++++++ New command\n"); + printk(KERN_WARNING "BT: NetFn/LUN CMD [%d data]:", size - 2); for (i = 0; i < size; i ++) - printk (" %02x", data[i]); + printk (" %02x", data[i]); printk("\n"); } bt->write_data[0] = size + 1; /* all data plus seq byte */ bt->write_data[1] = *data; /* NetFn/LUN */ - bt->write_data[2] = bt->seq; + bt->write_data[2] = bt->seq++; memcpy(bt->write_data + 3, data + 1, size - 1); bt->write_count = size + 2; - bt->error_retries = 0; bt->nonzero_status = 0; - bt->read_count = 0; bt->truncated = 0; bt->state = BT_STATE_XACTION_START; - bt->last_state = BT_STATE_IDLE; - bt->timeout = BT_NORMAL_TIMEOUT; + bt->timeout = bt->BT_CAP_req2rsp; + force_result(bt, IPMI_ERR_UNSPECIFIED); return 0; } @@ -198,38 +248,30 @@ static int bt_start_transaction(struct si_sm_data *bt, it calls this. Strip out the length and seq bytes. */ static int bt_get_result(struct si_sm_data *bt, - unsigned char *data, - unsigned int length) + unsigned char *data, + unsigned int length) { int i, msg_len; msg_len = bt->read_count - 2; /* account for length & seq */ - /* Always NetFn, Cmd, cCode */ if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) { - printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len); - data[0] = bt->write_data[1] | 0x4; /* Kludge a response */ - data[1] = bt->write_data[3]; - data[2] = IPMI_ERR_UNSPECIFIED; + force_result(bt, IPMI_ERR_UNSPECIFIED); msg_len = 3; - } else { - data[0] = bt->read_data[1]; - data[1] = bt->read_data[3]; - if (length < msg_len) - bt->truncated = 1; - if (bt->truncated) { /* can be set in read_all_bytes() */ - data[2] = IPMI_ERR_MSG_TRUNCATED; - msg_len = 3; - } else - memcpy(data + 2, bt->read_data + 4, msg_len - 2); + } + data[0] = bt->read_data[1]; + data[1] = bt->read_data[3]; + if (length < msg_len || bt->truncated) { + data[2] = IPMI_ERR_MSG_TRUNCATED; + msg_len = 3; + } else + memcpy(data + 2, bt->read_data + 4, msg_len - 2); - if (bt_debug & BT_DEBUG_MSG) { - printk (KERN_WARNING "BT: res (raw)"); - for (i = 0; i < msg_len; i++) - printk(" %02x", data[i]); - printk ("\n"); - } + if (bt_debug & BT_DEBUG_MSG) { + printk (KERN_WARNING "BT: result %d bytes:", msg_len); + for (i = 0; i < msg_len; i++) + printk(" %02x", data[i]); + printk ("\n"); } - bt->read_count = 0; /* paranoia */ return msg_len; } @@ -238,22 +280,40 @@ static int bt_get_result(struct si_sm_data *bt, static void reset_flags(struct si_sm_data *bt) { + if (bt_debug) + printk(KERN_WARNING "IPMI BT: flag reset %s\n", + status2txt(BT_STATUS)); if (BT_STATUS & BT_H_BUSY) - BT_CONTROL(BT_H_BUSY); - if (BT_STATUS & BT_B_BUSY) - BT_CONTROL(BT_B_BUSY); - BT_CONTROL(BT_CLR_WR_PTR); - BT_CONTROL(BT_SMS_ATN); - - if (BT_STATUS & BT_B2H_ATN) { - int i; - BT_CONTROL(BT_H_BUSY); - BT_CONTROL(BT_B2H_ATN); - BT_CONTROL(BT_CLR_RD_PTR); - for (i = 0; i < IPMI_MAX_MSG_LENGTH + 2; i++) - BMC2HOST; - BT_CONTROL(BT_H_BUSY); - } + BT_CONTROL(BT_H_BUSY); /* force clear */ + BT_CONTROL(BT_CLR_WR_PTR); /* always reset */ + BT_CONTROL(BT_SMS_ATN); /* always clear */ + BT_INTMASK_W(BT_BMC_HWRST); +} + +/* Get rid of an unwanted/stale response. This should only be needed for + BMCs that support multiple outstanding requests. */ + +static void drain_BMC2HOST(struct si_sm_data *bt) +{ + int i, size; + + if (!(BT_STATUS & BT_B2H_ATN)) /* Not signalling a response */ + return; + + BT_CONTROL(BT_H_BUSY); /* now set */ + BT_CONTROL(BT_B2H_ATN); /* always clear */ + BT_STATUS; /* pause */ + BT_CONTROL(BT_B2H_ATN); /* some BMCs are stubborn */ + BT_CONTROL(BT_CLR_RD_PTR); /* always reset */ + if (bt_debug) + printk(KERN_WARNING "IPMI BT: stale response %s; ", + status2txt(BT_STATUS)); + size = BMC2HOST; + for (i = 0; i < size ; i++) + BMC2HOST; + BT_CONTROL(BT_H_BUSY); /* now clear */ + if (bt_debug) + printk("drained %d bytes\n", size + 1); } static inline void write_all_bytes(struct si_sm_data *bt) @@ -261,201 +321,256 @@ static inline void write_all_bytes(struct si_sm_data *bt) int i; if (bt_debug & BT_DEBUG_MSG) { - printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", + printk(KERN_WARNING "BT: write %d bytes seq=0x%02X", bt->write_count, bt->seq); for (i = 0; i < bt->write_count; i++) printk (" %02x", bt->write_data[i]); printk ("\n"); } for (i = 0; i < bt->write_count; i++) - HOST2BMC(bt->write_data[i]); + HOST2BMC(bt->write_data[i]); } static inline int read_all_bytes(struct si_sm_data *bt) { unsigned char i; + /* length is "framing info", minimum = 4: NetFn, Seq, Cmd, cCode. + Keep layout of first four bytes aligned with write_data[] */ + bt->read_data[0] = BMC2HOST; bt->read_count = bt->read_data[0]; - if (bt_debug & BT_DEBUG_MSG) - printk(KERN_WARNING "BT: read %d bytes:", bt->read_count); - /* minimum: length, NetFn, Seq, Cmd, cCode == 5 total, or 4 more - following the length byte. */ if (bt->read_count < 4 || bt->read_count >= IPMI_MAX_MSG_LENGTH) { if (bt_debug & BT_DEBUG_MSG) - printk("bad length %d\n", bt->read_count); + printk(KERN_WARNING "BT: bad raw rsp len=%d\n", + bt->read_count); bt->truncated = 1; return 1; /* let next XACTION START clean it up */ } for (i = 1; i <= bt->read_count; i++) - bt->read_data[i] = BMC2HOST; - bt->read_count++; /* account for the length byte */ + bt->read_data[i] = BMC2HOST; + bt->read_count++; /* Account internally for length byte */ if (bt_debug & BT_DEBUG_MSG) { - for (i = 0; i < bt->read_count; i++) + int max = bt->read_count; + + printk(KERN_WARNING "BT: got %d bytes seq=0x%02X", + max, bt->read_data[2]); + if (max > 16) + max = 16; + for (i = 0; i < max; i++) printk (" %02x", bt->read_data[i]); - printk ("\n"); + printk ("%s\n", bt->read_count == max ? "" : " ..."); } - if (bt->seq != bt->write_data[2]) /* idiot check */ - printk(KERN_DEBUG "BT: internal error: sequence mismatch\n"); - /* per the spec, the (NetFn, Seq, Cmd) tuples should match */ - if ((bt->read_data[3] == bt->write_data[3]) && /* Cmd */ - (bt->read_data[2] == bt->write_data[2]) && /* Sequence */ - ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) + /* per the spec, the (NetFn[1], Seq[2], Cmd[3]) tuples must match */ + if ((bt->read_data[3] == bt->write_data[3]) && + (bt->read_data[2] == bt->write_data[2]) && + ((bt->read_data[1] & 0xF8) == (bt->write_data[1] & 0xF8))) return 1; if (bt_debug & BT_DEBUG_MSG) - printk(KERN_WARNING "BT: bad packet: " + printk(KERN_WARNING "IPMI BT: bad packet: " "want 0x(%02X, %02X, %02X) got (%02X, %02X, %02X)\n", - bt->write_data[1], bt->write_data[2], bt->write_data[3], + bt->write_data[1] | 0x04, bt->write_data[2], bt->write_data[3], bt->read_data[1], bt->read_data[2], bt->read_data[3]); return 0; } -/* Modifies bt->state appropriately, need to get into the bt_event() switch */ +/* Restart if retries are left, or return an error completion code */ -static void error_recovery(struct si_sm_data *bt, char *reason) +static enum si_sm_result error_recovery(struct si_sm_data *bt, + unsigned char status, + unsigned char cCode) { - unsigned char status; - char buf[40]; /* For getting status */ + char *reason; - bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */ + bt->timeout = bt->BT_CAP_req2rsp; - status = BT_STATUS; - printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT, - STATUS2TXT(buf)); + switch (cCode) { + case IPMI_TIMEOUT_ERR: + reason = "timeout"; + break; + default: + reason = "internal error"; + break; + } + + printk(KERN_WARNING "IPMI BT: %s in %s %s ", /* open-ended line */ + reason, STATE2TXT, STATUS2TXT); + /* Per the IPMI spec, retries are based on the sequence number + known only to this module, so manage a restart here. */ (bt->error_retries)++; - if (bt->error_retries > BT_RETRY_LIMIT) { - printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT); - bt->state = BT_STATE_HOSED; - if (!bt->nonzero_status) - printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); - else if (bt->error_retries <= BT_RETRY_LIMIT + 1) { - printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n"); - bt->state = BT_STATE_RESET1; - } - return; + if (bt->error_retries < bt->BT_CAP_retries) { + printk("%d retries left\n", + bt->BT_CAP_retries - bt->error_retries); + bt->state = BT_STATE_RESTART; + return SI_SM_CALL_WITHOUT_DELAY; } - /* Sometimes the BMC queues get in an "off-by-one" state...*/ - if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) { - printk(KERN_DEBUG "retry B2H_WAIT\n"); - return; + printk("failed %d retries, sending error response\n", + bt->BT_CAP_retries); + if (!bt->nonzero_status) + printk(KERN_ERR "IPMI BT: stuck, try power cycle\n"); + + /* this is most likely during insmod */ + else if (bt->seq <= (unsigned char)(bt->BT_CAP_retries & 0xFF)) { + printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n"); + bt->state = BT_STATE_RESET1; + return SI_SM_CALL_WITHOUT_DELAY; } - printk(KERN_DEBUG "restart command\n"); - bt->state = BT_STATE_RESTART; + /* Concoct a useful error message, set up the next state, and + be done with this sequence. */ + + bt->state = BT_STATE_IDLE; + switch (cCode) { + case IPMI_TIMEOUT_ERR: + if (status & BT_B_BUSY) { + cCode = IPMI_NODE_BUSY_ERR; + bt->state = BT_STATE_LONG_BUSY; + } + break; + default: + break; + } + force_result(bt, cCode); + return SI_SM_TRANSACTION_COMPLETE; } -/* Check the status and (possibly) advance the BT state machine. The - default return is SI_SM_CALL_WITH_DELAY. */ +/* Check status and (usually) take action and change this state machine. */ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) { - unsigned char status; - char buf[40]; /* For getting status */ + unsigned char status, BT_CAP[8]; + static enum bt_states last_printed = BT_STATE_PRINTME; int i; status = BT_STATUS; bt->nonzero_status |= status; - - if ((bt_debug & BT_DEBUG_STATES) && (bt->state != bt->last_state)) + if ((bt_debug & BT_DEBUG_STATES) && (bt->state != last_printed)) { printk(KERN_WARNING "BT: %s %s TO=%ld - %ld \n", STATE2TXT, - STATUS2TXT(buf), + STATUS2TXT, bt->timeout, time); - bt->last_state = bt->state; + last_printed = bt->state; + } - if (bt->state == BT_STATE_HOSED) - return SI_SM_HOSED; + /* Commands that time out may still (eventually) provide a response. + This stale response will get in the way of a new response so remove + it if possible (hopefully during IDLE). Even if it comes up later + it will be rejected by its (now-forgotten) seq number. */ + + if ((bt->state < BT_STATE_WRITE_BYTES) && (status & BT_B2H_ATN)) { + drain_BMC2HOST(bt); + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + } - if (bt->state != BT_STATE_IDLE) { /* do timeout test */ + if ((bt->state != BT_STATE_IDLE) && + (bt->state < BT_STATE_PRINTME)) { /* check timeout */ bt->timeout -= time; - if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) { - error_recovery(bt, "timed out"); - return SI_SM_CALL_WITHOUT_DELAY; - } + if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) + return error_recovery(bt, + status, + IPMI_TIMEOUT_ERR); } switch (bt->state) { - case BT_STATE_IDLE: /* check for asynchronous messages */ + /* Idle state first checks for asynchronous messages from another + channel, then does some opportunistic housekeeping. */ + + case BT_STATE_IDLE: if (status & BT_SMS_ATN) { BT_CONTROL(BT_SMS_ATN); /* clear it */ return SI_SM_ATTN; } - return SI_SM_IDLE; - case BT_STATE_XACTION_START: - if (status & BT_H_BUSY) { + if (status & BT_H_BUSY) /* clear a leftover H_BUSY */ BT_CONTROL(BT_H_BUSY); - break; - } - if (status & BT_B2H_ATN) - break; - bt->state = BT_STATE_WRITE_BYTES; - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ - case BT_STATE_WRITE_BYTES: + /* Read BT capabilities if it hasn't been done yet */ + if (!bt->BT_CAP_outreqs) + BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN, + SI_SM_CALL_WITHOUT_DELAY); + bt->timeout = bt->BT_CAP_req2rsp; + BT_SI_SM_RETURN(SI_SM_IDLE); + + case BT_STATE_XACTION_START: if (status & (BT_B_BUSY | BT_H2B_ATN)) - break; + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + if (BT_STATUS & BT_H_BUSY) + BT_CONTROL(BT_H_BUSY); /* force clear */ + BT_STATE_CHANGE(BT_STATE_WRITE_BYTES, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_WRITE_BYTES: + if (status & BT_H_BUSY) + BT_CONTROL(BT_H_BUSY); /* clear */ BT_CONTROL(BT_CLR_WR_PTR); write_all_bytes(bt); - BT_CONTROL(BT_H2B_ATN); /* clears too fast to catch? */ - bt->state = BT_STATE_WRITE_CONSUME; - return SI_SM_CALL_WITHOUT_DELAY; /* it MIGHT sail through */ - - case BT_STATE_WRITE_CONSUME: /* BMCs usually blow right thru here */ - if (status & (BT_H2B_ATN | BT_B_BUSY)) - break; - bt->state = BT_STATE_B2H_WAIT; - /* fall through with status */ - - /* Stay in BT_STATE_B2H_WAIT until a packet matches. However, spinning - hard here, constantly reading status, seems to hold off the - generation of B2H_ATN so ALWAYS return CALL_WITH_DELAY. */ - - case BT_STATE_B2H_WAIT: - if (!(status & BT_B2H_ATN)) - break; - - /* Assume ordered, uncached writes: no need to wait */ - if (!(status & BT_H_BUSY)) - BT_CONTROL(BT_H_BUSY); /* set */ - BT_CONTROL(BT_B2H_ATN); /* clear it, ACK to the BMC */ - BT_CONTROL(BT_CLR_RD_PTR); /* reset the queue */ - i = read_all_bytes(bt); - BT_CONTROL(BT_H_BUSY); /* clear */ - if (!i) /* Try this state again */ - break; - bt->state = BT_STATE_READ_END; - return SI_SM_CALL_WITHOUT_DELAY; /* for logging */ - - case BT_STATE_READ_END: - - /* I could wait on BT_H_BUSY to go clear for a truly clean - exit. However, this is already done in XACTION_START - and the (possible) extra loop/status/possible wait affects - performance. So, as long as it works, just ignore H_BUSY */ - -#ifdef MAKE_THIS_TRUE_IF_NECESSARY + BT_CONTROL(BT_H2B_ATN); /* can clear too fast to catch */ + BT_STATE_CHANGE(BT_STATE_WRITE_CONSUME, + SI_SM_CALL_WITHOUT_DELAY); - if (status & BT_H_BUSY) - break; -#endif - bt->seq++; - bt->state = BT_STATE_IDLE; - return SI_SM_TRANSACTION_COMPLETE; + case BT_STATE_WRITE_CONSUME: + if (status & (BT_B_BUSY | BT_H2B_ATN)) + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + BT_STATE_CHANGE(BT_STATE_READ_WAIT, + SI_SM_CALL_WITHOUT_DELAY); + + /* Spinning hard can suppress B2H_ATN and force a timeout */ + + case BT_STATE_READ_WAIT: + if (!(status & BT_B2H_ATN)) + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + BT_CONTROL(BT_H_BUSY); /* set */ + + /* Uncached, ordered writes should just proceeed serially but + some BMCs don't clear B2H_ATN with one hit. Fast-path a + workaround without too much penalty to the general case. */ + + BT_CONTROL(BT_B2H_ATN); /* clear it to ACK the BMC */ + BT_STATE_CHANGE(BT_STATE_CLEAR_B2H, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_CLEAR_B2H: + if (status & BT_B2H_ATN) { /* keep hitting it */ + BT_CONTROL(BT_B2H_ATN); + BT_SI_SM_RETURN(SI_SM_CALL_WITH_DELAY); + } + BT_STATE_CHANGE(BT_STATE_READ_BYTES, + SI_SM_CALL_WITHOUT_DELAY); + + case BT_STATE_READ_BYTES: + if (!(status & BT_H_BUSY)) /* check in case of retry */ + BT_CONTROL(BT_H_BUSY); + BT_CONTROL(BT_CLR_RD_PTR); /* start of BMC2HOST buffer */ + i = read_all_bytes(bt); /* true == packet seq match */ + BT_CONTROL(BT_H_BUSY); /* NOW clear */ + if (!i) /* Not my message */ + BT_STATE_CHANGE(BT_STATE_READ_WAIT, + SI_SM_CALL_WITHOUT_DELAY); + bt->state = bt->complete; + return bt->state == BT_STATE_IDLE ? /* where to next? */ + SI_SM_TRANSACTION_COMPLETE : /* normal */ + SI_SM_CALL_WITHOUT_DELAY; /* Startup magic */ + + case BT_STATE_LONG_BUSY: /* For example: after FW update */ + if (!(status & BT_B_BUSY)) { + reset_flags(bt); /* next state is now IDLE */ + bt_init_data(bt, bt->io); + } + return SI_SM_CALL_WITH_DELAY; /* No repeat printing */ case BT_STATE_RESET1: - reset_flags(bt); - bt->timeout = BT_RESET_DELAY; - bt->state = BT_STATE_RESET2; - break; + reset_flags(bt); + drain_BMC2HOST(bt); + BT_STATE_CHANGE(BT_STATE_RESET2, + SI_SM_CALL_WITH_DELAY); case BT_STATE_RESET2: /* Send a soft reset */ BT_CONTROL(BT_CLR_WR_PTR); @@ -464,29 +579,59 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) HOST2BMC(42); /* Sequence number */ HOST2BMC(3); /* Cmd == Soft reset */ BT_CONTROL(BT_H2B_ATN); - bt->state = BT_STATE_RESET3; - break; + bt->timeout = BT_RESET_DELAY * 1000000; + BT_STATE_CHANGE(BT_STATE_RESET3, + SI_SM_CALL_WITH_DELAY); - case BT_STATE_RESET3: + case BT_STATE_RESET3: /* Hold off everything for a bit */ if (bt->timeout > 0) - return SI_SM_CALL_WITH_DELAY; - bt->state = BT_STATE_RESTART; /* printk in debug modes */ - break; + return SI_SM_CALL_WITH_DELAY; + drain_BMC2HOST(bt); + BT_STATE_CHANGE(BT_STATE_RESTART, + SI_SM_CALL_WITH_DELAY); - case BT_STATE_RESTART: /* don't reset retries! */ - reset_flags(bt); - bt->write_data[2] = ++bt->seq; + case BT_STATE_RESTART: /* don't reset retries or seq! */ bt->read_count = 0; bt->nonzero_status = 0; - bt->timeout = BT_NORMAL_TIMEOUT; - bt->state = BT_STATE_XACTION_START; - break; - - default: /* HOSED is supposed to be caught much earlier */ - error_recovery(bt, "internal logic error"); - break; - } - return SI_SM_CALL_WITH_DELAY; + bt->timeout = bt->BT_CAP_req2rsp; + BT_STATE_CHANGE(BT_STATE_XACTION_START, + SI_SM_CALL_WITH_DELAY); + + /* Get BT Capabilities, using timing of upper level state machine. + Set outreqs to prevent infinite loop on timeout. */ + case BT_STATE_CAPABILITIES_BEGIN: + bt->BT_CAP_outreqs = 1; + { + unsigned char GetBT_CAP[] = { 0x18, 0x36 }; + bt->state = BT_STATE_IDLE; + bt_start_transaction(bt, GetBT_CAP, sizeof(GetBT_CAP)); + } + bt->complete = BT_STATE_CAPABILITIES_END; + BT_STATE_CHANGE(BT_STATE_XACTION_START, + SI_SM_CALL_WITH_DELAY); + + case BT_STATE_CAPABILITIES_END: + i = bt_get_result(bt, BT_CAP, sizeof(BT_CAP)); + bt_init_data(bt, bt->io); + if ((i == 8) && !BT_CAP[2]) { + bt->BT_CAP_outreqs = BT_CAP[3]; + bt->BT_CAP_req2rsp = BT_CAP[6] * 1000000; + bt->BT_CAP_retries = BT_CAP[7]; + } else + printk(KERN_WARNING "IPMI BT: using default values\n"); + if (!bt->BT_CAP_outreqs) + bt->BT_CAP_outreqs = 1; + printk(KERN_WARNING "IPMI BT: req2rsp=%ld secs retries=%d\n", + bt->BT_CAP_req2rsp / 1000000L, bt->BT_CAP_retries); + bt->timeout = bt->BT_CAP_req2rsp; + return SI_SM_CALL_WITHOUT_DELAY; + + default: /* should never occur */ + return error_recovery(bt, + status, + IPMI_ERR_UNSPECIFIED); + } + return SI_SM_CALL_WITH_DELAY; } static int bt_detect(struct si_sm_data *bt) @@ -497,7 +642,7 @@ static int bt_detect(struct si_sm_data *bt) test that first. The calling routine uses negative logic. */ if ((BT_STATUS == 0xFF) && (BT_INTMASK_R == 0xFF)) - return 1; + return 1; reset_flags(bt); return 0; } @@ -513,11 +658,11 @@ static int bt_size(void) struct si_sm_handlers bt_smi_handlers = { - .init_data = bt_init_data, - .start_transaction = bt_start_transaction, - .get_result = bt_get_result, - .event = bt_event, - .detect = bt_detect, - .cleanup = bt_cleanup, - .size = bt_size, + .init_data = bt_init_data, + .start_transaction = bt_start_transaction, + .get_result = bt_get_result, + .event = bt_event, + .detect = bt_detect, + .cleanup = bt_cleanup, + .size = bt_size, }; diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c index 2062675f9e9..fb46979567e 100644 --- a/drivers/char/ipmi/ipmi_kcs_sm.c +++ b/drivers/char/ipmi/ipmi_kcs_sm.c @@ -261,12 +261,14 @@ static int start_kcs_transaction(struct si_sm_data *kcs, unsigned char *data, { unsigned int i; - if ((size < 2) || (size > MAX_KCS_WRITE_SIZE)) { - return -1; - } - if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) { - return -2; - } + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > MAX_KCS_WRITE_SIZE) + return IPMI_REQ_LEN_EXCEEDED_ERR; + + if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) + return IPMI_NOT_IN_MY_STATE_ERR; + if (kcs_debug & KCS_DEBUG_MSG) { printk(KERN_DEBUG "start_kcs_transaction -"); for (i = 0; i < size; i ++) { diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index c7de2e86a6d..81a0c89598e 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -247,14 +247,18 @@ static void deliver_recv_msg(struct smi_info *smi_info, spin_lock(&(smi_info->si_lock)); } -static void return_hosed_msg(struct smi_info *smi_info) +static void return_hosed_msg(struct smi_info *smi_info, int cCode) { struct ipmi_smi_msg *msg = smi_info->curr_msg; + if (cCode < 0 || cCode > IPMI_ERR_UNSPECIFIED) + cCode = IPMI_ERR_UNSPECIFIED; + /* else use it as is */ + /* Make it a reponse */ msg->rsp[0] = msg->data[0] | 4; msg->rsp[1] = msg->data[1]; - msg->rsp[2] = IPMI_ERR_UNSPECIFIED; + msg->rsp[2] = cCode; msg->rsp_size = 3; smi_info->curr_msg = NULL; @@ -305,7 +309,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info) smi_info->curr_msg->data, smi_info->curr_msg->data_size); if (err) { - return_hosed_msg(smi_info); + return_hosed_msg(smi_info, err); } rv = SI_SM_CALL_WITHOUT_DELAY; @@ -647,7 +651,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info, /* If we were handling a user message, format a response to send to the upper layer to tell it about the error. */ - return_hosed_msg(smi_info); + return_hosed_msg(smi_info, IPMI_ERR_UNSPECIFIED); } si_sm_result = smi_info->handlers->event(smi_info->si_sm, 0); } diff --git a/drivers/char/ipmi/ipmi_smic_sm.c b/drivers/char/ipmi/ipmi_smic_sm.c index 39d7e5ef1a2..e64ea7d25d2 100644 --- a/drivers/char/ipmi/ipmi_smic_sm.c +++ b/drivers/char/ipmi/ipmi_smic_sm.c @@ -141,12 +141,14 @@ static int start_smic_transaction(struct si_sm_data *smic, { unsigned int i; - if ((size < 2) || (size > MAX_SMIC_WRITE_SIZE)) { - return -1; - } - if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) { - return -2; - } + if (size < 2) + return IPMI_REQ_LEN_INVALID_ERR; + if (size > MAX_SMIC_WRITE_SIZE) + return IPMI_REQ_LEN_EXCEEDED_ERR; + + if ((smic->state != SMIC_IDLE) && (smic->state != SMIC_HOSED)) + return IPMI_NOT_IN_MY_STATE_ERR; + if (smic_debug & SMIC_DEBUG_MSG) { printk(KERN_INFO "start_smic_transaction -"); for (i = 0; i < size; i ++) { diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 8d6759cc1a7..b56a158d587 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -71,14 +71,18 @@ /* The BT interface on high-end HP systems supports up to 255 bytes in * one transfer. Its "virtual" BMC supports some commands that are longer * than 128 bytes. Use the full 256, plus NetFn/LUN, Cmd, cCode, plus - * some overhead. It would be nice to base this on the "BT Capabilities" - * but that's too hard to propagate to the rest of the driver. */ + * some overhead; it's not worth the effort to dynamically size this based + * on the results of the "Get BT Capabilities" command. */ #define IPMI_MAX_MSG_LENGTH 272 /* multiple of 16 */ #define IPMI_CC_NO_ERROR 0x00 #define IPMI_NODE_BUSY_ERR 0xc0 #define IPMI_INVALID_COMMAND_ERR 0xc1 +#define IPMI_TIMEOUT_ERR 0xc3 #define IPMI_ERR_MSG_TRUNCATED 0xc6 +#define IPMI_REQ_LEN_INVALID_ERR 0xc7 +#define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8 +#define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */ #define IPMI_LOST_ARBITRATION_ERR 0x81 #define IPMI_BUS_ERR 0x82 #define IPMI_NAK_ON_WRITE_ERR 0x83 -- cgit v1.2.3-70-g09d2 From 6cf24f031bc97cb5a7c9df3b6e73c45b628b2b28 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 6 Dec 2006 20:41:39 -0800 Subject: [PATCH] elf.h: forward declare struct file In file included from include/asm/patch.h:14, from arch/ia64/kernel/patch.c:10: include/linux/elf.h:375: warning: "struct file" declared inside parameter list include/linux/elf.h:375: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/elf.h b/include/linux/elf.h index b403516d5c3..60713e6ea29 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -6,6 +6,8 @@ #include #include +struct file; + #ifndef elf_read_implies_exec /* Executables for which elf_read_implies_exec() returns TRUE will have the READ_IMPLIES_EXEC personality flag set automatically. -- cgit v1.2.3-70-g09d2 From 68380b581383c028830f79ec2670f4a193854aa6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 7 Dec 2006 09:28:19 -0800 Subject: Add "run_scheduled_work()" workqueue function This allows workqueue users to run just their own pending work, rather than wait for the whole workqueue to finish running. This solves the deadlock with networking libphy that was due to other workqueue entries possibly needing a lock that was held by the routine that wanted to flush its own work. It's not wonderful: if you absolutely need to synchronize with the work function having been executed, any user strictly speaking should have its own completion tracking logic, since when we run things explicitly by hand, the generic workqueue layer can no longer help us synchronize. Also, this is strictly only usable for work that has been scheduled without any delayed timers. You can not mix the new interface with schedule_delayed_work(). But it's better than what we had currently. Acked-by: Maciej W. Rozycki Signed-off-by: Linus Torvalds --- drivers/net/phy/phy.c | 3 +- include/linux/workqueue.h | 1 + kernel/workqueue.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4044bb1ada8..e175f3910b1 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -587,8 +587,7 @@ int phy_stop_interrupts(struct phy_device *phydev) * Finish any pending work; we might have been scheduled * to be called from keventd ourselves, though. */ - if (!current_is_keventd()) - flush_scheduled_work(); + run_scheduled_work(&phydev->phy_queue); free_irq(phydev->irq, phydev); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f0cb1df7b47..edef8d50b26 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -162,6 +162,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); extern int FASTCALL(schedule_work(struct work_struct *work)); +extern int FASTCALL(run_scheduled_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c5257316f4b..6b186750e9b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -108,6 +108,79 @@ static inline void *get_wq_data(struct work_struct *work) return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); } +static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&cwq->lock, flags); + /* + * We need to re-validate the work info after we've gotten + * the cpu_workqueue lock. We can run the work now iff: + * + * - the wq_data still matches the cpu_workqueue_struct + * - AND the work is still marked pending + * - AND the work is still on a list (which will be this + * workqueue_struct list) + * + * All these conditions are important, because we + * need to protect against the work being run right + * now on another CPU (all but the last one might be + * true if it's currently running and has not been + * released yet, for example). + */ + if (get_wq_data(work) == cwq + && work_pending(work) + && !list_empty(&work->entry)) { + work_func_t f = work->func; + list_del_init(&work->entry); + spin_unlock_irqrestore(&cwq->lock, flags); + + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + work_release(work); + f(work); + + spin_lock_irqsave(&cwq->lock, flags); + cwq->remove_sequence++; + wake_up(&cwq->work_done); + ret = 1; + } + spin_unlock_irqrestore(&cwq->lock, flags); + return ret; +} + +/** + * run_scheduled_work - run scheduled work synchronously + * @work: work to run + * + * This checks if the work was pending, and runs it + * synchronously if so. It returns a boolean to indicate + * whether it had any scheduled work to run or not. + * + * NOTE! This _only_ works for normal work_structs. You + * CANNOT use this for delayed work, because the wq data + * for delayed work will not point properly to the per- + * CPU workqueue struct, but will change! + */ +int fastcall run_scheduled_work(struct work_struct *work) +{ + for (;;) { + struct cpu_workqueue_struct *cwq; + + if (!work_pending(work)) + return 0; + if (list_empty(&work->entry)) + return 0; + /* NOTE! This depends intimately on __queue_work! */ + cwq = get_wq_data(work); + if (!cwq) + return 0; + if (__run_work(cwq, work)) + return 1; + } +} +EXPORT_SYMBOL(run_scheduled_work); + /* Preempt must be disabled. */ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) -- cgit v1.2.3-70-g09d2