From c6e666345e1b79c62ba82339cc7d55a89cb73f88 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 2 Aug 2012 09:48:50 +0200
Subject: block: split discard into aligned requests

When a disk has large discard_granularity and small max_discard_sectors,
discards are not split with optimal alignment.  In the limit case of
discard_granularity == max_discard_sectors, no request could be aligned
correctly, so in fact you might end up with no discarded logical blocks
at all.

Another example that helps showing the condition in the patch is with
discard_granularity == 64, max_discard_sectors == 128.  A request that is
submitted for 256 sectors 2..257 will be split in two: 2..129, 130..257.
However, only 2 aligned blocks out of 3 are included in the request;
128..191 may be left intact and not discarded.  With this patch, the
first request will be truncated to ensure good alignment of what's left,
and the split will be 2..127, 128..255, 256..257.  The patch will also
take into account the discard_alignment.

At most one extra request will be introduced, because the first request
will be reduced by at most granularity-1 sectors, and granularity
must be less than max_discard_sectors.  Subsequent requests will run
on round_down(max_discard_sectors, granularity) sectors, as in the
current code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4e72a9d4823..281516ae8b4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1139,6 +1139,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 		& (lim->discard_granularity - 1);
 }
 
+static inline int bdev_discard_alignment(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (bdev != bdev->bd_contains)
+		return bdev->bd_part->discard_alignment;
+
+	return q->limits.discard_alignment;
+}
+
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
 	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
-- 
cgit v1.2.3-70-g09d2


From f6166384095b7ecf77752b5e9096e6d03d75f7ae Mon Sep 17 00:00:00 2001
From: Peng Tao <bergwolf@gmail.com>
Date: Thu, 2 Aug 2012 15:36:09 +0300
Subject: NFS41: add pg_layout_private to nfs_pageio_descriptor

To allow layout driver to pass private information around
pg_init/pg_doio.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c        | 2 ++
 include/linux/nfs_page.h | 1 +
 include/linux/nfs_xdr.h  | 1 +
 3 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 1a6732ed04a..311a79681e2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
 	hdr->dreq = desc->pg_dreq;
+	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
 	desc->pg_dreq = NULL;
+	desc->pg_layout_private = NULL;
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 880805774f9..92ce5783b70 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -69,6 +69,7 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
+	void			*pg_layout_private;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00485e08439..ac7c8ae254f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1248,6 +1248,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	struct nfs_direct_req	*dreq;
+	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
cgit v1.2.3-70-g09d2


From 85b9f66a41eb8ee3f1dfc95707412705463cdd97 Mon Sep 17 00:00:00 2001
From: Asias He <asias@redhat.com>
Date: Thu, 2 Aug 2012 23:42:04 +0200
Subject: block: Add blk_bio_map_sg() helper

Add a helper to map a bio to a scatterlist, modelled after
blk_rq_map_sg.

This helper is useful for any driver that wants to create
a scatterlist from its ->make_request_fn method.

Changes in v2:
 - Use __blk_segment_map_sg to avoid duplicated code
 - Add cocbook style function comment

Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Asias He <asias@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 576b68e7924..e76279e4116 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -209,6 +209,43 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(blk_rq_map_sg);
 
+/**
+ * blk_bio_map_sg - map a bio to a scatterlist
+ * @q: request_queue in question
+ * @bio: bio being mapped
+ * @sglist: scatterlist being mapped
+ *
+ * Note:
+ *    Caller must make sure sg can hold bio->bi_phys_segments entries
+ *
+ * Will return the number of sg entries setup
+ */
+int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
+		   struct scatterlist *sglist)
+{
+	struct bio_vec *bvec, *bvprv;
+	struct scatterlist *sg;
+	int nsegs, cluster;
+	unsigned long i;
+
+	nsegs = 0;
+	cluster = blk_queue_cluster(q);
+
+	bvprv = NULL;
+	sg = NULL;
+	bio_for_each_segment(bvec, bio, i) {
+		__blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg,
+				     &nsegs, &cluster);
+	} /* segments in bio */
+
+	if (sg)
+		sg_mark_end(sg);
+
+	BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
+	return nsegs;
+}
+EXPORT_SYMBOL(blk_bio_map_sg);
+
 static inline int ll_new_hw_segment(struct request_queue *q,
 				    struct request *req,
 				    struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 281516ae8b4..dc632975d54 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -894,6 +894,8 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
+extern int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
+			  struct scatterlist *sglist);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
 
-- 
cgit v1.2.3-70-g09d2


From d796c52ef0b71a988364f6109aeb63d79c5b116b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 5 Aug 2012 19:04:57 -0400
Subject: ext4: make sure the journal sb is written in ext4_clear_journal_err()

After we transfer set the EXT4_ERROR_FS bit in the file system
superblock, it's not enough to call jbd2_journal_clear_err() to clear
the error indication from journal superblock --- we need to call
jbd2_journal_update_sb_errno() as well.  Otherwise, when the root file
system is mounted read-only, the journal is replayed, and the error
indicator is transferred to the superblock --- but the s_errno field
in the jbd2 superblock is left set (since although we cleared it in
memory, we never flushed it out to disk).

This can end up confusing e2fsck.  We should make e2fsck more robust
in this case, but the kernel shouldn't be leaving things in this
confused state, either.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/super.c      | 1 +
 fs/jbd2/journal.c    | 3 ++-
 include/linux/jbd2.h | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d76ec8277d3..ccc4bcad561 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4430,6 +4430,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
 		ext4_commit_super(sb, 1);
 
 		jbd2_journal_clear_err(journal);
+		jbd2_journal_update_sb_errno(journal);
 	}
 }
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e9a3c4c8559..bd23f2ebaa6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
  * Update a journal's errno.  Write updated superblock to disk waiting for IO
  * to complete.
  */
-static void jbd2_journal_update_sb_errno(journal_t *journal)
+void jbd2_journal_update_sb_errno(journal_t *journal)
 {
 	journal_superblock_t *sb = journal->j_superblock;
 
@@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
 
 	jbd2_write_superblock(journal, WRITE_SYNC);
 }
+EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
 /*
  * Read the superblock for a given journal, performing initial
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f334c7fab96..3efc43f3f16 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1125,6 +1125,7 @@ extern int	   jbd2_journal_destroy    (journal_t *);
 extern int	   jbd2_journal_recover    (journal_t *journal);
 extern int	   jbd2_journal_wipe       (journal_t *, int);
 extern int	   jbd2_journal_skip_recovery	(journal_t *);
+extern void	   jbd2_journal_update_sb_errno(journal_t *);
 extern void	   jbd2_journal_update_sb_log_tail	(journal_t *, tid_t,
 				unsigned long, int);
 extern void	   __jbd2_journal_abort_hard	(journal_t *);
-- 
cgit v1.2.3-70-g09d2


From 276f0f5d157bb4a816053f4f3a941dbcd4f76556 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fusionio.com>
Date: Thu, 9 Aug 2012 15:20:23 +0200
Subject: block: disable discard request merge temporarily

The SCSI discard request merge never worked, and looks no solution
for in future, let's disable it temporarily.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dc632975d54..4a2ab7c8539 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -601,7 +601,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
  * it already be started by driver.
  */
 #define RQ_NOMERGE_FLAGS	\
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
 	 (((rq)->cmd_flags & REQ_DISCARD) || \
-- 
cgit v1.2.3-70-g09d2


From 02b69cbdc2fb2e1bfbfd9ac0c246d7be1b08d3cd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 9 Aug 2012 10:08:46 +0000
Subject: netfilter: nf_ct_sip: fix IPv6 address parsing

Within SIP messages IPv6 addresses are enclosed in square brackets in most
cases, with the exception of the "received=" header parameter. Currently
the helper fails to parse enclosed addresses.

This patch:

- changes the SIP address parsing function to enforce square brackets
  when required, and accept them when not required but present, as
  recommended by RFC 5118.

- adds a new SDP address parsing function that never accepts square
  brackets since SDP doesn't use them.

With these changes, the SIP helper correctly parses all test messages
from RFC 5118 (Session Initiation Protocol (SIP) Torture Test Messages
for Internet Protocol Version 6 (IPv6)).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_sip.h |  2 +-
 net/ipv4/netfilter/nf_nat_sip.c            |  4 +-
 net/netfilter/nf_conntrack_sip.c           | 87 ++++++++++++++++++++++++------
 3 files changed, 73 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 0dfc8b7210a..89f2a627f3f 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -164,7 +164,7 @@ extern int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr
 				      unsigned int dataoff, unsigned int datalen,
 				      const char *name,
 				      unsigned int *matchoff, unsigned int *matchlen,
-				      union nf_inet_addr *addr);
+				      union nf_inet_addr *addr, bool delim);
 extern int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
 					unsigned int off, unsigned int datalen,
 					const char *name,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index ea4a23813d2..eef8f29e8bf 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -173,7 +173,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * the reply. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "maddr=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, true) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
@@ -187,7 +187,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		 * from which the server received the request. */
 		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
 					       "received=", &poff, &plen,
-					       &addr) > 0 &&
+					       &addr, false) > 0 &&
 		    addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
 		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 2fb666920cc..5c0a112aeee 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -183,12 +183,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr,
 	return len + digits_len(ct, dptr, limit, shift);
 }
 
-static int parse_addr(const struct nf_conn *ct, const char *cp,
-                      const char **endp, union nf_inet_addr *addr,
-                      const char *limit)
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit, bool delim)
 {
 	const char *end;
-	int ret = 0;
+	int ret;
 
 	if (!ct)
 		return 0;
@@ -197,16 +197,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
 	switch (nf_ct_l3num(ct)) {
 	case AF_INET:
 		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		if (ret == 0)
+			return 0;
 		break;
 	case AF_INET6:
+		if (cp < limit && *cp == '[')
+			cp++;
+		else if (delim)
+			return 0;
+
 		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		if (ret == 0)
+			return 0;
+
+		if (end < limit && *end == ']')
+			end++;
+		else if (delim)
+			return 0;
 		break;
 	default:
 		BUG();
 	}
 
-	if (ret == 0 || end == cp)
-		return 0;
 	if (endp)
 		*endp = end;
 	return 1;
@@ -219,7 +231,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr,
 	union nf_inet_addr addr;
 	const char *aux = dptr;
 
-	if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+	if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
 		pr_debug("ip: %s parse failed.!\n", dptr);
 		return 0;
 	}
@@ -296,7 +308,7 @@ int ct_sip_parse_request(const struct nf_conn *ct,
 		return 0;
 	dptr += shift;
 
-	if (!parse_addr(ct, dptr, &end, addr, limit))
+	if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
 		return -1;
 	if (end < limit && *end == ':') {
 		end++;
@@ -550,7 +562,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
 	if (ret == 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+	if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
 		return -1;
 	if (*c == ':') {
 		c++;
@@ -599,7 +611,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 			       unsigned int dataoff, unsigned int datalen,
 			       const char *name,
 			       unsigned int *matchoff, unsigned int *matchlen,
-			       union nf_inet_addr *addr)
+			       union nf_inet_addr *addr, bool delim)
 {
 	const char *limit = dptr + datalen;
 	const char *start, *end;
@@ -613,7 +625,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 		return 0;
 
 	start += strlen(name);
-	if (!parse_addr(ct, start, &end, addr, limit))
+	if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
 		return 0;
 	*matchoff = start - dptr;
 	*matchlen = end - start;
@@ -675,6 +687,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
 	return 1;
 }
 
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+			  const char **endp, union nf_inet_addr *addr,
+			  const char *limit)
+{
+	const char *end;
+	int ret;
+
+	memset(addr, 0, sizeof(*addr));
+	switch (nf_ct_l3num(ct)) {
+	case AF_INET:
+		ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+		break;
+	case AF_INET6:
+		ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+		break;
+	default:
+		BUG();
+	}
+
+	if (ret == 0)
+		return 0;
+	if (endp)
+		*endp = end;
+	return 1;
+}
+
+/* skip ip address. returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+			const char *limit, int *shift)
+{
+	union nf_inet_addr addr;
+	const char *aux = dptr;
+
+	if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+		pr_debug("ip: %s parse failed.!\n", dptr);
+		return 0;
+	}
+
+	return dptr - aux;
+}
+
 /* SDP header parsing: a SDP session description contains an ordered set of
  * headers, starting with a section containing general session parameters,
  * optionally followed by multiple media descriptions.
@@ -686,10 +739,10 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
  */
 static const struct sip_header ct_sdp_hdrs[] = {
 	[SDP_HDR_VERSION]		= SDP_HDR("v=", NULL, digits_len),
-	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", epaddr_len),
-	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", epaddr_len),
-	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", epaddr_len),
+	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
 	[SDP_HDR_MEDIA]			= SDP_HDR("m=", NULL, media_len),
 };
 
@@ -775,8 +828,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
 	if (ret <= 0)
 		return ret;
 
-	if (!parse_addr(ct, dptr + *matchoff, NULL, addr,
-			dptr + *matchoff + *matchlen))
+	if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+			    dptr + *matchoff + *matchlen))
 		return -1;
 	return 1;
 }
-- 
cgit v1.2.3-70-g09d2


From 0bce9c46bf3b15f485d82d7e81dabed6ebcc24b1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 10 Aug 2012 15:22:09 +0100
Subject: mutex: Place lock in contended state after fastpath_lock failure

ARM recently moved to asm-generic/mutex-xchg.h for its mutex
implementation after the previous implementation was found to be missing
some crucial memory barriers. However, this has revealed some problems
running hackbench on SMP platforms due to the way in which the
MUTEX_SPIN_ON_OWNER code operates.

The symptoms are that a bunch of hackbench tasks are left waiting on an
unlocked mutex and therefore never get woken up to claim it. This boils
down to the following sequence of events:

        Task A        Task B        Task C        Lock value
0                                                     1
1       lock()                                        0
2                     lock()                          0
3                     spin(A)                         0
4       unlock()                                      1
5                                   lock()            0
6                     cmpxchg(1,0)                    0
7                     contended()                    -1
8       lock()                                        0
9       spin(C)                                       0
10                                  unlock()          1
11      cmpxchg(1,0)                                  0
12      unlock()                                      1

At this point, the lock is unlocked, but Task B is in an uninterruptible
sleep with nobody to wake it up.

This patch fixes the problem by ensuring we put the lock into the
contended state if we fail to acquire it on the fastpath, ensuring that
any blocked waiters are woken up when the mutex is released.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Chris Mason <chris.mason@fusionio.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@vger.kernel.org>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-6e9lrw2avczr0617fzl5vqb8@git.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-generic/mutex-xchg.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index 580a6d35c70..c04e0db8a2d 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -26,7 +26,13 @@ static inline void
 __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
-		fail_fn(count);
+		/*
+		 * We failed to acquire the lock, so mark it contended
+		 * to ensure that any waiting tasks are woken up by the
+		 * unlock slow path.
+		 */
+		if (likely(atomic_xchg(count, -1) != 1))
+			fail_fn(count);
 }
 
 /**
@@ -43,7 +49,8 @@ static inline int
 __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
 	if (unlikely(atomic_xchg(count, 0) != 1))
-		return fail_fn(count);
+		if (likely(atomic_xchg(count, -1) != 1))
+			return fail_fn(count);
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From f026cfa82f628db24b8cea41b9d6202af104cecb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Tue, 14 Aug 2012 09:53:38 -0700
Subject: Revert "x86-64/efi: Use EFI to deal with platform wall clock"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bacef661acdb634170a8faddbc1cf28e8f8b9eee.

This commit has been found to cause serious regressions on a number of
ASUS machines at the least.  We probably need to provide a 1:1 map in
addition to the EFI virtual memory map in order for this to work.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Reported-and-bisected-by: Jérôme Carretero <cJ-ko@zougloub.eu>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: Matthew Garrett <mjg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120805172903.5f8bb24c@zougloub.eu
---
 arch/x86/mm/pageattr.c      | 10 ++++------
 arch/x86/platform/efi/efi.c | 30 ++++++++++++++++++++++++++----
 include/linux/efi.h         |  2 ++
 init/main.c                 |  8 ++++----
 4 files changed, 36 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 931930a9616..a718e0d2350 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -919,13 +919,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
 	/*
 	 * On success we use clflush, when the CPU supports it to
-	 * avoid the wbindv. If the CPU does not support it, in the
-	 * error case, and during early boot (for EFI) we fall back
-	 * to cpa_flush_all (which uses wbinvd):
+	 * avoid the wbindv. If the CPU does not support it and in the
+	 * error case we fall back to cpa_flush_all (which uses
+	 * wbindv):
 	 */
-	if (early_boot_irqs_disabled)
-		__cpa_flush_all((void *)(long)cache);
-	else if (!ret && cpu_has_clflush) {
+	if (!ret && cpu_has_clflush) {
 		if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 2dc29f51e75..92660edaa1e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -234,7 +234,22 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
 	return status;
 }
 
-static int efi_set_rtc_mmss(unsigned long nowtime)
+static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
+					     efi_time_cap_t *tc)
+{
+	unsigned long flags;
+	efi_status_t status;
+
+	spin_lock_irqsave(&rtc_lock, flags);
+	efi_call_phys_prelog();
+	status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
+				virt_to_phys(tc));
+	efi_call_phys_epilog();
+	spin_unlock_irqrestore(&rtc_lock, flags);
+	return status;
+}
+
+int efi_set_rtc_mmss(unsigned long nowtime)
 {
 	int real_seconds, real_minutes;
 	efi_status_t 	status;
@@ -263,7 +278,7 @@ static int efi_set_rtc_mmss(unsigned long nowtime)
 	return 0;
 }
 
-static unsigned long efi_get_time(void)
+unsigned long efi_get_time(void)
 {
 	efi_status_t status;
 	efi_time_t eft;
@@ -606,13 +621,18 @@ static int __init efi_runtime_init(void)
 	}
 	/*
 	 * We will only need *early* access to the following
-	 * EFI runtime service before set_virtual_address_map
+	 * two EFI runtime services before set_virtual_address_map
 	 * is invoked.
 	 */
+	efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
 	efi_phys.set_virtual_address_map =
 		(efi_set_virtual_address_map_t *)
 		runtime->set_virtual_address_map;
-
+	/*
+	 * Make efi_get_time can be called before entering
+	 * virtual mode.
+	 */
+	efi.get_time = phys_efi_get_time;
 	early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
 	return 0;
@@ -700,10 +720,12 @@ void __init efi_init(void)
 		efi_enabled = 0;
 		return;
 	}
+#ifdef CONFIG_X86_32
 	if (efi_native) {
 		x86_platform.get_wallclock = efi_get_time;
 		x86_platform.set_wallclock = efi_set_rtc_mmss;
 	}
+#endif
 
 #if EFI_DEBUG
 	print_efi_memmap();
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 103adc6d7e3..ec45ccd8708 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -503,6 +503,8 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
+extern unsigned long efi_get_time(void);
+extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
diff --git a/init/main.c b/init/main.c
index e60679de61c..b28673087ac 100644
--- a/init/main.c
+++ b/init/main.c
@@ -461,10 +461,6 @@ static void __init mm_init(void)
 	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
-#ifdef CONFIG_X86
-	if (efi_enabled)
-		efi_enter_virtual_mode();
-#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -606,6 +602,10 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
+#ifdef CONFIG_X86
+	if (efi_enabled)
+		efi_enter_virtual_mode();
+#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);
-- 
cgit v1.2.3-70-g09d2


From 47be03a28cc6c80e3aa2b3e8ed6d960ff0c5c0af Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:37 +0000
Subject: netpoll: use GFP_ATOMIC in slave_enable_netpoll() and
 __netpoll_setup()

slave_enable_netpoll() and __netpoll_setup() may be called
with read_lock() held, so should use GFP_ATOMIC to allocate
memory. Eric suggested to pass gfp flags to __netpoll_setup().

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  6 +++---
 drivers/net/team/team.c         | 16 +++++++++-------
 include/linux/netdevice.h       |  3 ++-
 include/linux/netpoll.h         |  2 +-
 net/8021q/vlan_dev.c            |  7 ++++---
 net/bridge/br_device.c          | 12 ++++++------
 net/bridge/br_if.c              |  2 +-
 net/bridge/br_private.h         |  4 ++--
 net/core/netpoll.c              |  8 ++++----
 9 files changed, 32 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6fae5f3ec7f..8697136e27c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1235,12 +1235,12 @@ static inline int slave_enable_netpoll(struct slave *slave)
 	struct netpoll *np;
 	int err = 0;
 
-	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	np = kzalloc(sizeof(*np), GFP_ATOMIC);
 	err = -ENOMEM;
 	if (!np)
 		goto out;
 
-	err = __netpoll_setup(np, slave->dev);
+	err = __netpoll_setup(np, slave->dev, GFP_ATOMIC);
 	if (err) {
 		kfree(np);
 		goto out;
@@ -1292,7 +1292,7 @@ static void bond_netpoll_cleanup(struct net_device *bond_dev)
 	read_unlock(&bond->lock);
 }
 
-static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp)
 {
 	struct bonding *bond = netdev_priv(dev);
 	struct slave *slave;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 87707ab3943..341b65dbbcd 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -795,16 +795,17 @@ static void team_port_leave(struct team *team, struct team_port *port)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static int team_port_enable_netpoll(struct team *team, struct team_port *port)
+static int team_port_enable_netpoll(struct team *team, struct team_port *port,
+				    gfp_t gfp)
 {
 	struct netpoll *np;
 	int err;
 
-	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	np = kzalloc(sizeof(*np), gfp);
 	if (!np)
 		return -ENOMEM;
 
-	err = __netpoll_setup(np, port->dev);
+	err = __netpoll_setup(np, port->dev, gfp);
 	if (err) {
 		kfree(np);
 		return err;
@@ -833,7 +834,8 @@ static struct netpoll_info *team_netpoll_info(struct team *team)
 }
 
 #else
-static int team_port_enable_netpoll(struct team *team, struct team_port *port)
+static int team_port_enable_netpoll(struct team *team, struct team_port *port,
+				    gfp_t gfp)
 {
 	return 0;
 }
@@ -913,7 +915,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 	}
 
 	if (team_netpoll_info(team)) {
-		err = team_port_enable_netpoll(team, port);
+		err = team_port_enable_netpoll(team, port, GFP_KERNEL);
 		if (err) {
 			netdev_err(dev, "Failed to enable netpoll on device %s\n",
 				   portname);
@@ -1443,7 +1445,7 @@ static void team_netpoll_cleanup(struct net_device *dev)
 }
 
 static int team_netpoll_setup(struct net_device *dev,
-			      struct netpoll_info *npifo)
+			      struct netpoll_info *npifo, gfp_t gfp)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
@@ -1451,7 +1453,7 @@ static int team_netpoll_setup(struct net_device *dev,
 
 	mutex_lock(&team->lock);
 	list_for_each_entry(port, &team->port_list, list) {
-		err = team_port_enable_netpoll(team, port);
+		err = team_port_enable_netpoll(team, port, gfp);
 		if (err) {
 			__team_netpoll_cleanup(team);
 			break;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9db4f33407..3560d688161 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -953,7 +953,8 @@ struct net_device_ops {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
 	int			(*ndo_netpoll_setup)(struct net_device *dev,
-						     struct netpoll_info *info);
+						     struct netpoll_info *info,
+						     gfp_t gfp);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 28f5389c924..bf2d51eec0f 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -43,7 +43,7 @@ struct netpoll_info {
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp);
 int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 73a2a83ee2d..ee4ae0944ce 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -669,19 +669,20 @@ static void vlan_dev_poll_controller(struct net_device *dev)
 	return;
 }
 
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
+				  gfp_t gfp)
 {
 	struct vlan_dev_priv *info = vlan_dev_priv(dev);
 	struct net_device *real_dev = info->real_dev;
 	struct netpoll *netpoll;
 	int err = 0;
 
-	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+	netpoll = kzalloc(sizeof(*netpoll), gfp);
 	err = -ENOMEM;
 	if (!netpoll)
 		goto out;
 
-	err = __netpoll_setup(netpoll, real_dev);
+	err = __netpoll_setup(netpoll, real_dev, gfp);
 	if (err) {
 		kfree(netpoll);
 		goto out;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 33348453760..ed0e0f9dc78 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -213,7 +213,8 @@ static void br_netpoll_cleanup(struct net_device *dev)
 	}
 }
 
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+			    gfp_t gfp)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p, *n;
@@ -222,8 +223,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		if (!p->dev)
 			continue;
-
-		err = br_netpoll_enable(p);
+		err = br_netpoll_enable(p, gfp);
 		if (err)
 			goto fail;
 	}
@@ -236,17 +236,17 @@ fail:
 	goto out;
 }
 
-int br_netpoll_enable(struct net_bridge_port *p)
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	struct netpoll *np;
 	int err = 0;
 
-	np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+	np = kzalloc(sizeof(*p->np), gfp);
 	err = -ENOMEM;
 	if (!np)
 		goto out;
 
-	err = __netpoll_setup(np, p->dev);
+	err = __netpoll_setup(np, p->dev, gfp);
 	if (err) {
 		kfree(np);
 		goto out;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e1144e1617b..171fd6b9bfe 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err2;
 
-	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
 		goto err3;
 
 	err = netdev_set_master(dev, br->dev);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a768b2408ed..f507d2af964 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 		netpoll_send_skb(np, skb);
 }
 
-extern int br_netpoll_enable(struct net_bridge_port *p);
+extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
 extern void br_netpoll_disable(struct net_bridge_port *p);
 #else
 static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
@@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 {
 }
 
-static inline int br_netpoll_enable(struct net_bridge_port *p)
+static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
 {
 	return 0;
 }
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e42b44..37cc854774a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -715,7 +715,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 }
 EXPORT_SYMBOL(netpoll_parse_options);
 
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
 {
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
@@ -734,7 +734,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 	}
 
 	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		npinfo = kmalloc(sizeof(*npinfo), gfp);
 		if (!npinfo) {
 			err = -ENOMEM;
 			goto out;
@@ -752,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 
 		ops = np->dev->netdev_ops;
 		if (ops->ndo_netpoll_setup) {
-			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
 			if (err)
 				goto free_npinfo;
 		}
@@ -857,7 +857,7 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	rtnl_lock();
-	err = __netpoll_setup(np, ndev);
+	err = __netpoll_setup(np, ndev, GFP_KERNEL);
 	rtnl_unlock();
 
 	if (err)
-- 
cgit v1.2.3-70-g09d2


From 38e6bc185d9544dfad1774b3f8902a0b061aea25 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:38 +0000
Subject: netpoll: make __netpoll_cleanup non-block

Like the previous patch, slave_disable_netpoll() and __netpoll_cleanup()
may be called with read_lock() held too, so we should make them
non-block, by moving the cleanup and kfree() to call_rcu_bh() callbacks.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  4 +---
 include/linux/netpoll.h         |  3 +++
 net/8021q/vlan_dev.c            |  6 +-----
 net/bridge/br_device.c          |  6 +-----
 net/core/netpoll.c              | 42 +++++++++++++++++++++++++++++++----------
 5 files changed, 38 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8697136e27c..e42891683e3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1257,9 +1257,7 @@ static inline void slave_disable_netpoll(struct slave *slave)
 		return;
 
 	slave->np = NULL;
-	synchronize_rcu_bh();
-	__netpoll_cleanup(np);
-	kfree(np);
+	__netpoll_free_rcu(np);
 }
 static inline bool slave_dev_support_netpoll(struct net_device *slave_dev)
 {
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index bf2d51eec0f..907812efb4d 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -23,6 +23,7 @@ struct netpoll {
 	u8 remote_mac[ETH_ALEN];
 
 	struct list_head rx; /* rx_np list element */
+	struct rcu_head rcu;
 };
 
 struct netpoll_info {
@@ -38,6 +39,7 @@ struct netpoll_info {
 	struct delayed_work tx_work;
 
 	struct netpoll *netpoll;
+	struct rcu_head rcu;
 };
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
@@ -48,6 +50,7 @@ int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
+void __netpoll_free_rcu(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ee4ae0944ce..b65623f9066 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -704,11 +704,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	info->netpoll = NULL;
 
-        /* Wait for transmitting packets to finish before freeing. */
-        synchronize_rcu_bh();
-
-        __netpoll_cleanup(netpoll);
-        kfree(netpoll);
+	__netpoll_free_rcu(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ed0e0f9dc78..f41ba4048c9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	/* Wait for transmitting packets to finish before freeing. */
-	synchronize_rcu_bh();
-
-	__netpoll_cleanup(np);
-	kfree(np);
+	__netpoll_free_rcu(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 37cc854774a..dc17f1db147 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -878,6 +878,24 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+	struct netpoll_info *npinfo =
+			container_of(rcu_head, struct netpoll_info, rcu);
+
+	skb_queue_purge(&npinfo->arp_tx);
+	skb_queue_purge(&npinfo->txq);
+
+	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
+	cancel_delayed_work(&npinfo->tx_work);
+
+	/* clean after last, unfinished work */
+	__skb_queue_purge(&npinfo->txq);
+	/* now cancel it again */
+	cancel_delayed_work(&npinfo->tx_work);
+	kfree(npinfo);
+}
+
 void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
@@ -903,20 +921,24 @@ void __netpoll_cleanup(struct netpoll *np)
 			ops->ndo_netpoll_cleanup(np->dev);
 
 		RCU_INIT_POINTER(np->dev->npinfo, NULL);
+		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-		/* avoid racing with NAPI reading npinfo */
-		synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+	struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
 
-		skb_queue_purge(&npinfo->arp_tx);
-		skb_queue_purge(&npinfo->txq);
-		cancel_delayed_work_sync(&npinfo->tx_work);
+	__netpoll_cleanup(np);
+	kfree(np);
+}
 
-		/* clean after last, unfinished work */
-		__skb_queue_purge(&npinfo->txq);
-		kfree(npinfo);
-	}
+void __netpoll_free_rcu(struct netpoll *np)
+{
+	call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
 }
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
 
 void netpoll_cleanup(struct netpoll *np)
 {
-- 
cgit v1.2.3-70-g09d2


From 57c5d46191e75312934c00eba65b13a31ca95120 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:40 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_rx()

In __netpoll_rx(), it dereferences ->npinfo without rcu_dereference_bh(),
this patch fixes it by using the 'npinfo' passed from netpoll_rx()
where it is already dereferenced with rcu_dereference_bh().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 4 ++--
 net/core/netpoll.c      | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 907812efb4d..5d881c38827 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -52,7 +52,7 @@ void netpoll_set_trap(int trap);
 void __netpoll_cleanup(struct netpoll *np);
 void __netpoll_free_rcu(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
-int __netpoll_rx(struct sk_buff *skb);
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
 static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
@@ -77,7 +77,7 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 
 	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
-	if (npinfo->rx_flags && __netpoll_rx(skb))
+	if (npinfo->rx_flags && __netpoll_rx(skb, npinfo))
 		ret = true;
 	spin_unlock(&npinfo->rx_lock);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dc17f1db147..d055bb01328 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -543,13 +543,12 @@ static void arp_reply(struct sk_buff *skb)
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 }
 
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
 	int proto, len, ulen;
 	int hits = 0;
 	const struct iphdr *iph;
 	struct udphdr *uh;
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct netpoll *np, *tmp;
 
 	if (list_empty(&npinfo->rx_np))
-- 
cgit v1.2.3-70-g09d2


From 91fe4a4b9e490a24f6702dd8afe72d8afab6fcdb Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:41 +0000
Subject: netpoll: use netpoll_rx_on() in netpoll_rx()

The logic of the code is same, just call netpoll_rx_on().

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 5d881c38827..2d178baa49d 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,6 +63,13 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 
 #ifdef CONFIG_NETPOLL
+static inline int netpoll_rx_on(struct sk_buff *skb)
+{
+	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
+
+	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
+}
+
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo;
@@ -70,11 +77,11 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 	bool ret = false;
 
 	local_irq_save(flags);
-	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
-	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
+	if (!netpoll_rx_on(skb))
 		goto out;
 
+	npinfo = rcu_dereference_bh(skb->dev->npinfo);
 	spin_lock(&npinfo->rx_lock);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb, npinfo))
@@ -86,13 +93,6 @@ out:
 	return ret;
 }
 
-static inline int netpoll_rx_on(struct sk_buff *skb)
-{
-	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
-
-	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
-}
-
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
 	if (!list_empty(&skb->dev->napi_list))
-- 
cgit v1.2.3-70-g09d2


From 2899656b494dcd118123af1126826b115c8ea6f9 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:42 +0000
Subject: netpoll: take rcu_read_lock_bh() in netpoll_send_skb_on_dev()

This patch fixes several problems in the call path of
netpoll_send_skb_on_dev():

1. Disable IRQ's before calling netpoll_send_skb_on_dev().

2. All the callees of netpoll_send_skb_on_dev() should use
   rcu_dereference_bh() to dereference ->npinfo.

3. Rename arp_reply() to netpoll_arp_reply(), the former is too generic.

Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  3 +++
 net/core/netpoll.c      | 31 +++++++++++++++++--------------
 2 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 2d178baa49d..61aee86cf21 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -57,7 +57,10 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
 static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
+	unsigned long flags;
+	local_irq_save(flags);
 	netpoll_send_skb_on_dev(np, skb, np->dev);
+	local_irq_restore(flags);
 }
 
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d055bb01328..174346ac15a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -54,7 +54,7 @@ static atomic_t trapped;
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -170,7 +170,8 @@ static void poll_napi(struct net_device *dev)
 	list_for_each_entry(napi, &dev->napi_list, dev_list) {
 		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			budget = poll_one_napi(dev->npinfo, napi, budget);
+			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+					       napi, budget);
 			spin_unlock(&napi->poll_lock);
 
 			if (!budget)
@@ -185,13 +186,14 @@ static void service_arp_queue(struct netpoll_info *npi)
 		struct sk_buff *skb;
 
 		while ((skb = skb_dequeue(&npi->arp_tx)))
-			arp_reply(skb);
+			netpoll_arp_reply(skb, npi);
 	}
 }
 
 static void netpoll_poll_dev(struct net_device *dev)
 {
 	const struct net_device_ops *ops;
+	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
 
 	if (!dev || !netif_running(dev))
 		return;
@@ -206,17 +208,18 @@ static void netpoll_poll_dev(struct net_device *dev)
 	poll_napi(dev);
 
 	if (dev->flags & IFF_SLAVE) {
-		if (dev->npinfo) {
+		if (ni) {
 			struct net_device *bond_dev = dev->master;
 			struct sk_buff *skb;
-			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+			struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+			while ((skb = skb_dequeue(&ni->arp_tx))) {
 				skb->dev = bond_dev;
-				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+				skb_queue_tail(&bond_ni->arp_tx, skb);
 			}
 		}
 	}
 
-	service_arp_queue(dev->npinfo);
+	service_arp_queue(ni);
 
 	zap_completion_queue();
 }
@@ -302,6 +305,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
+/* call with IRQ disabled */
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev)
 {
@@ -309,8 +313,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	unsigned long tries;
 	const struct net_device_ops *ops = dev->netdev_ops;
 	/* It is up to the caller to keep npinfo alive. */
-	struct netpoll_info *npinfo = np->dev->npinfo;
+	struct netpoll_info *npinfo;
+
+	WARN_ON_ONCE(!irqs_disabled());
 
+	npinfo = rcu_dereference_bh(np->dev->npinfo);
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
 		__kfree_skb(skb);
 		return;
@@ -319,11 +326,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	/* don't get messages out of order, and no recursion */
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
-		unsigned long flags;
 
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
 
-		local_irq_save(flags);
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
@@ -347,10 +352,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		}
 
 		WARN_ONCE(!irqs_disabled(),
-			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
 			dev->name, ops->ndo_start_xmit);
 
-		local_irq_restore(flags);
 	}
 
 	if (status != NETDEV_TX_OK) {
@@ -423,9 +427,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 }
 EXPORT_SYMBOL(netpoll_send_udp);
 
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
-- 
cgit v1.2.3-70-g09d2


From e15c3c2294605f09f9b336b2f3b97086ab4b8145 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:45 +0000
Subject: netpoll: check netpoll tx status on the right device

Although this doesn't matter actually, because netpoll_tx_running()
doesn't use the parameter, the code will be more readable.

For team_dev_queue_xmit() we have to move it down to avoid
compile errors.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  2 +-
 include/linux/if_team.h         | 30 +++++++++++++++---------------
 net/bridge/br_forward.c         |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e42891683e3..d688a8af432 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -398,7 +398,7 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
 	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;
 
-	if (unlikely(netpoll_tx_running(slave_dev)))
+	if (unlikely(netpoll_tx_running(bond->dev)))
 		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
 	else
 		dev_queue_xmit(skb);
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 6960fc1841a..aa2e167e1ef 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -96,21 +96,6 @@ static inline void team_netpoll_send_skb(struct team_port *port,
 }
 #endif
 
-static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
-				      struct sk_buff *skb)
-{
-	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
-		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
-	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
-
-	skb->dev = port->dev;
-	if (unlikely(netpoll_tx_running(port->dev))) {
-		team_netpoll_send_skb(port, skb);
-		return 0;
-	}
-	return dev_queue_xmit(skb);
-}
-
 struct team_mode_ops {
 	int (*init)(struct team *team);
 	void (*exit)(struct team *team);
@@ -200,6 +185,21 @@ struct team {
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
+static inline int team_dev_queue_xmit(struct team *team, struct team_port *port,
+				      struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
+		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
+	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
+
+	skb->dev = port->dev;
+	if (unlikely(netpoll_tx_running(team->dev))) {
+		team_netpoll_send_skb(port, skb);
+		return 0;
+	}
+	return dev_queue_xmit(skb);
+}
+
 static inline struct hlist_head *team_port_index_hash(struct team *team,
 						      int port_index)
 {
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e9466d41270..02015a505d2 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	skb->dev = to->dev;
 
-	if (unlikely(netpoll_tx_running(to->dev))) {
+	if (unlikely(netpoll_tx_running(to->br->dev))) {
 		if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 			kfree_skb(skb);
 		else {
-- 
cgit v1.2.3-70-g09d2


From 77ab8a54d9a8dcc4a46484a04133314f33f2aba6 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Fri, 10 Aug 2012 01:24:46 +0000
Subject: netpoll: convert several functions to bool

These functions are just boolean, let them return
bool instead of int.

Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 61aee86cf21..66d5379c305 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -66,7 +66,7 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 
 #ifdef CONFIG_NETPOLL
-static inline int netpoll_rx_on(struct sk_buff *skb)
+static inline bool netpoll_rx_on(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo);
 
@@ -125,7 +125,7 @@ static inline void netpoll_poll_unlock(void *have)
 	}
 }
 
-static inline int netpoll_tx_running(struct net_device *dev)
+static inline bool netpoll_tx_running(struct net_device *dev)
 {
 	return irqs_disabled();
 }
@@ -133,11 +133,11 @@ static inline int netpoll_tx_running(struct net_device *dev)
 #else
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
-	return 0;
+	return false;
 }
-static inline int netpoll_rx_on(struct sk_buff *skb)
+static inline bool netpoll_rx_on(struct sk_buff *skb)
 {
-	return 0;
+	return false;
 }
 static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
@@ -153,9 +153,9 @@ static inline void netpoll_poll_unlock(void *have)
 static inline void netpoll_netdev_init(struct net_device *dev)
 {
 }
-static inline int netpoll_tx_running(struct net_device *dev)
+static inline bool netpoll_tx_running(struct net_device *dev)
 {
-	return 0;
+	return false;
 }
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 6024935f5ff5f1646bce8404416318e5fd4a0c4a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 13 Aug 2012 02:49:59 +0000
Subject: llc2: Fix silent failure of llc_station_init()

llc_station_init() creates and processes an event skb with no effect
other than to change the state from DOWN to UP.  Allocation failure is
reported, but then ignored by its caller, llc2_init().  Remove this
possibility by simply initialising the state as UP.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc.h     |  2 +-
 net/llc/llc_station.c | 19 ++-----------------
 2 files changed, 3 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/llc.h b/include/net/llc.h
index 226c846cab0..f2d0fc57052 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -133,7 +133,7 @@ extern int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb,
 extern void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb);
 extern void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb);
 
-extern int llc_station_init(void);
+extern void llc_station_init(void);
 extern void llc_station_exit(void);
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 6828e39ec2e..45ddbb93c5d 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -687,12 +687,8 @@ static void llc_station_rcv(struct sk_buff *skb)
 	llc_station_state_process(skb);
 }
 
-int __init llc_station_init(void)
+void __init llc_station_init(void)
 {
-	int rc = -ENOBUFS;
-	struct sk_buff *skb;
-	struct llc_station_state_ev *ev;
-
 	skb_queue_head_init(&llc_main_station.mac_pdu_q);
 	skb_queue_head_init(&llc_main_station.ev_q.list);
 	spin_lock_init(&llc_main_station.ev_q.lock);
@@ -700,20 +696,9 @@ int __init llc_station_init(void)
 			(unsigned long)&llc_main_station);
 	llc_main_station.ack_timer.expires  = jiffies +
 						sysctl_llc_station_ack_timeout;
-	skb = alloc_skb(0, GFP_ATOMIC);
-	if (!skb)
-		goto out;
-	rc = 0;
 	llc_set_station_handler(llc_station_rcv);
-	ev = llc_station_ev(skb);
-	memset(ev, 0, sizeof(*ev));
 	llc_main_station.maximum_retry	= 1;
-	llc_main_station.state		= LLC_STATION_STATE_DOWN;
-	ev->type	= LLC_STATION_EV_TYPE_SIMPLE;
-	ev->prim_type	= LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
-	rc = llc_station_next_state(skb);
-out:
-	return rc;
+	llc_main_station.state		= LLC_STATION_STATE_UP;
 }
 
 void __exit llc_station_exit(void)
-- 
cgit v1.2.3-70-g09d2


From 4e8b14526ca7fb046a81c94002c1c43b6fdf0e9b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 8 Aug 2012 15:36:20 -0400
Subject: time: Improve sanity checking of timekeeping inputs

Unexpected behavior could occur if the time is set to a value large
enough to overflow a 64bit ktime_t (which is something larger then the
year 2262).

Also unexpected behavior could occur if large negative offsets are
injected via adjtimex.

So this patch improves the sanity check timekeeping inputs by
improving the timespec_valid() check, and then makes better use of
timespec_valid() to make sure we don't set the time to an invalid
negative value or one that overflows ktime_t.

Note: This does not protect from setting the time close to overflowing
ktime_t and then letting natural accumulation cause the overflow.

Reported-by: CAI Qian <caiqian@redhat.com>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Zhouping Liu <zliu@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1344454580-17031-1-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/ktime.h     |  7 -------
 include/linux/time.h      | 22 ++++++++++++++++++++--
 kernel/time/timekeeping.c | 26 ++++++++++++++++++++++++--
 3 files changed, 44 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 603bec2913b..06177ba10a1 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -58,13 +58,6 @@ union ktime {
 
 typedef union ktime ktime_t;		/* Kill this */
 
-#define KTIME_MAX			((s64)~((u64)1 << 63))
-#if (BITS_PER_LONG == 64)
-# define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
-#else
-# define KTIME_SEC_MAX			LONG_MAX
-#endif
-
 /*
  * ktime_t definitions when using the 64-bit scalar representation:
  */
diff --git a/include/linux/time.h b/include/linux/time.h
index c81c5e40fcb..b0bbd8f0130 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -107,11 +107,29 @@ static inline struct timespec timespec_sub(struct timespec lhs,
 	return ts_delta;
 }
 
+#define KTIME_MAX			((s64)~((u64)1 << 63))
+#if (BITS_PER_LONG == 64)
+# define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
+#else
+# define KTIME_SEC_MAX			LONG_MAX
+#endif
+
 /*
  * Returns true if the timespec is norm, false if denorm:
  */
-#define timespec_valid(ts) \
-	(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
+static inline bool timespec_valid(const struct timespec *ts)
+{
+	/* Dates before 1970 are bogus */
+	if (ts->tv_sec < 0)
+		return false;
+	/* Can't have more nanoseconds then a second */
+	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+		return false;
+	/* Disallow values that could overflow ktime_t */
+	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+		return false;
+	return true;
+}
 
 extern void read_persistent_clock(struct timespec *ts);
 extern void read_boot_clock(struct timespec *ts);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e16af197a2b..898bef066a4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -427,7 +427,7 @@ int do_settimeofday(const struct timespec *tv)
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+	if (!timespec_valid(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -463,6 +463,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
+	struct timespec tmp;
+	int ret = 0;
 
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -471,10 +473,17 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	timekeeping_forward_now(tk);
 
+	/* Make sure the proposed value is valid */
+	tmp = timespec_add(tk_xtime(tk),  *ts);
+	if (!timespec_valid(&tmp)) {
+		ret = -EINVAL;
+		goto error;
+	}
 
 	tk_xtime_add(tk, ts);
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
+error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 
 	write_sequnlock_irqrestore(&tk->lock, flags);
@@ -482,7 +491,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 	/* signal hrtimers about time change */
 	clock_was_set();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
@@ -649,7 +658,20 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+	if (!timespec_valid(&now)) {
+		pr_warn("WARNING: Persistent clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		now.tv_sec = 0;
+		now.tv_nsec = 0;
+	}
+
 	read_boot_clock(&boot);
+	if (!timespec_valid(&boot)) {
+		pr_warn("WARNING: Boot clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		boot.tv_sec = 0;
+		boot.tv_nsec = 0;
+	}
 
 	seqlock_init(&tk->lock);
 
-- 
cgit v1.2.3-70-g09d2


From 58569aee5a1a5dcc25c34a0a2ed9a377874e6b05 Mon Sep 17 00:00:00 2001
From: "Arnaud Patard (Rtp)" <arnaud.patard@rtp-net.org>
Date: Thu, 26 Jul 2012 12:15:46 +0200
Subject: ARM: Orion: Set eth packet size csum offload limit

The mv643xx ethernet controller limits the packet size for the TX
checksum offloading. This patch sets this limits for Kirkwood and
Dove which have smaller limits that the default.

As a side note, this patch is an updated version of a patch sent some years
ago: http://lists.infradead.org/pipermail/linux-arm-kernel/2010-June/017320.html
which seems to have been lost.

Signed-off-by: Arnaud Patard <arnaud.patard@rtp-net.org>
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
Cc: <stable@vger.kernel.org>
---
 arch/arm/mach-dove/common.c               | 3 ++-
 arch/arm/mach-kirkwood/common.c           | 4 ++--
 arch/arm/mach-mv78xx0/common.c            | 6 ++++--
 arch/arm/mach-orion5x/common.c            | 3 ++-
 arch/arm/plat-orion/common.c              | 8 ++++++--
 arch/arm/plat-orion/include/plat/common.h | 6 ++++--
 include/linux/mv643xx_eth.h               | 2 ++
 7 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c
index 4db5de54b6a..6321567d8ea 100644
--- a/arch/arm/mach-dove/common.c
+++ b/arch/arm/mach-dove/common.c
@@ -102,7 +102,8 @@ void __init dove_ehci1_init(void)
 void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data, DOVE_GE00_PHYS_BASE,
-			IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR);
+			IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR,
+			1600);
 }
 
 /*****************************************************************************
diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c
index c4b64adcbfc..3226077735b 100644
--- a/arch/arm/mach-kirkwood/common.c
+++ b/arch/arm/mach-kirkwood/common.c
@@ -301,7 +301,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM,
-			IRQ_KIRKWOOD_GE00_ERR);
+			IRQ_KIRKWOOD_GE00_ERR, 1600);
 	/* The interface forgets the MAC address assigned by u-boot if
 	the clock is turned off, so claim the clk now. */
 	clk_prepare_enable(ge0);
@@ -315,7 +315,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM,
-			IRQ_KIRKWOOD_GE01_ERR);
+			IRQ_KIRKWOOD_GE01_ERR, 1600);
 	clk_prepare_enable(ge1);
 }
 
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c
index b4c53b846c9..3057f7d4329 100644
--- a/arch/arm/mach-mv78xx0/common.c
+++ b/arch/arm/mach-mv78xx0/common.c
@@ -213,7 +213,8 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
 			GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM,
-			IRQ_MV78XX0_GE_ERR);
+			IRQ_MV78XX0_GE_ERR,
+			MV643XX_TX_CSUM_DEFAULT_LIMIT);
 }
 
 
@@ -224,7 +225,8 @@ void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge01_init(eth_data,
 			GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM,
-			NO_IRQ);
+			NO_IRQ,
+			MV643XX_TX_CSUM_DEFAULT_LIMIT);
 }
 
 
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 9148b229d0d..410291c6766 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -109,7 +109,8 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
 {
 	orion_ge00_init(eth_data,
 			ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM,
-			IRQ_ORION5X_ETH_ERR);
+			IRQ_ORION5X_ETH_ERR,
+			MV643XX_TX_CSUM_DEFAULT_LIMIT);
 }
 
 
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index d245a87dc01..b8b747a9d36 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -291,10 +291,12 @@ static struct platform_device orion_ge00 = {
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err)
+			    unsigned long irq_err,
+			    unsigned int tx_csum_limit)
 {
 	fill_resources(&orion_ge00_shared, orion_ge00_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
+	orion_ge00_shared_data.tx_csum_limit = tx_csum_limit;
 	ge_complete(&orion_ge00_shared_data,
 		    orion_ge00_resources, irq, &orion_ge00_shared,
 		    eth_data, &orion_ge00);
@@ -343,10 +345,12 @@ static struct platform_device orion_ge01 = {
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err)
+			    unsigned long irq_err,
+			    unsigned int tx_csum_limit)
 {
 	fill_resources(&orion_ge01_shared, orion_ge01_shared_resources,
 		       mapbase + 0x2000, SZ_16K - 1, irq_err);
+	orion_ge01_shared_data.tx_csum_limit = tx_csum_limit;
 	ge_complete(&orion_ge01_shared_data,
 		    orion_ge01_resources, irq, &orion_ge01_shared,
 		    eth_data, &orion_ge01);
diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h
index e00fdb21360..ae2377ef63e 100644
--- a/arch/arm/plat-orion/include/plat/common.h
+++ b/arch/arm/plat-orion/include/plat/common.h
@@ -39,12 +39,14 @@ void __init orion_rtc_init(unsigned long mapbase,
 void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err);
+			    unsigned long irq_err,
+			    unsigned int tx_csum_limit);
 
 void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
 			    unsigned long irq,
-			    unsigned long irq_err);
+			    unsigned long irq_err,
+			    unsigned int tx_csum_limit);
 
 void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data,
 			    unsigned long mapbase,
diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
index 51bf8ada6dc..49258e0ed1c 100644
--- a/include/linux/mv643xx_eth.h
+++ b/include/linux/mv643xx_eth.h
@@ -15,6 +15,8 @@
 #define MV643XX_ETH_SIZE_REG_4		0x2224
 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG	0x2290
 
+#define MV643XX_TX_CSUM_DEFAULT_LIMIT	0
+
 struct mv643xx_eth_shared_platform_data {
 	struct mbus_dram_target_info	*dram;
 	struct platform_device	*shared_smi;
-- 
cgit v1.2.3-70-g09d2


From ca08649eb5dd30f11a5a8fe8659b48899b7ea6a1 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 16 Aug 2012 11:31:27 -0400
Subject: Revert "xen PVonHVM: move shared_info to MMIO before kexec"

This reverts commit 00e37bdb0113a98408de42db85be002f21dbffd3.

During shutdown of PVHVM guests with more than 2VCPUs on certain
machines we can hit the race where the replaced shared_info is not
replaced fast enough and the PV time clock retries reading the same
area over and over without any any success and is stuck in an
infinite loop.

Acked-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c   | 118 +++++----------------------------------------
 arch/x86/xen/suspend.c     |   2 +-
 arch/x86/xen/xen-ops.h     |   2 +-
 drivers/xen/platform-pci.c |  15 ------
 include/xen/events.h       |   2 -
 5 files changed, 13 insertions(+), 126 deletions(-)

(limited to 'include')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a6f8acbdfc9..f1814fc2cb7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,7 +31,6 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
-#include <linux/syscore_ops.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -1472,130 +1471,38 @@ asmlinkage void __init xen_start_kernel(void)
 #endif
 }
 
-#ifdef CONFIG_XEN_PVHVM
-/*
- * The pfn containing the shared_info is located somewhere in RAM. This
- * will cause trouble if the current kernel is doing a kexec boot into a
- * new kernel. The new kernel (and its startup code) can not know where
- * the pfn is, so it can not reserve the page. The hypervisor will
- * continue to update the pfn, and as a result memory corruption occours
- * in the new kernel.
- *
- * One way to work around this issue is to allocate a page in the
- * xen-platform pci device's BAR memory range. But pci init is done very
- * late and the shared_info page is already in use very early to read
- * the pvclock. So moving the pfn from RAM to MMIO is racy because some
- * code paths on other vcpus could access the pfn during the small
- * window when the old pfn is moved to the new pfn. There is even a
- * small window were the old pfn is not backed by a mfn, and during that
- * time all reads return -1.
- *
- * Because it is not known upfront where the MMIO region is located it
- * can not be used right from the start in xen_hvm_init_shared_info.
- *
- * To minimise trouble the move of the pfn is done shortly before kexec.
- * This does not eliminate the race because all vcpus are still online
- * when the syscore_ops will be called. But hopefully there is no work
- * pending at this point in time. Also the syscore_op is run last which
- * reduces the risk further.
- */
-
-static struct shared_info *xen_hvm_shared_info;
-
-static void xen_hvm_connect_shared_info(unsigned long pfn)
+void __ref xen_hvm_init_shared_info(void)
 {
+	int cpu;
 	struct xen_add_to_physmap xatp;
+	static struct shared_info *shared_info_page = 0;
 
+	if (!shared_info_page)
+		shared_info_page = (struct shared_info *)
+			extend_brk(PAGE_SIZE, PAGE_SIZE);
 	xatp.domid = DOMID_SELF;
 	xatp.idx = 0;
 	xatp.space = XENMAPSPACE_shared_info;
-	xatp.gpfn = pfn;
+	xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
 	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
 		BUG();
 
-}
-static void xen_hvm_set_shared_info(struct shared_info *sip)
-{
-	int cpu;
-
-	HYPERVISOR_shared_info = sip;
+	HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
 
 	/* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
 	 * page, we use it in the event channel upcall and in some pvclock
 	 * related functions. We don't need the vcpu_info placement
 	 * optimizations because we don't use any pv_mmu or pv_irq op on
 	 * HVM.
-	 * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is
-	 * online but xen_hvm_set_shared_info is run at resume time too and
+	 * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
+	 * online but xen_hvm_init_shared_info is run at resume time too and
 	 * in that case multiple vcpus might be online. */
 	for_each_online_cpu(cpu) {
 		per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 	}
 }
 
-/* Reconnect the shared_info pfn to a mfn */
-void xen_hvm_resume_shared_info(void)
-{
-	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-}
-
-#ifdef CONFIG_KEXEC
-static struct shared_info *xen_hvm_shared_info_kexec;
-static unsigned long xen_hvm_shared_info_pfn_kexec;
-
-/* Remember a pfn in MMIO space for kexec reboot */
-void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn)
-{
-	xen_hvm_shared_info_kexec = sip;
-	xen_hvm_shared_info_pfn_kexec = pfn;
-}
-
-static void xen_hvm_syscore_shutdown(void)
-{
-	struct xen_memory_reservation reservation = {
-		.domid = DOMID_SELF,
-		.nr_extents = 1,
-	};
-	unsigned long prev_pfn;
-	int rc;
-
-	if (!xen_hvm_shared_info_kexec)
-		return;
-
-	prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT;
-	set_xen_guest_handle(reservation.extent_start, &prev_pfn);
-
-	/* Move pfn to MMIO, disconnects previous pfn from mfn */
-	xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec);
-
-	/* Update pointers, following hypercall is also a memory barrier */
-	xen_hvm_set_shared_info(xen_hvm_shared_info_kexec);
-
-	/* Allocate new mfn for previous pfn */
-	do {
-		rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-		if (rc == 0)
-			msleep(123);
-	} while (rc == 0);
-
-	/* Make sure the previous pfn is really connected to a (new) mfn */
-	BUG_ON(rc != 1);
-}
-
-static struct syscore_ops xen_hvm_syscore_ops = {
-	.shutdown = xen_hvm_syscore_shutdown,
-};
-#endif
-
-/* Use a pfn in RAM, may move to MMIO before kexec. */
-static void __init xen_hvm_init_shared_info(void)
-{
-	/* Remember pointer for resume */
-	xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE);
-	xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT);
-	xen_hvm_set_shared_info(xen_hvm_shared_info);
-}
-
+#ifdef CONFIG_XEN_PVHVM
 static void __init init_hvm_pv_info(void)
 {
 	int major, minor;
@@ -1646,9 +1553,6 @@ static void __init xen_hvm_guest_init(void)
 	init_hvm_pv_info();
 
 	xen_hvm_init_shared_info();
-#ifdef CONFIG_KEXEC
-	register_syscore_ops(&xen_hvm_syscore_ops);
-#endif
 
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index ae8a00c39de..45329c8c226 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled)
 {
 #ifdef CONFIG_XEN_PVHVM
 	int cpu;
-	xen_hvm_resume_shared_info();
+	xen_hvm_init_shared_info();
 	xen_callback_vector();
 	xen_unplug_emulated_devices();
 	if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 1e4329e04e0..202d4c15015 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -41,7 +41,7 @@ void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
 
 void xen_callback_vector(void);
-void xen_hvm_resume_shared_info(void);
+void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index d4c50d63acb..97ca359ae2b 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -101,19 +101,6 @@ static int platform_pci_resume(struct pci_dev *pdev)
 	return 0;
 }
 
-static void __devinit prepare_shared_info(void)
-{
-#ifdef CONFIG_KEXEC
-	unsigned long addr;
-	struct shared_info *hvm_shared_info;
-
-	addr = alloc_xen_mmio(PAGE_SIZE);
-	hvm_shared_info = ioremap(addr, PAGE_SIZE);
-	memset(hvm_shared_info, 0, PAGE_SIZE);
-	xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT);
-#endif
-}
-
 static int __devinit platform_pci_init(struct pci_dev *pdev,
 				       const struct pci_device_id *ent)
 {
@@ -151,8 +138,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
 	platform_mmio = mmio_addr;
 	platform_mmiolen = mmio_len;
 
-	prepare_shared_info();
-
 	if (!xen_have_vector_callback) {
 		ret = xen_allocate_irq(pdev);
 		if (ret) {
diff --git a/include/xen/events.h b/include/xen/events.h
index 9c641deb65d..04399b28e82 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -58,8 +58,6 @@ void notify_remote_via_irq(int irq);
 
 void xen_irq_resume(void);
 
-void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn);
-
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq);
 void xen_set_irq_pending(int irq);
-- 
cgit v1.2.3-70-g09d2


From 8857df3aceb7a8eb7558059b7da109e41dd1fb95 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Fri, 20 Jul 2012 09:31:00 +0100
Subject: iio: frequency: ADF4350: Fix potential reference div factor overflow.

With small channel spacing values and high reference frequencies it is
possible to exceed the range of the 10-bit counter.
Workaround by checking the range and widening some constrains.

We don't use the REG1_PHASE value in this case the datasheet recommends to set
it to 1 if not used.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/frequency/adf4350.c       | 24 +++++++++++++++---------
 include/linux/iio/frequency/adf4350.h |  2 ++
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c
index 59fbb3ae40e..e35bb8f6fe7 100644
--- a/drivers/iio/frequency/adf4350.c
+++ b/drivers/iio/frequency/adf4350.c
@@ -129,7 +129,7 @@ static int adf4350_set_freq(struct adf4350_state *st, unsigned long long freq)
 {
 	struct adf4350_platform_data *pdata = st->pdata;
 	u64 tmp;
-	u32 div_gcd, prescaler;
+	u32 div_gcd, prescaler, chspc;
 	u16 mdiv, r_cnt = 0;
 	u8 band_sel_div;
 
@@ -158,14 +158,20 @@ static int adf4350_set_freq(struct adf4350_state *st, unsigned long long freq)
 	if (pdata->ref_div_factor)
 		r_cnt = pdata->ref_div_factor - 1;
 
-	do  {
-		r_cnt = adf4350_tune_r_cnt(st, r_cnt);
+	chspc = st->chspc;
 
-		st->r1_mod = st->fpfd / st->chspc;
-		while (st->r1_mod > ADF4350_MAX_MODULUS) {
-			r_cnt = adf4350_tune_r_cnt(st, r_cnt);
-			st->r1_mod = st->fpfd / st->chspc;
-		}
+	do  {
+		do {
+			do {
+				r_cnt = adf4350_tune_r_cnt(st, r_cnt);
+				st->r1_mod = st->fpfd / chspc;
+				if (r_cnt > ADF4350_MAX_R_CNT) {
+					/* try higher spacing values */
+					chspc++;
+					r_cnt = 0;
+				}
+			} while ((st->r1_mod > ADF4350_MAX_MODULUS) && r_cnt);
+		} while (r_cnt == 0);
 
 		tmp = freq * (u64)st->r1_mod + (st->fpfd > 1);
 		do_div(tmp, st->fpfd); /* Div round closest (n + d/2)/d */
@@ -194,7 +200,7 @@ static int adf4350_set_freq(struct adf4350_state *st, unsigned long long freq)
 	st->regs[ADF4350_REG0] = ADF4350_REG0_INT(st->r0_int) |
 				 ADF4350_REG0_FRACT(st->r0_fract);
 
-	st->regs[ADF4350_REG1] = ADF4350_REG1_PHASE(0) |
+	st->regs[ADF4350_REG1] = ADF4350_REG1_PHASE(1) |
 				 ADF4350_REG1_MOD(st->r1_mod) |
 				 prescaler;
 
diff --git a/include/linux/iio/frequency/adf4350.h b/include/linux/iio/frequency/adf4350.h
index b76b4a87065..be91f344d5f 100644
--- a/include/linux/iio/frequency/adf4350.h
+++ b/include/linux/iio/frequency/adf4350.h
@@ -87,6 +87,8 @@
 #define ADF4350_MAX_BANDSEL_CLK		125000 /* Hz */
 #define ADF4350_MAX_FREQ_REFIN		250000000 /* Hz */
 #define ADF4350_MAX_MODULUS		4095
+#define ADF4350_MAX_R_CNT		1023
+
 
 /**
  * struct adf4350_platform_data - platform specific information
-- 
cgit v1.2.3-70-g09d2


From ac5aa7f9e0891a115ab307b4bdde9c55b9232970 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Fri, 10 Aug 2012 16:52:58 +0200
Subject: pinctrl: header: trivial: declare struct device

As struct device is used as a function argument, it should at
least be declared (device.h is not included).

Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/pinctrl/consumer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 6dd96fb4548..e9b7f435084 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -20,6 +20,7 @@
 /* This struct is private to the core and should be regarded as a cookie */
 struct pinctrl;
 struct pinctrl_state;
+struct device;
 
 #ifdef CONFIG_PINCTRL
 
-- 
cgit v1.2.3-70-g09d2


From 8513915accc611e576dbebb93422c257e7e68be8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Sat, 18 Aug 2012 17:43:05 -0700
Subject: ALSA: fix pcm.h kernel-doc warning and notation

Fix kernel-doc warning in <sound/pcm.h> and add function name to make
the kernel-doc notation complete.

Warning(include/sound/pcm.h:1081): No description found for parameter 'substream'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index c75c0d1a85e..cdca2ab1e71 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1075,7 +1075,8 @@ static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max)
 const char *snd_pcm_format_name(snd_pcm_format_t format);
 
 /**
- * Get a string naming the direction of a stream
+ * snd_pcm_stream_str - Get a string naming the direction of a stream
+ * @substream: the pcm substream instance
  */
 static inline const char *snd_pcm_stream_str(struct snd_pcm_substream *substream)
 {
-- 
cgit v1.2.3-70-g09d2


From 3296193d1421c2d6f9e49e181cecfd917f0f5764 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@freescale.com>
Date: Tue, 14 Aug 2012 13:20:23 +0000
Subject: dt: introduce for_each_available_child_of_node,
 of_get_next_available_child

Macro for_each_child_of_node() makes it easy to iterate over all of the
children for a given device tree node, including those nodes that are
marked as unavailable (i.e. status = "disabled").

Introduce for_each_available_child_of_node(), which is like
for_each_child_of_node(), but it automatically skips unavailable nodes.
This also requires the introduction of helper function
of_get_next_available_child(), which returns the next available child
node.

Signed-off-by: Timur Tabi <timur@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/base.c  | 27 +++++++++++++++++++++++++++
 include/linux/of.h |  7 +++++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index c181b94abc3..d4a1c9a043e 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -363,6 +363,33 @@ struct device_node *of_get_next_child(const struct device_node *node,
 }
 EXPORT_SYMBOL(of_get_next_child);
 
+/**
+ *	of_get_next_available_child - Find the next available child node
+ *	@node:	parent node
+ *	@prev:	previous child of the parent node, or NULL to get first
+ *
+ *      This function is like of_get_next_child(), except that it
+ *      automatically skips any disabled nodes (i.e. status = "disabled").
+ */
+struct device_node *of_get_next_available_child(const struct device_node *node,
+	struct device_node *prev)
+{
+	struct device_node *next;
+
+	read_lock(&devtree_lock);
+	next = prev ? prev->sibling : node->child;
+	for (; next; next = next->sibling) {
+		if (!of_device_is_available(next))
+			continue;
+		if (of_node_get(next))
+			break;
+	}
+	of_node_put(prev);
+	read_unlock(&devtree_lock);
+	return next;
+}
+EXPORT_SYMBOL(of_get_next_available_child);
+
 /**
  *	of_find_node_by_path - Find a node matching a full OF path
  *	@path:	The full path to match
diff --git a/include/linux/of.h b/include/linux/of.h
index 5919ee33f2b..1b1163225f3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -190,10 +190,17 @@ extern struct device_node *of_get_parent(const struct device_node *node);
 extern struct device_node *of_get_next_parent(struct device_node *node);
 extern struct device_node *of_get_next_child(const struct device_node *node,
 					     struct device_node *prev);
+extern struct device_node *of_get_next_available_child(
+	const struct device_node *node, struct device_node *prev);
+
 #define for_each_child_of_node(parent, child) \
 	for (child = of_get_next_child(parent, NULL); child != NULL; \
 	     child = of_get_next_child(parent, child))
 
+#define for_each_available_child_of_node(parent, child) \
+	for (child = of_get_next_available_child(parent, NULL); child != NULL; \
+	     child = of_get_next_available_child(parent, child))
+
 static inline int of_get_child_count(const struct device_node *np)
 {
 	struct device_node *child;
-- 
cgit v1.2.3-70-g09d2


From c0de08d04215031d68fa13af36f347a6cfa252ca Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@regit.org>
Date: Thu, 16 Aug 2012 22:02:58 +0000
Subject: af_packet: don't emit packet on orig fanout group

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This result in a loop for software which
generate packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info account.

Reported-by: Aleksandr Kotov <a1k@mail.ru>
Signed-off-by: Eric Leblond <eric@regit.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 16 ++++++++++++++--
 net/packet/af_packet.c    |  9 +++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3560d688161..59dc05f3824 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1522,6 +1522,8 @@ struct packet_type {
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
+	bool			(*id_match)(struct packet_type *ptype,
+					    struct sock *sk);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index a39354ee143..debd9372472 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1642,6 +1642,19 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+	if (ptype->af_packet_priv == NULL)
+		return false;
+
+	if (ptype->id_match)
+		return ptype->id_match(ptype, skb->sk);
+	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+		return true;
+
+	return false;
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1659,8 +1672,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
 		if ((ptype->dev == dev || !ptype->dev) &&
-		    (ptype->af_packet_priv == NULL ||
-		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+		    (!skb_loop_sk(ptype, skb))) {
 			if (pt_prev) {
 				deliver_skb(skb2, pt_prev, skb->dev);
 				pt_prev = ptype;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ac890a1a4c..aee7196aac3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1273,6 +1273,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 	spin_unlock(&f->lock);
 }
 
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+		return true;
+
+	return false;
+}
+
 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 {
 	struct packet_sock *po = pkt_sk(sk);
@@ -1325,6 +1333,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
+		match->prot_hook.id_match = match_fanout_group;
 		dev_add_pack(&match->prot_hook);
 		list_add(&match->list, &fanout_list);
 	}
-- 
cgit v1.2.3-70-g09d2


From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 20 Aug 2012 02:56:56 -0700
Subject: net: ipv6: fix oops in inet_putpeer()

Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced
a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not
initialized, causing a false positive result from inetpeer_ptr_is_peer(),
which in turn causes a NULL pointer dereference in inet_putpeer().

Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X
EIP: 0060:[<c03abf93>] EFLAGS: 00010246 CPU: 0
EIP is at inet_putpeer+0xe/0x16
EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641
ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4
 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068
CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28
 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4
 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8
 [<c0423de1>] xfrm6_dst_destroy+0x42/0xdb
 [<c038d5f7>] dst_destroy+0x1d/0xa4
 [<c03ef48d>] xfrm_bundle_flo_delete+0x2b/0x36
 [<c0396870>] flow_cache_gc_task+0x85/0x9f
 [<c0142d2b>] process_one_work+0x122/0x441
 [<c043feb5>] ? apic_timer_interrupt+0x31/0x38
 [<c03967eb>] ? flow_cache_new_hashrnd+0x2b/0x2b
 [<c0143e2d>] worker_thread+0x113/0x3cc

Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to
properly initialize the dst's peer pointer.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      | 2 ++
 net/ipv6/xfrm6_policy.c | 8 ++++++++
 net/xfrm/xfrm_policy.c  | 2 ++
 3 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 62b619e82a9..976a81abe1a 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -292,6 +292,8 @@ struct xfrm_policy_afinfo {
 						  struct flowi *fl,
 						  int reverse);
 	int			(*get_tos)(const struct flowi *fl);
+	void			(*init_dst)(struct net *net,
+					    struct xfrm_dst *dst);
 	int			(*init_path)(struct xfrm_dst *path,
 					     struct dst_entry *dst,
 					     int nfheader_len);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812107b..f8c4c08ffb6 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
 	return 0;
 }
 
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+	struct rt6_info *rt = (struct rt6_info *)xdst;
+
+	rt6_init_peer(rt, net->ipv6.peers);
+}
+
 static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 			   int nfheader_len)
 {
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.get_saddr = 		xfrm6_get_saddr,
 	.decode_session =	_decode_session6,
 	.get_tos =		xfrm6_get_tos,
+	.init_dst =		xfrm6_init_dst,
 	.init_path =		xfrm6_init_path,
 	.fill_dst =		xfrm6_fill_dst,
 	.blackhole_route =	ip6_blackhole_route,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5a5165a592..5a2aa17e4d3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 
 		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
 		xdst->flo.ops = &xfrm_bundle_fc_ops;
+		if (afinfo->init_dst)
+			afinfo->init_dst(net, xdst);
 	} else
 		xdst = ERR_PTR(-ENOBUFS);
 
-- 
cgit v1.2.3-70-g09d2


From af74115eed22698f771fec1287a864975c9a6671 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Wed, 15 Aug 2012 21:24:52 -0700
Subject: target: Remove unused se_cmd.cmd_spdtl

This was originally for helping fabrics to determine overflow/underflow
status, and has been superceeded by SCF_OVERFLOW_BIT + SCF_UNDERFLOW_BIT.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 2 --
 include/target/target_core_base.h      | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ea9a3d2e4f5..4de3186dc44 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1165,8 +1165,6 @@ int target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
 			" 0x%02x\n", cmd->se_tfo->get_fabric_name(),
 				cmd->data_length, size, cmd->t_task_cdb[0]);
 
-		cmd->cmd_spdtl = size;
-
 		if (cmd->data_direction == DMA_TO_DEVICE) {
 			pr_err("Rejecting underflow/overflow"
 					" WRITE data\n");
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 128ce46fa48..015cea01ae3 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -503,8 +503,6 @@ struct se_cmd {
 	u32			se_ordered_id;
 	/* Total size in bytes associated with command */
 	u32			data_length;
-	/* SCSI Presented Data Transfer Length */
-	u32			cmd_spdtl;
 	u32			residual_count;
 	u32			orig_fe_lun;
 	/* Persistent Reservation key */
-- 
cgit v1.2.3-70-g09d2


From e0e3cea46d31d23dc40df0a49a7a2c04fe8edfea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 21 Aug 2012 06:21:17 +0000
Subject: af_netlink: force credentials passing [CVE-2012-3520]

Pablo Neira Ayuso discovered that avahi and
potentially NetworkManager accept spoofed Netlink messages because of a
kernel bug.  The kernel passes all-zero SCM_CREDENTIALS ancillary data
to the receiver if the sender did not provide such data, instead of not
including any such data at all or including the correct data from the
peer (as it is the case with AF_UNIX).

This bug was introduced in commit 16e572626961
(af_unix: dont send SCM_CREDENTIALS by default)

This patch forces passing credentials for netlink, as
before the regression.

Another fix would be to not add SCM_CREDENTIALS in
netlink messages if not provided by the sender, but it
might break some programs.

With help from Florian Weimer & Petr Matousek

This issue is designated as CVE-2012-3520

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Petr Matousek <pmatouse@redhat.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h        | 4 +++-
 net/netlink/af_netlink.c | 2 +-
 net/unix/af_unix.c       | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/scm.h b/include/net/scm.h
index 079d7887dac..7dc0854f0b3 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -70,9 +70,11 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
 }
 
 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
-			       struct scm_cookie *scm)
+			       struct scm_cookie *scm, bool forcecreds)
 {
 	memset(scm, 0, sizeof(*scm));
+	if (forcecreds)
+		scm_set_cred(scm, task_tgid(current), current_cred());
 	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
 		return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5463969da45..1445d73533e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1362,7 +1362,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &scm;
 
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, true);
 	if (err < 0)
 		return err;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e4768c180da..c5ee4ff6136 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1450,7 +1450,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
@@ -1619,7 +1619,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (NULL == siocb->scm)
 		siocb->scm = &tmp_scm;
 	wait_for_unix_gc();
-	err = scm_send(sock, msg, siocb->scm);
+	err = scm_send(sock, msg, siocb->scm, false);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3-70-g09d2


From 04ccfe77f132b973659f11954443214659014072 Mon Sep 17 00:00:00 2001
From: Damien Lespiau <damien.lespiau@intel.com>
Date: Fri, 17 Aug 2012 14:20:02 +0000
Subject: drm: Remove two unused fields from struct drm_display_mode

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_modes.c | 3 ---
 include/drm/drm_crtc.h      | 2 --
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index b7adb4a967f..28637c181b1 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -706,9 +706,6 @@ void drm_mode_set_crtcinfo(struct drm_display_mode *p, int adjust_flags)
 	p->crtc_vblank_end = max(p->crtc_vsync_end, p->crtc_vtotal);
 	p->crtc_hblank_start = min(p->crtc_hsync_start, p->crtc_hdisplay);
 	p->crtc_hblank_end = max(p->crtc_hsync_end, p->crtc_htotal);
-
-	p->crtc_hadjusted = false;
-	p->crtc_vadjusted = false;
 }
 EXPORT_SYMBOL(drm_mode_set_crtcinfo);
 
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index a1a0386e016..ced362533e3 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -166,8 +166,6 @@ struct drm_display_mode {
 	int crtc_vsync_start;
 	int crtc_vsync_end;
 	int crtc_vtotal;
-	int crtc_hadjusted;
-	int crtc_vadjusted;
 
 	/* Driver private mode info */
 	int private_size;
-- 
cgit v1.2.3-70-g09d2


From c3a5ce0416b6c172a23bc8a3760d8704d3d1535b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 21 Aug 2012 16:16:00 -0700
Subject: string: do not export memweight() to userspace

Fix the following warning:

  usr/include/linux/string.h:8: userspace cannot reference function or variable defined in the kernel

Signed-off-by: WANG Cong <xiyou.wangcong@gmail.com>
Acked-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/string.h b/include/linux/string.h
index ffe0442e18d..b9178812d9d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 {
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
-#endif
 
 extern size_t memweight(const void *ptr, size_t bytes);
 
+#endif /* __KERNEL__ */
 #endif /* _LINUX_STRING_H_ */
-- 
cgit v1.2.3-70-g09d2


From c67fe3752abe6ab47639e2f9b836900c3dc3da84 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Tue, 21 Aug 2012 16:16:17 -0700
Subject: mm: compaction: Abort async compaction if locks are contended or
 taking too long

Jim Schutt reported a problem that pointed at compaction contending
heavily on locks.  The workload is straight-forward and in his own words;

	The systems in question have 24 SAS drives spread across 3 HBAs,
	running 24 Ceph OSD instances, one per drive.  FWIW these servers
	are dual-socket Intel 5675 Xeons w/48 GB memory.  I've got ~160
	Ceph Linux clients doing dd simultaneously to a Ceph file system
	backed by 12 of these servers.

Early in the test everything looks fine

  procs -------------------memory------------------ ---swap-- -----io---- --system-- -----cpu-------
   r  b       swpd       free       buff      cache   si   so    bi    bo   in   cs  us sy  id wa st
  31 15          0     287216        576   38606628    0    0     2  1158    2   14   1  3  95  0  0
  27 15          0     225288        576   38583384    0    0    18 2222016 203357 134876  11 56  17 15  0
  28 17          0     219256        576   38544736    0    0    11 2305932 203141 146296  11 49  23 17  0
   6 18          0     215596        576   38552872    0    0     7 2363207 215264 166502  12 45  22 20  0
  22 18          0     226984        576   38596404    0    0     3 2445741 223114 179527  12 43  23 22  0

and then it goes to pot

  procs -------------------memory------------------ ---swap-- -----io---- --system-- -----cpu-------
   r  b       swpd       free       buff      cache   si   so    bi    bo   in   cs  us sy  id wa st
  163  8          0     464308        576   36791368    0    0    11 22210  866  536   3 13  79  4  0
  207 14          0     917752        576   36181928    0    0   712 1345376 134598 47367   7 90   1  2  0
  123 12          0     685516        576   36296148    0    0   429 1386615 158494 60077   8 84   5  3  0
  123 12          0     598572        576   36333728    0    0  1107 1233281 147542 62351   7 84   5  4  0
  622  7          0     660768        576   36118264    0    0   557 1345548 151394 59353   7 85   4  3  0
  223 11          0     283960        576   36463868    0    0    46 1107160 121846 33006   6 93   1  1  0

Note that system CPU usage is very high blocks being written out has
dropped by 42%. He analysed this with perf and found

  perf record -g -a sleep 10
  perf report --sort symbol --call-graph fractal,5
    34.63%  [k] _raw_spin_lock_irqsave
            |
            |--97.30%-- isolate_freepages
            |          compaction_alloc
            |          unmap_and_move
            |          migrate_pages
            |          compact_zone
            |          compact_zone_order
            |          try_to_compact_pages
            |          __alloc_pages_direct_compact
            |          __alloc_pages_slowpath
            |          __alloc_pages_nodemask
            |          alloc_pages_vma
            |          do_huge_pmd_anonymous_page
            |          handle_mm_fault
            |          do_page_fault
            |          page_fault
            |          |
            |          |--87.39%-- skb_copy_datagram_iovec
            |          |          tcp_recvmsg
            |          |          inet_recvmsg
            |          |          sock_recvmsg
            |          |          sys_recvfrom
            |          |          system_call
            |          |          __recv
            |          |          |
            |          |           --100.00%-- (nil)
            |          |
            |           --12.61%-- memcpy
             --2.70%-- [...]

There was other data but primarily it is all showing that compaction is
contended heavily on the zone->lock and zone->lru_lock.

commit [b2eef8c0: mm: compaction: minimise the time IRQs are disabled
while isolating pages for migration] noted that it was possible for
migration to hold the lru_lock for an excessive amount of time. Very
broadly speaking this patch expands the concept.

This patch introduces compact_checklock_irqsave() to check if a lock
is contended or the process needs to be scheduled. If either condition
is true then async compaction is aborted and the caller is informed.
The page allocator will fail a THP allocation if compaction failed due
to contention. This patch also introduces compact_trylock_irqsave()
which will acquire the lock only if it is not contended and the process
does not need to schedule.

Reported-by: Jim Schutt <jaschut@sandia.gov>
Tested-by: Jim Schutt <jaschut@sandia.gov>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |   4 +-
 mm/compaction.c            | 100 +++++++++++++++++++++++++++++++++++----------
 mm/internal.h              |   1 +
 mm/page_alloc.c            |  17 +++++---
 4 files changed, 93 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 133ddcf8339..ef658147e4e 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index bcce7897e17..7fcd3a52e68 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,47 @@ static inline bool migrate_async_suitable(int migratetype)
 	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
+/*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. Check if the process needs to be scheduled or
+ * if the lock is contended. For async compaction, back out in the event
+ * if contention is severe. For sync compaction, schedule.
+ *
+ * Returns true if the lock is held.
+ * Returns false if the lock is released and compaction should abort
+ */
+static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
+				      bool locked, struct compact_control *cc)
+{
+	if (need_resched() || spin_is_contended(lock)) {
+		if (locked) {
+			spin_unlock_irqrestore(lock, *flags);
+			locked = false;
+		}
+
+		/* async aborts if taking too long or contended */
+		if (!cc->sync) {
+			if (cc->contended)
+				*cc->contended = true;
+			return false;
+		}
+
+		cond_resched();
+		if (fatal_signal_pending(current))
+			return false;
+	}
+
+	if (!locked)
+		spin_lock_irqsave(lock, *flags);
+	return true;
+}
+
+static inline bool compact_trylock_irqsave(spinlock_t *lock,
+			unsigned long *flags, struct compact_control *cc)
+{
+	return compact_checklock_irqsave(lock, flags, false, cc);
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /* Update the number of anon and file isolated pages in the zone */
-static void acct_isolated(struct zone *zone, struct compact_control *cc)
+static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
 {
 	struct page *page;
 	unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
 	list_for_each_entry(page, &cc->migratepages, lru)
 		count[!!page_is_file_cache(page)]++;
 
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	/* If locked we can use the interrupt unsafe versions */
+	if (locked) {
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	} else {
+		mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+		mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	}
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	struct list_head *migratelist = &cc->migratepages;
 	isolate_mode_t mode = 0;
 	struct lruvec *lruvec;
+	unsigned long flags;
+	bool locked;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 
 	/* Time to isolate some pages for migration */
 	cond_resched();
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	locked = true;
 	for (; low_pfn < end_pfn; low_pfn++) {
 		struct page *page;
-		bool locked = true;
 
 		/* give a chance to irqs before checking need_resched() */
 		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-			spin_unlock_irq(&zone->lru_lock);
+			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			locked = false;
 		}
-		if (need_resched() || spin_is_contended(&zone->lru_lock)) {
-			if (locked)
-				spin_unlock_irq(&zone->lru_lock);
-			cond_resched();
-			spin_lock_irq(&zone->lru_lock);
-			if (fatal_signal_pending(current))
-				break;
-		} else if (!locked)
-			spin_lock_irq(&zone->lru_lock);
+
+		/* Check if it is ok to still hold the lock */
+		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+								locked, cc);
+		if (!locked)
+			break;
 
 		/*
 		 * migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		}
 	}
 
-	acct_isolated(zone, cc);
+	acct_isolated(zone, locked, cc);
 
-	spin_unlock_irq(&zone->lru_lock);
+	if (locked)
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
@@ -461,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
 		 * are disabled
 		 */
 		isolated = 0;
-		spin_lock_irqsave(&zone->lock, flags);
+
+		/*
+		 * The zone lock must be held to isolate freepages. This
+		 * unfortunately this is a very coarse lock and can be
+		 * heavily contended if there are parallel allocations
+		 * or parallel compactions. For async compaction do not
+		 * spin on the lock
+		 */
+		if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
+			break;
 		if (suitable_migration_target(page)) {
 			end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
 			isolated = isolate_freepages_block(pfn, end_pfn,
@@ -773,7 +829,7 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync)
+				 bool sync, bool *contended)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -782,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
+		.contended = contended,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -803,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync)
+			bool sync, bool *contended)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -827,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync);
+		status = compact_zone_order(zone, order, gfp_mask, sync,
+						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775..b8c91b342e2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+	bool *contended;		/* True if a lock was contended */
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07f19248acb..c66fb875104 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2102,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int migratetype, bool sync_migration,
-	bool *deferred_compaction,
+	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
 	struct page *page;
@@ -2117,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-						nodemask, sync_migration);
+						nodemask, sync_migration,
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 	if (*did_some_progress != COMPACT_SKIPPED) {
 
@@ -2163,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int migratetype, bool sync_migration,
-	bool *deferred_compaction,
+	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
 	return NULL;
@@ -2336,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long did_some_progress;
 	bool sync_migration = false;
 	bool deferred_compaction = false;
+	bool contended_compaction = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2425,6 +2427,7 @@ rebalance:
 					nodemask,
 					alloc_flags, preferred_zone,
 					migratetype, sync_migration,
+					&contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 	if (page)
@@ -2434,10 +2437,11 @@ rebalance:
 	/*
 	 * If compaction is deferred for high-order allocations, it is because
 	 * sync compaction recently failed. In this is the case and the caller
-	 * has requested the system not be heavily disrupted, fail the
-	 * allocation now instead of entering direct reclaim
+	 * requested a movable allocation that does not heavily disrupt the
+	 * system then fail the allocation instead of entering direct reclaim.
 	 */
-	if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+	if ((deferred_compaction || contended_compaction) &&
+						(gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
@@ -2508,6 +2512,7 @@ rebalance:
 					nodemask,
 					alloc_flags, preferred_zone,
 					migratetype, sync_migration,
+					&contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 		if (page)
-- 
cgit v1.2.3-70-g09d2


From 1922b0f2758badc0b971d1ebbd300bc6635a6aef Mon Sep 17 00:00:00 2001
From: AnilKumar Ch <anilkumar@ti.com>
Date: Mon, 13 Aug 2012 20:36:05 +0530
Subject: mfd: Move tps65217 regulator plat data handling to regulator

Regulator platform data handling was mistakenly added to MFD
driver. So we will see build errors if we compile MFD drivers
without CONFIG_REGULATOR. This patch moves regulator platform
data handling from TPS65217 MFD driver to regulator driver.

This makes MFD driver independent of REGULATOR framework so
build error is fixed if CONFIG_REGULATOR is not set.

drivers/built-in.o: In function `tps65217_probe':
tps65217.c:(.devinit.text+0x13e37): undefined reference
to `of_regulator_match'

This patch also fix allocation size of tps65217 platform data.
Current implementation allocates a struct tps65217_board for each
regulator specified in the device tree. But the structure itself
provides array of regulators so one instance of it is sufficient.

Signed-off-by: AnilKumar Ch <anilkumar@ti.com>
---
 drivers/mfd/tps65217.c                 | 130 +++++++++++----------------------
 drivers/regulator/tps65217-regulator.c | 124 +++++++++++++++++++++++++++----
 include/linux/mfd/tps65217.h           |  12 ++-
 3 files changed, 161 insertions(+), 105 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 61c097a98f5..3bc274409b5 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -24,11 +24,18 @@
 #include <linux/slab.h>
 #include <linux/regmap.h>
 #include <linux/err.h>
-#include <linux/regulator/of_regulator.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <linux/mfd/core.h>
 #include <linux/mfd/tps65217.h>
 
+static struct mfd_cell tps65217s[] = {
+	{
+		.name = "tps65217-pmic",
+	},
+};
+
 /**
  * tps65217_reg_read: Read a single tps65217 register.
  *
@@ -133,83 +140,48 @@ int tps65217_clear_bits(struct tps65217 *tps, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(tps65217_clear_bits);
 
-#ifdef CONFIG_OF
-static struct of_regulator_match reg_matches[] = {
-	{ .name = "dcdc1", .driver_data = (void *)TPS65217_DCDC_1 },
-	{ .name = "dcdc2", .driver_data = (void *)TPS65217_DCDC_2 },
-	{ .name = "dcdc3", .driver_data = (void *)TPS65217_DCDC_3 },
-	{ .name = "ldo1", .driver_data = (void *)TPS65217_LDO_1 },
-	{ .name = "ldo2", .driver_data = (void *)TPS65217_LDO_2 },
-	{ .name = "ldo3", .driver_data = (void *)TPS65217_LDO_3 },
-	{ .name = "ldo4", .driver_data = (void *)TPS65217_LDO_4 },
-};
-
-static struct tps65217_board *tps65217_parse_dt(struct i2c_client *client)
-{
-	struct device_node *node = client->dev.of_node;
-	struct tps65217_board *pdata;
-	struct device_node *regs;
-	int count = ARRAY_SIZE(reg_matches);
-	int ret, i;
-
-	regs = of_find_node_by_name(node, "regulators");
-	if (!regs)
-		return NULL;
-
-	ret = of_regulator_match(&client->dev, regs, reg_matches, count);
-	of_node_put(regs);
-	if ((ret < 0) || (ret > count))
-		return NULL;
-
-	count = ret;
-	pdata = devm_kzalloc(&client->dev, count * sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return NULL;
-
-	for (i = 0; i < count; i++) {
-		if (!reg_matches[i].init_data || !reg_matches[i].of_node)
-			continue;
-
-		pdata->tps65217_init_data[i] = reg_matches[i].init_data;
-		pdata->of_node[i] = reg_matches[i].of_node;
-	}
-
-	return pdata;
-}
-
-static struct of_device_id tps65217_of_match[] = {
-	{ .compatible = "ti,tps65217", },
-	{ },
-};
-#else
-static struct tps65217_board *tps65217_parse_dt(struct i2c_client *client)
-{
-	return NULL;
-}
-#endif
-
 static struct regmap_config tps65217_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 };
 
+static const struct of_device_id tps65217_of_match[] = {
+	{ .compatible = "ti,tps65217", .data = (void *)TPS65217 },
+	{ /* sentinel */ },
+};
+
 static int __devinit tps65217_probe(struct i2c_client *client,
 				const struct i2c_device_id *ids)
 {
 	struct tps65217 *tps;
-	struct regulator_init_data *reg_data;
-	struct tps65217_board *pdata = client->dev.platform_data;
-	int i, ret;
 	unsigned int version;
+	unsigned int chip_id = ids->driver_data;
+	const struct of_device_id *match;
+	int ret;
 
-	if (!pdata && client->dev.of_node)
-		pdata = tps65217_parse_dt(client);
+	if (client->dev.of_node) {
+		match = of_match_device(tps65217_of_match, &client->dev);
+		if (!match) {
+			dev_err(&client->dev,
+				"Failed to find matching dt id\n");
+			return -EINVAL;
+		}
+		chip_id = (unsigned int)match->data;
+	}
+
+	if (!chip_id) {
+		dev_err(&client->dev, "id is null.\n");
+		return -ENODEV;
+	}
 
 	tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL);
 	if (!tps)
 		return -ENOMEM;
 
-	tps->pdata = pdata;
+	i2c_set_clientdata(client, tps);
+	tps->dev = &client->dev;
+	tps->id = chip_id;
+
 	tps->regmap = devm_regmap_init_i2c(client, &tps65217_regmap_config);
 	if (IS_ERR(tps->regmap)) {
 		ret = PTR_ERR(tps->regmap);
@@ -218,8 +190,12 @@ static int __devinit tps65217_probe(struct i2c_client *client,
 		return ret;
 	}
 
-	i2c_set_clientdata(client, tps);
-	tps->dev = &client->dev;
+	ret = mfd_add_devices(tps->dev, -1, tps65217s,
+					ARRAY_SIZE(tps65217s), NULL, 0);
+	if (ret < 0) {
+		dev_err(tps->dev, "mfd_add_devices failed: %d\n", ret);
+		return ret;
+	}
 
 	ret = tps65217_reg_read(tps, TPS65217_REG_CHIPID, &version);
 	if (ret < 0) {
@@ -232,41 +208,21 @@ static int __devinit tps65217_probe(struct i2c_client *client,
 			(version & TPS65217_CHIPID_CHIP_MASK) >> 4,
 			version & TPS65217_CHIPID_REV_MASK);
 
-	for (i = 0; i < TPS65217_NUM_REGULATOR; i++) {
-		struct platform_device *pdev;
-
-		pdev = platform_device_alloc("tps65217-pmic", i);
-		if (!pdev) {
-			dev_err(tps->dev, "Cannot create regulator %d\n", i);
-			continue;
-		}
-
-		pdev->dev.parent = tps->dev;
-		pdev->dev.of_node = pdata->of_node[i];
-		reg_data = pdata->tps65217_init_data[i];
-		platform_device_add_data(pdev, reg_data, sizeof(*reg_data));
-		tps->regulator_pdev[i] = pdev;
-
-		platform_device_add(pdev);
-	}
-
 	return 0;
 }
 
 static int __devexit tps65217_remove(struct i2c_client *client)
 {
 	struct tps65217 *tps = i2c_get_clientdata(client);
-	int i;
 
-	for (i = 0; i < TPS65217_NUM_REGULATOR; i++)
-		platform_device_unregister(tps->regulator_pdev[i]);
+	mfd_remove_devices(tps->dev);
 
 	return 0;
 }
 
 static const struct i2c_device_id tps65217_id_table[] = {
-	{"tps65217", 0xF0},
-	{/* end of list */}
+	{"tps65217", TPS65217},
+	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(i2c, tps65217_id_table);
 
diff --git a/drivers/regulator/tps65217-regulator.c b/drivers/regulator/tps65217-regulator.c
index 6caa222af77..ab00cab905b 100644
--- a/drivers/regulator/tps65217-regulator.c
+++ b/drivers/regulator/tps65217-regulator.c
@@ -22,6 +22,7 @@
 #include <linux/err.h>
 #include <linux/platform_device.h>
 
+#include <linux/regulator/of_regulator.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/mfd/tps65217.h>
@@ -281,37 +282,130 @@ static const struct regulator_desc regulators[] = {
 			   NULL),
 };
 
+#ifdef CONFIG_OF
+static struct of_regulator_match reg_matches[] = {
+	{ .name = "dcdc1", .driver_data = (void *)TPS65217_DCDC_1 },
+	{ .name = "dcdc2", .driver_data = (void *)TPS65217_DCDC_2 },
+	{ .name = "dcdc3", .driver_data = (void *)TPS65217_DCDC_3 },
+	{ .name = "ldo1", .driver_data = (void *)TPS65217_LDO_1 },
+	{ .name = "ldo2", .driver_data = (void *)TPS65217_LDO_2 },
+	{ .name = "ldo3", .driver_data = (void *)TPS65217_LDO_3 },
+	{ .name = "ldo4", .driver_data = (void *)TPS65217_LDO_4 },
+};
+
+static struct tps65217_board *tps65217_parse_dt(struct platform_device *pdev)
+{
+	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
+	struct device_node *node = tps->dev->of_node;
+	struct tps65217_board *pdata;
+	struct device_node *regs;
+	int i, count;
+
+	regs = of_find_node_by_name(node, "regulators");
+	if (!regs)
+		return NULL;
+
+	count = of_regulator_match(pdev->dev.parent, regs,
+				reg_matches, TPS65217_NUM_REGULATOR);
+	of_node_put(regs);
+	if ((count < 0) || (count > TPS65217_NUM_REGULATOR))
+		return NULL;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	for (i = 0; i < count; i++) {
+		if (!reg_matches[i].init_data || !reg_matches[i].of_node)
+			continue;
+
+		pdata->tps65217_init_data[i] = reg_matches[i].init_data;
+		pdata->of_node[i] = reg_matches[i].of_node;
+	}
+
+	return pdata;
+}
+#else
+static struct tps65217_board *tps65217_parse_dt(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif
+
 static int __devinit tps65217_regulator_probe(struct platform_device *pdev)
 {
+	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
+	struct tps65217_board *pdata = dev_get_platdata(tps->dev);
+	struct regulator_init_data *reg_data;
 	struct regulator_dev *rdev;
-	struct tps65217 *tps;
-	struct tps_info *info = &tps65217_pmic_regs[pdev->id];
 	struct regulator_config config = { };
+	int i, ret;
 
-	/* Already set by core driver */
-	tps = dev_to_tps65217(pdev->dev.parent);
-	tps->info[pdev->id] = info;
+	if (tps->dev->of_node)
+		pdata = tps65217_parse_dt(pdev);
 
-	config.dev = &pdev->dev;
-	config.of_node = pdev->dev.of_node;
-	config.init_data = pdev->dev.platform_data;
-	config.driver_data = tps;
+	if (!pdata) {
+		dev_err(&pdev->dev, "Platform data not found\n");
+		return -EINVAL;
+	}
 
-	rdev = regulator_register(&regulators[pdev->id], &config);
-	if (IS_ERR(rdev))
-		return PTR_ERR(rdev);
+	if (tps65217_chip_id(tps) != TPS65217) {
+		dev_err(&pdev->dev, "Invalid tps chip version\n");
+		return -ENODEV;
+	}
 
-	platform_set_drvdata(pdev, rdev);
+	platform_set_drvdata(pdev, tps);
 
+	for (i = 0; i < TPS65217_NUM_REGULATOR; i++) {
+
+		reg_data = pdata->tps65217_init_data[i];
+
+		/*
+		 * Regulator API handles empty constraints but not NULL
+		 * constraints
+		 */
+		if (!reg_data)
+			continue;
+
+		/* Register the regulators */
+		tps->info[i] = &tps65217_pmic_regs[i];
+
+		config.dev = tps->dev;
+		config.init_data = reg_data;
+		config.driver_data = tps;
+		config.regmap = tps->regmap;
+		if (tps->dev->of_node)
+			config.of_node = pdata->of_node[i];
+
+		rdev = regulator_register(&regulators[i], &config);
+		if (IS_ERR(rdev)) {
+			dev_err(tps->dev, "failed to register %s regulator\n",
+				pdev->name);
+			ret = PTR_ERR(rdev);
+			goto err_unregister_regulator;
+		}
+
+		/* Save regulator for cleanup */
+		tps->rdev[i] = rdev;
+	}
 	return 0;
+
+err_unregister_regulator:
+	while (--i >= 0)
+		regulator_unregister(tps->rdev[i]);
+
+	return ret;
 }
 
 static int __devexit tps65217_regulator_remove(struct platform_device *pdev)
 {
-	struct regulator_dev *rdev = platform_get_drvdata(pdev);
+	struct tps65217 *tps = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	for (i = 0; i < TPS65217_NUM_REGULATOR; i++)
+		regulator_unregister(tps->rdev[i]);
 
 	platform_set_drvdata(pdev, NULL);
-	regulator_unregister(rdev);
 
 	return 0;
 }
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 12c06870829..7cd83d826ed 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -22,6 +22,9 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 
+/* TPS chip id list */
+#define TPS65217			0xF0
+
 /* I2C ID for TPS65217 part */
 #define TPS65217_I2C_ID			0x24
 
@@ -248,13 +251,11 @@ struct tps_info {
 struct tps65217 {
 	struct device *dev;
 	struct tps65217_board *pdata;
+	unsigned int id;
 	struct regulator_desc desc[TPS65217_NUM_REGULATOR];
 	struct regulator_dev *rdev[TPS65217_NUM_REGULATOR];
 	struct tps_info *info[TPS65217_NUM_REGULATOR];
 	struct regmap *regmap;
-
-	/* Client devices */
-	struct platform_device *regulator_pdev[TPS65217_NUM_REGULATOR];
 };
 
 static inline struct tps65217 *dev_to_tps65217(struct device *dev)
@@ -262,6 +263,11 @@ static inline struct tps65217 *dev_to_tps65217(struct device *dev)
 	return dev_get_drvdata(dev);
 }
 
+static inline int tps65217_chip_id(struct tps65217 *tps65217)
+{
+	return tps65217->id;
+}
+
 int tps65217_reg_read(struct tps65217 *tps, unsigned int reg,
 					unsigned int *val);
 int tps65217_reg_write(struct tps65217 *tps, unsigned int reg,
-- 
cgit v1.2.3-70-g09d2


From 8ad5db8a8ddbe3bd33078863a027011e28f1f4ee Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 17 Aug 2012 20:10:46 -0400
Subject: introduce kref_put_mutex()

equivalent of
	mutex_lock(mutex);
	if (!kref_put(kref, release))
		mutex_unlock(mutex);

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/kref.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 9c07dcebded..65af6887872 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -18,6 +18,7 @@
 #include <linux/bug.h>
 #include <linux/atomic.h>
 #include <linux/kernel.h>
+#include <linux/mutex.h>
 
 struct kref {
 	atomic_t refcount;
@@ -93,4 +94,21 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 {
 	return kref_sub(kref, 1, release);
 }
+
+static inline int kref_put_mutex(struct kref *kref,
+				 void (*release)(struct kref *kref),
+				 struct mutex *lock)
+{
+	WARN_ON(release == NULL);
+        if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) {
+		mutex_lock(lock);
+		if (unlikely(!atomic_dec_and_test(&kref->refcount))) {
+			mutex_unlock(lock);
+			return 0;
+		}
+		release(kref);
+		return 1;
+	}
+	return 0;
+}
 #endif /* _KREF_H_ */
-- 
cgit v1.2.3-70-g09d2


From c7a9b09b1a4a1fbccb2ec409daec95f9068d77c0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 15 Aug 2012 20:51:54 +0000
Subject: ARM: omap: allow building omap44xx without SMP

The new omap4 cpuidle implementation currently requires
ARCH_NEEDS_CPU_IDLE_COUPLED, which only works on SMP.

This patch makes it possible to build a non-SMP kernel
for that platform. This is not normally desired for
end-users but can be useful for testing.

Without this patch, building rand-0y2jSKT results in:

drivers/cpuidle/coupled.c: In function 'cpuidle_coupled_poke':
drivers/cpuidle/coupled.c:317:3: error: implicit declaration of function '__smp_call_function_single' [-Werror=implicit-function-declaration]

It's not clear if this patch is the best solution for
the problem at hand. I have made sure that we can now
build the kernel in all configurations, but that does
not mean it will actually work on an OMAP44xx.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/Kconfig       | 2 +-
 arch/arm/mach-omap2/cpuidle44xx.c | 3 ++-
 include/linux/cpuidle.h           | 4 ++++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index dd2db025f77..66a8be331ca 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -62,7 +62,7 @@ config ARCH_OMAP4
 	select PM_OPP if PM
 	select USB_ARCH_HAS_EHCI if USB_SUPPORT
 	select ARM_CPU_SUSPEND if PM
-	select ARCH_NEEDS_CPU_IDLE_COUPLED
+	select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
 
 config SOC_OMAP5
 	bool "TI OMAP5"
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index ee05e193fc6..288bee6cbb7 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -238,8 +238,9 @@ int __init omap4_idle_init(void)
 	for_each_cpu(cpu_id, cpu_online_mask) {
 		dev = &per_cpu(omap4_idle_dev, cpu_id);
 		dev->cpu = cpu_id;
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 		dev->coupled_cpus = *cpu_online_mask;
-
+#endif
 		cpuidle_register_driver(&omap4_idle_driver);
 
 		if (cpuidle_register_device(dev)) {
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 040b13b5c14..279b1eaa8b7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -194,6 +194,10 @@ static inline int cpuidle_play_dead(void) {return -ENODEV; }
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
 void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a);
+#else
+static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
+{
+}
 #endif
 
 /******************************
-- 
cgit v1.2.3-70-g09d2


From 22f5d115a2b087c977128f84ee557ad71530330e Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Tue, 14 Aug 2012 10:53:38 +0000
Subject: drm: Initialize object type when using DRM_MODE() macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRM_MODE() macro doesn't initialize the type of the base drm object.
When a copy is made of the mode, the object type is overwritten with
zero, and the the object can no longer be found by
drm_mode_object_find() due to the type check failing.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_crtc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index ced362533e3..bfacf0d5a22 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -118,7 +118,8 @@ enum drm_mode_status {
 	.hdisplay = (hd), .hsync_start = (hss), .hsync_end = (hse), \
 	.htotal = (ht), .hskew = (hsk), .vdisplay = (vd), \
 	.vsync_start = (vss), .vsync_end = (vse), .vtotal = (vt), \
-	.vscan = (vs), .flags = (f), .vrefresh = 0
+	.vscan = (vs), .flags = (f), .vrefresh = 0, \
+	.base.type = DRM_MODE_OBJECT_MODE
 
 #define CRTC_INTERLACE_HALVE_V 0x1 /* halve V values for interlacing */
 
-- 
cgit v1.2.3-70-g09d2


From 7c4eaca4162d0b5ad4fb39f974d7ffd71b9daa09 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Thu, 16 Aug 2012 08:29:03 +0000
Subject: drm: Check for invalid cursor flags

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc.c | 2 +-
 include/drm/drm_mode.h     | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 08a7aa722d6..6fbfc244748 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1981,7 +1981,7 @@ int drm_mode_cursor_ioctl(struct drm_device *dev,
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
-	if (!req->flags)
+	if (!req->flags || (~DRM_MODE_CURSOR_FLAGS & req->flags))
 		return -EINVAL;
 
 	mutex_lock(&dev->mode_config.mutex);
diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h
index 5581980b14f..3d6301b6ec1 100644
--- a/include/drm/drm_mode.h
+++ b/include/drm/drm_mode.h
@@ -359,8 +359,9 @@ struct drm_mode_mode_cmd {
 	struct drm_mode_modeinfo mode;
 };
 
-#define DRM_MODE_CURSOR_BO	(1<<0)
-#define DRM_MODE_CURSOR_MOVE	(1<<1)
+#define DRM_MODE_CURSOR_BO	0x01
+#define DRM_MODE_CURSOR_MOVE	0x02
+#define DRM_MODE_CURSOR_FLAGS	0x03
 
 /*
  * depending on the value in flags different members are used.
-- 
cgit v1.2.3-70-g09d2


From cc110922da7e902b62d18641a370fec01a9fa794 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Date: Thu, 23 Aug 2012 21:32:43 -0300
Subject: Bluetooth: Change signature of smp_conn_security()

To make it clear that it may be called from contexts that may not have
any knowledge of L2CAP, we change the connection parameter, to receive
a hci_conn.

This also makes it clear that it is checking the security of the link.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/smp.h |  2 +-
 net/bluetooth/l2cap_core.c  | 11 ++++++-----
 net/bluetooth/l2cap_sock.c  |  2 +-
 net/bluetooth/smp.c         |  4 ++--
 4 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/smp.h b/include/net/bluetooth/smp.h
index ca356a73492..8b27927b2a5 100644
--- a/include/net/bluetooth/smp.h
+++ b/include/net/bluetooth/smp.h
@@ -136,7 +136,7 @@ struct smp_chan {
 };
 
 /* SMP Commands */
-int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level);
+int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
 int smp_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb);
 int smp_distribute_keys(struct l2cap_conn *conn, __u8 force);
 int smp_user_confirm_reply(struct hci_conn *conn, u16 mgmt_op, __le32 passkey);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index daa149b7003..4ea1710a478 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1199,14 +1199,15 @@ clean:
 static void l2cap_conn_ready(struct l2cap_conn *conn)
 {
 	struct l2cap_chan *chan;
+	struct hci_conn *hcon = conn->hcon;
 
 	BT_DBG("conn %p", conn);
 
-	if (!conn->hcon->out && conn->hcon->type == LE_LINK)
+	if (!hcon->out && hcon->type == LE_LINK)
 		l2cap_le_conn_ready(conn);
 
-	if (conn->hcon->out && conn->hcon->type == LE_LINK)
-		smp_conn_security(conn, conn->hcon->pending_sec_level);
+	if (hcon->out && hcon->type == LE_LINK)
+		smp_conn_security(hcon, hcon->pending_sec_level);
 
 	mutex_lock(&conn->chan_lock);
 
@@ -1219,8 +1220,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 			continue;
 		}
 
-		if (conn->hcon->type == LE_LINK) {
-			if (smp_conn_security(conn, chan->sec_level))
+		if (hcon->type == LE_LINK) {
+			if (smp_conn_security(hcon, chan->sec_level))
 				l2cap_chan_ready(chan);
 
 		} else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index b94abd30e6f..45cb0b0dd2c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -615,7 +615,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
 				break;
 			}
 
-			if (smp_conn_security(conn, sec.level))
+			if (smp_conn_security(conn->hcon, sec.level))
 				break;
 			sk->sk_state = BT_CONFIG;
 			chan->state = BT_CONFIG;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 98ffc1b6a6f..8c225ef349c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -760,9 +760,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
 	return 0;
 }
 
-int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level)
+int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 {
-	struct hci_conn *hcon = conn->hcon;
+	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct smp_chan *smp = conn->smp_chan;
 	__u8 authreq;
 
-- 
cgit v1.2.3-70-g09d2


From b969afc8b719bbe3f0842a694e6bf5e87f08868f Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Thu, 23 Aug 2012 18:14:54 +0200
Subject: ASoC: atmel-ssc: include linux/io.h for raw io

Include linux/io.h for raw io operations in atmel-scc header.

This fixes the following build error:
  CC [M]  sound/soc/atmel/atmel_ssc_dai.o
sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_interrupt':
sound/soc/atmel/atmel_ssc_dai.c:171: error: implicit declaration of function '__raw_readl'
sound/soc/atmel/atmel_ssc_dai.c: In function 'atmel_ssc_shutdown':
sound/soc/atmel/atmel_ssc_dai.c:249: error: implicit declaration of function '__raw_writel'

Signed-off-by: Joachim Eastwood <joachim.eastwood@jotron.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Jean-Christophe PLAGNIOL-VILLARD <plagnioj@jcrosoft.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/atmel-ssc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/atmel-ssc.h b/include/linux/atmel-ssc.h
index 06023393fba..4eb31752e2b 100644
--- a/include/linux/atmel-ssc.h
+++ b/include/linux/atmel-ssc.h
@@ -3,6 +3,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/list.h>
+#include <linux/io.h>
 
 struct ssc_device {
 	struct list_head	list;
-- 
cgit v1.2.3-70-g09d2


From 5c879d2094946081af934739850c7260e8b25d3c Mon Sep 17 00:00:00 2001
From: Yuval Mintz <yuvalmin@broadcom.com>
Date: Sun, 26 Aug 2012 00:35:45 +0000
Subject: bnx2x: fix 57840_MF pci id

Commit c3def943c7117d42caaed3478731ea7c3c87190e have added support for
new pci ids of the 57840 board, while failing to change the obsolete value
in 'pci_ids.h'.
This patch does so, allowing the probe of such devices.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pci_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index fc352607734..6b4565c440c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2149,7 +2149,7 @@
 #define PCI_DEVICE_ID_TIGON3_5704S	0x16a8
 #define PCI_DEVICE_ID_NX2_57800_VF	0x16a9
 #define PCI_DEVICE_ID_NX2_5706S		0x16aa
-#define PCI_DEVICE_ID_NX2_57840_MF	0x16ab
+#define PCI_DEVICE_ID_NX2_57840_MF	0x16a4
 #define PCI_DEVICE_ID_NX2_5708S		0x16ac
 #define PCI_DEVICE_ID_NX2_57840_VF	0x16ad
 #define PCI_DEVICE_ID_NX2_57810_MF	0x16ae
-- 
cgit v1.2.3-70-g09d2


From 5b423f6a40a0327f9d40bc8b97ce9be266f74368 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 29 Aug 2012 16:25:49 +0000
Subject: netfilter: nf_conntrack: fix racy timer handling with reliable events

Existing code assumes that del_timer returns true for alive conntrack
entries. However, this is not true if reliable events are enabled.
In that case, del_timer may return true for entries that were
just inserted in the dying list. Note that packets / ctnetlink may
hold references to conntrack entries that were just inserted to such
list.

This patch fixes the issue by adding an independent timer for
event delivery. This increases the size of the ecache extension.
Still we can revisit this later and use variable size extensions
to allocate this area on demand.

Tested-by: Oliver Smith <olipro@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h |  1 +
 net/netfilter/nf_conntrack_core.c           | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index e1ce1048fe5..4a045cda9c6 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -18,6 +18,7 @@ struct nf_conntrack_ecache {
 	u16 ctmask;		/* bitmask of ct events to be delivered */
 	u16 expmask;		/* bitmask of expect events to be delivered */
 	u32 pid;		/* netlink pid of destroyer */
+	struct timer_list timeout;
 };
 
 static inline struct nf_conntrack_ecache *
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf4875565d6..2ceec64b19f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -249,12 +249,15 @@ static void death_by_event(unsigned long ul_conntrack)
 {
 	struct nf_conn *ct = (void *)ul_conntrack;
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
 		/* bad luck, let's retry again */
-		ct->timeout.expires = jiffies +
+		ecache->timeout.expires = jiffies +
 			(random32() % net->ct.sysctl_events_retry_timeout);
-		add_timer(&ct->timeout);
+		add_timer(&ecache->timeout);
 		return;
 	}
 	/* we've got the event delivered, now it's dying */
@@ -268,6 +271,9 @@ static void death_by_event(unsigned long ul_conntrack)
 void nf_ct_insert_dying_list(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+	BUG_ON(ecache == NULL);
 
 	/* add this conntrack to the dying list */
 	spin_lock_bh(&nf_conntrack_lock);
@@ -275,10 +281,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct)
 			     &net->ct.dying);
 	spin_unlock_bh(&nf_conntrack_lock);
 	/* set a new timer to retry event delivery */
-	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
-	ct->timeout.expires = jiffies +
+	setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
+	ecache->timeout.expires = jiffies +
 		(random32() % net->ct.sysctl_events_retry_timeout);
-	add_timer(&ct->timeout);
+	add_timer(&ecache->timeout);
 }
 EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 
-- 
cgit v1.2.3-70-g09d2


From cee58483cf56e0ba355fdd97ff5e8925329aa936 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 31 Aug 2012 13:30:06 -0400
Subject: time: Move ktime_t overflow checking into timespec_valid_strict

Andreas Bombe reported that the added ktime_t overflow checking added to
timespec_valid in commit 4e8b14526ca7 ("time: Improve sanity checking of
timekeeping inputs") was causing problems with X.org because it caused
timeouts larger then KTIME_T to be invalid.

Previously, these large timeouts would be clamped to KTIME_MAX and would
never expire, which is valid.

This patch splits the ktime_t overflow checking into a new
timespec_valid_strict function, and converts the timekeeping codes
internal checking to use this more strict function.

Reported-and-tested-by: Andreas Bombe <aeb@debian.org>
Cc: Zhouping Liu <zliu@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/time.h      |  7 +++++++
 kernel/time/timekeeping.c | 10 +++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/time.h b/include/linux/time.h
index b0bbd8f0130..b51e664c83e 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -125,6 +125,13 @@ static inline bool timespec_valid(const struct timespec *ts)
 	/* Can't have more nanoseconds then a second */
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return false;
+	return true;
+}
+
+static inline bool timespec_valid_strict(const struct timespec *ts)
+{
+	if (!timespec_valid(ts))
+		return false;
 	/* Disallow values that could overflow ktime_t */
 	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
 		return false;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 0c1485e42be..34e5eac8142 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -428,7 +428,7 @@ int do_settimeofday(const struct timespec *tv)
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if (!timespec_valid(tv))
+	if (!timespec_valid_strict(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -476,7 +476,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	/* Make sure the proposed value is valid */
 	tmp = timespec_add(tk_xtime(tk),  *ts);
-	if (!timespec_valid(&tmp)) {
+	if (!timespec_valid_strict(&tmp)) {
 		ret = -EINVAL;
 		goto error;
 	}
@@ -659,7 +659,7 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
-	if (!timespec_valid(&now)) {
+	if (!timespec_valid_strict(&now)) {
 		pr_warn("WARNING: Persistent clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		now.tv_sec = 0;
@@ -667,7 +667,7 @@ void __init timekeeping_init(void)
 	}
 
 	read_boot_clock(&boot);
-	if (!timespec_valid(&boot)) {
+	if (!timespec_valid_strict(&boot)) {
 		pr_warn("WARNING: Boot clock returned invalid value!\n"
 			"         Check your CMOS/BIOS settings.\n");
 		boot.tv_sec = 0;
@@ -713,7 +713,7 @@ static struct timespec timekeeping_suspend_time;
 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
 							struct timespec *delta)
 {
-	if (!timespec_valid(delta)) {
+	if (!timespec_valid_strict(delta)) {
 		printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
 					"sleep delta value!\n");
 		return;
-- 
cgit v1.2.3-70-g09d2


From b6d86d3d6d6e4c9b588d81615c81b5a8292b62ed Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 24 Aug 2012 17:25:01 -0700
Subject: linux/kernel.h: Fix DIV_ROUND_CLOSEST to support negative dividends

DIV_ROUND_CLOSEST returns a bad result for negative dividends:
	DIV_ROUND_CLOSEST(-2, 2) = 0

Most of the time this does not matter. However, in the hardware monitoring
subsystem, DIV_ROUND_CLOSEST is sometimes used on integers which can be
negative (such as temperatures).

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/kernel.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 604382143bc..594b419b7d2 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -82,10 +82,18 @@
 	__x - (__x % (y));				\
 }							\
 )
+
+/*
+ * Divide positive or negative dividend by positive divisor and round
+ * to closest integer. Result is undefined for negative divisors.
+ */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
 {							\
-	typeof(divisor) __divisor = divisor;		\
-	(((x) + ((__divisor) / 2)) / (__divisor));	\
+	typeof(x) __x = x;				\
+	typeof(divisor) __d = divisor;			\
+	(((typeof(x))-1) >= 0 || (__x) >= 0) ?		\
+		(((__x) + ((__d) / 2)) / (__d)) :	\
+		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
 )
 
-- 
cgit v1.2.3-70-g09d2


From a6fa941d94b411bbd2b6421ffbde6db3c93e65ab Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 20 Aug 2012 14:59:25 +0100
Subject: perf_event: Switch to internal refcount, fix race with close()

Don't mess with file refcounts (or keep a reference to file, for
that matter) in perf_event.  Use explicit refcount of its own
instead.  Deal with the race between the final reference to event
going away and new children getting created for it by use of
atomic_long_inc_not_zero() in inherit_event(); just have the
latter free what it had allocated and return NULL, that works
out just fine (children of siblings of something doomed are
created as singletons, same as if the child of leader had been
created and immediately killed).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120820135925.GG23464@ZenIV.linux.org.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  2 +-
 kernel/events/core.c       | 62 ++++++++++++++++++++++++----------------------
 2 files changed, 34 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7602ccb3f40..ad04dfcd6f3 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -926,7 +926,7 @@ struct perf_event {
 	struct hw_perf_event		hw;
 
 	struct perf_event_context	*ctx;
-	struct file			*filp;
+	atomic_long_t			refcount;
 
 	/*
 	 * These accumulate total time (in nanoseconds) that children
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7935fcec7d..efef4282b8e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2935,12 +2935,12 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 /*
  * Called when the last reference to the file is gone.
  */
-static int perf_release(struct inode *inode, struct file *file)
+static void put_event(struct perf_event *event)
 {
-	struct perf_event *event = file->private_data;
 	struct task_struct *owner;
 
-	file->private_data = NULL;
+	if (!atomic_long_dec_and_test(&event->refcount))
+		return;
 
 	rcu_read_lock();
 	owner = ACCESS_ONCE(event->owner);
@@ -2975,7 +2975,13 @@ static int perf_release(struct inode *inode, struct file *file)
 		put_task_struct(owner);
 	}
 
-	return perf_event_release_kernel(event);
+	perf_event_release_kernel(event);
+}
+
+static int perf_release(struct inode *inode, struct file *file)
+{
+	put_event(file->private_data);
+	return 0;
 }
 
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
@@ -3227,7 +3233,7 @@ unlock:
 
 static const struct file_operations perf_fops;
 
-static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+static struct file *perf_fget_light(int fd, int *fput_needed)
 {
 	struct file *file;
 
@@ -3241,7 +3247,7 @@ static struct perf_event *perf_fget_light(int fd, int *fput_needed)
 		return ERR_PTR(-EBADF);
 	}
 
-	return file->private_data;
+	return file;
 }
 
 static int perf_event_set_output(struct perf_event *event,
@@ -3273,19 +3279,21 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 	case PERF_EVENT_IOC_SET_OUTPUT:
 	{
+		struct file *output_file = NULL;
 		struct perf_event *output_event = NULL;
 		int fput_needed = 0;
 		int ret;
 
 		if (arg != -1) {
-			output_event = perf_fget_light(arg, &fput_needed);
-			if (IS_ERR(output_event))
-				return PTR_ERR(output_event);
+			output_file = perf_fget_light(arg, &fput_needed);
+			if (IS_ERR(output_file))
+				return PTR_ERR(output_file);
+			output_event = output_file->private_data;
 		}
 
 		ret = perf_event_set_output(event, output_event);
 		if (output_event)
-			fput_light(output_event->filp, fput_needed);
+			fput_light(output_file, fput_needed);
 
 		return ret;
 	}
@@ -5950,6 +5958,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	mutex_init(&event->mmap_mutex);
 
+	atomic_long_set(&event->refcount, 1);
 	event->cpu		= cpu;
 	event->attr		= *attr;
 	event->group_leader	= group_leader;
@@ -6260,12 +6269,12 @@ SYSCALL_DEFINE5(perf_event_open,
 		return event_fd;
 
 	if (group_fd != -1) {
-		group_leader = perf_fget_light(group_fd, &fput_needed);
-		if (IS_ERR(group_leader)) {
-			err = PTR_ERR(group_leader);
+		group_file = perf_fget_light(group_fd, &fput_needed);
+		if (IS_ERR(group_file)) {
+			err = PTR_ERR(group_file);
 			goto err_fd;
 		}
-		group_file = group_leader->filp;
+		group_leader = group_file->private_data;
 		if (flags & PERF_FLAG_FD_OUTPUT)
 			output_event = group_leader;
 		if (flags & PERF_FLAG_FD_NO_GROUP)
@@ -6402,7 +6411,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_ctx(gctx);
 	}
 
-	event->filp = event_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 
@@ -6496,7 +6504,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_free;
 	}
 
-	event->filp = NULL;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
@@ -6578,7 +6585,7 @@ static void sync_child_event(struct perf_event *child_event,
 	 * Release the parent event, if this was the last
 	 * reference to it.
 	 */
-	fput(parent_event->filp);
+	put_event(parent_event);
 }
 
 static void
@@ -6654,9 +6661,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 *
 	 *   __perf_event_exit_task()
 	 *     sync_child_event()
-	 *       fput(parent_event->filp)
-	 *         perf_release()
-	 *           mutex_lock(&ctx->mutex)
+	 *       put_event()
+	 *         mutex_lock(&ctx->mutex)
 	 *
 	 * But since its the parent context it won't be the same instance.
 	 */
@@ -6724,7 +6730,7 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	fput(parent->filp);
+	put_event(parent);
 
 	perf_group_detach(event);
 	list_del_event(event, ctx);
@@ -6804,6 +6810,12 @@ inherit_event(struct perf_event *parent_event,
 				           NULL, NULL);
 	if (IS_ERR(child_event))
 		return child_event;
+
+	if (!atomic_long_inc_not_zero(&parent_event->refcount)) {
+		free_event(child_event);
+		return NULL;
+	}
+
 	get_ctx(child_ctx);
 
 	/*
@@ -6844,14 +6856,6 @@ inherit_event(struct perf_event *parent_event,
 	add_event_to_ctx(child_event, child_ctx);
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
-	/*
-	 * Get a reference to the parent filp - we will fput it
-	 * when the child event exits. This is safe to do because
-	 * we are in the parent and we know that the filp still
-	 * exists and has a nonzero count:
-	 */
-	atomic_long_inc(&parent_event->filp->f_count);
-
 	/*
 	 * Link this into the parent event's child list
 	 */
-- 
cgit v1.2.3-70-g09d2


From 500ad2d8b01390c98bc6dce068bccfa9534b8212 Mon Sep 17 00:00:00 2001
From: "K.Prasad" <Prasad.Krishnan@gmail.com>
Date: Thu, 2 Aug 2012 13:46:35 +0530
Subject: perf/hwpb: Invoke __perf_event_disable() if interrupts are already
 disabled

While debugging a warning message on PowerPC while using hardware
breakpoints, it was discovered that when perf_event_disable is invoked
through hw_breakpoint_handler function with interrupts disabled, a
subsequent IPI in the code path would trigger a WARN_ON_ONCE message in
smp_call_function_single function.

This patch calls __perf_event_disable() when interrupts are already
disabled, instead of perf_event_disable().

Reported-by: Edjunior Barbosa Machado <emachado@linux.vnet.ibm.com>
Signed-off-by: K.Prasad <Prasad.Krishnan@gmail.com>
[naveen.n.rao@linux.vnet.ibm.com: v3: Check to make sure we target current task]
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120802081635.5811.17737.stgit@localhost.localdomain
[ Fixed build error on MIPS. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h    |  2 ++
 kernel/events/core.c          |  2 +-
 kernel/events/hw_breakpoint.c | 11 ++++++++++-
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ad04dfcd6f3..33ed9d605f9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1296,6 +1296,7 @@ extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
+extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
 #else
 static inline void
@@ -1334,6 +1335,7 @@ static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
 static inline void perf_event_enable(struct perf_event *event)		{ }
 static inline void perf_event_disable(struct perf_event *event)		{ }
+static inline int __perf_event_disable(void *info)			{ return -1; }
 static inline void perf_event_task_tick(void)				{ }
 #endif
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index efef4282b8e..7fee567153f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1253,7 +1253,7 @@ retry:
 /*
  * Cross CPU call to disable a performance event
  */
-static int __perf_event_disable(void *info)
+int __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index bb38c4d3ee1..9a7b487c6fe 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -453,7 +453,16 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	int old_type = bp->attr.bp_type;
 	int err = 0;
 
-	perf_event_disable(bp);
+	/*
+	 * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
+	 * will not be possible to raise IPIs that invoke __perf_event_disable.
+	 * So call the function directly after making sure we are targeting the
+	 * current task.
+	 */
+	if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
+		__perf_event_disable(bp);
+	else
+		perf_event_disable(bp);
 
 	bp->attr.bp_addr = attr->bp_addr;
 	bp->attr.bp_type = attr->bp_type;
-- 
cgit v1.2.3-70-g09d2


From 3550ccdb9d8d350e526b809bf3dd92b550a74fe1 Mon Sep 17 00:00:00 2001
From: Ian Chen <ian.cy.chen@samsung.com>
Date: Wed, 29 Aug 2012 15:05:36 +0900
Subject: mmc: card: Skip secure erase on MoviNAND; causes unrecoverable
 corruption.

For several MoviNAND eMMC parts, there are known issues with secure
erase and secure trim.  For these specific MoviNAND devices, we skip
these operations.

Specifically, there is a bug in the eMMC firmware that causes
unrecoverable corruption when the MMC is erased with MMC_CAP_ERASE
enabled.

References:

http://forum.xda-developers.com/showthread.php?t=1644364
https://plus.google.com/111398485184813224730/posts/21pTYfTsCkB#111398485184813224730/posts/21pTYfTsCkB

Signed-off-by: Ian Chen <ian.cy.chen@samsung.com>
Reviewed-by: Namjae Jeon <linkinjeon@gmail.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: stable <stable@vger.kernel.org> [3.0+]
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/card/block.c | 26 +++++++++++++++++++++++++-
 include/linux/mmc/card.h |  1 +
 2 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f1c84decb19..172a768036d 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1411,7 +1411,8 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		/* complete ongoing async transfer before issuing discard */
 		if (card->host->areq)
 			mmc_blk_issue_rw_rq(mq, NULL);
-		if (req->cmd_flags & REQ_SECURE)
+		if (req->cmd_flags & REQ_SECURE &&
+			!(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
@@ -1716,6 +1717,7 @@ force_ro_fail:
 #define CID_MANFID_SANDISK	0x2
 #define CID_MANFID_TOSHIBA	0x11
 #define CID_MANFID_MICRON	0x13
+#define CID_MANFID_SAMSUNG	0x15
 
 static const struct mmc_fixup blk_fixups[] =
 {
@@ -1752,6 +1754,28 @@ static const struct mmc_fixup blk_fixups[] =
 	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc,
 		  MMC_QUIRK_LONG_READ_TIME),
 
+	/*
+	 * On these Samsung MoviNAND parts, performing secure erase or
+	 * secure trim can result in unrecoverable corruption due to a
+	 * firmware bug.
+	 */
+	MMC_FIXUP("M8G2FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MAG4FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MBG8FA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("MCGAFA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VAL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("KYL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+	MMC_FIXUP("VZL00M", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_SEC_ERASE_TRIM_BROKEN),
+
 	END_FIXUP
 };
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 111aca5e97f..4b27f9f503e 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -239,6 +239,7 @@ struct mmc_card {
 #define MMC_QUIRK_BLK_NO_CMD23	(1<<7)		/* Avoid CMD23 for regular multiblock */
 #define MMC_QUIRK_BROKEN_BYTE_MODE_512 (1<<8)	/* Avoid sending 512 bytes in */
 #define MMC_QUIRK_LONG_READ_TIME (1<<9)		/* Data read time > CSD says */
+#define MMC_QUIRK_SEC_ERASE_TRIM_BROKEN (1<<10)	/* Skip secure for erase/trim */
 						/* byte mode */
 	unsigned int    poweroff_notify_state;	/* eMMC4.5 notify feature */
 #define MMC_NO_POWER_NOTIFICATION	0
-- 
cgit v1.2.3-70-g09d2


From 3b59df46a449ec9975146d71318c4777ad086744 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Tue, 4 Sep 2012 00:03:29 +0000
Subject: xfrm: Workaround incompatibility of ESN and async crypto

ESN for esp is defined in RFC 4303. This RFC assumes that the
sequence number counters are always up to date. However,
this is not true if an async crypto algorithm is employed.

If the sequence number counters are not up to date on sequence
number check, we may incorrectly update the upper 32 bit of
the sequence number. This leads to a DOS.

We workaround this by comparing the upper sequence number,
(used for authentication) with the upper sequence number
computed after the async processing. We drop the packet
if these numbers are different.

To do this, we introduce a recheck function that does this
check in the ESN case.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h     |  3 +++
 net/xfrm/xfrm_input.c  |  2 +-
 net/xfrm/xfrm_replay.c | 15 +++++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 976a81abe1a..639dd1316d3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -273,6 +273,9 @@ struct xfrm_replay {
 	int	(*check)(struct xfrm_state *x,
 			 struct sk_buff *skb,
 			 __be32 net_seq);
+	int	(*recheck)(struct xfrm_state *x,
+			   struct sk_buff *skb,
+			   __be32 net_seq);
 	void	(*notify)(struct xfrm_state *x, int event);
 	int	(*overflow)(struct xfrm_state *x, struct sk_buff *skb);
 };
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 54a0dc2e2f8..ab2bb42fe09 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -212,7 +212,7 @@ resume:
 		/* only the first xfrm gets the encap type */
 		encap_type = 0;
 
-		if (async && x->repl->check(x, skb, seq)) {
+		if (async && x->repl->recheck(x, skb, seq)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
 			goto drop_unlock;
 		}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 2f6d11d04a2..3efb07d3eb2 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -420,6 +420,18 @@ err:
 	return -EINVAL;
 }
 
+static int xfrm_replay_recheck_esn(struct xfrm_state *x,
+				   struct sk_buff *skb, __be32 net_seq)
+{
+	if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi !=
+		     htonl(xfrm_replay_seqhi(x, net_seq)))) {
+			x->stats.replay_window++;
+			return -EINVAL;
+	}
+
+	return xfrm_replay_check_esn(x, skb, net_seq);
+}
+
 static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 {
 	unsigned int bitnr, nr, i;
@@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
 static struct xfrm_replay xfrm_replay_legacy = {
 	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
+	.recheck	= xfrm_replay_check,
 	.notify		= xfrm_replay_notify,
 	.overflow	= xfrm_replay_overflow,
 };
@@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
 static struct xfrm_replay xfrm_replay_bmp = {
 	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
+	.recheck	= xfrm_replay_check_bmp,
 	.notify		= xfrm_replay_notify_bmp,
 	.overflow	= xfrm_replay_overflow_bmp,
 };
@@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
 static struct xfrm_replay xfrm_replay_esn = {
 	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
+	.recheck	= xfrm_replay_recheck_esn,
 	.notify		= xfrm_replay_notify_bmp,
 	.overflow	= xfrm_replay_overflow_esn,
 };
-- 
cgit v1.2.3-70-g09d2


From c3f52af3e03013db5237e339c817beaae5ec9e3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 3 Sep 2012 14:56:02 -0400
Subject: NFS: Fix the initialisation of the readdir 'cookieverf' array

When the NFS_COOKIEVERF helper macro was converted into a static
inline function in commit 99fadcd764 (nfs: convert NFS_*(inode)
helpers to static inline), we broke the initialisation of the
readdir cookies, since that depended on doing a memset with an
argument of 'sizeof(NFS_COOKIEVERF(inode))' which therefore
changed from sizeof(be32 cookieverf[2]) to sizeof(be32 *).

At this point, NFS_COOKIEVERF seems to be more of an obfuscation
than a helper, so the best thing would be to just get rid of it.

Also see: https://bugzilla.kernel.org/show_bug.cgi?id=46881

Reported-by: Andi Kleen <andi@firstfloor.org>
Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
---
 fs/nfs/inode.c         | 2 +-
 fs/nfs/nfs3proc.c      | 2 +-
 fs/nfs/nfs4proc.c      | 4 ++--
 include/linux/nfs_fs.h | 5 -----
 4 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c6e895f0fbf..9b47610338f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -154,7 +154,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
 	nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 	nfsi->attrtimeo_timestamp = jiffies;
 
-	memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+	memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
 	else
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d6b3b5f2d77..69322096c32 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -643,7 +643,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 		  u64 cookie, struct page **pages, unsigned int count, int plus)
 {
 	struct inode		*dir = dentry->d_inode;
-	__be32			*verf = NFS_COOKIEVERF(dir);
+	__be32			*verf = NFS_I(dir)->cookieverf;
 	struct nfs3_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
 		.cookie		= cookie,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 635274140b1..86b4c736103 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3215,11 +3215,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name,
 			(unsigned long long)cookie);
-	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
+	nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
 	res.pgbase = args.pgbase;
 	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
 	if (status >= 0) {
-		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+		memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
 		status += args.pgbase;
 	}
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1f8fc7f9bcd..4b03f56e280 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -265,11 +265,6 @@ static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
 	return NFS_SERVER(inode)->nfs_client->rpc_ops;
 }
 
-static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
-{
-	return NFS_I(inode)->cookieverf;
-}
-
 static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
 {
 	struct nfs_server *nfss = NFS_SERVER(inode);
-- 
cgit v1.2.3-70-g09d2


From d5829eac5f7cfff89c6d1cf11717eee97cf030d0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 5 Sep 2012 17:09:15 +0200
Subject: target: fix use-after-free with PSCSI sense data

The pointer to the sense buffer is fetched by transport_get_sense_data,
but this is called by target_complete_ok_work long after pscsi_req_done
has freed the struct that contains it.

Pass instead the fabric's sense buffer to transport_complete,
and copy the data to it directly in transport_complete.  Setting
SCF_TRANSPORT_TASK_SENSE also becomes a duty of transport_complete.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_pscsi.c     | 21 +++++++-------------
 drivers/target/target_core_transport.c | 36 +++++++++++++---------------------
 include/target/target_core_backend.h   |  4 +++-
 3 files changed, 24 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 5552fa7426b..5f7151d9034 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -667,7 +667,8 @@ static void pscsi_free_device(void *p)
 	kfree(pdv);
 }
 
-static int pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg)
+static void pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg,
+				     unsigned char *sense_buffer)
 {
 	struct pscsi_dev_virt *pdv = cmd->se_dev->dev_ptr;
 	struct scsi_device *sd = pdv->pdv_sd;
@@ -679,7 +680,7 @@ static int pscsi_transport_complete(struct se_cmd *cmd, struct scatterlist *sg)
 	 * not been allocated because TCM is handling the emulation directly.
 	 */
 	if (!pt)
-		return 0;
+		return;
 
 	cdb = &pt->pscsi_cdb[0];
 	result = pt->pscsi_result;
@@ -750,10 +751,10 @@ after_mode_sense:
 	}
 after_mode_select:
 
-	if (status_byte(result) & CHECK_CONDITION)
-		return 1;
-
-	return 0;
+	if (sense_buffer && (status_byte(result) & CHECK_CONDITION)) {
+		memcpy(sense_buffer, pt->pscsi_sense, TRANSPORT_SENSE_BUFFER);
+		cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
+	}
 }
 
 enum {
@@ -1184,13 +1185,6 @@ fail:
 	return -ENOMEM;
 }
 
-static unsigned char *pscsi_get_sense_buffer(struct se_cmd *cmd)
-{
-	struct pscsi_plugin_task *pt = cmd->priv;
-
-	return pt->pscsi_sense;
-}
-
 /*	pscsi_get_device_rev():
  *
  *
@@ -1273,7 +1267,6 @@ static struct se_subsystem_api pscsi_template = {
 	.check_configfs_dev_params = pscsi_check_configfs_dev_params,
 	.set_configfs_dev_params = pscsi_set_configfs_dev_params,
 	.show_configfs_dev_params = pscsi_show_configfs_dev_params,
-	.get_sense_buffer	= pscsi_get_sense_buffer,
 	.get_device_rev		= pscsi_get_device_rev,
 	.get_device_type	= pscsi_get_device_type,
 	.get_blocks		= pscsi_get_blocks,
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 040f05fde4d..8a29e3fd019 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -568,38 +568,31 @@ static void target_complete_failure_work(struct work_struct *work)
 }
 
 /*
- * Used to obtain Sense Data from underlying Linux/SCSI struct scsi_cmnd
+ * Used when asking transport to copy Sense Data from the underlying
+ * Linux/SCSI struct scsi_cmnd
  */
-static void transport_get_sense_data(struct se_cmd *cmd)
+static unsigned char *transport_get_sense_buffer(struct se_cmd *cmd)
 {
-	unsigned char *buffer = cmd->sense_buffer, *sense_buffer = NULL;
+	unsigned char *buffer = cmd->sense_buffer;
 	struct se_device *dev = cmd->se_dev;
-	unsigned long flags;
 	u32 offset = 0;
 
 	WARN_ON(!cmd->se_lun);
 
 	if (!dev)
-		return;
-
-	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		return;
-	}
+		return NULL;
 
-	sense_buffer = dev->transport->get_sense_buffer(cmd);
-	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+	if (cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION)
+		return NULL;
 
 	offset = cmd->se_tfo->set_fabric_sense_len(cmd, TRANSPORT_SENSE_BUFFER);
 
-	memcpy(&buffer[offset], sense_buffer, TRANSPORT_SENSE_BUFFER);
-
 	/* Automatically padded */
 	cmd->scsi_sense_length = TRANSPORT_SENSE_BUFFER + offset;
 
-	pr_debug("HBA_[%u]_PLUG[%s]: Set SAM STATUS: 0x%02x and sense\n",
+	pr_debug("HBA_[%u]_PLUG[%s]: Requesting sense for SAM STATUS: 0x%02x\n",
 		dev->se_hba->hba_id, dev->transport->name, cmd->scsi_status);
+	return &buffer[offset];
 }
 
 void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
@@ -615,11 +608,11 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 	cmd->transport_state &= ~CMD_T_BUSY;
 
 	if (dev && dev->transport->transport_complete) {
-		if (dev->transport->transport_complete(cmd,
-				cmd->t_data_sg) != 0) {
-			cmd->se_cmd_flags |= SCF_TRANSPORT_TASK_SENSE;
+		dev->transport->transport_complete(cmd,
+				cmd->t_data_sg,
+				transport_get_sense_buffer(cmd));
+		if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
 			success = 1;
-		}
 	}
 
 	/*
@@ -1987,12 +1980,11 @@ static void target_complete_ok_work(struct work_struct *work)
 		schedule_work(&cmd->se_dev->qf_work_queue);
 
 	/*
-	 * Check if we need to retrieve a sense buffer from
+	 * Check if we need to send a sense buffer from
 	 * the struct se_cmd in question.
 	 */
 	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
 		WARN_ON(!cmd->scsi_status);
-		transport_get_sense_data(cmd);
 		ret = transport_send_check_condition_and_sense(
 					cmd, 0, 1);
 		if (ret == -EAGAIN || ret == -ENOMEM)
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index f1405d335a9..941c84bf106 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -23,7 +23,9 @@ struct se_subsystem_api {
 	struct se_device *(*create_virtdevice)(struct se_hba *,
 				struct se_subsystem_dev *, void *);
 	void (*free_device)(void *);
-	int (*transport_complete)(struct se_cmd *cmd, struct scatterlist *);
+	void (*transport_complete)(struct se_cmd *cmd,
+				   struct scatterlist *,
+				   unsigned char *);
 
 	int (*parse_cdb)(struct se_cmd *cmd);
 	ssize_t (*check_configfs_dev_params)(struct se_hba *,
-- 
cgit v1.2.3-70-g09d2


From 1f1ea6c2d9d8c0be9ec56454b05315273b5de8ce Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sun, 26 Aug 2012 11:44:43 -0700
Subject: NFSv4: Fix buffer overflow checking in __nfs4_get_acl_uncached

Pass the checks made by decode_getacl back to __nfs4_get_acl_uncached
so that it knows if the acl has been truncated.

The current overflow checking is broken, resulting in Oopses on
user-triggered nfs4_getfacl calls, and is opaque to the point
where several attempts at fixing it have failed.
This patch tries to clean up the code in addition to fixing the
Oopses by ensuring that the overflow checks are performed in
a single place (decode_getacl). If the overflow check failed,
we will still be able to report the acl length, but at least
we will no longer attempt to cache the acl or copy the
truncated contents to user space.

Reported-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Tested-by: Sachin Prabhu <sprabhu@redhat.com>
---
 fs/nfs/nfs4proc.c       | 31 ++++++++++++-------------------
 fs/nfs/nfs4xdr.c        | 14 +++++---------
 include/linux/nfs_xdr.h |  2 +-
 3 files changed, 18 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6b94f2d5253..1e50326d00d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3739,7 +3739,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size
 	struct nfs4_cached_acl *acl;
 	size_t buflen = sizeof(*acl) + acl_len;
 
-	if (pages && buflen <= PAGE_SIZE) {
+	if (buflen <= PAGE_SIZE) {
 		acl = kmalloc(buflen, GFP_KERNEL);
 		if (acl == NULL)
 			goto out;
@@ -3784,7 +3784,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	};
 	unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
 	int ret = -ENOMEM, i;
-	size_t acl_len = 0;
 
 	/* As long as we're doing a round trip to the server anyway,
 	 * let's be prepared for a page of acl data. */
@@ -3807,11 +3806,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	args.acl_len = npages * PAGE_SIZE;
 	args.acl_pgbase = 0;
 
-	/* Let decode_getfacl know not to fail if the ACL data is larger than
-	 * the page we send as a guess */
-	if (buf == NULL)
-		res.acl_flags |= NFS4_ACL_LEN_REQUEST;
-
 	dprintk("%s  buf %p buflen %zu npages %d args.acl_len %zu\n",
 		__func__, buf, buflen, npages, args.acl_len);
 	ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
@@ -3819,20 +3813,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
 	if (ret)
 		goto out_free;
 
-	acl_len = res.acl_len;
-	if (acl_len > args.acl_len)
-		nfs4_write_cached_acl(inode, NULL, 0, acl_len);
-	else
-		nfs4_write_cached_acl(inode, pages, res.acl_data_offset,
-				      acl_len);
-	if (buf) {
+	/* Handle the case where the passed-in buffer is too short */
+	if (res.acl_flags & NFS4_ACL_TRUNC) {
+		/* Did the user only issue a request for the acl length? */
+		if (buf == NULL)
+			goto out_ok;
 		ret = -ERANGE;
-		if (acl_len > buflen)
-			goto out_free;
-		_copy_from_pages(buf, pages, res.acl_data_offset,
-				acl_len);
+		goto out_free;
 	}
-	ret = acl_len;
+	nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
+	if (buf)
+		_copy_from_pages(buf, pages, res.acl_data_offset, res.acl_len);
+out_ok:
+	ret = res.acl_len;
 out_free:
 	for (i = 0; i < npages; i++)
 		if (pages[i])
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1bfbd67c556..541e796e6db 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5072,18 +5072,14 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		 * are stored with the acl data to handle the problem of
 		 * variable length bitmaps.*/
 		res->acl_data_offset = xdr_stream_pos(xdr) - pg_offset;
-
-		/* We ignore &savep and don't do consistency checks on
-		 * the attr length.  Let userspace figure it out.... */
 		res->acl_len = attrlen;
-		if (attrlen > (xdr->nwords << 2)) {
-			if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
-				/* getxattr interface called with a NULL buf */
-				goto out;
-			}
+
+		/* Check for receive buffer overflow */
+		if (res->acl_len > (xdr->nwords << 2) ||
+		    res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
+			res->acl_flags |= NFS4_ACL_TRUNC;
 			dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
 					attrlen, xdr->nwords << 2);
-			return -EINVAL;
 		}
 	} else
 		status = -EOPNOTSUPP;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ac7c8ae254f..be9cf3c7e79 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -652,7 +652,7 @@ struct nfs_getaclargs {
 };
 
 /* getxattr ACL interface flags */
-#define NFS4_ACL_LEN_REQUEST	0x0001	/* zero length getxattr buffer */
+#define NFS4_ACL_TRUNC		0x0001	/* ACL was truncated */
 struct nfs_getaclres {
 	size_t				acl_len;
 	size_t				acl_data_offset;
-- 
cgit v1.2.3-70-g09d2


From 60e233a56609fd963c59e99bd75c663d63fa91b6 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Sun, 2 Sep 2012 15:41:34 +0200
Subject: kobject: fix oops with "input0: bad kobj_uevent_env content in
 show_uevent()"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fengguang Wu <fengguang.wu@intel.com> writes:

> After the __devinit* removal series, I can still get kernel panic in
> show_uevent(). So there are more sources of bug..
>
> Debug patch:
>
> @@ -343,8 +343,11 @@ static ssize_t show_uevent(struct device
>                 goto out;
>
>         /* copy keys to file */
> -       for (i = 0; i < env->envp_idx; i++)
> +       dev_err(dev, "uevent %d env[%d]: %s/.../%s\n", env->buflen, env->envp_idx, top_kobj->name, dev->kobj.name);
> +       for (i = 0; i < env->envp_idx; i++) {
> +               printk(KERN_ERR "uevent %d env[%d]: %s\n", (int)count, i, env->envp[i]);
>                 count += sprintf(&buf[count], "%s\n", env->envp[i]);
> +       }
>
> Oops message, the env[] is again not properly initilized:
>
> [   44.068623] input input0: uevent 61 env[805306368]: input0/.../input0
> [   44.069552] uevent 0 env[0]: (null)

This is a completely different CONFIG_HOTPLUG problem, only
demonstrating another reason why CONFIG_HOTPLUG should go away.  I had a
hard time trying to disable it anyway ;-)

The problem this time is lots of code assuming that a call to
add_uevent_var() will guarantee that env->buflen > 0.  This is not true
if CONFIG_HOTPLUG is unset.  So things like this end up overwriting
env->envp_idx because the array index is -1:

	if (add_uevent_var(env, "MODALIAS="))
		return -ENOMEM;
        len = input_print_modalias(&env->buf[env->buflen - 1],
				   sizeof(env->buf) - env->buflen,
				   dev, 0);

Don't know what the best action is, given that there seem to be a *lot*
of this around the kernel.  This patch "fixes" the problem for me, but I
don't know if it can be considered an appropriate fix.

[ It is the correct fix for now, for 3.7 forcing CONFIG_HOTPLUG to
always be on is the longterm fix, but it's too late for 3.6 and older
kernels to resolve this that way - gregkh ]

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Tested-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index fc615a97e2d..1e57449395b 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -224,7 +224,7 @@ static inline int kobject_uevent_env(struct kobject *kobj,
 
 static inline __printf(2, 3)
 int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
-{ return 0; }
+{ return -ENOMEM; }
 
 static inline int kobject_action_type(const char *buf, size_t count,
 				      enum kobject_action *type)
-- 
cgit v1.2.3-70-g09d2


From f39c1bfb5a03e2d255451bff05be0d7255298fa4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 7 Sep 2012 11:08:50 -0400
Subject: SUNRPC: Fix a UDP transport regression

Commit 43cedbf0e8dfb9c5610eb7985d5f21263e313802 (SUNRPC: Ensure that
we grab the XPRT_LOCK before calling xprt_alloc_slot) is causing
hangs in the case of NFS over UDP mounts.

Since neither the UDP or the RDMA transport mechanism use dynamic slot
allocation, we can skip grabbing the socket lock for those transports.
Add a new rpc_xprt_op to allow switching between the TCP and UDP/RDMA
case.

Note that the NFSv4.1 back channel assigns the slot directly
through rpc_run_bc_task, so we can ignore that case.

Reported-by: Dick Streefland <dick.streefland@altium.nl>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org [>= 3.1]
---
 include/linux/sunrpc/xprt.h     |  3 +++
 net/sunrpc/xprt.c               | 34 ++++++++++++++++++++--------------
 net/sunrpc/xprtrdma/transport.c |  1 +
 net/sunrpc/xprtsock.c           |  3 +++
 4 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index cff40aa7db6..bf8c49ff753 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
 	int		(*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+	void		(*alloc_slot)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*rpcbind)(struct rpc_task *task);
 	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
 	void		(*connect)(struct rpc_task *task);
@@ -281,6 +282,8 @@ void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
+void			xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
+void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a5a402a7d21..5d7f61d7559 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -969,11 +969,11 @@ static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 	return false;
 }
 
-static void xprt_alloc_slot(struct rpc_task *task)
+void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_xprt	*xprt = task->tk_xprt;
 	struct rpc_rqst *req;
 
+	spin_lock(&xprt->reserve_lock);
 	if (!list_empty(&xprt->free)) {
 		req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
 		list_del(&req->rq_list);
@@ -994,12 +994,29 @@ static void xprt_alloc_slot(struct rpc_task *task)
 	default:
 		task->tk_status = -EAGAIN;
 	}
+	spin_unlock(&xprt->reserve_lock);
 	return;
 out_init_req:
 	task->tk_status = 0;
 	task->tk_rqstp = req;
 	xprt_request_init(task, xprt);
+	spin_unlock(&xprt->reserve_lock);
+}
+EXPORT_SYMBOL_GPL(xprt_alloc_slot);
+
+void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	/* Note: grabbing the xprt_lock_write() ensures that we throttle
+	 * new slot allocation if the transport is congested (i.e. when
+	 * reconnecting a stream transport or when out of socket write
+	 * buffer space).
+	 */
+	if (xprt_lock_write(xprt, task)) {
+		xprt_alloc_slot(xprt, task);
+		xprt_release_write(xprt, task);
+	}
 }
+EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
 
 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
@@ -1083,20 +1100,9 @@ void xprt_reserve(struct rpc_task *task)
 	if (task->tk_rqstp != NULL)
 		return;
 
-	/* Note: grabbing the xprt_lock_write() here is not strictly needed,
-	 * but ensures that we throttle new slot allocation if the transport
-	 * is congested (e.g. if reconnecting or if we're out of socket
-	 * write buffer space).
-	 */
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	if (!xprt_lock_write(xprt, task))
-		return;
-
-	spin_lock(&xprt->reserve_lock);
-	xprt_alloc_slot(task);
-	spin_unlock(&xprt->reserve_lock);
-	xprt_release_write(xprt, task);
+	xprt->ops->alloc_slot(xprt, task);
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 06cdbff79e4..5d9202dc7cb 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -713,6 +713,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 static struct rpc_xprt_ops xprt_rdma_procs = {
 	.reserve_xprt		= xprt_rdma_reserve_xprt,
 	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
+	.alloc_slot		= xprt_alloc_slot,
 	.release_request	= xprt_release_rqst_cong,       /* ditto */
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* ditto */
 	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 400567243f8..a35b8e52e55 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2473,6 +2473,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
 static struct rpc_xprt_ops xs_local_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
+	.alloc_slot		= xprt_alloc_slot,
 	.rpcbind		= xs_local_rpcbind,
 	.set_port		= xs_local_set_port,
 	.connect		= xs_connect,
@@ -2489,6 +2490,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
+	.alloc_slot		= xprt_alloc_slot,
 	.rpcbind		= rpcb_getport_async,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
@@ -2506,6 +2508,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 static struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xs_tcp_release_xprt,
+	.alloc_slot		= xprt_lock_and_alloc_slot,
 	.rpcbind		= rpcb_getport_async,
 	.set_port		= xs_set_port,
 	.connect		= xs_connect,
-- 
cgit v1.2.3-70-g09d2


From a8edc3bf05a3465726afdf635a820761fae0d50b Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 5 Sep 2012 22:50:48 +0000
Subject: net/mlx4_core: Put Firmware flow steering structures in common header
 files

To allow for usage of the flow steering Firmware structures in more locations over the driver,
such as the resource tracker, move them from mcg.c to common header files.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/mcg.c  | 90 +++----------------------------
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 76 ++++++++++++++++++++++++++
 include/linux/mlx4/device.h               |  2 +
 3 files changed, 85 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index a018ea2a43d..2673f3ba935 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -650,13 +650,6 @@ static int find_entry(struct mlx4_dev *dev, u8 port,
 	return err;
 }
 
-struct mlx4_net_trans_rule_hw_ctrl {
-	__be32 ctrl;
-	__be32 vf_vep_port;
-	__be32 qpn;
-	__be32 reserved;
-};
-
 static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
 				  struct mlx4_net_trans_rule_hw_ctrl *hw)
 {
@@ -680,87 +673,18 @@ static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
 	hw->qpn = cpu_to_be32(ctrl->qpn);
 }
 
-struct mlx4_net_trans_rule_hw_ib {
-	u8	size;
-	u8	rsvd1;
-	__be16	id;
-	u32	rsvd2;
-	__be32	qpn;
-	__be32	qpn_mask;
-	u8	dst_gid[16];
-	u8	dst_gid_msk[16];
-} __packed;
-
-struct mlx4_net_trans_rule_hw_eth {
-	u8	size;
-	u8	rsvd;
-	__be16	id;
-	u8	rsvd1[6];
-	u8	dst_mac[6];
-	u16	rsvd2;
-	u8	dst_mac_msk[6];
-	u16	rsvd3;
-	u8	src_mac[6];
-	u16	rsvd4;
-	u8	src_mac_msk[6];
-	u8      rsvd5;
-	u8      ether_type_enable;
-	__be16  ether_type;
-	__be16  vlan_id_msk;
-	__be16  vlan_id;
-} __packed;
-
-struct mlx4_net_trans_rule_hw_tcp_udp {
-	u8	size;
-	u8	rsvd;
-	__be16	id;
-	__be16	rsvd1[3];
-	__be16	dst_port;
-	__be16	rsvd2;
-	__be16	dst_port_msk;
-	__be16	rsvd3;
-	__be16	src_port;
-	__be16	rsvd4;
-	__be16	src_port_msk;
-} __packed;
-
-struct mlx4_net_trans_rule_hw_ipv4 {
-	u8	size;
-	u8	rsvd;
-	__be16	id;
-	__be32	rsvd1;
-	__be32	dst_ip;
-	__be32	dst_ip_msk;
-	__be32	src_ip;
-	__be32	src_ip_msk;
-} __packed;
-
-struct _rule_hw {
-	union {
-		struct {
-			u8 size;
-			u8 rsvd;
-			__be16 id;
-		};
-		struct mlx4_net_trans_rule_hw_eth eth;
-		struct mlx4_net_trans_rule_hw_ib ib;
-		struct mlx4_net_trans_rule_hw_ipv4 ipv4;
-		struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
-	};
+const u16 __sw_id_hw[] = {
+	[MLX4_NET_TRANS_RULE_ID_ETH]     = 0xE001,
+	[MLX4_NET_TRANS_RULE_ID_IB]      = 0xE005,
+	[MLX4_NET_TRANS_RULE_ID_IPV6]    = 0xE003,
+	[MLX4_NET_TRANS_RULE_ID_IPV4]    = 0xE002,
+	[MLX4_NET_TRANS_RULE_ID_TCP]     = 0xE004,
+	[MLX4_NET_TRANS_RULE_ID_UDP]     = 0xE006
 };
 
 static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
 			    struct _rule_hw *rule_hw)
 {
-	static const u16 __sw_id_hw[] = {
-		[MLX4_NET_TRANS_RULE_ID_ETH]     = 0xE001,
-		[MLX4_NET_TRANS_RULE_ID_IB]      = 0xE005,
-		[MLX4_NET_TRANS_RULE_ID_IPV6]    = 0xE003,
-		[MLX4_NET_TRANS_RULE_ID_IPV4]    = 0xE002,
-		[MLX4_NET_TRANS_RULE_ID_TCP]     = 0xE004,
-		[MLX4_NET_TRANS_RULE_ID_UDP]     = 0xE006
-	};
-
 	static const size_t __rule_hw_sz[] = {
 		[MLX4_NET_TRANS_RULE_ID_ETH] =
 			sizeof(struct mlx4_net_trans_rule_hw_eth),
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 4d9df8f2a12..dba69d98734 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -690,6 +690,82 @@ struct mlx4_steer {
 	struct list_head steer_entries[MLX4_NUM_STEERS];
 };
 
+struct mlx4_net_trans_rule_hw_ctrl {
+	__be32 ctrl;
+	__be32 vf_vep_port;
+	__be32 qpn;
+	__be32 reserved;
+};
+
+struct mlx4_net_trans_rule_hw_ib {
+	u8 size;
+	u8 rsvd1;
+	__be16 id;
+	u32 rsvd2;
+	__be32 qpn;
+	__be32 qpn_mask;
+	u8 dst_gid[16];
+	u8 dst_gid_msk[16];
+} __packed;
+
+struct mlx4_net_trans_rule_hw_eth {
+	u8	size;
+	u8	rsvd;
+	__be16	id;
+	u8	rsvd1[6];
+	u8	dst_mac[6];
+	u16	rsvd2;
+	u8	dst_mac_msk[6];
+	u16	rsvd3;
+	u8	src_mac[6];
+	u16	rsvd4;
+	u8	src_mac_msk[6];
+	u8      rsvd5;
+	u8      ether_type_enable;
+	__be16  ether_type;
+	__be16  vlan_id_msk;
+	__be16  vlan_id;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_tcp_udp {
+	u8	size;
+	u8	rsvd;
+	__be16	id;
+	__be16	rsvd1[3];
+	__be16	dst_port;
+	__be16	rsvd2;
+	__be16	dst_port_msk;
+	__be16	rsvd3;
+	__be16	src_port;
+	__be16	rsvd4;
+	__be16	src_port_msk;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_ipv4 {
+	u8	size;
+	u8	rsvd;
+	__be16	id;
+	__be32	rsvd1;
+	__be32	dst_ip;
+	__be32	dst_ip_msk;
+	__be32	src_ip;
+	__be32	src_ip_msk;
+} __packed;
+
+struct _rule_hw {
+	union {
+		struct {
+			u8 size;
+			u8 rsvd;
+			__be16 id;
+		};
+		struct mlx4_net_trans_rule_hw_eth eth;
+		struct mlx4_net_trans_rule_hw_ib ib;
+		struct mlx4_net_trans_rule_hw_ipv4 ipv4;
+		struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
+	};
+};
+
 struct mlx4_priv {
 	struct mlx4_dev		dev;
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index bd6c9fcdf2d..244ba902ab7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -796,6 +796,8 @@ enum mlx4_net_trans_rule_id {
 	MLX4_NET_TRANS_RULE_NUM, /* should be last */
 };
 
+extern const u16 __sw_id_hw[];
+
 enum mlx4_net_trans_promisc_mode {
 	MLX4_FS_PROMISC_NONE = 0,
 	MLX4_FS_PROMISC_UPLINK,
-- 
cgit v1.2.3-70-g09d2


From 7fb40f87c4195ec1728527f30bc744c47a45b366 Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Wed, 5 Sep 2012 22:50:49 +0000
Subject: net/mlx4_core: Add security check / enforcement for flow steering
 rules set for VMs

Since VFs may be mapped to VMs which aren't trusted entities,  flow
steering rules attached through the wrapper on behalf of VFs must be
checked to make sure that their L2 specification relate to MAC address
assigned to that VF, and add L2 specification if its missing.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 116 +++++++++++++++++++++
 include/linux/mlx4/device.h                        |  11 ++
 2 files changed, 127 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 94ceddd17ab..293c9e820c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -42,6 +42,7 @@
 #include <linux/mlx4/cmd.h>
 #include <linux/mlx4/qp.h>
 #include <linux/if_ether.h>
+#include <linux/etherdevice.h>
 
 #include "mlx4.h"
 #include "fw.h"
@@ -2776,18 +2777,133 @@ ex_put:
 	return err;
 }
 
+/*
+ * MAC validation for Flow Steering rules.
+ * VF can attach rules only with a mac address which is assigned to it.
+ */
+static int validate_eth_header_mac(int slave, struct _rule_hw *eth_header,
+				   struct list_head *rlist)
+{
+	struct mac_res *res, *tmp;
+	__be64 be_mac;
+
+	/* make sure it isn't multicast or broadcast mac*/
+	if (!is_multicast_ether_addr(eth_header->eth.dst_mac) &&
+	    !is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
+		list_for_each_entry_safe(res, tmp, rlist, list) {
+			be_mac = cpu_to_be64(res->mac << 16);
+			if (!memcmp(&be_mac, eth_header->eth.dst_mac, ETH_ALEN))
+				return 0;
+		}
+		pr_err("MAC %pM doesn't belong to VF %d, Steering rule rejected\n",
+		       eth_header->eth.dst_mac, slave);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * In case of missing eth header, append eth header with a MAC address
+ * assigned to the VF.
+ */
+static int add_eth_header(struct mlx4_dev *dev, int slave,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct list_head *rlist, int header_id)
+{
+	struct mac_res *res, *tmp;
+	u8 port;
+	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+	struct mlx4_net_trans_rule_hw_eth *eth_header;
+	struct mlx4_net_trans_rule_hw_ipv4 *ip_header;
+	struct mlx4_net_trans_rule_hw_tcp_udp *l4_header;
+	__be64 be_mac = 0;
+	__be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+	ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+	port = be32_to_cpu(ctrl->vf_vep_port) & 0xff;
+	eth_header = (struct mlx4_net_trans_rule_hw_eth *)(ctrl + 1);
+
+	/* Clear a space in the inbox for eth header */
+	switch (header_id) {
+	case MLX4_NET_TRANS_RULE_ID_IPV4:
+		ip_header =
+			(struct mlx4_net_trans_rule_hw_ipv4 *)(eth_header + 1);
+		memmove(ip_header, eth_header,
+			sizeof(*ip_header) + sizeof(*l4_header));
+		break;
+	case MLX4_NET_TRANS_RULE_ID_TCP:
+	case MLX4_NET_TRANS_RULE_ID_UDP:
+		l4_header = (struct mlx4_net_trans_rule_hw_tcp_udp *)
+			    (eth_header + 1);
+		memmove(l4_header, eth_header, sizeof(*l4_header));
+		break;
+	default:
+		return -EINVAL;
+	}
+	list_for_each_entry_safe(res, tmp, rlist, list) {
+		if (port == res->port) {
+			be_mac = cpu_to_be64(res->mac << 16);
+			break;
+		}
+	}
+	if (!be_mac) {
+		pr_err("Failed adding eth header to FS rule, Can't find matching MAC for port %d .\n",
+		       port);
+		return -EINVAL;
+	}
+
+	memset(eth_header, 0, sizeof(*eth_header));
+	eth_header->size = sizeof(*eth_header) >> 2;
+	eth_header->id = cpu_to_be16(__sw_id_hw[MLX4_NET_TRANS_RULE_ID_ETH]);
+	memcpy(eth_header->dst_mac, &be_mac, ETH_ALEN);
+	memcpy(eth_header->dst_mac_msk, &mac_msk, ETH_ALEN);
+
+	return 0;
+
+}
+
 int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 					 struct mlx4_vhcr *vhcr,
 					 struct mlx4_cmd_mailbox *inbox,
 					 struct mlx4_cmd_mailbox *outbox,
 					 struct mlx4_cmd_info *cmd)
 {
+
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
+	struct list_head *rlist = &tracker->slave_list[slave].res_list[RES_MAC];
 	int err;
+	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+	struct _rule_hw  *rule_header;
+	int header_id;
 
 	if (dev->caps.steering_mode !=
 	    MLX4_STEERING_MODE_DEVICE_MANAGED)
 		return -EOPNOTSUPP;
 
+	ctrl = (struct mlx4_net_trans_rule_hw_ctrl *)inbox->buf;
+	rule_header = (struct _rule_hw *)(ctrl + 1);
+	header_id = map_hw_to_sw_id(be16_to_cpu(rule_header->id));
+
+	switch (header_id) {
+	case MLX4_NET_TRANS_RULE_ID_ETH:
+		if (validate_eth_header_mac(slave, rule_header, rlist))
+			return -EINVAL;
+		break;
+	case MLX4_NET_TRANS_RULE_ID_IPV4:
+	case MLX4_NET_TRANS_RULE_ID_TCP:
+	case MLX4_NET_TRANS_RULE_ID_UDP:
+		pr_warn("Can't attach FS rule without L2 headers, adding L2 header.\n");
+		if (add_eth_header(dev, slave, inbox, rlist, header_id))
+			return -EINVAL;
+		vhcr->in_modifier +=
+			sizeof(struct mlx4_net_trans_rule_hw_eth) >> 2;
+		break;
+	default:
+		pr_err("Corrupted mailbox.\n");
+		return -EINVAL;
+	}
+
 	err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
 			   vhcr->in_modifier, 0,
 			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 244ba902ab7..6e1b0f973a0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -798,6 +798,17 @@ enum mlx4_net_trans_rule_id {
 
 extern const u16 __sw_id_hw[];
 
+static inline int map_hw_to_sw_id(u16 header_id)
+{
+
+	int i;
+	for (i = 0; i < MLX4_NET_TRANS_RULE_NUM; i++) {
+		if (header_id == __sw_id_hw[i])
+			return i;
+	}
+	return -EINVAL;
+}
+
 enum mlx4_net_trans_promisc_mode {
 	MLX4_FS_PROMISC_NONE = 0,
 	MLX4_FS_PROMISC_UPLINK,
-- 
cgit v1.2.3-70-g09d2


From 32a8811ff164f882712c17946e58e52444f464a7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 7 Sep 2012 17:30:36 +0200
Subject: target: support zero allocation length in REQUEST SENSE

Similar to INQUIRY and MODE SENSE, construct the sense data in a
buffer and later copy it to the scatterlist.  Do not do anything,
but still clear a pending unit attention condition, if the allocation
length is zero.

However, SPC tells us that "If a REQUEST SENSE command is terminated with
CHECK CONDITION status [and] the REQUEST SENSE command was received on
an I_T nexus with a pending unit attention condition (i.e., before the
device server reports CHECK CONDITION status), then the device server
shall not clear the pending unit attention condition."  Do the
transport_kmap_data_sg early to detect this case.

It also tells us "Device servers shall not adjust the additional sense
length to reflect truncation if the allocation length is less than the
sense data available", so do not do that!  Note that the err variable
is write-only.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_spc.c  | 35 ++++++++++++++++++-----------------
 include/target/target_core_base.h |  1 +
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 4c861de538c..388a922c8f6 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -877,9 +877,11 @@ static int spc_emulate_modesense(struct se_cmd *cmd)
 static int spc_emulate_request_sense(struct se_cmd *cmd)
 {
 	unsigned char *cdb = cmd->t_task_cdb;
-	unsigned char *buf;
+	unsigned char *rbuf;
 	u8 ua_asc = 0, ua_ascq = 0;
-	int err = 0;
+	unsigned char buf[SE_SENSE_BUF];
+
+	memset(buf, 0, SE_SENSE_BUF);
 
 	if (cdb[1] & 0x01) {
 		pr_err("REQUEST_SENSE description emulation not"
@@ -888,20 +890,21 @@ static int spc_emulate_request_sense(struct se_cmd *cmd)
 		return -ENOSYS;
 	}
 
-	buf = transport_kmap_data_sg(cmd);
-
-	if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) {
+	rbuf = transport_kmap_data_sg(cmd);
+	if (cmd->scsi_sense_reason != 0) {
+		/*
+		 * Out of memory.  We will fail with CHECK CONDITION, so
+		 * we must not clear the unit attention condition.
+		 */
+		target_complete_cmd(cmd, CHECK_CONDITION);
+		return 0;
+	} else if (!core_scsi3_ua_clear_for_request_sense(cmd, &ua_asc, &ua_ascq)) {
 		/*
 		 * CURRENT ERROR, UNIT ATTENTION
 		 */
 		buf[0] = 0x70;
 		buf[SPC_SENSE_KEY_OFFSET] = UNIT_ATTENTION;
 
-		if (cmd->data_length < 18) {
-			buf[7] = 0x00;
-			err = -EINVAL;
-			goto end;
-		}
 		/*
 		 * The Additional Sense Code (ASC) from the UNIT ATTENTION
 		 */
@@ -915,11 +918,6 @@ static int spc_emulate_request_sense(struct se_cmd *cmd)
 		buf[0] = 0x70;
 		buf[SPC_SENSE_KEY_OFFSET] = NO_SENSE;
 
-		if (cmd->data_length < 18) {
-			buf[7] = 0x00;
-			err = -EINVAL;
-			goto end;
-		}
 		/*
 		 * NO ADDITIONAL SENSE INFORMATION
 		 */
@@ -927,8 +925,11 @@ static int spc_emulate_request_sense(struct se_cmd *cmd)
 		buf[7] = 0x0A;
 	}
 
-end:
-	transport_kunmap_data_sg(cmd);
+	if (rbuf) {
+		memcpy(rbuf, buf, min_t(u32, sizeof(buf), cmd->data_length));
+		transport_kunmap_data_sg(cmd);
+	}
+
 	target_complete_cmd(cmd, GOOD);
 	return 0;
 }
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 015cea01ae3..5be89373cea 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -121,6 +121,7 @@
 
 #define SE_INQUIRY_BUF				512
 #define SE_MODE_PAGE_BUF			512
+#define SE_SENSE_BUF				96
 
 /* struct se_hba->hba_flags */
 enum hba_flags_table {
-- 
cgit v1.2.3-70-g09d2


From 2fc136eecd0c647a6b13fcd00d0c41a1a28f35a5 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 12 Sep 2012 12:44:30 +0100
Subject: xen/m2p: do not reuse kmap_op->dev_bus_addr

If the caller passes a valid kmap_op to m2p_add_override, we use
kmap_op->dev_bus_addr to store the original mfn, but dev_bus_addr is
part of the interface with Xen and if we are batching the hypercalls it
might not have been written by the hypervisor yet. That means that later
on Xen will write to it and we'll think that the original mfn is
actually what Xen has written to it.

Rather than "stealing" struct members from kmap_op, keep using
page->index to store the original mfn and add another parameter to
m2p_remove_override to get the corresponding kmap_op instead.
It is now responsibility of the caller to keep track of which kmap_op
corresponds to a particular page in the m2p_override (gntdev, the only
user of this interface that passes a valid kmap_op, is already doing that).

CC: stable@kernel.org
Reported-and-Tested-By: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/page.h     |  3 ++-
 arch/x86/xen/p2m.c                  | 27 +++++++++++----------------
 drivers/block/xen-blkback/blkback.c |  2 +-
 drivers/xen/gntdev.c                |  5 +++--
 drivers/xen/grant-table.c           |  6 ++++--
 include/xen/grant_table.h           |  3 ++-
 6 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 93971e841dd..472b9b78301 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
 			    struct gnttab_map_grant_ref *kmap_op);
-extern int m2p_remove_override(struct page *page, bool clear_pte);
+extern int m2p_remove_override(struct page *page,
+				struct gnttab_map_grant_ref *kmap_op);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e53..72213da605f 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 		}
-		/* let's use dev_bus_addr to record the old mfn instead */
-		kmap_op->dev_bus_addr = page->index;
-		page->index = (unsigned long) kmap_op;
 	}
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
@@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
-int m2p_remove_override(struct page *page, bool clear_pte)
+int m2p_remove_override(struct page *page,
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 	WARN_ON(!PagePrivate(page));
 	ClearPagePrivate(page);
 
-	if (clear_pte) {
-		struct gnttab_map_grant_ref *map_op =
-			(struct gnttab_map_grant_ref *) page->index;
-		set_phys_to_machine(pfn, map_op->dev_bus_addr);
+	set_phys_to_machine(pfn, page->index);
+	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
 			struct gnttab_unmap_grant_ref *unmap_op;
@@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			 * issued. In this case handle is going to -1 because
 			 * it hasn't been modified yet.
 			 */
-			if (map_op->handle == -1)
+			if (kmap_op->handle == -1)
 				xen_mc_flush();
 			/*
-			 * Now if map_op->handle is negative it means that the
+			 * Now if kmap_op->handle is negative it means that the
 			 * hypercall actually returned an error.
 			 */
-			if (map_op->handle == GNTST_general_error) {
+			if (kmap_op->handle == GNTST_general_error) {
 				printk(KERN_WARNING "m2p_remove_override: "
 						"pfn %lx mfn %lx, failed to modify kernel mappings",
 						pfn, mfn);
@@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			mcs = xen_mc_entry(
 					sizeof(struct gnttab_unmap_grant_ref));
 			unmap_op = mcs.args;
-			unmap_op->host_addr = map_op->host_addr;
-			unmap_op->handle = map_op->handle;
+			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->handle = kmap_op->handle;
 			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
@@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			set_pte_at(&init_mm, address, ptep,
 					pfn_pte(pfn, PAGE_KERNEL));
 			__flush_tlb_single(address);
-			map_op->host_addr = 0;
+			kmap_op->host_addr = 0;
 		}
-	} else
-		set_phys_to_machine(pfn, page->index);
+	}
 
 	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
 	 * somewhere in this domain, even before being added to the
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 73f196ca713..c6decb901e5 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req)
 		invcount++;
 	}
 
-	ret = gnttab_unmap_refs(unmap, pages, invcount, false);
+	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
 	BUG_ON(ret);
 }
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1ffd03bf8e1..7f124160848 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
-				pages, true);
+	err = gnttab_unmap_refs(map->unmap_ops + offset,
+			use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
+			pages);
 	if (err)
 		return err;
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 0bfc1ef1125..006726688ba 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte)
+		      struct gnttab_map_grant_ref *kmap_ops,
+		      struct page **pages, unsigned int count)
 {
 	int i, ret;
 	bool lazy = false;
@@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	}
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], clear_pte);
+		ret = m2p_remove_override(pages[i], kmap_ops ?
+				       &kmap_ops[i] : NULL);
 		if (ret)
 			return ret;
 	}
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 11e27c3af3c..f19fff8650e 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte);
+		      struct gnttab_map_grant_ref *kunmap_ops,
+		      struct page **pages, unsigned int count);
 
 #endif /* __ASM_GNTTAB_H__ */
-- 
cgit v1.2.3-70-g09d2


From c076ada4e4aaf45e1a31ad6de7c6cce36081e045 Mon Sep 17 00:00:00 2001
From: Roland Stigge <stigge@antcom.de>
Date: Wed, 8 Aug 2012 09:42:32 +0200
Subject: i2c: pnx: Fix read transactions of >= 2 bytes

On transactions with n>=2 bytes, the controller actually wrongly clocks in n+1
bytes. This is caused by the (wrong) assumption that RFE in the Status Register
is 1 iff there is no byte already ordered (via a dummy TX byte). This lead to
the implementation of synchronized byte ordering, e.g.:

Dummy-TX - RX - Dummy-TX - RX - ...

But since RFE actually stays high after some Dummy-TX, it rather looks like:

Dummy-TX - Dummy-TX - RX - Dummy-TX - RX - (RX)

The last RX byte is clocked in by the bus controller, but ignored by the kernel
when filling the userspace buffer.

This patch fixes the issue by asking for RX via Dummy-TX asynchronously.
Introducing a separate counter for TX bytes.

Signed-off-by: Roland Stigge <stigge@antcom.de>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 drivers/i2c/busses/i2c-pnx.c | 48 ++++++++++++++++++++++++++------------------
 include/linux/i2c-pnx.h      |  1 +
 2 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index b36e21f9fd5..8488bddfe46 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -291,31 +291,37 @@ static int i2c_pnx_master_rcv(struct i2c_pnx_algo_data *alg_data)
 	 * or we didn't 'ask' for it yet.
 	 */
 	if (ioread32(I2C_REG_STS(alg_data)) & mstatus_rfe) {
-		dev_dbg(&alg_data->adapter.dev,
-			"%s(): Write dummy data to fill Rx-fifo...\n",
-			__func__);
+		/* 'Asking' is done asynchronously, e.g. dummy TX of several
+		 * bytes is done before the first actual RX arrives in FIFO.
+		 * Therefore, ordered bytes (via TX) are counted separately.
+		 */
+		if (alg_data->mif.order) {
+			dev_dbg(&alg_data->adapter.dev,
+				"%s(): Write dummy data to fill Rx-fifo...\n",
+				__func__);
 
-		if (alg_data->mif.len == 1) {
-			/* Last byte, do not acknowledge next rcv. */
-			val |= stop_bit;
+			if (alg_data->mif.order == 1) {
+				/* Last byte, do not acknowledge next rcv. */
+				val |= stop_bit;
+
+				/*
+				 * Enable interrupt RFDAIE (data in Rx fifo),
+				 * and disable DRMIE (need data for Tx)
+				 */
+				ctl = ioread32(I2C_REG_CTL(alg_data));
+				ctl |= mcntrl_rffie | mcntrl_daie;
+				ctl &= ~mcntrl_drmie;
+				iowrite32(ctl, I2C_REG_CTL(alg_data));
+			}
 
 			/*
-			 * Enable interrupt RFDAIE (data in Rx fifo),
-			 * and disable DRMIE (need data for Tx)
+			 * Now we'll 'ask' for data:
+			 * For each byte we want to receive, we must
+			 * write a (dummy) byte to the Tx-FIFO.
 			 */
-			ctl = ioread32(I2C_REG_CTL(alg_data));
-			ctl |= mcntrl_rffie | mcntrl_daie;
-			ctl &= ~mcntrl_drmie;
-			iowrite32(ctl, I2C_REG_CTL(alg_data));
+			iowrite32(val, I2C_REG_TX(alg_data));
+			alg_data->mif.order--;
 		}
-
-		/*
-		 * Now we'll 'ask' for data:
-		 * For each byte we want to receive, we must
-		 * write a (dummy) byte to the Tx-FIFO.
-		 */
-		iowrite32(val, I2C_REG_TX(alg_data));
-
 		return 0;
 	}
 
@@ -515,6 +521,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 
 		alg_data->mif.buf = pmsg->buf;
 		alg_data->mif.len = pmsg->len;
+		alg_data->mif.order = pmsg->len;
 		alg_data->mif.mode = (pmsg->flags & I2C_M_RD) ?
 			I2C_SMBUS_READ : I2C_SMBUS_WRITE;
 		alg_data->mif.ret = 0;
@@ -567,6 +574,7 @@ i2c_pnx_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 	/* Cleanup to be sure... */
 	alg_data->mif.buf = NULL;
 	alg_data->mif.len = 0;
+	alg_data->mif.order = 0;
 
 	dev_dbg(&alg_data->adapter.dev, "%s(): exiting, stat = %x\n",
 		__func__, ioread32(I2C_REG_STS(alg_data)));
diff --git a/include/linux/i2c-pnx.h b/include/linux/i2c-pnx.h
index 1bc74afe7a3..49ed17fdf05 100644
--- a/include/linux/i2c-pnx.h
+++ b/include/linux/i2c-pnx.h
@@ -22,6 +22,7 @@ struct i2c_pnx_mif {
 	struct timer_list	timer;		/* Timeout */
 	u8 *			buf;		/* Data buffer */
 	int			len;		/* Length of data buffer */
+	int			order;		/* RX Bytes to order via TX */
 };
 
 struct i2c_pnx_algo_data {
-- 
cgit v1.2.3-70-g09d2


From d9dd85dd4e18c255225bada181ffcee37f4ff9a6 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
Date: Fri, 20 Apr 2012 18:26:10 +0300
Subject: drm: Drop the NV12M and YUV420M formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NV12M/YUV420M formats are identical to the NV12/YUV420 formats.
So just remove these duplicated format names.

This might look like breaking the ABI, but the code has never actually
accepted these formats, so nothing can be using them.

Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
---
 include/drm/drm_fourcc.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h
index bdf0152cbbe..f4621184a9b 100644
--- a/include/drm/drm_fourcc.h
+++ b/include/drm/drm_fourcc.h
@@ -107,8 +107,7 @@
 #define DRM_FORMAT_NV16		fourcc_code('N', 'V', '1', '6') /* 2x1 subsampled Cr:Cb plane */
 #define DRM_FORMAT_NV61		fourcc_code('N', 'V', '6', '1') /* 2x1 subsampled Cb:Cr plane */
 
-/* 2 non contiguous plane YCbCr */
-#define DRM_FORMAT_NV12M	fourcc_code('N', 'M', '1', '2') /* 2x2 subsampled Cr:Cb plane */
+/* special NV12 tiled format */
 #define DRM_FORMAT_NV12MT	fourcc_code('T', 'M', '1', '2') /* 2x2 subsampled Cr:Cb plane 64x32 macroblocks */
 
 /*
@@ -131,7 +130,4 @@
 #define DRM_FORMAT_YUV444	fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) planes */
 #define DRM_FORMAT_YVU444	fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) planes */
 
-/* 3 non contiguous plane YCbCr */
-#define DRM_FORMAT_YUV420M	fourcc_code('Y', 'M', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes */
-
 #endif /* DRM_FOURCC_H */
-- 
cgit v1.2.3-70-g09d2


From bad9ac2d7f878a31cf1ae8c1ee3768077d222bcb Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 25 Jul 2012 19:12:45 +0200
Subject: perf/x86/ibs: Check syscall attribute flags

Current implementation simply ignores attribute flags. Thus, there is
no notification to userland of unsupported features. Check syscall's
attribute flags to let userland know if a feature is supported by the
kernel. This is also needed to distinguish between future kernels what
might support a feature.

Cc: <stable@vger.kernel.org> v3.5..
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120910093018.GO8285@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 12 ++++++++++++
 include/linux/perf_event.h               |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 7bfb5bec863..eebd5ffe1bb 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
 	return -EOPNOTSUPP;
 }
 
+static const struct perf_event_attr ibs_notsupp = {
+	.exclude_user	= 1,
+	.exclude_kernel	= 1,
+	.exclude_hv	= 1,
+	.exclude_idle	= 1,
+	.exclude_host	= 1,
+	.exclude_guest	= 1,
+};
+
 static int perf_ibs_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event)
 	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
 
+	if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+		return -EINVAL;
+
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 33ed9d605f9..bdb41612bfe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -274,6 +274,8 @@ struct perf_event_attr {
 	__u64	branch_sample_type; /* enum branch_sample_type */
 };
 
+#define perf_flags(attr)	(*(&(attr)->read_format + 1))
+
 /*
  * Ioctls that can be done on a perf event fd:
  */
-- 
cgit v1.2.3-70-g09d2


From 4b921eda53366b319602351ff4d7256fafa4bd1b Mon Sep 17 00:00:00 2001
From: Karsten Keil <keil@b1-systems.de>
Date: Thu, 13 Sep 2012 04:36:20 +0000
Subject: mISDN: Fix wrong usage of flush_work_sync while holding locks

It is a bad idea to hold a spinlock and call flush_work_sync.
Move the workqueue cleanup outside the spinlock and use cancel_work_sync,
on closing the channel this seems to be the more correct function.
Remove the never used and constant return value of mISDN_freebchannel.

Signed-off-by: Karsten Keil <keil@b1-systems.de>
Cc: <stable@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/avmfritz.c  | 3 ++-
 drivers/isdn/hardware/mISDN/mISDNipac.c | 3 ++-
 drivers/isdn/hardware/mISDN/mISDNisar.c | 3 ++-
 drivers/isdn/hardware/mISDN/netjet.c    | 3 ++-
 drivers/isdn/hardware/mISDN/w6692.c     | 3 ++-
 drivers/isdn/mISDN/hwchannel.c          | 9 ++++-----
 include/linux/mISDNhw.h                 | 2 +-
 7 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index fa6ca473372..dceaec821b0 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -857,8 +857,9 @@ avm_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
 	switch (cmd) {
 	case CLOSE_CHANNEL:
 		test_and_clear_bit(FLG_OPEN, &bch->Flags);
+		cancel_work_sync(&bch->workq);
 		spin_lock_irqsave(&fc->lock, flags);
-		mISDN_freebchannel(bch);
+		mISDN_clear_bchannel(bch);
 		modehdlc(bch, ISDN_P_NONE);
 		spin_unlock_irqrestore(&fc->lock, flags);
 		ch->protocol = ISDN_P_NONE;
diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c
index 752e0825591..ccd7d851be2 100644
--- a/drivers/isdn/hardware/mISDN/mISDNipac.c
+++ b/drivers/isdn/hardware/mISDN/mISDNipac.c
@@ -1406,8 +1406,9 @@ hscx_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
 	switch (cmd) {
 	case CLOSE_CHANNEL:
 		test_and_clear_bit(FLG_OPEN, &bch->Flags);
+		cancel_work_sync(&bch->workq);
 		spin_lock_irqsave(hx->ip->hwlock, flags);
-		mISDN_freebchannel(bch);
+		mISDN_clear_bchannel(bch);
 		hscx_mode(hx, ISDN_P_NONE);
 		spin_unlock_irqrestore(hx->ip->hwlock, flags);
 		ch->protocol = ISDN_P_NONE;
diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c
index be5973ded6d..182ecf0626c 100644
--- a/drivers/isdn/hardware/mISDN/mISDNisar.c
+++ b/drivers/isdn/hardware/mISDN/mISDNisar.c
@@ -1588,8 +1588,9 @@ isar_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
 	switch (cmd) {
 	case CLOSE_CHANNEL:
 		test_and_clear_bit(FLG_OPEN, &bch->Flags);
+		cancel_work_sync(&bch->workq);
 		spin_lock_irqsave(ich->is->hwlock, flags);
-		mISDN_freebchannel(bch);
+		mISDN_clear_bchannel(bch);
 		modeisar(ich, ISDN_P_NONE);
 		spin_unlock_irqrestore(ich->is->hwlock, flags);
 		ch->protocol = ISDN_P_NONE;
diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index c3e3e768627..9bcade59eb7 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -812,8 +812,9 @@ nj_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
 	switch (cmd) {
 	case CLOSE_CHANNEL:
 		test_and_clear_bit(FLG_OPEN, &bch->Flags);
+		cancel_work_sync(&bch->workq);
 		spin_lock_irqsave(&card->lock, flags);
-		mISDN_freebchannel(bch);
+		mISDN_clear_bchannel(bch);
 		mode_tiger(bc, ISDN_P_NONE);
 		spin_unlock_irqrestore(&card->lock, flags);
 		ch->protocol = ISDN_P_NONE;
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index 26a86b84609..335fe645500 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -1054,8 +1054,9 @@ w6692_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg)
 	switch (cmd) {
 	case CLOSE_CHANNEL:
 		test_and_clear_bit(FLG_OPEN, &bch->Flags);
+		cancel_work_sync(&bch->workq);
 		spin_lock_irqsave(&card->lock, flags);
-		mISDN_freebchannel(bch);
+		mISDN_clear_bchannel(bch);
 		w6692_mode(bc, ISDN_P_NONE);
 		spin_unlock_irqrestore(&card->lock, flags);
 		ch->protocol = ISDN_P_NONE;
diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c
index ef34fd40867..2602be23f34 100644
--- a/drivers/isdn/mISDN/hwchannel.c
+++ b/drivers/isdn/mISDN/hwchannel.c
@@ -148,17 +148,16 @@ mISDN_clear_bchannel(struct bchannel *ch)
 	ch->next_minlen = ch->init_minlen;
 	ch->maxlen = ch->init_maxlen;
 	ch->next_maxlen = ch->init_maxlen;
+	skb_queue_purge(&ch->rqueue);
+	ch->rcount = 0;
 }
 EXPORT_SYMBOL(mISDN_clear_bchannel);
 
-int
+void
 mISDN_freebchannel(struct bchannel *ch)
 {
+	cancel_work_sync(&ch->workq);
 	mISDN_clear_bchannel(ch);
-	skb_queue_purge(&ch->rqueue);
-	ch->rcount = 0;
-	flush_work_sync(&ch->workq);
-	return 0;
 }
 EXPORT_SYMBOL(mISDN_freebchannel);
 
diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h
index d0752eca9b4..9d96d5d4dfe 100644
--- a/include/linux/mISDNhw.h
+++ b/include/linux/mISDNhw.h
@@ -183,7 +183,7 @@ extern int	mISDN_initbchannel(struct bchannel *, unsigned short,
 				   unsigned short);
 extern int	mISDN_freedchannel(struct dchannel *);
 extern void	mISDN_clear_bchannel(struct bchannel *);
-extern int	mISDN_freebchannel(struct bchannel *);
+extern void	mISDN_freebchannel(struct bchannel *);
 extern int	mISDN_ctrl_bchannel(struct bchannel *, struct mISDN_ctrl_req *);
 extern void	queue_ch_frame(struct mISDNchannel *, u_int,
 			int, struct sk_buff *);
-- 
cgit v1.2.3-70-g09d2


From 0848c94fb4a5cc213a7fb0fb3a5721ad6e16f096 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 11 Sep 2012 15:16:36 +0800
Subject: mfd: core: Push irqdomain mapping out into devices

Currently the MFD core supports remapping MFD cell interrupts using an
irqdomain but only if the MFD is being instantiated using device tree
and only if the device tree bindings use the pattern of registering IPs
in the device tree with compatible properties.  This will be actively
harmful for drivers which support non-DT platforms and use this pattern
for their DT bindings as it will mean that the core will silently change
remapping behaviour and it is also limiting for drivers which don't do
DT with this particular pattern.  There is also a potential fragility if
there are interrupts not associated with MFD cells and all the cells are
omitted from the device tree for some reason.

Instead change the code to take an IRQ domain as an optional argument,
allowing drivers to take the decision about the parent domain for their
interrupts.  The one current user of this feature is ab8500-core, it has
the domain lookup pushed out into the driver.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/88pm800.c             |  5 +++--
 drivers/mfd/88pm805.c             |  3 ++-
 drivers/mfd/88pm860x-core.c       | 21 +++++++++++----------
 drivers/mfd/aat2870-core.c        |  2 +-
 drivers/mfd/ab3100-core.c         |  2 +-
 drivers/mfd/ab8500-core.c         | 10 +++++-----
 drivers/mfd/arizona-core.c        |  6 +++---
 drivers/mfd/asic3.c               |  6 +++---
 drivers/mfd/cs5535-mfd.c          |  2 +-
 drivers/mfd/da9052-core.c         |  2 +-
 drivers/mfd/davinci_voicecodec.c  |  2 +-
 drivers/mfd/db8500-prcmu.c        |  2 +-
 drivers/mfd/htc-pasic3.c          |  5 +++--
 drivers/mfd/intel_msic.c          |  4 ++--
 drivers/mfd/janz-cmodio.c         |  2 +-
 drivers/mfd/jz4740-adc.c          |  3 ++-
 drivers/mfd/lm3533-core.c         |  7 ++++---
 drivers/mfd/lpc_ich.c             |  4 ++--
 drivers/mfd/lpc_sch.c             |  6 ++++--
 drivers/mfd/max77686.c            |  2 +-
 drivers/mfd/max77693.c            |  2 +-
 drivers/mfd/max8925-core.c        | 12 ++++++------
 drivers/mfd/max8997.c             |  2 +-
 drivers/mfd/max8998.c             |  8 ++++----
 drivers/mfd/mc13xxx-core.c        |  2 +-
 drivers/mfd/mfd-core.c            | 12 ++++++------
 drivers/mfd/palmas.c              |  3 ++-
 drivers/mfd/rc5t583.c             |  2 +-
 drivers/mfd/rdc321x-southbridge.c |  3 ++-
 drivers/mfd/sec-core.c            |  8 ++++----
 drivers/mfd/sta2x11-mfd.c         |  4 ++--
 drivers/mfd/stmpe.c               |  2 +-
 drivers/mfd/t7l66xb.c             |  2 +-
 drivers/mfd/tc3589x.c             |  8 ++++----
 drivers/mfd/tc6387xb.c            |  2 +-
 drivers/mfd/tc6393xb.c            |  4 ++--
 drivers/mfd/ti-ssp.c              |  2 +-
 drivers/mfd/timberdale.c          | 12 ++++++------
 drivers/mfd/tps6105x.c            |  2 +-
 drivers/mfd/tps6507x.c            |  2 +-
 drivers/mfd/tps65090.c            |  2 +-
 drivers/mfd/tps65217.c            |  2 +-
 drivers/mfd/tps6586x.c            |  3 ++-
 drivers/mfd/tps65910.c            |  2 +-
 drivers/mfd/tps65912-core.c       |  2 +-
 drivers/mfd/twl4030-audio.c       |  2 +-
 drivers/mfd/twl6040-core.c        |  2 +-
 drivers/mfd/vx855.c               |  2 +-
 drivers/mfd/wl1273-core.c         |  2 +-
 drivers/mfd/wm831x-core.c         | 16 ++++++++--------
 drivers/mfd/wm8400-core.c         |  2 +-
 drivers/mfd/wm8994-core.c         |  4 ++--
 drivers/staging/nvec/nvec.c       |  2 +-
 include/linux/mfd/core.h          |  4 +++-
 54 files changed, 125 insertions(+), 112 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/88pm800.c b/drivers/mfd/88pm800.c
index b67a3018b13..ce229ea933d 100644
--- a/drivers/mfd/88pm800.c
+++ b/drivers/mfd/88pm800.c
@@ -470,7 +470,8 @@ static int __devinit device_800_init(struct pm80x_chip *chip,
 
 	ret =
 	    mfd_add_devices(chip->dev, 0, &onkey_devs[0],
-			    ARRAY_SIZE(onkey_devs), &onkey_resources[0], 0);
+			    ARRAY_SIZE(onkey_devs), &onkey_resources[0], 0,
+			    NULL);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to add onkey subdev\n");
 		goto out_dev;
@@ -481,7 +482,7 @@ static int __devinit device_800_init(struct pm80x_chip *chip,
 		rtc_devs[0].platform_data = pdata->rtc;
 		rtc_devs[0].pdata_size = sizeof(struct pm80x_rtc_pdata);
 		ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0],
-				      ARRAY_SIZE(rtc_devs), NULL, 0);
+				      ARRAY_SIZE(rtc_devs), NULL, 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add rtc subdev\n");
 			goto out_dev;
diff --git a/drivers/mfd/88pm805.c b/drivers/mfd/88pm805.c
index 6146583589f..c20a31136f0 100644
--- a/drivers/mfd/88pm805.c
+++ b/drivers/mfd/88pm805.c
@@ -216,7 +216,8 @@ static int __devinit device_805_init(struct pm80x_chip *chip)
 	}
 
 	ret = mfd_add_devices(chip->dev, 0, &codec_devs[0],
-			      ARRAY_SIZE(codec_devs), &codec_resources[0], 0);
+			      ARRAY_SIZE(codec_devs), &codec_resources[0], 0,
+			      NULL);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to add codec subdev\n");
 		goto out_codec;
diff --git a/drivers/mfd/88pm860x-core.c b/drivers/mfd/88pm860x-core.c
index d09918cf1b1..b73f033b2c6 100644
--- a/drivers/mfd/88pm860x-core.c
+++ b/drivers/mfd/88pm860x-core.c
@@ -637,7 +637,7 @@ static void __devinit device_bk_init(struct pm860x_chip *chip,
 			bk_devs[i].resources = &bk_resources[j];
 			ret = mfd_add_devices(chip->dev, 0,
 					      &bk_devs[i], 1,
-					      &bk_resources[j], 0);
+					      &bk_resources[j], 0, NULL);
 			if (ret < 0) {
 				dev_err(chip->dev, "Failed to add "
 					"backlight subdev\n");
@@ -672,7 +672,7 @@ static void __devinit device_led_init(struct pm860x_chip *chip,
 			led_devs[i].resources = &led_resources[j],
 			ret = mfd_add_devices(chip->dev, 0,
 					      &led_devs[i], 1,
-					      &led_resources[j], 0);
+					      &led_resources[j], 0, NULL);
 			if (ret < 0) {
 				dev_err(chip->dev, "Failed to add "
 					"led subdev\n");
@@ -709,7 +709,7 @@ static void __devinit device_regulator_init(struct pm860x_chip *chip,
 		regulator_devs[i].resources = &regulator_resources[seq];
 
 		ret = mfd_add_devices(chip->dev, 0, &regulator_devs[i], 1,
-				      &regulator_resources[seq], 0);
+				      &regulator_resources[seq], 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add regulator subdev\n");
 			goto out;
@@ -733,7 +733,7 @@ static void __devinit device_rtc_init(struct pm860x_chip *chip,
 	rtc_devs[0].resources = &rtc_resources[0];
 	ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0],
 			      ARRAY_SIZE(rtc_devs), &rtc_resources[0],
-			      chip->irq_base);
+			      chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add rtc subdev\n");
 }
@@ -752,7 +752,7 @@ static void __devinit device_touch_init(struct pm860x_chip *chip,
 	touch_devs[0].resources = &touch_resources[0];
 	ret = mfd_add_devices(chip->dev, 0, &touch_devs[0],
 			      ARRAY_SIZE(touch_devs), &touch_resources[0],
-			      chip->irq_base);
+			      chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add touch subdev\n");
 }
@@ -770,7 +770,7 @@ static void __devinit device_power_init(struct pm860x_chip *chip,
 	power_devs[0].num_resources = ARRAY_SIZE(battery_resources);
 	power_devs[0].resources = &battery_resources[0],
 	ret = mfd_add_devices(chip->dev, 0, &power_devs[0], 1,
-			      &battery_resources[0], chip->irq_base);
+			      &battery_resources[0], chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add battery subdev\n");
 
@@ -779,7 +779,7 @@ static void __devinit device_power_init(struct pm860x_chip *chip,
 	power_devs[1].num_resources = ARRAY_SIZE(charger_resources);
 	power_devs[1].resources = &charger_resources[0],
 	ret = mfd_add_devices(chip->dev, 0, &power_devs[1], 1,
-			      &charger_resources[0], chip->irq_base);
+			      &charger_resources[0], chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add charger subdev\n");
 
@@ -788,7 +788,7 @@ static void __devinit device_power_init(struct pm860x_chip *chip,
 	power_devs[2].num_resources = ARRAY_SIZE(preg_resources);
 	power_devs[2].resources = &preg_resources[0],
 	ret = mfd_add_devices(chip->dev, 0, &power_devs[2], 1,
-			      &preg_resources[0], chip->irq_base);
+			      &preg_resources[0], chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add preg subdev\n");
 }
@@ -802,7 +802,7 @@ static void __devinit device_onkey_init(struct pm860x_chip *chip,
 	onkey_devs[0].resources = &onkey_resources[0],
 	ret = mfd_add_devices(chip->dev, 0, &onkey_devs[0],
 			      ARRAY_SIZE(onkey_devs), &onkey_resources[0],
-			      chip->irq_base);
+			      chip->irq_base, NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add onkey subdev\n");
 }
@@ -815,7 +815,8 @@ static void __devinit device_codec_init(struct pm860x_chip *chip,
 	codec_devs[0].num_resources = ARRAY_SIZE(codec_resources);
 	codec_devs[0].resources = &codec_resources[0],
 	ret = mfd_add_devices(chip->dev, 0, &codec_devs[0],
-			      ARRAY_SIZE(codec_devs), &codec_resources[0], 0);
+			      ARRAY_SIZE(codec_devs), &codec_resources[0], 0,
+			      NULL);
 	if (ret < 0)
 		dev_err(chip->dev, "Failed to add codec subdev\n");
 }
diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c
index 44a3fdbadef..f1beb4971f8 100644
--- a/drivers/mfd/aat2870-core.c
+++ b/drivers/mfd/aat2870-core.c
@@ -424,7 +424,7 @@ static int aat2870_i2c_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(aat2870->dev, 0, aat2870_devs,
-			      ARRAY_SIZE(aat2870_devs), NULL, 0);
+			      ARRAY_SIZE(aat2870_devs), NULL, 0, NULL);
 	if (ret != 0) {
 		dev_err(aat2870->dev, "Failed to add subdev: %d\n", ret);
 		goto out_disable;
diff --git a/drivers/mfd/ab3100-core.c b/drivers/mfd/ab3100-core.c
index 78fca2902c8..01781ae5d0d 100644
--- a/drivers/mfd/ab3100-core.c
+++ b/drivers/mfd/ab3100-core.c
@@ -946,7 +946,7 @@ static int __devinit ab3100_probe(struct i2c_client *client,
 	}
 
 	err = mfd_add_devices(&client->dev, 0, ab3100_devs,
-		ARRAY_SIZE(ab3100_devs), NULL, 0);
+			      ARRAY_SIZE(ab3100_devs), NULL, 0, NULL);
 
 	ab3100_setup_debugfs(ab3100);
 
diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index 626b4ecaf64..47adf800024 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c
@@ -1418,25 +1418,25 @@ static int __devinit ab8500_probe(struct platform_device *pdev)
 
 	ret = mfd_add_devices(ab8500->dev, 0, abx500_common_devs,
 			ARRAY_SIZE(abx500_common_devs), NULL,
-			ab8500->irq_base);
+			ab8500->irq_base, ab8500->domain);
 	if (ret)
 		goto out_freeirq;
 
 	if (is_ab9540(ab8500))
 		ret = mfd_add_devices(ab8500->dev, 0, ab9540_devs,
 				ARRAY_SIZE(ab9540_devs), NULL,
-				ab8500->irq_base);
+				ab8500->irq_base, ab8500->domain);
 	else
 		ret = mfd_add_devices(ab8500->dev, 0, ab8500_devs,
 				ARRAY_SIZE(ab8500_devs), NULL,
-				ab8500->irq_base);
+				ab8500->irq_base, ab8500->domain);
 	if (ret)
 		goto out_freeirq;
 
 	if (is_ab9540(ab8500) || is_ab8505(ab8500))
 		ret = mfd_add_devices(ab8500->dev, 0, ab9540_ab8505_devs,
 				ARRAY_SIZE(ab9540_ab8505_devs), NULL,
-				ab8500->irq_base);
+				ab8500->irq_base, ab8500->domain);
 	if (ret)
 		goto out_freeirq;
 
@@ -1444,7 +1444,7 @@ static int __devinit ab8500_probe(struct platform_device *pdev)
 		/* Add battery management devices */
 		ret = mfd_add_devices(ab8500->dev, 0, ab8500_bm_devs,
 				      ARRAY_SIZE(ab8500_bm_devs), NULL,
-				      ab8500->irq_base);
+				      ab8500->irq_base, ab8500->domain);
 		if (ret)
 			dev_err(ab8500->dev, "error adding bm devices\n");
 	}
diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index c7983e86254..1b48f209480 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -316,7 +316,7 @@ int __devinit arizona_dev_init(struct arizona *arizona)
 	}
 
 	ret = mfd_add_devices(arizona->dev, -1, early_devs,
-			      ARRAY_SIZE(early_devs), NULL, 0);
+			      ARRAY_SIZE(early_devs), NULL, 0, NULL);
 	if (ret != 0) {
 		dev_err(dev, "Failed to add early children: %d\n", ret);
 		return ret;
@@ -516,11 +516,11 @@ int __devinit arizona_dev_init(struct arizona *arizona)
 	switch (arizona->type) {
 	case WM5102:
 		ret = mfd_add_devices(arizona->dev, -1, wm5102_devs,
-				      ARRAY_SIZE(wm5102_devs), NULL, 0);
+				      ARRAY_SIZE(wm5102_devs), NULL, 0, NULL);
 		break;
 	case WM5110:
 		ret = mfd_add_devices(arizona->dev, -1, wm5110_devs,
-				      ARRAY_SIZE(wm5102_devs), NULL, 0);
+				      ARRAY_SIZE(wm5102_devs), NULL, 0, NULL);
 		break;
 	}
 
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 683e18a2332..62f0883a763 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -913,14 +913,14 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 	if (pdata->clock_rate) {
 		ds1wm_pdata.clock_rate = pdata->clock_rate;
 		ret = mfd_add_devices(&pdev->dev, pdev->id,
-			&asic3_cell_ds1wm, 1, mem, asic->irq_base);
+			&asic3_cell_ds1wm, 1, mem, asic->irq_base, NULL);
 		if (ret < 0)
 			goto out;
 	}
 
 	if (mem_sdio && (irq >= 0)) {
 		ret = mfd_add_devices(&pdev->dev, pdev->id,
-			&asic3_cell_mmc, 1, mem_sdio, irq);
+			&asic3_cell_mmc, 1, mem_sdio, irq, NULL);
 		if (ret < 0)
 			goto out;
 	}
@@ -934,7 +934,7 @@ static int __init asic3_mfd_probe(struct platform_device *pdev,
 			asic3_cell_leds[i].pdata_size = sizeof(pdata->leds[i]);
 		}
 		ret = mfd_add_devices(&pdev->dev, 0,
-			asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0);
+			asic3_cell_leds, ASIC3_NUM_LEDS, NULL, 0, NULL);
 	}
 
  out:
diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 3419e726de4..2b282133c72 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c
@@ -149,7 +149,7 @@ static int __devinit cs5535_mfd_probe(struct pci_dev *pdev,
 	}
 
 	err = mfd_add_devices(&pdev->dev, -1, cs5535_mfd_cells,
-			ARRAY_SIZE(cs5535_mfd_cells), NULL, 0);
+			      ARRAY_SIZE(cs5535_mfd_cells), NULL, 0, NULL);
 	if (err) {
 		dev_err(&pdev->dev, "MFD add devices failed: %d\n", err);
 		goto err_disable;
diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c
index 2544910e1fd..a0a62b24621 100644
--- a/drivers/mfd/da9052-core.c
+++ b/drivers/mfd/da9052-core.c
@@ -803,7 +803,7 @@ int __devinit da9052_device_init(struct da9052 *da9052, u8 chip_id)
 		dev_err(da9052->dev, "DA9052 ADC IRQ failed ret=%d\n", ret);
 
 	ret = mfd_add_devices(da9052->dev, -1, da9052_subdev_info,
-			      ARRAY_SIZE(da9052_subdev_info), NULL, 0);
+			      ARRAY_SIZE(da9052_subdev_info), NULL, 0, NULL);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index 4e2af2cb2d2..45e83a68641 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -129,7 +129,7 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
 	cell->pdata_size = sizeof(*davinci_vc);
 
 	ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells,
-			      DAVINCI_VC_CELLS, NULL, 0);
+			      DAVINCI_VC_CELLS, NULL, 0, NULL);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "fail to register client devices\n");
 		goto fail4;
diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 7040a008113..0e63cdd9b52 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -3010,7 +3010,7 @@ static int __devinit db8500_prcmu_probe(struct platform_device *pdev)
 		prcmu_config_esram0_deep_sleep(ESRAM0_DEEP_SLEEP_STATE_RET);
 
 	err = mfd_add_devices(&pdev->dev, 0, db8500_prcmu_devs,
-			ARRAY_SIZE(db8500_prcmu_devs), NULL, 0);
+			      ARRAY_SIZE(db8500_prcmu_devs), NULL, 0, NULL);
 	if (err) {
 		pr_err("prcmu: Failed to add subdevices\n");
 		return err;
diff --git a/drivers/mfd/htc-pasic3.c b/drivers/mfd/htc-pasic3.c
index 04c7093d649..9e5453d21a6 100644
--- a/drivers/mfd/htc-pasic3.c
+++ b/drivers/mfd/htc-pasic3.c
@@ -168,7 +168,7 @@ static int __init pasic3_probe(struct platform_device *pdev)
 		/* the first 5 PASIC3 registers control the DS1WM */
 		ds1wm_resources[0].end = (5 << asic->bus_shift) - 1;
 		ret = mfd_add_devices(&pdev->dev, pdev->id,
-				&ds1wm_cell, 1, r, irq);
+				      &ds1wm_cell, 1, r, irq, NULL);
 		if (ret < 0)
 			dev_warn(dev, "failed to register DS1WM\n");
 	}
@@ -176,7 +176,8 @@ static int __init pasic3_probe(struct platform_device *pdev)
 	if (pdata && pdata->led_pdata) {
 		led_cell.platform_data = pdata->led_pdata;
 		led_cell.pdata_size = sizeof(struct pasic3_leds_machinfo);
-		ret = mfd_add_devices(&pdev->dev, pdev->id, &led_cell, 1, r, 0);
+		ret = mfd_add_devices(&pdev->dev, pdev->id, &led_cell, 1, r,
+				      0, NULL);
 		if (ret < 0)
 			dev_warn(dev, "failed to register LED device\n");
 	}
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index 59df5584cb5..266bdc5bd96 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -344,13 +344,13 @@ static int __devinit intel_msic_init_devices(struct intel_msic *msic)
 			continue;
 
 		ret = mfd_add_devices(&pdev->dev, -1, &msic_devs[i], 1, NULL,
-				      pdata->irq[i]);
+				      pdata->irq[i], NULL);
 		if (ret)
 			goto fail;
 	}
 
 	ret = mfd_add_devices(&pdev->dev, 0, msic_other_devs,
-			      ARRAY_SIZE(msic_other_devs), NULL, 0);
+			      ARRAY_SIZE(msic_other_devs), NULL, 0, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/drivers/mfd/janz-cmodio.c b/drivers/mfd/janz-cmodio.c
index 2ea99989551..965c4801df8 100644
--- a/drivers/mfd/janz-cmodio.c
+++ b/drivers/mfd/janz-cmodio.c
@@ -147,7 +147,7 @@ static int __devinit cmodio_probe_submodules(struct cmodio_device *priv)
 	}
 
 	return mfd_add_devices(&pdev->dev, 0, priv->cells,
-			       num_probed, NULL, pdev->irq);
+			       num_probed, NULL, pdev->irq, NULL);
 }
 
 /*
diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index 87662a17dec..c6b6d7dda51 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c
@@ -287,7 +287,8 @@ static int __devinit jz4740_adc_probe(struct platform_device *pdev)
 	writeb(0xff, adc->base + JZ_REG_ADC_CTRL);
 
 	ret = mfd_add_devices(&pdev->dev, 0, jz4740_adc_cells,
-		ARRAY_SIZE(jz4740_adc_cells), mem_base, irq_base);
+			      ARRAY_SIZE(jz4740_adc_cells), mem_base,
+			      irq_base, NULL);
 	if (ret < 0)
 		goto err_clk_put;
 
diff --git a/drivers/mfd/lm3533-core.c b/drivers/mfd/lm3533-core.c
index 0b2879b87fd..24212f45b20 100644
--- a/drivers/mfd/lm3533-core.c
+++ b/drivers/mfd/lm3533-core.c
@@ -393,7 +393,8 @@ static int __devinit lm3533_device_als_init(struct lm3533 *lm3533)
 	lm3533_als_devs[0].platform_data = pdata->als;
 	lm3533_als_devs[0].pdata_size = sizeof(*pdata->als);
 
-	ret = mfd_add_devices(lm3533->dev, 0, lm3533_als_devs, 1, NULL, 0);
+	ret = mfd_add_devices(lm3533->dev, 0, lm3533_als_devs, 1, NULL,
+			      0, NULL);
 	if (ret) {
 		dev_err(lm3533->dev, "failed to add ALS device\n");
 		return ret;
@@ -422,7 +423,7 @@ static int __devinit lm3533_device_bl_init(struct lm3533 *lm3533)
 	}
 
 	ret = mfd_add_devices(lm3533->dev, 0, lm3533_bl_devs,
-					pdata->num_backlights, NULL, 0);
+			      pdata->num_backlights, NULL, 0, NULL);
 	if (ret) {
 		dev_err(lm3533->dev, "failed to add backlight devices\n");
 		return ret;
@@ -451,7 +452,7 @@ static int __devinit lm3533_device_led_init(struct lm3533 *lm3533)
 	}
 
 	ret = mfd_add_devices(lm3533->dev, 0, lm3533_led_devs,
-						pdata->num_leds, NULL, 0);
+			      pdata->num_leds, NULL, 0, NULL);
 	if (ret) {
 		dev_err(lm3533->dev, "failed to add LED devices\n");
 		return ret;
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index a05fdfc2ebc..092ad4b44b6 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -750,7 +750,7 @@ gpe0_done:
 
 	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_GPIO], id);
 	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_GPIO],
-				1, NULL, 0);
+			      1, NULL, 0, NULL);
 
 gpio_done:
 	if (acpi_conflict)
@@ -807,7 +807,7 @@ static int __devinit lpc_ich_init_wdt(struct pci_dev *dev,
 
 	lpc_ich_finalize_cell(&lpc_ich_cells[LPC_WDT], id);
 	ret = mfd_add_devices(&dev->dev, -1, &lpc_ich_cells[LPC_WDT],
-				1, NULL, 0);
+			      1, NULL, 0, NULL);
 
 wdt_done:
 	return ret;
diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index 9f20abc5e39..f6b9c5c96b2 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c
@@ -127,7 +127,8 @@ static int __devinit lpc_sch_probe(struct pci_dev *dev,
 		lpc_sch_cells[i].id = id->device;
 
 	ret = mfd_add_devices(&dev->dev, 0,
-			lpc_sch_cells, ARRAY_SIZE(lpc_sch_cells), NULL, 0);
+			      lpc_sch_cells, ARRAY_SIZE(lpc_sch_cells), NULL,
+			      0, NULL);
 	if (ret)
 		goto out_dev;
 
@@ -153,7 +154,8 @@ static int __devinit lpc_sch_probe(struct pci_dev *dev,
 			tunnelcreek_cells[i].id = id->device;
 
 		ret = mfd_add_devices(&dev->dev, 0, tunnelcreek_cells,
-			ARRAY_SIZE(tunnelcreek_cells), NULL, 0);
+				      ARRAY_SIZE(tunnelcreek_cells), NULL,
+				      0, NULL);
 	}
 
 	return ret;
diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index c03e12b5192..d9e24c849a0 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -126,7 +126,7 @@ static int max77686_i2c_probe(struct i2c_client *i2c,
 	max77686_irq_init(max77686);
 
 	ret = mfd_add_devices(max77686->dev, -1, max77686_devs,
-			      ARRAY_SIZE(max77686_devs), NULL, 0);
+			      ARRAY_SIZE(max77686_devs), NULL, 0, NULL);
 
 	if (ret < 0)
 		goto err_mfd;
diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index a1811cb50ec..4fdd03d2853 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -159,7 +159,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c,
 	pm_runtime_set_active(max77693->dev);
 
 	ret = mfd_add_devices(max77693->dev, -1, max77693_devs,
-			ARRAY_SIZE(max77693_devs), NULL, 0);
+			      ARRAY_SIZE(max77693_devs), NULL, 0, NULL);
 	if (ret < 0)
 		goto err_mfd;
 
diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c
index 825a7f06d9b..ee53757beca 100644
--- a/drivers/mfd/max8925-core.c
+++ b/drivers/mfd/max8925-core.c
@@ -598,7 +598,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 
 	ret = mfd_add_devices(chip->dev, 0, &rtc_devs[0],
 			      ARRAY_SIZE(rtc_devs),
-			      &rtc_resources[0], chip->irq_base);
+			      &rtc_resources[0], chip->irq_base, NULL);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to add rtc subdev\n");
 		goto out;
@@ -606,7 +606,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 
 	ret = mfd_add_devices(chip->dev, 0, &onkey_devs[0],
 			      ARRAY_SIZE(onkey_devs),
-			      &onkey_resources[0], 0);
+			      &onkey_resources[0], 0, NULL);
 	if (ret < 0) {
 		dev_err(chip->dev, "Failed to add onkey subdev\n");
 		goto out_dev;
@@ -615,7 +615,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 	if (pdata) {
 		ret = mfd_add_devices(chip->dev, 0, &regulator_devs[0],
 				      ARRAY_SIZE(regulator_devs),
-				      &regulator_resources[0], 0);
+				      &regulator_resources[0], 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add regulator subdev\n");
 			goto out_dev;
@@ -625,7 +625,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 	if (pdata && pdata->backlight) {
 		ret = mfd_add_devices(chip->dev, 0, &backlight_devs[0],
 				      ARRAY_SIZE(backlight_devs),
-				      &backlight_resources[0], 0);
+				      &backlight_resources[0], 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add backlight subdev\n");
 			goto out_dev;
@@ -635,7 +635,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 	if (pdata && pdata->power) {
 		ret = mfd_add_devices(chip->dev, 0, &power_devs[0],
 					ARRAY_SIZE(power_devs),
-					&power_supply_resources[0], 0);
+				      &power_supply_resources[0], 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add power supply "
 				"subdev\n");
@@ -646,7 +646,7 @@ int __devinit max8925_device_init(struct max8925_chip *chip,
 	if (pdata && pdata->touch) {
 		ret = mfd_add_devices(chip->dev, 0, &touch_devs[0],
 				      ARRAY_SIZE(touch_devs),
-				      &touch_resources[0], 0);
+				      &touch_resources[0], 0, NULL);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to add touch subdev\n");
 			goto out_dev;
diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 10b629c245b..f123517065e 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -160,7 +160,7 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 
 	mfd_add_devices(max8997->dev, -1, max8997_devs,
 			ARRAY_SIZE(max8997_devs),
-			NULL, 0);
+			NULL, 0, NULL);
 
 	/*
 	 * TODO: enable others (flash, muic, rtc, battery, ...) and
diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c
index 6ef56d28c05..d7218cc9094 100644
--- a/drivers/mfd/max8998.c
+++ b/drivers/mfd/max8998.c
@@ -161,13 +161,13 @@ static int max8998_i2c_probe(struct i2c_client *i2c,
 	switch (id->driver_data) {
 	case TYPE_LP3974:
 		ret = mfd_add_devices(max8998->dev, -1,
-				lp3974_devs, ARRAY_SIZE(lp3974_devs),
-				NULL, 0);
+				      lp3974_devs, ARRAY_SIZE(lp3974_devs),
+				      NULL, 0, NULL);
 		break;
 	case TYPE_MAX8998:
 		ret = mfd_add_devices(max8998->dev, -1,
-				max8998_devs, ARRAY_SIZE(max8998_devs),
-				NULL, 0);
+				      max8998_devs, ARRAY_SIZE(max8998_devs),
+				      NULL, 0, NULL);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index b801dc72f04..1ec79b54bd2 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -612,7 +612,7 @@ static int mc13xxx_add_subdevice_pdata(struct mc13xxx *mc13xxx,
 	if (!cell.name)
 		return -ENOMEM;
 
-	return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0);
+	return mfd_add_devices(mc13xxx->dev, -1, &cell, 1, NULL, 0, NULL);
 }
 
 static int mc13xxx_add_subdevice(struct mc13xxx *mc13xxx, const char *format)
diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c
index 0c3a01cde2f..f8b77711ad2 100644
--- a/drivers/mfd/mfd-core.c
+++ b/drivers/mfd/mfd-core.c
@@ -74,12 +74,11 @@ static int mfd_platform_add_cell(struct platform_device *pdev,
 static int mfd_add_device(struct device *parent, int id,
 			  const struct mfd_cell *cell,
 			  struct resource *mem_base,
-			  int irq_base)
+			  int irq_base, struct irq_domain *domain)
 {
 	struct resource *res;
 	struct platform_device *pdev;
 	struct device_node *np = NULL;
-	struct irq_domain *domain = NULL;
 	int ret = -ENOMEM;
 	int r;
 
@@ -97,7 +96,6 @@ static int mfd_add_device(struct device *parent, int id,
 		for_each_child_of_node(parent->of_node, np) {
 			if (of_device_is_compatible(np, cell->of_compatible)) {
 				pdev->dev.of_node = np;
-				domain = irq_find_host(parent->of_node);
 				break;
 			}
 		}
@@ -177,7 +175,7 @@ fail_alloc:
 int mfd_add_devices(struct device *parent, int id,
 		    struct mfd_cell *cells, int n_devs,
 		    struct resource *mem_base,
-		    int irq_base)
+		    int irq_base, struct irq_domain *domain)
 {
 	int i;
 	int ret = 0;
@@ -191,7 +189,8 @@ int mfd_add_devices(struct device *parent, int id,
 	for (i = 0; i < n_devs; i++) {
 		atomic_set(&cnts[i], 0);
 		cells[i].usage_count = &cnts[i];
-		ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base);
+		ret = mfd_add_device(parent, id, cells + i, mem_base,
+				     irq_base, domain);
 		if (ret)
 			break;
 	}
@@ -247,7 +246,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
 	for (i = 0; i < n_clones; i++) {
 		cell_entry.name = clones[i];
 		/* don't give up if a single call fails; just report error */
-		if (mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0))
+		if (mfd_add_device(pdev->dev.parent, -1, &cell_entry, NULL, 0,
+				   NULL))
 			dev_err(dev, "failed to create platform device '%s'\n",
 					clones[i]);
 	}
diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c
index c4a69f193a1..a345f9bb7b4 100644
--- a/drivers/mfd/palmas.c
+++ b/drivers/mfd/palmas.c
@@ -453,7 +453,8 @@ static int __devinit palmas_i2c_probe(struct i2c_client *i2c,
 
 	ret = mfd_add_devices(palmas->dev, -1,
 			      children, ARRAY_SIZE(palmas_children),
-			      NULL, regmap_irq_chip_get_base(palmas->irq_data));
+			      NULL, regmap_irq_chip_get_base(palmas->irq_data),
+			      NULL);
 	kfree(children);
 
 	if (ret < 0)
diff --git a/drivers/mfd/rc5t583.c b/drivers/mfd/rc5t583.c
index cdc1df7fa0e..3a8fa88567b 100644
--- a/drivers/mfd/rc5t583.c
+++ b/drivers/mfd/rc5t583.c
@@ -289,7 +289,7 @@ static int __devinit rc5t583_i2c_probe(struct i2c_client *i2c,
 	}
 
 	ret = mfd_add_devices(rc5t583->dev, -1, rc5t583_subdevs,
-			ARRAY_SIZE(rc5t583_subdevs), NULL, 0);
+			      ARRAY_SIZE(rc5t583_subdevs), NULL, 0, NULL);
 	if (ret) {
 		dev_err(&i2c->dev, "add mfd devices failed: %d\n", ret);
 		goto err_add_devs;
diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index 685d61e431a..0f70dce6116 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -87,7 +87,8 @@ static int __devinit rdc321x_sb_probe(struct pci_dev *pdev,
 	rdc321x_wdt_pdata.sb_pdev = pdev;
 
 	return mfd_add_devices(&pdev->dev, -1,
-		rdc321x_sb_cells, ARRAY_SIZE(rdc321x_sb_cells), NULL, 0);
+			       rdc321x_sb_cells, ARRAY_SIZE(rdc321x_sb_cells),
+			       NULL, 0, NULL);
 }
 
 static void __devexit rdc321x_sb_remove(struct pci_dev *pdev)
diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 2988efde11e..49d361a618d 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -141,19 +141,19 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	switch (sec_pmic->device_type) {
 	case S5M8751X:
 		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8751_devs,
-					ARRAY_SIZE(s5m8751_devs), NULL, 0);
+				      ARRAY_SIZE(s5m8751_devs), NULL, 0, NULL);
 		break;
 	case S5M8763X:
 		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8763_devs,
-					ARRAY_SIZE(s5m8763_devs), NULL, 0);
+				      ARRAY_SIZE(s5m8763_devs), NULL, 0, NULL);
 		break;
 	case S5M8767X:
 		ret = mfd_add_devices(sec_pmic->dev, -1, s5m8767_devs,
-					ARRAY_SIZE(s5m8767_devs), NULL, 0);
+				      ARRAY_SIZE(s5m8767_devs), NULL, 0, NULL);
 		break;
 	case S2MPS11X:
 		ret = mfd_add_devices(sec_pmic->dev, -1, s2mps11_devs,
-					ARRAY_SIZE(s2mps11_devs), NULL, 0);
+				      ARRAY_SIZE(s2mps11_devs), NULL, 0, NULL);
 		break;
 	default:
 		/* If this happens the probe function is problem */
diff --git a/drivers/mfd/sta2x11-mfd.c b/drivers/mfd/sta2x11-mfd.c
index d31fed07aef..d35da6820be 100644
--- a/drivers/mfd/sta2x11-mfd.c
+++ b/drivers/mfd/sta2x11-mfd.c
@@ -407,7 +407,7 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 			      sta2x11_mfd_bar0,
 			      ARRAY_SIZE(sta2x11_mfd_bar0),
 			      &pdev->resource[0],
-			      0);
+			      0, NULL);
 	if (err) {
 		dev_err(&pdev->dev, "mfd_add_devices[0] failed: %d\n", err);
 		goto err_disable;
@@ -417,7 +417,7 @@ static int __devinit sta2x11_mfd_probe(struct pci_dev *pdev,
 			      sta2x11_mfd_bar1,
 			      ARRAY_SIZE(sta2x11_mfd_bar1),
 			      &pdev->resource[1],
-			      0);
+			      0, NULL);
 	if (err) {
 		dev_err(&pdev->dev, "mfd_add_devices[1] failed: %d\n", err);
 		goto err_disable;
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 2dd8d49cb30..c94f521f392 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -962,7 +962,7 @@ static int __devinit stmpe_add_device(struct stmpe *stmpe,
 				      struct mfd_cell *cell, int irq)
 {
 	return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1,
-			       NULL, stmpe->irq_base + irq);
+			       NULL, stmpe->irq_base + irq, NULL);
 }
 
 static int __devinit stmpe_devices_init(struct stmpe *stmpe)
diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 2d9e8799e73..b32940ec903 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c
@@ -388,7 +388,7 @@ static int t7l66xb_probe(struct platform_device *dev)
 
 	ret = mfd_add_devices(&dev->dev, dev->id,
 			      t7l66xb_cells, ARRAY_SIZE(t7l66xb_cells),
-			      iomem, t7l66xb->irq_base);
+			      iomem, t7l66xb->irq_base, NULL);
 
 	if (!ret)
 		return 0;
diff --git a/drivers/mfd/tc3589x.c b/drivers/mfd/tc3589x.c
index 048bf0532a0..b56ba6b4329 100644
--- a/drivers/mfd/tc3589x.c
+++ b/drivers/mfd/tc3589x.c
@@ -262,8 +262,8 @@ static int __devinit tc3589x_device_init(struct tc3589x *tc3589x)
 
 	if (blocks & TC3589x_BLOCK_GPIO) {
 		ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_dev_gpio,
-				ARRAY_SIZE(tc3589x_dev_gpio), NULL,
-				tc3589x->irq_base);
+				      ARRAY_SIZE(tc3589x_dev_gpio), NULL,
+				      tc3589x->irq_base, NULL);
 		if (ret) {
 			dev_err(tc3589x->dev, "failed to add gpio child\n");
 			return ret;
@@ -273,8 +273,8 @@ static int __devinit tc3589x_device_init(struct tc3589x *tc3589x)
 
 	if (blocks & TC3589x_BLOCK_KEYPAD) {
 		ret = mfd_add_devices(tc3589x->dev, -1, tc3589x_dev_keypad,
-				ARRAY_SIZE(tc3589x_dev_keypad), NULL,
-				tc3589x->irq_base);
+				      ARRAY_SIZE(tc3589x_dev_keypad), NULL,
+				      tc3589x->irq_base, NULL);
 		if (ret) {
 			dev_err(tc3589x->dev, "failed to keypad child\n");
 			return ret;
diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index d20a284ad4b..413c891102f 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c
@@ -192,7 +192,7 @@ static int __devinit tc6387xb_probe(struct platform_device *dev)
 	printk(KERN_INFO "Toshiba tc6387xb initialised\n");
 
 	ret = mfd_add_devices(&dev->dev, dev->id, tc6387xb_cells,
-			      ARRAY_SIZE(tc6387xb_cells), iomem, irq);
+			      ARRAY_SIZE(tc6387xb_cells), iomem, irq, NULL);
 
 	if (!ret)
 		return 0;
diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c
index 9612264f0e6..dcab026fcbb 100644
--- a/drivers/mfd/tc6393xb.c
+++ b/drivers/mfd/tc6393xb.c
@@ -700,8 +700,8 @@ static int __devinit tc6393xb_probe(struct platform_device *dev)
 	tc6393xb_cells[TC6393XB_CELL_FB].pdata_size = sizeof(*tcpd->fb_data);
 
 	ret = mfd_add_devices(&dev->dev, dev->id,
-			tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
-			iomem, tcpd->irq_base);
+			      tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells),
+			      iomem, tcpd->irq_base, NULL);
 
 	if (!ret)
 		return 0;
diff --git a/drivers/mfd/ti-ssp.c b/drivers/mfd/ti-ssp.c
index 4fb0e6c8e8f..7c3675a74f9 100644
--- a/drivers/mfd/ti-ssp.c
+++ b/drivers/mfd/ti-ssp.c
@@ -412,7 +412,7 @@ static int __devinit ti_ssp_probe(struct platform_device *pdev)
 		cells[id].data_size	= data->pdata_size;
 	}
 
-	error = mfd_add_devices(dev, 0, cells, 2, NULL, 0);
+	error = mfd_add_devices(dev, 0, cells, 2, NULL, 0, NULL);
 	if (error < 0) {
 		dev_err(dev, "cannot add mfd cells\n");
 		goto error_enable;
diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c
index a447f4ec11f..cccc626c83c 100644
--- a/drivers/mfd/timberdale.c
+++ b/drivers/mfd/timberdale.c
@@ -757,25 +757,25 @@ static int __devinit timb_probe(struct pci_dev *dev,
 		err = mfd_add_devices(&dev->dev, -1,
 			timberdale_cells_bar0_cfg0,
 			ARRAY_SIZE(timberdale_cells_bar0_cfg0),
-			&dev->resource[0], msix_entries[0].vector);
+			&dev->resource[0], msix_entries[0].vector, NULL);
 		break;
 	case TIMB_HW_VER1:
 		err = mfd_add_devices(&dev->dev, -1,
 			timberdale_cells_bar0_cfg1,
 			ARRAY_SIZE(timberdale_cells_bar0_cfg1),
-			&dev->resource[0], msix_entries[0].vector);
+			&dev->resource[0], msix_entries[0].vector, NULL);
 		break;
 	case TIMB_HW_VER2:
 		err = mfd_add_devices(&dev->dev, -1,
 			timberdale_cells_bar0_cfg2,
 			ARRAY_SIZE(timberdale_cells_bar0_cfg2),
-			&dev->resource[0], msix_entries[0].vector);
+			&dev->resource[0], msix_entries[0].vector, NULL);
 		break;
 	case TIMB_HW_VER3:
 		err = mfd_add_devices(&dev->dev, -1,
 			timberdale_cells_bar0_cfg3,
 			ARRAY_SIZE(timberdale_cells_bar0_cfg3),
-			&dev->resource[0], msix_entries[0].vector);
+			&dev->resource[0], msix_entries[0].vector, NULL);
 		break;
 	default:
 		dev_err(&dev->dev, "Uknown IP setup: %d.%d.%d\n",
@@ -792,7 +792,7 @@ static int __devinit timb_probe(struct pci_dev *dev,
 
 	err = mfd_add_devices(&dev->dev, 0,
 		timberdale_cells_bar1, ARRAY_SIZE(timberdale_cells_bar1),
-		&dev->resource[1], msix_entries[0].vector);
+		&dev->resource[1], msix_entries[0].vector, NULL);
 	if (err) {
 		dev_err(&dev->dev, "mfd_add_devices failed: %d\n", err);
 		goto err_mfd2;
@@ -803,7 +803,7 @@ static int __devinit timb_probe(struct pci_dev *dev,
 		((priv->fw.config & TIMB_HW_VER_MASK) == TIMB_HW_VER3)) {
 		err = mfd_add_devices(&dev->dev, 1, timberdale_cells_bar2,
 			ARRAY_SIZE(timberdale_cells_bar2),
-			&dev->resource[2], msix_entries[0].vector);
+			&dev->resource[2], msix_entries[0].vector, NULL);
 		if (err) {
 			dev_err(&dev->dev, "mfd_add_devices failed: %d\n", err);
 			goto err_mfd2;
diff --git a/drivers/mfd/tps6105x.c b/drivers/mfd/tps6105x.c
index a293b978e27..14051bdc714 100644
--- a/drivers/mfd/tps6105x.c
+++ b/drivers/mfd/tps6105x.c
@@ -188,7 +188,7 @@ static int __devinit tps6105x_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(&client->dev, 0, tps6105x_cells,
-		ARRAY_SIZE(tps6105x_cells), NULL, 0);
+			      ARRAY_SIZE(tps6105x_cells), NULL, 0, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/drivers/mfd/tps6507x.c b/drivers/mfd/tps6507x.c
index 33ba7723c96..1b203499c74 100644
--- a/drivers/mfd/tps6507x.c
+++ b/drivers/mfd/tps6507x.c
@@ -100,7 +100,7 @@ static int tps6507x_i2c_probe(struct i2c_client *i2c,
 
 	ret = mfd_add_devices(tps6507x->dev, -1,
 			      tps6507x_devs, ARRAY_SIZE(tps6507x_devs),
-			      NULL, 0);
+			      NULL, 0, NULL);
 
 	if (ret < 0)
 		goto err;
diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index 80e24f4b47b..50fd87c87a1 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -292,7 +292,7 @@ static int __devinit tps65090_i2c_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(tps65090->dev, -1, tps65090s,
-		ARRAY_SIZE(tps65090s), NULL, 0);
+			      ARRAY_SIZE(tps65090s), NULL, 0, NULL);
 	if (ret) {
 		dev_err(&client->dev, "add mfd devices failed with err: %d\n",
 			ret);
diff --git a/drivers/mfd/tps65217.c b/drivers/mfd/tps65217.c
index 3bc274409b5..a95e9421b73 100644
--- a/drivers/mfd/tps65217.c
+++ b/drivers/mfd/tps65217.c
@@ -191,7 +191,7 @@ static int __devinit tps65217_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(tps->dev, -1, tps65217s,
-					ARRAY_SIZE(tps65217s), NULL, 0);
+			      ARRAY_SIZE(tps65217s), NULL, 0, NULL);
 	if (ret < 0) {
 		dev_err(tps->dev, "mfd_add_devices failed: %d\n", ret);
 		return ret;
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 353c3481212..5f58370ccf5 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -493,7 +493,8 @@ static int __devinit tps6586x_i2c_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(tps6586x->dev, -1,
-			tps6586x_cell, ARRAY_SIZE(tps6586x_cell), NULL, 0);
+			      tps6586x_cell, ARRAY_SIZE(tps6586x_cell),
+			      NULL, 0, NULL);
 	if (ret < 0) {
 		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
 		goto err_mfd_add;
diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index 1c563792c77..d3ce4d569de 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c
@@ -254,7 +254,7 @@ static __devinit int tps65910_i2c_probe(struct i2c_client *i2c,
 
 	ret = mfd_add_devices(tps65910->dev, -1,
 			      tps65910s, ARRAY_SIZE(tps65910s),
-			      NULL, 0);
+			      NULL, 0, NULL);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "mfd_add_devices failed: %d\n", ret);
 		return ret;
diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c
index 74fd8cb5f37..4658b5bdcd8 100644
--- a/drivers/mfd/tps65912-core.c
+++ b/drivers/mfd/tps65912-core.c
@@ -146,7 +146,7 @@ int tps65912_device_init(struct tps65912 *tps65912)
 
 	ret = mfd_add_devices(tps65912->dev, -1,
 			      tps65912s, ARRAY_SIZE(tps65912s),
-			      NULL, 0);
+			      NULL, 0, NULL);
 	if (ret < 0)
 		goto err;
 
diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index 838ce4eb444..77c9acb1458 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c
@@ -223,7 +223,7 @@ static int __devinit twl4030_audio_probe(struct platform_device *pdev)
 
 	if (childs)
 		ret = mfd_add_devices(&pdev->dev, pdev->id, audio->cells,
-				      childs, NULL, 0);
+				      childs, NULL, 0, NULL);
 	else {
 		dev_err(&pdev->dev, "No platform data found for childs\n");
 		ret = -ENODEV;
diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index b0fad0ffca5..3dca5c195a2 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c
@@ -632,7 +632,7 @@ static int __devinit twl6040_probe(struct i2c_client *client,
 	}
 
 	ret = mfd_add_devices(&client->dev, -1, twl6040->cells, children,
-			      NULL, 0);
+			      NULL, 0, NULL);
 	if (ret)
 		goto mfd_err;
 
diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c
index 872aff21e4b..b9a636d44c7 100644
--- a/drivers/mfd/vx855.c
+++ b/drivers/mfd/vx855.c
@@ -102,7 +102,7 @@ static __devinit int vx855_probe(struct pci_dev *pdev,
 	vx855_gpio_resources[1].end = vx855_gpio_resources[1].start + 3;
 
 	ret = mfd_add_devices(&pdev->dev, -1, vx855_cells, ARRAY_SIZE(vx855_cells),
-			NULL, 0);
+			NULL, 0, NULL);
 
 	/* we always return -ENODEV here in order to enable other
 	 * drivers like old, not-yet-platform_device ported i2c-viapro */
diff --git a/drivers/mfd/wl1273-core.c b/drivers/mfd/wl1273-core.c
index f39b756df56..86e0e4309fc 100644
--- a/drivers/mfd/wl1273-core.c
+++ b/drivers/mfd/wl1273-core.c
@@ -241,7 +241,7 @@ static int __devinit wl1273_core_probe(struct i2c_client *client,
 		__func__, children);
 
 	r = mfd_add_devices(&client->dev, -1, core->cells,
-			    children, NULL, 0);
+			    children, NULL, 0, NULL);
 	if (r)
 		goto err;
 
diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 946698fd2dc..30173103594 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -1813,27 +1813,27 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8310:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8310_devs, ARRAY_SIZE(wm8310_devs),
-				      NULL, 0);
+				      NULL, 0, NULL);
 		break;
 
 	case WM8311:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8311_devs, ARRAY_SIZE(wm8311_devs),
-				      NULL, 0);
+				      NULL, 0, NULL);
 		if (!pdata || !pdata->disable_touch)
 			mfd_add_devices(wm831x->dev, wm831x_num,
 					touch_devs, ARRAY_SIZE(touch_devs),
-					NULL, 0);
+					NULL, 0, NULL);
 		break;
 
 	case WM8312:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8312_devs, ARRAY_SIZE(wm8312_devs),
-				      NULL, 0);
+				      NULL, 0, NULL);
 		if (!pdata || !pdata->disable_touch)
 			mfd_add_devices(wm831x->dev, wm831x_num,
 					touch_devs, ARRAY_SIZE(touch_devs),
-					NULL, 0);
+					NULL, 0, NULL);
 		break;
 
 	case WM8320:
@@ -1842,7 +1842,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	case WM8326:
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      wm8320_devs, ARRAY_SIZE(wm8320_devs),
-				      NULL, 0);
+				      NULL, 0, NULL);
 		break;
 
 	default:
@@ -1867,7 +1867,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 	if (ret & WM831X_XTAL_ENA) {
 		ret = mfd_add_devices(wm831x->dev, wm831x_num,
 				      rtc_devs, ARRAY_SIZE(rtc_devs),
-				      NULL, 0);
+				      NULL, 0, NULL);
 		if (ret != 0) {
 			dev_err(wm831x->dev, "Failed to add RTC: %d\n", ret);
 			goto err_irq;
@@ -1880,7 +1880,7 @@ int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
 		/* Treat errors as non-critical */
 		ret = mfd_add_devices(wm831x->dev, wm831x_num, backlight_devs,
 				      ARRAY_SIZE(backlight_devs), NULL,
-				      0);
+				      0, NULL);
 		if (ret < 0)
 			dev_err(wm831x->dev, "Failed to add backlight: %d\n",
 				ret);
diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c
index 4b7d378551d..639ca359242 100644
--- a/drivers/mfd/wm8400-core.c
+++ b/drivers/mfd/wm8400-core.c
@@ -70,7 +70,7 @@ static int wm8400_register_codec(struct wm8400 *wm8400)
 		.pdata_size = sizeof(*wm8400),
 	};
 
-	return mfd_add_devices(wm8400->dev, -1, &cell, 1, NULL, 0);
+	return mfd_add_devices(wm8400->dev, -1, &cell, 1, NULL, 0, NULL);
 }
 
 /*
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index eec74aa55fd..2febf88cfce 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -414,7 +414,7 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 	ret = mfd_add_devices(wm8994->dev, -1,
 			      wm8994_regulator_devs,
 			      ARRAY_SIZE(wm8994_regulator_devs),
-			      NULL, 0);
+			      NULL, 0, NULL);
 	if (ret != 0) {
 		dev_err(wm8994->dev, "Failed to add children: %d\n", ret);
 		goto err;
@@ -648,7 +648,7 @@ static __devinit int wm8994_device_init(struct wm8994 *wm8994, int irq)
 
 	ret = mfd_add_devices(wm8994->dev, -1,
 			      wm8994_devs, ARRAY_SIZE(wm8994_devs),
-			      NULL, 0);
+			      NULL, 0, NULL);
 	if (ret != 0) {
 		dev_err(wm8994->dev, "Failed to add children: %d\n", ret);
 		goto err_irq;
diff --git a/drivers/staging/nvec/nvec.c b/drivers/staging/nvec/nvec.c
index 695ea35f75b..d0a7e408efe 100644
--- a/drivers/staging/nvec/nvec.c
+++ b/drivers/staging/nvec/nvec.c
@@ -837,7 +837,7 @@ static int __devinit tegra_nvec_probe(struct platform_device *pdev)
 	}
 
 	ret = mfd_add_devices(nvec->dev, -1, nvec_devices,
-			      ARRAY_SIZE(nvec_devices), base, 0);
+			      ARRAY_SIZE(nvec_devices), base, 0, NULL);
 	if (ret)
 		dev_err(nvec->dev, "error adding subdevices\n");
 
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index 3a8435a8058..cebe97ee98b 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -16,6 +16,8 @@
 
 #include <linux/platform_device.h>
 
+struct irq_domain;
+
 /*
  * This struct describes the MFD part ("cell").
  * After registration the copy of this structure will become the platform data
@@ -98,7 +100,7 @@ static inline const struct mfd_cell *mfd_get_cell(struct platform_device *pdev)
 extern int mfd_add_devices(struct device *parent, int id,
 			   struct mfd_cell *cells, int n_devs,
 			   struct resource *mem_base,
-			   int irq_base);
+			   int irq_base, struct irq_domain *irq_domain);
 
 extern void mfd_remove_devices(struct device *parent);
 
-- 
cgit v1.2.3-70-g09d2


From 37407ea7f93864c2cfc03edf8f37872ec539ea2b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 Sep 2012 12:29:43 -0700
Subject: Revert "sched: Improve scalability via 'CPU buddies', which withstand
 random perturbations"

This reverts commit 970e178985cadbca660feb02f4d2ee3a09f7fdda.

Nikolay Ulyanitsky reported thatthe 3.6-rc5 kernel has a 15-20%
performance drop on PostgreSQL 9.2 on his machine (running "pgbench").

Borislav Petkov was able to reproduce this, and bisected it to this
commit 970e178985ca ("sched: Improve scalability via 'CPU buddies' ...")
apparently because the new single-idle-buddy model simply doesn't find
idle CPU's to reschedule on aggressively enough.

Mike Galbraith suspects that it is likely due to the user-mode spinlocks
in PostgreSQL not reacting well to preemption, but we don't really know
the details - I'll just revert the commit for now.

There are hopefully other approaches to improve scheduler scalability
without it causing these kinds of downsides.

Reported-by: Nikolay Ulyanitsky <lystor@gmail.com>
Bisected-by: Borislav Petkov <bp@alien8.de>
Acked-by: Mike Galbraith <efault@gmx.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 -
 kernel/sched/core.c   | 39 +--------------------------------------
 kernel/sched/fair.c   | 28 +++++++++++++++++++++-------
 3 files changed, 22 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8c86648a2f..23bddac4bad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -954,7 +954,6 @@ struct sched_domain {
 	unsigned int smt_gain;
 	int flags;			/* See SD_* */
 	int level;
-	int idle_buddy;			/* cpu assigned to select_idle_sibling() */
 
 	/* Runtime fields. */
 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a4ea245f3d8..649c9f876cb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6014,11 +6014,6 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu)
  * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
  * allows us to avoid some pointer chasing select_idle_sibling().
  *
- * Iterate domains and sched_groups downward, assigning CPUs to be
- * select_idle_sibling() hw buddy.  Cross-wiring hw makes bouncing
- * due to random perturbation self canceling, ie sw buddies pull
- * their counterpart to their CPU's hw counterpart.
- *
  * Also keep a unique ID per domain (we use the first cpu number in
  * the cpumask of the domain), this allows us to quickly tell if
  * two cpus are in the same cache domain, see cpus_share_cache().
@@ -6032,40 +6027,8 @@ static void update_top_cache_domain(int cpu)
 	int id = cpu;
 
 	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
-	if (sd) {
-		struct sched_domain *tmp = sd;
-		struct sched_group *sg, *prev;
-		bool right;
-
-		/*
-		 * Traverse to first CPU in group, and count hops
-		 * to cpu from there, switching direction on each
-		 * hop, never ever pointing the last CPU rightward.
-		 */
-		do {
-			id = cpumask_first(sched_domain_span(tmp));
-			prev = sg = tmp->groups;
-			right = 1;
-
-			while (cpumask_first(sched_group_cpus(sg)) != id)
-				sg = sg->next;
-
-			while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) {
-				prev = sg;
-				sg = sg->next;
-				right = !right;
-			}
-
-			/* A CPU went down, never point back to domain start. */
-			if (right && cpumask_first(sched_group_cpus(sg->next)) == id)
-				right = false;
-
-			sg = right ? sg->next : prev;
-			tmp->idle_buddy = cpumask_first(sched_group_cpus(sg));
-		} while ((tmp = tmp->child));
-
+	if (sd)
 		id = cpumask_first(sched_domain_span(sd));
-	}
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_id, cpu) = id;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 42d9df6a5ca..96e2b18b628 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2637,6 +2637,8 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	int cpu = smp_processor_id();
 	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
+	struct sched_group *sg;
+	int i;
 
 	/*
 	 * If the task is going to be woken-up on this cpu and if it is
@@ -2653,17 +2655,29 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		return prev_cpu;
 
 	/*
-	 * Otherwise, check assigned siblings to find an elegible idle cpu.
+	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-
 	for_each_lower_domain(sd) {
-		if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p)))
-			continue;
-		if (idle_cpu(sd->idle_buddy))
-			return sd->idle_buddy;
-	}
+		sg = sd->groups;
+		do {
+			if (!cpumask_intersects(sched_group_cpus(sg),
+						tsk_cpus_allowed(p)))
+				goto next;
 
+			for_each_cpu(i, sched_group_cpus(sg)) {
+				if (!idle_cpu(i))
+					goto next;
+			}
+
+			target = cpumask_first_and(sched_group_cpus(sg),
+					tsk_cpus_allowed(p));
+			goto done;
+next:
+			sg = sg->next;
+		} while (sg != sd->groups);
+	}
+done:
 	return target;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 05cf96398e1b6502f9e191291b715c7463c9d5dd Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@huawei.com>
Date: Mon, 17 Sep 2012 14:08:56 -0700
Subject: mm/ia64: fix a memory block size bug

I found following definition in include/linux/memory.h, in my IA64
platform, SECTION_SIZE_BITS is equal to 32, and MIN_MEMORY_BLOCK_SIZE
will be 0.

  #define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)

Because MIN_MEMORY_BLOCK_SIZE is int type and length of 32bits,
so MIN_MEMORY_BLOCK_SIZE(1 << 32) will will equal to 0.
Actually when SECTION_SIZE_BITS >= 31, MIN_MEMORY_BLOCK_SIZE will be wrong.
This will cause wrong system memory infomation in sysfs.
I think it should be:

  #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)

And "echo offline > memory0/state" will cause following call trace:

  kernel BUG at mm/memory_hotplug.c:885!
  sh[6455]: bugcheck! 0 [1]
  Pid: 6455, CPU 0, comm:                   sh
  psr : 0000101008526030 ifs : 8000000000000fa4 ip  : [<a0000001008c40f0>]    Not tainted (3.6.0-rc1)
  ip is at offline_pages+0x210/0xee0
  Call Trace:
    show_stack+0x80/0xa0
    show_regs+0x640/0x920
    die+0x190/0x2c0
    die_if_kernel+0x50/0x80
    ia64_bad_break+0x3d0/0x6e0
    ia64_native_leave_kernel+0x0/0x270
    offline_pages+0x210/0xee0
    alloc_pages_current+0x180/0x2a0

Signed-off-by: Jianguo Wu <wujianguo@huawei.com>
Signed-off-by: Jiang Liu <jiang.liu@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 1ac7f6e405f..ff9a9f8e0ed 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -19,7 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/mutex.h>
 
-#define MIN_MEMORY_BLOCK_SIZE     (1 << SECTION_SIZE_BITS)
+#define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
 
 struct memory_block {
 	unsigned long start_section_nr;
-- 
cgit v1.2.3-70-g09d2


From 35c448a8a3471b95ebc0ebcf91eb1183401b4274 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 17 Sep 2012 14:09:11 -0700
Subject: include/net/sock.h: squelch compiler warning in sk_rmem_schedule()

This warning:

  In file included from linux/include/linux/tcp.h:227:0,
                   from linux/include/linux/ipv6.h:221,
                   from linux/include/net/ipv6.h:16,
                   from linux/include/linux/sunrpc/clnt.h:26,
                   from linux/net/sunrpc/stats.c:22:
  linux/include/net/sock.h: In function `sk_rmem_schedule':
  linux/nfs-2.6/include/net/sock.h:1339:13: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]

is seen with gcc (GCC) 4.6.3 20120306 (Red Hat 4.6.3-2) using the
-Wextra option.

Commit c76562b6709f ("netvm: prevent a stream-specific deadlock")
accidentally replaced the "size" parameter of sk_rmem_schedule() with an
unsigned int.  This changes the semantics of the comparison in the
return statement.

In sk_wmem_schedule we have syntactically the same comparison, but
"size" is a signed integer.  In addition, __sk_mem_schedule() takes a
signed integer for its "size" parameter, so there is an implicit type
conversion in sk_rmem_schedule() anyway.

Revert the "size" parameter back to a signed integer so that the
semantics of the expressions in both sk_[rw]mem_schedule() are exactly
the same.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Miller <davem@davemloft.net>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 72132aef53f..adb7da20b5a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1332,7 +1332,7 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)
 }
 
 static inline bool
-sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size)
+sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
 {
 	if (!sk_has_account(sk))
 		return true;
-- 
cgit v1.2.3-70-g09d2


From 9a858dc7cebce01a7bb616bebb85087fa2b40871 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 17 Sep 2012 14:09:15 -0700
Subject: compiler.h: add __visible

gcc 4.6+ has support for a externally_visible attribute that prevents the
optimizer from optimizing unused symbols away.  Add a __visible macro to
use it with that compiler version or later.

This is used (at least) by the "Link Time Optimization" patchset.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc4.h | 7 +++++++
 include/linux/compiler.h      | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index 2f4079175af..934bc34d5f9 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -49,6 +49,13 @@
 #endif
 #endif
 
+#if __GNUC_MINOR__ >= 6
+/*
+ * Tell the optimizer that something else uses this function or variable.
+ */
+#define __visible __attribute__((externally_visible))
+#endif
+
 #if __GNUC_MINOR__ > 0
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 923d093c9ce..f430e4162f4 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -278,6 +278,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define __section(S) __attribute__ ((__section__(#S)))
 #endif
 
+#ifndef __visible
+#define __visible
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
 # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
-- 
cgit v1.2.3-70-g09d2


From b161dfa6937ae46d50adce8a7c6b12233e96e7bd Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 17 Sep 2012 22:31:38 +0200
Subject: vfs: dcache: use DCACHE_DENTRY_KILLED instead of DCACHE_DISCONNECTED
 in d_kill()

IBM reported a soft lockup after applying the fix for the rename_lock
deadlock.  Commit c83ce989cb5f ("VFS: Fix the nfs sillyrename regression
in kernel 2.6.38") was found to be the culprit.

The nfs sillyrename fix used DCACHE_DISCONNECTED to indicate that the
dentry was killed.  This flag can be set on non-killed dentries too,
which results in infinite retries when trying to traverse the dentry
tree.

This patch introduces a separate flag: DCACHE_DENTRY_KILLED, which is
only set in d_kill() and makes try_to_ascend() test only this flag.

IBM reported successful test results with this patch.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c            | 4 ++--
 include/linux/dcache.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/dcache.c b/fs/dcache.c
index 8086636bf79..16521a9f203 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -389,7 +389,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	 * Inform try_to_ascend() that we are no longer attached to the
 	 * dentry tree
 	 */
-	dentry->d_flags |= DCACHE_DISCONNECTED;
+	dentry->d_flags |= DCACHE_DENTRY_KILLED;
 	if (parent)
 		spin_unlock(&parent->d_lock);
 	dentry_iput(dentry);
@@ -1048,7 +1048,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
 	 * or deletion
 	 */
 	if (new != old->d_parent ||
-		 (old->d_flags & DCACHE_DISCONNECTED) ||
+		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
 		 (!locked && read_seqretry(&rename_lock, seq))) {
 		spin_unlock(&new->d_lock);
 		new = NULL;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index caa34e50537..59200795482 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -206,6 +206,8 @@ struct dentry_operations {
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
+#define DCACHE_DENTRY_KILLED	0x100000
+
 extern seqlock_t rename_lock;
 
 static inline int dname_external(struct dentry *dentry)
-- 
cgit v1.2.3-70-g09d2


From bafa6d9d89072c1a18853afe9ee5de05c491c13a Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 7 Sep 2012 00:45:29 +0000
Subject: ipv4/route: arg delay is useless in rt_cache_flush()

Since route cache deletion (89aef8921bfbac22f), delay is no
more used. Remove it.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h     |  2 +-
 net/ipv4/arp.c          |  2 +-
 net/ipv4/devinet.c      |  6 +++---
 net/ipv4/fib_frontend.c | 20 ++++++++++----------
 net/ipv4/fib_rules.c    |  2 +-
 net/ipv4/fib_trie.c     |  6 +++---
 net/ipv4/route.c        | 19 +++----------------
 7 files changed, 22 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 776a27f1ab7..da22243d276 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -108,7 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 
 struct in_device;
 extern int		ip_rt_init(void);
-extern void		rt_cache_flush(struct net *net, int how);
+extern void		rt_cache_flush(struct net *net);
 extern void		rt_flush_dev(struct net_device *dev);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 77e87aff419..47800459e4c 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
-		rt_cache_flush(dev_net(dev), 0);
+		rt_cache_flush(dev_net(dev));
 		break;
 	default:
 		break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e3aef..9b55b6f5a58 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1503,7 +1503,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
 			if ((new_value == 0) && (old_value != 0))
-				rt_cache_flush(net, 0);
+				rt_cache_flush(net);
 	}
 
 	return ret;
@@ -1537,7 +1537,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				dev_disable_lro(idev->dev);
 			}
 			rtnl_unlock();
-			rt_cache_flush(net, 0);
+			rt_cache_flush(net);
 		}
 	}
 
@@ -1554,7 +1554,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	struct net *net = ctl->extra2;
 
 	if (write && *valp != val)
-		rt_cache_flush(net, 0);
+		rt_cache_flush(net);
 
 	return ret;
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c43ae3fba79..8e2b475da9f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -148,7 +148,7 @@ static void fib_flush(struct net *net)
 	}
 
 	if (flushed)
-		rt_cache_flush(net, -1);
+		rt_cache_flush(net);
 }
 
 /*
@@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net)
 	net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, int force, int delay)
+static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev_net(dev));
-	rt_cache_flush(dev_net(dev), delay);
+	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
 }
 
@@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(dev_net(dev), -1);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa, NULL);
@@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			 * Disable IP.
 			 */
-			fib_disable_ip(dev, 1, 0);
+			fib_disable_ip(dev, 1);
 		} else {
-			rt_cache_flush(dev_net(dev), -1);
+			rt_cache_flush(dev_net(dev));
 		}
 		break;
 	}
@@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
-		fib_disable_ip(dev, 2, -1);
+		fib_disable_ip(dev, 2);
 		rt_flush_dev(dev);
 		return NOTIFY_DONE;
 	}
@@ -1062,14 +1062,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(dev_net(dev), -1);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_DOWN:
-		fib_disable_ip(dev, 0, 0);
+		fib_disable_ip(dev, 0);
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-		rt_cache_flush(dev_net(dev), 0);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_UNREGISTER_BATCH:
 		break;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a83d74e498d..274309d3ade 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 
 static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 {
-	rt_cache_flush(ops->fro_net, -1);
+	rt_cache_flush(ops->fro_net);
 }
 
 static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 57bd978483e..d1b93595b4a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 
@@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
@@ -1708,7 +1708,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
-		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+		rt_cache_flush(cfg->fc_nlinfo.nl_net);
 
 	fib_release_info(fa->fa_info);
 	alias_free_mem_rcu(fa);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 82cf2a722b2..f6436d3b207 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net)
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
-/*
- * delay < 0  : invalidate cache (fast : entries will be deleted later)
- * delay >= 0 : invalidate & flush cache (can be long)
- */
-void rt_cache_flush(struct net *net, int delay)
+void rt_cache_flush(struct net *net)
 {
 	rt_cache_invalidate(net);
 }
@@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(dev_net(in_dev->dev), 0);
+	rt_cache_flush(dev_net(in_dev->dev));
 }
 
 #ifdef CONFIG_SYSCTL
@@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
-		int flush_delay;
-		ctl_table ctl;
-		struct net *net;
-
-		memcpy(&ctl, __ctl, sizeof(ctl));
-		ctl.data = &flush_delay;
-		proc_dointvec(&ctl, write, buffer, lenp, ppos);
-
-		net = (struct net *)__ctl->extra1;
-		rt_cache_flush(net, flush_delay);
+		rt_cache_flush((struct net *)__ctl->extra1);
 		return 0;
 	}
 
-- 
cgit v1.2.3-70-g09d2


From b42664f898c976247f7f609b8bb9c94d7475ca10 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 10 Sep 2012 22:09:44 +0000
Subject: netns: move net->ipv4.rt_genid to net->rt_genid

This commit prepares the use of rt_genid by both IPv4 and IPv6.
Initialization is left in IPv4 part.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 10 ++++++++++
 include/net/netns/ipv4.h    |  1 -
 net/ipv4/route.c            |  9 ++-------
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index ae1cd6c9ba5..fd87963a0ea 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -102,6 +102,7 @@ struct net {
 #endif
 	struct netns_ipvs	*ipvs;
 	struct sock		*diag_nlsk;
+	atomic_t		rt_genid;
 };
 
 
@@ -300,5 +301,14 @@ static inline void unregister_net_sysctl_table(struct ctl_table_header *header)
 }
 #endif
 
+static inline int rt_genid(struct net *net)
+{
+	return atomic_read(&net->rt_genid);
+}
+
+static inline void rt_genid_bump(struct net *net)
+{
+	atomic_inc(&net->rt_genid);
+}
 
 #endif /* __NET_NET_NAMESPACE_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1474dd65c66..eb24dbccd81 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -65,7 +65,6 @@ struct netns_ipv4 {
 	unsigned int sysctl_ping_group_range[2];
 	long sysctl_tcp_mem[3];
 
-	atomic_t rt_genid;
 	atomic_t dev_addr_genid;
 
 #ifdef CONFIG_IP_MROUTE
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index be27cfa96e8..fd9af60397b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -202,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio);
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
 
-static inline int rt_genid(struct net *net)
-{
-	return atomic_read(&net->ipv4.rt_genid);
-}
-
 #ifdef CONFIG_PROC_FS
 static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
 {
@@ -449,7 +444,7 @@ static inline bool rt_is_expired(const struct rtable *rth)
 
 void rt_cache_flush(struct net *net)
 {
-	atomic_inc(&net->ipv4.rt_genid);
+	rt_genid_bump(net);
 }
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -2506,7 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 
 static __net_init int rt_genid_init(struct net *net)
 {
-	atomic_set(&net->ipv4.rt_genid, 0);
+	atomic_set(&net->rt_genid, 0);
 	get_random_bytes(&net->ipv4.dev_addr_genid,
 			 sizeof(net->ipv4.dev_addr_genid));
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 6f3118b571b8a4c06c7985dc3172c3526cb86253 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 10 Sep 2012 22:09:46 +0000
Subject: ipv6: use net->rt_genid to check dst validity

IPv6 dst should take care of rt_genid too. When a xfrm policy is inserted or
deleted, all dst should be invalidated.
To force the validation, dst entries should be created with ->obsolete set to
DST_OBSOLETE_FORCE_CHK. This was already the case for all functions calling
ip6_dst_alloc(), except for ip6_rt_copy().

As a consequence, we can remove the specific code in inet6_connection_sock.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h            |  5 ++---
 net/ipv6/inet6_connection_sock.c | 23 +----------------------
 net/ipv6/route.c                 | 13 +++++++++----
 3 files changed, 12 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0fedbd8d747..9fc7114159e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -111,9 +111,8 @@ struct rt6_info {
 	struct inet6_dev		*rt6i_idev;
 	unsigned long			_rt6i_peer;
 
-#ifdef CONFIG_XFRM
-	u32				rt6i_flow_cache_genid;
-#endif
+	u32				rt6i_genid;
+
 	/* more non-fragment space at head required */
 	unsigned short			rt6i_nfheader_len;
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0251a6005be..c4f934176ca 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
 			   const struct in6_addr *saddr)
 {
 	__ip6_dst_store(sk, dst, daddr, saddr);
-
-#ifdef CONFIG_XFRM
-	{
-		struct rt6_info *rt = (struct rt6_info  *)dst;
-		rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
-	}
-#endif
 }
 
 static inline
 struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst;
-
-	dst = __sk_dst_check(sk, cookie);
-
-#ifdef CONFIG_XFRM
-	if (dst) {
-		struct rt6_info *rt = (struct rt6_info *)dst;
-		if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
-			__sk_dst_reset(sk);
-			dst = NULL;
-		}
-	}
-#endif
-
-	return dst;
+	return __sk_dst_check(sk, cookie);
 }
 
 static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8e80fd27910..fb29e2215a1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 					     struct fib6_table *table)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
-					0, DST_OBSOLETE_NONE, flags);
+					0, DST_OBSOLETE_FORCE_CHK, flags);
 
 	if (rt) {
 		struct dst_entry *dst = &rt->dst;
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+		rt->rt6i_genid = rt_genid(net);
 	}
 	return rt;
 }
@@ -1031,6 +1032,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt = (struct rt6_info *) dst;
 
+	/* All IPV6 dsts are created with ->obsolete set to the value
+	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+	 * into this function always.
+	 */
+	if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+		return NULL;
+
 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
 			if (!rt6_has_peer(rt))
@@ -1397,8 +1405,6 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt->dst.obsolete = -1;
-
 	if (cfg->fc_flags & RTF_EXPIRES)
 		rt6_set_expires(rt, jiffies +
 				clock_t_to_jiffies(cfg->fc_expires));
@@ -2080,7 +2086,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->dst.input = ip6_input;
 	rt->dst.output = ip6_output;
 	rt->rt6i_idev = idev;
-	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast)
-- 
cgit v1.2.3-70-g09d2


From 263a523d18bca306016d75f5c8d5c57c37fe52fb Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 18 Sep 2012 20:42:31 -0700
Subject: linux/kernel.h: Fix warning seen with W=1 due to change in
 DIV_ROUND_CLOSEST

After commit b6d86d3d (Fix DIV_ROUND_CLOSEST to support negative dividends),
the following warning is seen if the kernel is compiled with W=1 (-Wextra):

warning: comparison of unsigned expression >= 0 is always true

The warning is due to the test '((typeof(x))-1) >= 0', which is used to detect
if the variable type is unsigned. Research on the web suggests that the warning
disappears if '>' instead of '>=' is used for the comparison.

Tests after changing the macro along that line show that the warning is gone,
and that the result is still correct:

i=-4: DIV_ROUND_CLOSEST(i, 2)=-2
i=-3: DIV_ROUND_CLOSEST(i, 2)=-2
i=-2: DIV_ROUND_CLOSEST(i, 2)=-1
i=-1: DIV_ROUND_CLOSEST(i, 2)=-1
i=0: DIV_ROUND_CLOSEST(i, 2)=0
i=1: DIV_ROUND_CLOSEST(i, 2)=1
i=2: DIV_ROUND_CLOSEST(i, 2)=1
i=3: DIV_ROUND_CLOSEST(i, 2)=2
i=4: DIV_ROUND_CLOSEST(i, 2)=2

Code size is the same as before.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 594b419b7d2..2451f1f7a1d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -91,7 +91,7 @@
 {							\
 	typeof(x) __x = x;				\
 	typeof(divisor) __d = divisor;			\
-	(((typeof(x))-1) >= 0 || (__x) >= 0) ?		\
+	(((typeof(x))-1) > 0 || (__x) > 0) ?		\
 		(((__x) + ((__d) / 2)) / (__d)) :	\
 		(((__x) - ((__d) / 2)) / (__d));	\
 }							\
-- 
cgit v1.2.3-70-g09d2


From 85f2a2ef1d0ab99523e0b947a2b723f5650ed6aa Mon Sep 17 00:00:00 2001
From: Wen Congyang <wency@cn.fujitsu.com>
Date: Thu, 20 Sep 2012 14:04:47 +0800
Subject: tracing: Don't call page_to_pfn() if page is NULL

When allocating memory fails, page is NULL. page_to_pfn() will
cause the kernel panicked if we don't use sparsemem vmemmap.

Link: http://lkml.kernel.org/r/505AB1FF.8020104@cn.fujitsu.com

Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: stable <stable@vger.kernel.org>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/kmem.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 5f889f16b0c..08fa27244da 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -214,7 +214,7 @@ TRACE_EVENT(mm_page_alloc,
 
 	TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
 		__entry->page,
-		page_to_pfn(__entry->page),
+		__entry->page ? page_to_pfn(__entry->page) : 0,
 		__entry->order,
 		__entry->migratetype,
 		show_gfp_flags(__entry->gfp_flags))
@@ -240,7 +240,7 @@ DECLARE_EVENT_CLASS(mm_page,
 
 	TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
 		__entry->page,
-		page_to_pfn(__entry->page),
+		__entry->page ? page_to_pfn(__entry->page) : 0,
 		__entry->order,
 		__entry->migratetype,
 		__entry->order == 0)
-- 
cgit v1.2.3-70-g09d2


From ecd7918745234e423dd87fcc0c077da557909720 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 20 Sep 2012 10:01:49 +0000
Subject: xfrm_user: ensure user supplied esn replay window is valid

The current code fails to ensure that the netlink message actually
contains as many bytes as the header indicates. If a user creates a new
state or updates an existing one but does not supply the bytes for the
whole ESN replay window, the kernel copies random heap bytes into the
replay bitmap, the ones happen to follow the XFRMA_REPLAY_ESN_VAL
netlink attribute. This leads to following issues:

1. The replay window has random bits set confusing the replay handling
   code later on.

2. A malicious user could use this flaw to leak up to ~3.5kB of heap
   memory when she has access to the XFRM netlink interface (requires
   CAP_NET_ADMIN).

Known users of the ESN replay window are strongSwan and Steffen's
iproute2 patch (<http://patchwork.ozlabs.org/patch/85962/>). The latter
uses the interface with a bitmap supplied while the former does not.
strongSwan is therefore prone to run into issue 1.

To fix both issues without breaking existing userland allow using the
XFRMA_REPLAY_ESN_VAL netlink attribute with either an empty bitmap or a
fully specified one. For the former case we initialize the in-kernel
bitmap with zero, for the latter we copy the user supplied bitmap. For
state updates the full bitmap must be supplied.

To prevent overflows in the bitmap length calculation the maximum size
of bmp_len is limited to 128 by this patch -- resulting in a maximum
replay window of 4096 packets. This should be sufficient for all real
life scenarios (RFC 4303 recommends a default replay window size of 64).

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Martin Willi <martin@revosec.ch>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h |  2 ++
 net/xfrm/xfrm_user.c | 31 +++++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 22e61fdf75a..28e493b5b94 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -84,6 +84,8 @@ struct xfrm_replay_state {
 	__u32	bitmap;
 };
 
+#define XFRMA_REPLAY_ESN_MAX	4096
+
 struct xfrm_replay_state_esn {
 	unsigned int	bmp_len;
 	__u32		oseq;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8024b3dea8c..5927065e97c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
 				struct nlattr **attrs)
 {
 	struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
+	struct xfrm_replay_state_esn *rs;
 
-	if ((p->flags & XFRM_STATE_ESN) && !rt)
-		return -EINVAL;
+	if (p->flags & XFRM_STATE_ESN) {
+		if (!rt)
+			return -EINVAL;
+
+		rs = nla_data(rt);
+
+		if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+			return -EINVAL;
+
+		if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+		    nla_len(rt) != sizeof(*rs))
+			return -EINVAL;
+	}
 
 	if (!rt)
 		return 0;
@@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
 					 struct nlattr *rp)
 {
 	struct xfrm_replay_state_esn *up;
+	int ulen;
 
 	if (!replay_esn || !rp)
 		return 0;
 
 	up = nla_data(rp);
+	ulen = xfrm_replay_state_esn_len(up);
 
-	if (xfrm_replay_state_esn_len(replay_esn) !=
-			xfrm_replay_state_esn_len(up))
+	if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
 		return -EINVAL;
 
 	return 0;
@@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
 				       struct nlattr *rta)
 {
 	struct xfrm_replay_state_esn *p, *pp, *up;
+	int klen, ulen;
 
 	if (!rta)
 		return 0;
 
 	up = nla_data(rta);
+	klen = xfrm_replay_state_esn_len(up);
+	ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
 
-	p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	p = kzalloc(klen, GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
-	pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+	pp = kzalloc(klen, GFP_KERNEL);
 	if (!pp) {
 		kfree(p);
 		return -ENOMEM;
 	}
 
+	memcpy(p, up, ulen);
+	memcpy(pp, up, ulen);
+
 	*replay_esn = p;
 	*preplay_esn = pp;
 
-- 
cgit v1.2.3-70-g09d2