From 257a4b018d1b514a1cc738e3ca11b566d8f3a3d8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 26 Dec 2017 17:34:44 +1100
Subject: xfrm: Forbid state updates from changing encap type

Currently we allow state updates to competely replace the contents
of x->encap.  This is bad because on the user side ESP only sets up
header lengths depending on encap_type once when the state is first
created.  This could result in the header lengths getting out of
sync with the actual state configuration.

In practice key managers will never do a state update to change the
encapsulation type.  Only the port numbers need to be changed as the
peer NAT entry is updated.

Therefore this patch adds a check in xfrm_state_update to forbid
any changes to the encap_type.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 500b3391f474..1e80f68e2266 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1534,8 +1534,12 @@ out:
 	err = -EINVAL;
 	spin_lock_bh(&x1->lock);
 	if (likely(x1->km.state == XFRM_STATE_VALID)) {
-		if (x->encap && x1->encap)
+		if (x->encap && x1->encap &&
+		    x->encap->encap_type == x1->encap->encap_type)
 			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
+		else if (x->encap || x1->encap)
+			goto fail;
+
 		if (x->coaddr && x1->coaddr) {
 			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
 		}
@@ -1552,6 +1556,8 @@ out:
 		x->km.state = XFRM_STATE_DEAD;
 		__xfrm_state_put(x);
 	}
+
+fail:
 	spin_unlock_bh(&x1->lock);
 
 	xfrm_state_put(x1);
-- 
cgit v1.2.3


From 862591bf4f519d1b8d859af720fafeaebdd0162a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 27 Dec 2017 23:25:45 +0100
Subject: xfrm: skip policies marked as dead while rehashing

syzkaller triggered following KASAN splat:

BUG: KASAN: slab-out-of-bounds in xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618
read of size 2 at addr ffff8801c8e92fe4 by task kworker/1:1/23 [..]
Workqueue: events xfrm_hash_rebuild [..]
 __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:428
 xfrm_hash_rebuild+0xdbe/0xf00 net/xfrm/xfrm_policy.c:618
 process_one_work+0xbbf/0x1b10 kernel/workqueue.c:2112
 worker_thread+0x223/0x1990 kernel/workqueue.c:2246 [..]

The reproducer triggers:
1016                 if (error) {
1017                         list_move_tail(&walk->walk.all, &x->all);
1018                         goto out;
1019                 }

in xfrm_policy_walk() via pfkey (it sets tiny rcv space, dump
callback returns -ENOBUFS).

In this case, *walk is located the pfkey socket struct, so this socket
becomes visible in the global policy list.

It looks like this is intentional -- phony walker has walk.dead set to 1
and all other places skip such "policies".

Ccing original authors of the two commits that seem to expose this
issue (first patch missed ->dead check, second patch adds pfkey
sockets to policies dumper list).

Fixes: 880a6fab8f6ba5b ("xfrm: configure policy hash table thresholds by netlink")
Fixes: 12a169e7d8f4b1c ("ipsec: Put dumpers on the dump list")
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Timo Teras <timo.teras@iki.fi>
Cc: Christophe Gouault <christophe.gouault@6wind.com>
Reported-by: syzbot <bot+c028095236fcb6f4348811565b75084c754dc729@syzkaller.appspotmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 70aa5cb0c659..2ef6db98e9ba 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -609,7 +609,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 	/* re-insert all policies by order of creation */
 	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
-		if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+		if (policy->walk.dead ||
+		    xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
 			/* skip socket policies */
 			continue;
 		}
-- 
cgit v1.2.3


From 06b335cb51af018d5feeff5dd4fd53847ddb675a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 29 Dec 2017 18:13:05 -0600
Subject: af_key: fix buffer overread in verify_address_len()

If a message sent to a PF_KEY socket ended with one of the extensions
that takes a 'struct sadb_address' but there were not enough bytes
remaining in the message for the ->sa_family member of the 'struct
sockaddr' which is supposed to follow, then verify_address_len() read
past the end of the message, into uninitialized memory.  Fix it by
returning -EINVAL in this case.

This bug was found using syzkaller with KMSAN.

Reproducer:

	#include <linux/pfkeyv2.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main()
	{
		int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
		char buf[24] = { 0 };
		struct sadb_msg *msg = (void *)buf;
		struct sadb_address *addr = (void *)(msg + 1);

		msg->sadb_msg_version = PF_KEY_V2;
		msg->sadb_msg_type = SADB_DELETE;
		msg->sadb_msg_len = 3;
		addr->sadb_address_len = 1;
		addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;

		write(sock, buf, 24);
	}

Reported-by: Alexander Potapenko <glider@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3dffb892d52c..596499cc8b2f 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -401,6 +401,11 @@ static int verify_address_len(const void *p)
 #endif
 	int len;
 
+	if (sp->sadb_address_len <
+	    DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
+			 sizeof(uint64_t)))
+		return -EINVAL;
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
-- 
cgit v1.2.3


From 4e765b4972af7b07adcb1feb16e7a525ce1f6b28 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 29 Dec 2017 18:15:23 -0600
Subject: af_key: fix buffer overread in parse_exthdrs()

If a message sent to a PF_KEY socket ended with an incomplete extension
header (fewer than 4 bytes remaining), then parse_exthdrs() read past
the end of the message, into uninitialized memory.  Fix it by returning
-EINVAL in this case.

Reproducer:

	#include <linux/pfkeyv2.h>
	#include <sys/socket.h>
	#include <unistd.h>

	int main()
	{
		int sock = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
		char buf[17] = { 0 };
		struct sadb_msg *msg = (void *)buf;

		msg->sadb_msg_version = PF_KEY_V2;
		msg->sadb_msg_type = SADB_DELETE;
		msg->sadb_msg_len = 2;

		write(sock, buf, 17);
	}

Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 596499cc8b2f..d40861a048fe 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -516,6 +516,9 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void *
 		uint16_t ext_type;
 		int ext_len;
 
+		if (len < sizeof(*ehdr))
+			return -EINVAL;
+
 		ext_len  = ehdr->sadb_ext_len;
 		ext_len *= sizeof(uint64_t);
 		ext_type = ehdr->sadb_ext_type;
-- 
cgit v1.2.3


From 2f10a61cee8fdb9f8da90f5db687e1862b22cf06 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Sun, 31 Dec 2017 16:18:56 +0100
Subject: xfrm: fix rcu usage in xfrm_get_type_offload

request_module can sleep, thus we cannot hold rcu_read_lock() while
calling it. The function also jumps back and takes rcu_read_lock()
again (in xfrm_state_get_afinfo()), resulting in an imbalance.

This codepath is triggered whenever a new offloaded state is created.

Fixes: ffdb5211da1c ("xfrm: Auto-load xfrm offload modules")
Reported-by: syzbot+ca425f44816d749e8eb49755567a75ee48cf4a30@syzkaller.appspotmail.com
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 1e80f68e2266..429957412633 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -313,13 +313,14 @@ retry:
 	if ((type && !try_module_get(type->owner)))
 		type = NULL;
 
+	rcu_read_unlock();
+
 	if (!type && try_load) {
 		request_module("xfrm-offload-%d-%d", family, proto);
 		try_load = 0;
 		goto retry;
 	}
 
-	rcu_read_unlock();
 	return type;
 }
 
-- 
cgit v1.2.3


From d16b46e4fd8bc6063624605f25b8c0835bb1fbe3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 4 Jan 2018 22:25:07 +1100
Subject: xfrm: Use __skb_queue_tail in xfrm_trans_queue

We do not need locking in xfrm_trans_queue because it is designed
to use per-CPU buffers.  However, the original code incorrectly
used skb_queue_tail which takes the lock.  This patch switches
it to __skb_queue_tail instead.

Reported-and-tested-by: Artem Savkov <asavkov@redhat.com>
Fixes: acf568ee859f ("xfrm: Reinject transport-mode packets...")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 3f6f6f8c9fa5..5b2409746ae0 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -518,7 +518,7 @@ int xfrm_trans_queue(struct sk_buff *skb,
 		return -ENOBUFS;
 
 	XFRM_TRANS_SKB_CB(skb)->finish = finish;
-	skb_queue_tail(&trans->queue, skb);
+	__skb_queue_tail(&trans->queue, skb);
 	tasklet_schedule(&trans->tasklet);
 	return 0;
 }
-- 
cgit v1.2.3


From bcfd09f7837f5240c30fd2f52ee7293516641faa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 5 Jan 2018 22:12:32 +1100
Subject: xfrm: Return error on unknown encap_type in init_state

Currently esp will happily create an xfrm state with an unknown
encap type for IPv4, without setting the necessary state parameters.
This patch fixes it by returning -EINVAL.

There is a similar problem in IPv6 where if the mode is unknown
we will skip initialisation while returning zero.  However, this
is harmless as the mode has already been checked further up the
stack.  This patch removes this anomaly by aligning the IPv6
behaviour with IPv4 and treating unknown modes (which cannot
actually happen) as transport mode.

Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4.c | 1 +
 net/ipv6/esp6.c | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index d57aa64fa7c7..61fe6e4d23fc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -981,6 +981,7 @@ static int esp_init_state(struct xfrm_state *x)
 
 		switch (encap->encap_type) {
 		default:
+			err = -EINVAL;
 			goto error;
 		case UDP_ENCAP_ESPINUDP:
 			x->props.header_len += sizeof(struct udphdr);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a902ff8f59be..1a7f00cd4803 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -890,13 +890,12 @@ static int esp6_init_state(struct xfrm_state *x)
 			x->props.header_len += IPV4_BEET_PHMAXLEN +
 					       (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
 		break;
+	default:
 	case XFRM_MODE_TRANSPORT:
 		break;
 	case XFRM_MODE_TUNNEL:
 		x->props.header_len += sizeof(struct ipv6hdr);
 		break;
-	default:
-		goto error;
 	}
 
 	align = ALIGN(crypto_aead_blocksize(aead), 4);
-- 
cgit v1.2.3


From b1bdcb59b64f806ef08d25a85c39ffb3ad841ce6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 6 Jan 2018 01:13:08 +0100
Subject: xfrm: don't call xfrm_policy_cache_flush while holding spinlock

xfrm_policy_cache_flush can sleep, so it cannot be called while holding
a spinlock.  We could release the lock first, but I don't see why we need
to invoke this function here in first place, the packet path won't reuse
an xdst entry unless its still valid.

While at it, add an annotation to xfrm_policy_cache_flush, it would
have probably caught this bug sooner.

Fixes: ec30d78c14a813 ("xfrm: add xdst pcpu cache")
Reported-by: syzbot+e149f7d1328c26f9c12f@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 2ef6db98e9ba..bc5eae12fb09 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -975,8 +975,6 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
 	}
 	if (!cnt)
 		err = -ESRCH;
-	else
-		xfrm_policy_cache_flush();
 out:
 	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return err;
@@ -1744,6 +1742,8 @@ void xfrm_policy_cache_flush(void)
 	bool found = 0;
 	int cpu;
 
+	might_sleep();
+
 	local_bh_disable();
 	rcu_read_lock();
 	for_each_possible_cpu(cpu) {
-- 
cgit v1.2.3


From 374d1b5a81f7f9cc5e7f095ac3d5aff3f6600376 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 5 Jan 2018 08:35:47 +0100
Subject: esp: Fix GRO when the headers not fully in the linear part of the
 skb.

The GRO layer does not necessarily pull the complete headers
into the linear part of the skb, a part may remain on the
first page fragment. This can lead to a crash if we try to
pull the headers, so make sure we have them on the linear
part before pulling.

Fixes: 7785bba299a8 ("esp: Add a software GRO codepath")
Reported-by: syzbot+82bbd65569c49c6c0c4d@syzkaller.appspotmail.com
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/esp4_offload.c | 3 ++-
 net/ipv6/esp6_offload.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c766b0..b1338e576d00 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -38,7 +38,8 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
 	__be32 spi;
 	int err;
 
-	skb_pull(skb, offset);
+	if (!pskb_pull(skb, offset))
+		return NULL;
 
 	if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
 		goto out;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478aa161..dd9627490c7c 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -60,7 +60,8 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head,
 	int nhoff;
 	int err;
 
-	skb_pull(skb, offset);
+	if (!pskb_pull(skb, offset))
+		return NULL;
 
 	if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
 		goto out;
-- 
cgit v1.2.3


From 1e532d2b49645e7cb76d5af6cb5bc4ec93d861ae Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 10 Jan 2018 09:33:26 +0100
Subject: af_key: Fix memory leak in key_notify_policy.

We leak the allocated out_skb in case
pfkey_xfrm_policy2msg() fails. Fix this
by freeing it on error.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/key/af_key.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/key/af_key.c b/net/key/af_key.c
index d40861a048fe..7e2e7188e7f4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2202,8 +2202,10 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
 		return PTR_ERR(out_skb);
 
 	err = pfkey_xfrm_policy2msg(out_skb, xp, dir);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(out_skb);
 		return err;
+	}
 
 	out_hdr = (struct sadb_msg *) out_skb->data;
 	out_hdr->sadb_msg_version = PF_KEY_V2;
-- 
cgit v1.2.3


From 76a4201191814a0061cb5c861fafb9ecaa764846 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 10 Jan 2018 12:14:28 +0100
Subject: xfrm: Fix a race in the xdst pcpu cache.

We need to run xfrm_resolve_and_create_bundle() with
bottom halves off. Otherwise we may reuse an already
released dst_enty when the xfrm lookup functions are
called from process context.

Fixes: c30d78c14a813db39a647b6a348b428 ("xfrm: add xdst pcpu cache")
Reported-by: Darius Ski <darius.ski@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index bc5eae12fb09..bd6b0e7a0ee4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2063,8 +2063,11 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	if (num_xfrms <= 0)
 		goto make_dummy_bundle;
 
+	local_bh_disable();
 	xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
-						  xflo->dst_orig);
+					      xflo->dst_orig);
+	local_bh_enable();
+
 	if (IS_ERR(xdst)) {
 		err = PTR_ERR(xdst);
 		if (err != -EAGAIN)
@@ -2151,9 +2154,12 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
 				goto no_transform;
 			}
 
+			local_bh_disable();
 			xdst = xfrm_resolve_and_create_bundle(
 					pols, num_pols, fl,
 					family, dst_orig);
+			local_bh_enable();
+
 			if (IS_ERR(xdst)) {
 				xfrm_pols_put(pols, num_pols);
 				err = PTR_ERR(xdst);
-- 
cgit v1.2.3


From 40950343932879247861ae152dcb55e4555afdff Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 10 Jan 2018 09:20:54 +0000
Subject: bpf: fix spelling mistake: "obusing" -> "abusing"

Trivial fix to spelling mistake in error message text.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b414d6b2d470..96ab165c873c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4472,7 +4472,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			 */
 			map_ptr = env->insn_aux_data[i + delta].map_ptr;
 			if (map_ptr == BPF_MAP_PTR_POISON) {
-				verbose(env, "tail_call obusing map_ptr\n");
+				verbose(env, "tail_call abusing map_ptr\n");
 				return -EINVAL;
 			}
 			if (!map_ptr->unpriv_array)
-- 
cgit v1.2.3


From 7891a87efc7116590eaba57acc3c422487802c6f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 10 Jan 2018 20:04:37 +0100
Subject: bpf: arsh is not supported in 32 bit alu thus reject it

The following snippet was throwing an 'unknown opcode cc' warning
in BPF interpreter:

  0: (18) r0 = 0x0
  2: (7b) *(u64 *)(r10 -16) = r0
  3: (cc) (u32) r0 s>>= (u32) r0
  4: (95) exit

Although a number of JITs do support BPF_ALU | BPF_ARSH | BPF_{K,X}
generation, not all of them do and interpreter does neither. We can
leave existing ones and implement it later in bpf-next for the
remaining ones, but reject this properly in verifier for the time
being.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Reported-by: syzbot+93c4904c5c70348a6890@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                       |  5 ++++
 tools/testing/selftests/bpf/test_verifier.c | 40 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 96ab165c873c..20eb04fd155e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2493,6 +2493,11 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			return -EINVAL;
 		}
 
+		if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) {
+			verbose(env, "BPF_ARSH not supported for 32 bit ALU\n");
+			return -EINVAL;
+		}
+
 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index b51017404c62..6bafa5456568 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -272,6 +272,46 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid bpf_ld_imm64 insn",
 		.result = REJECT,
 	},
+	{
+		"arsh32 on imm",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "BPF_ARSH not supported for 32 bit ALU",
+	},
+	{
+		"arsh32 on reg",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 5),
+			BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "BPF_ARSH not supported for 32 bit ALU",
+	},
+	{
+		"arsh64 on imm",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"arsh64 on reg",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 5),
+			BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
 	{
 		"no bpf_exit",
 		.insns = {
-- 
cgit v1.2.3


From bbeb6e4323dad9b5e0ee9f60c223dd532e2403b1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 10 Jan 2018 23:25:05 +0100
Subject: bpf, array: fix overflow in max_entries and undefined behavior in
 index_mask

syzkaller tried to alloc a map with 0xfffffffd entries out of a userns,
and thus unprivileged. With the recently added logic in b2157399cc98
("bpf: prevent out-of-bounds speculation") we round this up to the next
power of two value for max_entries for unprivileged such that we can
apply proper masking into potentially zeroed out map slots.

However, this will generate an index_mask of 0xffffffff, and therefore
a + 1 will let this overflow into new max_entries of 0. This will pass
allocation, etc, and later on map access we still enforce on the original
attr->max_entries value which was 0xfffffffd, therefore triggering GPF
all over the place. Thus bail out on overflow in such case.

Moreover, on 32 bit archs roundup_pow_of_two() can also not be used,
since fls_long(max_entries - 1) can result in 32 and 1UL << 32 in 32 bit
space is undefined. Therefore, do this by hand in a 64 bit variable.

This fixes all the issues triggered by syzkaller's reproducers.

Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
Reported-by: syzbot+b0efb8e572d01bce1ae0@syzkaller.appspotmail.com
Reported-by: syzbot+6c15e9744f75f2364773@syzkaller.appspotmail.com
Reported-by: syzbot+d2f5524fb46fd3b312ee@syzkaller.appspotmail.com
Reported-by: syzbot+61d23c95395cc90dbc2b@syzkaller.appspotmail.com
Reported-by: syzbot+0d363c942452cca68c01@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/arraymap.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index aaa319848e7d..ab94d304a634 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -56,7 +56,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	u32 elem_size, index_mask, max_entries;
 	bool unpriv = !capable(CAP_SYS_ADMIN);
 	struct bpf_array *array;
-	u64 array_size;
+	u64 array_size, mask64;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -74,13 +74,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	elem_size = round_up(attr->value_size, 8);
 
 	max_entries = attr->max_entries;
-	index_mask = roundup_pow_of_two(max_entries) - 1;
 
-	if (unpriv)
+	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
+	 * upper most bit set in u32 space is undefined behavior due to
+	 * resulting 1U << 32, so do it manually here in u64 space.
+	 */
+	mask64 = fls_long(max_entries - 1);
+	mask64 = 1ULL << mask64;
+	mask64 -= 1;
+
+	index_mask = mask64;
+	if (unpriv) {
 		/* round up array size to nearest power of 2,
 		 * since cpu will speculate within index_mask limits
 		 */
 		max_entries = index_mask + 1;
+		/* Check for overflows. */
+		if (max_entries < attr->max_entries)
+			return ERR_PTR(-E2BIG);
+	}
 
 	array_size = sizeof(*array);
 	if (percpu)
-- 
cgit v1.2.3


From 8978cc921fc7fad3f4d6f91f1da01352aeeeff25 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 9 Jan 2018 11:41:10 +0200
Subject: {net,ib}/mlx5: Don't disable local loopback multicast traffic when
 needed

There are systems platform information management interfaces (such as
HOST2BMC) for which we cannot disable local loopback multicast traffic.

Separate disable_local_lb_mc and disable_local_lb_uc capability bits so
driver will not disable multicast loopback traffic if not supported.
(It is expected that Firmware will not set disable_local_lb_mc if
HOST2BMC is running for example.)

Function mlx5_nic_vport_update_local_lb will do best effort to
disable/enable UC/MC loopback traffic and return success only in case it
succeeded to changed all allowed by Firmware.

Adapt mlx5_ib and mlx5e to support the new cap bits.

Fixes: 2c43c5a036be ("net/mlx5e: Enable local loopback in loopback selftest")
Fixes: c85023e153e3 ("IB/mlx5: Add raw ethernet local loopback support")
Fixes: bded747bb432 ("net/mlx5: Add raw ethernet local loopback firmware command")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |  9 +++++---
 .../net/ethernet/mellanox/mlx5/core/en_selftest.c  | 27 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  3 +--
 drivers/net/ethernet/mellanox/mlx5/core/vport.c    | 22 +++++++++++++-----
 include/linux/mlx5/mlx5_ifc.h                      |  5 ++--
 5 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8ac50de2b242..00cb184fa027 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1324,7 +1324,8 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
 		return err;
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
-	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return err;
 
 	mutex_lock(&dev->lb_mutex);
@@ -1342,7 +1343,8 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
 	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
-	    !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+	    (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+	     !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
 		return;
 
 	mutex_lock(&dev->lb_mutex);
@@ -4187,7 +4189,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	}
 
 	if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
-	    MLX5_CAP_GEN(mdev, disable_local_lb))
+	    (MLX5_CAP_GEN(mdev, disable_local_lb_uc) ||
+	     MLX5_CAP_GEN(mdev, disable_local_lb_mc)))
 		mutex_init(&dev->lb_mutex);
 
 	dev->ib_active = true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 1f1f8af87d4d..5a4608281f38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -238,15 +238,19 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
 	int err = 0;
 
 	/* Temporarily enable local_lb */
-	if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
-		mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
-		if (!lbtp->local_lb)
-			mlx5_nic_vport_update_local_lb(priv->mdev, true);
+	err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
+	if (err)
+		return err;
+
+	if (!lbtp->local_lb) {
+		err = mlx5_nic_vport_update_local_lb(priv->mdev, true);
+		if (err)
+			return err;
 	}
 
 	err = mlx5e_refresh_tirs(priv, true);
 	if (err)
-		return err;
+		goto out;
 
 	lbtp->loopback_ok = false;
 	init_completion(&lbtp->comp);
@@ -256,16 +260,21 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
 	lbtp->pt.dev = priv->netdev;
 	lbtp->pt.af_packet_priv = lbtp;
 	dev_add_pack(&lbtp->pt);
+
+	return 0;
+
+out:
+	if (!lbtp->local_lb)
+		mlx5_nic_vport_update_local_lb(priv->mdev, false);
+
 	return err;
 }
 
 static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
 					struct mlx5e_lbt_priv *lbtp)
 {
-	if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
-		if (!lbtp->local_lb)
-			mlx5_nic_vport_update_local_lb(priv->mdev, false);
-	}
+	if (!lbtp->local_lb)
+		mlx5_nic_vport_update_local_lb(priv->mdev, false);
 
 	dev_remove_pack(&lbtp->pt);
 	mlx5e_refresh_tirs(priv, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 8a89c7e8cd63..95e188d0883e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -578,8 +578,7 @@ static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
 	int ret = 0;
 
 	/* Disable local_lb by default */
-	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
-	    MLX5_CAP_GEN(dev, disable_local_lb))
+	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
 		ret = mlx5_nic_vport_update_local_lb(dev, false);
 
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index d653b0025b13..a1296a62497d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -908,23 +908,33 @@ int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
 	void *in;
 	int err;
 
-	mlx5_core_dbg(mdev, "%s local_lb\n", enable ? "enable" : "disable");
+	if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) &&
+	    !MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+		return 0;
+
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
-	MLX5_SET(modify_nic_vport_context_in, in,
-		 field_select.disable_mc_local_lb, 1);
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 nic_vport_context.disable_mc_local_lb, !enable);
-
-	MLX5_SET(modify_nic_vport_context_in, in,
-		 field_select.disable_uc_local_lb, 1);
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 nic_vport_context.disable_uc_local_lb, !enable);
 
+	if (MLX5_CAP_GEN(mdev, disable_local_lb_mc))
+		MLX5_SET(modify_nic_vport_context_in, in,
+			 field_select.disable_mc_local_lb, 1);
+
+	if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
+		MLX5_SET(modify_nic_vport_context_in, in,
+			 field_select.disable_uc_local_lb, 1);
+
 	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
 
+	if (!err)
+		mlx5_core_dbg(mdev, "%s local_lb\n",
+			      enable ? "enable" : "disable");
+
 	kvfree(in);
 	return err;
 }
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index d44ec5f41d4a..1391a82da98e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1027,8 +1027,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_wq_sz[0x5];
 
 	u8         nic_vport_change_event[0x1];
-	u8         disable_local_lb[0x1];
-	u8         reserved_at_3e2[0x9];
+	u8         disable_local_lb_uc[0x1];
+	u8         disable_local_lb_mc[0x1];
+	u8         reserved_at_3e3[0x8];
 	u8         log_max_vlan_list[0x5];
 	u8         reserved_at_3f0[0x3];
 	u8         log_max_current_mc_list[0x5];
-- 
cgit v1.2.3


From 05e0cc84e00c54fb152d1f4b86bc211823a83d0c Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 4 Jan 2018 04:35:51 +0200
Subject: net/mlx5: Fix get vector affinity helper function

mlx5_get_vector_affinity used to call pci_irq_get_affinity and after
reverting the patch that sets the device affinity via PCI_IRQ_AFFINITY
API, calling pci_irq_get_affinity becomes useless and it breaks RDMA
mlx5 users.  To fix this, this patch provides an alternative way to
retrieve IRQ vector affinity using legacy IRQ API, following
smp_affinity read procfs implementation.

Fixes: 231243c82793 ("Revert mlx5: move affinity hints assignments to generic code")
Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code")
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1f509d072026..a0610427e168 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -36,6 +36,7 @@
 #include <linux/kernel.h>
 #include <linux/completion.h>
 #include <linux/pci.h>
+#include <linux/irq.h>
 #include <linux/spinlock_types.h>
 #include <linux/semaphore.h>
 #include <linux/slab.h>
@@ -1231,7 +1232,23 @@ enum {
 static inline const struct cpumask *
 mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector)
 {
-	return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector);
+	const struct cpumask *mask;
+	struct irq_desc *desc;
+	unsigned int irq;
+	int eqn;
+	int err;
+
+	err = mlx5_vector2eqn(dev, vector, &eqn, &irq);
+	if (err)
+		return NULL;
+
+	desc = irq_to_desc(irq);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	mask = irq_data_get_effective_affinity_mask(&desc->irq_data);
+#else
+	mask = desc->irq_common_data.affinity;
+#endif
+	return mask;
 }
 
 #endif /* MLX5_DRIVER_H */
-- 
cgit v1.2.3


From b6908c296021a99ba2a83a4b4703eb9e6365e5dc Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Thu, 14 Dec 2017 19:23:50 +0200
Subject: net/mlx5: Fix memory leak in bad flow of mlx5_alloc_irq_vectors

Fix a memory leak where in case that pci_alloc_irq_vectors failed,
priv->irq_info was not released.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 95e188d0883e..a4c82fa71aec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -319,6 +319,7 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 	struct mlx5_eq_table *table = &priv->eq_table;
 	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
 	int nvec;
+	int err;
 
 	nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
 	       MLX5_EQ_VEC_COMP_BASE;
@@ -328,21 +329,23 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 
 	priv->irq_info = kcalloc(nvec, sizeof(*priv->irq_info), GFP_KERNEL);
 	if (!priv->irq_info)
-		goto err_free_msix;
+		return -ENOMEM;
 
 	nvec = pci_alloc_irq_vectors(dev->pdev,
 			MLX5_EQ_VEC_COMP_BASE + 1, nvec,
 			PCI_IRQ_MSIX);
-	if (nvec < 0)
-		return nvec;
+	if (nvec < 0) {
+		err = nvec;
+		goto err_free_irq_info;
+	}
 
 	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
 
 	return 0;
 
-err_free_msix:
+err_free_irq_info:
 	kfree(priv->irq_info);
-	return -ENOMEM;
+	return err;
 }
 
 static void mlx5_free_irq_vectors(struct mlx5_core_dev *dev)
-- 
cgit v1.2.3


From 72f36be06138bdc11bdbe1f04e4a3e2637ea438d Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 20 Nov 2017 09:58:01 +0200
Subject: net/mlx5: Fix mlx5_get_uars_page to return error code

Change mlx5_get_uars_page to return ERR_PTR in case of
allocation failure. Change all callers accordingly to
check the IS_ERR(ptr) instead of NULL.

Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/uar.c  | 14 ++++++--------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 00cb184fa027..262c1aa2e028 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4160,7 +4160,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		goto err_cnt;
 
 	dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
-	if (!dev->mdev->priv.uar)
+	if (IS_ERR(dev->mdev->priv.uar))
 		goto err_cong;
 
 	err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a4c82fa71aec..6dffa58fb178 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1135,8 +1135,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	}
 
 	dev->priv.uar = mlx5_get_uars_page(dev);
-	if (!dev->priv.uar) {
+	if (IS_ERR(dev->priv.uar)) {
 		dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
+		err = PTR_ERR(dev->priv.uar);
 		goto err_disable_msix;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 222b25908d01..8b97066dd1f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -168,18 +168,16 @@ struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
 	struct mlx5_uars_page *ret;
 
 	mutex_lock(&mdev->priv.bfregs.reg_head.lock);
-	if (list_empty(&mdev->priv.bfregs.reg_head.list)) {
-		ret = alloc_uars_page(mdev, false);
-		if (IS_ERR(ret)) {
-			ret = NULL;
-			goto out;
-		}
-		list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
-	} else {
+	if (!list_empty(&mdev->priv.bfregs.reg_head.list)) {
 		ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
 				       struct mlx5_uars_page, list);
 		kref_get(&ret->ref_count);
+		goto out;
 	}
+	ret = alloc_uars_page(mdev, false);
+	if (IS_ERR(ret))
+		goto out;
+	list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
 out:
 	mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
 
-- 
cgit v1.2.3


From 259bbc575c5322e0bc675c9a77e937250723c333 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Sun, 31 Dec 2017 11:31:34 +0200
Subject: net/mlx5: Fix error handling in load one

We didn't store the result of mlx5_init_once, due to that
mlx5_load_one returned success on error.  Fix that.

Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows")
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6dffa58fb178..0f88fd30a09a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1123,9 +1123,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_stop_poll;
 	}
 
-	if (boot && mlx5_init_once(dev, priv)) {
-		dev_err(&pdev->dev, "sw objs init failed\n");
-		goto err_stop_poll;
+	if (boot) {
+		err = mlx5_init_once(dev, priv);
+		if (err) {
+			dev_err(&pdev->dev, "sw objs init failed\n");
+			goto err_stop_poll;
+		}
 	}
 
 	err = mlx5_alloc_irq_vectors(dev);
-- 
cgit v1.2.3


From e556f6dd47eda62cbb046fa92e03265245a1537f Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Tue, 26 Dec 2017 13:44:49 +0200
Subject: net/mlx5e: Keep updating ethtool statistics when the interface is
 down

ethtool statistics should be updated even when the interface is down
since it shows more than just netdev counters, which might change while
the logical link is down.
One useful use case, for example, is when running RoCE traffic over the
interface (while the logical link is down, but physical link is up) and
examining rx_prioX_bytes.

Fixes: f62b8bb8f2d3 ("net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 8f05efa5c829..ea5fff2c3143 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -207,8 +207,7 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv,
 		return;
 
 	mutex_lock(&priv->state_lock);
-	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-		mlx5e_update_stats(priv, true);
+	mlx5e_update_stats(priv, true);
 	mutex_unlock(&priv->state_lock);
 
 	for (i = 0; i < mlx5e_num_stats_grps; i++)
-- 
cgit v1.2.3


From 97c8c3aa48ca8eb85d1806e08f882f90d78b1856 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 10 Oct 2017 16:51:44 +0300
Subject: net/mlx5e: Add error print in ETS init

ETS initialization might fail, add a print to indicate
such failures.

Fixes: 08fb1dacdd76 ("net/mlx5e: Support DCBNL IEEE ETS")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 9bcf38f4123b..a5c5134f5cb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -922,8 +922,9 @@ static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
 
 static void mlx5e_ets_init(struct mlx5e_priv *priv)
 {
-	int i;
 	struct ieee_ets ets;
+	int err;
+	int i;
 
 	if (!MLX5_CAP_GEN(priv->mdev, ets))
 		return;
@@ -940,7 +941,10 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
 	ets.prio_tc[0] = 1;
 	ets.prio_tc[1] = 0;
 
-	mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+	err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+	if (err)
+		netdev_err(priv->netdev,
+			   "%s, Failed to init ETS: %d\n", __func__, err);
 }
 
 enum {
-- 
cgit v1.2.3


From 4b7d4363f14a0398eca48c7e96e46120c5eb6a96 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 10 Oct 2017 16:54:30 +0300
Subject: net/mlx5e: Check support before TC swap in ETS init

Should not do the following swap between TCs 0 and 1
when max num of TCs is 1:
tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1)

Fixes: 08fb1dacdd76 ("net/mlx5e: Support DCBNL IEEE ETS")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index a5c5134f5cb2..3d46ef48d5b8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -937,9 +937,11 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
 		ets.prio_tc[i] = i;
 	}
 
-	/* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
-	ets.prio_tc[0] = 1;
-	ets.prio_tc[1] = 0;
+	if (ets.ets_cap > 1) {
+		/* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+		ets.prio_tc[0] = 1;
+		ets.prio_tc[1] = 0;
+	}
 
 	err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
 	if (err)
-- 
cgit v1.2.3


From 75b81ce719b79565eb0b39aa9954b6e11a5e73bf Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Wed, 10 Jan 2018 17:11:11 +0200
Subject: net/mlx5e: Don't override netdev features field unless in error flow

Set features function sets dev->features in order to keep track of which
features were successfully changed and which weren't (in case the user
asks for more than one change in a single command).

This breaks the logic in __netdev_update_features which assumes that
dev->features is not changed on success and checks for diffs between
features and dev->features (diffs that might not exist at this point
because of the driver override).

The solution is to keep track of successful/failed feature changes and
assign them to dev->features in case of failure only.

Fixes: 0e405443e803 ("net/mlx5e: Improve set features ndo resiliency")
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 43 +++++++++++++----------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d9d8227f195f..311d5ec8407c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3219,12 +3219,12 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
 	return 0;
 }
 
-#define MLX5E_SET_FEATURE(netdev, feature, enable)	\
+#define MLX5E_SET_FEATURE(features, feature, enable)	\
 	do {						\
 		if (enable)				\
-			netdev->features |= feature;	\
+			*features |= feature;		\
 		else					\
-			netdev->features &= ~feature;	\
+			*features &= ~feature;		\
 	} while (0)
 
 typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
@@ -3347,6 +3347,7 @@ static int set_feature_arfs(struct net_device *netdev, bool enable)
 #endif
 
 static int mlx5e_handle_feature(struct net_device *netdev,
+				netdev_features_t *features,
 				netdev_features_t wanted_features,
 				netdev_features_t feature,
 				mlx5e_feature_handler feature_handler)
@@ -3365,34 +3366,40 @@ static int mlx5e_handle_feature(struct net_device *netdev,
 		return err;
 	}
 
-	MLX5E_SET_FEATURE(netdev, feature, enable);
+	MLX5E_SET_FEATURE(features, feature, enable);
 	return 0;
 }
 
 static int mlx5e_set_features(struct net_device *netdev,
 			      netdev_features_t features)
 {
+	netdev_features_t oper_features = netdev->features;
 	int err;
 
-	err  = mlx5e_handle_feature(netdev, features, NETIF_F_LRO,
-				    set_feature_lro);
-	err |= mlx5e_handle_feature(netdev, features,
+	err  = mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_LRO, set_feature_lro);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
 				    NETIF_F_HW_VLAN_CTAG_FILTER,
 				    set_feature_cvlan_filter);
-	err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC,
-				    set_feature_tc_num_filters);
-	err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL,
-				    set_feature_rx_all);
-	err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXFCS,
-				    set_feature_rx_fcs);
-	err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX,
-				    set_feature_rx_vlan);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_HW_TC, set_feature_tc_num_filters);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_RXALL, set_feature_rx_all);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_RXFCS, set_feature_rx_fcs);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
 #ifdef CONFIG_RFS_ACCEL
-	err |= mlx5e_handle_feature(netdev, features, NETIF_F_NTUPLE,
-				    set_feature_arfs);
+	err |= mlx5e_handle_feature(netdev, &oper_features, features,
+				    NETIF_F_NTUPLE, set_feature_arfs);
 #endif
 
-	return err ? -EINVAL : 0;
+	if (err) {
+		netdev->features = oper_features;
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
-- 
cgit v1.2.3


From afc98a0b46d8576a55f18092400cc518d03a79a1 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Wed, 3 Jan 2018 17:23:55 +0200
Subject: net/mlx5: Update ptp_clock_event foreach PPS event

PPS event did not update ptp_clock_event fields, therefore,
timestamp value was not updated correctly. This fix updates the
event source and the timestamp value for each PPS event.

Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section")
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Reported-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index fa8aed62b231..5701f125e99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -423,9 +423,13 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
 
 	switch (clock->ptp_info.pin_config[pin].func) {
 	case PTP_PF_EXTTS:
+		ptp_event.index = pin;
+		ptp_event.timestamp = timecounter_cyc2time(&clock->tc,
+					be64_to_cpu(eqe->data.pps.time_stamp));
 		if (clock->pps_info.enabled) {
 			ptp_event.type = PTP_CLOCK_PPSUSR;
-			ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp);
+			ptp_event.pps_times.ts_real =
+					ns_to_timespec64(ptp_event.timestamp);
 		} else {
 			ptp_event.type = PTP_CLOCK_EXTTS;
 		}
-- 
cgit v1.2.3


From 237f258c42c905f71c694670fe4d9773d85c36ed Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Mon, 8 Jan 2018 10:01:04 +0200
Subject: net/mlx5e: Remove timestamp set from netdevice open flow

To avoid configuration override, timestamp set call will
be moved from the netdevice open flow to the init flow.
By this, a close-open procedure will not override the timestamp
configuration.
In addition, the change will rename mlx5e_timestamp_set function
to be mlx5e_timestamp_init.

Fixes: ef9814deafd0 ("net/mlx5e: Add HW timestamping (TS) support")
Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h          | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c     | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c      | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 543060c305a0..c2d89bfa1a70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -895,7 +895,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
 			   u16 vid);
 void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
 void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
-void mlx5e_timestamp_set(struct mlx5e_priv *priv);
+void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 
 struct mlx5e_redirect_rqt_param {
 	bool is_rss;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 311d5ec8407c..d8aefeed124d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2669,7 +2669,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		netif_carrier_on(netdev);
 }
 
-void mlx5e_timestamp_set(struct mlx5e_priv *priv)
+void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 {
 	priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
 	priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
@@ -2690,7 +2690,6 @@ int mlx5e_open_locked(struct net_device *netdev)
 	mlx5e_activate_priv_channels(priv);
 	if (priv->profile->update_carrier)
 		priv->profile->update_carrier(priv);
-	mlx5e_timestamp_set(priv);
 
 	if (priv->profile->update_stats)
 		queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
@@ -4146,6 +4145,8 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
 	INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
 	INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+	mlx5e_timestamp_init(priv);
 }
 
 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 2c43606c26b5..3409d86eb06b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -877,6 +877,8 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
 
 	mlx5e_build_rep_params(mdev, &priv->channels.params);
 	mlx5e_build_rep_netdev(netdev);
+
+	mlx5e_timestamp_init(priv);
 }
 
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 8812d7208e8f..ee2f378c5030 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -86,6 +86,8 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
 	mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev));
 	mlx5i_build_nic_params(mdev, &priv->channels.params);
 
+	mlx5e_timestamp_init(priv);
+
 	/* netdev init */
 	netdev->hw_features    |= NETIF_F_SG;
 	netdev->hw_features    |= NETIF_F_IP_CSUM;
@@ -450,7 +452,6 @@ static int mlx5i_open(struct net_device *netdev)
 
 	mlx5e_refresh_tirs(epriv, false);
 	mlx5e_activate_priv_channels(epriv);
-	mlx5e_timestamp_set(epriv);
 
 	mutex_unlock(&epriv->state_lock);
 	return 0;
-- 
cgit v1.2.3


From a48a52b7bea81c046fe1c1288f84d0eba214cba0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 15 Jan 2018 09:12:05 +0100
Subject: cfg80211: fully initialize old channel for event

Paul reported that he got a report about undefined behaviour
that seems to me to originate in using uninitialized memory
when the channel structure here is used in the event code in
nl80211 later.

He never reported whether this fixed it, and I wasn't able
to trigger this so far, but we should do the right thing and
fully initialize the on-stack structure anyway.

Reported-by: Paul Menzel <pmenzel+linux-wireless@molgen.mpg.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 78e71b0390be..7b42f0bacfd8 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
 	if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS)
 		return;
 
-	chan_before.center_freq = chan->center_freq;
-	chan_before.flags = chan->flags;
+	chan_before = *chan;
 
 	if (chan->flags & IEEE80211_CHAN_NO_IR) {
 		chan->flags &= ~IEEE80211_CHAN_NO_IR;
-- 
cgit v1.2.3


From 7a94b8c2eee7083ddccd0515830f8c81a8e44b1a Mon Sep 17 00:00:00 2001
From: Dominik Brodowski <linux@dominikbrodowski.net>
Date: Mon, 15 Jan 2018 08:12:15 +0100
Subject: nl80211: take RCU read lock when calling ieee80211_bss_get_ie()

As ieee80211_bss_get_ie() derefences an RCU to return ssid_ie, both
the call to this function and any operation on this variable need
protection by the RCU read lock.

Fixes: 44905265bc15 ("nl80211: don't expose wdev->ssid for most interfaces")
Signed-off-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2b3dbcd40e46..ed87a97fcb0b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 		const u8 *ssid_ie;
 		if (!wdev->current_bss)
 			break;
+		rcu_read_lock();
 		ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
 					       WLAN_EID_SSID);
-		if (!ssid_ie)
-			break;
-		if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-			goto nla_put_failure_locked;
+		if (ssid_ie &&
+		    nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+			goto nla_put_failure_rcu_locked;
+		rcu_read_unlock();
 		break;
 		}
 	default:
@@ -2635,6 +2636,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
 	genlmsg_end(msg, hdr);
 	return 0;
 
+ nla_put_failure_rcu_locked:
+	rcu_read_unlock();
  nla_put_failure_locked:
 	wdev_unlock(wdev);
  nla_put_failure:
-- 
cgit v1.2.3


From b71d856ab536f25eb97c011a351ecddf5518de41 Mon Sep 17 00:00:00 2001
From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Date: Wed, 10 Jan 2018 17:42:51 +0100
Subject: mac80211_hwsim: add workqueue to wait for deferred radio deletion on
 mod unload

When closing multiple wmediumd instances with many radios and try to
unload the  mac80211_hwsim module, it may happen that the work items live
longer than the module. To wait especially for this deletion work items,
add a work queue, otherwise flush_scheduled_work would be necessary.

Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index e8189c07b41f..ccd573e53c92 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -489,6 +489,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = {
 
 static spinlock_t hwsim_radio_lock;
 static LIST_HEAD(hwsim_radios);
+static struct workqueue_struct *hwsim_wq;
 static int hwsim_radio_idx;
 
 static struct platform_driver mac80211_hwsim_driver = {
@@ -3342,7 +3343,7 @@ static void remove_user_radios(u32 portid)
 		if (entry->destroy_on_close && entry->portid == portid) {
 			list_del(&entry->list);
 			INIT_WORK(&entry->destroy_work, destroy_radio);
-			schedule_work(&entry->destroy_work);
+			queue_work(hwsim_wq, &entry->destroy_work);
 		}
 	}
 	spin_unlock_bh(&hwsim_radio_lock);
@@ -3417,7 +3418,7 @@ static void __net_exit hwsim_exit_net(struct net *net)
 
 		list_del(&data->list);
 		INIT_WORK(&data->destroy_work, destroy_radio);
-		schedule_work(&data->destroy_work);
+		queue_work(hwsim_wq, &data->destroy_work);
 	}
 	spin_unlock_bh(&hwsim_radio_lock);
 }
@@ -3449,6 +3450,10 @@ static int __init init_mac80211_hwsim(void)
 
 	spin_lock_init(&hwsim_radio_lock);
 
+	hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
+	if (!hwsim_wq)
+		return -ENOMEM;
+
 	err = register_pernet_device(&hwsim_net_ops);
 	if (err)
 		return err;
@@ -3587,8 +3592,11 @@ static void __exit exit_mac80211_hwsim(void)
 	hwsim_exit_netlink();
 
 	mac80211_hwsim_free();
+	flush_workqueue(hwsim_wq);
+
 	unregister_netdev(hwsim_mon);
 	platform_driver_unregister(&mac80211_hwsim_driver);
 	unregister_pernet_device(&hwsim_net_ops);
+	destroy_workqueue(hwsim_wq);
 }
 module_exit(exit_mac80211_hwsim);
-- 
cgit v1.2.3


From 51a1aaa631c90223888d8beac4d649dc11d2ca55 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 15 Jan 2018 09:32:36 +0100
Subject: mac80211_hwsim: validate number of different channels

When creating a new radio on the fly, hwsim allows this
to be done with an arbitrary number of channels, but
cfg80211 only supports a limited number of simultaneous
channels, leading to a warning.

Fix this by validating the number - this requires moving
the define for the maximum out to a visible header file.

Reported-by: syzbot+8dd9051ff19940290931@syzkaller.appspotmail.com
Fixes: b59ec8dd4394 ("mac80211_hwsim: fix number of channels in interface combinations")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 5 +++++
 include/net/cfg80211.h                | 2 ++
 net/wireless/core.h                   | 2 --
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index ccd573e53c92..f6d4a50f1bdb 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3121,6 +3121,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 	if (info->attrs[HWSIM_ATTR_CHANNELS])
 		param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]);
 
+	if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) {
+		GENL_SET_ERR_MSG(info, "too many channels specified");
+		return -EINVAL;
+	}
+
 	if (info->attrs[HWSIM_ATTR_NO_VIF])
 		param.no_vif = true;
 
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index cb4d92b79cd9..fb94a8bd8ab5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -815,6 +815,8 @@ struct cfg80211_csa_settings {
 	u8 count;
 };
 
+#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
+
 /**
  * struct iface_combination_params - input parameters for interface combinations
  *
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d2f7e8b8a097..eaff636169c2 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
 		       struct wireless_dev *wdev);
 
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
-
 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
 #define CFG80211_DEV_WARN_ON(cond)	WARN_ON(cond)
 #else
-- 
cgit v1.2.3


From 59b179b48ce2a6076448a44531242ac2b3f6cef2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 15 Jan 2018 09:58:27 +0100
Subject: cfg80211: check dev_set_name() return value

syzbot reported a warning from rfkill_alloc(), and after a while
I think that the reason is that it was doing fault injection and
the dev_set_name() failed, leaving the name NULL, and we didn't
check the return value and got to rfkill_alloc() with a NULL name.
Since we really don't want a NULL name, we ought to check the
return value.

Fixes: fb28ad35906a ("net: struct device - replace bus_id with dev_name(), dev_set_name()")
Reported-by: syzbot+1ddfb3357e1d7bb5b5d3@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index fdde0d98fde1..a6f3cac8c640 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
 		if (rv)
 			goto use_default_name;
 	} else {
+		int rv;
+
 use_default_name:
 		/* NOTE:  This is *probably* safe w/out holding rtnl because of
 		 * the restrictions on phy names.  Probably this call could
@@ -446,7 +448,11 @@ use_default_name:
 		 * phyX.  But, might should add some locking and check return
 		 * value, and use a different name if this one exists?
 		 */
-		dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+		rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+		if (rv < 0) {
+			kfree(rdev);
+			return NULL;
+		}
 	}
 
 	INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
-- 
cgit v1.2.3


From d542296a4d0d9f41d0186edcac2baba1b674d02f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Mon, 8 Jan 2018 08:23:18 -0800
Subject: 9p: add missing module license for xen transport

The 9P of Xen module is missing required license and module information.
See https://bugzilla.kernel.org/show_bug.cgi?id=198109

Reported-by: Alan Bartlett <ajb@elrepo.org>
Fixes: 868eb122739a ("xen/9pfs: introduce Xen 9pfs transport driver")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_xen.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 325c56043007..086a4abdfa7c 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
 	return xenbus_unregister_driver(&xen_9pfs_front_driver);
 }
 module_exit(p9_trans_xen_exit);
+
+MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
+MODULE_DESCRIPTION("Xen Transport for 9P");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 66940f35d5a81d5969bb5543171c70a434fc5110 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 10 Jan 2018 16:03:05 +0200
Subject: ptr_ring: document usage around __ptr_ring_peek

This explains why is the net usage of __ptr_ring_peek
actually ok without locks.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6866df4f31b5..d72b2e7dd500 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -174,6 +174,15 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
  * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
  * If ring is never resized, and if the pointer is merely
  * tested, there's no need to take the lock - see e.g.  __ptr_ring_empty.
+ * However, if called outside the lock, and if some other CPU
+ * consumes ring entries at the same time, the value returned
+ * is not guaranteed to be correct.
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * execute __ptr_ring_peek and/or consume the ring enteries
+ * after the synchronization point.
  */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
@@ -182,10 +191,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r)
 	return NULL;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if the ring is ever resized - see e.g. ptr_ring_empty.
- */
+/* See __ptr_ring_peek above for locking rules. */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
 	return !__ptr_ring_peek(r);
-- 
cgit v1.2.3


From 0171c41835591e9aa2e384b703ef9a6ae367c610 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 10 Jan 2018 16:24:45 +0100
Subject: ppp: unlock all_ppp_mutex before registering device

ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices,
needs to lock pn->all_ppp_mutex. Therefore we mustn't call
register_netdevice() with pn->all_ppp_mutex already locked, or we'd
deadlock in case register_netdevice() fails and calls .ndo_uninit().

Fortunately, we can unlock pn->all_ppp_mutex before calling
register_netdevice(). This lock protects pn->units_idr, which isn't
used in the device registration process.

However, keeping pn->all_ppp_mutex locked during device registration
did ensure that no device in transient state would be published in
pn->units_idr. In practice, unlocking it before calling
register_netdevice() doesn't change this property: ppp_unit_register()
is called with 'ppp_mutex' locked and all searches done in
pn->units_idr hold this lock too.

Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion")
Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index d8e5747ff4e3..264d4af0bf69 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1006,17 +1006,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
 	if (!ifname_is_set)
 		snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index);
 
+	mutex_unlock(&pn->all_ppp_mutex);
+
 	ret = register_netdevice(ppp->dev);
 	if (ret < 0)
 		goto err_unit;
 
 	atomic_inc(&ppp_unit_count);
 
-	mutex_unlock(&pn->all_ppp_mutex);
-
 	return 0;
 
 err_unit:
+	mutex_lock(&pn->all_ppp_mutex);
 	unit_put(&pn->units_idr, ppp->file.index);
 err:
 	mutex_unlock(&pn->all_ppp_mutex);
-- 
cgit v1.2.3


From 6200b430220f3b9207861b16f57916950f4ecd8e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 10 Jan 2018 17:30:22 +0100
Subject: net: cs89x0: add MODULE_LICENSE

This driver lacks a MODULE_LICENSE tag, leading to a Kbuild warning:

WARNING: modpost: missing MODULE_LICENSE() in drivers/net/ethernet/cirrus/cs89x0.o

This adds license, author, and description according to the
comment block at the start of the file.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cirrus/cs89x0.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index 410a0a95130b..b3e7fafee3df 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1913,3 +1913,7 @@ static struct platform_driver cs89x0_driver = {
 module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe);
 
 #endif /* CONFIG_CS89x0_PLATFORM */
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crystal Semiconductor (Now Cirrus Logic) CS89[02]0 network driver");
+MODULE_AUTHOR("Russell Nelson <nelson@crynwr.com>");
-- 
cgit v1.2.3


From 749439bfac6e1a2932c582e2699f91d329658196 Mon Sep 17 00:00:00 2001
From: Mike Maloney <maloney@google.com>
Date: Wed, 10 Jan 2018 12:45:10 -0500
Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU

The logic in __ip6_append_data() assumes that the MTU is at least large
enough for the headers.  A device's MTU may be adjusted after being
added while sendmsg() is processing data, resulting in
__ip6_append_data() seeing any MTU.  For an mtu smaller than the size of
the fragmentation header, the math results in a negative 'maxfraglen',
which causes problems when refragmenting any previous skb in the
skb_write_queue, leaving it possibly malformed.

Instead sendmsg returns EINVAL when the mtu is calculated to be less
than IPV6_MIN_MTU.

Found by syzkaller:
kernel BUG at ./include/linux/skbuff.h:2064!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
task: ffff8801d0b68580 task.stack: ffff8801ac6b8000
RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline]
RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617
RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216
RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000
RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0
RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000
R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8
R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000
FS:  00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ip6_finish_skb include/net/ipv6.h:911 [inline]
 udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093
 udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363
 inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x352/0x5a0 net/socket.c:1750
 SyS_sendto+0x40/0x50 net/socket.c:1718
 entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x4512e9
RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9
RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005
RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c
R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69
R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000
Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba
RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570
RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Mike Maloney <maloney@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 688ba5f7516b..8fe58a2d305c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1206,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	v6_cork->tclass = ipc6->tclass;
 	if (rt->dst.flags & DST_XFRM_TUNNEL)
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
+		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
 	else
 		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
 	if (np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
 	}
+	if (mtu < IPV6_MIN_MTU)
+		return -EINVAL;
 	cork->base.fragsize = mtu;
 	if (dst_allfrag(rt->dst.path))
 		cork->base.flags |= IPCORK_ALLFRAG;
-- 
cgit v1.2.3


From 59b36613e85fb16ebf9feaf914570879cd5c2a21 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 10 Jan 2018 12:50:25 -0800
Subject: tipc: fix a memory leak in tipc_nl_node_get_link()

When tipc_node_find_by_name() fails, the nlmsg is not
freed.

While on it, switch to a goto label to properly
free it.

Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 507017fe0f1b..9036d8756e73 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
 
 	if (strcmp(name, tipc_bclink_name) == 0) {
 		err = tipc_nl_add_bc_link(net, &msg);
-		if (err) {
-			nlmsg_free(msg.skb);
-			return err;
-		}
+		if (err)
+			goto err_free;
 	} else {
 		int bearer_id;
 		struct tipc_node *node;
 		struct tipc_link *link;
 
 		node = tipc_node_find_by_name(net, name, &bearer_id);
-		if (!node)
-			return -EINVAL;
+		if (!node) {
+			err = -EINVAL;
+			goto err_free;
+		}
 
 		tipc_node_read_lock(node);
 		link = node->links[bearer_id].link;
 		if (!link) {
 			tipc_node_read_unlock(node);
-			nlmsg_free(msg.skb);
-			return -EINVAL;
+			err = -EINVAL;
+			goto err_free;
 		}
 
 		err = __tipc_nl_add_link(net, &msg, link, 0);
 		tipc_node_read_unlock(node);
-		if (err) {
-			nlmsg_free(msg.skb);
-			return err;
-		}
+		if (err)
+			goto err_free;
 	}
 
 	return genlmsg_reply(msg.skb, info);
+
+err_free:
+	nlmsg_free(msg.skb);
+	return err;
 }
 
 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
-- 
cgit v1.2.3


From cbbdf8433a5f117b1a2119ea30fc651b61ef7570 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 10 Jan 2018 13:00:39 -0800
Subject: netlink: extack needs to be reset each time through loop

syzbot triggered the WARN_ON in netlink_ack testing the bad_attr value.
The problem is that netlink_rcv_skb loops over the skb repeatedly invoking
the callback and without resetting the extack leaving potentially stale
data. Initializing each time through avoids the WARN_ON.

Fixes: 2d4bc93368f5a ("netlink: extended ACK reporting")
Reported-by: syzbot+315fa6766d0f7c359327@syzkaller.appspotmail.com
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 79cc1bf36e4a..47ef2d8683d6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2384,7 +2384,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 						   struct nlmsghdr *,
 						   struct netlink_ext_ack *))
 {
-	struct netlink_ext_ack extack = {};
+	struct netlink_ext_ack extack;
 	struct nlmsghdr *nlh;
 	int err;
 
@@ -2405,6 +2405,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
 			goto ack;
 
+		memset(&extack, 0, sizeof(extack));
 		err = cb(skb, nlh, &extack);
 		if (err == -EINTR)
 			goto skip;
-- 
cgit v1.2.3


From 6503a30440962f1e1ccb8868816b4e18201218d4 Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Thu, 11 Jan 2018 18:36:26 +0900
Subject: net: ipv4: Make "ip route get" match iif lo rules again.

Commit 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu
versions of route lookup") broke "ip route get" in the presence
of rules that specify iif lo.

Host-originated traffic always has iif lo, because
ip_route_output_key_hash and ip6_route_output_flags set the flow
iif to LOOPBACK_IFINDEX. Thus, putting "iif lo" in an ip rule is a
convenient way to select only originated traffic and not forwarded
traffic.

inet_rtm_getroute used to match these rules correctly because
even though it sets the flow iif to 0, it called
ip_route_output_key which overwrites iif with LOOPBACK_IFINDEX.
But now that it calls ip_route_output_key_hash_rcu, the ifindex
will remain 0 and not match the iif lo in the rule. As a result,
"ip route get" will return ENETUNREACH.

Fixes: 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup")
Tested: https://android.googlesource.com/kernel/tests/+/master/net/test/multinetwork_test.py passes again
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 43b69af242e1..4e153b23bcec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		if (err == 0 && rt->dst.error)
 			err = -rt->dst.error;
 	} else {
+		fl4.flowi4_iif = LOOPBACK_IFINDEX;
 		rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
 		err = 0;
 		if (IS_ERR(rt))
-- 
cgit v1.2.3


From 37f47bc90c7481e7959703ad1defc4fc9f5d85e3 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Thu, 11 Jan 2018 14:22:06 -0200
Subject: sctp: avoid compiler warning on implicit fallthru

These fall-through are expected.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c     | 1 +
 net/sctp/outqueue.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 3b18085e3b10..5d4c15bf66d2 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
 	case AF_INET:
 		if (!__ipv6_only_sock(sctp_opt2sk(sp)))
 			return 1;
+		/* fallthru */
 	default:
 		return 0;
 	}
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 7d67feeeffc1..c4ec99b20150 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -918,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 			break;
 
 		case SCTP_CID_ABORT:
-			if (sctp_test_T_bit(chunk)) {
+			if (sctp_test_T_bit(chunk))
 				packet->vtag = asoc->c.my_vtag;
-			}
+			/* fallthru */
 		/* The following chunks are "response" chunks, i.e.
 		 * they are generated in response to something we
 		 * received.  If we are sending these, then we can
-- 
cgit v1.2.3


From 95ef498d977bf44ac094778fd448b98af158a3e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 11 Jan 2018 22:31:18 -0800
Subject: ipv6: ip6_make_skb() needs to clear cork.base.dst

In my last patch, I missed fact that cork.base.dst was not initialized
in ip6_make_skb() :

If ip6_setup_cork() returns an error, we might attempt a dst_release()
on some random pointer.

Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8fe58a2d305c..4f7d8de56611 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1735,6 +1735,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
 	cork.base.flags = 0;
 	cork.base.addr = 0;
 	cork.base.opt = NULL;
+	cork.base.dst = NULL;
 	v6_cork.opt = NULL;
 	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
 	if (err) {
-- 
cgit v1.2.3


From 30be8f8dba1bd2aff73e8447d59228471233a3d4 Mon Sep 17 00:00:00 2001
From: "r.hering@avm.de" <r.hering@avm.de>
Date: Fri, 12 Jan 2018 15:42:06 +0100
Subject: net/tls: Fix inverted error codes to avoid endless loop

sendfile() calls can hang endless with using Kernel TLS if a socket error occurs.
Socket error codes must be inverted by Kernel TLS before returning because
they are stored with positive sign. If returned non-inverted they are
interpreted as number of bytes sent, causing endless looping of the
splice mechanic behind sendfile().

Signed-off-by: Robert Hering <r.hering@avm.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 2 +-
 net/tls/tls_sw.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 936cfc5cab7d..9185e53a743c 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -170,7 +170,7 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 
 static inline void tls_err_abort(struct sock *sk)
 {
-	sk->sk_err = -EBADMSG;
+	sk->sk_err = EBADMSG;
 	sk->sk_error_report(sk);
 }
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 73d19210dd49..9773571b6a34 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
 	while (msg_data_left(msg)) {
 		if (sk->sk_err) {
-			ret = sk->sk_err;
+			ret = -sk->sk_err;
 			goto send_end;
 		}
 
@@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 		size_t copy, required_size;
 
 		if (sk->sk_err) {
-			ret = sk->sk_err;
+			ret = -sk->sk_err;
 			goto sendpage_end;
 		}
 
-- 
cgit v1.2.3


From 95a332088ecb113c2e8753fa3f1df9b0dda9beec Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Fri, 12 Jan 2018 12:29:22 -0800
Subject: Revert "openvswitch: Add erspan tunnel support."

This reverts commit ceaa001a170e43608854d5290a48064f57b565ed.

The OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS attr should be designed
as a nested attribute to support all ERSPAN v1 and v2's fields.
The current attr is a be32 supporting only one field.  Thus, this
patch reverts it and later patch will redo it using nested attr.

Signed-off-by: William Tu <u9012063@gmail.com>
Cc: Jiri Benc <jbenc@redhat.com>
Cc: Pravin Shelar <pshelar@ovn.org>
Acked-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h |  1 -
 net/openvswitch/flow_netlink.c   | 51 +---------------------------------------
 2 files changed, 1 insertion(+), 51 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 4265d7f9e1f2..dcfab5e3b55c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -363,7 +363,6 @@ enum ovs_tunnel_key_attr {
 	OVS_TUNNEL_KEY_ATTR_IPV6_SRC,		/* struct in6_addr src IPv6 address. */
 	OVS_TUNNEL_KEY_ATTR_IPV6_DST,		/* struct in6_addr dst IPv6 address. */
 	OVS_TUNNEL_KEY_ATTR_PAD,
-	OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,	/* be32 ERSPAN index. */
 	__OVS_TUNNEL_KEY_ATTR_MAX
 };
 
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 624ea74353dd..f143908b651d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,7 +49,6 @@
 #include <net/mpls.h>
 #include <net/vxlan.h>
 #include <net/tun_proto.h>
-#include <net/erspan.h>
 
 #include "flow_netlink.h"
 
@@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void)
 		 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
 		 */
 		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
-		+ nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_DST */
-		+ nla_total_size(4);   /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+		+ nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
 
 static size_t ovs_nsh_key_attr_size(void)
@@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
 						.next = ovs_vxlan_ext_key_lens },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
 	[OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
-	[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = sizeof(u32) },
 };
 
 static const struct ovs_len_tbl
@@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
 	return 0;
 }
 
-static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
-				      struct sw_flow_match *match, bool is_mask,
-				      bool log)
-{
-	unsigned long opt_key_offset;
-	struct erspan_metadata opts;
-
-	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
-
-	memset(&opts, 0, sizeof(opts));
-	opts.index = nla_get_be32(attr);
-
-	/* Index has only 20-bit */
-	if (ntohl(opts.index) & ~INDEX_MASK) {
-		OVS_NLERR(log, "ERSPAN index number %x too large.",
-			  ntohl(opts.index));
-		return -EINVAL;
-	}
-
-	SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
-	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
-	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
-				  is_mask);
-
-	return 0;
-}
-
 static int ip_tun_from_nlattr(const struct nlattr *attr,
 			      struct sw_flow_match *match, bool is_mask,
 			      bool log)
@@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
 			break;
 		case OVS_TUNNEL_KEY_ATTR_PAD:
 			break;
-		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
-			if (opts_type) {
-				OVS_NLERR(log, "Multiple metadata blocks provided");
-				return -EINVAL;
-			}
-
-			err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
-			if (err)
-				return err;
-
-			tun_flags |= TUNNEL_ERSPAN_OPT;
-			opts_type = type;
-			break;
 		default:
 			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
 				  type);
@@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
 		else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
 			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
 			return -EMSGSIZE;
-		else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
-			 nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
-				      ((struct erspan_metadata *)tun_opts)->index))
-			return -EMSGSIZE;
 	}
 
 	return 0;
@@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
 			break;
 		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
 			break;
-		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
-			break;
 		}
 	};
 
-- 
cgit v1.2.3


From 17d0fb0caa68f2bfd8aaa8125ff15abebfbfa1d7 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 13 Jan 2018 20:22:01 +0300
Subject: sh_eth: fix dumping ARSTR

ARSTR  is always located at the start of the TSU register region, thus
using add_reg()  instead of add_tsu_reg() in __sh_eth_get_regs() to dump it
causes EDMR or EDSR (depending on the register layout) to be dumped instead
of ARSTR.  Use the correct condition/macro there...

Fixes: 6b4b4fead342 ("sh_eth: Implement ethtool register dump operations")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index b9e2846589f8..53924a4fc31c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2089,8 +2089,8 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
 		add_reg(CSMR);
 	if (cd->select_mii)
 		add_reg(RMII_MII);
-	add_reg(ARSTR);
 	if (cd->tsu) {
+		add_tsu_reg(ARSTR);
 		add_tsu_reg(TSU_CTRST);
 		add_tsu_reg(TSU_FWEN0);
 		add_tsu_reg(TSU_FWEN1);
-- 
cgit v1.2.3


From 096b9854c04df86f03b38a97d40b6506e5730919 Mon Sep 17 00:00:00 2001
From: Jim Westfall <jwestfall@surrealistic.net>
Date: Sun, 14 Jan 2018 04:18:50 -0800
Subject: net: Allow neigh contructor functions ability to modify the
 primary_key

Use n->primary_key instead of pkey to account for the possibility that a neigh
constructor function may have modified the primary_key value.

Signed-off-by: Jim Westfall <jwestfall@surrealistic.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d1f5fe986edd..7f831711b6e0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
 		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 
-	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 
 	if (n->parms->dead) {
 		rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 	     n1 != NULL;
 	     n1 = rcu_dereference_protected(n1->next,
 			lockdep_is_held(&tbl->lock))) {
-		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
 			if (want_ref)
 				neigh_hold(n1);
 			rc = n1;
-- 
cgit v1.2.3


From cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 Mon Sep 17 00:00:00 2001
From: Jim Westfall <jwestfall@surrealistic.net>
Date: Sun, 14 Jan 2018 04:18:51 -0800
Subject: ipv4: Make neigh lookup keys for loopback/point-to-point devices be
 INADDR_ANY

Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices
to avoid making an entry for every remote ip the device needs to talk to.

This used the be the old behavior but became broken in a263b3093641f
(ipv4: Make neigh lookups directly in output packet path) and later removed
in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point
devices) because it was broken.

Signed-off-by: Jim Westfall <jwestfall@surrealistic.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h | 3 +++
 net/ipv4/arp.c    | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/net/arp.h b/include/net/arp.h
index dc8cd47f883b..977aabfcdc03 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
 
 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
 {
+	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+		key = INADDR_ANY;
+
 	return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
 }
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a8d7c5a9fb05..6c231b43974d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
 
 static int arp_constructor(struct neighbour *neigh)
 {
-	__be32 addr = *(__be32 *)neigh->primary_key;
+	__be32 addr;
 	struct net_device *dev = neigh->dev;
 	struct in_device *in_dev;
 	struct neigh_parms *parms;
+	u32 inaddr_any = INADDR_ANY;
 
+	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+		memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+	addr = *(__be32 *)neigh->primary_key;
 	rcu_read_lock();
 	in_dev = __in_dev_get_rcu(dev);
 	if (!in_dev) {
-- 
cgit v1.2.3


From 6311b7ce42e0c1d6d944bc099dc47e936c20cf11 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 15 Jan 2018 12:42:25 +0100
Subject: netlink: extack: avoid parenthesized string constant warning

NL_SET_ERR_MSG() and NL_SET_ERR_MSG_ATTR() lead to the following warning
in newer versions of gcc:
  warning: array initialized from parenthesized string constant

Just remove the parentheses, they're not needed in this context since
anyway since there can be no operator precendence issues or similar.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 49b4257ce1ea..f3075d6c7e82 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -85,7 +85,7 @@ struct netlink_ext_ack {
  * to the lack of an output buffer.)
  */
 #define NL_SET_ERR_MSG(extack, msg) do {		\
-	static const char __msg[] = (msg);		\
+	static const char __msg[] = msg;		\
 	struct netlink_ext_ack *__extack = (extack);	\
 							\
 	if (__extack)					\
@@ -101,7 +101,7 @@ struct netlink_ext_ack {
 } while (0)
 
 #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {	\
-	static const char __msg[] = (msg);		\
+	static const char __msg[] = msg;		\
 	struct netlink_ext_ack *__extack = (extack);	\
 							\
 	if (__extack) {					\
-- 
cgit v1.2.3


From 3d1661304f0b2b51a8a43785b764822611dbdd53 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Wed, 10 Jan 2018 19:39:52 -0600
Subject: ibmvnic: Fix pending MAC address changes

Due to architecture limitations, the IBM VNIC client driver is unable
to perform MAC address changes unless the device has "logged in" to
its backing device. Currently, pending MAC changes are handled before
login, resulting in an error and failure to change the MAC address.
Moving that chunk to the end of the ibmvnic_login function, when we are
sure that it was successful, fixes that.

The MAC address can be changed when the device is up or down, so
only check if the device is in a "PROBED" state before setting the
MAC address.

Fixes: c26eba03e407 ("ibmvnic: Update reset infrastructure to support tunable parameters")
Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Reviewed-by: John Allen <jallen@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 1dc4aef37d3a..4b3df17c7a45 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -756,6 +756,12 @@ static int ibmvnic_login(struct net_device *netdev)
 		}
 	} while (adapter->renegotiate);
 
+	/* handle pending MAC address changes after successful login */
+	if (adapter->mac_change_pending) {
+		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
+		adapter->mac_change_pending = false;
+	}
+
 	return 0;
 }
 
@@ -993,11 +999,6 @@ static int ibmvnic_open(struct net_device *netdev)
 
 	mutex_lock(&adapter->reset_lock);
 
-	if (adapter->mac_change_pending) {
-		__ibmvnic_set_mac(netdev, &adapter->desired.mac);
-		adapter->mac_change_pending = false;
-	}
-
 	if (adapter->state != VNIC_CLOSED) {
 		rc = ibmvnic_login(netdev);
 		if (rc) {
@@ -1527,7 +1528,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	struct sockaddr *addr = p;
 
-	if (adapter->state != VNIC_OPEN) {
+	if (adapter->state == VNIC_PROBED) {
 		memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
 		adapter->mac_change_pending = true;
 		return 0;
-- 
cgit v1.2.3


From 625637bf4afa45204bd87e4218645182a919485a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 15 Jan 2018 17:01:19 +0800
Subject: sctp: reinit stream if stream outcnt has been change by sinit in
 sendmsg

After introducing sctp_stream structure, sctp uses stream->outcnt as the
out stream nums instead of c.sinit_num_ostreams.

However when users use sinit in cmsg, it only updates c.sinit_num_ostreams
in sctp_sendmsg. At that moment, stream->outcnt is still using previous
value. If it's value is not updated, the sinit_num_ostreams of sinit could
not really work.

This patch is to fix it by updating stream->outcnt and reiniting stream
if stream outcnt has been change by sinit in sendmsg.

Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9b01e994f661..15ae018b386f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1883,8 +1883,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		 */
 		if (sinit) {
 			if (sinit->sinit_num_ostreams) {
-				asoc->c.sinit_num_ostreams =
-					sinit->sinit_num_ostreams;
+				__u16 outcnt = sinit->sinit_num_ostreams;
+
+				asoc->c.sinit_num_ostreams = outcnt;
+				/* outcnt has been changed, so re-init stream */
+				err = sctp_stream_init(&asoc->stream, outcnt, 0,
+						       GFP_KERNEL);
+				if (err)
+					goto out_free;
 			}
 			if (sinit->sinit_max_instreams) {
 				asoc->c.sinit_max_instreams =
-- 
cgit v1.2.3


From a0ff660058b88d12625a783ce9e5c1371c87951f Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 15 Jan 2018 17:01:36 +0800
Subject: sctp: return error if the asoc has been peeled off in
 sctp_wait_for_sndbuf

After commit cea0cc80a677 ("sctp: use the right sk after waking up from
wait_buf sleep"), it may change to lock another sk if the asoc has been
peeled off in sctp_wait_for_sndbuf.

However, the asoc's new sk could be already closed elsewhere, as it's in
the sendmsg context of the old sk that can't avoid the new sk's closing.
If the sk's last one refcnt is held by this asoc, later on after putting
this asoc, the new sk will be freed, while under it's own lock.

This patch is to revert that commit, but fix the old issue by returning
error under the old sk's lock.

Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep")
Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 15ae018b386f..feb2ca69827a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -85,7 +85,7 @@
 static int sctp_writeable(struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-				size_t msg_len, struct sock **orig_sk);
+				size_t msg_len);
 static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
 static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
 static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -1977,7 +1977,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	if (!sctp_wspace(asoc)) {
 		/* sk can be changed by peel off when waiting for buf. */
-		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
 		if (err) {
 			if (err == -ESRCH) {
 				/* asoc is already dead. */
@@ -8022,12 +8022,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
 
 /* Helper function to wait for space in the sndbuf.  */
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-				size_t msg_len, struct sock **orig_sk)
+				size_t msg_len)
 {
 	struct sock *sk = asoc->base.sk;
-	int err = 0;
 	long current_timeo = *timeo_p;
 	DEFINE_WAIT(wait);
+	int err = 0;
 
 	pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
 		 *timeo_p, msg_len);
@@ -8056,17 +8056,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
 		lock_sock(sk);
-		if (sk != asoc->base.sk) {
-			release_sock(sk);
-			sk = asoc->base.sk;
-			lock_sock(sk);
-		}
+		if (sk != asoc->base.sk)
+			goto do_error;
 
 		*timeo_p = current_timeo;
 	}
 
 out:
-	*orig_sk = sk;
 	finish_wait(&asoc->wait, &wait);
 
 	/* Release the association's refcnt.  */
-- 
cgit v1.2.3


From c5006b8aa74599ce19104b31d322d2ea9ff887cc Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 15 Jan 2018 17:02:00 +0800
Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address

The check in sctp_sockaddr_af is not robust enough to forbid binding a
v4mapped v6 addr on a v4 socket.

The worse thing is that v4 socket's bind_verify would not convert this
v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4
socket bound a v6 addr.

This patch is to fix it by doing the common sa.sa_family check first,
then AF_INET check for v4mapped v6 addrs.

Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.")
Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index feb2ca69827a..039fcb618c34 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
 	if (len < sizeof (struct sockaddr))
 		return NULL;
 
+	if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+		return NULL;
+
 	/* V4 mapped address are really of AF_INET family */
 	if (addr->sa.sa_family == AF_INET6 &&
-	    ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
-		if (!opt->pf->af_supported(AF_INET, opt))
-			return NULL;
-	} else {
-		/* Does this PF support this AF? */
-		if (!opt->pf->af_supported(addr->sa.sa_family, opt))
-			return NULL;
-	}
+	    ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+	    !opt->pf->af_supported(AF_INET, opt))
+		return NULL;
 
 	/* If we get this far, af is valid. */
 	af = sctp_get_af_specific(addr->sa.sa_family);
-- 
cgit v1.2.3


From a5b1379afbfabf91e3a689e82ac619a7157336b3 Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Mon, 15 Jan 2018 13:24:28 -0500
Subject: lan78xx: Fix failure in USB Full Speed

Fix initialize the uninitialized tx_qlen to an appropriate value when USB
Full Speed is used.

Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver")
Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 94c7804903c4..ec56ff29aac4 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2396,6 +2396,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 		buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
 		dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
 		dev->rx_qlen = 4;
+		dev->tx_qlen = 4;
 	}
 
 	ret = lan78xx_write_reg(dev, BURST_CAP, buf);
-- 
cgit v1.2.3


From 0d9c9f0f40ca262b67fc06a702b85f3976f5e1a1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 15 Jan 2018 11:47:53 -0800
Subject: nfp: use the correct index for link speed table

sts variable is holding link speed as well as state.  We should
be using ls to index into ls_to_ethtool.

Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 2801ecd09eab..6c02b2d6ba06 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -333,7 +333,7 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 	    ls >= ARRAY_SIZE(ls_to_ethtool))
 		return 0;
 
-	cmd->base.speed = ls_to_ethtool[sts];
+	cmd->base.speed = ls_to_ethtool[ls];
 	cmd->base.duplex = DUPLEX_FULL;
 
 	return 0;
-- 
cgit v1.2.3


From 70eeff66c4696cee4076d6388b6bede5bd7ff71c Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Mon, 15 Jan 2018 12:24:49 -0800
Subject: qed: Fix potential use-after-free in qed_spq_post()

We need to check if p_ent->comp_mode is QED_SPQ_MODE_EBLOCK before
calling qed_spq_add_entry().  The test is fine is the mode is EBLOCK,
but if it isn't then qed_spq_add_entry() might kfree(p_ent).

Signed-off-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_spq.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index be48d9abd001..3588081b2e27 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -776,6 +776,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 	int rc = 0;
 	struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL;
 	bool b_ret_ent = true;
+	bool eblock;
 
 	if (!p_hwfn)
 		return -EINVAL;
@@ -794,6 +795,11 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 	if (rc)
 		goto spq_post_fail;
 
+	/* Check if entry is in block mode before qed_spq_add_entry,
+	 * which might kfree p_ent.
+	 */
+	eblock = (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK);
+
 	/* Add the request to the pending queue */
 	rc = qed_spq_add_entry(p_hwfn, p_ent, p_ent->priority);
 	if (rc)
@@ -811,7 +817,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 
 	spin_unlock_bh(&p_spq->lock);
 
-	if (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK) {
+	if (eblock) {
 		/* For entries in QED BLOCK mode, the completion code cannot
 		 * perform the necessary cleanup - if it did, we couldn't
 		 * access p_ent here to see whether it's successful or not.
-- 
cgit v1.2.3


From 81d947e2b8dd2394586c3eaffdd2357797d3bf59 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 15 Jan 2018 23:12:09 +0100
Subject: net, sched: fix panic when updating miniq {b,q}stats

While working on fixing another bug, I ran into the following panic
on arm64 by simply attaching clsact qdisc, adding a filter and running
traffic on ingress to it:

  [...]
  [  178.188591] Unable to handle kernel read from unreadable memory at virtual address 810fb501f000
  [  178.197314] Mem abort info:
  [  178.200121]   ESR = 0x96000004
  [  178.203168]   Exception class = DABT (current EL), IL = 32 bits
  [  178.209095]   SET = 0, FnV = 0
  [  178.212157]   EA = 0, S1PTW = 0
  [  178.215288] Data abort info:
  [  178.218175]   ISV = 0, ISS = 0x00000004
  [  178.222019]   CM = 0, WnR = 0
  [  178.224997] user pgtable: 4k pages, 48-bit VAs, pgd = 0000000023cb3f33
  [  178.231531] [0000810fb501f000] *pgd=0000000000000000
  [  178.236508] Internal error: Oops: 96000004 [#1] SMP
  [...]
  [  178.311855] CPU: 73 PID: 2497 Comm: ping Tainted: G        W        4.15.0-rc7+ #5
  [  178.319413] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017
  [  178.326887] pstate: 60400005 (nZCv daif +PAN -UAO)
  [  178.331685] pc : __netif_receive_skb_core+0x49c/0xac8
  [  178.336728] lr : __netif_receive_skb+0x28/0x78
  [  178.341161] sp : ffff00002344b750
  [  178.344465] x29: ffff00002344b750 x28: ffff810fbdfd0580
  [  178.349769] x27: 0000000000000000 x26: ffff000009378000
  [...]
  [  178.418715] x1 : 0000000000000054 x0 : 0000000000000000
  [  178.424020] Process ping (pid: 2497, stack limit = 0x000000009f0a3ff4)
  [  178.430537] Call trace:
  [  178.432976]  __netif_receive_skb_core+0x49c/0xac8
  [  178.437670]  __netif_receive_skb+0x28/0x78
  [  178.441757]  process_backlog+0x9c/0x160
  [  178.445584]  net_rx_action+0x2f8/0x3f0
  [...]

Reason is that sch_ingress and sch_clsact are doing mini_qdisc_pair_init()
which sets up miniq pointers to cpu_{b,q}stats from the underlying qdisc.
Problem is that this cannot work since they are actually set up right after
the qdisc ->init() callback in qdisc_create(), so first packet going into
sch_handle_ingress() tries to call mini_qdisc_bstats_cpu_update() and we
therefore panic.

In order to fix this, allocation of {b,q}stats needs to happen before we
call into ->init(). In net-next, there's already such option through commit
d59f5ffa59d8 ("net: sched: a dflt qdisc may be used with per cpu stats").
However, the bug needs to be fixed in net still for 4.15. Thus, include
these bits to reduce any merge churn and reuse the static_flags field to
set TCQ_F_CPUSTATS, and remove the allocation from qdisc_create() since
there is no other user left. Prashant Bhole ran into the same issue but
for net-next, thus adding him below as well as co-author. Same issue was
also reported by Sandipan Das when using bcc.

Fixes: 46209401f8f6 ("net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath")
Reference: https://lists.iovisor.org/pipermail/iovisor-dev/2018-January/001190.html
Reported-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
Co-authored-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Co-authored-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  2 ++
 net/sched/sch_api.c       | 15 +--------------
 net/sched/sch_generic.c   | 18 +++++++++++++++++-
 net/sched/sch_ingress.c   | 19 ++++---------------
 4 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 83a3e47d5845..becf86aa4ac6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -179,6 +179,7 @@ struct Qdisc_ops {
 	const struct Qdisc_class_ops	*cl_ops;
 	char			id[IFNAMSIZ];
 	int			priv_size;
+	unsigned int		static_flags;
 
 	int 			(*enqueue)(struct sk_buff *skb,
 					   struct Qdisc *sch,
@@ -444,6 +445,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
 			       unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops);
+void qdisc_free(struct Qdisc *qdisc);
 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 				const struct Qdisc_ops *ops, u32 parentid);
 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0f1eab99ff4e..52529b7f8d96 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1063,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 	}
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
-		if (qdisc_is_percpu_stats(sch)) {
-			sch->cpu_bstats =
-				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-			if (!sch->cpu_bstats)
-				goto err_out4;
-
-			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
-			if (!sch->cpu_qstats)
-				goto err_out4;
-		}
-
 		if (tca[TCA_STAB]) {
 			stab = qdisc_get_stab(tca[TCA_STAB]);
 			if (IS_ERR(stab)) {
@@ -1115,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 		ops->destroy(sch);
 err_out3:
 	dev_put(dev);
-	kfree((char *) sch - sch->padded);
+	qdisc_free(sch);
 err_out2:
 	module_put(ops->owner);
 err_out:
@@ -1123,8 +1112,6 @@ err_out:
 	return NULL;
 
 err_out4:
-	free_percpu(sch->cpu_bstats);
-	free_percpu(sch->cpu_qstats);
 	/*
 	 * Any broken qdiscs that would require a ops->reset() here?
 	 * The qdisc was never in action so it shouldn't be necessary.
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 661c7144b53a..cac003fddf3e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -633,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	qdisc_skb_head_init(&sch->q);
 	spin_lock_init(&sch->q.lock);
 
+	if (ops->static_flags & TCQ_F_CPUSTATS) {
+		sch->cpu_bstats =
+			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+		if (!sch->cpu_bstats)
+			goto errout1;
+
+		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+		if (!sch->cpu_qstats) {
+			free_percpu(sch->cpu_bstats);
+			goto errout1;
+		}
+	}
+
 	spin_lock_init(&sch->busylock);
 	lockdep_set_class(&sch->busylock,
 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -642,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  dev->qdisc_running_key ?: &qdisc_running_key);
 
 	sch->ops = ops;
+	sch->flags = ops->static_flags;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
@@ -649,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	refcount_set(&sch->refcnt, 1);
 
 	return sch;
+errout1:
+	kfree(p);
 errout:
 	return ERR_PTR(err);
 }
@@ -698,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-static void qdisc_free(struct Qdisc *qdisc)
+void qdisc_free(struct Qdisc *qdisc)
 {
 	if (qdisc_is_percpu_stats(qdisc)) {
 		free_percpu(qdisc->cpu_bstats);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index fc1286f499c1..003e1b063447 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -66,7 +66,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct ingress_sched_data *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
-	int err;
 
 	net_inc_ingress_queue();
 
@@ -76,13 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 	q->block_info.chain_head_change = clsact_chain_head_change;
 	q->block_info.chain_head_change_priv = &q->miniqp;
 
-	err = tcf_block_get_ext(&q->block, sch, &q->block_info);
-	if (err)
-		return err;
-
-	sch->flags |= TCQ_F_CPUSTATS;
-
-	return 0;
+	return tcf_block_get_ext(&q->block, sch, &q->block_info);
 }
 
 static void ingress_destroy(struct Qdisc *sch)
@@ -121,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
 	.cl_ops		=	&ingress_class_ops,
 	.id		=	"ingress",
 	.priv_size	=	sizeof(struct ingress_sched_data),
+	.static_flags	=	TCQ_F_CPUSTATS,
 	.init		=	ingress_init,
 	.destroy	=	ingress_destroy,
 	.dump		=	ingress_dump,
@@ -192,13 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
 	q->egress_block_info.chain_head_change = clsact_chain_head_change;
 	q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
 
-	err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
-	if (err)
-		return err;
-
-	sch->flags |= TCQ_F_CPUSTATS;
-
-	return 0;
+	return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
 }
 
 static void clsact_destroy(struct Qdisc *sch)
@@ -225,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
 	.cl_ops		=	&clsact_class_ops,
 	.id		=	"clsact",
 	.priv_size	=	sizeof(struct clsact_sched_data),
+	.static_flags	=	TCQ_F_CPUSTATS,
 	.init		=	clsact_init,
 	.destroy	=	clsact_destroy,
 	.dump		=	ingress_dump,
-- 
cgit v1.2.3