From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:08:56 -0700 Subject: [MLSXFRM]: Add security sid to sock This adds security for IP sockets at the sock level. Security at the sock level is needed to enforce the SELinux security policy for security associations even when a sock is orphaned (such as in the TCP LAST_ACK state). This will also be used to enforce SELinux controls over data arriving at or leaving a child socket while it's still waiting to be accepted. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/net/sock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea233d..91cdceb3c02 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -972,6 +972,19 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } +static inline void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); -- cgit v1.2.3-70-g09d2 From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:50 -0700 Subject: [MLSXFRM]: Auto-labeling of child sockets This automatically labels the TCP, Unix stream, and dccp child sockets as well as openreqs to be at the same MLS level as the peer. This will result in the selection of appropriately labeled IPSec Security Associations. This also uses the sock's sid (as opposed to the isec sid) in SELinux enforcement of secmark in rcv_skb and postroute_last hooks. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 55 ++++++++++++++++ include/net/request_sock.h | 1 + include/net/sock.h | 1 + net/dccp/ipv4.c | 3 + net/dccp/ipv6.c | 7 +- net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 3 + net/ipv6/tcp_ipv6.c | 6 +- security/dummy.c | 24 +++++++ security/selinux/hooks.c | 137 +++++++++++++++++++++++++++------------- security/selinux/xfrm.c | 1 - 12 files changed, 197 insertions(+), 51 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/linux/security.h b/include/linux/security.h index 8e3dc6c51a6..bb4c80fdfe7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; +struct request_sock; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -819,6 +820,14 @@ struct swap_info_struct; * @sk_getsecid: * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. * @@ -1358,6 +1367,11 @@ struct security_operations { void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock* sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); + void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { security_ops->sk_getsecid(sk, &fl->secid); } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + security_ops->req_classify_flow(req, fl); +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ + security_ops->sock_graft(sk, parent); +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return security_ops->inet_conn_request(sk, skb, req); +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ + security_ops->inet_csk_clone(newsk, req); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c5d7f920c35..8e165ca16bd 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -53,6 +53,7 @@ struct request_sock { unsigned long expires; struct request_sock_ops *rsk_ops; struct sock *sk; + u32 secid; }; static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) diff --git a/include/net/sock.h b/include/net/sock.h index 91cdceb3c02..337ebec84c7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -969,6 +969,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; + security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 386498053b1..171d363876e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 53d255c0143..231bc7c7e74 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -424,7 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -626,7 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -709,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 772b4eac78b..07204391d08 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,7 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 307dc3c0d63..661e0a4bca7 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,7 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a..43f6740244f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 46922e57e31..302786a11cd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -470,7 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -826,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; + security_inet_conn_request(sk, skb, req); + if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -929,7 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/security/dummy.c b/security/dummy.c index 66cc0640493..1c45f8e4aad 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -812,6 +812,26 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { } + +static inline void dummy_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int dummy_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void dummy_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} + +static inline void dummy_req_classify_flow(const struct request_sock *req, + struct flowi *fl) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1084,6 +1104,10 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsecid); + set_to_dummy_if_null(ops, sock_graft); + set_to_dummy_if_null(ops, inet_conn_request); + set_to_dummy_if_null(ops, inet_csk_clone); + set_to_dummy_if_null(ops, req_classify_flow); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4e5989d584c..1dc935f7b91 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3328,8 +3328,9 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, /* server child socket */ ssec = newsk->sk_security; ssec->peer_sid = isec->sid; - - return 0; + err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid); + + return err; } static int selinux_socket_unix_may_send(struct socket *sock, @@ -3355,11 +3356,29 @@ static int selinux_socket_unix_may_send(struct socket *sock, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, u32 sock_sid, u16 sock_class, - u16 family, char *addrp, int len) + struct avc_audit_data *ad, u16 family, char *addrp, int len) { int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0; + struct socket *sock; + u16 sock_class = 0; + u32 sock_sid = 0; + + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (sock) { + struct inode *inode; + inode = SOCK_INODE(sock); + if (inode) { + struct inode_security_struct *isec; + isec = inode->i_security; + sock_sid = isec->sid; + sock_class = isec->sclass; + } + } + read_unlock_bh(&sk->sk_callback_lock); + if (!sock_sid) + goto out; if (!skb->dev) goto out; @@ -3419,12 +3438,10 @@ out: static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { u16 family; - u16 sock_class = 0; char *addrp; int len, err = 0; - u32 sock_sid = 0; - struct socket *sock; struct avc_audit_data ad; + struct sk_security_struct *sksec = sk->sk_security; family = sk->sk_family; if (family != PF_INET && family != PF_INET6) @@ -3434,22 +3451,6 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP)) family = PF_INET; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (sock) { - struct inode *inode; - inode = SOCK_INODE(sock); - if (inode) { - struct inode_security_struct *isec; - isec = inode->i_security; - sock_sid = isec->sid; - sock_class = isec->sclass; - } - } - read_unlock_bh(&sk->sk_callback_lock); - if (!sock_sid) - goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]"; ad.u.net.family = family; @@ -3459,16 +3460,15 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; if (selinux_compat_net) - err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid, - sock_class, family, + err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family, addrp, len); else - err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; } @@ -3572,6 +3572,49 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid) } } +void selinux_sock_graft(struct sock* sk, struct socket *parent) +{ + struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + isec->sid = sksec->sid; +} + +int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct sk_security_struct *sksec = sk->sk_security; + int err; + u32 newsid = 0; + u32 peersid; + + err = selinux_xfrm_decode_session(skb, &peersid, 0); + BUG_ON(err); + + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); + if (err) + return err; + + req->secid = newsid; + return 0; +} + +void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) +{ + struct sk_security_struct *newsksec = newsk->sk_security; + + newsksec->sid = req->secid; + /* NOTE: Ideally, we should also get the isec->sid for the + new socket in sync, but we don't have the isec available yet. + So we will wait until sock_graft to do it, by which + time it will have been created and available. */ +} + +void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + fl->secid = req->secid; +} + static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { int err = 0; @@ -3611,12 +3654,24 @@ out: #ifdef CONFIG_NETFILTER static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev, - struct inode_security_struct *isec, struct avc_audit_data *ad, u16 family, char *addrp, int len) { - int err; + int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0; + struct socket *sock; + struct inode *inode; + struct inode_security_struct *isec; + + sock = sk->sk_socket; + if (!sock) + goto out; + + inode = SOCK_INODE(sock); + if (!inode) + goto out; + + isec = inode->i_security; err = sel_netif_sids(dev, &if_sid, NULL); if (err) @@ -3681,26 +3736,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; - struct socket *sock; - struct inode *inode; struct sk_buff *skb = *pskb; - struct inode_security_struct *isec; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; + struct sk_security_struct *sksec; sk = skb->sk; if (!sk) goto out; - sock = sk->sk_socket; - if (!sock) - goto out; - - inode = SOCK_INODE(sock); - if (!inode) - goto out; - - isec = inode->i_security; + sksec = sk->sk_security; AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = dev->name; @@ -3711,16 +3756,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, goto out; if (selinux_compat_net) - err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad, + err = selinux_ip_postroute_last_compat(sk, dev, &ad, family, addrp, len); else - err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad); if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); + err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4623,6 +4668,10 @@ static struct security_operations selinux_ops = { .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, .sk_getsecid = selinux_sk_getsecid, + .sock_graft = selinux_sock_graft, + .inet_conn_request = selinux_inet_conn_request, + .inet_csk_clone = selinux_inet_csk_clone, + .req_classify_flow = selinux_req_classify_flow, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d3690f98513..3e742b850af 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -271,7 +271,6 @@ not_from_user: goto out; } - ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; ctx->ctx_sid = ctx_sid; -- cgit v1.2.3-70-g09d2 From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Thu, 31 Aug 2006 15:28:39 -0700 Subject: [NET]: Fix sk->sk_filter field access Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. Signed-off-by: Dmitry Mishin Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev --- include/linux/filter.h | 13 +++++++------ include/net/sock.h | 34 +++++++++++++++++----------------- net/core/filter.c | 8 ++++---- net/core/sock.c | 22 +++++++++------------- net/dccp/ipv6.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/packet/af_packet.c | 43 ++++++++++++++++++------------------------- net/sctp/input.c | 2 +- 10 files changed, 61 insertions(+), 71 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/linux/filter.h b/include/linux/filter.h index c6cb8f09508..91b2e3b9251 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) diff --git a/include/net/sock.h b/include/net/sock.h index 337ebec84c7..edd4d73ce7f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); * */ -static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { int err; + struct sk_filter *filter; err = security_sock_rcv_skb(sk, skb); if (err) return err; - if (sk->sk_filter) { - struct sk_filter *filter; - - if (needlock) - bh_lock_sock(sk); - - filter = sk->sk_filter; - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - - if (needlock) - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = sk->sk_filter; + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } + rcu_read_unlock_bh(); + return err; } @@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) * Remove a filter from a socket and release its resources. */ +static inline void sk_filter_rcu_free(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + kfree(fp); +} + static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); @@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) atomic_sub(size, &sk->sk_omem_alloc); if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_rcu_free); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf..6732782a5a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - spin_lock_bh(&sk->sk_lock.slock); - old_fp = sk->sk_filter; - sk->sk_filter = fp; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); fp = old_fp; } diff --git a/net/core/sock.c b/net/core/sock.c index cfaf09039b0..b77e155cbe6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - /* It would be deadlock, if sock_queue_rcv_skb is used - with socket lock! We assume that users of this - function are lock free. - */ - err = sk_filter(sk, skb, 1); + err = sk_filter(sk, skb); if (err) goto out; @@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; @@ -606,15 +602,15 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - spin_lock_bh(&sk->sk_lock.slock); - filter = sk->sk_filter; + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); if (filter) { - sk->sk_filter = NULL; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_release(sk, filter); + rcu_read_unlock_bh(); break; } - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_unlock_bh(); ret = -ENONET; break; @@ -884,10 +880,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { sk_filter_release(sk, filter); - sk->sk_filter = NULL; + rcu_assign_pointer(sk->sk_filter, NULL); } sock_disable_timestamp(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f9c5e12d703..7a47399cf31 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 86f7f3b28e7..72ecc6e62ec 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb, 0); + err = sk_filter(sk, skb); if (err) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23b46e36b14..39b17985608 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1104,7 +1104,7 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b18918f301..2546fc9f0a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* @@ -1232,7 +1232,7 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 300215bdbf4..f4ccb90e673 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -427,21 +427,24 @@ out_unlock: } #endif -static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) +static inline int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned *snaplen) { struct sk_filter *filter; + int err = 0; - bh_lock_sock(sk); - filter = sk->sk_filter; - /* - * Our caller already checked that filter != NULL but we need to - * verify that under bh_lock_sock() to be safe - */ - if (likely(filter != NULL)) - res = sk_run_filter(skb, filter->insns, filter->len); - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) { + err = sk_run_filter(skb, filter->insns, filter->len); + if (!err) + err = -EPERM; + else if (*snaplen > err) + *snaplen = err; + } + rcu_read_unlock_bh(); - return res; + return err; } /* @@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8a34d95602c..03f65de75d8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb, 1)) + if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ -- cgit v1.2.3-70-g09d2