diff options
Diffstat (limited to 'net/packet')
-rw-r--r-- | net/packet/Kconfig | 8 | ||||
-rw-r--r-- | net/packet/Makefile | 2 | ||||
-rw-r--r-- | net/packet/af_packet.c | 157 | ||||
-rw-r--r-- | net/packet/diag.c | 242 | ||||
-rw-r--r-- | net/packet/internal.h | 121 |
5 files changed, 403 insertions, 127 deletions
diff --git a/net/packet/Kconfig b/net/packet/Kconfig index 0060e3b396b..cc55b35f80e 100644 --- a/net/packet/Kconfig +++ b/net/packet/Kconfig @@ -14,3 +14,11 @@ config PACKET be called af_packet. If unsure, say Y. + +config PACKET_DIAG + tristate "Packet: sockets monitoring interface" + depends on PACKET + default n + ---help--- + Support for PF_PACKET sockets monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/packet/Makefile b/net/packet/Makefile index 81183eabfde..9df61347a3c 100644 --- a/net/packet/Makefile +++ b/net/packet/Makefile @@ -3,3 +3,5 @@ # obj-$(CONFIG_PACKET) += af_packet.o +obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o +af_packet_diag-y += diag.o diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a..94060edbbd7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,8 @@ #include <net/inet_common.h> #endif +#include "internal.h" + /* Assumptions: - if device has no dev->hard_header routine, it adds and removes ll header @@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it) /* Private packet socket structures. */ -struct packet_mclist { - struct packet_mclist *next; - int ifindex; - int count; - unsigned short type; - unsigned short alen; - unsigned char addr[MAX_ADDR_LEN]; -}; /* identical to struct packet_mreq except it has * a longer address field. */ @@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -/* kbdq - kernel block descriptor queue */ -struct tpacket_kbdq_core { - struct pgv *pkbdq; - unsigned int feature_req_word; - unsigned int hdrlen; - unsigned char reset_pending_on_curr_blk; - unsigned char delete_blk_timer; - unsigned short kactive_blk_num; - unsigned short blk_sizeof_priv; - - /* last_kactive_blk_num: - * trick to see if user-space has caught up - * in order to avoid refreshing timer when every single pkt arrives. - */ - unsigned short last_kactive_blk_num; - - char *pkblk_start; - char *pkblk_end; - int kblk_size; - unsigned int knum_blocks; - uint64_t knxt_seq_num; - char *prev; - char *nxt_offset; - struct sk_buff *skb; - - atomic_t blk_fill_in_prog; - - /* Default is set to 8ms */ -#define DEFAULT_PRB_RETIRE_TOV (8) - - unsigned short retire_blk_tov; - unsigned short version; - unsigned long tov_in_jiffies; - - /* timer to retire an outstanding block */ - struct timer_list retire_blk_timer; -}; - #define PGV_FROM_VMALLOC 1 -struct pgv { - char *buffer; -}; - -struct packet_ring_buffer { - struct pgv *pg_vec; - unsigned int head; - unsigned int frames_per_block; - unsigned int frame_size; - unsigned int frame_max; - - unsigned int pg_vec_order; - unsigned int pg_vec_pages; - unsigned int pg_vec_len; - - struct tpacket_kbdq_core prb_bdqc; - atomic_t pending; -}; #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) @@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void packet_flush_mclist(struct sock *sk); -struct packet_fanout; -struct packet_sock { - /* struct sock has to be the first member of packet_sock */ - struct sock sk; - struct packet_fanout *fanout; - struct tpacket_stats stats; - union tpacket_stats_u stats_u; - struct packet_ring_buffer rx_ring; - struct packet_ring_buffer tx_ring; - int copy_thresh; - spinlock_t bind_lock; - struct mutex pg_vec_lock; - unsigned int running:1, /* prot_hook is attached*/ - auxdata:1, - origdev:1, - has_vnet_hdr:1; - int ifindex; /* bound device */ - __be16 num; - struct packet_mclist *mclist; - atomic_t mapped; - enum tpacket_versions tp_version; - unsigned int tp_hdrlen; - unsigned int tp_reserve; - unsigned int tp_loss:1; - unsigned int tp_tstamp; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - -#define PACKET_FANOUT_MAX 256 - -struct packet_fanout { -#ifdef CONFIG_NET_NS - struct net *net; -#endif - unsigned int num_members; - u16 id; - u8 type; - u8 defrag; - atomic_t rr_cur; - struct list_head list; - struct sock *arr[PACKET_FANOUT_MAX]; - spinlock_t lock; - atomic_t sk_ref; - struct packet_type prot_hook ____cacheline_aligned_in_smp; -}; - struct packet_skb_cb { unsigned int origlen; union { @@ -334,11 +226,6 @@ struct packet_skb_cb { (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \ ((x)->kactive_blk_num+1) : 0) -static struct packet_sock *pkt_sk(struct sock *sk) -{ - return (struct packet_sock *)sk; -} - static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); @@ -968,7 +855,8 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); ppd->tp_status = TP_STATUS_VLAN_VALID; } else { - ppd->hv1.tp_vlan_tci = ppd->tp_status = 0; + ppd->hv1.tp_vlan_tci = 0; + ppd->tp_status = TP_STATUS_AVAILABLE; } } @@ -1079,7 +967,7 @@ static void *packet_current_rx_frame(struct packet_sock *po, default: WARN(1, "TPACKET version not supported\n"); BUG(); - return 0; + return NULL; } } @@ -1243,7 +1131,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } -static DEFINE_MUTEX(fanout_mutex); +DEFINE_MUTEX(fanout_mutex); +EXPORT_SYMBOL_GPL(fanout_mutex); static LIST_HEAD(fanout_list); static void __fanout_link(struct sock *sk, struct packet_sock *po) @@ -1273,6 +1162,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) spin_unlock(&f->lock); } +static bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout) + return true; + + return false; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1325,6 +1222,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.id_match = match_fanout_group; dev_add_pack(&match->prot_hook); list_add(&match->list, &fanout_list); } @@ -1355,9 +1253,9 @@ static void fanout_release(struct sock *sk) if (!f) return; + mutex_lock(&fanout_mutex); po->fanout = NULL; - mutex_lock(&fanout_mutex); if (atomic_dec_and_test(&f->sk_ref)) { list_del(&f->list); dev_remove_pack(&f->prot_hook); @@ -1936,7 +1834,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); @@ -2055,7 +1952,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) int tp_len, size_max; unsigned char *addr; int len_sum = 0; - int status = 0; + int status = TP_STATUS_AVAILABLE; int hlen, tlen; mutex_lock(&po->pg_vec_lock); @@ -2420,10 +2317,13 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_del_node_init_rcu(sk); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); @@ -2622,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, register_prot_hook(sk); } - spin_lock_bh(&net->packet.sklist_lock); + mutex_lock(&net->packet.sklist_lock); sk_add_node_rcu(sk, &net->packet.sklist); + mutex_unlock(&net->packet.sklist_lock); + + preempt_disable(); sock_prot_inuse_add(net, &packet_proto, 1); - spin_unlock_bh(&net->packet.sklist_lock); + preempt_enable(); return 0; out: @@ -3846,7 +3749,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } @@ -3878,7 +3781,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - spin_lock_init(&net->packet.sklist_lock); + mutex_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) diff --git a/net/packet/diag.c b/net/packet/diag.c new file mode 100644 index 00000000000..8db6e21c46b --- /dev/null +++ b/net/packet/diag.c @@ -0,0 +1,242 @@ +#include <linux/module.h> +#include <linux/sock_diag.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/packet_diag.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" + +static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct packet_diag_info pinfo; + + pinfo.pdi_index = po->ifindex; + pinfo.pdi_version = po->tp_version; + pinfo.pdi_reserve = po->tp_reserve; + pinfo.pdi_copy_thresh = po->copy_thresh; + pinfo.pdi_tstamp = po->tp_tstamp; + + pinfo.pdi_flags = 0; + if (po->running) + pinfo.pdi_flags |= PDI_RUNNING; + if (po->auxdata) + pinfo.pdi_flags |= PDI_AUXDATA; + if (po->origdev) + pinfo.pdi_flags |= PDI_ORIGDEV; + if (po->has_vnet_hdr) + pinfo.pdi_flags |= PDI_VNETHDR; + if (po->tp_loss) + pinfo.pdi_flags |= PDI_LOSS; + + return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo); +} + +static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb) +{ + struct nlattr *mca; + struct packet_mclist *ml; + + mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST); + if (!mca) + return -EMSGSIZE; + + rtnl_lock(); + for (ml = po->mclist; ml; ml = ml->next) { + struct packet_diag_mclist *dml; + + dml = nla_reserve_nohdr(nlskb, sizeof(*dml)); + if (!dml) { + rtnl_unlock(); + nla_nest_cancel(nlskb, mca); + return -EMSGSIZE; + } + + dml->pdmc_index = ml->ifindex; + dml->pdmc_type = ml->type; + dml->pdmc_alen = ml->alen; + dml->pdmc_count = ml->count; + BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr)); + memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr)); + } + + rtnl_unlock(); + nla_nest_end(nlskb, mca); + + return 0; +} + +static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type, + struct sk_buff *nlskb) +{ + struct packet_diag_ring pdr; + + if (!ring->pg_vec || ((ver > TPACKET_V2) && + (nl_type == PACKET_DIAG_TX_RING))) + return 0; + + pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT; + pdr.pdr_block_nr = ring->pg_vec_len; + pdr.pdr_frame_size = ring->frame_size; + pdr.pdr_frame_nr = ring->frame_max + 1; + + if (ver > TPACKET_V2) { + pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov; + pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv; + pdr.pdr_features = ring->prb_bdqc.feature_req_word; + } else { + pdr.pdr_retire_tmo = 0; + pdr.pdr_sizeof_priv = 0; + pdr.pdr_features = 0; + } + + return nla_put(nlskb, nl_type, sizeof(pdr), &pdr); +} + +static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb) +{ + int ret; + + mutex_lock(&po->pg_vec_lock); + ret = pdiag_put_ring(&po->rx_ring, po->tp_version, + PACKET_DIAG_RX_RING, skb); + if (!ret) + ret = pdiag_put_ring(&po->tx_ring, po->tp_version, + PACKET_DIAG_TX_RING, skb); + mutex_unlock(&po->pg_vec_lock); + + return ret; +} + +static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) +{ + int ret = 0; + + mutex_lock(&fanout_mutex); + if (po->fanout) { + u32 val; + + val = (u32)po->fanout->id | ((u32)po->fanout->type << 16); + ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val); + } + mutex_unlock(&fanout_mutex); + + return ret; +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct packet_diag_msg *rp; + struct packet_sock *po = pkt_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + if (!nlh) + return -EMSGSIZE; + + rp = nlmsg_data(nlh); + rp->pdiag_family = AF_PACKET; + rp->pdiag_type = sk->sk_type; + rp->pdiag_num = ntohs(po->num); + rp->pdiag_ino = sk_ino; + sock_diag_save_cookie(sk, rp->pdiag_cookie); + + if ((req->pdiag_show & PACKET_SHOW_INFO) && + pdiag_put_info(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_MCLIST) && + pdiag_put_mclist(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_RING_CFG) && + pdiag_put_rings_cfg(po, skb)) + goto out_nlmsg_trim; + + if ((req->pdiag_show & PACKET_SHOW_FANOUT) && + pdiag_put_fanout(po, skb)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int num = 0, s_num = cb->args[0]; + struct packet_diag_req *req; + struct net *net; + struct sock *sk; + struct hlist_node *node; + + net = sock_net(skb->sk); + req = nlmsg_data(cb->nlh); + + mutex_lock(&net->packet.sklist_lock); + sk_for_each(sk, node, &net->packet.sklist) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) + goto next; + + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + sock_i_ino(sk)) < 0) + goto done; +next: + num++; + } +done: + mutex_unlock(&net->packet.sklist_lock); + cb->args[0] = num; + + return skb->len; +} + +static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct packet_diag_req); + struct net *net = sock_net(skb->sk); + struct packet_diag_req *req; + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + req = nlmsg_data(h); + /* Make it possible to support protocol filtering later */ + if (req->sdiag_protocol) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = packet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler packet_diag_handler = { + .family = AF_PACKET, + .dump = packet_diag_handler_dump, +}; + +static int __init packet_diag_init(void) +{ + return sock_diag_register(&packet_diag_handler); +} + +static void __exit packet_diag_exit(void) +{ + sock_diag_unregister(&packet_diag_handler); +} + +module_init(packet_diag_init); +module_exit(packet_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); diff --git a/net/packet/internal.h b/net/packet/internal.h new file mode 100644 index 00000000000..44945f6b725 --- /dev/null +++ b/net/packet/internal.h @@ -0,0 +1,121 @@ +#ifndef __PACKET_INTERNAL_H__ +#define __PACKET_INTERNAL_H__ + +struct packet_mclist { + struct packet_mclist *next; + int ifindex; + int count; + unsigned short type; + unsigned short alen; + unsigned char addr[MAX_ADDR_LEN]; +}; + +/* kbdq - kernel block descriptor queue */ +struct tpacket_kbdq_core { + struct pgv *pkbdq; + unsigned int feature_req_word; + unsigned int hdrlen; + unsigned char reset_pending_on_curr_blk; + unsigned char delete_blk_timer; + unsigned short kactive_blk_num; + unsigned short blk_sizeof_priv; + + /* last_kactive_blk_num: + * trick to see if user-space has caught up + * in order to avoid refreshing timer when every single pkt arrives. + */ + unsigned short last_kactive_blk_num; + + char *pkblk_start; + char *pkblk_end; + int kblk_size; + unsigned int knum_blocks; + uint64_t knxt_seq_num; + char *prev; + char *nxt_offset; + struct sk_buff *skb; + + atomic_t blk_fill_in_prog; + + /* Default is set to 8ms */ +#define DEFAULT_PRB_RETIRE_TOV (8) + + unsigned short retire_blk_tov; + unsigned short version; + unsigned long tov_in_jiffies; + + /* timer to retire an outstanding block */ + struct timer_list retire_blk_timer; +}; + +struct pgv { + char *buffer; +}; + +struct packet_ring_buffer { + struct pgv *pg_vec; + unsigned int head; + unsigned int frames_per_block; + unsigned int frame_size; + unsigned int frame_max; + + unsigned int pg_vec_order; + unsigned int pg_vec_pages; + unsigned int pg_vec_len; + + struct tpacket_kbdq_core prb_bdqc; + atomic_t pending; +}; + +extern struct mutex fanout_mutex; +#define PACKET_FANOUT_MAX 256 + +struct packet_fanout { +#ifdef CONFIG_NET_NS + struct net *net; +#endif + unsigned int num_members; + u16 id; + u8 type; + u8 defrag; + atomic_t rr_cur; + struct list_head list; + struct sock *arr[PACKET_FANOUT_MAX]; + spinlock_t lock; + atomic_t sk_ref; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +struct packet_sock { + /* struct sock has to be the first member of packet_sock */ + struct sock sk; + struct packet_fanout *fanout; + struct tpacket_stats stats; + union tpacket_stats_u stats_u; + struct packet_ring_buffer rx_ring; + struct packet_ring_buffer tx_ring; + int copy_thresh; + spinlock_t bind_lock; + struct mutex pg_vec_lock; + unsigned int running:1, /* prot_hook is attached*/ + auxdata:1, + origdev:1, + has_vnet_hdr:1; + int ifindex; /* bound device */ + __be16 num; + struct packet_mclist *mclist; + atomic_t mapped; + enum tpacket_versions tp_version; + unsigned int tp_hdrlen; + unsigned int tp_reserve; + unsigned int tp_loss:1; + unsigned int tp_tstamp; + struct packet_type prot_hook ____cacheline_aligned_in_smp; +}; + +static struct packet_sock *pkt_sk(struct sock *sk) +{ + return (struct packet_sock *)sk; +} + +#endif |