Diffstat (limited to 'net')
27 files changed, 2330 insertions, 548 deletions
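For orientation before the hunks below: the patch moves the MLS fields of the NetLabel security attribute under an attr.mls member and has callers record the labeling protocol in secattr->type (e.g. NETLBL_NLTYPE_CIPSOV4), and it replaces the spinlock-protected protocol count with the atomic_t netlabel_mgmt_protocount. The following is only an editor's illustrative sketch of those two access patterns as a self-contained userspace C program; the struct layout, flag values, and type constants here are simplified stand-ins inferred from the hunks, not the real <net/netlabel.h> definitions.

/*
 * Illustrative sketch only -- reduced stand-ins for the kernel types,
 * showing the post-patch access patterns seen in the diff below.
 */
#include <stdio.h>
#include <stdatomic.h>

#define NETLBL_SECATTR_MLS_LVL 0x01   /* illustrative value, not the kernel's */
#define NETLBL_NLTYPE_CIPSOV4  3      /* illustrative value, not the kernel's */

struct fake_lsm_secattr {
	unsigned int flags;
	unsigned int type;                /* new: which protocol labeled this */
	struct {
		struct {
			unsigned int lvl; /* was secattr->mls_lvl */
			void *cat;        /* was secattr->mls_cat */
		} mls;
	} attr;                           /* new: per-type attribute member */
};

/* was a u32 guarded by netlabel_mgmt_protocount_lock */
static atomic_uint netlabel_mgmt_protocount;

int main(void)
{
	struct fake_lsm_secattr secattr = { 0 };

	/* pattern used by the cipso_v4_parsetag_*() hunks after the patch */
	secattr.attr.mls.lvl = 4;
	secattr.flags |= NETLBL_SECATTR_MLS_LVL;
	secattr.type = NETLBL_NLTYPE_CIPSOV4;

	/* pattern used by netlbl_cipsov4_add()/netlbl_enabled() after the patch */
	atomic_fetch_add(&netlabel_mgmt_protocount, 1);
	printf("NetLabel enabled: %d\n",
	       atomic_load(&netlabel_mgmt_protocount) > 0);
	atomic_fetch_sub(&netlabel_mgmt_protocount, 1);
	return 0;
}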
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index d4dc4eb48d9..a2241060113 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -348,6 +348,7 @@ static int cipso_v4_cache_check(const unsigned char *key, atomic_inc(&entry->lsm_data->refcount); secattr->cache = entry->lsm_data; secattr->flags |= NETLBL_SECATTR_CACHE; + secattr->type = NETLBL_NLTYPE_CIPSOV4; if (prev_entry == NULL) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -865,7 +866,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, } for (;;) { - host_spot = netlbl_secattr_catmap_walk(secattr->mls_cat, + host_spot = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, host_spot + 1); if (host_spot < 0) break; @@ -948,7 +949,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, return -EPERM; break; } - ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, + ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat, host_spot, GFP_ATOMIC); if (ret_val != 0) @@ -1014,7 +1015,8 @@ static int cipso_v4_map_cat_enum_hton(const struct cipso_v4_doi *doi_def, u32 cat_iter = 0; for (;;) { - cat = netlbl_secattr_catmap_walk(secattr->mls_cat, cat + 1); + cat = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, + cat + 1); if (cat < 0) break; if ((cat_iter + 2) > net_cat_len) @@ -1049,7 +1051,7 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, u32 iter; for (iter = 0; iter < net_cat_len; iter += 2) { - ret_val = netlbl_secattr_catmap_setbit(secattr->mls_cat, + ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat, ntohs(get_unaligned((__be16 *)&net_cat[iter])), GFP_ATOMIC); if (ret_val != 0) @@ -1130,7 +1132,8 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, return -ENOSPC; for (;;) { - iter = netlbl_secattr_catmap_walk(secattr->mls_cat, iter + 1); + iter = netlbl_secattr_catmap_walk(secattr->attr.mls.cat, + iter + 1); if (iter < 0) break; cat_size += (iter == 0 ? 
0 : sizeof(u16)); @@ -1138,7 +1141,8 @@ static int cipso_v4_map_cat_rng_hton(const struct cipso_v4_doi *doi_def, return -ENOSPC; array[array_cnt++] = iter; - iter = netlbl_secattr_catmap_walk_rng(secattr->mls_cat, iter); + iter = netlbl_secattr_catmap_walk_rng(secattr->attr.mls.cat, + iter); if (iter < 0) return -EFAULT; cat_size += sizeof(u16); @@ -1191,7 +1195,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, else cat_low = 0; - ret_val = netlbl_secattr_catmap_setrng(secattr->mls_cat, + ret_val = netlbl_secattr_catmap_setrng(secattr->attr.mls.cat, cat_low, cat_high, GFP_ATOMIC); @@ -1251,7 +1255,9 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, if ((secattr->flags & NETLBL_SECATTR_MLS_LVL) == 0) return -EPERM; - ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + ret_val = cipso_v4_map_lvl_hton(doi_def, + secattr->attr.mls.lvl, + &level); if (ret_val != 0) return ret_val; @@ -1303,12 +1309,13 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; - secattr->mls_lvl = level; + secattr->attr.mls.lvl = level; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->mls_cat == NULL) + secattr->attr.mls.cat = + netlbl_secattr_catmap_alloc(GFP_ATOMIC); + if (secattr->attr.mls.cat == NULL) return -ENOMEM; ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, @@ -1316,7 +1323,7 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->mls_cat); + netlbl_secattr_catmap_free(secattr->attr.mls.cat); return ret_val; } @@ -1350,7 +1357,9 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) return -EPERM; - ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + ret_val = cipso_v4_map_lvl_hton(doi_def, + secattr->attr.mls.lvl, + &level); if (ret_val != 0) return ret_val; @@ -1396,12 +1405,13 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; - secattr->mls_lvl = level; + secattr->attr.mls.lvl = level; secattr->flags |= NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->mls_cat == NULL) + secattr->attr.mls.cat = + netlbl_secattr_catmap_alloc(GFP_ATOMIC); + if (secattr->attr.mls.cat == NULL) return -ENOMEM; ret_val = cipso_v4_map_cat_enum_ntoh(doi_def, @@ -1409,7 +1419,7 @@ static int cipso_v4_parsetag_enum(const struct cipso_v4_doi *doi_def, tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->mls_cat); + netlbl_secattr_catmap_free(secattr->attr.mls.cat); return ret_val; } @@ -1443,7 +1453,9 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, if (!(secattr->flags & NETLBL_SECATTR_MLS_LVL)) return -EPERM; - ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); + ret_val = cipso_v4_map_lvl_hton(doi_def, + secattr->attr.mls.lvl, + &level); if (ret_val != 0) return ret_val; @@ -1488,12 +1500,13 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); if (ret_val != 0) return ret_val; - secattr->mls_lvl = level; + secattr->attr.mls.lvl = level; secattr->flags |= 
NETLBL_SECATTR_MLS_LVL; if (tag_len > 4) { - secattr->mls_cat = netlbl_secattr_catmap_alloc(GFP_ATOMIC); - if (secattr->mls_cat == NULL) + secattr->attr.mls.cat = + netlbl_secattr_catmap_alloc(GFP_ATOMIC); + if (secattr->attr.mls.cat == NULL) return -ENOMEM; ret_val = cipso_v4_map_cat_rng_ntoh(doi_def, @@ -1501,7 +1514,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, tag_len - 4, secattr); if (ret_val != 0) { - netlbl_secattr_catmap_free(secattr->mls_cat); + netlbl_secattr_catmap_free(secattr->attr.mls.cat); return ret_val; } @@ -1850,6 +1863,8 @@ static int cipso_v4_getattr(const unsigned char *cipso, ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; } + if (ret_val == 0) + secattr->type = NETLBL_NLTYPE_CIPSOV4; getattr_return: rcu_read_unlock(); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4fc0b023cfd..6cae5475737 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -99,7 +99,7 @@ config IP6_NF_MATCH_HL config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' depends on IP6_NF_IPTABLES - depends on NETFILTER_ADVANCED + default m if NETFILTER_ADVANCED=n help This module allows one to match packets based upon the ipv6 extension headers. diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index b11b3ecbb39..7708e2084ce 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -72,12 +72,13 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) return false; } - err = selinux_relabel_packet_permission(sel->selsid); + err = selinux_secmark_relabel_packet_permission(sel->selsid); if (err) { printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); return false; } + selinux_secmark_refcount_inc(); return true; } @@ -110,11 +111,20 @@ secmark_tg_check(const char *tablename, const void *entry, return true; } +void secmark_tg_destroy(const struct xt_target *target, void *targinfo) +{ + switch (mode) { + case SECMARK_MODE_SEL: + selinux_secmark_refcount_dec(); + } +} + static struct xt_target secmark_tg_reg[] __read_mostly = { { .name = "SECMARK", .family = AF_INET, .checkentry = secmark_tg_check, + .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), .table = "mangle", @@ -124,6 +134,7 @@ static struct xt_target secmark_tg_reg[] __read_mostly = { .name = "SECMARK", .family = AF_INET6, .checkentry = secmark_tg_check, + .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), .table = "mangle", diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index ba0ca8d3f77..becf91a952a 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -38,6 +38,7 @@ #include <net/genetlink.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> +#include <asm/atomic.h> #include "netlabel_user.h" #include "netlabel_cipso_v4.h" @@ -421,7 +422,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) break; } if (ret_val == 0) - netlbl_mgmt_protocount_inc(); + atomic_inc(&netlabel_mgmt_protocount); audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, &audit_info); @@ -698,7 +699,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) &audit_info, netlbl_cipsov4_doi_free); if (ret_val == 0) - netlbl_mgmt_protocount_dec(); + atomic_dec(&netlabel_mgmt_protocount); audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, 
&audit_info); diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index b3675bd7db3..9a8ea0195c4 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -54,9 +54,6 @@ struct netlbl_domhsh_tbl { * hash table should be okay */ static DEFINE_SPINLOCK(netlbl_domhsh_lock); static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; - -/* Default domain mapping */ -static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); static struct netlbl_dom_map *netlbl_domhsh_def = NULL; /* @@ -109,17 +106,14 @@ static u32 netlbl_domhsh_hash(const char *key) /** * netlbl_domhsh_search - Search for a domain entry * @domain: the domain - * @def: return default if no match is found * * Description: * Searches the domain hash table and returns a pointer to the hash table - * entry if found, otherwise NULL is returned. If @def is non-zero and a - * match is not found in the domain hash table the default mapping is returned - * if it exists. The caller is responsibile for the rcu hash table locks - * (i.e. the caller much call rcu_read_[un]lock()). + * entry if found, otherwise NULL is returned. The caller is responsibile for + * the rcu hash table locks (i.e. the caller much call rcu_read_[un]lock()). * */ -static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) +static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) { u32 bkt; struct netlbl_dom_map *iter; @@ -133,10 +127,31 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) return iter; } - if (def != 0) { - iter = rcu_dereference(netlbl_domhsh_def); - if (iter != NULL && iter->valid) - return iter; + return NULL; +} + +/** + * netlbl_domhsh_search_def - Search for a domain entry + * @domain: the domain + * @def: return default if no match is found + * + * Description: + * Searches the domain hash table and returns a pointer to the hash table + * entry if an exact match is found, if an exact match is not present in the + * hash table then the default entry is returned if valid otherwise NULL is + * returned. The caller is responsibile for the rcu hash table locks + * (i.e. the caller much call rcu_read_[un]lock()). 
+ * + */ +static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) +{ + struct netlbl_dom_map *entry; + + entry = netlbl_domhsh_search(domain); + if (entry == NULL) { + entry = rcu_dereference(netlbl_domhsh_def); + if (entry != NULL && entry->valid) + return entry; } return NULL; @@ -221,24 +236,22 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, INIT_RCU_HEAD(&entry->rcu); rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); if (entry->domain != NULL) { bkt = netlbl_domhsh_hash(entry->domain); - spin_lock(&netlbl_domhsh_lock); - if (netlbl_domhsh_search(entry->domain, 0) == NULL) + if (netlbl_domhsh_search(entry->domain) == NULL) list_add_tail_rcu(&entry->list, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); else ret_val = -EEXIST; - spin_unlock(&netlbl_domhsh_lock); } else { INIT_LIST_HEAD(&entry->list); - spin_lock(&netlbl_domhsh_def_lock); if (rcu_dereference(netlbl_domhsh_def) == NULL) rcu_assign_pointer(netlbl_domhsh_def, entry); else ret_val = -EEXIST; - spin_unlock(&netlbl_domhsh_def_lock); } + spin_unlock(&netlbl_domhsh_lock); audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); if (audit_buf != NULL) { audit_log_format(audit_buf, @@ -307,7 +320,10 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) struct audit_buffer *audit_buf; rcu_read_lock(); - entry = netlbl_domhsh_search(domain, (domain != NULL ? 0 : 1)); + if (domain) + entry = netlbl_domhsh_search(domain); + else + entry = netlbl_domhsh_search_def(domain); if (entry == NULL) goto remove_return; switch (entry->type) { @@ -316,23 +332,16 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) entry->domain); break; } - if (entry != rcu_dereference(netlbl_domhsh_def)) { - spin_lock(&netlbl_domhsh_lock); - if (entry->valid) { - entry->valid = 0; + spin_lock(&netlbl_domhsh_lock); + if (entry->valid) { + entry->valid = 0; + if (entry != rcu_dereference(netlbl_domhsh_def)) list_del_rcu(&entry->list); - ret_val = 0; - } - spin_unlock(&netlbl_domhsh_lock); - } else { - spin_lock(&netlbl_domhsh_def_lock); - if (entry->valid) { - entry->valid = 0; + else rcu_assign_pointer(netlbl_domhsh_def, NULL); - ret_val = 0; - } - spin_unlock(&netlbl_domhsh_def_lock); + ret_val = 0; } + spin_unlock(&netlbl_domhsh_lock); audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); if (audit_buf != NULL) { @@ -377,7 +386,7 @@ int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) */ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) { - return netlbl_domhsh_search(domain, 1); + return netlbl_domhsh_search_def(domain); } /** diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 4f50949722a..c69e3e1f05c 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -34,6 +34,7 @@ #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <asm/bug.h> +#include <asm/atomic.h> #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" @@ -262,7 +263,7 @@ int netlbl_enabled(void) /* At some point we probably want to expose this mechanism to the user * as well so that admins can toggle NetLabel regardless of the * configuration */ - return (netlbl_mgmt_protocount_value() > 0 ? 
1 : 0); + return (atomic_read(&netlabel_mgmt_protocount) > 0); } /** @@ -311,7 +312,7 @@ socket_setattr_return: * @secattr: the security attributes * * Description: - * Examines the given sock to see any NetLabel style labeling has been + * Examines the given sock to see if any NetLabel style labeling has been * applied to the sock, if so it parses the socket label and returns the * security attributes in @secattr. Returns zero on success, negative values * on failure. @@ -319,18 +320,13 @@ socket_setattr_return: */ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { - int ret_val; - - ret_val = cipso_v4_sock_getattr(sk, secattr); - if (ret_val == 0) - return 0; - - return netlbl_unlabel_getattr(secattr); + return cipso_v4_sock_getattr(sk, secattr); } /** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet + * @family: protocol family * @secattr: the security attributes * * Description: @@ -341,13 +337,14 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) * */ int netlbl_skbuff_getattr(const struct sk_buff *skb, + u16 family, struct netlbl_lsm_secattr *secattr) { if (CIPSO_V4_OPTEXIST(skb) && cipso_v4_skbuff_getattr(skb, secattr) == 0) return 0; - return netlbl_unlabel_getattr(secattr); + return netlbl_unlabel_getattr(skb, family, secattr); } /** @@ -431,6 +428,10 @@ static int __init netlbl_init(void) if (ret_val != 0) goto init_failure; + ret_val = netlbl_unlabel_init(NETLBL_UNLHSH_BITSIZE); + if (ret_val != 0) + goto init_failure; + ret_val = netlbl_netlink_init(); if (ret_val != 0) goto init_failure; diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 9c41464d58d..e2258dc3c84 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -37,14 +37,14 @@ #include <net/genetlink.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> +#include <asm/atomic.h> #include "netlabel_domainhash.h" #include "netlabel_user.h" #include "netlabel_mgmt.h" -/* NetLabel configured protocol count */ -static DEFINE_SPINLOCK(netlabel_mgmt_protocount_lock); -static u32 netlabel_mgmt_protocount = 0; +/* NetLabel configured protocol counter */ +atomic_t netlabel_mgmt_protocount = ATOMIC_INIT(0); /* Argument struct for netlbl_domhsh_walk() */ struct netlbl_domhsh_walk_arg { @@ -71,63 +71,6 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { }; /* - * NetLabel Misc Management Functions - */ - -/** - * netlbl_mgmt_protocount_inc - Increment the configured labeled protocol count - * - * Description: - * Increment the number of labeled protocol configurations in the current - * NetLabel configuration. Keep track of this for use in determining if - * NetLabel label enforcement should be active/enabled or not in the LSM. - * - */ -void netlbl_mgmt_protocount_inc(void) -{ - spin_lock(&netlabel_mgmt_protocount_lock); - netlabel_mgmt_protocount++; - spin_unlock(&netlabel_mgmt_protocount_lock); -} - -/** - * netlbl_mgmt_protocount_dec - Decrement the configured labeled protocol count - * - * Description: - * Decrement the number of labeled protocol configurations in the current - * NetLabel configuration. Keep track of this for use in determining if - * NetLabel label enforcement should be active/enabled or not in the LSM. 
- * - */ -void netlbl_mgmt_protocount_dec(void) -{ - spin_lock(&netlabel_mgmt_protocount_lock); - if (netlabel_mgmt_protocount > 0) - netlabel_mgmt_protocount--; - spin_unlock(&netlabel_mgmt_protocount_lock); -} - -/** - * netlbl_mgmt_protocount_value - Return the number of configured protocols - * - * Description: - * Return the number of labeled protocols in the current NetLabel - * configuration. This value is useful in determining if NetLabel label - * enforcement should be active/enabled or not in the LSM. - * - */ -u32 netlbl_mgmt_protocount_value(void) -{ - u32 val; - - rcu_read_lock(); - val = netlabel_mgmt_protocount; - rcu_read_unlock(); - - return val; -} - -/* * NetLabel Command Handlers */ diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index ccb2b392359..a43bff169d6 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -32,6 +32,7 @@ #define _NETLABEL_MGMT_H #include <net/netlabel.h> +#include <asm/atomic.h> /* * The following NetLabel payloads are supported by the management interface. @@ -168,9 +169,7 @@ enum { /* NetLabel protocol functions */ int netlbl_mgmt_genl_init(void); -/* NetLabel misc management functions */ -void netlbl_mgmt_protocount_inc(void); -void netlbl_mgmt_protocount_dec(void); -u32 netlbl_mgmt_protocount_value(void); +/* NetLabel configured protocol reference counter */ +extern atomic_t netlabel_mgmt_protocount; #endif diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 348292450de..42e81fd8cc4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,22 +36,92 @@ #include <linux/string.h> #include <linux/skbuff.h> #include <linux/audit.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/notifier.h> +#include <linux/netdevice.h> +#include <linux/security.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> - +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/net_namespace.h> #include <net/netlabel.h> #include <asm/bug.h> +#include <asm/atomic.h> #include "netlabel_user.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" +#include "netlabel_mgmt.h" + +/* NOTE: at present we always use init's network namespace since we don't + * presently support different namespaces even though the majority of + * the functions in this file are "namespace safe" */ + +/* The unlabeled connection hash table which we use to map network interfaces + * and addresses of unlabeled packets to a user specified secid value for the + * LSM. The hash table is used to lookup the network interface entry + * (struct netlbl_unlhsh_iface) and then the interface entry is used to + * lookup an IP address match from an ordered list. If a network interface + * match can not be found in the hash table then the default entry + * (netlbl_unlhsh_def) is used. The IP address entry list + * (struct netlbl_unlhsh_addr) is ordered such that the entries with a + * larger netmask come first. 
+ */ +struct netlbl_unlhsh_tbl { + struct list_head *tbl; + u32 size; +}; +struct netlbl_unlhsh_addr4 { + __be32 addr; + __be32 mask; + u32 secid; + + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; +struct netlbl_unlhsh_addr6 { + struct in6_addr addr; + struct in6_addr mask; + u32 secid; + + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; +struct netlbl_unlhsh_iface { + int ifindex; + struct list_head addr4_list; + struct list_head addr6_list; + + u32 valid; + struct list_head list; + struct rcu_head rcu; +}; + +/* Argument struct for netlbl_unlhsh_walk() */ +struct netlbl_unlhsh_walk_arg { + struct netlink_callback *nl_cb; + struct sk_buff *skb; + u32 seq; +}; + +/* Unlabeled connection hash table */ +/* updates should be so rare that having one spinlock for the entire + * hash table should be okay */ +static DEFINE_SPINLOCK(netlbl_unlhsh_lock); +static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; +static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; /* Accept unlabeled packets flag */ -static DEFINE_SPINLOCK(netlabel_unlabel_acceptflg_lock); static u8 netlabel_unlabel_acceptflg = 0; -/* NetLabel Generic NETLINK CIPSOv4 family */ +/* NetLabel Generic NETLINK unlabeled family */ static struct genl_family netlbl_unlabel_gnl_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, @@ -63,11 +133,841 @@ static struct genl_family netlbl_unlabel_gnl_family = { /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, + [NLBL_UNLABEL_A_IPV6ADDR] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [NLBL_UNLABEL_A_IPV6MASK] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [NLBL_UNLABEL_A_IPV4ADDR] = { .type = NLA_BINARY, + .len = sizeof(struct in_addr) }, + [NLBL_UNLABEL_A_IPV4MASK] = { .type = NLA_BINARY, + .len = sizeof(struct in_addr) }, + [NLBL_UNLABEL_A_IFACE] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ - 1 }, + [NLBL_UNLABEL_A_SECCTX] = { .type = NLA_BINARY } }; /* - * Helper Functions + * Audit Helper Functions + */ + +/** + * netlbl_unlabel_audit_addr4 - Audit an IPv4 address + * @audit_buf: audit buffer + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv4 address and address mask, if necessary, to @audit_buf. + * + */ +static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, + const char *dev, + __be32 addr, __be32 mask) +{ + u32 mask_val = ntohl(mask); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr)); + if (mask_val != 0xffffffff) { + u32 mask_len = 0; + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); + } +} + +/** + * netlbl_unlabel_audit_addr6 - Audit an IPv6 address + * @audit_buf: audit buffer + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv6 address and address mask, if necessary, to @audit_buf. 
+ * + */ +static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr)); + if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { + u32 mask_len = 0; + u32 mask_val; + int iter = -1; + while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) + mask_len += 32; + mask_val = ntohl(mask->s6_addr32[iter]); + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); + } +} + +/* + * Unlabeled Connection Hash Table Functions + */ + +/** + * netlbl_unlhsh_free_addr4 - Frees an IPv4 address entry from the hash table + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that memory allocated to a hash table address entry can be + * released safely. + * + */ +static void netlbl_unlhsh_free_addr4(struct rcu_head *entry) +{ + struct netlbl_unlhsh_addr4 *ptr; + + ptr = container_of(entry, struct netlbl_unlhsh_addr4, rcu); + kfree(ptr); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_unlhsh_free_addr6 - Frees an IPv6 address entry from the hash table + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that memory allocated to a hash table address entry can be + * released safely. + * + */ +static void netlbl_unlhsh_free_addr6(struct rcu_head *entry) +{ + struct netlbl_unlhsh_addr6 *ptr; + + ptr = container_of(entry, struct netlbl_unlhsh_addr6, rcu); + kfree(ptr); +} +#endif /* IPv6 */ + +/** + * netlbl_unlhsh_free_iface - Frees an interface entry from the hash table + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that memory allocated to a hash table interface entry can be + * released safely. It is important to note that this function does not free + * the IPv4 and IPv6 address lists contained as part of an interface entry. It + * is up to the rest of the code to make sure an interface entry is only freed + * once it's address lists are empty. + * + */ +static void netlbl_unlhsh_free_iface(struct rcu_head *entry) +{ + struct netlbl_unlhsh_iface *iface; + struct netlbl_unlhsh_addr4 *iter4; + struct netlbl_unlhsh_addr4 *tmp4; + struct netlbl_unlhsh_addr6 *iter6; + struct netlbl_unlhsh_addr6 *tmp6; + + iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); + + /* no need for locks here since we are the only one with access to this + * structure */ + + list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list) + if (iter4->valid) { + list_del_rcu(&iter4->list); + kfree(iter4); + } + list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list) + if (iter6->valid) { + list_del_rcu(&iter6->list); + kfree(iter6); + } + kfree(iface); +} + +/** + * netlbl_unlhsh_hash - Hashing function for the hash table + * @ifindex: the network interface/device to hash + * + * Description: + * This is the hashing function for the unlabeled hash table, it returns the + * bucket number for the given device/interface. The caller is responsible for + * calling the rcu_read_[un]lock() functions. 
+ * */ +static u32 netlbl_unlhsh_hash(int ifindex) +{ + /* this is taken _almost_ directly from + * security/selinux/netif.c:sel_netif_hasfn() as they do pretty much + * the same thing */ + return ifindex & (rcu_dereference(netlbl_unlhsh)->size - 1); +} + +/** + * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry + * @addr: IPv4 address + * @iface: the network interface entry + * + * Description: + * Searches the IPv4 address list of the network interface specified by @iface. + * If a matching address entry is found it is returned, otherwise NULL is + * returned. The caller is responsible for calling the rcu_read_[un]lock() + * functions. + * + */ +static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4( + __be32 addr, + const struct netlbl_unlhsh_iface *iface) +{ + struct netlbl_unlhsh_addr4 *iter; + + list_for_each_entry_rcu(iter, &iface->addr4_list, list) + if (iter->valid && (addr & iter->mask) == iter->addr) + return iter; + + return NULL; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry + * @addr: IPv6 address + * @iface: the network interface entry + * + * Description: + * Searches the IPv6 address list of the network interface specified by @iface. + * If a matching address entry is found it is returned, otherwise NULL is + * returned. The caller is responsible for calling the rcu_read_[un]lock() + * functions. + * + */ +static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( + const struct in6_addr *addr, + const struct netlbl_unlhsh_iface *iface) +{ + struct netlbl_unlhsh_addr6 *iter; + + list_for_each_entry_rcu(iter, &iface->addr6_list, list) + if (iter->valid && + ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) + return iter; + + return NULL; +} +#endif /* IPv6 */ + +/** + * netlbl_unlhsh_search_iface - Search for a matching interface entry + * @ifindex: the network interface + * + * Description: + * Searches the unlabeled connection hash table and returns a pointer to the + * interface entry which matches @ifindex, otherwise NULL is returned. The + * caller is responsible for calling the rcu_read_[un]lock() functions. + * + */ +static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) +{ + u32 bkt; + struct netlbl_unlhsh_iface *iter; + + bkt = netlbl_unlhsh_hash(ifindex); + list_for_each_entry_rcu(iter, + &rcu_dereference(netlbl_unlhsh)->tbl[bkt], + list) + if (iter->valid && iter->ifindex == ifindex) + return iter; + + return NULL; +} + +/** + * netlbl_unlhsh_search_iface_def - Search for a matching interface entry + * @ifindex: the network interface + * + * Description: + * Searches the unlabeled connection hash table and returns a pointer to the + * interface entry which matches @ifindex. If an exact match can not be found + * and there is a valid default entry, the default entry is returned, otherwise + * NULL is returned. The caller is responsible for calling the + * rcu_read_[un]lock() functions. 
+ * + */ +static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface_def(int ifindex) +{ + struct netlbl_unlhsh_iface *entry; + + entry = netlbl_unlhsh_search_iface(ifindex); + if (entry != NULL) + return entry; + + entry = rcu_dereference(netlbl_unlhsh_def); + if (entry != NULL && entry->valid) + return entry; + + return NULL; +} + +/** + * netlbl_unlhsh_add_addr4 - Add a new IPv4 address entry to the hash table + * @iface: the associated interface entry + * @addr: IPv4 address in network byte order + * @mask: IPv4 address mask in network byte order + * @secid: LSM secid value for entry + * + * Description: + * Add a new address entry into the unlabeled connection hash table using the + * interface entry specified by @iface. On success zero is returned, otherwise + * a negative value is returned. The caller is responsible for calling the + * rcu_read_[un]lock() functions. + * + */ +static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, + const struct in_addr *addr, + const struct in_addr *mask, + u32 secid) +{ + struct netlbl_unlhsh_addr4 *entry; + struct netlbl_unlhsh_addr4 *iter; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + + entry->addr = addr->s_addr & mask->s_addr; + entry->mask = mask->s_addr; + entry->secid = secid; + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + spin_lock(&netlbl_unlhsh_lock); + iter = netlbl_unlhsh_search_addr4(entry->addr, iface); + if (iter != NULL && + iter->addr == addr->s_addr && iter->mask == mask->s_addr) { + spin_unlock(&netlbl_unlhsh_lock); + kfree(entry); + return -EEXIST; + } + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, &iface->addr4_list, list) + if (iter->valid && + ntohl(entry->mask) > ntohl(iter->mask)) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + spin_unlock(&netlbl_unlhsh_lock); + return 0; + } + list_add_tail_rcu(&entry->list, &iface->addr4_list); + spin_unlock(&netlbl_unlhsh_lock); + return 0; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table + * @iface: the associated interface entry + * @addr: IPv6 address in network byte order + * @mask: IPv6 address mask in network byte order + * @secid: LSM secid value for entry + * + * Description: + * Add a new address entry into the unlabeled connection hash table using the + * interface entry specified by @iface. On success zero is returned, otherwise + * a negative value is returned. The caller is responsible for calling the + * rcu_read_[un]lock() functions. 
+ * + */ +static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, + const struct in6_addr *addr, + const struct in6_addr *mask, + u32 secid) +{ + struct netlbl_unlhsh_addr6 *entry; + struct netlbl_unlhsh_addr6 *iter; + + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) + return -ENOMEM; + + ipv6_addr_copy(&entry->addr, addr); + entry->addr.s6_addr32[0] &= mask->s6_addr32[0]; + entry->addr.s6_addr32[1] &= mask->s6_addr32[1]; + entry->addr.s6_addr32[2] &= mask->s6_addr32[2]; + entry->addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&entry->mask, mask); + entry->secid = secid; + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + spin_lock(&netlbl_unlhsh_lock); + iter = netlbl_unlhsh_search_addr6(&entry->addr, iface); + if (iter != NULL && + (ipv6_addr_equal(&iter->addr, addr) && + ipv6_addr_equal(&iter->mask, mask))) { + spin_unlock(&netlbl_unlhsh_lock); + kfree(entry); + return -EEXIST; + } + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, &iface->addr6_list, list) + if (iter->valid && + ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + spin_unlock(&netlbl_unlhsh_lock); + return 0; + } + list_add_tail_rcu(&entry->list, &iface->addr6_list); + spin_unlock(&netlbl_unlhsh_lock); + return 0; +} +#endif /* IPv6 */ + +/** + * netlbl_unlhsh_add_iface - Adds a new interface entry to the hash table + * @ifindex: network interface + * + * Description: + * Add a new, empty, interface entry into the unlabeled connection hash table. + * On success a pointer to the new interface entry is returned, on failure NULL + * is returned. The caller is responsible for calling the rcu_read_[un]lock() + * functions. + * + */ +static struct netlbl_unlhsh_iface *netlbl_unlhsh_add_iface(int ifindex) +{ + u32 bkt; + struct netlbl_unlhsh_iface *iface; + + iface = kzalloc(sizeof(*iface), GFP_ATOMIC); + if (iface == NULL) + return NULL; + + iface->ifindex = ifindex; + INIT_LIST_HEAD(&iface->addr4_list); + INIT_LIST_HEAD(&iface->addr6_list); + iface->valid = 1; + INIT_RCU_HEAD(&iface->rcu); + + spin_lock(&netlbl_unlhsh_lock); + if (ifindex > 0) { + bkt = netlbl_unlhsh_hash(ifindex); + if (netlbl_unlhsh_search_iface(ifindex) != NULL) + goto add_iface_failure; + list_add_tail_rcu(&iface->list, + &rcu_dereference(netlbl_unlhsh)->tbl[bkt]); + } else { + INIT_LIST_HEAD(&iface->list); + if (rcu_dereference(netlbl_unlhsh_def) != NULL) + goto add_iface_failure; + rcu_assign_pointer(netlbl_unlhsh_def, iface); + } + spin_unlock(&netlbl_unlhsh_lock); + + return iface; + +add_iface_failure: + spin_unlock(&netlbl_unlhsh_lock); + kfree(iface); + return NULL; +} + +/** + * netlbl_unlhsh_add - Adds a new entry to the unlabeled connection hash table + * @net: network namespace + * @dev_name: interface name + * @addr: IP address in network byte order + * @mask: address mask in network byte order + * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) + * @secid: LSM secid value for the entry + * @audit_info: NetLabel audit information + * + * Description: + * Adds a new entry to the unlabeled connection hash table. Returns zero on + * success, negative values on failure. 
+ * + */ +static int netlbl_unlhsh_add(struct net *net, + const char *dev_name, + const void *addr, + const void *mask, + u32 addr_len, + u32 secid, + struct netlbl_audit *audit_info) +{ + int ret_val; + int ifindex; + struct net_device *dev; + struct netlbl_unlhsh_iface *iface; + struct in_addr *addr4, *mask4; + struct in6_addr *addr6, *mask6; + struct audit_buffer *audit_buf = NULL; + char *secctx = NULL; + u32 secctx_len; + + if (addr_len != sizeof(struct in_addr) && + addr_len != sizeof(struct in6_addr)) + return -EINVAL; + + rcu_read_lock(); + if (dev_name != NULL) { + dev = dev_get_by_name(net, dev_name); + if (dev == NULL) { + ret_val = -ENODEV; + goto unlhsh_add_return; + } + ifindex = dev->ifindex; + dev_put(dev); + iface = netlbl_unlhsh_search_iface(ifindex); + } else { + ifindex = 0; + iface = rcu_dereference(netlbl_unlhsh_def); + } + if (iface == NULL) { + iface = netlbl_unlhsh_add_iface(ifindex); + if (iface == NULL) { + ret_val = -ENOMEM; + goto unlhsh_add_return; + } + } + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCADD, + audit_info); + switch (addr_len) { + case sizeof(struct in_addr): + addr4 = (struct in_addr *)addr; + mask4 = (struct in_addr *)mask; + ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); + if (audit_buf != NULL) + netlbl_unlabel_audit_addr4(audit_buf, + dev_name, + addr4->s_addr, + mask4->s_addr); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case sizeof(struct in6_addr): + addr6 = (struct in6_addr *)addr; + mask6 = (struct in6_addr *)mask; + ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); + if (audit_buf != NULL) + netlbl_unlabel_audit_addr6(audit_buf, + dev_name, + addr6, mask6); + break; +#endif /* IPv6 */ + default: + ret_val = -EINVAL; + } + if (ret_val == 0) + atomic_inc(&netlabel_mgmt_protocount); + +unlhsh_add_return: + rcu_read_unlock(); + if (audit_buf != NULL) { + if (security_secid_to_secctx(secid, + &secctx, + &secctx_len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", secctx); + security_release_secctx(secctx, secctx_len); + } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + return ret_val; +} + +/** + * netlbl_unlhsh_remove_addr4 - Remove an IPv4 address entry + * @net: network namespace + * @iface: interface entry + * @addr: IP address + * @mask: IP address mask + * @audit_info: NetLabel audit information + * + * Description: + * Remove an IP address entry from the unlabeled connection hash table. + * Returns zero on success, negative values on failure. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +static int netlbl_unlhsh_remove_addr4(struct net *net, + struct netlbl_unlhsh_iface *iface, + const struct in_addr *addr, + const struct in_addr *mask, + struct netlbl_audit *audit_info) +{ + int ret_val = -ENOENT; + struct netlbl_unlhsh_addr4 *entry; + struct audit_buffer *audit_buf = NULL; + struct net_device *dev; + char *secctx = NULL; + u32 secctx_len; + + spin_lock(&netlbl_unlhsh_lock); + entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface); + if (entry != NULL && + entry->addr == addr->s_addr && entry->mask == mask->s_addr) { + entry->valid = 0; + list_del_rcu(&entry->list); + ret_val = 0; + } + spin_unlock(&netlbl_unlhsh_lock); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, + audit_info); + if (audit_buf != NULL) { + dev = dev_get_by_index(net, iface->ifindex); + netlbl_unlabel_audit_addr4(audit_buf, + (dev != NULL ? 
dev->name : NULL), + entry->addr, entry->mask); + if (dev != NULL) + dev_put(dev); + if (security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", secctx); + security_release_secctx(secctx, secctx_len); + } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr4); + return ret_val; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry + * @net: network namespace + * @iface: interface entry + * @addr: IP address + * @mask: IP address mask + * @audit_info: NetLabel audit information + * + * Description: + * Remove an IP address entry from the unlabeled connection hash table. + * Returns zero on success, negative values on failure. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +static int netlbl_unlhsh_remove_addr6(struct net *net, + struct netlbl_unlhsh_iface *iface, + const struct in6_addr *addr, + const struct in6_addr *mask, + struct netlbl_audit *audit_info) +{ + int ret_val = -ENOENT; + struct netlbl_unlhsh_addr6 *entry; + struct audit_buffer *audit_buf = NULL; + struct net_device *dev; + char *secctx = NULL; + u32 secctx_len; + + spin_lock(&netlbl_unlhsh_lock); + entry = netlbl_unlhsh_search_addr6(addr, iface); + if (entry != NULL && + (ipv6_addr_equal(&entry->addr, addr) && + ipv6_addr_equal(&entry->mask, mask))) { + entry->valid = 0; + list_del_rcu(&entry->list); + ret_val = 0; + } + spin_unlock(&netlbl_unlhsh_lock); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, + audit_info); + if (audit_buf != NULL) { + dev = dev_get_by_index(net, iface->ifindex); + netlbl_unlabel_audit_addr6(audit_buf, + (dev != NULL ? dev->name : NULL), + addr, mask); + if (dev != NULL) + dev_put(dev); + if (security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", secctx); + security_release_secctx(secctx, secctx_len); + } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + + if (ret_val == 0) + call_rcu(&entry->rcu, netlbl_unlhsh_free_addr6); + return ret_val; +} +#endif /* IPv6 */ + +/** + * netlbl_unlhsh_condremove_iface - Remove an interface entry + * @iface: the interface entry + * + * Description: + * Remove an interface entry from the unlabeled connection hash table if it is + * empty. An interface entry is considered to be empty if there are no + * address entries assigned to it. 
+ * + */ +static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) +{ + struct netlbl_unlhsh_addr4 *iter4; + struct netlbl_unlhsh_addr6 *iter6; + + spin_lock(&netlbl_unlhsh_lock); + list_for_each_entry_rcu(iter4, &iface->addr4_list, list) + if (iter4->valid) + goto unlhsh_condremove_failure; + list_for_each_entry_rcu(iter6, &iface->addr6_list, list) + if (iter6->valid) + goto unlhsh_condremove_failure; + iface->valid = 0; + if (iface->ifindex > 0) + list_del_rcu(&iface->list); + else + rcu_assign_pointer(netlbl_unlhsh_def, NULL); + spin_unlock(&netlbl_unlhsh_lock); + + call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); + return; + +unlhsh_condremove_failure: + spin_unlock(&netlbl_unlhsh_lock); + return; +} + +/** + * netlbl_unlhsh_remove - Remove an entry from the unlabeled hash table + * @net: network namespace + * @dev_name: interface name + * @addr: IP address in network byte order + * @mask: address mask in network byte order + * @addr_len: length of address/mask (4 for IPv4, 16 for IPv6) + * @audit_info: NetLabel audit information + * + * Description: + * Removes and existing entry from the unlabeled connection hash table. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlhsh_remove(struct net *net, + const char *dev_name, + const void *addr, + const void *mask, + u32 addr_len, + struct netlbl_audit *audit_info) +{ + int ret_val; + struct net_device *dev; + struct netlbl_unlhsh_iface *iface; + + if (addr_len != sizeof(struct in_addr) && + addr_len != sizeof(struct in6_addr)) + return -EINVAL; + + rcu_read_lock(); + if (dev_name != NULL) { + dev = dev_get_by_name(net, dev_name); + if (dev == NULL) { + ret_val = -ENODEV; + goto unlhsh_remove_return; + } + iface = netlbl_unlhsh_search_iface(dev->ifindex); + dev_put(dev); + } else + iface = rcu_dereference(netlbl_unlhsh_def); + if (iface == NULL) { + ret_val = -ENOENT; + goto unlhsh_remove_return; + } + switch (addr_len) { + case sizeof(struct in_addr): + ret_val = netlbl_unlhsh_remove_addr4(net, + iface, addr, mask, + audit_info); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case sizeof(struct in6_addr): + ret_val = netlbl_unlhsh_remove_addr6(net, + iface, addr, mask, + audit_info); + break; +#endif /* IPv6 */ + default: + ret_val = -EINVAL; + } + if (ret_val == 0) { + netlbl_unlhsh_condremove_iface(iface); + atomic_dec(&netlabel_mgmt_protocount); + } + +unlhsh_remove_return: + rcu_read_unlock(); + return ret_val; +} + +/* + * General Helper Functions + */ + +/** + * netlbl_unlhsh_netdev_handler - Network device notification handler + * @this: notifier block + * @event: the event + * @ptr: the network device (cast to void) + * + * Description: + * Handle network device events, although at present all we care about is a + * network device going away. In the case of a device going away we clear any + * related entries from the unlabeled connection hash table. + * + */ +static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct netlbl_unlhsh_iface *iface = NULL; + + if (dev->nd_net != &init_net) + return NOTIFY_DONE; + + /* XXX - should this be a check for NETDEV_DOWN or _UNREGISTER? 
*/ + if (event == NETDEV_DOWN) { + spin_lock(&netlbl_unlhsh_lock); + iface = netlbl_unlhsh_search_iface(dev->ifindex); + if (iface != NULL && iface->valid) { + iface->valid = 0; + list_del_rcu(&iface->list); + } else + iface = NULL; + spin_unlock(&netlbl_unlhsh_lock); + } + + if (iface != NULL) + call_rcu(&iface->rcu, netlbl_unlhsh_free_iface); + + return NOTIFY_DONE; +} /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag @@ -84,11 +984,8 @@ static void netlbl_unlabel_acceptflg_set(u8 value, struct audit_buffer *audit_buf; u8 old_val; - spin_lock(&netlabel_unlabel_acceptflg_lock); old_val = netlabel_unlabel_acceptflg; netlabel_unlabel_acceptflg = value; - spin_unlock(&netlabel_unlabel_acceptflg_lock); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, audit_info); if (audit_buf != NULL) { @@ -98,6 +995,48 @@ static void netlbl_unlabel_acceptflg_set(u8 value, } } +/** + * netlbl_unlabel_addrinfo_get - Get the IPv4/6 address information + * @info: the Generic NETLINK info block + * @addr: the IP address + * @mask: the IP address mask + * @len: the address length + * + * Description: + * Examine the Generic NETLINK message and extract the IP address information. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_unlabel_addrinfo_get(struct genl_info *info, + void **addr, + void **mask, + u32 *len) +{ + u32 addr_len; + + if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { + addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); + if (addr_len != sizeof(struct in_addr) && + addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) + return -EINVAL; + *len = addr_len; + *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); + *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV4MASK]); + return 0; + } else if (info->attrs[NLBL_UNLABEL_A_IPV6ADDR]) { + addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); + if (addr_len != sizeof(struct in6_addr) && + addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV6MASK])) + return -EINVAL; + *len = addr_len; + *addr = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6ADDR]); + *mask = nla_data(info->attrs[NLBL_UNLABEL_A_IPV6MASK]); + return 0; + } + + return -EINVAL; +} + /* * NetLabel Command Handlers */ @@ -155,11 +1094,9 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) goto list_failure; } - rcu_read_lock(); ret_val = nla_put_u8(ans_skb, NLBL_UNLABEL_A_ACPTFLG, netlabel_unlabel_acceptflg); - rcu_read_unlock(); if (ret_val != 0) goto list_failure; @@ -175,11 +1112,489 @@ list_failure: return ret_val; } +/** + * netlbl_unlabel_staticadd - Handle a STATICADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated STATICADD message and add a new unlabeled + * connection entry to the hash table. Returns zero on success, negative + * values on failure. + * + */ +static int netlbl_unlabel_staticadd(struct sk_buff *skb, + struct genl_info *info) +{ + int ret_val; + char *dev_name; + void *addr; + void *mask; + u32 addr_len; + u32 secid; + struct netlbl_audit audit_info; + + /* Don't allow users to add both IPv4 and IPv6 addresses for a + * single entry. However, allow users to create two entries, one each + * for IPv4 and IPv4, with the same LSM security context which should + * achieve the same result. 
*/ + if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || + !info->attrs[NLBL_UNLABEL_A_IFACE] || + !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ + (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); + if (ret_val != 0) + return ret_val; + dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); + ret_val = security_secctx_to_secid( + nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), + nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), + &secid); + if (ret_val != 0) + return ret_val; + + return netlbl_unlhsh_add(&init_net, + dev_name, addr, mask, addr_len, secid, + &audit_info); +} + +/** + * netlbl_unlabel_staticadddef - Handle a STATICADDDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated STATICADDDEF message and add a new default + * unlabeled connection entry. Returns zero on success, negative values on + * failure. + * + */ +static int netlbl_unlabel_staticadddef(struct sk_buff *skb, + struct genl_info *info) +{ + int ret_val; + void *addr; + void *mask; + u32 addr_len; + u32 secid; + struct netlbl_audit audit_info; + + /* Don't allow users to add both IPv4 and IPv6 addresses for a + * single entry. However, allow users to create two entries, one each + * for IPv4 and IPv6, with the same LSM security context which should + * achieve the same result. */ + if (!info->attrs[NLBL_UNLABEL_A_SECCTX] || + !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ + (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); + if (ret_val != 0) + return ret_val; + ret_val = security_secctx_to_secid( + nla_data(info->attrs[NLBL_UNLABEL_A_SECCTX]), + nla_len(info->attrs[NLBL_UNLABEL_A_SECCTX]), + &secid); + if (ret_val != 0) + return ret_val; + + return netlbl_unlhsh_add(&init_net, + NULL, addr, mask, addr_len, secid, + &audit_info); +} + +/** + * netlbl_unlabel_staticremove - Handle a STATICREMOVE message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated STATICREMOVE message and remove the specified + * unlabeled connection entry. Returns zero on success, negative values on + * failure. + * + */ +static int netlbl_unlabel_staticremove(struct sk_buff *skb, + struct genl_info *info) +{ + int ret_val; + char *dev_name; + void *addr; + void *mask; + u32 addr_len; + struct netlbl_audit audit_info; + + /* See the note in netlbl_unlabel_staticadd() about not allowing both + * IPv4 and IPv6 in the same entry. 
*/ + if (!info->attrs[NLBL_UNLABEL_A_IFACE] || + !((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ + (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); + if (ret_val != 0) + return ret_val; + dev_name = nla_data(info->attrs[NLBL_UNLABEL_A_IFACE]); + + return netlbl_unlhsh_remove(&init_net, + dev_name, addr, mask, addr_len, + &audit_info); +} + +/** + * netlbl_unlabel_staticremovedef - Handle a STATICREMOVEDEF message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated STATICREMOVEDEF message and remove the default + * unlabeled connection entry. Returns zero on success, negative values on + * failure. + * + */ +static int netlbl_unlabel_staticremovedef(struct sk_buff *skb, + struct genl_info *info) +{ + int ret_val; + void *addr; + void *mask; + u32 addr_len; + struct netlbl_audit audit_info; + + /* See the note in netlbl_unlabel_staticadd() about not allowing both + * IPv4 and IPv6 in the same entry. */ + if (!((!info->attrs[NLBL_UNLABEL_A_IPV4ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV4MASK]) ^ + (!info->attrs[NLBL_UNLABEL_A_IPV6ADDR] || + !info->attrs[NLBL_UNLABEL_A_IPV6MASK]))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len); + if (ret_val != 0) + return ret_val; + + return netlbl_unlhsh_remove(&init_net, + NULL, addr, mask, addr_len, + &audit_info); +} + + +/** + * netlbl_unlabel_staticlist_gen - Generate messages for STATICLIST[DEF] + * @cmd: command/message + * @iface: the interface entry + * @addr4: the IPv4 address entry + * @addr6: the IPv6 address entry + * @arg: the netlbl_unlhsh_walk_arg structure + * + * Description: + * This function is designed to be used to generate a response for a + * STATICLIST or STATICLISTDEF message. When called either @addr4 or @addr6 + * can be specified, not both, the other unspecified entry should be set to + * NULL by the caller. Returns the size of the message on success, negative + * values on failure. 
+ * + */ +static int netlbl_unlabel_staticlist_gen(u32 cmd, + const struct netlbl_unlhsh_iface *iface, + const struct netlbl_unlhsh_addr4 *addr4, + const struct netlbl_unlhsh_addr6 *addr6, + void *arg) +{ + int ret_val = -ENOMEM; + struct netlbl_unlhsh_walk_arg *cb_arg = arg; + struct net_device *dev; + void *data; + u32 secid; + char *secctx; + u32 secctx_len; + + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + cb_arg->seq, &netlbl_unlabel_gnl_family, + NLM_F_MULTI, cmd); + if (data == NULL) + goto list_cb_failure; + + if (iface->ifindex > 0) { + dev = dev_get_by_index(&init_net, iface->ifindex); + ret_val = nla_put_string(cb_arg->skb, + NLBL_UNLABEL_A_IFACE, dev->name); + dev_put(dev); + if (ret_val != 0) + goto list_cb_failure; + } + + if (addr4) { + struct in_addr addr_struct; + + addr_struct.s_addr = addr4->addr; + ret_val = nla_put(cb_arg->skb, + NLBL_UNLABEL_A_IPV4ADDR, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + goto list_cb_failure; + + addr_struct.s_addr = addr4->mask; + ret_val = nla_put(cb_arg->skb, + NLBL_UNLABEL_A_IPV4MASK, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + goto list_cb_failure; + + secid = addr4->secid; + } else { + ret_val = nla_put(cb_arg->skb, + NLBL_UNLABEL_A_IPV6ADDR, + sizeof(struct in6_addr), + &addr6->addr); + if (ret_val != 0) + goto list_cb_failure; + + ret_val = nla_put(cb_arg->skb, + NLBL_UNLABEL_A_IPV6MASK, + sizeof(struct in6_addr), + &addr6->mask); + if (ret_val != 0) + goto list_cb_failure; + + secid = addr6->secid; + } + + ret_val = security_secid_to_secctx(secid, &secctx, &secctx_len); + if (ret_val != 0) + goto list_cb_failure; + ret_val = nla_put(cb_arg->skb, + NLBL_UNLABEL_A_SECCTX, + secctx_len, + secctx); + security_release_secctx(secctx, secctx_len); + if (ret_val != 0) + goto list_cb_failure; + + cb_arg->seq++; + return genlmsg_end(cb_arg->skb, data); + +list_cb_failure: + genlmsg_cancel(cb_arg->skb, data); + return ret_val; +} + +/** + * netlbl_unlabel_staticlist - Handle a STATICLIST message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated STATICLIST message and dump the unlabeled + * connection hash table in a form suitable for use in a kernel generated + * STATICLIST message. Returns the length of @skb. 
+ * + */ +static int netlbl_unlabel_staticlist(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct netlbl_unlhsh_walk_arg cb_arg; + u32 skip_bkt = cb->args[0]; + u32 skip_chain = cb->args[1]; + u32 skip_addr4 = cb->args[2]; + u32 skip_addr6 = cb->args[3]; + u32 iter_bkt; + u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; + struct netlbl_unlhsh_iface *iface; + struct netlbl_unlhsh_addr4 *addr4; + struct netlbl_unlhsh_addr6 *addr6; + + cb_arg.nl_cb = cb; + cb_arg.skb = skb; + cb_arg.seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for (iter_bkt = skip_bkt; + iter_bkt < rcu_dereference(netlbl_unlhsh)->size; + iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { + list_for_each_entry_rcu(iface, + &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt], + list) { + if (!iface->valid || + iter_chain++ < skip_chain) + continue; + list_for_each_entry_rcu(addr4, + &iface->addr4_list, + list) { + if (!addr4->valid || iter_addr4++ < skip_addr4) + continue; + if (netlbl_unlabel_staticlist_gen( + NLBL_UNLABEL_C_STATICLIST, + iface, + addr4, + NULL, + &cb_arg) < 0) { + iter_addr4--; + iter_chain--; + goto unlabel_staticlist_return; + } + } + list_for_each_entry_rcu(addr6, + &iface->addr6_list, + list) { + if (!addr6->valid || iter_addr6++ < skip_addr6) + continue; + if (netlbl_unlabel_staticlist_gen( + NLBL_UNLABEL_C_STATICLIST, + iface, + NULL, + addr6, + &cb_arg) < 0) { + iter_addr6--; + iter_chain--; + goto unlabel_staticlist_return; + } + } + } + } + +unlabel_staticlist_return: + rcu_read_unlock(); + cb->args[0] = skip_bkt; + cb->args[1] = skip_chain; + cb->args[2] = skip_addr4; + cb->args[3] = skip_addr6; + return skb->len; +} + +/** + * netlbl_unlabel_staticlistdef - Handle a STATICLISTDEF message + * @skb: the NETLINK buffer + * @cb: the NETLINK callback + * + * Description: + * Process a user generated STATICLISTDEF message and dump the default + * unlabeled connection entry in a form suitable for use in a kernel generated + * STATICLISTDEF message. Returns the length of @skb. 
+ *
+ */
+static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct netlbl_unlhsh_walk_arg cb_arg;
+ struct netlbl_unlhsh_iface *iface;
+ u32 skip_addr4 = cb->args[0];
+ u32 skip_addr6 = cb->args[1];
+ u32 iter_addr4 = 0, iter_addr6 = 0;
+ struct netlbl_unlhsh_addr4 *addr4;
+ struct netlbl_unlhsh_addr6 *addr6;
+
+ cb_arg.nl_cb = cb;
+ cb_arg.skb = skb;
+ cb_arg.seq = cb->nlh->nlmsg_seq;
+
+ rcu_read_lock();
+ iface = rcu_dereference(netlbl_unlhsh_def);
+ if (iface == NULL || !iface->valid)
+ goto unlabel_staticlistdef_return;
+
+ list_for_each_entry_rcu(addr4, &iface->addr4_list, list) {
+ if (!addr4->valid || iter_addr4++ < skip_addr4)
+ continue;
+ if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
+ iface,
+ addr4,
+ NULL,
+ &cb_arg) < 0) {
+ iter_addr4--;
+ goto unlabel_staticlistdef_return;
+ }
+ }
+ list_for_each_entry_rcu(addr6, &iface->addr6_list, list) {
+ if (!addr6->valid || iter_addr6++ < skip_addr6)
+ continue;
+ if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF,
+ iface,
+ NULL,
+ addr6,
+ &cb_arg) < 0) {
+ iter_addr6--;
+ goto unlabel_staticlistdef_return;
+ }
+ }
+
+unlabel_staticlistdef_return:
+ rcu_read_unlock();
+ cb->args[0] = skip_addr4;
+ cb->args[1] = skip_addr6;
+ return skb->len;
+}
 /*
 * NetLabel Generic NETLINK Command Definitions
 */
+static struct genl_ops netlbl_unlabel_genl_c_staticadd = {
+ .cmd = NLBL_UNLABEL_C_STATICADD,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_staticadd,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_staticremove = {
+ .cmd = NLBL_UNLABEL_C_STATICREMOVE,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_staticremove,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_staticlist = {
+ .cmd = NLBL_UNLABEL_C_STATICLIST,
+ .flags = 0,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_unlabel_staticlist,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_staticadddef = {
+ .cmd = NLBL_UNLABEL_C_STATICADDDEF,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_staticadddef,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_staticremovedef = {
+ .cmd = NLBL_UNLABEL_C_STATICREMOVEDEF,
+ .flags = GENL_ADMIN_PERM,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = netlbl_unlabel_staticremovedef,
+ .dumpit = NULL,
+};
+
+static struct genl_ops netlbl_unlabel_genl_c_staticlistdef = {
+ .cmd = NLBL_UNLABEL_C_STATICLISTDEF,
+ .flags = 0,
+ .policy = netlbl_unlabel_genl_policy,
+ .doit = NULL,
+ .dumpit = netlbl_unlabel_staticlistdef,
+};
+
 static struct genl_ops netlbl_unlabel_genl_c_accept = {
 .cmd = NLBL_UNLABEL_C_ACCEPT,
 .flags = GENL_ADMIN_PERM,
@@ -196,7 +1611,6 @@ static struct genl_ops netlbl_unlabel_genl_c_list = {
 .dumpit = NULL,
 };
-
 /*
 * NetLabel Generic NETLINK Protocol Functions
 */
@@ -218,6 +1632,36 @@ int netlbl_unlabel_genl_init(void)
 return ret_val;
 ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_staticadd);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_staticremove);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_staticlist);
+ if (ret_val != 0)
+ return ret_val;
+
+ ret_val = genl_register_ops(&netlbl_unlabel_gnl_family,
+ &netlbl_unlabel_genl_c_staticadddef);
+
if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_staticremovedef); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, + &netlbl_unlabel_genl_c_staticlistdef); + if (ret_val != 0) + return ret_val; + + ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, &netlbl_unlabel_genl_c_accept); if (ret_val != 0) return ret_val; @@ -234,8 +1678,58 @@ int netlbl_unlabel_genl_init(void) * NetLabel KAPI Hooks */ +static struct notifier_block netlbl_unlhsh_netdev_notifier = { + .notifier_call = netlbl_unlhsh_netdev_handler, +}; + +/** + * netlbl_unlabel_init - Initialize the unlabeled connection hash table + * @size: the number of bits to use for the hash buckets + * + * Description: + * Initializes the unlabeled connection hash table and registers a network + * device notification handler. This function should only be called by the + * NetLabel subsystem itself during initialization. Returns zero on success, + * non-zero values on error. + * + */ +int netlbl_unlabel_init(u32 size) +{ + u32 iter; + struct netlbl_unlhsh_tbl *hsh_tbl; + + if (size == 0) + return -EINVAL; + + hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); + if (hsh_tbl == NULL) + return -ENOMEM; + hsh_tbl->size = 1 << size; + hsh_tbl->tbl = kcalloc(hsh_tbl->size, + sizeof(struct list_head), + GFP_KERNEL); + if (hsh_tbl->tbl == NULL) { + kfree(hsh_tbl); + return -ENOMEM; + } + for (iter = 0; iter < hsh_tbl->size; iter++) + INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); + + rcu_read_lock(); + spin_lock(&netlbl_unlhsh_lock); + rcu_assign_pointer(netlbl_unlhsh, hsh_tbl); + spin_unlock(&netlbl_unlhsh_lock); + rcu_read_unlock(); + + register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier); + + return 0; +} + /** * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet + * @skb: the packet + * @family: protocol family * @secattr: the security attributes * * Description: @@ -243,19 +1737,52 @@ int netlbl_unlabel_genl_init(void) * them in @secattr. Returns zero on success and negative values on failure. 
*
 */
-int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr)
+int netlbl_unlabel_getattr(const struct sk_buff *skb,
+ u16 family,
+ struct netlbl_lsm_secattr *secattr)
 {
- int ret_val;
+ struct iphdr *hdr4;
+ struct ipv6hdr *hdr6;
+ struct netlbl_unlhsh_addr4 *addr4;
+ struct netlbl_unlhsh_addr6 *addr6;
+ struct netlbl_unlhsh_iface *iface;
 rcu_read_lock();
- if (netlabel_unlabel_acceptflg == 1) {
- netlbl_secattr_init(secattr);
- ret_val = 0;
- } else
- ret_val = -ENOMSG;
+ iface = netlbl_unlhsh_search_iface_def(skb->iif);
+ if (iface == NULL)
+ goto unlabel_getattr_nolabel;
+ switch (family) {
+ case PF_INET:
+ hdr4 = ip_hdr(skb);
+ addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface);
+ if (addr4 == NULL)
+ goto unlabel_getattr_nolabel;
+ secattr->attr.secid = addr4->secid;
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case PF_INET6:
+ hdr6 = ipv6_hdr(skb);
+ addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface);
+ if (addr6 == NULL)
+ goto unlabel_getattr_nolabel;
+ secattr->attr.secid = addr6->secid;
+ break;
+#endif /* IPv6 */
+ default:
+ goto unlabel_getattr_nolabel;
+ }
 rcu_read_unlock();
- return ret_val;
+ secattr->flags |= NETLBL_SECATTR_SECID;
+ secattr->type = NETLBL_NLTYPE_UNLABELED;
+ return 0;
+
+unlabel_getattr_nolabel:
+ rcu_read_unlock();
+ if (netlabel_unlabel_acceptflg == 0)
+ return -ENOMSG;
+ secattr->type = NETLBL_NLTYPE_UNLABELED;
+ return 0;
 }
 /**
diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h
index c2917fbb42c..06b1301ac07 100644
--- a/net/netlabel/netlabel_unlabeled.h
+++ b/net/netlabel/netlabel_unlabeled.h
@@ -36,6 +36,116 @@
 /*
 * The following NetLabel payloads are supported by the Unlabeled subsystem.
 *
+ * o STATICADD
+ * This message is sent from an application to add a new static label for
+ * incoming unlabeled connections.
+ *
+ * Required attributes:
+ *
+ * NLBL_UNLABEL_A_IFACE
+ * NLBL_UNLABEL_A_SECCTX
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
+ * o STATICREMOVE
+ * This message is sent from an application to remove an existing static
+ * label for incoming unlabeled connections.
+ *
+ * Required attributes:
+ *
+ * NLBL_UNLABEL_A_IFACE
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
+ * o STATICLIST
+ * This message can be sent either from an application or by the kernel in
+ * response to an application generated STATICLIST message. When sent by an
+ * application there is no payload and the NLM_F_DUMP flag should be set.
+ * The kernel should respond with a series of the following messages.
+ *
+ * Required attributes:
+ *
+ * NLBL_UNLABEL_A_IFACE
+ * NLBL_UNLABEL_A_SECCTX
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
+ * o STATICADDDEF
+ * This message is sent from an application to set the default static
+ * label for incoming unlabeled connections.
+ *
+ * Required attribute:
+ *
+ * NLBL_UNLABEL_A_SECCTX
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
+ * o STATICREMOVEDEF
+ * This message is sent from an application to remove the existing default
+ * static label for incoming unlabeled connections.
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
+ * o STATICLISTDEF
+ * This message can be sent either from an application or by the kernel in
+ * response to an application generated STATICLISTDEF message. When sent by
+ * an application there is no payload and the NLM_F_DUMP flag should be set.
+ * The kernel should respond with the following message.
+ *
+ * Required attribute:
+ *
+ * NLBL_UNLABEL_A_SECCTX
+ *
+ * If IPv4 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV4ADDR
+ * NLBL_UNLABEL_A_IPV4MASK
+ *
+ * If IPv6 is specified the following attributes are required:
+ *
+ * NLBL_UNLABEL_A_IPV6ADDR
+ * NLBL_UNLABEL_A_IPV6MASK
+ *
 * o ACCEPT
 * This message is sent from an application to specify if the kernel should
 * allow unlabled packets to pass if they do not match any of the static
@@ -62,6 +172,12 @@ enum {
 NLBL_UNLABEL_C_UNSPEC,
 NLBL_UNLABEL_C_ACCEPT,
 NLBL_UNLABEL_C_LIST,
+ NLBL_UNLABEL_C_STATICADD,
+ NLBL_UNLABEL_C_STATICREMOVE,
+ NLBL_UNLABEL_C_STATICLIST,
+ NLBL_UNLABEL_C_STATICADDDEF,
+ NLBL_UNLABEL_C_STATICREMOVEDEF,
+ NLBL_UNLABEL_C_STATICLISTDEF,
 __NLBL_UNLABEL_C_MAX,
 };
 #define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1)
@@ -73,6 +189,24 @@ enum {
 /* (NLA_U8)
 * if true then unlabeled packets are allowed to pass, else unlabeled
 * packets are rejected */
+ NLBL_UNLABEL_A_IPV6ADDR,
+ /* (NLA_BINARY, struct in6_addr)
+ * an IPv6 address */
+ NLBL_UNLABEL_A_IPV6MASK,
+ /* (NLA_BINARY, struct in6_addr)
+ * an IPv6 address mask */
+ NLBL_UNLABEL_A_IPV4ADDR,
+ /* (NLA_BINARY, struct in_addr)
+ * an IPv4 address */
+ NLBL_UNLABEL_A_IPV4MASK,
+ /* (NLA_BINARY, struct in_addr)
+ * an IPv4 address mask */
+ NLBL_UNLABEL_A_IFACE,
+ /* (NLA_NULL_STRING)
+ * network interface */
+ NLBL_UNLABEL_A_SECCTX,
+ /* (NLA_BINARY)
+ * an LSM specific security context */
 __NLBL_UNLABEL_A_MAX,
 };
 #define NLBL_UNLABEL_A_MAX (__NLBL_UNLABEL_A_MAX - 1)
@@ -80,8 +214,17 @@ enum {
 /* NetLabel protocol functions */
 int netlbl_unlabel_genl_init(void);
+/* Unlabeled connection hash table size */
+/* XXX - currently this number is an uneducated guess */
+#define NETLBL_UNLHSH_BITSIZE 7
+
+/* General Unlabeled init function */
+int netlbl_unlabel_init(u32 size);
+
 /* Process Unlabeled incoming network packets */
-int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr);
+int netlbl_unlabel_getattr(const struct sk_buff *skb,
+ u16 family,
+ struct netlbl_lsm_secattr *secattr);
 /* Set the default configuration to allow Unlabeled packets */
 int netlbl_unlabel_defconf(void);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 1ea27559b1d..bcd9abdb031 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -51,6 +51,7 @@ rpcauth_register(const struct rpc_authops *ops)
 spin_unlock(&rpc_authflavor_lock);
 return ret;
 }
+EXPORT_SYMBOL_GPL(rpcauth_register);
 int rpcauth_unregister(const struct
rpc_authops *ops) @@ -68,6 +69,7 @@ rpcauth_unregister(const struct rpc_authops *ops) spin_unlock(&rpc_authflavor_lock); return ret; } +EXPORT_SYMBOL_GPL(rpcauth_unregister); struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) @@ -102,6 +104,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) out: return auth; } +EXPORT_SYMBOL_GPL(rpcauth_create); void rpcauth_release(struct rpc_auth *auth) @@ -151,6 +154,7 @@ rpcauth_init_credcache(struct rpc_auth *auth) auth->au_credcache = new; return 0; } +EXPORT_SYMBOL_GPL(rpcauth_init_credcache); /* * Destroy a list of credentials @@ -213,6 +217,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) kfree(cache); } } +EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); /* * Remove stale credentials. Avoid sleeping inside the loop. @@ -332,6 +337,7 @@ found: out: return cred; } +EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int flags) @@ -350,6 +356,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } +EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, @@ -366,7 +373,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, #endif cred->cr_uid = acred->uid; } -EXPORT_SYMBOL(rpcauth_init_cred); +EXPORT_SYMBOL_GPL(rpcauth_init_cred); struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) @@ -378,6 +385,7 @@ rpcauth_bindcred(struct rpc_task *task) .group_info = current->group_info, }; struct rpc_cred *ret; + sigset_t oldset; int flags = 0; dprintk("RPC: %5u looking up %s cred\n", @@ -385,7 +393,9 @@ rpcauth_bindcred(struct rpc_task *task) get_group_info(acred.group_info); if (task->tk_flags & RPC_TASK_ROOTCREDS) flags |= RPCAUTH_LOOKUP_ROOTCREDS; + rpc_clnt_sigmask(task->tk_client, &oldset); ret = auth->au_ops->lookup_cred(auth, &acred, flags); + rpc_clnt_sigunmask(task->tk_client, &oldset); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else @@ -435,6 +445,7 @@ need_lock: out_destroy: cred->cr_ops->crdestroy(cred); } +EXPORT_SYMBOL_GPL(put_rpccred); void rpcauth_unbindcred(struct rpc_task *task) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1f2d85e869c..6dac3879228 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -472,16 +472,15 @@ gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, char __user *dst, size_t buflen) { char *data = (char *)msg->data + msg->copied; - ssize_t mlen = msg->len; - ssize_t left; + size_t mlen = min(msg->len, buflen); + unsigned long left; - if (mlen > buflen) - mlen = buflen; left = copy_to_user(dst, data, mlen); - if (left < 0) { - msg->errno = left; - return left; + if (left == mlen) { + msg->errno = -EFAULT; + return -EFAULT; } + mlen -= left; msg->copied += mlen; msg->errno = 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76be83ee4b0..924916ceaa4 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -30,6 +30,7 @@ #include <linux/smp_lock.h> #include <linux/utsname.h> #include <linux/workqueue.h> +#include <linux/in6.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -121,8 +122,9 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) } } -static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) +static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, 
struct rpc_xprt *xprt) { + struct rpc_program *program = args->program; struct rpc_version *version; struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; @@ -131,13 +133,13 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s /* sanity check the name before trying to print it */ err = -EINVAL; - len = strlen(servname); + len = strlen(args->servername); if (len > RPC_MAXNETNAMELEN) goto out_no_rpciod; len++; dprintk("RPC: creating %s client for %s (xprt %p)\n", - program->name, servname, xprt); + program->name, args->servername, xprt); err = rpciod_up(); if (err) @@ -145,7 +147,11 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s err = -EINVAL; if (!xprt) goto out_no_xprt; - if (vers >= program->nrvers || !(version = program->version[vers])) + + if (args->version >= program->nrvers) + goto out_err; + version = program->version[args->version]; + if (version == NULL) goto out_err; err = -ENOMEM; @@ -157,12 +163,12 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_server = clnt->cl_inline_name; if (len > sizeof(clnt->cl_inline_name)) { char *buf = kmalloc(len, GFP_KERNEL); - if (buf != 0) + if (buf != NULL) clnt->cl_server = buf; else len = sizeof(clnt->cl_inline_name); } - strlcpy(clnt->cl_server, servname, len); + strlcpy(clnt->cl_server, args->servername, len); clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; @@ -182,8 +188,15 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (!xprt_bound(clnt->cl_xprt)) clnt->cl_autobind = 1; + clnt->cl_timeout = xprt->timeout; + if (args->timeout != NULL) { + memcpy(&clnt->cl_timeout_default, args->timeout, + sizeof(clnt->cl_timeout_default)); + clnt->cl_timeout = &clnt->cl_timeout_default; + } + clnt->cl_rtt = &clnt->cl_rtt_default; - rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); + rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); kref_init(&clnt->cl_kref); @@ -191,10 +204,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s if (err < 0) goto out_no_path; - auth = rpcauth_create(flavor, clnt); + auth = rpcauth_create(args->authflavor, clnt); if (IS_ERR(auth)) { printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", - flavor); + args->authflavor); err = PTR_ERR(auth); goto out_no_auth; } @@ -245,9 +258,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, - .timeout = args->timeout }; - char servername[20]; + char servername[48]; xprt = xprt_create_transport(&xprtargs); if (IS_ERR(xprt)) @@ -258,13 +270,34 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. */ if (args->servername == NULL) { - struct sockaddr_in *addr = - (struct sockaddr_in *) args->address; - snprintf(servername, sizeof(servername), NIPQUAD_FMT, - NIPQUAD(addr->sin_addr.s_addr)); + servername[0] = '\0'; + switch (args->address->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + snprintf(servername, sizeof(servername), NIPQUAD_FMT, + NIPQUAD(sin->sin_addr.s_addr)); + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin = + (struct sockaddr_in6 *)args->address; + snprintf(servername, sizeof(servername), NIP6_FMT, + NIP6(sin->sin6_addr)); + break; + } + default: + /* caller wants default server name, but + * address family isn't recognized. 
*/ + return ERR_PTR(-EINVAL); + } args->servername = servername; } + xprt = xprt_create_transport(&xprtargs); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; + /* * By default, kernel RPC client connects from a reserved port. * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters, @@ -275,8 +308,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; - clnt = rpc_new_client(xprt, args->servername, args->program, - args->version, args->authflavor); + clnt = rpc_new_client(args, xprt); if (IS_ERR(clnt)) return clnt; @@ -322,7 +354,7 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; INIT_LIST_HEAD(&new->cl_tasks); spin_lock_init(&new->cl_lock); - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); + rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval); new->cl_metrics = rpc_alloc_iostats(clnt); if (new->cl_metrics == NULL) goto out_no_stats; @@ -345,6 +377,7 @@ out_no_clnt: dprintk("RPC: %s: returned error %d\n", __FUNCTION__, err); return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(rpc_clone_client); /* * Properly shut down an RPC client, terminating all outstanding @@ -363,6 +396,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt) rpc_release_client(clnt); } +EXPORT_SYMBOL_GPL(rpc_shutdown_client); /* * Free an RPC client @@ -467,6 +501,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, out: return clnt; } +EXPORT_SYMBOL_GPL(rpc_bind_new_program); /* * Default callback for async RPC calls @@ -498,12 +533,12 @@ static void rpc_save_sigmask(sigset_t *oldset, int intr) sigprocmask(SIG_BLOCK, &sigmask, oldset); } -static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset) +static void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset) { rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task)); } -static inline void rpc_restore_sigmask(sigset_t *oldset) +static void rpc_restore_sigmask(sigset_t *oldset) { sigprocmask(SIG_SETMASK, oldset, NULL); } @@ -512,45 +547,49 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset) { rpc_save_sigmask(oldset, clnt->cl_intr); } +EXPORT_SYMBOL_GPL(rpc_clnt_sigmask); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) { rpc_restore_sigmask(oldset); } +EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask); -static -struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, - struct rpc_message *msg, - int flags, - const struct rpc_call_ops *ops, - void *data) +/** + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it + * @task_setup_data: pointer to task initialisation data + */ +struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data) { struct rpc_task *task, *ret; sigset_t oldset; - task = rpc_new_task(clnt, flags, ops, data); + task = rpc_new_task(task_setup_data); if (task == NULL) { - rpc_release_calldata(ops, data); - return ERR_PTR(-ENOMEM); + rpc_release_calldata(task_setup_data->callback_ops, + task_setup_data->callback_data); + ret = ERR_PTR(-ENOMEM); + goto out; } - /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ - rpc_task_sigmask(task, &oldset); - if (msg != NULL) { - rpc_call_setup(task, msg, 0); - if (task->tk_status != 0) { - ret = ERR_PTR(task->tk_status); - rpc_put_task(task); - goto out; - } + if (task->tk_status != 0) { + ret = ERR_PTR(task->tk_status); + rpc_put_task(task); + goto out; } atomic_inc(&task->tk_count); - rpc_execute(task); + /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ + if 
(!RPC_IS_ASYNC(task)) { + rpc_task_sigmask(task, &oldset); + rpc_execute(task); + rpc_restore_sigmask(&oldset); + } else + rpc_execute(task); ret = task; out: - rpc_restore_sigmask(&oldset); return ret; } +EXPORT_SYMBOL_GPL(rpc_run_task); /** * rpc_call_sync - Perform a synchronous RPC call @@ -561,17 +600,24 @@ out: int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) { struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = msg, + .callback_ops = &rpc_default_ops, + .flags = flags, + }; int status; BUG_ON(flags & RPC_TASK_ASYNC); - task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); status = task->tk_status; rpc_put_task(task); return status; } +EXPORT_SYMBOL_GPL(rpc_call_sync); /** * rpc_call_async - Perform an asynchronous RPC call @@ -586,45 +632,28 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = msg, + .callback_ops = tk_ops, + .callback_data = data, + .flags = flags|RPC_TASK_ASYNC, + }; - task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); + task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); rpc_put_task(task); return 0; } - -/** - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it - * @clnt: pointer to RPC client - * @flags: RPC flags - * @ops: RPC call ops - * @data: user call data - */ -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, - const struct rpc_call_ops *tk_ops, - void *data) -{ - return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); -} -EXPORT_SYMBOL(rpc_run_task); +EXPORT_SYMBOL_GPL(rpc_call_async); void -rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) +rpc_call_start(struct rpc_task *task) { - task->tk_msg = *msg; - task->tk_flags |= flags; - /* Bind the user cred */ - if (task->tk_msg.rpc_cred != NULL) - rpcauth_holdcred(task); - else - rpcauth_bindcred(task); - - if (task->tk_status == 0) - task->tk_action = call_start; - else - task->tk_action = rpc_exit_task; + task->tk_action = call_start; } +EXPORT_SYMBOL_GPL(rpc_call_start); /** * rpc_peeraddr - extract remote peer address from clnt's xprt @@ -653,7 +682,8 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr); * @format: address format * */ -char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format) +const char *rpc_peeraddr2str(struct rpc_clnt *clnt, + enum rpc_display_format_t format) { struct rpc_xprt *xprt = clnt->cl_xprt; @@ -671,6 +701,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize if (xprt->ops->set_buffer_size) xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); } +EXPORT_SYMBOL_GPL(rpc_setbufsize); /* * Return size of largest payload RPC client can support, in bytes @@ -710,6 +741,7 @@ rpc_restart_call(struct rpc_task *task) task->tk_action = call_start; } +EXPORT_SYMBOL_GPL(rpc_restart_call); /* * 0. 
Initial state @@ -1137,7 +1169,7 @@ call_status(struct rpc_task *task) case -ETIMEDOUT: task->tk_action = call_timeout; if (task->tk_client->cl_discrtry) - xprt_disconnect(task->tk_xprt); + xprt_force_disconnect(task->tk_xprt); break; case -ECONNREFUSED: case -ENOTCONN: @@ -1260,7 +1292,7 @@ out_retry: req->rq_received = req->rq_private_buf.len = 0; task->tk_status = 0; if (task->tk_client->cl_discrtry) - xprt_disconnect(task->tk_xprt); + xprt_force_disconnect(task->tk_xprt); } /* @@ -1517,9 +1549,15 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int .rpc_proc = &rpcproc_null, .rpc_cred = cred, }; - return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_default_ops, + .flags = flags, + }; + return rpc_run_task(&task_setup_data); } -EXPORT_SYMBOL(rpc_call_null); +EXPORT_SYMBOL_GPL(rpc_call_null); #ifdef RPC_DEBUG void rpc_show_tasks(void) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c59f3ca2b41..7e197168a24 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -76,6 +76,16 @@ rpc_timeout_upcall_queue(struct work_struct *work) rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT); } +/** + * rpc_queue_upcall + * @inode: inode of upcall pipe on which to queue given message + * @msg: message to queue + * + * Call with an @inode created by rpc_mkpipe() to queue an upcall. + * A userspace process may then later read the upcall by performing a + * read on an open file for this inode. It is up to the caller to + * initialize the fields of @msg (other than @msg->list) appropriately. + */ int rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) { @@ -103,6 +113,7 @@ out: wake_up(&rpci->waitq); return res; } +EXPORT_SYMBOL(rpc_queue_upcall); static inline void rpc_inode_setowner(struct inode *inode, void *private) @@ -512,8 +523,8 @@ rpc_get_inode(struct super_block *sb, int mode) /* * FIXME: This probably has races. */ -static void -rpc_depopulate(struct dentry *parent, int start, int eof) +static void rpc_depopulate(struct dentry *parent, + unsigned long start, unsigned long eof) { struct inode *dir = parent->d_inode; struct list_head *pos, *next; @@ -663,7 +674,16 @@ rpc_lookup_negative(char *path, struct nameidata *nd) return dentry; } - +/** + * rpc_mkdir - Create a new directory in rpc_pipefs + * @path: path from the rpc_pipefs root to the new directory + * @rpc_clnt: rpc client to associate with this directory + * + * This creates a directory at the given @path associated with + * @rpc_clnt, which will contain a file named "info" with some basic + * information about the client, together with any "pipes" that may + * later be created using rpc_mkpipe(). + */ struct dentry * rpc_mkdir(char *path, struct rpc_clnt *rpc_client) { @@ -699,6 +719,10 @@ err_dput: goto out; } +/** + * rpc_rmdir - Remove a directory created with rpc_mkdir() + * @dentry: directory to remove + */ int rpc_rmdir(struct dentry *dentry) { @@ -717,6 +741,25 @@ rpc_rmdir(struct dentry *dentry) return error; } +/** + * rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication + * @parent: dentry of directory to create new "pipe" in + * @name: name of pipe + * @private: private data to associate with the pipe, for the caller's use + * @ops: operations defining the behavior of the pipe: upcall, downcall, + * release_pipe, and destroy_msg. 
+ * + * Data is made available for userspace to read by calls to + * rpc_queue_upcall(). The actual reads will result in calls to + * @ops->upcall, which will be called with the file pointer, + * message, and userspace buffer to copy to. + * + * Writes can come at any time, and do not necessarily have to be + * responses to upcalls. They will result in calls to @msg->downcall. + * + * The @private argument passed here will be available to all these methods + * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. + */ struct dentry * rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) { @@ -763,7 +806,16 @@ err_dput: -ENOMEM); goto out; } +EXPORT_SYMBOL(rpc_mkpipe); +/** + * rpc_unlink - remove a pipe + * @dentry: dentry for the pipe, as returned from rpc_mkpipe + * + * After this call, lookups will no longer find the pipe, and any + * attempts to read or write using preexisting opens of the pipe will + * return -EPIPE. + */ int rpc_unlink(struct dentry *dentry) { @@ -785,6 +837,7 @@ rpc_unlink(struct dentry *dentry) dput(parent); return error; } +EXPORT_SYMBOL(rpc_unlink); /* * populate the filesystem diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index a05493aedb6..fa5b8f202d5 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -55,45 +55,6 @@ enum { #define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT /* - * r_addr - * - * Quoting RFC 3530, section 2.2: - * - * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the - * US-ASCII string: - * - * h1.h2.h3.h4.p1.p2 - * - * The prefix, "h1.h2.h3.h4", is the standard textual form for - * representing an IPv4 address, which is always four octets long. - * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively, - * the first through fourth octets each converted to ASCII-decimal. - * Assuming big-endian ordering, p1 and p2 are, respectively, the first - * and second octets each converted to ASCII-decimal. For example, if a - * host, in big-endian order, has an address of 0x0A010307 and there is - * a service listening on, in big endian order, port 0x020F (decimal - * 527), then the complete universal address is "10.1.3.7.2.15". - * - * ... - * - * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the - * US-ASCII string: - * - * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2 - * - * The suffix "p1.p2" is the service port, and is computed the same way - * as with universal addresses for TCP and UDP over IPv4. The prefix, - * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for - * representing an IPv6 address as defined in Section 2.2 of [RFC2373]. - * Additionally, the two alternative forms specified in Section 2.2 of - * [RFC2373] are also acceptable. - * - * XXX: Currently this implementation does not explicitly convert the - * stored address to US-ASCII on non-ASCII systems. - */ -#define RPCB_MAXADDRLEN (128u) - -/* * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. 
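[Editorial aside, not part of the patch: the rpc_pipefs kernel-doc added above in net/sunrpc/rpc_pipe.c (rpc_mkdir(), rpc_mkpipe(), rpc_queue_upcall(), rpc_unlink()) can be illustrated with a minimal caller-side sketch. The demo_* names and the "demo/chan" path are hypothetical, only the pipe operations actually exercised are filled in, and the usual sunrpc headers are assumed.]

static ssize_t demo_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
                                char __user *dst, size_t buflen)
{
        /* Hand the queued message to the userspace reader in chunks. */
        char *data = (char *)msg->data + msg->copied;
        size_t mlen = min(msg->len - msg->copied, buflen);

        if (copy_to_user(dst, data, mlen))
                return -EFAULT;
        msg->copied += mlen;
        return mlen;
}

static void demo_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
        /* The queuer allocated the message in this sketch; free it here. */
        kfree(msg->data);
        kfree(msg);
}

static struct rpc_pipe_ops demo_pipe_ops = {
        .upcall         = demo_pipe_upcall,
        .destroy_msg    = demo_pipe_destroy_msg,
        /* .downcall and .release_pipe omitted from this sketch */
};

static struct dentry *demo_dir;
static struct dentry *demo_pipe;

static int demo_pipe_setup(struct rpc_clnt *clnt)
{
        demo_dir = rpc_mkdir("demo", clnt);
        if (IS_ERR(demo_dir))
                return PTR_ERR(demo_dir);
        demo_pipe = rpc_mkpipe(demo_dir, "chan", NULL, &demo_pipe_ops, 0);
        if (IS_ERR(demo_pipe)) {
                rpc_rmdir(demo_dir);
                return PTR_ERR(demo_pipe);
        }
        /* Later: rpc_queue_upcall(demo_pipe->d_inode, msg) publishes a
         * message for userspace; rpc_unlink(demo_pipe) and
         * rpc_rmdir(demo_dir) tear the pipe down. */
        return 0;
}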
@@ -112,9 +73,9 @@ struct rpcbind_args { u32 r_vers; u32 r_prot; unsigned short r_port; - char * r_netid; - char r_addr[RPCB_MAXADDRLEN]; - char * r_owner; + const char * r_netid; + const char * r_addr; + const char * r_owner; }; static struct rpc_procinfo rpcb_procedures2[]; @@ -128,19 +89,6 @@ struct rpcb_info { static struct rpcb_info rpcb_next_version[]; static struct rpcb_info rpcb_next_version6[]; -static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) -{ - struct rpcbind_args *map = calldata; - struct rpc_xprt *xprt = map->r_xprt; - struct rpc_message msg = { - .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc, - .rpc_argp = map, - .rpc_resp = &map->r_port, - }; - - rpc_call_setup(task, &msg, 0); -} - static void rpcb_map_release(void *data) { struct rpcbind_args *map = data; @@ -150,7 +98,6 @@ static void rpcb_map_release(void *data) } static const struct rpc_call_ops rpcb_getport_ops = { - .rpc_call_prepare = rpcb_getport_prepare, .rpc_call_done = rpcb_getport_done, .rpc_release = rpcb_map_release, }; @@ -162,12 +109,13 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status) } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, - int proto, int version, int privileged) + size_t salen, int proto, u32 version, + int privileged) { struct rpc_create_args args = { .protocol = proto, .address = srvaddr, - .addrsize = sizeof(struct sockaddr_in), + .addrsize = salen, .servername = hostname, .program = &rpcb_program, .version = version, @@ -230,7 +178,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - XPRT_TRANSPORT_UDP, 2, 1); + sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -252,13 +200,15 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) * @vers: RPC version number to bind * @prot: transport protocol to use to make this request * + * Return value is the requested advertised port number, + * or a negative errno value. + * * Called from outside the RPC client in a synchronous task context. * Uses default timeout parameters specified by underlying transport. 
* - * XXX: Needs to support IPv6, and rpcbind versions 3 and 4 + * XXX: Needs to support IPv6 */ -int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, - __u32 vers, int prot) +int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) { struct rpcbind_args map = { .r_prog = prog, @@ -272,14 +222,13 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, .rpc_resp = &map.r_port, }; struct rpc_clnt *rpcb_clnt; - char hostname[40]; int status; dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n", __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); - sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); - rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0); + rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin, + sizeof(*sin), prot, 2, 0); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -295,6 +244,24 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog, } EXPORT_SYMBOL_GPL(rpcb_getport_sync); +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version) +{ + struct rpc_message msg = { + .rpc_proc = rpcb_next_version[version].rpc_proc, + .rpc_argp = map, + .rpc_resp = &map->r_port, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = rpcb_clnt, + .rpc_message = &msg, + .callback_ops = &rpcb_getport_ops, + .callback_data = map, + .flags = RPC_TASK_ASYNC, + }; + + return rpc_run_task(&task_setup_data); +} + /** * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request @@ -305,12 +272,14 @@ EXPORT_SYMBOL_GPL(rpcb_getport_sync); void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - int bind_version; + u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; static struct rpcbind_args *map; struct rpc_task *child; - struct sockaddr addr; + struct sockaddr_storage addr; + struct sockaddr *sap = (struct sockaddr *)&addr; + size_t salen; int status; struct rpcb_info *info; @@ -340,10 +309,10 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ - switch (addr.sa_family) { + switch (sap->sa_family) { case AF_INET: info = rpcb_next_version; break; @@ -368,7 +337,7 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot, bind_version, 0); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); @@ -390,12 +359,10 @@ void rpcb_getport_async(struct rpc_task *task) map->r_port = 0; map->r_xprt = xprt_get(xprt); map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - memcpy(map->r_addr, - rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), - sizeof(map->r_addr)); + map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); + child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; @@ -518,7 +485,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, * Simple sanity check. 
The smallest possible universal * address is an IPv4 address string containing 11 bytes. */ - if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) + if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) goto out_err; /* @@ -569,7 +536,7 @@ out_err: #define RPCB_boolean_sz (1u) #define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) -#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) +#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) #define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index eed5dd9819c..40ce6f6672d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -45,7 +45,7 @@ static void rpc_release_task(struct rpc_task *task); /* * RPC tasks sit here while waiting for conditions to improve. */ -static RPC_WAITQ(delay_queue, "delayq"); +static struct rpc_wait_queue delay_queue; /* * rpciod-related stuff @@ -135,7 +135,7 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r if (unlikely(task->tk_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; list_for_each_entry(t, q, u.tk_wait.list) { - if (t->tk_cookie == task->tk_cookie) { + if (t->tk_owner == task->tk_owner) { list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); return; } @@ -208,26 +208,26 @@ static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int queue->count = 1 << (priority * 2); } -static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie) +static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) { - queue->cookie = cookie; + queue->owner = pid; queue->nr = RPC_BATCH_COUNT; } static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_cookie(queue, 0); + rpc_set_waitqueue_owner(queue, 0); } -static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio) +static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues) { int i; spin_lock_init(&queue->lock); for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) INIT_LIST_HEAD(&queue->tasks[i]); - queue->maxpriority = maxprio; + queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); #ifdef RPC_DEBUG queue->name = qname; @@ -236,14 +236,14 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname) { - __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH); + __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY); } void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) { - __rpc_init_priority_wait_queue(queue, qname, 0); + __rpc_init_priority_wait_queue(queue, qname, 1); } -EXPORT_SYMBOL(rpc_init_wait_queue); +EXPORT_SYMBOL_GPL(rpc_init_wait_queue); static int rpc_wait_bit_interruptible(void *word) { @@ -303,7 +303,7 @@ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *)) return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE, action, TASK_INTERRUPTIBLE); } -EXPORT_SYMBOL(__rpc_wait_for_completion_task); +EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); /* * Make an RPC task runnable. 
@@ -373,6 +373,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, __rpc_sleep_on(q, task, action, timer); spin_unlock_bh(&q->lock); } +EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task @@ -444,6 +445,7 @@ void rpc_wake_up_task(struct rpc_task *task) } rcu_read_unlock_bh(); } +EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* * Wake up the next task on a priority queue. @@ -454,12 +456,12 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu struct rpc_task *task; /* - * Service a batch of tasks from a single cookie. + * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; if (!list_empty(q)) { task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->cookie == task->tk_cookie) { + if (queue->owner == task->tk_owner) { if (--queue->nr) goto out; list_move_tail(&task->u.tk_wait.list, q); @@ -468,7 +470,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu * Check if we need to switch queues. */ if (--queue->count) - goto new_cookie; + goto new_owner; } /* @@ -490,8 +492,8 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_cookie: - rpc_set_waitqueue_cookie(queue, task->tk_cookie); +new_owner: + rpc_set_waitqueue_owner(queue, task->tk_owner); out: __rpc_wake_up_task(task); return task; @@ -519,6 +521,7 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) return task; } +EXPORT_SYMBOL_GPL(rpc_wake_up_next); /** * rpc_wake_up - wake up all rpc_tasks @@ -544,6 +547,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue) spin_unlock(&queue->lock); rcu_read_unlock_bh(); } +EXPORT_SYMBOL_GPL(rpc_wake_up); /** * rpc_wake_up_status - wake up all rpc_tasks and set their status value. 
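[Editorial aside, not part of the patch: the rpc_init_task()/rpc_new_task() rework later in this file consumes the new struct rpc_task_setup introduced in net/sunrpc/clnt.c. As a hedged caller-side sketch of that interface, mirroring what rpc_call_async() now does internally, with hypothetical demo_* names and calldata assumed to be kmalloc'd by the caller:]

static void demo_call_done(struct rpc_task *task, void *calldata)
{
        /* tk_status carries the RPC result once the task completes. */
        if (task->tk_status < 0)
                printk(KERN_DEBUG "demo: call failed: %d\n", task->tk_status);
}

static void demo_release(void *calldata)
{
        kfree(calldata);
}

static const struct rpc_call_ops demo_call_ops = {
        .rpc_call_done  = demo_call_done,
        .rpc_release    = demo_release,
};

static int demo_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
                           void *calldata)
{
        struct rpc_task_setup task_setup_data = {
                .rpc_client     = clnt,
                .rpc_message    = msg,
                .callback_ops   = &demo_call_ops,
                .callback_data  = calldata,
                .flags          = RPC_TASK_ASYNC,
        };
        struct rpc_task *task;

        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        /* rpc_run_task() returns with an extra reference held for us. */
        rpc_put_task(task);
        return 0;
}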
@@ -572,6 +576,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) spin_unlock(&queue->lock); rcu_read_unlock_bh(); } +EXPORT_SYMBOL_GPL(rpc_wake_up_status); static void __rpc_atrun(struct rpc_task *task) { @@ -586,6 +591,7 @@ void rpc_delay(struct rpc_task *task, unsigned long delay) task->tk_timeout = delay; rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); } +EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare @@ -614,7 +620,7 @@ void rpc_exit_task(struct rpc_task *task) } } } -EXPORT_SYMBOL(rpc_exit_task); +EXPORT_SYMBOL_GPL(rpc_exit_task); void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) { @@ -808,39 +814,49 @@ EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures */ -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) +static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer, (unsigned long)task); atomic_set(&task->tk_count, 1); - task->tk_client = clnt; - task->tk_flags = flags; - task->tk_ops = tk_ops; - if (tk_ops->rpc_call_prepare != NULL) - task->tk_action = rpc_prepare_task; - task->tk_calldata = calldata; + task->tk_flags = task_setup_data->flags; + task->tk_ops = task_setup_data->callback_ops; + task->tk_calldata = task_setup_data->callback_data; INIT_LIST_HEAD(&task->tk_task); /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_priority = RPC_PRIORITY_NORMAL; - task->tk_cookie = (unsigned long)current; + task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW; + task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ task->tk_workqueue = rpciod_workqueue; - if (clnt) { - kref_get(&clnt->cl_kref); - if (clnt->cl_softrtry) + task->tk_client = task_setup_data->rpc_client; + if (task->tk_client != NULL) { + kref_get(&task->tk_client->cl_kref); + if (task->tk_client->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; - if (!clnt->cl_intr) + if (!task->tk_client->cl_intr) task->tk_flags |= RPC_TASK_NOINTR; } - BUG_ON(task->tk_ops == NULL); + if (task->tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; + + if (task_setup_data->rpc_message != NULL) { + memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg)); + /* Bind the user cred */ + if (task->tk_msg.rpc_cred != NULL) + rpcauth_holdcred(task); + else + rpcauth_bindcred(task); + if (task->tk_action == NULL) + rpc_call_start(task); + } /* starting timestamp */ task->tk_start = jiffies; @@ -865,18 +881,22 @@ static void rpc_free_task(struct rcu_head *rcu) /* * Create a new task for the specified client. 
*/ -struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) +struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data) { - struct rpc_task *task; - - task = rpc_alloc_task(); - if (!task) - goto out; + struct rpc_task *task = setup_data->task; + unsigned short flags = 0; + + if (task == NULL) { + task = rpc_alloc_task(); + if (task == NULL) + goto out; + flags = RPC_TASK_DYNAMIC; + } - rpc_init_task(task, clnt, flags, tk_ops, calldata); + rpc_init_task(task, setup_data); + task->tk_flags |= flags; dprintk("RPC: allocated task %p\n", task); - task->tk_flags |= RPC_TASK_DYNAMIC; out: return task; } @@ -902,7 +922,7 @@ void rpc_put_task(struct rpc_task *task) call_rcu_bh(&task->u.tk_rcu, rpc_free_task); rpc_release_calldata(tk_ops, calldata); } -EXPORT_SYMBOL(rpc_put_task); +EXPORT_SYMBOL_GPL(rpc_put_task); static void rpc_release_task(struct rpc_task *task) { @@ -959,6 +979,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt) } spin_unlock(&clnt->cl_lock); } +EXPORT_SYMBOL_GPL(rpc_killall_tasks); int rpciod_up(void) { @@ -1038,6 +1059,11 @@ rpc_init_mempool(void) goto err_nomem; if (!rpciod_start()) goto err_nomem; + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 97ac45f034d..a661a3acb37 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -72,7 +72,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; ssize_t copied = 0; - int ret; + size_t ret; len = xdr->head[0].iov_len; if (base < len) { diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 4d4f3738b68..74df2d358e6 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -118,7 +118,7 @@ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL); return new; } -EXPORT_SYMBOL(rpc_alloc_iostats); +EXPORT_SYMBOL_GPL(rpc_alloc_iostats); /** * rpc_free_iostats - release an rpc_iostats structure @@ -129,7 +129,7 @@ void rpc_free_iostats(struct rpc_iostats *stats) { kfree(stats); } -EXPORT_SYMBOL(rpc_free_iostats); +EXPORT_SYMBOL_GPL(rpc_free_iostats); /** * rpc_count_iostats - tally up per-task stats @@ -215,7 +215,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) metrics->om_execute * MILLISECS_PER_JIFFY); } } -EXPORT_SYMBOL(rpc_print_iostats); +EXPORT_SYMBOL_GPL(rpc_print_iostats); /* * Register/unregister RPC proc files @@ -241,12 +241,14 @@ rpc_proc_register(struct rpc_stat *statp) { return do_register(statp->program->name, statp, &rpc_proc_fops); } +EXPORT_SYMBOL_GPL(rpc_proc_register); void rpc_proc_unregister(const char *name) { remove_proc_entry(name, proc_net_rpc); } +EXPORT_SYMBOL_GPL(rpc_proc_unregister); struct proc_dir_entry * svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 33d89e842c8..1a7e309d008 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -22,45 +22,6 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/xprtsock.h> -/* RPC scheduler */ -EXPORT_SYMBOL(rpc_execute); -EXPORT_SYMBOL(rpc_init_task); -EXPORT_SYMBOL(rpc_sleep_on); -EXPORT_SYMBOL(rpc_wake_up_next); -EXPORT_SYMBOL(rpc_wake_up_task); 
-EXPORT_SYMBOL(rpc_wake_up_status); - -/* RPC client functions */ -EXPORT_SYMBOL(rpc_clone_client); -EXPORT_SYMBOL(rpc_bind_new_program); -EXPORT_SYMBOL(rpc_shutdown_client); -EXPORT_SYMBOL(rpc_killall_tasks); -EXPORT_SYMBOL(rpc_call_sync); -EXPORT_SYMBOL(rpc_call_async); -EXPORT_SYMBOL(rpc_call_setup); -EXPORT_SYMBOL(rpc_clnt_sigmask); -EXPORT_SYMBOL(rpc_clnt_sigunmask); -EXPORT_SYMBOL(rpc_delay); -EXPORT_SYMBOL(rpc_restart_call); -EXPORT_SYMBOL(rpc_setbufsize); -EXPORT_SYMBOL(rpc_unlink); -EXPORT_SYMBOL(rpc_wake_up); -EXPORT_SYMBOL(rpc_queue_upcall); -EXPORT_SYMBOL(rpc_mkpipe); - -/* Client transport */ -EXPORT_SYMBOL(xprt_set_timeout); - -/* Client credential cache */ -EXPORT_SYMBOL(rpcauth_register); -EXPORT_SYMBOL(rpcauth_unregister); -EXPORT_SYMBOL(rpcauth_create); -EXPORT_SYMBOL(rpcauth_lookupcred); -EXPORT_SYMBOL(rpcauth_lookup_credcache); -EXPORT_SYMBOL(rpcauth_destroy_credcache); -EXPORT_SYMBOL(rpcauth_init_credcache); -EXPORT_SYMBOL(put_rpccred); - /* RPC server stuff */ EXPORT_SYMBOL(svc_create); EXPORT_SYMBOL(svc_create_thread); @@ -81,8 +42,6 @@ EXPORT_SYMBOL(svc_set_client); /* RPC statistics */ #ifdef CONFIG_PROC_FS -EXPORT_SYMBOL(rpc_proc_register); -EXPORT_SYMBOL(rpc_proc_unregister); EXPORT_SYMBOL(svc_proc_register); EXPORT_SYMBOL(svc_proc_unregister); EXPORT_SYMBOL(svc_seq_show); @@ -105,31 +64,6 @@ EXPORT_SYMBOL(qword_get); EXPORT_SYMBOL(svcauth_unix_purge); EXPORT_SYMBOL(unix_domain_find); -/* Generic XDR */ -EXPORT_SYMBOL(xdr_encode_string); -EXPORT_SYMBOL(xdr_decode_string_inplace); -EXPORT_SYMBOL(xdr_decode_netobj); -EXPORT_SYMBOL(xdr_encode_netobj); -EXPORT_SYMBOL(xdr_encode_pages); -EXPORT_SYMBOL(xdr_inline_pages); -EXPORT_SYMBOL(xdr_shift_buf); -EXPORT_SYMBOL(xdr_encode_word); -EXPORT_SYMBOL(xdr_decode_word); -EXPORT_SYMBOL(xdr_encode_array2); -EXPORT_SYMBOL(xdr_decode_array2); -EXPORT_SYMBOL(xdr_buf_from_iov); -EXPORT_SYMBOL(xdr_buf_subsegment); -EXPORT_SYMBOL(xdr_buf_read_netobj); -EXPORT_SYMBOL(read_bytes_from_xdr_buf); - -/* Debugging symbols */ -#ifdef RPC_DEBUG -EXPORT_SYMBOL(rpc_debug); -EXPORT_SYMBOL(nfs_debug); -EXPORT_SYMBOL(nfsd_debug); -EXPORT_SYMBOL(nlm_debug); -#endif - extern struct cache_detail ip_map_cache, unix_gid_cache; static int __init diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a4a6bf7deaa..4ad5fbbb18b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -18,6 +18,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/sched.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 2be714e9b38..bada7de0c2f 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -23,9 +23,16 @@ * Declare the debug flags here */ unsigned int rpc_debug; +EXPORT_SYMBOL_GPL(rpc_debug); + unsigned int nfs_debug; +EXPORT_SYMBOL_GPL(nfs_debug); + unsigned int nfsd_debug; +EXPORT_SYMBOL_GPL(nfsd_debug); + unsigned int nlm_debug; +EXPORT_SYMBOL_GPL(nlm_debug); #ifdef RPC_DEBUG diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fdc5e6d7562..54264062ea6 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -28,6 +28,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } +EXPORT_SYMBOL(xdr_encode_netobj); __be32 * xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) @@ -40,6 +41,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) obj->data = (u8 *) p; return p + XDR_QUADLEN(len); } +EXPORT_SYMBOL(xdr_decode_netobj); /** * xdr_encode_opaque_fixed - 
Encode fixed length opaque data @@ -91,6 +93,7 @@ xdr_encode_string(__be32 *p, const char *string) { return xdr_encode_array(p, string, strlen(string)); } +EXPORT_SYMBOL(xdr_encode_string); __be32 * xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) @@ -103,6 +106,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) *sp = (char *) p; return p + XDR_QUADLEN(len); } +EXPORT_SYMBOL(xdr_decode_string_inplace); void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, @@ -130,6 +134,7 @@ xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, xdr->buflen += len; xdr->len += len; } +EXPORT_SYMBOL(xdr_encode_pages); void xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, @@ -151,7 +156,7 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } - +EXPORT_SYMBOL(xdr_inline_pages); /* * Helper routines for doing 'memmove' like operations on a struct xdr_buf @@ -418,6 +423,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) { xdr_shrink_bufhead(buf, len); } +EXPORT_SYMBOL(xdr_shift_buf); /** * xdr_init_encode - Initialize a struct xdr_stream for sending data. @@ -639,6 +645,7 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) buf->page_len = 0; buf->buflen = buf->len = iov->iov_len; } +EXPORT_SYMBOL(xdr_buf_from_iov); /* Sets subbuf to the portion of buf of length len beginning base bytes * from the start of buf. Returns -1 if base of length are out of bounds. */ @@ -687,6 +694,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, return -1; return 0; } +EXPORT_SYMBOL(xdr_buf_subsegment); static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { @@ -717,6 +725,7 @@ int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, u __read_bytes_from_xdr_buf(&subbuf, obj, len); return 0; } +EXPORT_SYMBOL(read_bytes_from_xdr_buf); static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { @@ -760,6 +769,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) *obj = ntohl(raw); return 0; } +EXPORT_SYMBOL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) @@ -768,6 +778,7 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } +EXPORT_SYMBOL(xdr_encode_word); /* If the netobj starting offset bytes from the start of xdr_buf is contained * entirely in the head or the tail, set object to point to it; otherwise @@ -805,6 +816,7 @@ int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned in __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len); return 0; } +EXPORT_SYMBOL(xdr_buf_read_netobj); /* Returns 0 on success, or else a negative error code. 
*/ static int @@ -1010,6 +1022,7 @@ xdr_decode_array2(struct xdr_buf *buf, unsigned int base, return xdr_xcode_array2(buf, base, desc, 0); } +EXPORT_SYMBOL(xdr_decode_array2); int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, @@ -1021,6 +1034,7 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base, return xdr_xcode_array2(buf, base, desc, 1); } +EXPORT_SYMBOL(xdr_encode_array2); int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fb92f51405c..cfcade906a5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -501,9 +501,10 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); void xprt_set_retrans_timeout_rtt(struct rpc_task *task) { int timer = task->tk_msg.rpc_proc->p_timer; - struct rpc_rtt *rtt = task->tk_client->cl_rtt; + struct rpc_clnt *clnt = task->tk_client; + struct rpc_rtt *rtt = clnt->cl_rtt; struct rpc_rqst *req = task->tk_rqstp; - unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; + unsigned long max_timeout = clnt->cl_timeout->to_maxval; task->tk_timeout = rpc_calc_rto(rtt, timer); task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; @@ -514,7 +515,7 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); static void xprt_reset_majortimeo(struct rpc_rqst *req) { - struct rpc_timeout *to = &req->rq_xprt->timeout; + const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; req->rq_majortimeo = req->rq_timeout; if (to->to_exponential) @@ -534,7 +535,7 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) int xprt_adjust_timeout(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; - struct rpc_timeout *to = &xprt->timeout; + const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; int status = 0; if (time_before(jiffies, req->rq_majortimeo)) { @@ -568,17 +569,17 @@ static void xprt_autoclose(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); - xprt_disconnect(xprt); xprt->ops->close(xprt); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); xprt_release_write(xprt, NULL); } /** - * xprt_disconnect - mark a transport as disconnected + * xprt_disconnect_done - mark a transport as disconnected * @xprt: transport to flag for disconnect * */ -void xprt_disconnect(struct rpc_xprt *xprt) +void xprt_disconnect_done(struct rpc_xprt *xprt) { dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); @@ -586,7 +587,26 @@ void xprt_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } -EXPORT_SYMBOL_GPL(xprt_disconnect); +EXPORT_SYMBOL_GPL(xprt_disconnect_done); + +/** + * xprt_force_disconnect - force a transport to disconnect + * @xprt: transport to disconnect + * + */ +void xprt_force_disconnect(struct rpc_xprt *xprt) +{ + /* Don't race with the test_bit() in xprt_clear_locked() */ + spin_lock_bh(&xprt->transport_lock); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + /* Try to schedule an autoclose RPC call */ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(rpciod_workqueue, &xprt->task_cleanup); + else if (xprt->snd_task != NULL) + rpc_wake_up_task(xprt->snd_task); + spin_unlock_bh(&xprt->transport_lock); +} +EXPORT_SYMBOL_GPL(xprt_force_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -909,7 +929,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; - req->rq_timeout = xprt->timeout.to_initval; 
+ req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; @@ -959,22 +979,6 @@ void xprt_release(struct rpc_task *task) } /** - * xprt_set_timeout - set constant RPC timeout - * @to: RPC timeout parameters to set up - * @retr: number of retries - * @incr: amount of increase after each retry - * - */ -void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) -{ - to->to_initval = - to->to_increment = incr; - to->to_maxval = to->to_initval + (incr * retr); - to->to_retries = retr; - to->to_exponential = 0; -} - -/** * xprt_create_transport - create an RPC transport * @args: rpc transport creation arguments * diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 1aa1580cda6..e55427f73df 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -83,7 +83,7 @@ static const char transfertypes[][12] = { */ static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, +rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) { int len, n = 0, p; @@ -169,7 +169,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); int nsegs, nchunks = 0; - int pos; + unsigned int pos; struct rpcrdma_mr_seg *seg = req->rl_segments; struct rpcrdma_read_chunk *cur_rchunk = NULL; struct rpcrdma_write_array *warray = NULL; @@ -213,7 +213,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, (__be32 *)&cur_rchunk->rc_target.rs_offset, seg->mr_base); dprintk("RPC: %s: read chunk " - "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, + "elem %d@0x%llx:0x%x pos %u (%s)\n", __func__, seg->mr_len, (unsigned long long)seg->mr_base, seg->mr_rkey, pos, n < nsegs ? "more" : "last"); cur_rchunk++; @@ -552,7 +552,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * RDMA'd by server. See map at rpcrdma_create_chunks()! 
:-) */ static int -rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, __be32 **iptrp) +rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp) { unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6f2112dd9f7..02c522c17de 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -212,12 +212,16 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) static void xprt_rdma_free_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); - kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); + unsigned int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + switch (i) { + case RPC_DISPLAY_PROTO: + case RPC_DISPLAY_NETID: + continue; + default: + kfree(xprt->address_strings[i]); + } } static void @@ -289,6 +293,11 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) module_put(THIS_MODULE); } +static const struct rpc_timeout xprt_rdma_default_timeout = { + .to_initval = 60 * HZ, + .to_maxval = 60 * HZ, +}; + /** * xprt_setup_rdma - Set up transport to use RDMA * @@ -327,7 +336,7 @@ xprt_setup_rdma(struct xprt_create *args) } /* 60 second timeout, no retries */ - xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); + xprt->timeout = &xprt_rdma_default_timeout; xprt->bind_timeout = (60U * HZ); xprt->connect_timeout = (60U * HZ); xprt->reestablish_timeout = (5U * HZ); @@ -449,7 +458,7 @@ xprt_rdma_close(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: closing\n", __func__); - xprt_disconnect(xprt); + xprt_disconnect_done(xprt); (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); } @@ -682,7 +691,7 @@ xprt_rdma_send_request(struct rpc_task *task) } if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { - xprt_disconnect(xprt); + xprt_disconnect_done(xprt); return -ENOTCONN; /* implies disconnect */ } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 44b0fb942e8..ffbf22a1d2c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -522,7 +522,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { struct ib_device_attr devattr; - int rc; + int rc, err; rc = ib_query_device(ia->ri_id->device, &devattr); if (rc) { @@ -648,8 +648,10 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, return 0; out2: - if (ib_destroy_cq(ep->rep_cq)) - ; + err = ib_destroy_cq(ep->rep_cq); + if (err) + dprintk("RPC: %s: ib_destroy_cq returned %i\n", + __func__, err); out1: return rc; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6fa52f44de0..30e7ac243a9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -280,7 +280,9 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; @@ -299,21 +301,14 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - buf = kzalloc(8, 
GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; buf = kzalloc(48, GFP_KERNEL); if (buf) { snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), - xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + protocol); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; @@ -340,12 +335,12 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? - RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); + xprt->address_strings[RPC_DISPLAY_NETID] = netid; } -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) { struct sockaddr_in6 *addr = xs_addr_in6(xprt); char *buf; @@ -364,21 +359,14 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; buf = kzalloc(64, GFP_KERNEL); if (buf) { snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", NIP6(addr->sin6_addr), ntohs(addr->sin6_port), - xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + protocol); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; @@ -405,17 +393,21 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? 
- RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); + xprt->address_strings[RPC_DISPLAY_NETID] = netid; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - int i; + unsigned int i; for (i = 0; i < RPC_DISPLAY_MAX; i++) - kfree(xprt->address_strings[i]); + switch (i) { + case RPC_DISPLAY_PROTO: + case RPC_DISPLAY_NETID: + continue; + default: + kfree(xprt->address_strings[i]); + } } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -614,6 +606,22 @@ static int xs_udp_send_request(struct rpc_task *task) return status; } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_WR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock != NULL) + kernel_sock_shutdown(sock, SHUT_WR); +} + static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) { u32 reclen = buf->len - sizeof(rpc_fraghdr); @@ -691,7 +699,7 @@ static int xs_tcp_send_request(struct rpc_task *task) default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); - xprt_disconnect(xprt); + xs_tcp_shutdown(xprt); break; } @@ -759,7 +767,9 @@ static void xs_close(struct rpc_xprt *xprt) clear_close_wait: smp_mb__before_clear_bit(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); + xprt_disconnect_done(xprt); } /** @@ -775,7 +785,6 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_rearming_delayed_work(&transport->connect_worker); - xprt_disconnect(xprt); xs_close(xprt); xs_free_peer_addresses(xprt); kfree(xprt->slot); @@ -886,7 +895,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea /* Sanity check of the record length */ if (unlikely(transport->tcp_reclen < 4)) { dprintk("RPC: invalid TCP record fragment length\n"); - xprt_disconnect(xprt); + xprt_force_disconnect(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", @@ -1113,21 +1122,44 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; xprt_wake_pending_tasks(xprt, 0); } spin_unlock_bh(&xprt->transport_lock); break; - case TCP_SYN_SENT: - case TCP_SYN_RECV: + case TCP_FIN_WAIT1: + /* The client initiated a shutdown of the socket */ + xprt->reestablish_timeout = 0; + set_bit(XPRT_CLOSING, &xprt->state); + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTED, &xprt->state); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + smp_mb__after_clear_bit(); break; case TCP_CLOSE_WAIT: - /* Try to schedule an autoclose RPC calls */ - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(rpciod_workqueue, &xprt->task_cleanup); - default: - xprt_disconnect(xprt); + /* The server initiated a shutdown of the socket */ + set_bit(XPRT_CLOSING, &xprt->state); + xprt_force_disconnect(xprt); + case TCP_SYN_SENT: + case TCP_CLOSING: + /* + * If the server closed down the connection, make sure that + * we back off before reconnecting + */ + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + break; + case TCP_LAST_ACK: + smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTED, &xprt->state); + smp_mb__after_clear_bit(); + break; + case TCP_CLOSE: + 
smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); } out: read_unlock(&sk->sk_callback_lock); @@ -1279,34 +1311,53 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) } } +static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) +{ + unsigned short port = transport->port; + + if (port == 0 && transport->xprt.resvport) + port = xs_get_random_port(); + return port; +} + +static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) +{ + if (transport->port != 0) + transport->port = 0; + if (!transport->xprt.resvport) + return 0; + if (port <= xprt_min_resvport || port > xprt_max_resvport) + return xprt_max_resvport; + return --port; +} + static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, }; struct sockaddr_in *sa; - int err; - unsigned short port = transport->port; + int err, nloop = 0; + unsigned short port = xs_get_srcport(transport, sock); + unsigned short last; - if (!transport->xprt.resvport) - port = 0; sa = (struct sockaddr_in *)&transport->addr; myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); - if (!transport->xprt.resvport) + if (port == 0) break; if (err == 0) { transport->port = port; break; } - if (port <= xprt_min_resvport) - port = xprt_max_resvport; - else - port--; - } while (err == -EADDRINUSE && port != transport->port); + last = port; + port = xs_next_srcport(transport, sock, port); + if (port > last) + nloop++; + } while (err == -EADDRINUSE && nloop != 2); dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", __FUNCTION__, NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); @@ -1319,28 +1370,27 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) .sin6_family = AF_INET6, }; struct sockaddr_in6 *sa; - int err; - unsigned short port = transport->port; + int err, nloop = 0; + unsigned short port = xs_get_srcport(transport, sock); + unsigned short last; - if (!transport->xprt.resvport) - port = 0; sa = (struct sockaddr_in6 *)&transport->addr; myaddr.sin6_addr = sa->sin6_addr; do { myaddr.sin6_port = htons(port); err = kernel_bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); - if (!transport->xprt.resvport) + if (port == 0) break; if (err == 0) { transport->port = port; break; } - if (port <= xprt_min_resvport) - port = xprt_max_resvport; - else - port--; - } while (err == -EADDRINUSE && port != transport->port); + last = port; + port = xs_next_srcport(transport, sock, port); + if (port > last) + nloop++; + } while (err == -EADDRINUSE && nloop != 2); dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", NIP6(myaddr.sin6_addr), port, err ? 
"failed" : "ok", err); return err; @@ -1602,8 +1652,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) break; default: /* get rid of existing socket, and retry */ - xs_close(xprt); - break; + xs_tcp_shutdown(xprt); } } out: @@ -1662,8 +1711,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) break; default: /* get rid of existing socket, and retry */ - xs_close(xprt); - break; + xs_tcp_shutdown(xprt); } } out: @@ -1710,6 +1758,19 @@ static void xs_connect(struct rpc_task *task) } } +static void xs_tcp_connect(struct rpc_task *task) +{ + struct rpc_xprt *xprt = task->tk_xprt; + + /* Initiate graceful shutdown of the socket if not already done */ + if (test_bit(XPRT_CONNECTED, &xprt->state)) + xs_tcp_shutdown(xprt); + /* Exit if we need to wait for socket shutdown to complete */ + if (test_bit(XPRT_CLOSING, &xprt->state)) + return; + xs_connect(task); +} + /** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics @@ -1780,12 +1841,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .release_xprt = xs_tcp_release_xprt, .rpcbind = rpcb_getport_async, .set_port = xs_set_port, - .connect = xs_connect, + .connect = xs_tcp_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_close, + .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; @@ -1822,11 +1883,17 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, xprt->addrlen = args->addrlen; if (args->srcaddr) memcpy(&new->addr, args->srcaddr, args->addrlen); - new->port = xs_get_random_port(); return xprt; } +static const struct rpc_timeout xs_udp_default_timeout = { + .to_initval = 5 * HZ, + .to_maxval = 30 * HZ, + .to_increment = 5 * HZ, + .to_retries = 5, +}; + /** * xs_setup_udp - Set up transport to use a UDP socket * @args: rpc transport creation arguments @@ -1855,10 +1922,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->ops = &xs_udp_ops; - if (args->timeout) - xprt->timeout = *args->timeout; - else - xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); + xprt->timeout = &xs_udp_default_timeout; switch (addr->sa_family) { case AF_INET: @@ -1867,7 +1931,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); + xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -1875,7 +1939,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); + xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); @@ -1893,6 +1957,12 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return ERR_PTR(-EINVAL); } +static const struct rpc_timeout xs_tcp_default_timeout = { + .to_initval = 60 * HZ, + .to_maxval = 60 * HZ, + .to_retries = 2, +}; + /** * xs_setup_tcp - Set up transport to use a TCP socket * @args: rpc transport creation arguments @@ -1919,11 +1989,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->idle_timeout = XS_IDLE_DISC_TO; xprt->ops = &xs_tcp_ops; - - if (args->timeout) - xprt->timeout = *args->timeout; - else - xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); + xprt->timeout = &xs_tcp_default_timeout; switch 
(addr->sa_family) { case AF_INET: @@ -1931,14 +1997,14 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); + xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); + xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt);
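
The xs_bind4()/xs_bind6() hunks above replace the open-coded source-port search with the xs_get_srcport()/xs_next_srcport() helpers and an nloop counter, so a failed bind walks the reserved-port range at most twice instead of looping until it happens to return to transport->port (which is no longer pre-seeded with a random value in xs_setup_xprt()). The standalone sketch below mirrors that rotation logic in plain user-space C; pick_srcport(), next_srcport(), MIN_RESVPORT and MAX_RESVPORT are illustrative stand-ins for the kernel helpers and sysctl limits, and the range is shrunk so the demo output stays short.

/*
 * Sketch of the source-port rotation introduced above.  All names here
 * are demo stand-ins, not kernel symbols; the kernel default range is
 * roughly 665..1023 and the first candidate is chosen at random.
 */
#include <stdbool.h>
#include <stdio.h>

#define MIN_RESVPORT 1018
#define MAX_RESVPORT 1023

struct demo_transport {
	unsigned short port;	/* previously bound source port, 0 if none */
	bool resvport;		/* caller asked for a privileged port */
};

/* First candidate: reuse the cached port, else start inside the range. */
static unsigned short pick_srcport(struct demo_transport *t)
{
	if (t->port == 0 && t->resvport)
		return MAX_RESVPORT;	/* the kernel picks a random port here */
	return t->port;
}

/* Next candidate after an address-in-use failure; wraps back to the top. */
static unsigned short next_srcport(struct demo_transport *t,
				   unsigned short port)
{
	t->port = 0;			/* the cached port is stale now */
	if (!t->resvport)
		return 0;		/* 0 lets the stack choose */
	if (port <= MIN_RESVPORT || port > MAX_RESVPORT)
		return MAX_RESVPORT;
	return --port;
}

int main(void)
{
	struct demo_transport t = { .port = 0, .resvport = true };
	unsigned short port = pick_srcport(&t);
	unsigned short last;
	int nloop = 0;

	/* Mirror of the bind loop, pretending every bind fails in use. */
	do {
		printf("try source port %u\n", port);
		last = port;
		port = next_srcport(&t, port);
		if (port > last)
			nloop++;	/* wrapped from MIN back to MAX */
	} while (nloop != 2);
	return 0;
}

Counting wrap-arounds (port > last) rather than comparing against the starting port is what guarantees termination even when the loop starts from a random or cached port in the middle of the range.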
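
A second recurring change in the xprt.c, transport.c and xprtsock.c hunks is that the mutable struct rpc_timeout formerly embedded in each rpc_xprt (and filled in by the now-deleted xprt_set_timeout()) becomes a pointer to shared, read-only defaults (xs_udp_default_timeout, xs_tcp_default_timeout, xprt_rdma_default_timeout), while the values consulted at run time come from the client's cl_timeout. The sketch below is a loose user-space rendering of the backoff arithmetic those parameters drive; demo_timeout, udp_like, calc_next_timeout() and the seconds-based values are assumptions for illustration only (the kernel equivalent works in jiffies and also resets to to_initval when the major timeout expires).

/*
 * Illustration of how to_initval, to_increment, to_maxval, to_retries
 * and to_exponential interact; not the kernel implementation.
 */
#include <stdio.h>

struct demo_timeout {
	unsigned long to_initval;	/* first timeout */
	unsigned long to_maxval;	/* cap on any single timeout */
	unsigned long to_increment;	/* linear backoff step */
	unsigned int  to_retries;	/* retries before a major timeout */
	unsigned char to_exponential;	/* double instead of adding */
};

/* Same shape as xs_udp_default_timeout above, but in seconds. */
static const struct demo_timeout udp_like = {
	.to_initval   = 5,
	.to_maxval    = 30,
	.to_increment = 5,
	.to_retries   = 5,
};

static unsigned long calc_next_timeout(const struct demo_timeout *to,
					unsigned long cur)
{
	unsigned long next;

	next = to->to_exponential ? cur << 1 : cur + to->to_increment;
	if (next > to->to_maxval)
		next = to->to_maxval;
	return next;
}

int main(void)
{
	unsigned long t = udp_like.to_initval;
	unsigned int i;

	for (i = 0; i <= udp_like.to_retries; i++) {
		printf("attempt %u: wait %lu s\n", i + 1, t);
		t = calc_next_timeout(&udp_like, t);
	}
	return 0;
}

Making the timeout table const and per-client rather than per-transport is what lets several transports (and the new RDMA default) share one static table instead of each carrying a writable copy.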
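
Finally, the xs_tcp_state_change() rework above stops treating every non-established socket state as an immediate hard disconnect: a client-initiated shutdown (TCP_FIN_WAIT1), a server-initiated one (TCP_CLOSE_WAIT, which now goes through the new xprt_force_disconnect()), TCP_LAST_ACK and the final TCP_CLOSE are handled separately, with the new XPRT_CLOSING bit tracking the teardown and xprt_disconnect_done() running only once the socket is really gone. The condensed sketch below is an assumption-laden illustration of that dispatch; demo_xprt, handle_tcp_state() and the plain int flags stand in for the real atomic state bits manipulated under sk_callback_lock, and the reconnect backoff and memory-barrier details are elided.

/* Rough map of TCP teardown states to transport-state transitions. */
#include <stdio.h>

enum tcp_state { FIN_WAIT1, CLOSE_WAIT, CLOSING_ST, LAST_ACK, CLOSE };

struct demo_xprt {
	int connected;	/* stands in for XPRT_CONNECTED */
	int close_wait;	/* stands in for XPRT_CLOSE_WAIT */
	int closing;	/* stands in for XPRT_CLOSING */
};

static void handle_tcp_state(struct demo_xprt *x, enum tcp_state st)
{
	switch (st) {
	case FIN_WAIT1:			/* we initiated the shutdown */
		x->closing = 1;
		x->connected = 0;
		x->close_wait = 0;
		break;
	case CLOSE_WAIT:		/* the server initiated it */
		x->closing = 1;
		/* real code: xprt_force_disconnect() schedules autoclose */
		printf("force disconnect\n");
		/* fall through: back off before reconnecting */
	case CLOSING_ST:
		break;
	case LAST_ACK:
		x->connected = 0;
		break;
	case CLOSE:			/* connection fully torn down */
		x->close_wait = 0;
		x->closing = 0;
		printf("disconnect done, wake pending tasks\n");
		break;
	}
}

int main(void)
{
	struct demo_xprt x = { .connected = 1 };

	handle_tcp_state(&x, CLOSE_WAIT);
	handle_tcp_state(&x, LAST_ACK);
	handle_tcp_state(&x, CLOSE);
	printf("connected=%d closing=%d\n", x.connected, x.closing);
	return 0;
}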