summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt11
-rw-r--r--arch/x86/net/bpf_jit.S122
-rw-r--r--arch/x86/net/bpf_jit_comp.c41
-rw-r--r--drivers/net/ppp/ppp_generic.c14
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--net/core/dev.c58
-rw-r--r--net/core/dev_addr_lists.c3
-rw-r--r--net/core/filter.c9
-rw-r--r--net/ipv4/tcp.c2
9 files changed, 148 insertions, 114 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ad3e80e17b4..bd80ba5847d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -604,15 +604,8 @@ IP Variables:
ip_local_port_range - 2 INTEGERS
Defines the local port range that is used by TCP and UDP to
choose the local port. The first number is the first, the
- second the last local port number. Default value depends on
- amount of memory available on the system:
- > 128Mb 32768-61000
- < 128Mb 1024-4999 or even less.
- This number defines number of active connections, which this
- system can issue simultaneously to systems not supporting
- TCP extensions (timestamps). With tcp_tw_recycle enabled
- (i.e. by default) range 1024-4999 is enough to issue up to
- 2000 connections per second to systems supporting timestamps.
+ second the last local port number. The default values are
+ 32768 and 61000 respectively.
ip_local_reserved_ports - list of comma separated ranges
Specify the ports which are reserved for known third-party
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 66870223f8c..877b9a1b215 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
-
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+ .globl sk_load_word_positive_offset
+
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+ .globl sk_load_half_positive_offset
+
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+ .globl sk_load_byte_positive_offset
+
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
-ENTRY(sk_load_byte_msh)
- CFI_STARTPROC
+sk_load_byte_msh:
+ .globl sk_load_byte_msh
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+ .globl sk_load_byte_msh_positive_offset
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
- CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
- leaveq
- ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,67 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define sk_negative_common(SIZE) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+
+bpf_slow_path_word_neg:
+ cmp SKF_MAX_NEG_OFF, %esi /* test range */
+ jl bpf_error /* offset lower -> error */
+sk_load_word_negative_offset:
+ .globl sk_load_word_negative_offset
+ sk_negative_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_half_negative_offset:
+ .globl sk_load_half_negative_offset
+ sk_negative_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_negative_offset:
+ .globl sk_load_byte_negative_offset
+ sk_negative_common(1)
+ movzbl (%rax), %eax
+ ret
+
+bpf_slow_path_byte_msh_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_msh_negative_offset:
+ .globl sk_load_byte_msh_negative_offset
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ sk_negative_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
+
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5a5b6e4dd73..0597f95b6da 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
set_fs(old_fs);
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
void bpf_jit_compile(struct sk_filter *fp)
{
@@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
#endif
break;
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
break;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- /* Abort the JIT because __load_pointer() is needed. */
- goto out;
- }
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = sk_load_byte_msh - (image + addrs[i]);
+ t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ if (is_imm8(K)) {
+ EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
+ } else {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ }
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 159da2905fe..33f8c51968b 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -235,7 +235,7 @@ struct ppp_net {
/* Prototypes. */
static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
struct file *file, unsigned int cmd, unsigned long arg);
-static void ppp_xmit_process(struct ppp *ppp);
+static int ppp_xmit_process(struct ppp *ppp);
static void ppp_send_frame(struct ppp *ppp, struct sk_buff *skb);
static void ppp_push(struct ppp *ppp);
static void ppp_channel_push(struct channel *pch);
@@ -968,9 +968,9 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
proto = npindex_to_proto[npi];
put_unaligned_be16(proto, pp);
- netif_stop_queue(dev);
skb_queue_tail(&ppp->file.xq, skb);
- ppp_xmit_process(ppp);
+ if (!ppp_xmit_process(ppp))
+ netif_stop_queue(dev);
return NETDEV_TX_OK;
outf:
@@ -1048,10 +1048,11 @@ static void ppp_setup(struct net_device *dev)
* Called to do any work queued up on the transmit side
* that can now be done.
*/
-static void
+static int
ppp_xmit_process(struct ppp *ppp)
{
struct sk_buff *skb;
+ int ret = 0;
ppp_xmit_lock(ppp);
if (!ppp->closing) {
@@ -1061,10 +1062,13 @@ ppp_xmit_process(struct ppp *ppp)
ppp_send_frame(ppp, skb);
/* If there's no work left to do, tell the core net
code that we can accept some more. */
- if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq))
+ if (!ppp->xmit_pending && !skb_peek(&ppp->file.xq)) {
netif_wake_queue(ppp->dev);
+ ret = 1;
+ }
}
ppp_xmit_unlock(ppp);
+ return ret;
}
static inline struct sk_buff *
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1f77540bdc9..5cbaa20f165 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2604,8 +2604,6 @@ extern void net_disable_timestamp(void);
extern void *dev_seq_start(struct seq_file *seq, loff_t *pos);
extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos);
extern void dev_seq_stop(struct seq_file *seq, void *v);
-extern int dev_seq_open_ops(struct inode *inode, struct file *file,
- const struct seq_operations *ops);
#endif
extern int netdev_class_create_file(struct class_attribute *class_attr);
diff --git a/net/core/dev.c b/net/core/dev.c
index 6c7dc9d78e1..c25d453b280 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4028,54 +4028,41 @@ static int dev_ifconf(struct net *net, char __user *arg)
#ifdef CONFIG_PROC_FS
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
-
-struct dev_iter_state {
- struct seq_net_private p;
- unsigned int pos; /* bucket << BUCKET_SPACE + offset */
-};
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
{
- struct dev_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
struct net_device *dev;
struct hlist_node *p;
struct hlist_head *h;
- unsigned int count, bucket, offset;
+ unsigned int count = 0, offset = get_offset(*pos);
- bucket = get_bucket(state->pos);
- offset = get_offset(state->pos);
- h = &net->dev_name_head[bucket];
- count = 0;
+ h = &net->dev_name_head[get_bucket(*pos)];
hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
- if (count++ == offset) {
- state->pos = set_bucket_offset(bucket, count);
+ if (++count == offset)
return dev;
- }
}
return NULL;
}
-static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
{
- struct dev_iter_state *state = seq->private;
struct net_device *dev;
unsigned int bucket;
- bucket = get_bucket(state->pos);
do {
- dev = dev_from_same_bucket(seq);
+ dev = dev_from_same_bucket(seq, pos);
if (dev)
return dev;
- bucket++;
- state->pos = set_bucket_offset(bucket, 0);
+ bucket = get_bucket(*pos) + 1;
+ *pos = set_bucket_offset(bucket, 1);
} while (bucket < NETDEV_HASHENTRIES);
return NULL;
@@ -4088,33 +4075,20 @@ static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct dev_iter_state *state = seq->private;
-
rcu_read_lock();
if (!*pos)
return SEQ_START_TOKEN;
- /* check for end of the hash */
- if (state->pos == 0 && *pos > 1)
+ if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
return NULL;
- return dev_from_new_bucket(seq);
+ return dev_from_bucket(seq, pos);
}
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev;
-
++*pos;
-
- if (v == SEQ_START_TOKEN)
- return dev_from_new_bucket(seq);
-
- dev = dev_from_same_bucket(seq);
- if (dev)
- return dev;
-
- return dev_from_new_bucket(seq);
+ return dev_from_bucket(seq, pos);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4213,13 +4187,7 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &dev_seq_ops,
- sizeof(struct dev_iter_state));
-}
-
-int dev_seq_open_ops(struct inode *inode, struct file *file,
- const struct seq_operations *ops)
-{
- return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+ sizeof(struct seq_net_private));
}
static const struct file_operations dev_seq_fops = {
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 29c07fef922..626698f0db8 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -696,7 +696,8 @@ static const struct seq_operations dev_mc_seq_ops = {
static int dev_mc_seq_open(struct inode *inode, struct file *file)
{
- return dev_seq_open_ops(inode, file, &dev_mc_seq_ops);
+ return seq_open_net(inode, file, &dev_mc_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations dev_mc_seq_fops = {
diff --git a/net/core/filter.c b/net/core/filter.c
index cf4989ac503..6f755cca452 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -39,8 +39,11 @@
#include <linux/reciprocal_div.h>
#include <linux/ratelimit.h>
-/* No hurry in this branch */
-static void *__load_pointer(const struct sk_buff *skb, int k, unsigned int size)
+/* No hurry in this branch
+ *
+ * Exported for the bpf jit load helper.
+ */
+void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
u8 *ptr = NULL;
@@ -59,7 +62,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
{
if (k >= 0)
return skb_header_pointer(skb, k, size, buffer);
- return __load_pointer(skb, k, size);
+ return bpf_internal_load_pointer_neg_helper(skb, k, size);
}
/**
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index cfd7edda0a8..2ff6f45a76f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -860,7 +860,7 @@ wait_for_memory:
}
out:
- if (copied)
+ if (copied && !(flags & MSG_MORE))
tcp_push(sk, flags, mss_now, tp->nonagle);
return copied;