diff options
325 files changed, 14274 insertions, 6823 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-mdio b/Documentation/ABI/testing/sysfs-bus-mdio new file mode 100644 index 00000000000..6349749ebc2 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-mdio @@ -0,0 +1,9 @@ +What: /sys/bus/mdio_bus/devices/.../phy_id +Date: November 2012 +KernelVersion: 3.8 +Contact: netdev@vger.kernel.org +Description: + This attribute contains the 32-bit PHY Identifier as reported + by the device during bus enumeration, encoded in hexadecimal. + This ID is used to match the device with the appropriate + driver. diff --git a/Documentation/devicetree/bindings/net/cdns-emac.txt b/Documentation/devicetree/bindings/net/cdns-emac.txt new file mode 100644 index 00000000000..09055c2495f --- /dev/null +++ b/Documentation/devicetree/bindings/net/cdns-emac.txt @@ -0,0 +1,23 @@ +* Cadence EMAC Ethernet controller + +Required properties: +- compatible: Should be "cdns,[<chip>-]{emac}" + Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC. + or the generic form: "cdns,emac". +- reg: Address and length of the register set for the device +- interrupts: Should contain macb interrupt +- phy-mode: String, operation mode of the PHY interface. + Supported values are: "mii", "rmii". + +Optional properties: +- local-mac-address: 6 bytes, mac address + +Examples: + + macb0: ethernet@fffc4000 { + compatible = "cdns,at91rm9200-emac"; + reg = <0xfffc4000 0x4000>; + interrupts = <21>; + phy-mode = "rmii"; + local-mac-address = [3a 0e 03 04 05 06]; + }; diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index dcaabe9fe86..221460714c5 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -16,12 +16,16 @@ Required properties: - ale_entries : Specifies No of entries ALE can hold - host_port_reg_ofs : Specifies host port register offset - hw_stats_reg_ofs : Specifies hardware statistics register offset +- cpts_reg_ofs : Specifies the offset of the CPTS registers - bd_ram_ofs : Specifies internal desciptor RAM offset - bd_ram_size : Specifies internal descriptor RAM size - rx_descs : Specifies number of Rx descriptors - mac_control : Specifies Default MAC control register content for the specific platform - slaves : Specifies number for slaves +- cpts_active_slave : Specifies the slave to use for time stamping +- cpts_clock_mult : Numerator to convert input clock ticks into nanoseconds +- cpts_clock_shift : Denominator to convert input clock ticks into nanoseconds - slave_reg_ofs : Specifies slave register offset - sliver_reg_ofs : Specifies slave sliver register offset - phy_id : Specifies slave phy id @@ -52,21 +56,25 @@ Examples: ale_entries = <1024>; host_port_reg_ofs = <0x108>; hw_stats_reg_ofs = <0x900>; + cpts_reg_ofs = <0xc00>; bd_ram_ofs = <0x2000>; bd_ram_size = <0x2000>; no_bd_ram = <0>; rx_descs = <64>; mac_control = <0x20>; slaves = <2>; + cpts_active_slave = <0>; + cpts_clock_mult = <0x80000000>; + cpts_clock_shift = <29>; cpsw_emac0: slave@0 { - slave_reg_ofs = <0x208>; + slave_reg_ofs = <0x200>; sliver_reg_ofs = <0xd80>; phy_id = "davinci_mdio.16:00"; /* Filled in by U-Boot */ mac-address = [ 00 00 00 00 00 00 ]; }; cpsw_emac1: slave@1 { - slave_reg_ofs = <0x308>; + slave_reg_ofs = <0x300>; sliver_reg_ofs = <0xdc0>; phy_id = "davinci_mdio.16:01"; /* Filled in by U-Boot */ @@ -86,21 +94,25 @@ Examples: ale_entries = <1024>; host_port_reg_ofs = <0x108>; hw_stats_reg_ofs = <0x900>; + cpts_reg_ofs = <0xc00>; bd_ram_ofs = <0x2000>; bd_ram_size = <0x2000>; no_bd_ram = <0>; rx_descs = <64>; mac_control = <0x20>; slaves = <2>; + cpts_active_slave = <0>; + cpts_clock_mult = <0x80000000>; + cpts_clock_shift = <29>; cpsw_emac0: slave@0 { - slave_reg_ofs = <0x208>; + slave_reg_ofs = <0x200>; sliver_reg_ofs = <0xd80>; phy_id = "davinci_mdio.16:00"; /* Filled in by U-Boot */ mac-address = [ 00 00 00 00 00 00 ]; }; cpsw_emac1: slave@1 { - slave_reg_ofs = <0x308>; + slave_reg_ofs = <0x300>; sliver_reg_ofs = <0xdc0>; phy_id = "davinci_mdio.16:01"; /* Filled in by U-Boot */ diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index a173d2a879f..c1d82047a4b 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -203,7 +203,8 @@ abled during run time. Following log_levels are defined: 2 - Enable messages related to route added / changed / deleted 4 - Enable messages related to translation table operations 8 - Enable messages related to bridge loop avoidance -15 - enable all messages +16 - Enable messaged related to DAT, ARP snooping and parsing +31 - Enable all messages The debug output can be changed at runtime using the file /sys/class/net/bat0/mesh/log_level. e.g. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index c7fc1072494..98ac0d7552a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1514,6 +1514,20 @@ cookie_preserve_enable - BOOLEAN Default: 1 +cookie_hmac_alg - STRING + Select the hmac algorithm used when generating the cookie value sent by + a listening sctp socket to a connecting client in the INIT-ACK chunk. + Valid values are: + * md5 + * sha1 + * none + Ability to assign md5 or sha1 as the selected alg is predicated on the + configuarion of those algorithms at build time (CONFIG_CRYPTO_MD5 and + CONFIG_CRYPTO_SHA1). + + Default: Dependent on configuration. MD5 if available, else SHA1 if + available, else none. + rcvbuf_policy - INTEGER Determines if the receive buffer is attributed to the socket or to association. SCTP supports the capability to create multiple diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1c08a4b0981..94444b152fb 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -3,9 +3,9 @@ -------------------------------------------------------------------------------- This file documents the mmap() facility available with the PACKET -socket interface on 2.4 and 2.6 kernels. This type of sockets is used for -capture network traffic with utilities like tcpdump or any other that needs -raw access to network interface. +socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for +i) capture network traffic with utilities like tcpdump, ii) transmit network +traffic, or any other that needs raw access to network interface. You can find the latest version of this document at: http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap @@ -21,19 +21,18 @@ Please send your comments to + Why use PACKET_MMAP -------------------------------------------------------------------------------- -In Linux 2.4/2.6 if PACKET_MMAP is not enabled, the capture process is very -inefficient. It uses very limited buffers and requires one system call -to capture each packet, it requires two if you want to get packet's -timestamp (like libpcap always does). +In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very +inefficient. It uses very limited buffers and requires one system call to +capture each packet, it requires two if you want to get packet's timestamp +(like libpcap always does). In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size configurable circular buffer mapped in user space that can be used to either send or receive packets. This way reading packets just needs to wait for them, most of the time there is no need to issue a single system call. Concerning transmission, multiple packets can be sent through one system call to get the -highest bandwidth. -By using a shared buffer between the kernel and the user also has the benefit -of minimizing packet copies. +highest bandwidth. By using a shared buffer between the kernel and the user +also has the benefit of minimizing packet copies. It's fine to use PACKET_MMAP to improve the performance of the capture and transmission process, but it isn't everything. At least, if you are capturing @@ -41,7 +40,8 @@ at high speeds (this is relative to the cpu speed), you should check if the device driver of your network interface card supports some sort of interrupt load mitigation or (even better) if it supports NAPI, also make sure it is enabled. For transmission, check the MTU (Maximum Transmission Unit) used and -supported by devices of your network. +supported by devices of your network. CPU IRQ pinning of your network interface +card can also be an advantage. -------------------------------------------------------------------------------- + How to use mmap() to improve capture process @@ -87,9 +87,7 @@ the following process: socket creation and destruction is straight forward, and is done the same way with or without PACKET_MMAP: -int fd; - -fd= socket(PF_PACKET, mode, htons(ETH_P_ALL)) + int fd = socket(PF_PACKET, mode, htons(ETH_P_ALL)); where mode is SOCK_RAW for the raw interface were link level information can be captured or SOCK_DGRAM for the cooked @@ -163,11 +161,23 @@ As capture, each frame contains two parts: A complete tutorial is available at: http://wiki.gnu-log.net/ +By default, the user should put data at : + frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll) + +So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW), +the beginning of the user data will be at : + frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + +If you wish to put user data at a custom offset from the beginning of +the frame (for payload alignment with SOCK_RAW mode for instance) you +can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order +to make this work it must be enabled previously with setsockopt() +and the PACKET_TX_HAS_OFF option. + -------------------------------------------------------------------------------- + PACKET_MMAP settings -------------------------------------------------------------------------------- - To setup PACKET_MMAP from user level code is done with a call like - Capture process @@ -201,7 +211,6 @@ indeed, packet_set_ring checks that the following condition is true frames_per_block * tp_block_nr == tp_frame_nr - Lets see an example, with the following values: tp_block_size= 4096 @@ -227,7 +236,6 @@ be spawned across two blocks, so there are some details you have to take into account when choosing the frame_size. See "Mapping and use of the circular buffer (ring)". - -------------------------------------------------------------------------------- + PACKET_MMAP setting constraints -------------------------------------------------------------------------------- @@ -264,7 +272,6 @@ User space programs can include /usr/include/sys/user.h and The pagesize can also be determined dynamically with the getpagesize (2) system call. - Block number limit -------------------- @@ -284,7 +291,6 @@ called pg_vec, its size limits the number of blocks that can be allocated. v block #2 block #1 - kmalloc allocates any number of bytes of physically contiguous memory from a pool of pre-determined sizes. This pool of memory is maintained by the slab allocator which is at the end the responsible for doing the allocation and @@ -299,7 +305,6 @@ pointers to blocks is 131072/4 = 32768 blocks - PACKET_MMAP buffer size calculator ------------------------------------ @@ -340,7 +345,6 @@ and a value for <frame size> of 2048 bytes. These parameters will yield and hence the buffer will have a 262144 MiB size. So it can hold 262144 MiB / 2048 bytes = 134217728 frames - Actually, this buffer size is not possible with an i386 architecture. Remember that the memory is allocated in kernel space, in the case of an i386 kernel's memory size is limited to 1GiB. @@ -372,7 +376,6 @@ the following (from include/linux/if_packet.h): - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16. - Pad to align to TPACKET_ALIGNMENT=16 */ - The following are conditions that are checked in packet_set_ring @@ -413,7 +416,6 @@ and the following flags apply: #define TP_STATUS_LOSING 4 #define TP_STATUS_CSUMNOTREADY 8 - TP_STATUS_COPY : This flag indicates that the frame (and associated meta information) has been truncated because it's larger than tp_frame_size. This packet can be @@ -462,7 +464,6 @@ packets are in the ring: It doesn't incur in a race condition to first check the status value and then poll for frames. - ++ Transmission process Those defines are also used for transmission: @@ -494,6 +495,196 @@ The user can also use poll() to check if a buffer is available: retval = poll(&pfd, 1, timeout); ------------------------------------------------------------------------------- ++ What TPACKET versions are available and when to use them? +------------------------------------------------------------------------------- + + int val = tpacket_version; + setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)); + getsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)); + +where 'tpacket_version' can be TPACKET_V1 (default), TPACKET_V2, TPACKET_V3. + +TPACKET_V1: + - Default if not otherwise specified by setsockopt(2) + - RX_RING, TX_RING available + - VLAN metadata information available for packets + (TP_STATUS_VLAN_VALID) + +TPACKET_V1 --> TPACKET_V2: + - Made 64 bit clean due to unsigned long usage in TPACKET_V1 + structures, thus this also works on 64 bit kernel with 32 bit + userspace and the like + - Timestamp resolution in nanoseconds instead of microseconds + - RX_RING, TX_RING available + - How to switch to TPACKET_V2: + 1. Replace struct tpacket_hdr by struct tpacket2_hdr + 2. Query header len and save + 3. Set protocol version to 2, set up ring as usual + 4. For getting the sockaddr_ll, + use (void *)hdr + TPACKET_ALIGN(hdrlen) instead of + (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + +TPACKET_V2 --> TPACKET_V3: + - Flexible buffer implementation: + 1. Blocks can be configured with non-static frame-size + 2. Read/poll is at a block-level (as opposed to packet-level) + 3. Added poll timeout to avoid indefinite user-space wait + on idle links + 4. Added user-configurable knobs: + 4.1 block::timeout + 4.2 tpkt_hdr::sk_rxhash + - RX Hash data available in user space + - Currently only RX_RING available + +------------------------------------------------------------------------------- ++ AF_PACKET fanout mode +------------------------------------------------------------------------------- + +In the AF_PACKET fanout mode, packet reception can be load balanced among +processes. This also works in combination with mmap(2) on packet sockets. + +Minimal example code by David S. Miller (try things like "./test eth0 hash", +"./test eth0 lb", etc.): + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/socket.h> +#include <sys/ioctl.h> + +#include <unistd.h> + +#include <linux/if_ether.h> +#include <linux/if_packet.h> + +#include <net/if.h> + +static const char *device_name; +static int fanout_type; +static int fanout_id; + +#ifndef PACKET_FANOUT +# define PACKET_FANOUT 18 +# define PACKET_FANOUT_HASH 0 +# define PACKET_FANOUT_LB 1 +#endif + +static int setup_socket(void) +{ + int err, fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + struct sockaddr_ll ll; + struct ifreq ifr; + int fanout_arg; + + if (fd < 0) { + perror("socket"); + return EXIT_FAILURE; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, device_name); + err = ioctl(fd, SIOCGIFINDEX, &ifr); + if (err < 0) { + perror("SIOCGIFINDEX"); + return EXIT_FAILURE; + } + + memset(&ll, 0, sizeof(ll)); + ll.sll_family = AF_PACKET; + ll.sll_ifindex = ifr.ifr_ifindex; + err = bind(fd, (struct sockaddr *) &ll, sizeof(ll)); + if (err < 0) { + perror("bind"); + return EXIT_FAILURE; + } + + fanout_arg = (fanout_id | (fanout_type << 16)); + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &fanout_arg, sizeof(fanout_arg)); + if (err) { + perror("setsockopt"); + return EXIT_FAILURE; + } + + return fd; +} + +static void fanout_thread(void) +{ + int fd = setup_socket(); + int limit = 10000; + + if (fd < 0) + exit(fd); + + while (limit-- > 0) { + char buf[1600]; + int err; + + err = read(fd, buf, sizeof(buf)); + if (err < 0) { + perror("read"); + exit(EXIT_FAILURE); + } + if ((limit % 10) == 0) + fprintf(stdout, "(%d) \n", getpid()); + } + + fprintf(stdout, "%d: Received 10000 packets\n", getpid()); + + close(fd); + exit(0); +} + +int main(int argc, char **argp) +{ + int fd, err; + int i; + + if (argc != 3) { + fprintf(stderr, "Usage: %s INTERFACE {hash|lb}\n", argp[0]); + return EXIT_FAILURE; + } + + if (!strcmp(argp[2], "hash")) + fanout_type = PACKET_FANOUT_HASH; + else if (!strcmp(argp[2], "lb")) + fanout_type = PACKET_FANOUT_LB; + else { + fprintf(stderr, "Unknown fanout type [%s]\n", argp[2]); + exit(EXIT_FAILURE); + } + + device_name = argp[1]; + fanout_id = getpid() & 0xffff; + + for (i = 0; i < 4; i++) { + pid_t pid = fork(); + + switch (pid) { + case 0: + fanout_thread(); + + case -1: + perror("fork"); + exit(EXIT_FAILURE); + } + } + + for (i = 0; i < 4; i++) { + int status; + + wait(&status); + } + + return 0; +} + +------------------------------------------------------------------------------- + PACKET_TIMESTAMP ------------------------------------------------------------------------------- @@ -519,6 +710,13 @@ the networking stack is used (the behavior before this setting was added). See include/linux/net_tstamp.h and Documentation/networking/timestamping for more information on hardware timestamps. +------------------------------------------------------------------------------- ++ Miscellaneous bits +------------------------------------------------------------------------------- + +- Packet sockets work well together with Linux socket filters, thus you also + might want to have a look at Documentation/networking/filter.txt + -------------------------------------------------------------------------------- + THANKS -------------------------------------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 28eeaece87c..4b062ff7441 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3878,7 +3878,9 @@ M: Greg Rose <gregory.v.rose@intel.com> M: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com> M: Alex Duyck <alexander.h.duyck@intel.com> M: John Ronciak <john.ronciak@intel.com> +M: Tushar Dave <tushar.n.dave@intel.com> L: e1000-devel@lists.sourceforge.net +W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git @@ -6365,6 +6367,7 @@ F: drivers/scsi/st* SCTP PROTOCOL M: Vlad Yasevich <vyasevich@gmail.com> M: Sridhar Samudrala <sri@us.ibm.com> +M: Neil Horman <nhorman@tuxdriver.com> L: linux-sctp@vger.kernel.org W: http://lksctp.sourceforge.net S: Maintained diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 7d2f75be932..0087d053b77 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -47,6 +47,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index f3990b04fec..3290e61be3e 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -580,6 +580,7 @@ 66 0x1b0b0 /* MX6Q_PAD_RGMII_RD2__ENET_RGMII_RD2 */ 70 0x1b0b0 /* MX6Q_PAD_RGMII_RD3__ENET_RGMII_RD3 */ 48 0x1b0b0 /* MX6Q_PAD_RGMII_RX_CTL__RGMII_RX_CTL */ + 1033 0x4001b0a8 /* MX6Q_PAD_GPIO_16__ENET_ANATOP_ETHERNET_REF_OUT*/ >; }; @@ -833,8 +834,8 @@ compatible = "fsl,imx6q-fec"; reg = <0x02188000 0x4000>; interrupts = <0 118 0x04 0 119 0x04>; - clocks = <&clks 117>, <&clks 117>; - clock-names = "ipg", "ahb"; + clocks = <&clks 117>, <&clks 117>, <&clks 177>; + clock-names = "ipg", "ahb", "ptp"; status = "disabled"; }; diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 043624219b5..92857683cb6 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -39,7 +39,6 @@ config SOC_AT91RM9200 config SOC_AT91SAM9260 bool "AT91SAM9260, AT91SAM9XE or AT91SAM9G20" select HAVE_AT91_DBGU0 - select HAVE_NET_MACB select SOC_AT91SAM9 help Select this if you are using one of Atmel's AT91SAM9260, AT91SAM9XE @@ -57,7 +56,6 @@ config SOC_AT91SAM9263 bool "AT91SAM9263" select HAVE_AT91_DBGU1 select HAVE_FB_ATMEL - select HAVE_NET_MACB select SOC_AT91SAM9 config SOC_AT91SAM9RL @@ -70,7 +68,6 @@ config SOC_AT91SAM9G45 bool "AT91SAM9G45 or AT91SAM9M10 families" select HAVE_AT91_DBGU1 select HAVE_FB_ATMEL - select HAVE_NET_MACB select SOC_AT91SAM9 help Select this if you are using one of Atmel's AT91SAM9G45 family SoC. @@ -80,7 +77,6 @@ config SOC_AT91SAM9X5 bool "AT91SAM9x5 family" select HAVE_AT91_DBGU0 select HAVE_FB_ATMEL - select HAVE_NET_MACB select SOC_AT91SAM9 help Select this if you are using one of Atmel's AT91SAM9x5 family SoC. diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c index 3e37437a7a6..aa9b320bad5 100644 --- a/arch/arm/mach-at91/board-csb337.c +++ b/arch/arm/mach-at91/board-csb337.c @@ -53,6 +53,8 @@ static void __init csb337_init_early(void) static struct macb_platform_data __initdata csb337_eth_data = { .phy_irq_pin = AT91_PIN_PC2, .is_rmii = 0, + /* The CSB337 bootloader stores the MAC the wrong-way around */ + .rev_eth_addr = 1, }; static struct at91_usbh_data __initdata csb337_usbh_data = { diff --git a/arch/arm/mach-at91/include/mach/at91rm9200_emac.h b/arch/arm/mach-at91/include/mach/at91rm9200_emac.h deleted file mode 100644 index b8260cd8041..00000000000 --- a/arch/arm/mach-at91/include/mach/at91rm9200_emac.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * arch/arm/mach-at91/include/mach/at91rm9200_emac.h - * - * Copyright (C) 2005 Ivan Kokshaysky - * Copyright (C) SAN People - * - * Ethernet MAC registers. - * Based on AT91RM9200 datasheet revision E. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef AT91RM9200_EMAC_H -#define AT91RM9200_EMAC_H - -#define AT91_EMAC_CTL 0x00 /* Control Register */ -#define AT91_EMAC_LB (1 << 0) /* Loopback */ -#define AT91_EMAC_LBL (1 << 1) /* Loopback Local */ -#define AT91_EMAC_RE (1 << 2) /* Receive Enable */ -#define AT91_EMAC_TE (1 << 3) /* Transmit Enable */ -#define AT91_EMAC_MPE (1 << 4) /* Management Port Enable */ -#define AT91_EMAC_CSR (1 << 5) /* Clear Statistics Registers */ -#define AT91_EMAC_INCSTAT (1 << 6) /* Increment Statistics Registers */ -#define AT91_EMAC_WES (1 << 7) /* Write Enable for Statistics Registers */ -#define AT91_EMAC_BP (1 << 8) /* Back Pressure */ - -#define AT91_EMAC_CFG 0x04 /* Configuration Register */ -#define AT91_EMAC_SPD (1 << 0) /* Speed */ -#define AT91_EMAC_FD (1 << 1) /* Full Duplex */ -#define AT91_EMAC_BR (1 << 2) /* Bit Rate */ -#define AT91_EMAC_CAF (1 << 4) /* Copy All Frames */ -#define AT91_EMAC_NBC (1 << 5) /* No Broadcast */ -#define AT91_EMAC_MTI (1 << 6) /* Multicast Hash Enable */ -#define AT91_EMAC_UNI (1 << 7) /* Unicast Hash Enable */ -#define AT91_EMAC_BIG (1 << 8) /* Receive 1522 Bytes */ -#define AT91_EMAC_EAE (1 << 9) /* External Address Match Enable */ -#define AT91_EMAC_CLK (3 << 10) /* MDC Clock Divisor */ -#define AT91_EMAC_CLK_DIV8 (0 << 10) -#define AT91_EMAC_CLK_DIV16 (1 << 10) -#define AT91_EMAC_CLK_DIV32 (2 << 10) -#define AT91_EMAC_CLK_DIV64 (3 << 10) -#define AT91_EMAC_RTY (1 << 12) /* Retry Test */ -#define AT91_EMAC_RMII (1 << 13) /* Reduce MII (RMII) */ - -#define AT91_EMAC_SR 0x08 /* Status Register */ -#define AT91_EMAC_SR_LINK (1 << 0) /* Link */ -#define AT91_EMAC_SR_MDIO (1 << 1) /* MDIO pin */ -#define AT91_EMAC_SR_IDLE (1 << 2) /* PHY idle */ - -#define AT91_EMAC_TAR 0x0c /* Transmit Address Register */ - -#define AT91_EMAC_TCR 0x10 /* Transmit Control Register */ -#define AT91_EMAC_LEN (0x7ff << 0) /* Transmit Frame Length */ -#define AT91_EMAC_NCRC (1 << 15) /* No CRC */ - -#define AT91_EMAC_TSR 0x14 /* Transmit Status Register */ -#define AT91_EMAC_TSR_OVR (1 << 0) /* Transmit Buffer Overrun */ -#define AT91_EMAC_TSR_COL (1 << 1) /* Collision Occurred */ -#define AT91_EMAC_TSR_RLE (1 << 2) /* Retry Limit Exceeded */ -#define AT91_EMAC_TSR_IDLE (1 << 3) /* Transmitter Idle */ -#define AT91_EMAC_TSR_BNQ (1 << 4) /* Transmit Buffer not Queued */ -#define AT91_EMAC_TSR_COMP (1 << 5) /* Transmit Complete */ -#define AT91_EMAC_TSR_UND (1 << 6) /* Transmit Underrun */ - -#define AT91_EMAC_RBQP 0x18 /* Receive Buffer Queue Pointer */ - -#define AT91_EMAC_RSR 0x20 /* Receive Status Register */ -#define AT91_EMAC_RSR_BNA (1 << 0) /* Buffer Not Available */ -#define AT91_EMAC_RSR_REC (1 << 1) /* Frame Received */ -#define AT91_EMAC_RSR_OVR (1 << 2) /* RX Overrun */ - -#define AT91_EMAC_ISR 0x24 /* Interrupt Status Register */ -#define AT91_EMAC_DONE (1 << 0) /* Management Done */ -#define AT91_EMAC_RCOM (1 << 1) /* Receive Complete */ -#define AT91_EMAC_RBNA (1 << 2) /* Receive Buffer Not Available */ -#define AT91_EMAC_TOVR (1 << 3) /* Transmit Buffer Overrun */ -#define AT91_EMAC_TUND (1 << 4) /* Transmit Buffer Underrun */ -#define AT91_EMAC_RTRY (1 << 5) /* Retry Limit */ -#define AT91_EMAC_TBRE (1 << 6) /* Transmit Buffer Register Empty */ -#define AT91_EMAC_TCOM (1 << 7) /* Transmit Complete */ -#define AT91_EMAC_TIDLE (1 << 8) /* Transmit Idle */ -#define AT91_EMAC_LINK (1 << 9) /* Link */ -#define AT91_EMAC_ROVR (1 << 10) /* RX Overrun */ -#define AT91_EMAC_ABT (1 << 11) /* Abort */ - -#define AT91_EMAC_IER 0x28 /* Interrupt Enable Register */ -#define AT91_EMAC_IDR 0x2c /* Interrupt Disable Register */ -#define AT91_EMAC_IMR 0x30 /* Interrupt Mask Register */ - -#define AT91_EMAC_MAN 0x34 /* PHY Maintenance Register */ -#define AT91_EMAC_DATA (0xffff << 0) /* MDIO Data */ -#define AT91_EMAC_REGA (0x1f << 18) /* MDIO Register */ -#define AT91_EMAC_PHYA (0x1f << 23) /* MDIO PHY Address */ -#define AT91_EMAC_RW (3 << 28) /* Read/Write operation */ -#define AT91_EMAC_RW_W (1 << 28) -#define AT91_EMAC_RW_R (2 << 28) -#define AT91_EMAC_MAN_802_3 0x40020000 /* IEEE 802.3 value */ - -/* - * Statistics Registers. - */ -#define AT91_EMAC_FRA 0x40 /* Frames Transmitted OK */ -#define AT91_EMAC_SCOL 0x44 /* Single Collision Frame */ -#define AT91_EMAC_MCOL 0x48 /* Multiple Collision Frame */ -#define AT91_EMAC_OK 0x4c /* Frames Received OK */ -#define AT91_EMAC_SEQE 0x50 /* Frame Check Sequence Error */ -#define AT91_EMAC_ALE 0x54 /* Alignmemt Error */ -#define AT91_EMAC_DTE 0x58 /* Deffered Transmission Frame */ -#define AT91_EMAC_LCOL 0x5c /* Late Collision */ -#define AT91_EMAC_ECOL 0x60 /* Excessive Collision */ -#define AT91_EMAC_TUE 0x64 /* Transmit Underrun Error */ -#define AT91_EMAC_CSE 0x68 /* Carrier Sense Error */ -#define AT91_EMAC_DRFC 0x6c /* Discard RX Frame */ -#define AT91_EMAC_ROV 0x70 /* Receive Overrun */ -#define AT91_EMAC_CDE 0x74 /* Code Error */ -#define AT91_EMAC_ELR 0x78 /* Excessive Length Error */ -#define AT91_EMAC_RJB 0x7c /* Receive Jabber */ -#define AT91_EMAC_USF 0x80 /* Undersize Frame */ -#define AT91_EMAC_SQEE 0x84 /* SQE Test Error */ - -/* - * Address Registers. - */ -#define AT91_EMAC_HSL 0x90 /* Hash Address Low [31:0] */ -#define AT91_EMAC_HSH 0x94 /* Hash Address High [63:32] */ -#define AT91_EMAC_SA1L 0x98 /* Specific Address 1 Low, bytes 0-3 */ -#define AT91_EMAC_SA1H 0x9c /* Specific Address 1 High, bytes 4-5 */ -#define AT91_EMAC_SA2L 0xa0 /* Specific Address 2 Low, bytes 0-3 */ -#define AT91_EMAC_SA2H 0xa4 /* Specific Address 2 High, bytes 4-5 */ -#define AT91_EMAC_SA3L 0xa8 /* Specific Address 3 Low, bytes 0-3 */ -#define AT91_EMAC_SA3H 0xac /* Specific Address 3 High, bytes 4-5 */ -#define AT91_EMAC_SA4L 0xb0 /* Specific Address 4 Low, bytes 0-3 */ -#define AT91_EMAC_SA4H 0xb4 /* Specific Address 4 High, bytes 4-5 */ - -#endif diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 47c91f7185d..38d69100398 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -117,6 +117,17 @@ static void __init imx6q_sabrelite_init(void) imx6q_sabrelite_cko1_setup(); } +static void __init imx6q_1588_init(void) +{ + struct regmap *gpr; + + gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); + if (!IS_ERR(gpr)) + regmap_update_bits(gpr, 0x4, 1 << 21, 1 << 21); + else + pr_err("failed to find fsl,imx6q-iomux-gpr regmap\n"); + +} static void __init imx6q_usb_init(void) { struct regmap *anatop; @@ -153,6 +164,7 @@ static void __init imx6q_init_machine(void) imx6q_pm_init(); imx6q_usb_init(); + imx6q_1588_init(); } static struct cpuidle_driver imx6q_cpuidle_driver = { diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 06e73bf665e..09f9fa800b3 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -80,7 +80,6 @@ config PLATFORM_AT32AP select ARCH_REQUIRE_GPIOLIB select GENERIC_ALLOCATOR select HAVE_FB_ATMEL - select HAVE_NET_MACB # # CPU types diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h index a473f8c6a9a..486df68abee 100644 --- a/arch/avr32/include/uapi/asm/socket.h +++ b/arch/avr32/include/uapi/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index ae52825021a..b681b043f6c 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -42,6 +42,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h index a5b1d7dbb20..871f89b7fbd 100644 --- a/arch/frv/include/uapi/asm/socket.h +++ b/arch/frv/include/uapi/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index ec4554e7b04..90a2e573c7e 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h index 41fc28a4a18..23d6759bb57 100644 --- a/arch/ia64/include/uapi/asm/socket.h +++ b/arch/ia64/include/uapi/asm/socket.h @@ -49,6 +49,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index a15f40b5278..5e7088a2672 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/m68k/include/uapi/asm/socket.h b/arch/m68k/include/uapi/asm/socket.h index d1be684edf9..285da3b6ad9 100644 --- a/arch/m68k/include/uapi/asm/socket.h +++ b/arch/m68k/include/uapi/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index c5ed59549cb..17307ab9047 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -63,6 +63,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h index 820463a484b..af5366bbfe6 100644 --- a/arch/mn10300/include/uapi/asm/socket.h +++ b/arch/mn10300/include/uapi/asm/socket.h @@ -40,6 +40,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 1b52c2c31a7..d9ff4731253 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -48,6 +48,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 0x401a #define SO_DETACH_FILTER 0x401b +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_ACCEPTCONN 0x401c diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h index 3d5179bb122..eb0b1864d40 100644 --- a/arch/powerpc/include/uapi/asm/socket.h +++ b/arch/powerpc/include/uapi/asm/socket.h @@ -47,6 +47,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h index 69718cd6d63..436d07c23be 100644 --- a/arch/s390/include/uapi/asm/socket.h +++ b/arch/s390/include/uapi/asm/socket.h @@ -46,6 +46,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index bea1568ae4a..c83a937ead0 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -41,6 +41,7 @@ #define SO_ATTACH_FILTER 0x001a #define SO_DETACH_FILTER 0x001b +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 0x001c #define SO_TIMESTAMP 0x001d diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 28368701ef7..3109ca684a9 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -3,6 +3,7 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/cache.h> +#include <linux/if_vlan.h> #include <asm/cacheflush.h> #include <asm/ptrace.h> @@ -312,6 +313,12 @@ do { *prog++ = BR_OPC | WDISP22(OFF); \ #define emit_addi(R1, IMM, R3) \ *prog++ = (ADD | IMMED | RS1(R1) | S13(IMM) | RD(R3)) +#define emit_and(R1, R2, R3) \ + *prog++ = (AND | RS1(R1) | RS2(R2) | RD(R3)) + +#define emit_andi(R1, IMM, R3) \ + *prog++ = (AND | IMMED | RS1(R1) | S13(IMM) | RD(R3)) + #define emit_alloc_stack(SZ) \ *prog++ = (SUB | IMMED | RS1(SP) | S13(SZ) | RD(SP)) @@ -415,6 +422,8 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_IFINDEX: case BPF_S_ANC_MARK: case BPF_S_ANC_RXHASH: + case BPF_S_ANC_VLAN_TAG: + case BPF_S_ANC_VLAN_TAG_PRESENT: case BPF_S_ANC_CPU: case BPF_S_ANC_QUEUE: case BPF_S_LD_W_ABS: @@ -600,6 +609,16 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_RXHASH: emit_skb_load32(rxhash, r_A); break; + case BPF_S_ANC_VLAN_TAG: + case BPF_S_ANC_VLAN_TAG_PRESENT: + emit_skb_load16(vlan_tci, r_A); + if (filter[i].code == BPF_S_ANC_VLAN_TAG) { + emit_andi(r_A, VLAN_VID_MASK, r_A); + } else { + emit_loadimm(VLAN_TAG_PRESENT, r_TMP); + emit_and(r_A, r_TMP, r_A); + } + break; case BPF_S_LD_IMM: emit_loadimm(K, r_A); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 520d2bd0b9c..d11a47099d3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,6 +11,7 @@ #include <asm/cacheflush.h> #include <linux/netdevice.h> #include <linux/filter.h> +#include <linux/if_vlan.h> /* * Conventions : @@ -212,6 +213,8 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_MARK: case BPF_S_ANC_RXHASH: case BPF_S_ANC_CPU: + case BPF_S_ANC_VLAN_TAG: + case BPF_S_ANC_VLAN_TAG_PRESENT: case BPF_S_ANC_QUEUE: case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: @@ -515,6 +518,24 @@ void bpf_jit_compile(struct sk_filter *fp) CLEAR_A(); #endif break; + case BPF_S_ANC_VLAN_TAG: + case BPF_S_ANC_VLAN_TAG_PRESENT: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); + if (is_imm8(offsetof(struct sk_buff, vlan_tci))) { + /* movzwl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci)); + } else { + EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */ + EMIT(offsetof(struct sk_buff, vlan_tci), 4); + } + BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); + if (filter[i].code == BPF_S_ANC_VLAN_TAG) { + EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */ + } else { + EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */ + EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ + } + break; case BPF_S_LD_W_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_word); common_load: seen |= SEEN_DATAREF; diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h index e36c6818492..38079be1cf1 100644 --- a/arch/xtensa/include/uapi/asm/socket.h +++ b/arch/xtensa/include/uapi/asm/socket.h @@ -52,6 +52,7 @@ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 81363ffa535..6e99d73563b 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -490,7 +490,7 @@ receive_dmsg(struct hfc_pci *hc) (df->data[le16_to_cpu(zp->z1)])) { if (dch->debug & DEBUG_HW) printk(KERN_DEBUG - "empty_fifo hfcpci paket inv. len " + "empty_fifo hfcpci packet inv. len " "%d or crc %d\n", rcnt, df->data[le16_to_cpu(zp->z1)]); diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c index 182ecf0626c..feafa91c2ed 100644 --- a/drivers/isdn/hardware/mISDN/mISDNisar.c +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -1302,7 +1302,7 @@ modeisar(struct isar_ch *ch, u32 bprotocol) &ch->is->Flags)) ch->dpath = 1; else { - pr_info("modeisar both pathes in use\n"); + pr_info("modeisar both paths in use\n"); return -EBUSY; } if (bprotocol == ISDN_P_B_HDLC) diff --git a/drivers/isdn/hisax/callc.c b/drivers/isdn/hisax/callc.c index a47637be0cc..ddec47a911a 100644 --- a/drivers/isdn/hisax/callc.c +++ b/drivers/isdn/hisax/callc.c @@ -35,7 +35,7 @@ static int chancount; /* experimental REJECT after ALERTING for CALLBACK to beat the 4s delay */ #define ALERT_REJECT 0 -/* Value to delay the sending of the first B-channel paket after CONNECT +/* Value to delay the sending of the first B-channel packet after CONNECT * here is no value given by ITU, but experience shows that 300 ms will * work on many networks, if you or your other side is behind local exchanges * a greater value may be recommented. If the delay is to short the first paket diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 334fa90bed8..f60d4be5894 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -354,7 +354,7 @@ receive_dmsg(struct IsdnCardState *cs) if ((rcnt > MAX_DFRAME_LEN + 3) || (rcnt < 4) || (df->data[zp->z1])) { if (cs->debug & L1_DEB_WARN) - debugl1(cs, "empty_fifo hfcpci paket inv. len %d or crc %d", rcnt, df->data[zp->z1]); + debugl1(cs, "empty_fifo hfcpci packet inv. len %d or crc %d", rcnt, df->data[zp->z1]); #ifdef ERROR_STATISTIC cs->err_rx++; #endif diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index 4db846be436..4ec279ce052 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -270,7 +270,7 @@ read_fifo(struct IsdnCardState *cs, u_char fifo, int trans_max) if ((count > fifo_size) || (count < 4)) { if (cs->debug & L1_DEB_WARN) - debugl1(cs, "hfcsx_read_fifo %d paket inv. len %d ", fifo , count); + debugl1(cs, "hfcsx_read_fifo %d packet inv. len %d ", fifo , count); while (count) { count--; /* empty fifo */ Read_hfc(cs, HFCSX_FIF_DRD); diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index db50f788855..f8e405c383a 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -277,7 +277,6 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, u16 timebase, u8 *buf, int len) { u8 *p; - int multi = 0; u8 frame[len + 32]; struct socket *socket = NULL; @@ -317,9 +316,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, *p++ = hc->id >> 8; *p++ = hc->id; } - *p++ = (multi == 1) ? 0x80 : 0x00 + channel; /* m-flag, channel */ - if (multi == 1) - *p++ = len; /* length */ + *p++ = 0x00 + channel; /* m-flag, channel */ *p++ = timebase >> 8; /* time base */ *p++ = timebase; diff --git a/drivers/isdn/pcbit/layer2.c b/drivers/isdn/pcbit/layer2.c index a18e639b40d..42ecfef8013 100644 --- a/drivers/isdn/pcbit/layer2.c +++ b/drivers/isdn/pcbit/layer2.c @@ -508,7 +508,7 @@ pcbit_irq_handler(int interrupt, void *devptr) return IRQ_NONE; } if (dev->interrupt) { - printk(KERN_DEBUG "pcbit: reentering interrupt hander\n"); + printk(KERN_DEBUG "pcbit: reentering interrupt handler\n"); return IRQ_HANDLED; } dev->interrupt = 1; diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig index 49a30d37ae4..e49c0eff040 100644 --- a/drivers/net/ethernet/adi/Kconfig +++ b/drivers/net/ethernet/adi/Kconfig @@ -61,7 +61,7 @@ config BFIN_RX_DESC_NUM config BFIN_MAC_USE_HWSTAMP bool "Use IEEE 1588 hwstamp" - depends on BFIN_MAC && BF518 + select PTP_1588_CLOCK default y ---help--- To support the IEEE 1588 Precision Time Protocol (PTP), select y here diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index f816426e108..f1c458dc039 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -548,14 +548,17 @@ static int bfin_mac_ethtool_setwol(struct net_device *dev, return 0; } +#ifdef CONFIG_BFIN_MAC_USE_HWSTAMP static int bfin_mac_ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { + struct bfin_mac_local *lp = netdev_priv(dev); + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_SYS_HARDWARE; - info->phc_index = -1; + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = lp->phc_index; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -566,6 +569,7 @@ static int bfin_mac_ethtool_get_ts_info(struct net_device *dev, (1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT); return 0; } +#endif static const struct ethtool_ops bfin_mac_ethtool_ops = { .get_settings = bfin_mac_ethtool_getsettings, @@ -574,7 +578,9 @@ static const struct ethtool_ops bfin_mac_ethtool_ops = { .get_drvinfo = bfin_mac_ethtool_getdrvinfo, .get_wol = bfin_mac_ethtool_getwol, .set_wol = bfin_mac_ethtool_setwol, +#ifdef CONFIG_BFIN_MAC_USE_HWSTAMP .get_ts_info = bfin_mac_ethtool_get_ts_info, +#endif }; /**************************************************************************/ @@ -649,6 +655,20 @@ static int bfin_mac_set_mac_address(struct net_device *dev, void *p) #ifdef CONFIG_BFIN_MAC_USE_HWSTAMP #define bfin_mac_hwtstamp_is_none(cfg) ((cfg) == HWTSTAMP_FILTER_NONE) +static u32 bfin_select_phc_clock(u32 input_clk, unsigned int *shift_result) +{ + u32 ipn = 1000000000UL / input_clk; + u32 ppn = 1; + unsigned int shift = 0; + + while (ppn <= ipn) { + ppn <<= 1; + shift++; + } + *shift_result = shift; + return 1000000000UL / ppn; +} + static int bfin_mac_hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { @@ -798,19 +818,7 @@ static int bfin_mac_hwtstamp_ioctl(struct net_device *netdev, bfin_read_EMAC_PTP_TXSNAPLO(); bfin_read_EMAC_PTP_TXSNAPHI(); - /* - * Set registers so that rollover occurs soon to test this. - */ - bfin_write_EMAC_PTP_TIMELO(0x00000000); - bfin_write_EMAC_PTP_TIMEHI(0xFF800000); - SSYNC(); - - lp->compare.last_update = 0; - timecounter_init(&lp->clock, - &lp->cycles, - ktime_to_ns(ktime_get_real())); - timecompare_update(&lp->compare, 0); } lp->stamp_cfg = config; @@ -818,15 +826,6 @@ static int bfin_mac_hwtstamp_ioctl(struct net_device *netdev, -EFAULT : 0; } -static void bfin_dump_hwtamp(char *s, ktime_t *hw, ktime_t *ts, struct timecompare *cmp) -{ - ktime_t sys = ktime_get_real(); - - pr_debug("%s %s hardware:%d,%d transform system:%d,%d system:%d,%d, cmp:%lld, %lld\n", - __func__, s, hw->tv.sec, hw->tv.nsec, ts->tv.sec, ts->tv.nsec, sys.tv.sec, - sys.tv.nsec, cmp->offset, cmp->skew); -} - static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) { struct bfin_mac_local *lp = netdev_priv(netdev); @@ -857,15 +856,9 @@ static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) regval = bfin_read_EMAC_PTP_TXSNAPLO(); regval |= (u64)bfin_read_EMAC_PTP_TXSNAPHI() << 32; memset(&shhwtstamps, 0, sizeof(shhwtstamps)); - ns = timecounter_cyc2time(&lp->clock, - regval); - timecompare_update(&lp->compare, ns); + ns = regval << lp->shift; shhwtstamps.hwtstamp = ns_to_ktime(ns); - shhwtstamps.syststamp = - timecompare_transform(&lp->compare, ns); skb_tstamp_tx(skb, &shhwtstamps); - - bfin_dump_hwtamp("TX", &shhwtstamps.hwtstamp, &shhwtstamps.syststamp, &lp->compare); } } } @@ -888,55 +881,184 @@ static void bfin_rx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) regval = bfin_read_EMAC_PTP_RXSNAPLO(); regval |= (u64)bfin_read_EMAC_PTP_RXSNAPHI() << 32; - ns = timecounter_cyc2time(&lp->clock, regval); - timecompare_update(&lp->compare, ns); + ns = regval << lp->shift; memset(shhwtstamps, 0, sizeof(*shhwtstamps)); shhwtstamps->hwtstamp = ns_to_ktime(ns); - shhwtstamps->syststamp = timecompare_transform(&lp->compare, ns); +} + +static void bfin_mac_hwtstamp_init(struct net_device *netdev) +{ + struct bfin_mac_local *lp = netdev_priv(netdev); + u64 addend, ppb; + u32 input_clk, phc_clk; + + /* Initialize hardware timer */ + input_clk = get_sclk(); + phc_clk = bfin_select_phc_clock(input_clk, &lp->shift); + addend = phc_clk * (1ULL << 32); + do_div(addend, input_clk); + bfin_write_EMAC_PTP_ADDEND((u32)addend); + + lp->addend = addend; + ppb = 1000000000ULL * input_clk; + do_div(ppb, phc_clk); + lp->max_ppb = ppb - 1000000000ULL - 1ULL; - bfin_dump_hwtamp("RX", &shhwtstamps->hwtstamp, &shhwtstamps->syststamp, &lp->compare); + /* Initialize hwstamp config */ + lp->stamp_cfg.rx_filter = HWTSTAMP_FILTER_NONE; + lp->stamp_cfg.tx_type = HWTSTAMP_TX_OFF; } -/* - * bfin_read_clock - read raw cycle counter (to be used by time counter) - */ -static cycle_t bfin_read_clock(const struct cyclecounter *tc) +static u64 bfin_ptp_time_read(struct bfin_mac_local *lp) { - u64 stamp; + u64 ns; + u32 lo, hi; + + lo = bfin_read_EMAC_PTP_TIMELO(); + hi = bfin_read_EMAC_PTP_TIMEHI(); - stamp = bfin_read_EMAC_PTP_TIMELO(); - stamp |= (u64)bfin_read_EMAC_PTP_TIMEHI() << 32ULL; + ns = ((u64) hi) << 32; + ns |= lo; + ns <<= lp->shift; - return stamp; + return ns; } -#define PTP_CLK 25000000 +static void bfin_ptp_time_write(struct bfin_mac_local *lp, u64 ns) +{ + u32 hi, lo; -static void bfin_mac_hwtstamp_init(struct net_device *netdev) + ns >>= lp->shift; + hi = ns >> 32; + lo = ns & 0xffffffff; + + bfin_write_EMAC_PTP_TIMELO(lo); + bfin_write_EMAC_PTP_TIMEHI(hi); +} + +/* PTP Hardware Clock operations */ + +static int bfin_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +{ + u64 adj; + u32 diff, addend; + int neg_adj = 0; + struct bfin_mac_local *lp = + container_of(ptp, struct bfin_mac_local, caps); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + addend = lp->addend; + adj = addend; + adj *= ppb; + diff = div_u64(adj, 1000000000ULL); + + addend = neg_adj ? addend - diff : addend + diff; + + bfin_write_EMAC_PTP_ADDEND(addend); + + return 0; +} + +static int bfin_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + s64 now; + unsigned long flags; + struct bfin_mac_local *lp = + container_of(ptp, struct bfin_mac_local, caps); + + spin_lock_irqsave(&lp->phc_lock, flags); + + now = bfin_ptp_time_read(lp); + now += delta; + bfin_ptp_time_write(lp, now); + + spin_unlock_irqrestore(&lp->phc_lock, flags); + + return 0; +} + +static int bfin_ptp_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + u64 ns; + u32 remainder; + unsigned long flags; + struct bfin_mac_local *lp = + container_of(ptp, struct bfin_mac_local, caps); + + spin_lock_irqsave(&lp->phc_lock, flags); + + ns = bfin_ptp_time_read(lp); + + spin_unlock_irqrestore(&lp->phc_lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); + ts->tv_nsec = remainder; + return 0; +} + +static int bfin_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + u64 ns; + unsigned long flags; + struct bfin_mac_local *lp = + container_of(ptp, struct bfin_mac_local, caps); + + ns = ts->tv_sec * 1000000000ULL; + ns += ts->tv_nsec; + + spin_lock_irqsave(&lp->phc_lock, flags); + + bfin_ptp_time_write(lp, ns); + + spin_unlock_irqrestore(&lp->phc_lock, flags); + + return 0; +} + +static int bfin_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +static struct ptp_clock_info bfin_ptp_caps = { + .owner = THIS_MODULE, + .name = "BF518 clock", + .max_adj = 0, + .n_alarm = 0, + .n_ext_ts = 0, + .n_per_out = 0, + .pps = 0, + .adjfreq = bfin_ptp_adjfreq, + .adjtime = bfin_ptp_adjtime, + .gettime = bfin_ptp_gettime, + .settime = bfin_ptp_settime, + .enable = bfin_ptp_enable, +}; + +static int bfin_phc_init(struct net_device *netdev, struct device *dev) { struct bfin_mac_local *lp = netdev_priv(netdev); - u64 append; - /* Initialize hardware timer */ - append = PTP_CLK * (1ULL << 32); - do_div(append, get_sclk()); - bfin_write_EMAC_PTP_ADDEND((u32)append); - - memset(&lp->cycles, 0, sizeof(lp->cycles)); - lp->cycles.read = bfin_read_clock; - lp->cycles.mask = CLOCKSOURCE_MASK(64); - lp->cycles.mult = 1000000000 / PTP_CLK; - lp->cycles.shift = 0; - - /* Synchronize our NIC clock against system wall clock */ - memset(&lp->compare, 0, sizeof(lp->compare)); - lp->compare.source = &lp->clock; - lp->compare.target = ktime_get_real; - lp->compare.num_samples = 10; + lp->caps = bfin_ptp_caps; + lp->caps.max_adj = lp->max_ppb; + lp->clock = ptp_clock_register(&lp->caps, dev); + if (IS_ERR(lp->clock)) + return PTR_ERR(lp->clock); - /* Initialize hwstamp config */ - lp->stamp_cfg.rx_filter = HWTSTAMP_FILTER_NONE; - lp->stamp_cfg.tx_type = HWTSTAMP_TX_OFF; + lp->phc_index = ptp_clock_index(lp->clock); + spin_lock_init(&lp->phc_lock); + + return 0; +} + +static void bfin_phc_release(struct bfin_mac_local *lp) +{ + ptp_clock_unregister(lp->clock); } #else @@ -945,6 +1067,8 @@ static void bfin_mac_hwtstamp_init(struct net_device *netdev) # define bfin_mac_hwtstamp_ioctl(dev, ifr, cmd) (-EOPNOTSUPP) # define bfin_rx_hwtstamp(dev, skb) # define bfin_tx_hwtstamp(dev, skb) +# define bfin_phc_init(netdev, dev) 0 +# define bfin_phc_release(lp) #endif static inline void _tx_reclaim_skb(void) @@ -1579,12 +1703,17 @@ static int __devinit bfin_mac_probe(struct platform_device *pdev) } bfin_mac_hwtstamp_init(ndev); + if (bfin_phc_init(ndev, &pdev->dev)) { + dev_err(&pdev->dev, "Cannot register PHC device!\n"); + goto out_err_phc; + } /* now, print out the card info, in a short format.. */ netdev_info(ndev, "%s, Version %s\n", DRV_DESC, DRV_VERSION); return 0; +out_err_phc: out_err_reg_ndev: free_irq(IRQ_MAC_RX, ndev); out_err_request_irq: @@ -1603,6 +1732,8 @@ static int __devexit bfin_mac_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct bfin_mac_local *lp = netdev_priv(ndev); + bfin_phc_release(lp); + platform_set_drvdata(pdev, NULL); lp->mii_bus->priv = NULL; diff --git a/drivers/net/ethernet/adi/bfin_mac.h b/drivers/net/ethernet/adi/bfin_mac.h index 960905c0822..7a07ee07906 100644 --- a/drivers/net/ethernet/adi/bfin_mac.h +++ b/drivers/net/ethernet/adi/bfin_mac.h @@ -11,8 +11,7 @@ #define _BFIN_MAC_H_ #include <linux/net_tstamp.h> -#include <linux/clocksource.h> -#include <linux/timecompare.h> +#include <linux/ptp_clock_kernel.h> #include <linux/timer.h> #include <linux/etherdevice.h> #include <linux/bfin_mac.h> @@ -94,10 +93,14 @@ struct bfin_mac_local { struct mii_bus *mii_bus; #if defined(CONFIG_BFIN_MAC_USE_HWSTAMP) - struct cyclecounter cycles; - struct timecounter clock; - struct timecompare compare; + u32 addend; + unsigned int shift; + s32 max_ppb; struct hwtstamp_config stamp_cfg; + struct ptp_clock_info caps; + struct ptp_clock *clock; + int phc_index; + spinlock_t phc_lock; /* protects time lo/hi registers */ #endif }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 72897c47b8c..de121ccd675 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -34,18 +34,10 @@ #include "bnx2x_hsi.h" -#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) -#define BCM_CNIC 1 #include "../cnic_if.h" -#endif -#ifdef BCM_CNIC -#define BNX2X_MIN_MSIX_VEC_CNT 3 -#define BNX2X_MSIX_VEC_FP_START 2 -#else -#define BNX2X_MIN_MSIX_VEC_CNT 2 -#define BNX2X_MSIX_VEC_FP_START 1 -#endif + +#define BNX2X_MIN_MSIX_VEC_CNT(bp) ((bp)->min_msix_vec_cnt) #include <linux/mdio.h> @@ -256,15 +248,10 @@ enum { /* FCoE L2 */ #define BNX2X_FCOE_ETH_CID(bp) (BNX2X_CNIC_START_ETH_CID(bp) + 1) -/** Additional rings budgeting */ -#ifdef BCM_CNIC -#define CNIC_PRESENT 1 -#define FCOE_PRESENT 1 -#else -#define CNIC_PRESENT 0 -#define FCOE_PRESENT 0 -#endif /* BCM_CNIC */ -#define NON_ETH_CONTEXT_USE (FCOE_PRESENT) +#define CNIC_SUPPORT(bp) ((bp)->cnic_support) +#define CNIC_ENABLED(bp) ((bp)->cnic_enabled) +#define CNIC_LOADED(bp) ((bp)->cnic_loaded) +#define FCOE_INIT(bp) ((bp)->fcoe_init) #define AEU_IN_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR \ AEU_INPUTS_ATTN_BITS_PXPPCICLOCKCLIENT_PARITY_ERROR @@ -297,9 +284,7 @@ enum { OOO_TXQ_IDX_OFFSET, }; #define MAX_ETH_TXQ_IDX(bp) (BNX2X_NUM_NON_CNIC_QUEUES(bp) * (bp)->max_cos) -#ifdef BCM_CNIC #define FCOE_TXQ_IDX(bp) (MAX_ETH_TXQ_IDX(bp) + FCOE_TXQ_IDX_OFFSET) -#endif /* fast path */ /* @@ -585,15 +570,9 @@ struct bnx2x_fastpath { ->var) -#define IS_ETH_FP(fp) (fp->index < \ - BNX2X_NUM_ETH_QUEUES(fp->bp)) -#ifdef BCM_CNIC -#define IS_FCOE_FP(fp) (fp->index == FCOE_IDX(fp->bp)) -#define IS_FCOE_IDX(idx) ((idx) == FCOE_IDX(bp)) -#else -#define IS_FCOE_FP(fp) false -#define IS_FCOE_IDX(idx) false -#endif +#define IS_ETH_FP(fp) ((fp)->index < BNX2X_NUM_ETH_QUEUES((fp)->bp)) +#define IS_FCOE_FP(fp) ((fp)->index == FCOE_IDX((fp)->bp)) +#define IS_FCOE_IDX(idx) ((idx) == FCOE_IDX(bp)) /* MC hsi */ @@ -886,6 +865,18 @@ struct bnx2x_common { (CHIP_REV(bp) == CHIP_REV_Bx)) #define CHIP_IS_E3A0(bp) (CHIP_IS_E3(bp) && \ (CHIP_REV(bp) == CHIP_REV_Ax)) +/* This define is used in two main places: + * 1. In the early stages of nic_load, to know if to configrue Parser / Searcher + * to nic-only mode or to offload mode. Offload mode is configured if either the + * chip is E1x (where MIC_MODE register is not applicable), or if cnic already + * registered for this port (which means that the user wants storage services). + * 2. During cnic-related load, to know if offload mode is already configured in + * the HW or needs to be configrued. + * Since the transition from nic-mode to offload-mode in HW causes traffic + * coruption, nic-mode is configured only in ports on which storage services + * where never requested. + */ +#define CONFIGURE_NIC_MODE(bp) (!CHIP_IS_E1x(bp) && !CNIC_ENABLED(bp)) int flash_size; #define BNX2X_NVRAM_1MB_SIZE 0x20000 /* 1M bit in bytes */ @@ -1003,18 +994,15 @@ union cdu_context { #define CDU_ILT_PAGE_SZ (8192 << CDU_ILT_PAGE_SZ_HW) /* 32K */ #define ILT_PAGE_CIDS (CDU_ILT_PAGE_SZ / sizeof(union cdu_context)) -#ifdef BCM_CNIC #define CNIC_ISCSI_CID_MAX 256 #define CNIC_FCOE_CID_MAX 2048 #define CNIC_CID_MAX (CNIC_ISCSI_CID_MAX + CNIC_FCOE_CID_MAX) #define CNIC_ILT_LINES DIV_ROUND_UP(CNIC_CID_MAX, ILT_PAGE_CIDS) -#endif #define QM_ILT_PAGE_SZ_HW 0 #define QM_ILT_PAGE_SZ (4096 << QM_ILT_PAGE_SZ_HW) /* 4K */ #define QM_CID_ROUND 1024 -#ifdef BCM_CNIC /* TM (timers) host DB constants */ #define TM_ILT_PAGE_SZ_HW 0 #define TM_ILT_PAGE_SZ (4096 << TM_ILT_PAGE_SZ_HW) /* 4K */ @@ -1032,8 +1020,6 @@ union cdu_context { #define SRC_T2_SZ SRC_ILT_SZ #define SRC_ILT_LINES DIV_ROUND_UP(SRC_ILT_SZ, SRC_ILT_PAGE_SZ) -#endif - #define MAX_DMAE_C 8 /* DMA memory not used in fastpath */ @@ -1227,7 +1213,6 @@ struct bnx2x { struct bnx2x_sp_objs *sp_objs; struct bnx2x_fp_stats *fp_stats; struct bnx2x_fp_txdata *bnx2x_txq; - int bnx2x_txq_size; void __iomem *regview; void __iomem *doorbells; u16 db_size; @@ -1350,6 +1335,15 @@ struct bnx2x { #define NO_ISCSI_OOO(bp) ((bp)->flags & NO_ISCSI_OOO_FLAG) #define NO_FCOE(bp) ((bp)->flags & NO_FCOE_FLAG) + u8 cnic_support; + bool cnic_enabled; + bool cnic_loaded; + + /* Flag that indicates that we can start looking for FCoE L2 queue + * completions in the default status block. + */ + bool fcoe_init; + int pm_cap; int mrrs; @@ -1420,6 +1414,8 @@ struct bnx2x { #define BNX2X_MAX_COS 3 #define BNX2X_MAX_TX_COS 2 int num_queues; + uint num_ethernet_queues; + uint num_cnic_queues; int num_napi_queues; int disable_tpa; @@ -1433,6 +1429,7 @@ struct bnx2x { u8 igu_dsb_id; u8 igu_base_sb; u8 igu_sb_cnt; + u8 min_msix_vec_cnt; dma_addr_t def_status_blk_mapping; @@ -1478,16 +1475,16 @@ struct bnx2x { * Maximum supported number of RSS queues: number of IGU SBs minus one that goes * to CNIC. */ -#define BNX2X_MAX_RSS_COUNT(bp) ((bp)->igu_sb_cnt - CNIC_PRESENT) +#define BNX2X_MAX_RSS_COUNT(bp) ((bp)->igu_sb_cnt - CNIC_SUPPORT(bp)) /* * Maximum CID count that might be required by the bnx2x: * Max RSS * Max_Tx_Multi_Cos + FCoE + iSCSI */ #define BNX2X_L2_CID_COUNT(bp) (BNX2X_NUM_ETH_QUEUES(bp) * BNX2X_MULTI_TX_COS \ - + NON_ETH_CONTEXT_USE + CNIC_PRESENT) + + 2 * CNIC_SUPPORT(bp)) #define BNX2X_L2_MAX_CID(bp) (BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS \ - + NON_ETH_CONTEXT_USE + CNIC_PRESENT) + + 2 * CNIC_SUPPORT(bp)) #define L2_ILT_LINES(bp) (DIV_ROUND_UP(BNX2X_L2_CID_COUNT(bp),\ ILT_PAGE_CIDS)) @@ -1495,9 +1492,6 @@ struct bnx2x { int dropless_fc; -#ifdef BCM_CNIC - u32 cnic_flags; -#define BNX2X_CNIC_FLAG_MAC_SET 1 void *t2; dma_addr_t t2_mapping; struct cnic_ops __rcu *cnic_ops; @@ -1518,7 +1512,6 @@ struct bnx2x { /* Start index of the "special" (CNIC related) L2 cleints */ u8 cnic_base_cl_id; -#endif int dmae_ready; /* used to synchronize dmae accesses */ @@ -1647,9 +1640,9 @@ struct bnx2x { /* Tx queues may be less or equal to Rx queues */ extern int num_queues; #define BNX2X_NUM_QUEUES(bp) (bp->num_queues) -#define BNX2X_NUM_ETH_QUEUES(bp) (BNX2X_NUM_QUEUES(bp) - NON_ETH_CONTEXT_USE) +#define BNX2X_NUM_ETH_QUEUES(bp) ((bp)->num_ethernet_queues) #define BNX2X_NUM_NON_CNIC_QUEUES(bp) (BNX2X_NUM_QUEUES(bp) - \ - NON_ETH_CONTEXT_USE) + (bp)->num_cnic_queues) #define BNX2X_NUM_RX_QUEUES(bp) BNX2X_NUM_QUEUES(bp) #define is_multi(bp) (BNX2X_NUM_QUEUES(bp) > 1) @@ -1689,6 +1682,13 @@ struct bnx2x_func_init_params { u16 spq_prod; /* valid iff FUNC_FLG_SPQ */ }; +#define for_each_cnic_queue(bp, var) \ + for ((var) = BNX2X_NUM_ETH_QUEUES(bp); (var) < BNX2X_NUM_QUEUES(bp); \ + (var)++) \ + if (skip_queue(bp, var)) \ + continue; \ + else + #define for_each_eth_queue(bp, var) \ for ((var) = 0; (var) < BNX2X_NUM_ETH_QUEUES(bp); (var)++) @@ -1702,6 +1702,22 @@ struct bnx2x_func_init_params { else /* Skip forwarding FP */ +#define for_each_valid_rx_queue(bp, var) \ + for ((var) = 0; \ + (var) < (CNIC_LOADED(bp) ? BNX2X_NUM_QUEUES(bp) : \ + BNX2X_NUM_ETH_QUEUES(bp)); \ + (var)++) \ + if (skip_rx_queue(bp, var)) \ + continue; \ + else + +#define for_each_rx_queue_cnic(bp, var) \ + for ((var) = BNX2X_NUM_ETH_QUEUES(bp); (var) < BNX2X_NUM_QUEUES(bp); \ + (var)++) \ + if (skip_rx_queue(bp, var)) \ + continue; \ + else + #define for_each_rx_queue(bp, var) \ for ((var) = 0; (var) < BNX2X_NUM_QUEUES(bp); (var)++) \ if (skip_rx_queue(bp, var)) \ @@ -1709,6 +1725,22 @@ struct bnx2x_func_init_params { else /* Skip OOO FP */ +#define for_each_valid_tx_queue(bp, var) \ + for ((var) = 0; \ + (var) < (CNIC_LOADED(bp) ? BNX2X_NUM_QUEUES(bp) : \ + BNX2X_NUM_ETH_QUEUES(bp)); \ + (var)++) \ + if (skip_tx_queue(bp, var)) \ + continue; \ + else + +#define for_each_tx_queue_cnic(bp, var) \ + for ((var) = BNX2X_NUM_ETH_QUEUES(bp); (var) < BNX2X_NUM_QUEUES(bp); \ + (var)++) \ + if (skip_tx_queue(bp, var)) \ + continue; \ + else + #define for_each_tx_queue(bp, var) \ for ((var) = 0; (var) < BNX2X_NUM_QUEUES(bp); (var)++) \ if (skip_tx_queue(bp, var)) \ @@ -2179,7 +2211,6 @@ void bnx2x_notify_link_changed(struct bnx2x *bp); #define BNX2X_MF_SD_PROTOCOL(bp) \ ((bp)->mf_config[BP_VN(bp)] & FUNC_MF_CFG_PROTOCOL_MASK) -#ifdef BCM_CNIC #define BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp) \ (BNX2X_MF_SD_PROTOCOL(bp) == FUNC_MF_CFG_PROTOCOL_ISCSI) @@ -2196,9 +2227,12 @@ void bnx2x_notify_link_changed(struct bnx2x *bp); #define IS_MF_STORAGE_SD(bp) (IS_MF_SD(bp) && \ (BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp) || \ BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp))) -#else -#define IS_MF_FCOE_AFEX(bp) false -#endif +enum { + SWITCH_UPDATE, + AFEX_UPDATE, +}; + +#define NUM_MACS 8 #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4833b6a9031..54d522da1aa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1152,6 +1152,25 @@ static void bnx2x_free_tpa_pool(struct bnx2x *bp, } } +void bnx2x_init_rx_rings_cnic(struct bnx2x *bp) +{ + int j; + + for_each_rx_queue_cnic(bp, j) { + struct bnx2x_fastpath *fp = &bp->fp[j]; + + fp->rx_bd_cons = 0; + + /* Activate BD ring */ + /* Warning! + * this will generate an interrupt (to the TSTORM) + * must only be done after chip is initialized + */ + bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod, + fp->rx_sge_prod); + } +} + void bnx2x_init_rx_rings(struct bnx2x *bp) { int func = BP_FUNC(bp); @@ -1159,7 +1178,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) int i, j; /* Allocate TPA resources */ - for_each_rx_queue(bp, j) { + for_each_eth_queue(bp, j) { struct bnx2x_fastpath *fp = &bp->fp[j]; DP(NETIF_MSG_IFUP, @@ -1217,7 +1236,7 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) } } - for_each_rx_queue(bp, j) { + for_each_eth_queue(bp, j) { struct bnx2x_fastpath *fp = &bp->fp[j]; fp->rx_bd_cons = 0; @@ -1244,29 +1263,45 @@ void bnx2x_init_rx_rings(struct bnx2x *bp) } } -static void bnx2x_free_tx_skbs(struct bnx2x *bp) +static void bnx2x_free_tx_skbs_queue(struct bnx2x_fastpath *fp) { - int i; u8 cos; + struct bnx2x *bp = fp->bp; - for_each_tx_queue(bp, i) { - struct bnx2x_fastpath *fp = &bp->fp[i]; - for_each_cos_in_tx_queue(fp, cos) { - struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; - unsigned pkts_compl = 0, bytes_compl = 0; + for_each_cos_in_tx_queue(fp, cos) { + struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; + unsigned pkts_compl = 0, bytes_compl = 0; - u16 sw_prod = txdata->tx_pkt_prod; - u16 sw_cons = txdata->tx_pkt_cons; + u16 sw_prod = txdata->tx_pkt_prod; + u16 sw_cons = txdata->tx_pkt_cons; - while (sw_cons != sw_prod) { - bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons), - &pkts_compl, &bytes_compl); - sw_cons++; - } - netdev_tx_reset_queue( - netdev_get_tx_queue(bp->dev, - txdata->txq_index)); + while (sw_cons != sw_prod) { + bnx2x_free_tx_pkt(bp, txdata, TX_BD(sw_cons), + &pkts_compl, &bytes_compl); + sw_cons++; } + + netdev_tx_reset_queue( + netdev_get_tx_queue(bp->dev, + txdata->txq_index)); + } +} + +static void bnx2x_free_tx_skbs_cnic(struct bnx2x *bp) +{ + int i; + + for_each_tx_queue_cnic(bp, i) { + bnx2x_free_tx_skbs_queue(&bp->fp[i]); + } +} + +static void bnx2x_free_tx_skbs(struct bnx2x *bp) +{ + int i; + + for_each_eth_queue(bp, i) { + bnx2x_free_tx_skbs_queue(&bp->fp[i]); } } @@ -1294,11 +1329,20 @@ static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp) } } +static void bnx2x_free_rx_skbs_cnic(struct bnx2x *bp) +{ + int j; + + for_each_rx_queue_cnic(bp, j) { + bnx2x_free_rx_bds(&bp->fp[j]); + } +} + static void bnx2x_free_rx_skbs(struct bnx2x *bp) { int j; - for_each_rx_queue(bp, j) { + for_each_eth_queue(bp, j) { struct bnx2x_fastpath *fp = &bp->fp[j]; bnx2x_free_rx_bds(fp); @@ -1308,6 +1352,12 @@ static void bnx2x_free_rx_skbs(struct bnx2x *bp) } } +void bnx2x_free_skbs_cnic(struct bnx2x *bp) +{ + bnx2x_free_tx_skbs_cnic(bp); + bnx2x_free_rx_skbs_cnic(bp); +} + void bnx2x_free_skbs(struct bnx2x *bp) { bnx2x_free_tx_skbs(bp); @@ -1347,11 +1397,12 @@ static void bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs) DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n", bp->msix_table[offset].vector); offset++; -#ifdef BCM_CNIC - if (nvecs == offset) - return; - offset++; -#endif + + if (CNIC_SUPPORT(bp)) { + if (nvecs == offset) + return; + offset++; + } for_each_eth_queue(bp, i) { if (nvecs == offset) @@ -1368,7 +1419,7 @@ void bnx2x_free_irq(struct bnx2x *bp) if (bp->flags & USING_MSIX_FLAG && !(bp->flags & USING_SINGLE_MSIX_FLAG)) bnx2x_free_msix_irqs(bp, BNX2X_NUM_ETH_QUEUES(bp) + - CNIC_PRESENT + 1); + CNIC_SUPPORT(bp) + 1); else free_irq(bp->dev->irq, bp->dev); } @@ -1382,12 +1433,14 @@ int bnx2x_enable_msix(struct bnx2x *bp) bp->msix_table[0].entry); msix_vec++; -#ifdef BCM_CNIC - bp->msix_table[msix_vec].entry = msix_vec; - BNX2X_DEV_INFO("msix_table[%d].entry = %d (CNIC)\n", - bp->msix_table[msix_vec].entry, bp->msix_table[msix_vec].entry); - msix_vec++; -#endif + /* Cnic requires an msix vector for itself */ + if (CNIC_SUPPORT(bp)) { + bp->msix_table[msix_vec].entry = msix_vec; + BNX2X_DEV_INFO("msix_table[%d].entry = %d (CNIC)\n", + msix_vec, bp->msix_table[msix_vec].entry); + msix_vec++; + } + /* We need separate vectors for ETH queues only (not FCoE) */ for_each_eth_queue(bp, i) { bp->msix_table[msix_vec].entry = msix_vec; @@ -1396,7 +1449,7 @@ int bnx2x_enable_msix(struct bnx2x *bp) msix_vec++; } - req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_PRESENT + 1; + req_cnt = BNX2X_NUM_ETH_QUEUES(bp) + CNIC_SUPPORT(bp) + 1; rc = pci_enable_msix(bp->pdev, &bp->msix_table[0], req_cnt); @@ -1404,7 +1457,7 @@ int bnx2x_enable_msix(struct bnx2x *bp) * reconfigure number of tx/rx queues according to available * MSI-X vectors */ - if (rc >= BNX2X_MIN_MSIX_VEC_CNT) { + if (rc >= BNX2X_MIN_MSIX_VEC_CNT(bp)) { /* how less vectors we will have? */ int diff = req_cnt - rc; @@ -1419,7 +1472,8 @@ int bnx2x_enable_msix(struct bnx2x *bp) /* * decrease number of queues by number of unallocated entries */ - bp->num_queues -= diff; + bp->num_ethernet_queues -= diff; + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; BNX2X_DEV_INFO("New queue configuration set: %d\n", bp->num_queues); @@ -1435,6 +1489,9 @@ int bnx2x_enable_msix(struct bnx2x *bp) BNX2X_DEV_INFO("Using single MSI-X vector\n"); bp->flags |= USING_SINGLE_MSIX_FLAG; + BNX2X_DEV_INFO("set number of queues to 1\n"); + bp->num_ethernet_queues = 1; + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; } else if (rc < 0) { BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc); goto no_msix; @@ -1464,9 +1521,9 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp) return -EBUSY; } -#ifdef BCM_CNIC - offset++; -#endif + if (CNIC_SUPPORT(bp)) + offset++; + for_each_eth_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", @@ -1485,7 +1542,7 @@ static int bnx2x_req_msix_irqs(struct bnx2x *bp) } i = BNX2X_NUM_ETH_QUEUES(bp); - offset = 1 + CNIC_PRESENT; + offset = 1 + CNIC_SUPPORT(bp); netdev_info(bp->dev, "using MSI-X IRQs: sp %d fp[%d] %d ... fp[%d] %d\n", bp->msix_table[0].vector, 0, bp->msix_table[offset].vector, @@ -1556,19 +1613,35 @@ static int bnx2x_setup_irqs(struct bnx2x *bp) return 0; } +static void bnx2x_napi_enable_cnic(struct bnx2x *bp) +{ + int i; + + for_each_rx_queue_cnic(bp, i) + napi_enable(&bnx2x_fp(bp, i, napi)); +} + static void bnx2x_napi_enable(struct bnx2x *bp) { int i; - for_each_rx_queue(bp, i) + for_each_eth_queue(bp, i) napi_enable(&bnx2x_fp(bp, i, napi)); } +static void bnx2x_napi_disable_cnic(struct bnx2x *bp) +{ + int i; + + for_each_rx_queue_cnic(bp, i) + napi_disable(&bnx2x_fp(bp, i, napi)); +} + static void bnx2x_napi_disable(struct bnx2x *bp) { int i; - for_each_rx_queue(bp, i) + for_each_eth_queue(bp, i) napi_disable(&bnx2x_fp(bp, i, napi)); } @@ -1576,6 +1649,8 @@ void bnx2x_netif_start(struct bnx2x *bp) { if (netif_running(bp->dev)) { bnx2x_napi_enable(bp); + if (CNIC_LOADED(bp)) + bnx2x_napi_enable_cnic(bp); bnx2x_int_enable(bp); if (bp->state == BNX2X_STATE_OPEN) netif_tx_wake_all_queues(bp->dev); @@ -1586,14 +1661,15 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw) { bnx2x_int_disable_sync(bp, disable_hw); bnx2x_napi_disable(bp); + if (CNIC_LOADED(bp)) + bnx2x_napi_disable_cnic(bp); } u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb) { struct bnx2x *bp = netdev_priv(dev); -#ifdef BCM_CNIC - if (!NO_FCOE(bp)) { + if (CNIC_LOADED(bp) && !NO_FCOE(bp)) { struct ethhdr *hdr = (struct ethhdr *)skb->data; u16 ether_type = ntohs(hdr->h_proto); @@ -1609,7 +1685,7 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb) if ((ether_type == ETH_P_FCOE) || (ether_type == ETH_P_FIP)) return bnx2x_fcoe_tx(bp, txq_index); } -#endif + /* select a non-FCoE queue */ return __skb_tx_hash(dev, skb, BNX2X_NUM_ETH_QUEUES(bp)); } @@ -1618,15 +1694,15 @@ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb) void bnx2x_set_num_queues(struct bnx2x *bp) { /* RSS queues */ - bp->num_queues = bnx2x_calc_num_queues(bp); + bp->num_ethernet_queues = bnx2x_calc_num_queues(bp); -#ifdef BCM_CNIC /* override in STORAGE SD modes */ if (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)) - bp->num_queues = 1; -#endif + bp->num_ethernet_queues = 1; + /* Add special queues */ - bp->num_queues += NON_ETH_CONTEXT_USE; + bp->num_cnic_queues = CNIC_SUPPORT(bp); /* For FCOE */ + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; BNX2X_DEV_INFO("set number of queues to %d\n", bp->num_queues); } @@ -1653,20 +1729,18 @@ void bnx2x_set_num_queues(struct bnx2x *bp) * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash() * will return a proper Tx index if TC is enabled (netdev->num_tc > 0). */ -static int bnx2x_set_real_num_queues(struct bnx2x *bp) +static int bnx2x_set_real_num_queues(struct bnx2x *bp, int include_cnic) { int rc, tx, rx; tx = BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos; - rx = BNX2X_NUM_QUEUES(bp) - NON_ETH_CONTEXT_USE; + rx = BNX2X_NUM_ETH_QUEUES(bp); /* account for fcoe queue */ -#ifdef BCM_CNIC - if (!NO_FCOE(bp)) { - rx += FCOE_PRESENT; - tx += FCOE_PRESENT; + if (include_cnic && !NO_FCOE(bp)) { + rx++; + tx++; } -#endif rc = netif_set_real_num_tx_queues(bp->dev, tx); if (rc) { @@ -1859,14 +1933,26 @@ static void bnx2x_squeeze_objects(struct bnx2x *bp) (bp)->state = BNX2X_STATE_ERROR; \ goto label; \ } while (0) -#else + +#define LOAD_ERROR_EXIT_CNIC(bp, label) \ + do { \ + bp->cnic_loaded = false; \ + goto label; \ + } while (0) +#else /*BNX2X_STOP_ON_ERROR*/ #define LOAD_ERROR_EXIT(bp, label) \ do { \ (bp)->state = BNX2X_STATE_ERROR; \ (bp)->panic = 1; \ return -EBUSY; \ } while (0) -#endif +#define LOAD_ERROR_EXIT_CNIC(bp, label) \ + do { \ + bp->cnic_loaded = false; \ + (bp)->panic = 1; \ + return -EBUSY; \ + } while (0) +#endif /*BNX2X_STOP_ON_ERROR*/ bool bnx2x_test_firmware_version(struct bnx2x *bp, bool is_err) { @@ -1959,10 +2045,8 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) fp->max_cos = 1; /* Init txdata pointers */ -#ifdef BCM_CNIC if (IS_FCOE_FP(fp)) fp->txdata_ptr[0] = &bp->bnx2x_txq[FCOE_TXQ_IDX(bp)]; -#endif if (IS_ETH_FP(fp)) for_each_cos_in_tx_queue(fp, cos) fp->txdata_ptr[cos] = &bp->bnx2x_txq[cos * @@ -1980,11 +2064,95 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) else if (bp->flags & GRO_ENABLE_FLAG) fp->mode = TPA_MODE_GRO; -#ifdef BCM_CNIC /* We don't want TPA on an FCoE L2 ring */ if (IS_FCOE_FP(fp)) fp->disable_tpa = 1; -#endif +} + +int bnx2x_load_cnic(struct bnx2x *bp) +{ + int i, rc, port = BP_PORT(bp); + + DP(NETIF_MSG_IFUP, "Starting CNIC-related load\n"); + + mutex_init(&bp->cnic_mutex); + + rc = bnx2x_alloc_mem_cnic(bp); + if (rc) { + BNX2X_ERR("Unable to allocate bp memory for cnic\n"); + LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0); + } + + rc = bnx2x_alloc_fp_mem_cnic(bp); + if (rc) { + BNX2X_ERR("Unable to allocate memory for cnic fps\n"); + LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0); + } + + /* Update the number of queues with the cnic queues */ + rc = bnx2x_set_real_num_queues(bp, 1); + if (rc) { + BNX2X_ERR("Unable to set real_num_queues including cnic\n"); + LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0); + } + + /* Add all CNIC NAPI objects */ + bnx2x_add_all_napi_cnic(bp); + DP(NETIF_MSG_IFUP, "cnic napi added\n"); + bnx2x_napi_enable_cnic(bp); + + rc = bnx2x_init_hw_func_cnic(bp); + if (rc) + LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic1); + + bnx2x_nic_init_cnic(bp); + + /* Enable Timer scan */ + REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1); + + for_each_cnic_queue(bp, i) { + rc = bnx2x_setup_queue(bp, &bp->fp[i], 0); + if (rc) { + BNX2X_ERR("Queue setup failed\n"); + LOAD_ERROR_EXIT(bp, load_error_cnic2); + } + } + + /* Initialize Rx filter. */ + netif_addr_lock_bh(bp->dev); + bnx2x_set_rx_mode(bp->dev); + netif_addr_unlock_bh(bp->dev); + + /* re-read iscsi info */ + bnx2x_get_iscsi_info(bp); + bnx2x_setup_cnic_irq_info(bp); + bnx2x_setup_cnic_info(bp); + bp->cnic_loaded = true; + if (bp->state == BNX2X_STATE_OPEN) + bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD); + + + DP(NETIF_MSG_IFUP, "Ending successfully CNIC-related load\n"); + + return 0; + +#ifndef BNX2X_STOP_ON_ERROR +load_error_cnic2: + /* Disable Timer scan */ + REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0); + +load_error_cnic1: + bnx2x_napi_disable_cnic(bp); + /* Update the number of queues without the cnic queues */ + rc = bnx2x_set_real_num_queues(bp, 0); + if (rc) + BNX2X_ERR("Unable to set real_num_queues not including cnic\n"); +load_error_cnic0: + BNX2X_ERR("CNIC-related load failed\n"); + bnx2x_free_fp_mem_cnic(bp); + bnx2x_free_mem_cnic(bp); + return rc; +#endif /* ! BNX2X_STOP_ON_ERROR */ } @@ -1995,6 +2163,10 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) u32 load_code; int i, rc; + DP(NETIF_MSG_IFUP, "Starting NIC load\n"); + DP(NETIF_MSG_IFUP, + "CNIC is %s\n", CNIC_ENABLED(bp) ? "enabled" : "disabled"); + #ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) { BNX2X_ERR("Can't load NIC when there is panic\n"); @@ -2022,9 +2194,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) DP(NETIF_MSG_IFUP, "num queues: %d", bp->num_queues); for_each_queue(bp, i) bnx2x_bz_fp(bp, i); - memset(bp->bnx2x_txq, 0, bp->bnx2x_txq_size * - sizeof(struct bnx2x_fp_txdata)); + memset(bp->bnx2x_txq, 0, (BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS + + bp->num_cnic_queues) * + sizeof(struct bnx2x_fp_txdata)); + bp->fcoe_init = false; /* Set the receive queues buffer size */ bnx2x_set_rx_buf_size(bp); @@ -2034,9 +2208,9 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* As long as bnx2x_alloc_mem() may possibly update * bp->num_queues, bnx2x_set_real_num_queues() should always - * come after it. + * come after it. At this stage cnic queues are not counted. */ - rc = bnx2x_set_real_num_queues(bp); + rc = bnx2x_set_real_num_queues(bp, 0); if (rc) { BNX2X_ERR("Unable to set real_num_queues\n"); LOAD_ERROR_EXIT(bp, load_error0); @@ -2050,6 +2224,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* Add all NAPI objects */ bnx2x_add_all_napi(bp); + DP(NETIF_MSG_IFUP, "napi added\n"); bnx2x_napi_enable(bp); /* set pf load just before approaching the MCP */ @@ -2191,23 +2366,18 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) LOAD_ERROR_EXIT(bp, load_error3); } -#ifdef BCM_CNIC - /* Enable Timer scan */ - REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 1); -#endif - - for_each_nondefault_queue(bp, i) { + for_each_nondefault_eth_queue(bp, i) { rc = bnx2x_setup_queue(bp, &bp->fp[i], 0); if (rc) { BNX2X_ERR("Queue setup failed\n"); - LOAD_ERROR_EXIT(bp, load_error4); + LOAD_ERROR_EXIT(bp, load_error3); } } rc = bnx2x_init_rss_pf(bp); if (rc) { BNX2X_ERR("PF RSS init failed\n"); - LOAD_ERROR_EXIT(bp, load_error4); + LOAD_ERROR_EXIT(bp, load_error3); } /* Now when Clients are configured we are ready to work */ @@ -2217,7 +2387,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) rc = bnx2x_set_eth_mac(bp, true); if (rc) { BNX2X_ERR("Setting Ethernet MAC failed\n"); - LOAD_ERROR_EXIT(bp, load_error4); + LOAD_ERROR_EXIT(bp, load_error3); } if (bp->pending_max) { @@ -2264,14 +2434,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* start the timer */ mod_timer(&bp->timer, jiffies + bp->current_interval); -#ifdef BCM_CNIC - /* re-read iscsi info */ - bnx2x_get_iscsi_info(bp); - bnx2x_setup_cnic_irq_info(bp); - bnx2x_setup_cnic_info(bp); - if (bp->state == BNX2X_STATE_OPEN) - bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD); -#endif + if (CNIC_ENABLED(bp)) + bnx2x_load_cnic(bp); /* mark driver is loaded in shmem2 */ if (SHMEM2_HAS(bp, drv_capabilities_flag)) { @@ -2293,14 +2457,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) if (bp->port.pmf && (bp->state != BNX2X_STATE_DIAG)) bnx2x_dcbx_init(bp, false); + DP(NETIF_MSG_IFUP, "Ending successfully NIC load\n"); + return 0; #ifndef BNX2X_STOP_ON_ERROR -load_error4: -#ifdef BCM_CNIC - /* Disable Timer scan */ - REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0); -#endif load_error3: bnx2x_int_disable_sync(bp, 1); @@ -2338,6 +2499,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) int i; bool global = false; + DP(NETIF_MSG_IFUP, "Starting NIC unload\n"); + /* mark driver is unloaded in shmem2 */ if (SHMEM2_HAS(bp, drv_capabilities_flag)) { u32 val; @@ -2373,14 +2536,13 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT; smp_mb(); + if (CNIC_LOADED(bp)) + bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); + /* Stop Tx */ bnx2x_tx_disable(bp); netdev_reset_tc(bp->dev); -#ifdef BCM_CNIC - bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); -#endif - bp->rx_mode = BNX2X_RX_MODE_NONE; del_timer_sync(&bp->timer); @@ -2414,7 +2576,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) bnx2x_netif_stop(bp, 1); /* Delete all NAPI objects */ bnx2x_del_all_napi(bp); - + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); /* Release IRQs */ bnx2x_free_irq(bp); @@ -2435,12 +2598,19 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) /* Free SKBs, SGEs, TPA pool and driver internals */ bnx2x_free_skbs(bp); + if (CNIC_LOADED(bp)) + bnx2x_free_skbs_cnic(bp); for_each_rx_queue(bp, i) bnx2x_free_rx_sge_range(bp, bp->fp + i, NUM_RX_SGE); + if (CNIC_LOADED(bp)) { + bnx2x_free_fp_mem_cnic(bp); + bnx2x_free_mem_cnic(bp); + } bnx2x_free_mem(bp); bp->state = BNX2X_STATE_CLOSED; + bp->cnic_loaded = false; /* Check if there are pending parity attentions. If there are - set * RECOVERY_IN_PROGRESS. @@ -2460,6 +2630,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (!bnx2x_clear_pf_load(bp) && bnx2x_reset_is_done(bp, BP_PATH(bp))) bnx2x_disable_close_the_gate(bp); + DP(NETIF_MSG_IFUP, "Ending NIC unload\n"); + return 0; } @@ -2550,7 +2722,7 @@ int bnx2x_poll(struct napi_struct *napi, int budget) /* Fall out from the NAPI loop if needed */ if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { -#ifdef BCM_CNIC + /* No need to update SB for FCoE L2 ring as long as * it's connected to the default SB and the SB * has been updated when NAPI was scheduled. @@ -2559,8 +2731,6 @@ int bnx2x_poll(struct napi_struct *napi, int budget) napi_complete(napi); break; } -#endif - bnx2x_update_fpsb_idx(fp); /* bnx2x_has_rx_work() reads the status block, * thus we need to ensure that status block indices @@ -2940,7 +3110,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) txq_index = skb_get_queue_mapping(skb); txq = netdev_get_tx_queue(dev, txq_index); - BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT); + BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + (CNIC_LOADED(bp) ? 1 : 0)); txdata = &bp->bnx2x_txq[txq_index]; @@ -3339,13 +3509,11 @@ int bnx2x_change_mac_addr(struct net_device *dev, void *p) return -EINVAL; } -#ifdef BCM_CNIC if ((IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)) && !is_zero_ether_addr(addr->sa_data)) { BNX2X_ERR("Can't configure non-zero address on iSCSI or FCoE functions in MF-SD mode\n"); return -EINVAL; } -#endif if (netif_running(dev)) { rc = bnx2x_set_eth_mac(bp, false); @@ -3369,13 +3537,11 @@ static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index) u8 cos; /* Common */ -#ifdef BCM_CNIC + if (IS_FCOE_IDX(fp_index)) { memset(sb, 0, sizeof(union host_hc_status_block)); fp->status_blk_mapping = 0; - } else { -#endif /* status blocks */ if (!CHIP_IS_E1x(bp)) BNX2X_PCI_FREE(sb->e2_sb, @@ -3387,9 +3553,8 @@ static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index) bnx2x_fp(bp, fp_index, status_blk_mapping), sizeof(struct host_hc_status_block_e1x)); -#ifdef BCM_CNIC } -#endif + /* Rx */ if (!skip_rx_queue(bp, fp_index)) { bnx2x_free_rx_bds(fp); @@ -3431,10 +3596,17 @@ static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index) /* end of fastpath */ } +void bnx2x_free_fp_mem_cnic(struct bnx2x *bp) +{ + int i; + for_each_cnic_queue(bp, i) + bnx2x_free_fp_mem_at(bp, i); +} + void bnx2x_free_fp_mem(struct bnx2x *bp) { int i; - for_each_queue(bp, i) + for_each_eth_queue(bp, i) bnx2x_free_fp_mem_at(bp, i); } @@ -3519,14 +3691,11 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) u8 cos; int rx_ring_size = 0; -#ifdef BCM_CNIC if (!bp->rx_ring_size && (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) { rx_ring_size = MIN_RX_SIZE_NONTPA; bp->rx_ring_size = rx_ring_size; - } else -#endif - if (!bp->rx_ring_size) { + } else if (!bp->rx_ring_size) { rx_ring_size = MAX_RX_AVAIL/BNX2X_NUM_RX_QUEUES(bp); if (CHIP_IS_E3(bp)) { @@ -3550,9 +3719,8 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) /* Common */ sb = &bnx2x_fp(bp, index, status_blk); -#ifdef BCM_CNIC + if (!IS_FCOE_IDX(index)) { -#endif /* status blocks */ if (!CHIP_IS_E1x(bp)) BNX2X_PCI_ALLOC(sb->e2_sb, @@ -3562,9 +3730,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) BNX2X_PCI_ALLOC(sb->e1x_sb, &bnx2x_fp(bp, index, status_blk_mapping), sizeof(struct host_hc_status_block_e1x)); -#ifdef BCM_CNIC } -#endif /* FCoE Queue uses Default SB and doesn't ACK the SB, thus no need to * set shortcuts for it. @@ -3641,31 +3807,31 @@ alloc_mem_err: return 0; } +int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp) +{ + if (!NO_FCOE(bp)) + /* FCoE */ + if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX(bp))) + /* we will fail load process instead of mark + * NO_FCOE_FLAG + */ + return -ENOMEM; + + return 0; +} + int bnx2x_alloc_fp_mem(struct bnx2x *bp) { int i; - /** - * 1. Allocate FP for leading - fatal if error - * 2. {CNIC} Allocate FCoE FP - fatal if error - * 3. {CNIC} Allocate OOO + FWD - disable OOO if error - * 4. Allocate RSS - fix number of queues if error + /* 1. Allocate FP for leading - fatal if error + * 2. Allocate RSS - fix number of queues if error */ /* leading */ if (bnx2x_alloc_fp_mem_at(bp, 0)) return -ENOMEM; -#ifdef BCM_CNIC - if (!NO_FCOE(bp)) - /* FCoE */ - if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX(bp))) - /* we will fail load process instead of mark - * NO_FCOE_FLAG - */ - return -ENOMEM; -#endif - /* RSS */ for_each_nondefault_eth_queue(bp, i) if (bnx2x_alloc_fp_mem_at(bp, i)) @@ -3676,17 +3842,17 @@ int bnx2x_alloc_fp_mem(struct bnx2x *bp) int delta = BNX2X_NUM_ETH_QUEUES(bp) - i; WARN_ON(delta < 0); -#ifdef BCM_CNIC - /** - * move non eth FPs next to last eth FP - * must be done in that order - * FCOE_IDX < FWD_IDX < OOO_IDX - */ + if (CNIC_SUPPORT(bp)) + /* move non eth FPs next to last eth FP + * must be done in that order + * FCOE_IDX < FWD_IDX < OOO_IDX + */ - /* move FCoE fp even NO_FCOE_FLAG is on */ - bnx2x_move_fp(bp, FCOE_IDX(bp), FCOE_IDX(bp) - delta); -#endif - bp->num_queues -= delta; + /* move FCoE fp even NO_FCOE_FLAG is on */ + bnx2x_move_fp(bp, FCOE_IDX(bp), FCOE_IDX(bp) - delta); + bp->num_ethernet_queues -= delta; + bp->num_queues = bp->num_ethernet_queues + + bp->num_cnic_queues; BNX2X_ERR("Adjusted num of queues from %d to %d\n", bp->num_queues + delta, bp->num_queues); } @@ -3711,7 +3877,7 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp) struct msix_entry *tbl; struct bnx2x_ilt *ilt; int msix_table_size = 0; - int fp_array_size; + int fp_array_size, txq_array_size; int i; /* @@ -3721,7 +3887,7 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp) msix_table_size = bp->igu_sb_cnt + 1; /* fp array: RSS plus CNIC related L2 queues */ - fp_array_size = BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE; + fp_array_size = BNX2X_MAX_RSS_COUNT(bp) + CNIC_SUPPORT(bp); BNX2X_DEV_INFO("fp_array_size %d", fp_array_size); fp = kcalloc(fp_array_size, sizeof(*fp), GFP_KERNEL); @@ -3750,12 +3916,12 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp) goto alloc_err; /* Allocate memory for the transmission queues array */ - bp->bnx2x_txq_size = BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS; -#ifdef BCM_CNIC - bp->bnx2x_txq_size++; -#endif - bp->bnx2x_txq = kcalloc(bp->bnx2x_txq_size, - sizeof(struct bnx2x_fp_txdata), GFP_KERNEL); + txq_array_size = + BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS + CNIC_SUPPORT(bp); + BNX2X_DEV_INFO("txq_array_size %d", txq_array_size); + + bp->bnx2x_txq = kcalloc(txq_array_size, sizeof(struct bnx2x_fp_txdata), + GFP_KERNEL); if (!bp->bnx2x_txq) goto alloc_err; @@ -3838,7 +4004,7 @@ int bnx2x_get_link_cfg_idx(struct bnx2x *bp) return LINK_CONFIG_IDX(sel_phy_idx); } -#if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC) +#ifdef NETDEV_FCOE_WWNN int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type) { struct bnx2x *bp = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 9c5ea6c5b4c..ad280740b13 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -238,7 +238,6 @@ irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance); * @dev_instance: private instance */ irqreturn_t bnx2x_interrupt(int irq, void *dev_instance); -#ifdef BCM_CNIC /** * bnx2x_cnic_notify - send command to cnic driver @@ -262,8 +261,6 @@ void bnx2x_setup_cnic_irq_info(struct bnx2x *bp); */ void bnx2x_setup_cnic_info(struct bnx2x *bp); -#endif - /** * bnx2x_int_enable - enable HW interrupts. * @@ -283,7 +280,7 @@ void bnx2x_int_enable(struct bnx2x *bp); void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw); /** - * bnx2x_nic_init - init driver internals. + * bnx2x_nic_init_cnic - init driver internals for cnic. * * @bp: driver handle * @load_code: COMMON, PORT or FUNCTION @@ -293,9 +290,26 @@ void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw); * - status blocks * - etc. */ -void bnx2x_nic_init(struct bnx2x *bp, u32 load_code); +void bnx2x_nic_init_cnic(struct bnx2x *bp); /** + * bnx2x_nic_init - init driver internals. + * + * @bp: driver handle + * + * Initializes: + * - rings + * - status blocks + * - etc. + */ +void bnx2x_nic_init(struct bnx2x *bp, u32 load_code); +/** + * bnx2x_alloc_mem_cnic - allocate driver's memory for cnic. + * + * @bp: driver handle + */ +int bnx2x_alloc_mem_cnic(struct bnx2x *bp); +/** * bnx2x_alloc_mem - allocate driver's memory. * * @bp: driver handle @@ -303,6 +317,12 @@ void bnx2x_nic_init(struct bnx2x *bp, u32 load_code); int bnx2x_alloc_mem(struct bnx2x *bp); /** + * bnx2x_free_mem_cnic - release driver's memory for cnic. + * + * @bp: driver handle + */ +void bnx2x_free_mem_cnic(struct bnx2x *bp); +/** * bnx2x_free_mem - release driver's memory. * * @bp: driver handle @@ -407,6 +427,7 @@ bool bnx2x_reset_is_done(struct bnx2x *bp, int engine); void bnx2x_set_reset_in_progress(struct bnx2x *bp); void bnx2x_set_reset_global(struct bnx2x *bp); void bnx2x_disable_close_the_gate(struct bnx2x *bp); +int bnx2x_init_hw_func_cnic(struct bnx2x *bp); /** * bnx2x_sp_event - handle ramrods completion. @@ -424,6 +445,14 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe); void bnx2x_ilt_set_info(struct bnx2x *bp); /** + * bnx2x_ilt_set_cnic_info - prepare ILT configurations for SRC + * and TM. + * + * @bp: driver handle + */ +void bnx2x_ilt_set_info_cnic(struct bnx2x *bp); + +/** * bnx2x_dcbx_init - initialize dcbx protocol. * * @bp: driver handle @@ -491,12 +520,17 @@ int bnx2x_resume(struct pci_dev *pdev); /* Release IRQ vectors */ void bnx2x_free_irq(struct bnx2x *bp); +void bnx2x_free_fp_mem_cnic(struct bnx2x *bp); void bnx2x_free_fp_mem(struct bnx2x *bp); +int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp); int bnx2x_alloc_fp_mem(struct bnx2x *bp); void bnx2x_init_rx_rings(struct bnx2x *bp); +void bnx2x_init_rx_rings_cnic(struct bnx2x *bp); +void bnx2x_free_skbs_cnic(struct bnx2x *bp); void bnx2x_free_skbs(struct bnx2x *bp); void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw); void bnx2x_netif_start(struct bnx2x *bp); +int bnx2x_load_cnic(struct bnx2x *bp); /** * bnx2x_enable_msix - set msix configuration. @@ -547,7 +581,7 @@ void bnx2x_free_mem_bp(struct bnx2x *bp); */ int bnx2x_change_mtu(struct net_device *dev, int new_mtu); -#if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC) +#ifdef NETDEV_FCOE_WWNN /** * bnx2x_fcoe_get_wwn - return the requested WWN value for this port * @@ -793,23 +827,39 @@ static inline void bnx2x_free_rx_sge(struct bnx2x *bp, sge->addr_lo = 0; } -static inline void bnx2x_add_all_napi(struct bnx2x *bp) +static inline void bnx2x_add_all_napi_cnic(struct bnx2x *bp) { int i; - bp->num_napi_queues = bp->num_queues; + /* Add NAPI objects */ + for_each_rx_queue_cnic(bp, i) + netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), + bnx2x_poll, BNX2X_NAPI_WEIGHT); +} + +static inline void bnx2x_add_all_napi(struct bnx2x *bp) +{ + int i; /* Add NAPI objects */ - for_each_rx_queue(bp, i) + for_each_eth_queue(bp, i) netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, BNX2X_NAPI_WEIGHT); } +static inline void bnx2x_del_all_napi_cnic(struct bnx2x *bp) +{ + int i; + + for_each_rx_queue_cnic(bp, i) + netif_napi_del(&bnx2x_fp(bp, i, napi)); +} + static inline void bnx2x_del_all_napi(struct bnx2x *bp) { int i; - for_each_rx_queue(bp, i) + for_each_eth_queue(bp, i) netif_napi_del(&bnx2x_fp(bp, i, napi)); } @@ -979,11 +1029,9 @@ static inline u8 bnx2x_stats_id(struct bnx2x_fastpath *fp) { struct bnx2x *bp = fp->bp; if (!CHIP_IS_E1x(bp)) { -#ifdef BCM_CNIC /* there are special statistics counters for FCoE 136..140 */ if (IS_FCOE_FP(fp)) return bp->cnic_base_cl_id + (bp->pf_num >> 1); -#endif return fp->cl_id; } return fp->cl_id + BP_PORT(bp) * FP_SB_MAX_E1x; @@ -1102,7 +1150,6 @@ static inline void bnx2x_init_txdata(struct bnx2x *bp, txdata->cid, txdata->txq_index); } -#ifdef BCM_CNIC static inline u8 bnx2x_cnic_eth_cl_id(struct bnx2x *bp, u8 cl_idx) { return bp->cnic_base_cl_id + cl_idx + @@ -1162,7 +1209,6 @@ static inline void bnx2x_init_fcoe_fp(struct bnx2x *bp) fp->index, bp, fp->status_blk.e2_sb, fp->cl_id, fp->fw_sb_id, fp->igu_sb_id); } -#endif static inline int bnx2x_clean_tx_queue(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata) @@ -1280,7 +1326,7 @@ static inline bool bnx2x_mtu_allows_gro(int mtu) */ return mtu <= SGE_PAGE_SIZE && (U_ETH_SGL_SIZE * fpp) <= MAX_SKB_FRAGS; } -#ifdef BCM_CNIC + /** * bnx2x_get_iscsi_info - update iSCSI params according to licensing info. * @@ -1288,7 +1334,6 @@ static inline bool bnx2x_mtu_allows_gro(int mtu) * */ void bnx2x_get_iscsi_info(struct bnx2x *bp); -#endif /** * bnx2x_link_sync_notify - send notification to other functions. @@ -1340,13 +1385,11 @@ static inline void bnx2x_update_drv_flags(struct bnx2x *bp, u32 flags, u32 set) static inline bool bnx2x_is_valid_ether_addr(struct bnx2x *bp, u8 *addr) { - if (is_valid_ether_addr(addr)) + if (is_valid_ether_addr(addr) || + (is_zero_ether_addr(addr) && + (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp)))) return true; -#ifdef BCM_CNIC - if (is_zero_ether_addr(addr) && - (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) - return true; -#endif + return false; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 2245c389540..cba4a16ab86 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -1908,10 +1908,10 @@ static void bnx2x_dcbnl_get_perm_hw_addr(struct net_device *netdev, /* first the HW mac address */ memcpy(perm_addr, netdev->dev_addr, netdev->addr_len); -#ifdef BCM_CNIC - /* second SAN address */ - memcpy(perm_addr+netdev->addr_len, bp->fip_mac, netdev->addr_len); -#endif + if (CNIC_LOADED(bp)) + /* second SAN address */ + memcpy(perm_addr+netdev->addr_len, bp->fip_mac, + netdev->addr_len); } static void bnx2x_dcbnl_set_pg_tccfg_tx(struct net_device *netdev, int prio, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 6e5bdd1a31d..c40c0253e10 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2901,7 +2901,9 @@ static void bnx2x_get_channels(struct net_device *dev, static void bnx2x_change_num_queues(struct bnx2x *bp, int num_rss) { bnx2x_disable_msi(bp); - BNX2X_NUM_QUEUES(bp) = num_rss + NON_ETH_CONTEXT_USE; + bp->num_ethernet_queues = num_rss; + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; + BNX2X_DEV_INFO("set number of queues to %d\n", bp->num_queues); bnx2x_set_int_mode(bp); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 18704929e64..7eaa74b78a5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -4845,9 +4845,17 @@ struct vif_list_event_data { __le32 reserved2; }; -/* - * union for all event ring message types - */ +/* function update event data */ +struct function_update_event_data { + u8 echo; + u8 reserved; + __le16 reserved0; + __le32 reserved1; + __le32 reserved2; +}; + + +/* union for all event ring message types */ union event_data { struct vf_pf_event_data vf_pf_event; struct eth_event_data eth_event; @@ -4855,6 +4863,7 @@ union event_data { struct vf_flr_event_data vf_flr_event; struct malicious_vf_event_data malicious_vf_event; struct vif_list_event_data vif_list_event; + struct function_update_event_data function_update_event; }; @@ -4984,8 +4993,10 @@ struct function_update_data { u8 allowed_priorities; u8 network_cos_mode; u8 lb_mode_en; - u8 reserved0; - __le32 reserved1; + u8 tx_switch_suspend_change_flg; + u8 tx_switch_suspend; + u8 echo; + __le16 reserved1; }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h index fe66d902dc6..d755acfe7a4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h @@ -648,15 +648,25 @@ static int bnx2x_ilt_client_mem_op(struct bnx2x *bp, int cli_num, return rc; } +static int bnx2x_ilt_mem_op_cnic(struct bnx2x *bp, u8 memop) +{ + int rc = 0; + + if (CONFIGURE_NIC_MODE(bp)) + rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_SRC, memop); + if (!rc) + rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_TM, memop); + + return rc; +} + static int bnx2x_ilt_mem_op(struct bnx2x *bp, u8 memop) { int rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_CDU, memop); if (!rc) rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_QM, memop); - if (!rc) + if (!rc && CNIC_SUPPORT(bp) && !CONFIGURE_NIC_MODE(bp)) rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_SRC, memop); - if (!rc) - rc = bnx2x_ilt_client_mem_op(bp, ILT_CLIENT_TM, memop); return rc; } @@ -781,12 +791,19 @@ static void bnx2x_ilt_client_id_init_op(struct bnx2x *bp, bnx2x_ilt_client_init_op(bp, ilt_cli, initop); } +static void bnx2x_ilt_init_op_cnic(struct bnx2x *bp, u8 initop) +{ + if (CONFIGURE_NIC_MODE(bp)) + bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_SRC, initop); + bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_TM, initop); +} + static void bnx2x_ilt_init_op(struct bnx2x *bp, u8 initop) { bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_CDU, initop); bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_QM, initop); - bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_SRC, initop); - bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_TM, initop); + if (CNIC_SUPPORT(bp) && !CONFIGURE_NIC_MODE(bp)) + bnx2x_ilt_client_id_init_op(bp, ILT_CLIENT_SRC, initop); } static void bnx2x_ilt_init_client_psz(struct bnx2x *bp, int cli_num, @@ -890,7 +907,6 @@ static void bnx2x_qm_init_ptr_table(struct bnx2x *bp, int qm_cid_count, /**************************************************************************** * SRC initializations ****************************************************************************/ -#ifdef BCM_CNIC /* called during init func stage */ static void bnx2x_src_init_t2(struct bnx2x *bp, struct src_ent *t2, dma_addr_t t2_mapping, int src_cid_count) @@ -915,5 +931,4 @@ static void bnx2x_src_init_t2(struct bnx2x *bp, struct src_ent *t2, U64_HI((u64)t2_mapping + (src_cid_count-1) * sizeof(struct src_ent))); } -#endif #endif /* BNX2X_INIT_OPS_H */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index f6cfdc6cf20..cd002943fac 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -12066,7 +12066,7 @@ void bnx2x_init_xgxs_loopback(struct link_params *params, bnx2x_set_led(params, vars, LED_MODE_OPER, vars->line_speed); } -static void bnx2x_set_rx_filter(struct link_params *params, u8 en) +void bnx2x_set_rx_filter(struct link_params *params, u8 en) { struct bnx2x *bp = params->bp; u8 val = en * 0x1F; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h index 9165b89a4b1..ba981ced628 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h @@ -432,7 +432,8 @@ int bnx2x_phy_probe(struct link_params *params); u8 bnx2x_fan_failure_det_req(struct bnx2x *bp, u32 shmem_base, u32 shmem2_base, u8 port); - +/* Open / close the gate between the NIG and the BRB */ +void bnx2x_set_rx_filter(struct link_params *params, u8 en); /* DCBX structs */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index bd1fd3d87c2..04b9f0ab183 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -791,10 +791,9 @@ void bnx2x_panic_dump(struct bnx2x *bp) /* host sb data */ -#ifdef BCM_CNIC if (IS_FCOE_FP(fp)) continue; -#endif + BNX2X_ERR(" run indexes ("); for (j = 0; j < HC_SB_MAX_SM; j++) pr_cont("0x%x%s", @@ -859,7 +858,7 @@ void bnx2x_panic_dump(struct bnx2x *bp) #ifdef BNX2X_STOP_ON_ERROR /* Rings */ /* Rx */ - for_each_rx_queue(bp, i) { + for_each_valid_rx_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; start = RX_BD(le16_to_cpu(*fp->rx_cons_sb) - 10); @@ -893,7 +892,7 @@ void bnx2x_panic_dump(struct bnx2x *bp) } /* Tx */ - for_each_tx_queue(bp, i) { + for_each_valid_tx_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; for_each_cos_in_tx_queue(fp, cos) { struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; @@ -1504,9 +1503,8 @@ void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw) if (msix) { synchronize_irq(bp->msix_table[0].vector); offset = 1; -#ifdef BCM_CNIC - offset++; -#endif + if (CNIC_SUPPORT(bp)) + offset++; for_each_eth_queue(bp, i) synchronize_irq(bp->msix_table[offset++].vector); } else @@ -1588,9 +1586,8 @@ static bool bnx2x_trylock_leader_lock(struct bnx2x *bp) return bnx2x_trylock_hw_lock(bp, bnx2x_get_leader_lock_resource(bp)); } -#ifdef BCM_CNIC static void bnx2x_cnic_cfc_comp(struct bnx2x *bp, int cid, u8 err); -#endif + void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe) { @@ -1720,7 +1717,7 @@ irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) for_each_eth_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; - mask = 0x2 << (fp->index + CNIC_PRESENT); + mask = 0x2 << (fp->index + CNIC_SUPPORT(bp)); if (status & mask) { /* Handle Rx or Tx according to SB id */ prefetch(fp->rx_cons_sb); @@ -1732,22 +1729,23 @@ irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) } } -#ifdef BCM_CNIC - mask = 0x2; - if (status & (mask | 0x1)) { - struct cnic_ops *c_ops = NULL; + if (CNIC_SUPPORT(bp)) { + mask = 0x2; + if (status & (mask | 0x1)) { + struct cnic_ops *c_ops = NULL; - if (likely(bp->state == BNX2X_STATE_OPEN)) { - rcu_read_lock(); - c_ops = rcu_dereference(bp->cnic_ops); - if (c_ops) - c_ops->cnic_handler(bp->cnic_data, NULL); - rcu_read_unlock(); - } + if (likely(bp->state == BNX2X_STATE_OPEN)) { + rcu_read_lock(); + c_ops = rcu_dereference(bp->cnic_ops); + if (c_ops) + c_ops->cnic_handler(bp->cnic_data, + NULL); + rcu_read_unlock(); + } - status &= ~mask; + status &= ~mask; + } } -#endif if (unlikely(status & 0x1)) { queue_delayed_work(bnx2x_wq, &bp->sp_task, 0); @@ -3075,11 +3073,13 @@ static void bnx2x_drv_info_ether_stat(struct bnx2x *bp) static void bnx2x_drv_info_fcoe_stat(struct bnx2x *bp) { -#ifdef BCM_CNIC struct bnx2x_dcbx_app_params *app = &bp->dcbx_port_params.app; struct fcoe_stats_info *fcoe_stat = &bp->slowpath->drv_info_to_mcp.fcoe_stat; + if (!CNIC_LOADED(bp)) + return; + memcpy(fcoe_stat->mac_local + MAC_LEADING_ZERO_CNT, bp->fip_mac, ETH_ALEN); @@ -3162,16 +3162,17 @@ static void bnx2x_drv_info_fcoe_stat(struct bnx2x *bp) /* ask L5 driver to add data to the struct */ bnx2x_cnic_notify(bp, CNIC_CTL_FCOE_STATS_GET_CMD); -#endif } static void bnx2x_drv_info_iscsi_stat(struct bnx2x *bp) { -#ifdef BCM_CNIC struct bnx2x_dcbx_app_params *app = &bp->dcbx_port_params.app; struct iscsi_stats_info *iscsi_stat = &bp->slowpath->drv_info_to_mcp.iscsi_stat; + if (!CNIC_LOADED(bp)) + return; + memcpy(iscsi_stat->mac_local + MAC_LEADING_ZERO_CNT, bp->cnic_eth_dev.iscsi_mac, ETH_ALEN); @@ -3180,7 +3181,6 @@ static void bnx2x_drv_info_iscsi_stat(struct bnx2x *bp) /* ask L5 driver to add data to the struct */ bnx2x_cnic_notify(bp, CNIC_CTL_ISCSI_STATS_GET_CMD); -#endif } /* called due to MCP event (on pmf): @@ -4572,7 +4572,6 @@ static void bnx2x_update_eq_prod(struct bnx2x *bp, u16 prod) mmiowb(); /* keep prod updates ordered */ } -#ifdef BCM_CNIC static int bnx2x_cnic_handle_cfc_del(struct bnx2x *bp, u32 cid, union event_ring_elem *elem) { @@ -4594,7 +4593,6 @@ static int bnx2x_cnic_handle_cfc_del(struct bnx2x *bp, u32 cid, bnx2x_cnic_cfc_comp(bp, cid, err); return 0; } -#endif static void bnx2x_handle_mcast_eqe(struct bnx2x *bp) { @@ -4635,11 +4633,9 @@ static void bnx2x_handle_classification_eqe(struct bnx2x *bp, switch (elem->message.data.eth_event.echo >> BNX2X_SWCID_SHIFT) { case BNX2X_FILTER_MAC_PENDING: DP(BNX2X_MSG_SP, "Got SETUP_MAC completions\n"); -#ifdef BCM_CNIC - if (cid == BNX2X_ISCSI_ETH_CID(bp)) + if (CNIC_LOADED(bp) && (cid == BNX2X_ISCSI_ETH_CID(bp))) vlan_mac_obj = &bp->iscsi_l2_mac_obj; else -#endif vlan_mac_obj = &bp->sp_objs[cid].mac_obj; break; @@ -4665,9 +4661,7 @@ static void bnx2x_handle_classification_eqe(struct bnx2x *bp, } -#ifdef BCM_CNIC static void bnx2x_set_iscsi_eth_rx_mode(struct bnx2x *bp, bool start); -#endif static void bnx2x_handle_rx_mode_eqe(struct bnx2x *bp) { @@ -4678,14 +4672,12 @@ static void bnx2x_handle_rx_mode_eqe(struct bnx2x *bp) /* Send rx_mode command again if was requested */ if (test_and_clear_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state)) bnx2x_set_storm_rx_mode(bp); -#ifdef BCM_CNIC else if (test_and_clear_bit(BNX2X_FILTER_ISCSI_ETH_START_SCHED, &bp->sp_state)) bnx2x_set_iscsi_eth_rx_mode(bp, true); else if (test_and_clear_bit(BNX2X_FILTER_ISCSI_ETH_STOP_SCHED, &bp->sp_state)) bnx2x_set_iscsi_eth_rx_mode(bp, false); -#endif netif_addr_unlock_bh(bp->dev); } @@ -4747,7 +4739,6 @@ static void bnx2x_after_function_update(struct bnx2x *bp) q); } -#ifdef BCM_CNIC if (!NO_FCOE(bp)) { fp = &bp->fp[FCOE_IDX(bp)]; queue_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; @@ -4770,22 +4761,16 @@ static void bnx2x_after_function_update(struct bnx2x *bp) bnx2x_link_report(bp); bnx2x_fw_command(bp, DRV_MSG_CODE_AFEX_VIFSET_ACK, 0); } -#else - /* If no FCoE ring - ACK MCP now */ - bnx2x_link_report(bp); - bnx2x_fw_command(bp, DRV_MSG_CODE_AFEX_VIFSET_ACK, 0); -#endif /* BCM_CNIC */ } static struct bnx2x_queue_sp_obj *bnx2x_cid_to_q_obj( struct bnx2x *bp, u32 cid) { DP(BNX2X_MSG_SP, "retrieving fp from cid %d\n", cid); -#ifdef BCM_CNIC - if (cid == BNX2X_FCOE_ETH_CID(bp)) + + if (CNIC_LOADED(bp) && (cid == BNX2X_FCOE_ETH_CID(bp))) return &bnx2x_fcoe_sp_obj(bp, q_obj); else -#endif return &bp->sp_objs[CID_TO_FP(cid, bp)].q_obj; } @@ -4793,6 +4778,7 @@ static void bnx2x_eq_int(struct bnx2x *bp) { u16 hw_cons, sw_cons, sw_prod; union event_ring_elem *elem; + u8 echo; u32 cid; u8 opcode; int spqe_cnt = 0; @@ -4847,10 +4833,11 @@ static void bnx2x_eq_int(struct bnx2x *bp) */ DP(BNX2X_MSG_SP, "got delete ramrod for MULTI[%d]\n", cid); -#ifdef BCM_CNIC - if (!bnx2x_cnic_handle_cfc_del(bp, cid, elem)) + + if (CNIC_LOADED(bp) && + !bnx2x_cnic_handle_cfc_del(bp, cid, elem)) goto next_spqe; -#endif + q_obj = bnx2x_cid_to_q_obj(bp, cid); if (q_obj->complete_cmd(bp, q_obj, BNX2X_Q_CMD_CFC_DEL)) @@ -4875,21 +4862,34 @@ static void bnx2x_eq_int(struct bnx2x *bp) break; bnx2x_dcbx_set_params(bp, BNX2X_DCBX_STATE_TX_RELEASED); goto next_spqe; + case EVENT_RING_OPCODE_FUNCTION_UPDATE: - DP(BNX2X_MSG_SP | BNX2X_MSG_MCP, - "AFEX: ramrod completed FUNCTION_UPDATE\n"); - f_obj->complete_cmd(bp, f_obj, BNX2X_F_CMD_AFEX_UPDATE); + echo = elem->message.data.function_update_event.echo; + if (echo == SWITCH_UPDATE) { + DP(BNX2X_MSG_SP | NETIF_MSG_IFUP, + "got FUNC_SWITCH_UPDATE ramrod\n"); + if (f_obj->complete_cmd( + bp, f_obj, BNX2X_F_CMD_SWITCH_UPDATE)) + break; - /* We will perform the Queues update from sp_rtnl task - * as all Queue SP operations should run under - * rtnl_lock. - */ - smp_mb__before_clear_bit(); - set_bit(BNX2X_SP_RTNL_AFEX_F_UPDATE, - &bp->sp_rtnl_state); - smp_mb__after_clear_bit(); + } else { + DP(BNX2X_MSG_SP | BNX2X_MSG_MCP, + "AFEX: ramrod completed FUNCTION_UPDATE\n"); + f_obj->complete_cmd(bp, f_obj, + BNX2X_F_CMD_AFEX_UPDATE); + + /* We will perform the Queues update from + * sp_rtnl task as all Queue SP operations + * should run under rtnl_lock. + */ + smp_mb__before_clear_bit(); + set_bit(BNX2X_SP_RTNL_AFEX_F_UPDATE, + &bp->sp_rtnl_state); + smp_mb__after_clear_bit(); + + schedule_delayed_work(&bp->sp_rtnl_task, 0); + } - schedule_delayed_work(&bp->sp_rtnl_task, 0); goto next_spqe; case EVENT_RING_OPCODE_AFEX_VIF_LISTS: @@ -4999,11 +4999,10 @@ static void bnx2x_sp_task(struct work_struct *work) /* SP events: STAT_QUERY and others */ if (status & BNX2X_DEF_SB_IDX) { -#ifdef BCM_CNIC struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp); - if ((!NO_FCOE(bp)) && - (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { + if (FCOE_INIT(bp) && + (bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { /* * Prevent local bottom-halves from running as * we are going to change the local NAPI list. @@ -5012,7 +5011,7 @@ static void bnx2x_sp_task(struct work_struct *work) napi_schedule(&bnx2x_fcoe(bp, napi)); local_bh_enable(); } -#endif + /* Handle EQ completions */ bnx2x_eq_int(bp); @@ -5050,8 +5049,7 @@ irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance) return IRQ_HANDLED; #endif -#ifdef BCM_CNIC - { + if (CNIC_LOADED(bp)) { struct cnic_ops *c_ops; rcu_read_lock(); @@ -5060,7 +5058,7 @@ irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance) c_ops->cnic_handler(bp->cnic_data, NULL); rcu_read_unlock(); } -#endif + queue_delayed_work(bnx2x_wq, &bp->sp_task, 0); return IRQ_HANDLED; @@ -5498,12 +5496,10 @@ void bnx2x_set_storm_rx_mode(struct bnx2x *bp) unsigned long rx_mode_flags = 0, ramrod_flags = 0; unsigned long rx_accept_flags = 0, tx_accept_flags = 0; -#ifdef BCM_CNIC if (!NO_FCOE(bp)) /* Configure rx_mode of FCoE Queue */ __set_bit(BNX2X_RX_MODE_FCOE_ETH, &rx_mode_flags); -#endif switch (bp->rx_mode) { case BNX2X_RX_MODE_NONE: @@ -5624,12 +5620,12 @@ static void bnx2x_init_internal(struct bnx2x *bp, u32 load_code) static inline u8 bnx2x_fp_igu_sb_id(struct bnx2x_fastpath *fp) { - return fp->bp->igu_base_sb + fp->index + CNIC_PRESENT; + return fp->bp->igu_base_sb + fp->index + CNIC_SUPPORT(fp->bp); } static inline u8 bnx2x_fp_fw_sb_id(struct bnx2x_fastpath *fp) { - return fp->bp->base_fw_ndsb + fp->index + CNIC_PRESENT; + return fp->bp->base_fw_ndsb + fp->index + CNIC_SUPPORT(fp->bp); } static u8 bnx2x_fp_cl_id(struct bnx2x_fastpath *fp) @@ -5720,23 +5716,25 @@ static void bnx2x_init_tx_ring_one(struct bnx2x_fp_txdata *txdata) txdata->tx_pkt = 0; } +static void bnx2x_init_tx_rings_cnic(struct bnx2x *bp) +{ + int i; + + for_each_tx_queue_cnic(bp, i) + bnx2x_init_tx_ring_one(bp->fp[i].txdata_ptr[0]); +} static void bnx2x_init_tx_rings(struct bnx2x *bp) { int i; u8 cos; - for_each_tx_queue(bp, i) + for_each_eth_queue(bp, i) for_each_cos_in_tx_queue(&bp->fp[i], cos) bnx2x_init_tx_ring_one(bp->fp[i].txdata_ptr[cos]); } -void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) +void bnx2x_nic_init_cnic(struct bnx2x *bp) { - int i; - - for_each_eth_queue(bp, i) - bnx2x_init_eth_fp(bp, i); -#ifdef BCM_CNIC if (!NO_FCOE(bp)) bnx2x_init_fcoe_fp(bp); @@ -5744,8 +5742,22 @@ void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) BNX2X_VF_ID_INVALID, false, bnx2x_cnic_fw_sb_id(bp), bnx2x_cnic_igu_sb_id(bp)); -#endif + /* ensure status block indices were read */ + rmb(); + bnx2x_init_rx_rings_cnic(bp); + bnx2x_init_tx_rings_cnic(bp); + + /* flush all */ + mb(); + mmiowb(); +} +void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) +{ + int i; + + for_each_eth_queue(bp, i) + bnx2x_init_eth_fp(bp, i); /* Initialize MOD_ABS interrupts */ bnx2x_init_mod_abs_int(bp, &bp->link_vars, bp->common.chip_id, bp->common.shmem_base, bp->common.shmem2_base, @@ -6031,10 +6043,9 @@ static int bnx2x_int_mem_test(struct bnx2x *bp) msleep(50); bnx2x_init_block(bp, BLOCK_BRB1, PHASE_COMMON); bnx2x_init_block(bp, BLOCK_PRS, PHASE_COMMON); -#ifndef BCM_CNIC - /* set NIC mode */ - REG_WR(bp, PRS_REG_NIC_MODE, 1); -#endif + if (!CNIC_SUPPORT(bp)) + /* set NIC mode */ + REG_WR(bp, PRS_REG_NIC_MODE, 1); /* Enable inputs of parser neighbor blocks */ REG_WR(bp, TSDM_REG_ENABLE_IN1, 0x7fffffff); @@ -6522,9 +6533,8 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) REG_WR(bp, QM_REG_SOFT_RESET, 1); REG_WR(bp, QM_REG_SOFT_RESET, 0); -#ifdef BCM_CNIC - bnx2x_init_block(bp, BLOCK_TM, PHASE_COMMON); -#endif + if (CNIC_SUPPORT(bp)) + bnx2x_init_block(bp, BLOCK_TM, PHASE_COMMON); bnx2x_init_block(bp, BLOCK_DORQ, PHASE_COMMON); REG_WR(bp, DORQ_REG_DPM_CID_OFST, BNX2X_DB_SHIFT); @@ -6611,18 +6621,18 @@ static int bnx2x_init_hw_common(struct bnx2x *bp) bnx2x_init_block(bp, BLOCK_SRC, PHASE_COMMON); -#ifdef BCM_CNIC - REG_WR(bp, SRC_REG_KEYSEARCH_0, 0x63285672); - REG_WR(bp, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); - REG_WR(bp, SRC_REG_KEYSEARCH_2, 0x223aef9b); - REG_WR(bp, SRC_REG_KEYSEARCH_3, 0x26001e3a); - REG_WR(bp, SRC_REG_KEYSEARCH_4, 0x7ae91116); - REG_WR(bp, SRC_REG_KEYSEARCH_5, 0x5ce5230b); - REG_WR(bp, SRC_REG_KEYSEARCH_6, 0x298d8adf); - REG_WR(bp, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); - REG_WR(bp, SRC_REG_KEYSEARCH_8, 0x1830f82f); - REG_WR(bp, SRC_REG_KEYSEARCH_9, 0x01e46be7); -#endif + if (CNIC_SUPPORT(bp)) { + REG_WR(bp, SRC_REG_KEYSEARCH_0, 0x63285672); + REG_WR(bp, SRC_REG_KEYSEARCH_1, 0x24b8f2cc); + REG_WR(bp, SRC_REG_KEYSEARCH_2, 0x223aef9b); + REG_WR(bp, SRC_REG_KEYSEARCH_3, 0x26001e3a); + REG_WR(bp, SRC_REG_KEYSEARCH_4, 0x7ae91116); + REG_WR(bp, SRC_REG_KEYSEARCH_5, 0x5ce5230b); + REG_WR(bp, SRC_REG_KEYSEARCH_6, 0x298d8adf); + REG_WR(bp, SRC_REG_KEYSEARCH_7, 0x6eb0ff09); + REG_WR(bp, SRC_REG_KEYSEARCH_8, 0x1830f82f); + REG_WR(bp, SRC_REG_KEYSEARCH_9, 0x01e46be7); + } REG_WR(bp, SRC_REG_SOFT_RST, 0); if (sizeof(union cdu_context) != 1024) @@ -6786,11 +6796,11 @@ static int bnx2x_init_hw_port(struct bnx2x *bp) /* QM cid (connection) count */ bnx2x_qm_init_cid_count(bp, bp->qm_cid_count, INITOP_SET); -#ifdef BCM_CNIC - bnx2x_init_block(bp, BLOCK_TM, init_phase); - REG_WR(bp, TM_REG_LIN0_SCAN_TIME + port*4, 20); - REG_WR(bp, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); -#endif + if (CNIC_SUPPORT(bp)) { + bnx2x_init_block(bp, BLOCK_TM, init_phase); + REG_WR(bp, TM_REG_LIN0_SCAN_TIME + port*4, 20); + REG_WR(bp, TM_REG_LIN0_MAX_ACTIVE_CID + port*4, 31); + } bnx2x_init_block(bp, BLOCK_DORQ, init_phase); @@ -6877,9 +6887,9 @@ static int bnx2x_init_hw_port(struct bnx2x *bp) REG_WR(bp, PBF_REG_INIT_P0 + port*4, 0); } -#ifdef BCM_CNIC - bnx2x_init_block(bp, BLOCK_SRC, init_phase); -#endif + if (CNIC_SUPPORT(bp)) + bnx2x_init_block(bp, BLOCK_SRC, init_phase); + bnx2x_init_block(bp, BLOCK_CDU, init_phase); bnx2x_init_block(bp, BLOCK_CFC, init_phase); @@ -7040,6 +7050,130 @@ static void bnx2x_clear_func_ilt(struct bnx2x *bp, u32 func) bnx2x_ilt_wr(bp, i, 0); } + +void bnx2x_init_searcher(struct bnx2x *bp) +{ + int port = BP_PORT(bp); + bnx2x_src_init_t2(bp, bp->t2, bp->t2_mapping, SRC_CONN_NUM); + /* T1 hash bits value determines the T1 number of entries */ + REG_WR(bp, SRC_REG_NUMBER_HASH_BITS0 + port*4, SRC_HASH_BITS); +} + +static inline int bnx2x_func_switch_update(struct bnx2x *bp, int suspend) +{ + int rc; + struct bnx2x_func_state_params func_params = {NULL}; + struct bnx2x_func_switch_update_params *switch_update_params = + &func_params.params.switch_update; + + /* Prepare parameters for function state transitions */ + __set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags); + __set_bit(RAMROD_RETRY, &func_params.ramrod_flags); + + func_params.f_obj = &bp->func_obj; + func_params.cmd = BNX2X_F_CMD_SWITCH_UPDATE; + + /* Function parameters */ + switch_update_params->suspend = suspend; + + rc = bnx2x_func_state_change(bp, &func_params); + + return rc; +} + +int bnx2x_reset_nic_mode(struct bnx2x *bp) +{ + int rc, i, port = BP_PORT(bp); + int vlan_en = 0, mac_en[NUM_MACS]; + + + /* Close input from network */ + if (bp->mf_mode == SINGLE_FUNCTION) { + bnx2x_set_rx_filter(&bp->link_params, 0); + } else { + vlan_en = REG_RD(bp, port ? NIG_REG_LLH1_FUNC_EN : + NIG_REG_LLH0_FUNC_EN); + REG_WR(bp, port ? NIG_REG_LLH1_FUNC_EN : + NIG_REG_LLH0_FUNC_EN, 0); + for (i = 0; i < NUM_MACS; i++) { + mac_en[i] = REG_RD(bp, port ? + (NIG_REG_LLH1_FUNC_MEM_ENABLE + + 4 * i) : + (NIG_REG_LLH0_FUNC_MEM_ENABLE + + 4 * i)); + REG_WR(bp, port ? (NIG_REG_LLH1_FUNC_MEM_ENABLE + + 4 * i) : + (NIG_REG_LLH0_FUNC_MEM_ENABLE + 4 * i), 0); + } + } + + /* Close BMC to host */ + REG_WR(bp, port ? NIG_REG_P0_TX_MNG_HOST_ENABLE : + NIG_REG_P1_TX_MNG_HOST_ENABLE, 0); + + /* Suspend Tx switching to the PF. Completion of this ramrod + * further guarantees that all the packets of that PF / child + * VFs in BRB were processed by the Parser, so it is safe to + * change the NIC_MODE register. + */ + rc = bnx2x_func_switch_update(bp, 1); + if (rc) { + BNX2X_ERR("Can't suspend tx-switching!\n"); + return rc; + } + + /* Change NIC_MODE register */ + REG_WR(bp, PRS_REG_NIC_MODE, 0); + + /* Open input from network */ + if (bp->mf_mode == SINGLE_FUNCTION) { + bnx2x_set_rx_filter(&bp->link_params, 1); + } else { + REG_WR(bp, port ? NIG_REG_LLH1_FUNC_EN : + NIG_REG_LLH0_FUNC_EN, vlan_en); + for (i = 0; i < NUM_MACS; i++) { + REG_WR(bp, port ? (NIG_REG_LLH1_FUNC_MEM_ENABLE + + 4 * i) : + (NIG_REG_LLH0_FUNC_MEM_ENABLE + 4 * i), + mac_en[i]); + } + } + + /* Enable BMC to host */ + REG_WR(bp, port ? NIG_REG_P0_TX_MNG_HOST_ENABLE : + NIG_REG_P1_TX_MNG_HOST_ENABLE, 1); + + /* Resume Tx switching to the PF */ + rc = bnx2x_func_switch_update(bp, 0); + if (rc) { + BNX2X_ERR("Can't resume tx-switching!\n"); + return rc; + } + + DP(NETIF_MSG_IFUP, "NIC MODE disabled\n"); + return 0; +} + +int bnx2x_init_hw_func_cnic(struct bnx2x *bp) +{ + int rc; + + bnx2x_ilt_init_op_cnic(bp, INITOP_SET); + + if (CONFIGURE_NIC_MODE(bp)) { + /* Configrue searcher as part of function hw init */ + bnx2x_init_searcher(bp); + + /* Reset NIC mode */ + rc = bnx2x_reset_nic_mode(bp); + if (rc) + BNX2X_ERR("Can't change NIC mode!\n"); + return rc; + } + + return 0; +} + static int bnx2x_init_hw_func(struct bnx2x *bp) { int port = BP_PORT(bp); @@ -7082,17 +7216,16 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) } bnx2x_ilt_init_op(bp, INITOP_SET); -#ifdef BCM_CNIC - bnx2x_src_init_t2(bp, bp->t2, bp->t2_mapping, SRC_CONN_NUM); - - /* T1 hash bits value determines the T1 number of entries */ - REG_WR(bp, SRC_REG_NUMBER_HASH_BITS0 + port*4, SRC_HASH_BITS); -#endif + if (!CONFIGURE_NIC_MODE(bp)) { + bnx2x_init_searcher(bp); + REG_WR(bp, PRS_REG_NIC_MODE, 0); + DP(NETIF_MSG_IFUP, "NIC MODE disabled\n"); + } else { + /* Set NIC mode */ + REG_WR(bp, PRS_REG_NIC_MODE, 1); + DP(NETIF_MSG_IFUP, "NIC MODE configrued\n"); -#ifndef BCM_CNIC - /* set NIC mode */ - REG_WR(bp, PRS_REG_NIC_MODE, 1); -#endif /* BCM_CNIC */ + } if (!CHIP_IS_E1x(bp)) { u32 pf_conf = IGU_PF_CONF_FUNC_EN; @@ -7343,6 +7476,20 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) } +void bnx2x_free_mem_cnic(struct bnx2x *bp) +{ + bnx2x_ilt_mem_op_cnic(bp, ILT_MEMOP_FREE); + + if (!CHIP_IS_E1x(bp)) + BNX2X_PCI_FREE(bp->cnic_sb.e2_sb, bp->cnic_sb_mapping, + sizeof(struct host_hc_status_block_e2)); + else + BNX2X_PCI_FREE(bp->cnic_sb.e1x_sb, bp->cnic_sb_mapping, + sizeof(struct host_hc_status_block_e1x)); + + BNX2X_PCI_FREE(bp->t2, bp->t2_mapping, SRC_T2_SZ); +} + void bnx2x_free_mem(struct bnx2x *bp) { int i; @@ -7367,17 +7514,6 @@ void bnx2x_free_mem(struct bnx2x *bp) BNX2X_FREE(bp->ilt->lines); -#ifdef BCM_CNIC - if (!CHIP_IS_E1x(bp)) - BNX2X_PCI_FREE(bp->cnic_sb.e2_sb, bp->cnic_sb_mapping, - sizeof(struct host_hc_status_block_e2)); - else - BNX2X_PCI_FREE(bp->cnic_sb.e1x_sb, bp->cnic_sb_mapping, - sizeof(struct host_hc_status_block_e1x)); - - BNX2X_PCI_FREE(bp->t2, bp->t2_mapping, SRC_T2_SZ); -#endif - BNX2X_PCI_FREE(bp->spq, bp->spq_mapping, BCM_PAGE_SIZE); BNX2X_PCI_FREE(bp->eq_ring, bp->eq_mapping, @@ -7445,24 +7581,44 @@ alloc_mem_err: return -ENOMEM; } - -int bnx2x_alloc_mem(struct bnx2x *bp) +int bnx2x_alloc_mem_cnic(struct bnx2x *bp) { - int i, allocated, context_size; - -#ifdef BCM_CNIC if (!CHIP_IS_E1x(bp)) /* size = the status block + ramrod buffers */ BNX2X_PCI_ALLOC(bp->cnic_sb.e2_sb, &bp->cnic_sb_mapping, sizeof(struct host_hc_status_block_e2)); else - BNX2X_PCI_ALLOC(bp->cnic_sb.e1x_sb, &bp->cnic_sb_mapping, - sizeof(struct host_hc_status_block_e1x)); + BNX2X_PCI_ALLOC(bp->cnic_sb.e1x_sb, + &bp->cnic_sb_mapping, + sizeof(struct + host_hc_status_block_e1x)); - /* allocate searcher T2 table */ - BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, SRC_T2_SZ); -#endif + if (CONFIGURE_NIC_MODE(bp)) + /* allocate searcher T2 table, as it wan't allocated before */ + BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, SRC_T2_SZ); + + /* write address to which L5 should insert its values */ + bp->cnic_eth_dev.addr_drv_info_to_mcp = + &bp->slowpath->drv_info_to_mcp; + + if (bnx2x_ilt_mem_op_cnic(bp, ILT_MEMOP_ALLOC)) + goto alloc_mem_err; + + return 0; + +alloc_mem_err: + bnx2x_free_mem_cnic(bp); + BNX2X_ERR("Can't allocate memory\n"); + return -ENOMEM; +} + +int bnx2x_alloc_mem(struct bnx2x *bp) +{ + int i, allocated, context_size; + if (!CONFIGURE_NIC_MODE(bp)) + /* allocate searcher T2 table */ + BNX2X_PCI_ALLOC(bp->t2, &bp->t2_mapping, SRC_T2_SZ); BNX2X_PCI_ALLOC(bp->def_status_blk, &bp->def_status_blk_mapping, sizeof(struct host_sp_status_block)); @@ -7470,11 +7626,6 @@ int bnx2x_alloc_mem(struct bnx2x *bp) BNX2X_PCI_ALLOC(bp->slowpath, &bp->slowpath_mapping, sizeof(struct bnx2x_slowpath)); -#ifdef BCM_CNIC - /* write address to which L5 should insert its values */ - bp->cnic_eth_dev.addr_drv_info_to_mcp = &bp->slowpath->drv_info_to_mcp; -#endif - /* Allocated memory for FW statistics */ if (bnx2x_alloc_fw_stats_mem(bp)) goto alloc_mem_err; @@ -7596,14 +7747,12 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set) { unsigned long ramrod_flags = 0; -#ifdef BCM_CNIC if (is_zero_ether_addr(bp->dev->dev_addr) && (IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp))) { DP(NETIF_MSG_IFUP | NETIF_MSG_IFDOWN, "Ignoring Zero MAC for STORAGE SD mode\n"); return 0; } -#endif DP(NETIF_MSG_IFUP, "Adding Eth MAC\n"); @@ -7632,7 +7781,8 @@ void bnx2x_set_int_mode(struct bnx2x *bp) bnx2x_enable_msi(bp); /* falling through... */ case INT_MODE_INTx: - bp->num_queues = 1 + NON_ETH_CONTEXT_USE; + bp->num_ethernet_queues = 1; + bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues; BNX2X_DEV_INFO("set number of queues to 1\n"); break; default: @@ -7644,9 +7794,10 @@ void bnx2x_set_int_mode(struct bnx2x *bp) bp->flags & USING_SINGLE_MSIX_FLAG) { /* failed to enable multiple MSI-X */ BNX2X_DEV_INFO("Failed to enable multiple MSI-X (%d), set number of queues to %d\n", - bp->num_queues, 1 + NON_ETH_CONTEXT_USE); + bp->num_queues, + 1 + bp->num_cnic_queues); - bp->num_queues = 1 + NON_ETH_CONTEXT_USE; + bp->num_queues = 1 + bp->num_cnic_queues; /* Try to enable MSI */ if (!(bp->flags & USING_SINGLE_MSIX_FLAG) && @@ -7679,9 +7830,9 @@ void bnx2x_ilt_set_info(struct bnx2x *bp) ilt_client->flags = ILT_CLIENT_SKIP_MEM; ilt_client->start = line; line += bnx2x_cid_ilt_lines(bp); -#ifdef BCM_CNIC - line += CNIC_ILT_LINES; -#endif + + if (CNIC_SUPPORT(bp)) + line += CNIC_ILT_LINES; ilt_client->end = line - 1; DP(NETIF_MSG_IFUP, "ilt client[CDU]: start %d, end %d, psz 0x%x, flags 0x%x, hw psz %d\n", @@ -7714,49 +7865,43 @@ void bnx2x_ilt_set_info(struct bnx2x *bp) ilog2(ilt_client->page_size >> 12)); } - /* SRC */ - ilt_client = &ilt->clients[ILT_CLIENT_SRC]; -#ifdef BCM_CNIC - ilt_client->client_num = ILT_CLIENT_SRC; - ilt_client->page_size = SRC_ILT_PAGE_SZ; - ilt_client->flags = 0; - ilt_client->start = line; - line += SRC_ILT_LINES; - ilt_client->end = line - 1; - DP(NETIF_MSG_IFUP, - "ilt client[SRC]: start %d, end %d, psz 0x%x, flags 0x%x, hw psz %d\n", - ilt_client->start, - ilt_client->end, - ilt_client->page_size, - ilt_client->flags, - ilog2(ilt_client->page_size >> 12)); + if (CNIC_SUPPORT(bp)) { + /* SRC */ + ilt_client = &ilt->clients[ILT_CLIENT_SRC]; + ilt_client->client_num = ILT_CLIENT_SRC; + ilt_client->page_size = SRC_ILT_PAGE_SZ; + ilt_client->flags = 0; + ilt_client->start = line; + line += SRC_ILT_LINES; + ilt_client->end = line - 1; -#else - ilt_client->flags = (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM); -#endif + DP(NETIF_MSG_IFUP, + "ilt client[SRC]: start %d, end %d, psz 0x%x, flags 0x%x, hw psz %d\n", + ilt_client->start, + ilt_client->end, + ilt_client->page_size, + ilt_client->flags, + ilog2(ilt_client->page_size >> 12)); - /* TM */ - ilt_client = &ilt->clients[ILT_CLIENT_TM]; -#ifdef BCM_CNIC - ilt_client->client_num = ILT_CLIENT_TM; - ilt_client->page_size = TM_ILT_PAGE_SZ; - ilt_client->flags = 0; - ilt_client->start = line; - line += TM_ILT_LINES; - ilt_client->end = line - 1; + /* TM */ + ilt_client = &ilt->clients[ILT_CLIENT_TM]; + ilt_client->client_num = ILT_CLIENT_TM; + ilt_client->page_size = TM_ILT_PAGE_SZ; + ilt_client->flags = 0; + ilt_client->start = line; + line += TM_ILT_LINES; + ilt_client->end = line - 1; - DP(NETIF_MSG_IFUP, - "ilt client[TM]: start %d, end %d, psz 0x%x, flags 0x%x, hw psz %d\n", - ilt_client->start, - ilt_client->end, - ilt_client->page_size, - ilt_client->flags, - ilog2(ilt_client->page_size >> 12)); + DP(NETIF_MSG_IFUP, + "ilt client[TM]: start %d, end %d, psz 0x%x, flags 0x%x, hw psz %d\n", + ilt_client->start, + ilt_client->end, + ilt_client->page_size, + ilt_client->flags, + ilog2(ilt_client->page_size >> 12)); + } -#else - ilt_client->flags = (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM); -#endif BUG_ON(line > ILT_MAX_LINES); } @@ -7924,6 +8069,9 @@ int bnx2x_setup_queue(struct bnx2x *bp, struct bnx2x_fastpath *fp, /* Set the command */ q_params.cmd = BNX2X_Q_CMD_SETUP; + if (IS_FCOE_FP(fp)) + bp->fcoe_init = true; + /* Change the state to SETUP */ rc = bnx2x_queue_state_change(bp, &q_params); if (rc) { @@ -8037,12 +8185,12 @@ static void bnx2x_reset_func(struct bnx2x *bp) SB_DISABLED); } -#ifdef BCM_CNIC - /* CNIC SB */ - REG_WR8(bp, BAR_CSTRORM_INTMEM + - CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET(bnx2x_cnic_fw_sb_id(bp)), - SB_DISABLED); -#endif + if (CNIC_LOADED(bp)) + /* CNIC SB */ + REG_WR8(bp, BAR_CSTRORM_INTMEM + + CSTORM_STATUS_BLOCK_DATA_STATE_OFFSET + (bnx2x_cnic_fw_sb_id(bp)), SB_DISABLED); + /* SP SB */ REG_WR8(bp, BAR_CSTRORM_INTMEM + CSTORM_SP_STATUS_BLOCK_DATA_STATE_OFFSET(func), @@ -8061,19 +8209,19 @@ static void bnx2x_reset_func(struct bnx2x *bp) REG_WR(bp, IGU_REG_TRAILING_EDGE_LATCH, 0); } -#ifdef BCM_CNIC - /* Disable Timer scan */ - REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0); - /* - * Wait for at least 10ms and up to 2 second for the timers scan to - * complete - */ - for (i = 0; i < 200; i++) { - msleep(10); - if (!REG_RD(bp, TM_REG_LIN0_SCAN_ON + port*4)) - break; + if (CNIC_LOADED(bp)) { + /* Disable Timer scan */ + REG_WR(bp, TM_REG_EN_LINEAR0_TIMER + port*4, 0); + /* + * Wait for at least 10ms and up to 2 second for the timers + * scan to complete + */ + for (i = 0; i < 200; i++) { + msleep(10); + if (!REG_RD(bp, TM_REG_LIN0_SCAN_ON + port*4)) + break; + } } -#endif /* Clear ILT */ bnx2x_clear_func_ilt(bp, func); @@ -8409,13 +8557,24 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) /* Close multi and leading connections * Completions for ramrods are collected in a synchronous way */ - for_each_queue(bp, i) + for_each_eth_queue(bp, i) if (bnx2x_stop_queue(bp, i)) #ifdef BNX2X_STOP_ON_ERROR return; #else goto unload_error; #endif + + if (CNIC_LOADED(bp)) { + for_each_cnic_queue(bp, i) + if (bnx2x_stop_queue(bp, i)) +#ifdef BNX2X_STOP_ON_ERROR + return; +#else + goto unload_error; +#endif + } + /* If SP settings didn't get completed so far - something * very wrong has happen. */ @@ -8437,6 +8596,8 @@ unload_error: bnx2x_netif_stop(bp, 1); /* Delete all NAPI objects */ bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); /* Release IRQs */ bnx2x_free_irq(bp); @@ -10224,12 +10385,15 @@ static void __devinit bnx2x_get_port_hwinfo(struct bnx2x *bp) void bnx2x_get_iscsi_info(struct bnx2x *bp) { u32 no_flags = NO_ISCSI_FLAG; -#ifdef BCM_CNIC int port = BP_PORT(bp); - u32 max_iscsi_conn = FW_ENCODE_32BIT_PATTERN ^ SHMEM_RD(bp, drv_lic_key[port].max_iscsi_conn); + if (!CNIC_SUPPORT(bp)) { + bp->flags |= no_flags; + return; + } + /* Get the number of maximum allowed iSCSI connections */ bp->cnic_eth_dev.max_iscsi_conn = (max_iscsi_conn & BNX2X_MAX_ISCSI_INIT_CONN_MASK) >> @@ -10244,12 +10408,9 @@ void bnx2x_get_iscsi_info(struct bnx2x *bp) */ if (!bp->cnic_eth_dev.max_iscsi_conn) bp->flags |= no_flags; -#else - bp->flags |= no_flags; -#endif + } -#ifdef BCM_CNIC static void __devinit bnx2x_get_ext_wwn_info(struct bnx2x *bp, int func) { /* Port info */ @@ -10264,16 +10425,18 @@ static void __devinit bnx2x_get_ext_wwn_info(struct bnx2x *bp, int func) bp->cnic_eth_dev.fcoe_wwn_node_name_lo = MF_CFG_RD(bp, func_ext_config[func].fcoe_wwn_node_name_lower); } -#endif static void __devinit bnx2x_get_fcoe_info(struct bnx2x *bp) { -#ifdef BCM_CNIC int port = BP_PORT(bp); int func = BP_ABS_FUNC(bp); - u32 max_fcoe_conn = FW_ENCODE_32BIT_PATTERN ^ SHMEM_RD(bp, drv_lic_key[port].max_fcoe_conn); + if (!CNIC_SUPPORT(bp)) { + bp->flags |= NO_FCOE_FLAG; + return; + } + /* Get the number of maximum allowed FCoE connections */ bp->cnic_eth_dev.max_fcoe_conn = (max_fcoe_conn & BNX2X_MAX_FCOE_INIT_CONN_MASK) >> @@ -10319,9 +10482,6 @@ static void __devinit bnx2x_get_fcoe_info(struct bnx2x *bp) */ if (!bp->cnic_eth_dev.max_fcoe_conn) bp->flags |= NO_FCOE_FLAG; -#else - bp->flags |= NO_FCOE_FLAG; -#endif } static void __devinit bnx2x_get_cnic_info(struct bnx2x *bp) @@ -10335,132 +10495,133 @@ static void __devinit bnx2x_get_cnic_info(struct bnx2x *bp) bnx2x_get_fcoe_info(bp); } -static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp) +static void __devinit bnx2x_get_cnic_mac_hwinfo(struct bnx2x *bp) { u32 val, val2; int func = BP_ABS_FUNC(bp); int port = BP_PORT(bp); -#ifdef BCM_CNIC u8 *iscsi_mac = bp->cnic_eth_dev.iscsi_mac; u8 *fip_mac = bp->fip_mac; -#endif - /* Zero primary MAC configuration */ - memset(bp->dev->dev_addr, 0, ETH_ALEN); - - if (BP_NOMCP(bp)) { - BNX2X_ERROR("warning: random MAC workaround active\n"); - eth_hw_addr_random(bp->dev); - } else if (IS_MF(bp)) { - val2 = MF_CFG_RD(bp, func_mf_config[func].mac_upper); - val = MF_CFG_RD(bp, func_mf_config[func].mac_lower); - if ((val2 != FUNC_MF_CFG_UPPERMAC_DEFAULT) && - (val != FUNC_MF_CFG_LOWERMAC_DEFAULT)) - bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); - -#ifdef BCM_CNIC - /* - * iSCSI and FCoE NPAR MACs: if there is no either iSCSI or + if (IS_MF(bp)) { + /* iSCSI and FCoE NPAR MACs: if there is no either iSCSI or * FCoE MAC then the appropriate feature should be disabled. - * - * In non SD mode features configuration comes from - * struct func_ext_config. + * In non SD mode features configuration comes from struct + * func_ext_config. */ - if (!IS_MF_SD(bp)) { + if (!IS_MF_SD(bp) && !CHIP_IS_E1x(bp)) { u32 cfg = MF_CFG_RD(bp, func_ext_config[func].func_cfg); if (cfg & MACP_FUNC_CFG_FLAGS_ISCSI_OFFLOAD) { val2 = MF_CFG_RD(bp, func_ext_config[func]. - iscsi_mac_addr_upper); + iscsi_mac_addr_upper); val = MF_CFG_RD(bp, func_ext_config[func]. - iscsi_mac_addr_lower); + iscsi_mac_addr_lower); bnx2x_set_mac_buf(iscsi_mac, val, val2); - BNX2X_DEV_INFO("Read iSCSI MAC: %pM\n", - iscsi_mac); - } else + BNX2X_DEV_INFO + ("Read iSCSI MAC: %pM\n", iscsi_mac); + } else { bp->flags |= NO_ISCSI_OOO_FLAG | NO_ISCSI_FLAG; + } if (cfg & MACP_FUNC_CFG_FLAGS_FCOE_OFFLOAD) { val2 = MF_CFG_RD(bp, func_ext_config[func]. - fcoe_mac_addr_upper); + fcoe_mac_addr_upper); val = MF_CFG_RD(bp, func_ext_config[func]. - fcoe_mac_addr_lower); + fcoe_mac_addr_lower); bnx2x_set_mac_buf(fip_mac, val, val2); - BNX2X_DEV_INFO("Read FCoE L2 MAC: %pM\n", - fip_mac); - - } else + BNX2X_DEV_INFO + ("Read FCoE L2 MAC: %pM\n", fip_mac); + } else { bp->flags |= NO_FCOE_FLAG; + } bp->mf_ext_config = cfg; } else { /* SD MODE */ - if (IS_MF_STORAGE_SD(bp)) { - if (BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp)) { - /* use primary mac as iscsi mac */ - memcpy(iscsi_mac, bp->dev->dev_addr, - ETH_ALEN); - - BNX2X_DEV_INFO("SD ISCSI MODE\n"); - BNX2X_DEV_INFO("Read iSCSI MAC: %pM\n", - iscsi_mac); - } else { /* FCoE */ - memcpy(fip_mac, bp->dev->dev_addr, - ETH_ALEN); - BNX2X_DEV_INFO("SD FCoE MODE\n"); - BNX2X_DEV_INFO("Read FIP MAC: %pM\n", - fip_mac); - } - /* Zero primary MAC configuration */ - memset(bp->dev->dev_addr, 0, ETH_ALEN); + if (BNX2X_IS_MF_SD_PROTOCOL_ISCSI(bp)) { + /* use primary mac as iscsi mac */ + memcpy(iscsi_mac, bp->dev->dev_addr, ETH_ALEN); + + BNX2X_DEV_INFO("SD ISCSI MODE\n"); + BNX2X_DEV_INFO + ("Read iSCSI MAC: %pM\n", iscsi_mac); + } else if (BNX2X_IS_MF_SD_PROTOCOL_FCOE(bp)) { + /* use primary mac as fip mac */ + memcpy(fip_mac, bp->dev->dev_addr, ETH_ALEN); + BNX2X_DEV_INFO("SD FCoE MODE\n"); + BNX2X_DEV_INFO + ("Read FIP MAC: %pM\n", fip_mac); } } + if (IS_MF_STORAGE_SD(bp)) + /* Zero primary MAC configuration */ + memset(bp->dev->dev_addr, 0, ETH_ALEN); + if (IS_MF_FCOE_AFEX(bp)) /* use FIP MAC as primary MAC */ memcpy(bp->dev->dev_addr, fip_mac, ETH_ALEN); -#endif } else { - /* in SF read MACs from port configuration */ - val2 = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_upper); - val = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_lower); - bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); - -#ifdef BCM_CNIC val2 = SHMEM_RD(bp, dev_info.port_hw_config[port]. - iscsi_mac_upper); + iscsi_mac_upper); val = SHMEM_RD(bp, dev_info.port_hw_config[port]. - iscsi_mac_lower); + iscsi_mac_lower); bnx2x_set_mac_buf(iscsi_mac, val, val2); val2 = SHMEM_RD(bp, dev_info.port_hw_config[port]. - fcoe_fip_mac_upper); + fcoe_fip_mac_upper); val = SHMEM_RD(bp, dev_info.port_hw_config[port]. - fcoe_fip_mac_lower); + fcoe_fip_mac_lower); bnx2x_set_mac_buf(fip_mac, val, val2); -#endif } - memcpy(bp->link_params.mac_addr, bp->dev->dev_addr, ETH_ALEN); - memcpy(bp->dev->perm_addr, bp->dev->dev_addr, ETH_ALEN); - -#ifdef BCM_CNIC - /* Disable iSCSI if MAC configuration is - * invalid. - */ + /* Disable iSCSI OOO if MAC configuration is invalid. */ if (!is_valid_ether_addr(iscsi_mac)) { - bp->flags |= NO_ISCSI_FLAG; + bp->flags |= NO_ISCSI_OOO_FLAG | NO_ISCSI_FLAG; memset(iscsi_mac, 0, ETH_ALEN); } - /* Disable FCoE if MAC configuration is - * invalid. - */ + /* Disable FCoE if MAC configuration is invalid. */ if (!is_valid_ether_addr(fip_mac)) { bp->flags |= NO_FCOE_FLAG; memset(bp->fip_mac, 0, ETH_ALEN); } -#endif +} + +static void __devinit bnx2x_get_mac_hwinfo(struct bnx2x *bp) +{ + u32 val, val2; + int func = BP_ABS_FUNC(bp); + int port = BP_PORT(bp); + + /* Zero primary MAC configuration */ + memset(bp->dev->dev_addr, 0, ETH_ALEN); + + if (BP_NOMCP(bp)) { + BNX2X_ERROR("warning: random MAC workaround active\n"); + eth_hw_addr_random(bp->dev); + } else if (IS_MF(bp)) { + val2 = MF_CFG_RD(bp, func_mf_config[func].mac_upper); + val = MF_CFG_RD(bp, func_mf_config[func].mac_lower); + if ((val2 != FUNC_MF_CFG_UPPERMAC_DEFAULT) && + (val != FUNC_MF_CFG_LOWERMAC_DEFAULT)) + bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); + + if (CNIC_SUPPORT(bp)) + bnx2x_get_cnic_mac_hwinfo(bp); + } else { + /* in SF read MACs from port configuration */ + val2 = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_upper); + val = SHMEM_RD(bp, dev_info.port_hw_config[port].mac_lower); + bnx2x_set_mac_buf(bp->dev->dev_addr, val, val2); + + if (CNIC_SUPPORT(bp)) + bnx2x_get_cnic_mac_hwinfo(bp); + } + + memcpy(bp->link_params.mac_addr, bp->dev->dev_addr, ETH_ALEN); + memcpy(bp->dev->perm_addr, bp->dev->dev_addr, ETH_ALEN); if (!bnx2x_is_valid_ether_addr(bp, bp->dev->dev_addr)) dev_err(&bp->pdev->dev, @@ -10837,9 +10998,7 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) mutex_init(&bp->port.phy_mutex); mutex_init(&bp->fw_mb_mutex); spin_lock_init(&bp->stats_lock); -#ifdef BCM_CNIC - mutex_init(&bp->cnic_mutex); -#endif + INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task); INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task); @@ -10877,10 +11036,7 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) dev_err(&bp->pdev->dev, "MCP disabled, must load devices in order!\n"); bp->disable_tpa = disable_tpa; - -#ifdef BCM_CNIC bp->disable_tpa |= IS_MF_STORAGE_SD(bp) || IS_MF_FCOE_AFEX(bp); -#endif /* Set TPA flags */ if (bp->disable_tpa) { @@ -10914,12 +11070,10 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) bnx2x_dcbx_set_state(bp, true, BNX2X_DCBX_ENABLED_ON_NEG_ON); bnx2x_dcbx_init_params(bp); -#ifdef BCM_CNIC if (CHIP_IS_E1x(bp)) bp->cnic_base_cl_id = FP_SB_MAX_E1x; else bp->cnic_base_cl_id = FP_SB_MAX_E2; -#endif /* multiple tx priority */ if (CHIP_IS_E1x(bp)) @@ -10929,6 +11083,16 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) if (CHIP_IS_E3B0(bp)) bp->max_cos = BNX2X_MULTI_TX_COS_E3B0; + /* We need at least one default status block for slow-path events, + * second status block for the L2 queue, and a third status block for + * CNIC if supproted. + */ + if (CNIC_SUPPORT(bp)) + bp->min_msix_vec_cnt = 3; + else + bp->min_msix_vec_cnt = 2; + BNX2X_DEV_INFO("bp->min_msix_vec_cnt %d", bp->min_msix_vec_cnt); + return rc; } @@ -11165,11 +11329,9 @@ void bnx2x_set_rx_mode(struct net_device *dev) } bp->rx_mode = rx_mode; -#ifdef BCM_CNIC /* handle ISCSI SD mode */ if (IS_MF_ISCSI_SD(bp)) bp->rx_mode = BNX2X_RX_MODE_NONE; -#endif /* Schedule the rx_mode command */ if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) { @@ -11281,7 +11443,7 @@ static const struct net_device_ops bnx2x_netdev_ops = { #endif .ndo_setup_tc = bnx2x_setup_tc, -#if defined(NETDEV_FCOE_WWNN) && defined(BCM_CNIC) +#ifdef NETDEV_FCOE_WWNN .ndo_fcoe_get_wwn = bnx2x_fcoe_get_wwn, #endif }; @@ -11747,9 +11909,8 @@ static int bnx2x_set_qm_cid_count(struct bnx2x *bp) { int cid_count = BNX2X_L2_MAX_CID(bp); -#ifdef BCM_CNIC - cid_count += CNIC_CID_MAX; -#endif + if (CNIC_SUPPORT(bp)) + cid_count += CNIC_CID_MAX; return roundup(cid_count, QM_CID_ROUND); } @@ -11759,7 +11920,8 @@ static int bnx2x_set_qm_cid_count(struct bnx2x *bp) * @dev: pci device * */ -static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev) +static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev, + int cnic_cnt) { int pos; u16 control; @@ -11771,7 +11933,7 @@ static int bnx2x_get_num_non_def_sbs(struct pci_dev *pdev) * one fast path queue: one FP queue + SB for CNIC */ if (!pos) - return 1 + CNIC_PRESENT; + return 1 + cnic_cnt; /* * The value in the PCI configuration space is the index of the last @@ -11791,6 +11953,7 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, int pcie_width, pcie_speed; int rc, max_non_def_sbs; int rx_count, tx_count, rss_count, doorbell_size; + int cnic_cnt; /* * An estimated maximum supported CoS number according to the chip * version. @@ -11834,21 +11997,22 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, return -ENODEV; } - max_non_def_sbs = bnx2x_get_num_non_def_sbs(pdev); + cnic_cnt = 1; + max_non_def_sbs = bnx2x_get_num_non_def_sbs(pdev, cnic_cnt); WARN_ON(!max_non_def_sbs); /* Maximum number of RSS queues: one IGU SB goes to CNIC */ - rss_count = max_non_def_sbs - CNIC_PRESENT; + rss_count = max_non_def_sbs - cnic_cnt; /* Maximum number of netdev Rx queues: RSS + FCoE L2 */ - rx_count = rss_count + FCOE_PRESENT; + rx_count = rss_count + cnic_cnt; /* * Maximum number of netdev Tx queues: * Maximum TSS queues * Maximum supported number of CoS + FCoE L2 */ - tx_count = rss_count * max_cos_est + FCOE_PRESENT; + tx_count = rss_count * max_cos_est + cnic_cnt; /* dev zeroed in init_etherdev */ dev = alloc_etherdev_mqs(sizeof(*bp), tx_count, rx_count); @@ -11859,6 +12023,8 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, bp->igu_sb_cnt = max_non_def_sbs; bp->msg_enable = debug; + bp->cnic_support = cnic_cnt; + pci_set_drvdata(pdev, dev); rc = bnx2x_init_dev(pdev, dev, ent->driver_data); @@ -11867,6 +12033,7 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, return rc; } + BNX2X_DEV_INFO("Cnic support is %s\n", CNIC_SUPPORT(bp) ? "on" : "off"); BNX2X_DEV_INFO("max_non_def_sbs %d\n", max_non_def_sbs); BNX2X_DEV_INFO("Allocated netdev with %d tx and %d rx queues\n", @@ -11899,10 +12066,10 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, /* calc qm_cid_count */ bp->qm_cid_count = bnx2x_set_qm_cid_count(bp); -#ifdef BCM_CNIC - /* disable FCOE L2 queue for E1x */ + /* disable FCOE L2 queue for E1x*/ if (CHIP_IS_E1x(bp)) bp->flags |= NO_FCOE_FLAG; + /* disable FCOE for 57840 device, until FW supports it */ switch (ent->driver_data) { case BCM57840_O: @@ -11912,8 +12079,6 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, case BCM57840_MF: bp->flags |= NO_FCOE_FLAG; } -#endif - /* Set bp->num_queues for MSI-X mode*/ bnx2x_set_num_queues(bp); @@ -11929,14 +12094,13 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, goto init_one_exit; } -#ifdef BCM_CNIC + if (!NO_FCOE(bp)) { /* Add storage MAC address */ rtnl_lock(); dev_addr_add(bp->dev, bp->fip_mac, NETDEV_HW_ADDR_T_SAN); rtnl_unlock(); } -#endif bnx2x_get_pcie_width_speed(bp, &pcie_width, &pcie_speed); @@ -11981,14 +12145,12 @@ static void __devexit bnx2x_remove_one(struct pci_dev *pdev) } bp = netdev_priv(dev); -#ifdef BCM_CNIC /* Delete storage MAC address */ if (!NO_FCOE(bp)) { rtnl_lock(); dev_addr_del(bp->dev, bp->fip_mac, NETDEV_HW_ADDR_T_SAN); rtnl_unlock(); } -#endif #ifdef BCM_DCBNL /* Delete app tlvs from dcbnl */ @@ -12036,15 +12198,17 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) bp->rx_mode = BNX2X_RX_MODE_NONE; -#ifdef BCM_CNIC - bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); -#endif + if (CNIC_LOADED(bp)) + bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); + /* Stop Tx */ bnx2x_tx_disable(bp); bnx2x_netif_stop(bp, 0); /* Delete all NAPI objects */ bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); del_timer_sync(&bp->timer); @@ -12235,7 +12399,6 @@ void bnx2x_notify_link_changed(struct bnx2x *bp) module_init(bnx2x_init); module_exit(bnx2x_cleanup); -#ifdef BCM_CNIC /** * bnx2x_set_iscsi_eth_mac_addr - set iSCSI MAC(s). * @@ -12688,12 +12851,31 @@ static int bnx2x_register_cnic(struct net_device *dev, struct cnic_ops *ops, { struct bnx2x *bp = netdev_priv(dev); struct cnic_eth_dev *cp = &bp->cnic_eth_dev; + int rc; + + DP(NETIF_MSG_IFUP, "Register_cnic called\n"); if (ops == NULL) { BNX2X_ERR("NULL ops received\n"); return -EINVAL; } + if (!CNIC_SUPPORT(bp)) { + BNX2X_ERR("Can't register CNIC when not supported\n"); + return -EOPNOTSUPP; + } + + if (!CNIC_LOADED(bp)) { + rc = bnx2x_load_cnic(bp); + if (rc) { + BNX2X_ERR("CNIC-related load failed\n"); + return rc; + } + + } + + bp->cnic_enabled = true; + bp->cnic_kwq = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!bp->cnic_kwq) return -ENOMEM; @@ -12785,5 +12967,4 @@ struct cnic_eth_dev *bnx2x_cnic_probe(struct net_device *dev) } EXPORT_SYMBOL(bnx2x_cnic_probe); -#endif /* BCM_CNIC */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 1b1999d34c7..7d93adb57f3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -2107,6 +2107,7 @@ #define NIG_REG_LLH1_ERROR_MASK 0x10090 /* [RW 8] event id for llh1 */ #define NIG_REG_LLH1_EVENT_ID 0x10088 +#define NIG_REG_LLH1_FUNC_EN 0x16104 #define NIG_REG_LLH1_FUNC_MEM 0x161c0 #define NIG_REG_LLH1_FUNC_MEM_ENABLE 0x16160 #define NIG_REG_LLH1_FUNC_MEM_SIZE 16 @@ -2302,6 +2303,15 @@ * set to 0x345678021. This is a new register (with 2_) added in E3 B0 to * accommodate the 9 input clients to ETS arbiter. */ #define NIG_REG_P0_TX_ARB_PRIORITY_CLIENT2_MSB 0x18684 +/* [RW 1] MCP-to-host path enable. Set this bit to enable the routing of MCP + * packets to BRB LB interface to forward the packet to the host. All + * packets from MCP are forwarded to the network when this bit is cleared - + * regardless of the configured destination in tx_mng_destination register. + * When MCP-to-host paths for both ports 0 and 1 are disabled - the arbiter + * for BRB LB interface is bypassed and PBF LB traffic is always selected to + * send to BRB LB. + */ +#define NIG_REG_P0_TX_MNG_HOST_ENABLE 0x182f4 #define NIG_REG_P1_HWPFC_ENABLE 0x181d0 #define NIG_REG_P1_MAC_IN_EN 0x185c0 /* [RW 1] Output enable for TX MAC interface */ @@ -2418,6 +2428,12 @@ #define NIG_REG_P1_TX_ARB_PRIORITY_CLIENT2_MSB 0x186e4 /* [R 1] TX FIFO for transmitting data to MAC is empty. */ #define NIG_REG_P1_TX_MACFIFO_EMPTY 0x18594 +/* [RW 1] MCP-to-host path enable. Set this bit to enable the routing of MCP + * packets to BRB LB interface to forward the packet to the host. All + * packets from MCP are forwarded to the network when this bit is cleared - + * regardless of the configured destination in tx_mng_destination register. + */ +#define NIG_REG_P1_TX_MNG_HOST_ENABLE 0x182f8 /* [R 1] FIFO empty status of the MCP TX FIFO used for storing MCP packets forwarded to the host. */ #define NIG_REG_P1_TX_MNG_HOST_FIFO_EMPTY 0x182b8 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 614981c0226..b8b4b749daa 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -5350,12 +5350,24 @@ static int bnx2x_func_chk_transition(struct bnx2x *bp, else if ((cmd == BNX2X_F_CMD_AFEX_VIFLISTS) && (!test_bit(BNX2X_F_CMD_STOP, &o->pending))) next_state = BNX2X_F_STATE_STARTED; + + /* Switch_update ramrod can be sent in either started or + * tx_stopped state, and it doesn't change the state. + */ + else if ((cmd == BNX2X_F_CMD_SWITCH_UPDATE) && + (!test_bit(BNX2X_F_CMD_STOP, &o->pending))) + next_state = BNX2X_F_STATE_STARTED; + else if (cmd == BNX2X_F_CMD_TX_STOP) next_state = BNX2X_F_STATE_TX_STOPPED; break; case BNX2X_F_STATE_TX_STOPPED: - if (cmd == BNX2X_F_CMD_TX_START) + if ((cmd == BNX2X_F_CMD_SWITCH_UPDATE) && + (!test_bit(BNX2X_F_CMD_STOP, &o->pending))) + next_state = BNX2X_F_STATE_TX_STOPPED; + + else if (cmd == BNX2X_F_CMD_TX_START) next_state = BNX2X_F_STATE_STARTED; break; @@ -5637,6 +5649,28 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, U64_LO(data_mapping), NONE_CONNECTION_TYPE); } +static inline int bnx2x_func_send_switch_update(struct bnx2x *bp, + struct bnx2x_func_state_params *params) +{ + struct bnx2x_func_sp_obj *o = params->f_obj; + struct function_update_data *rdata = + (struct function_update_data *)o->rdata; + dma_addr_t data_mapping = o->rdata_mapping; + struct bnx2x_func_switch_update_params *switch_update_params = + ¶ms->params.switch_update; + + memset(rdata, 0, sizeof(*rdata)); + + /* Fill the ramrod data with provided parameters */ + rdata->tx_switch_suspend_change_flg = 1; + rdata->tx_switch_suspend = switch_update_params->suspend; + rdata->echo = SWITCH_UPDATE; + + return bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_FUNCTION_UPDATE, 0, + U64_HI(data_mapping), + U64_LO(data_mapping), NONE_CONNECTION_TYPE); +} + static inline int bnx2x_func_send_afex_update(struct bnx2x *bp, struct bnx2x_func_state_params *params) { @@ -5657,6 +5691,7 @@ static inline int bnx2x_func_send_afex_update(struct bnx2x *bp, cpu_to_le16(afex_update_params->afex_default_vlan); rdata->allowed_priorities_change_flg = 1; rdata->allowed_priorities = afex_update_params->allowed_priorities; + rdata->echo = AFEX_UPDATE; /* No need for an explicit memory barrier here as long we would * need to ensure the ordering of writing to the SPQ element @@ -5773,6 +5808,8 @@ static int bnx2x_func_send_cmd(struct bnx2x *bp, return bnx2x_func_send_tx_stop(bp, params); case BNX2X_F_CMD_TX_START: return bnx2x_func_send_tx_start(bp, params); + case BNX2X_F_CMD_SWITCH_UPDATE: + return bnx2x_func_send_switch_update(bp, params); default: BNX2X_ERR("Unknown command: %d\n", params->cmd); return -EINVAL; @@ -5818,16 +5855,30 @@ int bnx2x_func_state_change(struct bnx2x *bp, struct bnx2x_func_state_params *params) { struct bnx2x_func_sp_obj *o = params->f_obj; - int rc; + int rc, cnt = 300; enum bnx2x_func_cmd cmd = params->cmd; unsigned long *pending = &o->pending; mutex_lock(&o->one_pending_mutex); /* Check that the requested transition is legal */ - if (o->check_transition(bp, o, params)) { + rc = o->check_transition(bp, o, params); + if ((rc == -EBUSY) && + (test_bit(RAMROD_RETRY, ¶ms->ramrod_flags))) { + while ((rc == -EBUSY) && (--cnt > 0)) { + mutex_unlock(&o->one_pending_mutex); + msleep(10); + mutex_lock(&o->one_pending_mutex); + rc = o->check_transition(bp, o, params); + } + if (rc == -EBUSY) { + mutex_unlock(&o->one_pending_mutex); + BNX2X_ERR("timeout waiting for previous ramrod completion\n"); + return rc; + } + } else if (rc) { mutex_unlock(&o->one_pending_mutex); - return -EINVAL; + return rc; } /* Set "pending" bit */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index acf2fe4ca60..adbd91b1bdf 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -40,6 +40,12 @@ enum { * pending commands list. */ RAMROD_CONT, + /* If there is another pending ramrod, wait until it finishes and + * re-try to submit this one. This flag can be set only in sleepable + * context, and should not be set from the context that completes the + * ramrods as deadlock will occur. + */ + RAMROD_RETRY, }; typedef enum { @@ -1061,6 +1067,7 @@ enum bnx2x_func_cmd { BNX2X_F_CMD_AFEX_VIFLISTS, BNX2X_F_CMD_TX_STOP, BNX2X_F_CMD_TX_START, + BNX2X_F_CMD_SWITCH_UPDATE, BNX2X_F_CMD_MAX, }; @@ -1103,6 +1110,10 @@ struct bnx2x_func_start_params { u8 network_cos_mode; }; +struct bnx2x_func_switch_update_params { + u8 suspend; +}; + struct bnx2x_func_afex_update_params { u16 vif_id; u16 afex_default_vlan; @@ -1136,6 +1147,7 @@ struct bnx2x_func_state_params { struct bnx2x_func_hw_init_params hw_init; struct bnx2x_func_hw_reset_params hw_reset; struct bnx2x_func_start_params start; + struct bnx2x_func_switch_update_params switch_update; struct bnx2x_func_afex_update_params afex_update; struct bnx2x_func_afex_viflists_params afex_viflists; struct bnx2x_func_tx_start_params tx_start; diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a8800ac10df..038ce0215e3 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -90,10 +90,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define DRV_MODULE_NAME "tg3" #define TG3_MAJ_NUM 3 -#define TG3_MIN_NUM 125 +#define TG3_MIN_NUM 126 #define DRV_MODULE_VERSION \ __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM) -#define DRV_MODULE_RELDATE "September 26, 2012" +#define DRV_MODULE_RELDATE "November 05, 2012" #define RESET_KIND_SHUTDOWN 0 #define RESET_KIND_INIT 1 @@ -291,6 +291,7 @@ static DEFINE_PCI_DEVICE_TABLE(tg3_pci_tbl) = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57788)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5717)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5717_C)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5718)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57781)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57785)}, @@ -10429,10 +10430,8 @@ static void tg3_stop(struct tg3 *tp) { int i; - tg3_napi_disable(tp); tg3_reset_task_cancel(tp); - - netif_tx_disable(tp->dev); + tg3_netif_stop(tp); tg3_timer_stop(tp); @@ -14026,7 +14025,8 @@ out_not_found: out_no_vpd: if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717) { - if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717) + if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717_C) strcpy(tp->board_part_number, "BCM5717"); else if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718) strcpy(tp->board_part_number, "BCM5718"); @@ -14397,6 +14397,7 @@ static void __devinit tg3_detect_asic_rev(struct tg3 *tp, u32 misc_ctrl_reg) tg3_flag_set(tp, CPMU_PRESENT); if (tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717_C || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5719 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5720) @@ -14424,6 +14425,9 @@ static void __devinit tg3_detect_asic_rev(struct tg3 *tp, u32 misc_ctrl_reg) if (tp->pci_chip_rev_id == CHIPREV_ID_5752_A0_HW) tp->pci_chip_rev_id = CHIPREV_ID_5752_A0; + if (tp->pci_chip_rev_id == CHIPREV_ID_5717_C0) + tp->pci_chip_rev_id = CHIPREV_ID_5720_A0; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) @@ -16013,6 +16017,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->pdev->device == TG3PCI_DEVICE_TIGON3_5761S || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5761SE || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717 || + tp->pdev->device == TG3PCI_DEVICE_TIGON3_5717_C || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5718 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5719 || tp->pdev->device == TG3PCI_DEVICE_TIGON3_5720) { diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index d9308c32102..b3c2bf2c082 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -50,6 +50,7 @@ #define TG3PCI_DEVICE_TIGON3_5785_G 0x1699 /* GPHY */ #define TG3PCI_DEVICE_TIGON3_5785_F 0x16a0 /* 10/100 only */ #define TG3PCI_DEVICE_TIGON3_5717 0x1655 +#define TG3PCI_DEVICE_TIGON3_5717_C 0x1665 #define TG3PCI_DEVICE_TIGON3_5718 0x1656 #define TG3PCI_DEVICE_TIGON3_57781 0x16b1 #define TG3PCI_DEVICE_TIGON3_57785 0x16b5 @@ -149,6 +150,7 @@ #define CHIPREV_ID_57780_A0 0x57780000 #define CHIPREV_ID_57780_A1 0x57780001 #define CHIPREV_ID_5717_A0 0x05717000 +#define CHIPREV_ID_5717_C0 0x05717200 #define CHIPREV_ID_57765_A0 0x57785000 #define CHIPREV_ID_5719_A0 0x05719000 #define CHIPREV_ID_5720_A0 0x05720000 diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index db931916da0..ceb0de0cf62 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -2,13 +2,10 @@ # Atmel device configuration # -config HAVE_NET_MACB - bool - config NET_CADENCE bool "Cadence devices" + depends on HAS_IOMEM default y - depends on HAVE_NET_MACB || (ARM && ARCH_AT91RM9200) ---help--- If you have a network (Ethernet) card belonging to this class, say Y. Make sure you know the name of your card. Read the Ethernet-HOWTO, @@ -25,16 +22,14 @@ if NET_CADENCE config ARM_AT91_ETHER tristate "AT91RM9200 Ethernet support" - depends on ARM && ARCH_AT91RM9200 select NET_CORE - select MII + select MACB ---help--- If you wish to compile a kernel for the AT91RM9200 and enable ethernet support, then you should always answer Y to this. config MACB tristate "Cadence MACB/GEM support" - depends on HAVE_NET_MACB select PHYLIB ---help--- The Cadence MACB ethernet interface is found on many Atmel AT32 and diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c index 4e980a7886f..e7a476cff6c 100644 --- a/drivers/net/ethernet/cadence/at91_ether.c +++ b/drivers/net/ethernet/cadence/at91_ether.c @@ -6,11 +6,6 @@ * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. * Initial version by Rick Bronson 01/11/2003 * - * Intel LXT971A PHY support by Christopher Bahns & David Knickerbocker - * (Polaroid Corporation) - * - * Realtek RTL8201(B)L PHY support by Roman Avramenko <roman@imsystems.ru> - * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,7 +15,6 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> -#include <linux/mii.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> @@ -31,956 +25,251 @@ #include <linux/clk.h> #include <linux/gfp.h> #include <linux/phy.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_net.h> +#include <linux/pinctrl/consumer.h> -#include <asm/io.h> -#include <asm/uaccess.h> -#include <asm/mach-types.h> - -#include <mach/at91rm9200_emac.h> -#include <asm/gpio.h> -#include <mach/board.h> - -#include "at91_ether.h" - -#define DRV_NAME "at91_ether" -#define DRV_VERSION "1.0" - -#define LINK_POLL_INTERVAL (HZ) - -/* ..................................................................... */ - -/* - * Read from a EMAC register. - */ -static inline unsigned long at91_emac_read(struct at91_private *lp, unsigned int reg) -{ - return __raw_readl(lp->emac_base + reg); -} - -/* - * Write to a EMAC register. - */ -static inline void at91_emac_write(struct at91_private *lp, unsigned int reg, unsigned long value) -{ - __raw_writel(value, lp->emac_base + reg); -} - -/* ........................... PHY INTERFACE ........................... */ - -/* - * Enable the MDIO bit in MAC control register - * When not called from an interrupt-handler, access to the PHY must be - * protected by a spinlock. - */ -static void enable_mdi(struct at91_private *lp) -{ - unsigned long ctl; - - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl | AT91_EMAC_MPE); /* enable management port */ -} - -/* - * Disable the MDIO bit in the MAC control register - */ -static void disable_mdi(struct at91_private *lp) -{ - unsigned long ctl; - - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl & ~AT91_EMAC_MPE); /* disable management port */ -} - -/* - * Wait until the PHY operation is complete. - */ -static inline void at91_phy_wait(struct at91_private *lp) -{ - unsigned long timeout = jiffies + 2; - - while (!(at91_emac_read(lp, AT91_EMAC_SR) & AT91_EMAC_SR_IDLE)) { - if (time_after(jiffies, timeout)) { - printk("at91_ether: MIO timeout\n"); - break; - } - cpu_relax(); - } -} - -/* - * Write value to the a PHY register - * Note: MDI interface is assumed to already have been enabled. - */ -static void write_phy(struct at91_private *lp, unsigned char phy_addr, unsigned char address, unsigned int value) -{ - at91_emac_write(lp, AT91_EMAC_MAN, AT91_EMAC_MAN_802_3 | AT91_EMAC_RW_W - | ((phy_addr & 0x1f) << 23) | (address << 18) | (value & AT91_EMAC_DATA)); - - /* Wait until IDLE bit in Network Status register is cleared */ - at91_phy_wait(lp); -} - -/* - * Read value stored in a PHY register. - * Note: MDI interface is assumed to already have been enabled. - */ -static void read_phy(struct at91_private *lp, unsigned char phy_addr, unsigned char address, unsigned int *value) -{ - at91_emac_write(lp, AT91_EMAC_MAN, AT91_EMAC_MAN_802_3 | AT91_EMAC_RW_R - | ((phy_addr & 0x1f) << 23) | (address << 18)); - - /* Wait until IDLE bit in Network Status register is cleared */ - at91_phy_wait(lp); - - *value = at91_emac_read(lp, AT91_EMAC_MAN) & AT91_EMAC_DATA; -} - -/* ........................... PHY MANAGEMENT .......................... */ - -/* - * Access the PHY to determine the current link speed and mode, and update the - * MAC accordingly. - * If no link or auto-negotiation is busy, then no changes are made. - */ -static void update_linkspeed(struct net_device *dev, int silent) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned int bmsr, bmcr, lpa, mac_cfg; - unsigned int speed, duplex; - - if (!mii_link_ok(&lp->mii)) { /* no link */ - netif_carrier_off(dev); - if (!silent) - printk(KERN_INFO "%s: Link down.\n", dev->name); - return; - } - - /* Link up, or auto-negotiation still in progress */ - read_phy(lp, lp->phy_address, MII_BMSR, &bmsr); - read_phy(lp, lp->phy_address, MII_BMCR, &bmcr); - if (bmcr & BMCR_ANENABLE) { /* AutoNegotiation is enabled */ - if (!(bmsr & BMSR_ANEGCOMPLETE)) - return; /* Do nothing - another interrupt generated when negotiation complete */ - - read_phy(lp, lp->phy_address, MII_LPA, &lpa); - if ((lpa & LPA_100FULL) || (lpa & LPA_100HALF)) speed = SPEED_100; - else speed = SPEED_10; - if ((lpa & LPA_100FULL) || (lpa & LPA_10FULL)) duplex = DUPLEX_FULL; - else duplex = DUPLEX_HALF; - } else { - speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; - } - - /* Update the MAC */ - mac_cfg = at91_emac_read(lp, AT91_EMAC_CFG) & ~(AT91_EMAC_SPD | AT91_EMAC_FD); - if (speed == SPEED_100) { - if (duplex == DUPLEX_FULL) /* 100 Full Duplex */ - mac_cfg |= AT91_EMAC_SPD | AT91_EMAC_FD; - else /* 100 Half Duplex */ - mac_cfg |= AT91_EMAC_SPD; - } else { - if (duplex == DUPLEX_FULL) /* 10 Full Duplex */ - mac_cfg |= AT91_EMAC_FD; - else {} /* 10 Half Duplex */ - } - at91_emac_write(lp, AT91_EMAC_CFG, mac_cfg); - - if (!silent) - printk(KERN_INFO "%s: Link now %i-%s\n", dev->name, speed, (duplex == DUPLEX_FULL) ? "FullDuplex" : "HalfDuplex"); - netif_carrier_on(dev); -} - -/* - * Handle interrupts from the PHY - */ -static irqreturn_t at91ether_phy_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = (struct net_device *) dev_id; - struct at91_private *lp = netdev_priv(dev); - unsigned int phy; - - /* - * This hander is triggered on both edges, but the PHY chips expect - * level-triggering. We therefore have to check if the PHY actually has - * an IRQ pending. - */ - enable_mdi(lp); - if ((lp->phy_type == MII_DM9161_ID) || (lp->phy_type == MII_DM9161A_ID)) { - read_phy(lp, lp->phy_address, MII_DSINTR_REG, &phy); /* ack interrupt in Davicom PHY */ - if (!(phy & (1 << 0))) - goto done; - } - else if (lp->phy_type == MII_LXT971A_ID) { - read_phy(lp, lp->phy_address, MII_ISINTS_REG, &phy); /* ack interrupt in Intel PHY */ - if (!(phy & (1 << 2))) - goto done; - } - else if (lp->phy_type == MII_BCM5221_ID) { - read_phy(lp, lp->phy_address, MII_BCMINTR_REG, &phy); /* ack interrupt in Broadcom PHY */ - if (!(phy & (1 << 0))) - goto done; - } - else if (lp->phy_type == MII_KS8721_ID) { - read_phy(lp, lp->phy_address, MII_TPISTATUS, &phy); /* ack interrupt in Micrel PHY */ - if (!(phy & ((1 << 2) | 1))) - goto done; - } - else if (lp->phy_type == MII_T78Q21x3_ID) { /* ack interrupt in Teridian PHY */ - read_phy(lp, lp->phy_address, MII_T78Q21INT_REG, &phy); - if (!(phy & ((1 << 2) | 1))) - goto done; - } - else if (lp->phy_type == MII_DP83848_ID) { - read_phy(lp, lp->phy_address, MII_DPPHYSTS_REG, &phy); /* ack interrupt in DP83848 PHY */ - if (!(phy & (1 << 7))) - goto done; - } - - update_linkspeed(dev, 0); - -done: - disable_mdi(lp); - - return IRQ_HANDLED; -} - -/* - * Initialize and enable the PHY interrupt for link-state changes - */ -static void enable_phyirq(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned int dsintr, irq_number; - int status; - - if (!gpio_is_valid(lp->board_data.phy_irq_pin)) { - /* - * PHY doesn't have an IRQ pin (RTL8201, DP83847, AC101L), - * or board does not have it connected. - */ - mod_timer(&lp->check_timer, jiffies + LINK_POLL_INTERVAL); - return; - } - - irq_number = gpio_to_irq(lp->board_data.phy_irq_pin); - status = request_irq(irq_number, at91ether_phy_interrupt, 0, dev->name, dev); - if (status) { - printk(KERN_ERR "at91_ether: PHY IRQ %d request failed - status %d!\n", irq_number, status); - return; - } - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - if ((lp->phy_type == MII_DM9161_ID) || (lp->phy_type == MII_DM9161A_ID)) { /* for Davicom PHY */ - read_phy(lp, lp->phy_address, MII_DSINTR_REG, &dsintr); - dsintr = dsintr & ~0xf00; /* clear bits 8..11 */ - write_phy(lp, lp->phy_address, MII_DSINTR_REG, dsintr); - } - else if (lp->phy_type == MII_LXT971A_ID) { /* for Intel PHY */ - read_phy(lp, lp->phy_address, MII_ISINTE_REG, &dsintr); - dsintr = dsintr | 0xf2; /* set bits 1, 4..7 */ - write_phy(lp, lp->phy_address, MII_ISINTE_REG, dsintr); - } - else if (lp->phy_type == MII_BCM5221_ID) { /* for Broadcom PHY */ - dsintr = (1 << 15) | ( 1 << 14); - write_phy(lp, lp->phy_address, MII_BCMINTR_REG, dsintr); - } - else if (lp->phy_type == MII_KS8721_ID) { /* for Micrel PHY */ - dsintr = (1 << 10) | ( 1 << 8); - write_phy(lp, lp->phy_address, MII_TPISTATUS, dsintr); - } - else if (lp->phy_type == MII_T78Q21x3_ID) { /* for Teridian PHY */ - read_phy(lp, lp->phy_address, MII_T78Q21INT_REG, &dsintr); - dsintr = dsintr | 0x500; /* set bits 8, 10 */ - write_phy(lp, lp->phy_address, MII_T78Q21INT_REG, dsintr); - } - else if (lp->phy_type == MII_DP83848_ID) { /* National Semiconductor DP83848 PHY */ - read_phy(lp, lp->phy_address, MII_DPMISR_REG, &dsintr); - dsintr = dsintr | 0x3c; /* set bits 2..5 */ - write_phy(lp, lp->phy_address, MII_DPMISR_REG, dsintr); - read_phy(lp, lp->phy_address, MII_DPMICR_REG, &dsintr); - dsintr = dsintr | 0x3; /* set bits 0,1 */ - write_phy(lp, lp->phy_address, MII_DPMICR_REG, dsintr); - } - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); -} - -/* - * Disable the PHY interrupt - */ -static void disable_phyirq(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned int dsintr; - unsigned int irq_number; - - if (!gpio_is_valid(lp->board_data.phy_irq_pin)) { - del_timer_sync(&lp->check_timer); - return; - } - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - if ((lp->phy_type == MII_DM9161_ID) || (lp->phy_type == MII_DM9161A_ID)) { /* for Davicom PHY */ - read_phy(lp, lp->phy_address, MII_DSINTR_REG, &dsintr); - dsintr = dsintr | 0xf00; /* set bits 8..11 */ - write_phy(lp, lp->phy_address, MII_DSINTR_REG, dsintr); - } - else if (lp->phy_type == MII_LXT971A_ID) { /* for Intel PHY */ - read_phy(lp, lp->phy_address, MII_ISINTE_REG, &dsintr); - dsintr = dsintr & ~0xf2; /* clear bits 1, 4..7 */ - write_phy(lp, lp->phy_address, MII_ISINTE_REG, dsintr); - } - else if (lp->phy_type == MII_BCM5221_ID) { /* for Broadcom PHY */ - read_phy(lp, lp->phy_address, MII_BCMINTR_REG, &dsintr); - dsintr = ~(1 << 14); - write_phy(lp, lp->phy_address, MII_BCMINTR_REG, dsintr); - } - else if (lp->phy_type == MII_KS8721_ID) { /* for Micrel PHY */ - read_phy(lp, lp->phy_address, MII_TPISTATUS, &dsintr); - dsintr = ~((1 << 10) | (1 << 8)); - write_phy(lp, lp->phy_address, MII_TPISTATUS, dsintr); - } - else if (lp->phy_type == MII_T78Q21x3_ID) { /* for Teridian PHY */ - read_phy(lp, lp->phy_address, MII_T78Q21INT_REG, &dsintr); - dsintr = dsintr & ~0x500; /* clear bits 8, 10 */ - write_phy(lp, lp->phy_address, MII_T78Q21INT_REG, dsintr); - } - else if (lp->phy_type == MII_DP83848_ID) { /* National Semiconductor DP83848 PHY */ - read_phy(lp, lp->phy_address, MII_DPMICR_REG, &dsintr); - dsintr = dsintr & ~0x3; /* clear bits 0, 1 */ - write_phy(lp, lp->phy_address, MII_DPMICR_REG, dsintr); - read_phy(lp, lp->phy_address, MII_DPMISR_REG, &dsintr); - dsintr = dsintr & ~0x3c; /* clear bits 2..5 */ - write_phy(lp, lp->phy_address, MII_DPMISR_REG, dsintr); - } - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - - irq_number = gpio_to_irq(lp->board_data.phy_irq_pin); - free_irq(irq_number, dev); /* Free interrupt handler */ -} - -/* - * Perform a software reset of the PHY. - */ -#if 0 -static void reset_phy(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned int bmcr; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - /* Perform PHY reset */ - write_phy(lp, lp->phy_address, MII_BMCR, BMCR_RESET); - - /* Wait until PHY reset is complete */ - do { - read_phy(lp, lp->phy_address, MII_BMCR, &bmcr); - } while (!(bmcr & BMCR_RESET)); - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); -} -#endif - -static void at91ether_check_link(unsigned long dev_id) -{ - struct net_device *dev = (struct net_device *) dev_id; - struct at91_private *lp = netdev_priv(dev); - - enable_mdi(lp); - update_linkspeed(dev, 1); - disable_mdi(lp); - - mod_timer(&lp->check_timer, jiffies + LINK_POLL_INTERVAL); -} - -/* - * Perform any PHY-specific initialization. - */ -static void __init initialize_phy(struct at91_private *lp) -{ - unsigned int val; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - if ((lp->phy_type == MII_DM9161_ID) || (lp->phy_type == MII_DM9161A_ID)) { - read_phy(lp, lp->phy_address, MII_DSCR_REG, &val); - if ((val & (1 << 10)) == 0) /* DSCR bit 10 is 0 -- fiber mode */ - lp->phy_media = PORT_FIBRE; - } else if (machine_is_csb337()) { - /* mix link activity status into LED2 link state */ - write_phy(lp, lp->phy_address, MII_LEDCTRL_REG, 0x0d22); - } else if (machine_is_ecbat91()) - write_phy(lp, lp->phy_address, MII_LEDCTRL_REG, 0x156A); - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); -} - -/* ......................... ADDRESS MANAGEMENT ........................ */ - -/* - * NOTE: Your bootloader must always set the MAC address correctly before - * booting into Linux. - * - * - It must always set the MAC address after reset, even if it doesn't - * happen to access the Ethernet while it's booting. Some versions of - * U-Boot on the AT91RM9200-DK do not do this. - * - * - Likewise it must store the addresses in the correct byte order. - * MicroMonitor (uMon) on the CSB337 does this incorrectly (and - * continues to do so, for bug-compatibility). - */ - -static short __init unpack_mac_address(struct net_device *dev, unsigned int hi, unsigned int lo) -{ - char addr[6]; - - if (machine_is_csb337()) { - addr[5] = (lo & 0xff); /* The CSB337 bootloader stores the MAC the wrong-way around */ - addr[4] = (lo & 0xff00) >> 8; - addr[3] = (lo & 0xff0000) >> 16; - addr[2] = (lo & 0xff000000) >> 24; - addr[1] = (hi & 0xff); - addr[0] = (hi & 0xff00) >> 8; - } - else { - addr[0] = (lo & 0xff); - addr[1] = (lo & 0xff00) >> 8; - addr[2] = (lo & 0xff0000) >> 16; - addr[3] = (lo & 0xff000000) >> 24; - addr[4] = (hi & 0xff); - addr[5] = (hi & 0xff00) >> 8; - } - - if (is_valid_ether_addr(addr)) { - memcpy(dev->dev_addr, &addr, 6); - return 1; - } - return 0; -} - -/* - * Set the ethernet MAC address in dev->dev_addr - */ -static void __init get_mac_address(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - - /* Check Specific-Address 1 */ - if (unpack_mac_address(dev, at91_emac_read(lp, AT91_EMAC_SA1H), at91_emac_read(lp, AT91_EMAC_SA1L))) - return; - /* Check Specific-Address 2 */ - if (unpack_mac_address(dev, at91_emac_read(lp, AT91_EMAC_SA2H), at91_emac_read(lp, AT91_EMAC_SA2L))) - return; - /* Check Specific-Address 3 */ - if (unpack_mac_address(dev, at91_emac_read(lp, AT91_EMAC_SA3H), at91_emac_read(lp, AT91_EMAC_SA3L))) - return; - /* Check Specific-Address 4 */ - if (unpack_mac_address(dev, at91_emac_read(lp, AT91_EMAC_SA4H), at91_emac_read(lp, AT91_EMAC_SA4L))) - return; - - printk(KERN_ERR "at91_ether: Your bootloader did not configure a MAC address.\n"); -} - -/* - * Program the hardware MAC address from dev->dev_addr. - */ -static void update_mac_address(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - - at91_emac_write(lp, AT91_EMAC_SA1L, (dev->dev_addr[3] << 24) | (dev->dev_addr[2] << 16) | (dev->dev_addr[1] << 8) | (dev->dev_addr[0])); - at91_emac_write(lp, AT91_EMAC_SA1H, (dev->dev_addr[5] << 8) | (dev->dev_addr[4])); - - at91_emac_write(lp, AT91_EMAC_SA2L, 0); - at91_emac_write(lp, AT91_EMAC_SA2H, 0); -} - -/* - * Store the new hardware address in dev->dev_addr, and update the MAC. - */ -static int set_mac_address(struct net_device *dev, void* addr) -{ - struct sockaddr *address = addr; - - if (!is_valid_ether_addr(address->sa_data)) - return -EADDRNOTAVAIL; - - memcpy(dev->dev_addr, address->sa_data, dev->addr_len); - update_mac_address(dev); +#include "macb.h" - printk("%s: Setting MAC address to %pM\n", dev->name, - dev->dev_addr); +/* 1518 rounded up */ +#define MAX_RBUFF_SZ 0x600 +/* max number of receive buffers */ +#define MAX_RX_DESCR 9 - return 0; -} - -static int inline hash_bit_value(int bitnr, __u8 *addr) -{ - if (addr[bitnr / 8] & (1 << (bitnr % 8))) - return 1; - return 0; -} - -/* - * The hash address register is 64 bits long and takes up two locations in the memory map. - * The least significant bits are stored in EMAC_HSL and the most significant - * bits in EMAC_HSH. - * - * The unicast hash enable and the multicast hash enable bits in the network configuration - * register enable the reception of hash matched frames. The destination address is - * reduced to a 6 bit index into the 64 bit hash register using the following hash function. - * The hash function is an exclusive or of every sixth bit of the destination address. - * hash_index[5] = da[5] ^ da[11] ^ da[17] ^ da[23] ^ da[29] ^ da[35] ^ da[41] ^ da[47] - * hash_index[4] = da[4] ^ da[10] ^ da[16] ^ da[22] ^ da[28] ^ da[34] ^ da[40] ^ da[46] - * hash_index[3] = da[3] ^ da[09] ^ da[15] ^ da[21] ^ da[27] ^ da[33] ^ da[39] ^ da[45] - * hash_index[2] = da[2] ^ da[08] ^ da[14] ^ da[20] ^ da[26] ^ da[32] ^ da[38] ^ da[44] - * hash_index[1] = da[1] ^ da[07] ^ da[13] ^ da[19] ^ da[25] ^ da[31] ^ da[37] ^ da[43] - * hash_index[0] = da[0] ^ da[06] ^ da[12] ^ da[18] ^ da[24] ^ da[30] ^ da[36] ^ da[42] - * da[0] represents the least significant bit of the first byte received, that is, the multicast/ - * unicast indicator, and da[47] represents the most significant bit of the last byte - * received. - * If the hash index points to a bit that is set in the hash register then the frame will be - * matched according to whether the frame is multicast or unicast. - * A multicast match will be signalled if the multicast hash enable bit is set, da[0] is 1 and - * the hash index points to a bit set in the hash register. - * A unicast match will be signalled if the unicast hash enable bit is set, da[0] is 0 and the - * hash index points to a bit set in the hash register. - * To receive all multicast frames, the hash register should be set with all ones and the - * multicast hash enable bit should be set in the network configuration register. - */ - -/* - * Return the hash index value for the specified address. - */ -static int hash_get_index(__u8 *addr) -{ - int i, j, bitval; - int hash_index = 0; - - for (j = 0; j < 6; j++) { - for (i = 0, bitval = 0; i < 8; i++) - bitval ^= hash_bit_value(i*6 + j, addr); - - hash_index |= (bitval << j); - } - - return hash_index; -} - -/* - * Add multicast addresses to the internal multicast-hash table. - */ -static void at91ether_sethashtable(struct net_device *dev) +/* Initialize and start the Receiver and Transmit subsystems */ +static int at91ether_start(struct net_device *dev) { - struct at91_private *lp = netdev_priv(dev); - struct netdev_hw_addr *ha; - unsigned long mc_filter[2]; - unsigned int bitnr; - - mc_filter[0] = mc_filter[1] = 0; - - netdev_for_each_mc_addr(ha, dev) { - bitnr = hash_get_index(ha->addr); - mc_filter[bitnr >> 5] |= 1 << (bitnr & 31); - } - - at91_emac_write(lp, AT91_EMAC_HSL, mc_filter[0]); - at91_emac_write(lp, AT91_EMAC_HSH, mc_filter[1]); -} + struct macb *lp = netdev_priv(dev); + dma_addr_t addr; + u32 ctl; + int i; -/* - * Enable/Disable promiscuous and multicast modes. - */ -static void at91ether_set_multicast_list(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned long cfg; - - cfg = at91_emac_read(lp, AT91_EMAC_CFG); - - if (dev->flags & IFF_PROMISC) /* Enable promiscuous mode */ - cfg |= AT91_EMAC_CAF; - else if (dev->flags & (~IFF_PROMISC)) /* Disable promiscuous mode */ - cfg &= ~AT91_EMAC_CAF; - - if (dev->flags & IFF_ALLMULTI) { /* Enable all multicast mode */ - at91_emac_write(lp, AT91_EMAC_HSH, -1); - at91_emac_write(lp, AT91_EMAC_HSL, -1); - cfg |= AT91_EMAC_MTI; - } else if (!netdev_mc_empty(dev)) { /* Enable specific multicasts */ - at91ether_sethashtable(dev); - cfg |= AT91_EMAC_MTI; - } else if (dev->flags & (~IFF_ALLMULTI)) { /* Disable all multicast mode */ - at91_emac_write(lp, AT91_EMAC_HSH, 0); - at91_emac_write(lp, AT91_EMAC_HSL, 0); - cfg &= ~AT91_EMAC_MTI; + lp->rx_ring = dma_alloc_coherent(&lp->pdev->dev, + MAX_RX_DESCR * sizeof(struct macb_dma_desc), + &lp->rx_ring_dma, GFP_KERNEL); + if (!lp->rx_ring) { + netdev_err(dev, "unable to alloc rx ring DMA buffer\n"); + return -ENOMEM; } - at91_emac_write(lp, AT91_EMAC_CFG, cfg); -} - -/* ......................... ETHTOOL SUPPORT ........................... */ - -static int mdio_read(struct net_device *dev, int phy_id, int location) -{ - struct at91_private *lp = netdev_priv(dev); - unsigned int value; - - read_phy(lp, phy_id, location, &value); - return value; -} - -static void mdio_write(struct net_device *dev, int phy_id, int location, int value) -{ - struct at91_private *lp = netdev_priv(dev); - - write_phy(lp, phy_id, location, value); -} - -static int at91ether_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct at91_private *lp = netdev_priv(dev); - int ret; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - ret = mii_ethtool_gset(&lp->mii, cmd); - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); + lp->rx_buffers = dma_alloc_coherent(&lp->pdev->dev, + MAX_RX_DESCR * MAX_RBUFF_SZ, + &lp->rx_buffers_dma, GFP_KERNEL); + if (!lp->rx_buffers) { + netdev_err(dev, "unable to alloc rx data DMA buffer\n"); - if (lp->phy_media == PORT_FIBRE) { /* override media type since mii.c doesn't know */ - cmd->supported = SUPPORTED_FIBRE; - cmd->port = PORT_FIBRE; + dma_free_coherent(&lp->pdev->dev, + MAX_RX_DESCR * sizeof(struct macb_dma_desc), + lp->rx_ring, lp->rx_ring_dma); + lp->rx_ring = NULL; + return -ENOMEM; } - return ret; -} - -static int at91ether_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct at91_private *lp = netdev_priv(dev); - int ret; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - ret = mii_ethtool_sset(&lp->mii, cmd); - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - - return ret; -} - -static int at91ether_nwayreset(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - int ret; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - - ret = mii_nway_restart(&lp->mii); - - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - - return ret; -} - -static void at91ether_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) -{ - strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); - strlcpy(info->bus_info, dev_name(dev->dev.parent), sizeof(info->bus_info)); -} - -static const struct ethtool_ops at91ether_ethtool_ops = { - .get_settings = at91ether_get_settings, - .set_settings = at91ether_set_settings, - .get_drvinfo = at91ether_get_drvinfo, - .nway_reset = at91ether_nwayreset, - .get_link = ethtool_op_get_link, -}; - -static int at91ether_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) -{ - struct at91_private *lp = netdev_priv(dev); - int res; - - if (!netif_running(dev)) - return -EINVAL; - - spin_lock_irq(&lp->lock); - enable_mdi(lp); - res = generic_mii_ioctl(&lp->mii, if_mii(rq), cmd, NULL); - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - - return res; -} - -/* ................................ MAC ................................ */ - -/* - * Initialize and start the Receiver and Transmit subsystems - */ -static void at91ether_start(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - struct recv_desc_bufs *dlist, *dlist_phys; - int i; - unsigned long ctl; - - dlist = lp->dlist; - dlist_phys = lp->dlist_phys; - + addr = lp->rx_buffers_dma; for (i = 0; i < MAX_RX_DESCR; i++) { - dlist->descriptors[i].addr = (unsigned int) &dlist_phys->recv_buf[i][0]; - dlist->descriptors[i].size = 0; + lp->rx_ring[i].addr = addr; + lp->rx_ring[i].ctrl = 0; + addr += MAX_RBUFF_SZ; } /* Set the Wrap bit on the last descriptor */ - dlist->descriptors[i-1].addr |= EMAC_DESC_WRAP; + lp->rx_ring[MAX_RX_DESCR - 1].addr |= MACB_BIT(RX_WRAP); /* Reset buffer index */ - lp->rxBuffIndex = 0; + lp->rx_tail = 0; /* Program address of descriptor list in Rx Buffer Queue register */ - at91_emac_write(lp, AT91_EMAC_RBQP, (unsigned long) dlist_phys); + macb_writel(lp, RBQP, lp->rx_ring_dma); /* Enable Receive and Transmit */ - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl | AT91_EMAC_RE | AT91_EMAC_TE); + ctl = macb_readl(lp, NCR); + macb_writel(lp, NCR, ctl | MACB_BIT(RE) | MACB_BIT(TE)); + + return 0; } -/* - * Open the ethernet interface - */ +/* Open the ethernet interface */ static int at91ether_open(struct net_device *dev) { - struct at91_private *lp = netdev_priv(dev); - unsigned long ctl; + struct macb *lp = netdev_priv(dev); + u32 ctl; + int ret; if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; - clk_enable(lp->ether_clk); /* Re-enable Peripheral clock */ - /* Clear internal statistics */ - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl | AT91_EMAC_CSR); + ctl = macb_readl(lp, NCR); + macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); - /* Update the MAC address (incase user has changed it) */ - update_mac_address(dev); + macb_set_hwaddr(lp); - /* Enable PHY interrupt */ - enable_phyirq(dev); + ret = at91ether_start(dev); + if (ret) + return ret; /* Enable MAC interrupts */ - at91_emac_write(lp, AT91_EMAC_IER, AT91_EMAC_RCOM | AT91_EMAC_RBNA - | AT91_EMAC_TUND | AT91_EMAC_RTRY | AT91_EMAC_TCOM - | AT91_EMAC_ROVR | AT91_EMAC_ABT); - - /* Determine current link speed */ - spin_lock_irq(&lp->lock); - enable_mdi(lp); - update_linkspeed(dev, 0); - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - - at91ether_start(dev); + macb_writel(lp, IER, MACB_BIT(RCOMP) | + MACB_BIT(RXUBR) | + MACB_BIT(ISR_TUND) | + MACB_BIT(ISR_RLE) | + MACB_BIT(TCOMP) | + MACB_BIT(ISR_ROVR) | + MACB_BIT(HRESP)); + + /* schedule a link state check */ + phy_start(lp->phy_dev); + netif_start_queue(dev); + return 0; } -/* - * Close the interface - */ +/* Close the interface */ static int at91ether_close(struct net_device *dev) { - struct at91_private *lp = netdev_priv(dev); - unsigned long ctl; + struct macb *lp = netdev_priv(dev); + u32 ctl; /* Disable Receiver and Transmitter */ - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl & ~(AT91_EMAC_TE | AT91_EMAC_RE)); - - /* Disable PHY interrupt */ - disable_phyirq(dev); + ctl = macb_readl(lp, NCR); + macb_writel(lp, NCR, ctl & ~(MACB_BIT(TE) | MACB_BIT(RE))); /* Disable MAC interrupts */ - at91_emac_write(lp, AT91_EMAC_IDR, AT91_EMAC_RCOM | AT91_EMAC_RBNA - | AT91_EMAC_TUND | AT91_EMAC_RTRY | AT91_EMAC_TCOM - | AT91_EMAC_ROVR | AT91_EMAC_ABT); + macb_writel(lp, IDR, MACB_BIT(RCOMP) | + MACB_BIT(RXUBR) | + MACB_BIT(ISR_TUND) | + MACB_BIT(ISR_RLE) | + MACB_BIT(TCOMP) | + MACB_BIT(ISR_ROVR) | + MACB_BIT(HRESP)); netif_stop_queue(dev); - clk_disable(lp->ether_clk); /* Disable Peripheral clock */ + dma_free_coherent(&lp->pdev->dev, + MAX_RX_DESCR * sizeof(struct macb_dma_desc), + lp->rx_ring, lp->rx_ring_dma); + lp->rx_ring = NULL; + + dma_free_coherent(&lp->pdev->dev, + MAX_RX_DESCR * MAX_RBUFF_SZ, + lp->rx_buffers, lp->rx_buffers_dma); + lp->rx_buffers = NULL; return 0; } -/* - * Transmit packet. - */ +/* Transmit packet */ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct at91_private *lp = netdev_priv(dev); + struct macb *lp = netdev_priv(dev); - if (at91_emac_read(lp, AT91_EMAC_TSR) & AT91_EMAC_TSR_BNQ) { + if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) { netif_stop_queue(dev); /* Store packet information (to free when Tx completed) */ lp->skb = skb; lp->skb_length = skb->len; - lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, DMA_TO_DEVICE); - dev->stats.tx_bytes += skb->len; + lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, + DMA_TO_DEVICE); /* Set address of the data in the Transmit Address register */ - at91_emac_write(lp, AT91_EMAC_TAR, lp->skb_physaddr); + macb_writel(lp, TAR, lp->skb_physaddr); /* Set length of the packet in the Transmit Control register */ - at91_emac_write(lp, AT91_EMAC_TCR, skb->len); + macb_writel(lp, TCR, skb->len); } else { - printk(KERN_ERR "at91_ether.c: at91ether_start_xmit() called, but device is busy!\n"); - return NETDEV_TX_BUSY; /* if we return anything but zero, dev.c:1055 calls kfree_skb(skb) - on this skb, he also reports -ENETDOWN and printk's, so either - we free and return(0) or don't free and return 1 */ + netdev_err(dev, "%s called, but device is busy!\n", __func__); + return NETDEV_TX_BUSY; } return NETDEV_TX_OK; } -/* - * Update the current statistics from the internal statistics registers. - */ -static struct net_device_stats *at91ether_stats(struct net_device *dev) -{ - struct at91_private *lp = netdev_priv(dev); - int ale, lenerr, seqe, lcol, ecol; - - if (netif_running(dev)) { - dev->stats.rx_packets += at91_emac_read(lp, AT91_EMAC_OK); /* Good frames received */ - ale = at91_emac_read(lp, AT91_EMAC_ALE); - dev->stats.rx_frame_errors += ale; /* Alignment errors */ - lenerr = at91_emac_read(lp, AT91_EMAC_ELR) + at91_emac_read(lp, AT91_EMAC_USF); - dev->stats.rx_length_errors += lenerr; /* Excessive Length or Undersize Frame error */ - seqe = at91_emac_read(lp, AT91_EMAC_SEQE); - dev->stats.rx_crc_errors += seqe; /* CRC error */ - dev->stats.rx_fifo_errors += at91_emac_read(lp, AT91_EMAC_DRFC);/* Receive buffer not available */ - dev->stats.rx_errors += (ale + lenerr + seqe - + at91_emac_read(lp, AT91_EMAC_CDE) + at91_emac_read(lp, AT91_EMAC_RJB)); - - dev->stats.tx_packets += at91_emac_read(lp, AT91_EMAC_FRA); /* Frames successfully transmitted */ - dev->stats.tx_fifo_errors += at91_emac_read(lp, AT91_EMAC_TUE); /* Transmit FIFO underruns */ - dev->stats.tx_carrier_errors += at91_emac_read(lp, AT91_EMAC_CSE); /* Carrier Sense errors */ - dev->stats.tx_heartbeat_errors += at91_emac_read(lp, AT91_EMAC_SQEE);/* Heartbeat error */ - - lcol = at91_emac_read(lp, AT91_EMAC_LCOL); - ecol = at91_emac_read(lp, AT91_EMAC_ECOL); - dev->stats.tx_window_errors += lcol; /* Late collisions */ - dev->stats.tx_aborted_errors += ecol; /* 16 collisions */ - - dev->stats.collisions += (at91_emac_read(lp, AT91_EMAC_SCOL) + at91_emac_read(lp, AT91_EMAC_MCOL) + lcol + ecol); - } - return &dev->stats; -} - -/* - * Extract received frame from buffer descriptors and sent to upper layers. +/* Extract received frame from buffer descriptors and sent to upper layers. * (Called from interrupt context) */ static void at91ether_rx(struct net_device *dev) { - struct at91_private *lp = netdev_priv(dev); - struct recv_desc_bufs *dlist; + struct macb *lp = netdev_priv(dev); unsigned char *p_recv; struct sk_buff *skb; unsigned int pktlen; - dlist = lp->dlist; - while (dlist->descriptors[lp->rxBuffIndex].addr & EMAC_DESC_DONE) { - p_recv = dlist->recv_buf[lp->rxBuffIndex]; - pktlen = dlist->descriptors[lp->rxBuffIndex].size & 0x7ff; /* Length of frame including FCS */ + while (lp->rx_ring[lp->rx_tail].addr & MACB_BIT(RX_USED)) { + p_recv = lp->rx_buffers + lp->rx_tail * MAX_RBUFF_SZ; + pktlen = MACB_BF(RX_FRMLEN, lp->rx_ring[lp->rx_tail].ctrl); skb = netdev_alloc_skb(dev, pktlen + 2); - if (skb != NULL) { + if (skb) { skb_reserve(skb, 2); memcpy(skb_put(skb, pktlen), p_recv, pktlen); skb->protocol = eth_type_trans(skb, dev); - dev->stats.rx_bytes += pktlen; + lp->stats.rx_packets++; + lp->stats.rx_bytes += pktlen; netif_rx(skb); + } else { + lp->stats.rx_dropped++; + netdev_notice(dev, "Memory squeeze, dropping packet.\n"); } - else { - dev->stats.rx_dropped += 1; - printk(KERN_NOTICE "%s: Memory squeeze, dropping packet.\n", dev->name); - } - if (dlist->descriptors[lp->rxBuffIndex].size & EMAC_MULTICAST) - dev->stats.multicast++; + if (lp->rx_ring[lp->rx_tail].ctrl & MACB_BIT(RX_MHASH_MATCH)) + lp->stats.multicast++; + + /* reset ownership bit */ + lp->rx_ring[lp->rx_tail].addr &= ~MACB_BIT(RX_USED); - dlist->descriptors[lp->rxBuffIndex].addr &= ~EMAC_DESC_DONE; /* reset ownership bit */ - if (lp->rxBuffIndex == MAX_RX_DESCR-1) /* wrap after last buffer */ - lp->rxBuffIndex = 0; + /* wrap after last buffer */ + if (lp->rx_tail == MAX_RX_DESCR - 1) + lp->rx_tail = 0; else - lp->rxBuffIndex++; + lp->rx_tail++; } } -/* - * MAC interrupt handler - */ +/* MAC interrupt handler */ static irqreturn_t at91ether_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; - struct at91_private *lp = netdev_priv(dev); - unsigned long intstatus, ctl; + struct net_device *dev = dev_id; + struct macb *lp = netdev_priv(dev); + u32 intstatus, ctl; /* MAC Interrupt Status register indicates what interrupts are pending. - It is automatically cleared once read. */ - intstatus = at91_emac_read(lp, AT91_EMAC_ISR); + * It is automatically cleared once read. + */ + intstatus = macb_readl(lp, ISR); - if (intstatus & AT91_EMAC_RCOM) /* Receive complete */ + /* Receive complete */ + if (intstatus & MACB_BIT(RCOMP)) at91ether_rx(dev); - if (intstatus & AT91_EMAC_TCOM) { /* Transmit complete */ - /* The TCOM bit is set even if the transmission failed. */ - if (intstatus & (AT91_EMAC_TUND | AT91_EMAC_RTRY)) - dev->stats.tx_errors += 1; + /* Transmit complete */ + if (intstatus & MACB_BIT(TCOMP)) { + /* The TCOM bit is set even if the transmission failed */ + if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE))) + lp->stats.tx_errors++; if (lp->skb) { dev_kfree_skb_irq(lp->skb); lp->skb = NULL; dma_unmap_single(NULL, lp->skb_physaddr, lp->skb_length, DMA_TO_DEVICE); + lp->stats.tx_packets++; + lp->stats.tx_bytes += lp->skb_length; } netif_wake_queue(dev); } - /* Work-around for Errata #11 */ - if (intstatus & AT91_EMAC_RBNA) { - ctl = at91_emac_read(lp, AT91_EMAC_CTL); - at91_emac_write(lp, AT91_EMAC_CTL, ctl & ~AT91_EMAC_RE); - at91_emac_write(lp, AT91_EMAC_CTL, ctl | AT91_EMAC_RE); + /* Work-around for EMAC Errata section 41.3.1 */ + if (intstatus & MACB_BIT(RXUBR)) { + ctl = macb_readl(lp, NCR); + macb_writel(lp, NCR, ctl & ~MACB_BIT(RE)); + macb_writel(lp, NCR, ctl | MACB_BIT(RE)); } - if (intstatus & AT91_EMAC_ROVR) - printk("%s: ROVR error\n", dev->name); + if (intstatus & MACB_BIT(ISR_ROVR)) + netdev_err(dev, "ROVR error\n"); return IRQ_HANDLED; } @@ -1000,10 +289,10 @@ static const struct net_device_ops at91ether_netdev_ops = { .ndo_open = at91ether_open, .ndo_stop = at91ether_close, .ndo_start_xmit = at91ether_start_xmit, - .ndo_get_stats = at91ether_stats, - .ndo_set_rx_mode = at91ether_set_multicast_list, - .ndo_set_mac_address = set_mac_address, - .ndo_do_ioctl = at91ether_ioctl, + .ndo_get_stats = macb_get_stats, + .ndo_set_rx_mode = macb_set_rx_mode, + .ndo_set_mac_address = eth_mac_addr, + .ndo_do_ioctl = macb_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1011,197 +300,160 @@ static const struct net_device_ops at91ether_netdev_ops = { #endif }; -/* - * Detect the PHY type, and its address. - */ -static int __init at91ether_phy_detect(struct at91_private *lp) +#if defined(CONFIG_OF) +static const struct of_device_id at91ether_dt_ids[] = { + { .compatible = "cdns,at91rm9200-emac" }, + { .compatible = "cdns,emac" }, + { /* sentinel */ } +}; + +MODULE_DEVICE_TABLE(of, at91ether_dt_ids); + +static int at91ether_get_phy_mode_dt(struct platform_device *pdev) { - unsigned int phyid1, phyid2; - unsigned long phy_id; - unsigned short phy_address = 0; - - while (phy_address < PHY_MAX_ADDR) { - /* Read the PHY ID registers */ - enable_mdi(lp); - read_phy(lp, phy_address, MII_PHYSID1, &phyid1); - read_phy(lp, phy_address, MII_PHYSID2, &phyid2); - disable_mdi(lp); - - phy_id = (phyid1 << 16) | (phyid2 & 0xfff0); - switch (phy_id) { - case MII_DM9161_ID: /* Davicom 9161: PHY_ID1 = 0x181, PHY_ID2 = B881 */ - case MII_DM9161A_ID: /* Davicom 9161A: PHY_ID1 = 0x181, PHY_ID2 = B8A0 */ - case MII_LXT971A_ID: /* Intel LXT971A: PHY_ID1 = 0x13, PHY_ID2 = 78E0 */ - case MII_RTL8201_ID: /* Realtek RTL8201: PHY_ID1 = 0, PHY_ID2 = 0x8201 */ - case MII_BCM5221_ID: /* Broadcom BCM5221: PHY_ID1 = 0x40, PHY_ID2 = 0x61e0 */ - case MII_DP83847_ID: /* National Semiconductor DP83847: */ - case MII_DP83848_ID: /* National Semiconductor DP83848: */ - case MII_AC101L_ID: /* Altima AC101L: PHY_ID1 = 0x22, PHY_ID2 = 0x5520 */ - case MII_KS8721_ID: /* Micrel KS8721: PHY_ID1 = 0x22, PHY_ID2 = 0x1610 */ - case MII_T78Q21x3_ID: /* Teridian 78Q21x3: PHY_ID1 = 0x0E, PHY_ID2 = 7237 */ - case MII_LAN83C185_ID: /* SMSC LAN83C185: PHY_ID1 = 0x0007, PHY_ID2 = 0xC0A1 */ - /* store detected values */ - lp->phy_type = phy_id; /* Type of PHY connected */ - lp->phy_address = phy_address; /* MDI address of PHY */ - return 1; - } + struct device_node *np = pdev->dev.of_node; - phy_address++; - } + if (np) + return of_get_phy_mode(np); - return 0; /* not detected */ + return -ENODEV; } +static int at91ether_get_hwaddr_dt(struct macb *bp) +{ + struct device_node *np = bp->pdev->dev.of_node; -/* - * Detect MAC & PHY and perform ethernet interface initialization - */ + if (np) { + const char *mac = of_get_mac_address(np); + if (mac) { + memcpy(bp->dev->dev_addr, mac, ETH_ALEN); + return 0; + } + } + + return -ENODEV; +} +#else +static int at91ether_get_phy_mode_dt(struct platform_device *pdev) +{ + return -ENODEV; +} +static int at91ether_get_hwaddr_dt(struct macb *bp) +{ + return -ENODEV; +} +#endif + +/* Detect MAC & PHY and perform ethernet interface initialization */ static int __init at91ether_probe(struct platform_device *pdev) { struct macb_platform_data *board_data = pdev->dev.platform_data; struct resource *regs; struct net_device *dev; - struct at91_private *lp; + struct phy_device *phydev; + struct pinctrl *pinctrl; + struct macb *lp; int res; + u32 reg; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) return -ENOENT; - dev = alloc_etherdev(sizeof(struct at91_private)); + pinctrl = devm_pinctrl_get_select_default(&pdev->dev); + if (IS_ERR(pinctrl)) { + res = PTR_ERR(pinctrl); + if (res == -EPROBE_DEFER) + return res; + + dev_warn(&pdev->dev, "No pinctrl provided\n"); + } + + dev = alloc_etherdev(sizeof(struct macb)); if (!dev) return -ENOMEM; lp = netdev_priv(dev); - lp->board_data = *board_data; + lp->pdev = pdev; + lp->dev = dev; spin_lock_init(&lp->lock); - dev->base_addr = regs->start; /* physical base address */ - lp->emac_base = ioremap(regs->start, regs->end - regs->start + 1); - if (!lp->emac_base) { + /* physical base address */ + dev->base_addr = regs->start; + lp->regs = devm_ioremap(&pdev->dev, regs->start, resource_size(regs)); + if (!lp->regs) { res = -ENOMEM; goto err_free_dev; } /* Clock */ - lp->ether_clk = clk_get(&pdev->dev, "ether_clk"); - if (IS_ERR(lp->ether_clk)) { - res = PTR_ERR(lp->ether_clk); - goto err_ioumap; + lp->pclk = devm_clk_get(&pdev->dev, "ether_clk"); + if (IS_ERR(lp->pclk)) { + res = PTR_ERR(lp->pclk); + goto err_free_dev; } - clk_enable(lp->ether_clk); + clk_enable(lp->pclk); /* Install the interrupt handler */ dev->irq = platform_get_irq(pdev, 0); - if (request_irq(dev->irq, at91ether_interrupt, 0, dev->name, dev)) { - res = -EBUSY; + res = devm_request_irq(&pdev->dev, dev->irq, at91ether_interrupt, 0, dev->name, dev); + if (res) goto err_disable_clock; - } - - /* Allocate memory for DMA Receive descriptors */ - lp->dlist = (struct recv_desc_bufs *) dma_alloc_coherent(NULL, sizeof(struct recv_desc_bufs), (dma_addr_t *) &lp->dlist_phys, GFP_KERNEL); - if (lp->dlist == NULL) { - res = -ENOMEM; - goto err_free_irq; - } ether_setup(dev); dev->netdev_ops = &at91ether_netdev_ops; - dev->ethtool_ops = &at91ether_ethtool_ops; + dev->ethtool_ops = &macb_ethtool_ops; platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); - get_mac_address(dev); /* Get ethernet address and store it in dev->dev_addr */ - update_mac_address(dev); /* Program ethernet address into MAC */ - - at91_emac_write(lp, AT91_EMAC_CTL, 0); + res = at91ether_get_hwaddr_dt(lp); + if (res < 0) + macb_get_hwaddr(lp); - if (board_data->is_rmii) - at91_emac_write(lp, AT91_EMAC_CFG, AT91_EMAC_CLK_DIV32 | AT91_EMAC_BIG | AT91_EMAC_RMII); - else - at91_emac_write(lp, AT91_EMAC_CFG, AT91_EMAC_CLK_DIV32 | AT91_EMAC_BIG); - - /* Detect PHY */ - if (!at91ether_phy_detect(lp)) { - printk(KERN_ERR "at91_ether: Could not detect ethernet PHY\n"); - res = -ENODEV; - goto err_free_dmamem; + res = at91ether_get_phy_mode_dt(pdev); + if (res < 0) { + if (board_data && board_data->is_rmii) + lp->phy_interface = PHY_INTERFACE_MODE_RMII; + else + lp->phy_interface = PHY_INTERFACE_MODE_MII; + } else { + lp->phy_interface = res; } - initialize_phy(lp); + macb_writel(lp, NCR, 0); + + reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); + if (lp->phy_interface == PHY_INTERFACE_MODE_RMII) + reg |= MACB_BIT(RM9200_RMII); - lp->mii.dev = dev; /* Support for ethtool */ - lp->mii.mdio_read = mdio_read; - lp->mii.mdio_write = mdio_write; - lp->mii.phy_id = lp->phy_address; - lp->mii.phy_id_mask = 0x1f; - lp->mii.reg_num_mask = 0x1f; + macb_writel(lp, NCFGR, reg); /* Register the network interface */ res = register_netdev(dev); if (res) - goto err_free_dmamem; - - /* Determine current link speed */ - spin_lock_irq(&lp->lock); - enable_mdi(lp); - update_linkspeed(dev, 0); - disable_mdi(lp); - spin_unlock_irq(&lp->lock); - netif_carrier_off(dev); /* will be enabled in open() */ - - /* If board has no PHY IRQ, use a timer to poll the PHY */ - if (gpio_is_valid(lp->board_data.phy_irq_pin)) { - gpio_request(board_data->phy_irq_pin, "ethernet_phy"); - } else { - /* If board has no PHY IRQ, use a timer to poll the PHY */ - init_timer(&lp->check_timer); - lp->check_timer.data = (unsigned long)dev; - lp->check_timer.function = at91ether_check_link; - } + goto err_disable_clock; + + if (macb_mii_init(lp) != 0) + goto err_out_unregister_netdev; + + /* will be enabled in open() */ + netif_carrier_off(dev); + + phydev = lp->phy_dev; + netdev_info(dev, "attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", + phydev->drv->name, dev_name(&phydev->dev), + phydev->irq); /* Display ethernet banner */ - printk(KERN_INFO "%s: AT91 ethernet at 0x%08x int=%d %s%s (%pM)\n", - dev->name, (uint) dev->base_addr, dev->irq, - at91_emac_read(lp, AT91_EMAC_CFG) & AT91_EMAC_SPD ? "100-" : "10-", - at91_emac_read(lp, AT91_EMAC_CFG) & AT91_EMAC_FD ? "FullDuplex" : "HalfDuplex", - dev->dev_addr); - if ((lp->phy_type == MII_DM9161_ID) || (lp->phy_type == MII_DM9161A_ID)) - printk(KERN_INFO "%s: Davicom 9161 PHY %s\n", dev->name, (lp->phy_media == PORT_FIBRE) ? "(Fiber)" : "(Copper)"); - else if (lp->phy_type == MII_LXT971A_ID) - printk(KERN_INFO "%s: Intel LXT971A PHY\n", dev->name); - else if (lp->phy_type == MII_RTL8201_ID) - printk(KERN_INFO "%s: Realtek RTL8201(B)L PHY\n", dev->name); - else if (lp->phy_type == MII_BCM5221_ID) - printk(KERN_INFO "%s: Broadcom BCM5221 PHY\n", dev->name); - else if (lp->phy_type == MII_DP83847_ID) - printk(KERN_INFO "%s: National Semiconductor DP83847 PHY\n", dev->name); - else if (lp->phy_type == MII_DP83848_ID) - printk(KERN_INFO "%s: National Semiconductor DP83848 PHY\n", dev->name); - else if (lp->phy_type == MII_AC101L_ID) - printk(KERN_INFO "%s: Altima AC101L PHY\n", dev->name); - else if (lp->phy_type == MII_KS8721_ID) - printk(KERN_INFO "%s: Micrel KS8721 PHY\n", dev->name); - else if (lp->phy_type == MII_T78Q21x3_ID) - printk(KERN_INFO "%s: Teridian 78Q21x3 PHY\n", dev->name); - else if (lp->phy_type == MII_LAN83C185_ID) - printk(KERN_INFO "%s: SMSC LAN83C185 PHY\n", dev->name); - - clk_disable(lp->ether_clk); /* Disable Peripheral clock */ + netdev_info(dev, "AT91 ethernet at 0x%08lx int=%d (%pM)\n", + dev->base_addr, dev->irq, dev->dev_addr); return 0; - -err_free_dmamem: - platform_set_drvdata(pdev, NULL); - dma_free_coherent(NULL, sizeof(struct recv_desc_bufs), lp->dlist, (dma_addr_t)lp->dlist_phys); -err_free_irq: - free_irq(dev->irq, dev); +err_out_unregister_netdev: + unregister_netdev(dev); err_disable_clock: - clk_disable(lp->ether_clk); - clk_put(lp->ether_clk); -err_ioumap: - iounmap(lp->emac_base); + clk_disable(lp->pclk); err_free_dev: free_netdev(dev); return res; @@ -1210,38 +462,33 @@ err_free_dev: static int __devexit at91ether_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); - struct at91_private *lp = netdev_priv(dev); + struct macb *lp = netdev_priv(dev); - if (gpio_is_valid(lp->board_data.phy_irq_pin)) - gpio_free(lp->board_data.phy_irq_pin); + if (lp->phy_dev) + phy_disconnect(lp->phy_dev); + mdiobus_unregister(lp->mii_bus); + kfree(lp->mii_bus->irq); + mdiobus_free(lp->mii_bus); unregister_netdev(dev); - free_irq(dev->irq, dev); - dma_free_coherent(NULL, sizeof(struct recv_desc_bufs), lp->dlist, (dma_addr_t)lp->dlist_phys); - clk_put(lp->ether_clk); - - platform_set_drvdata(pdev, NULL); + clk_disable(lp->pclk); free_netdev(dev); + platform_set_drvdata(pdev, NULL); + return 0; } #ifdef CONFIG_PM - static int at91ether_suspend(struct platform_device *pdev, pm_message_t mesg) { struct net_device *net_dev = platform_get_drvdata(pdev); - struct at91_private *lp = netdev_priv(net_dev); + struct macb *lp = netdev_priv(net_dev); if (netif_running(net_dev)) { - if (gpio_is_valid(lp->board_data.phy_irq_pin)) { - int phy_irq = gpio_to_irq(lp->board_data.phy_irq_pin); - disable_irq(phy_irq); - } - netif_stop_queue(net_dev); netif_device_detach(net_dev); - clk_disable(lp->ether_clk); + clk_disable(lp->pclk); } return 0; } @@ -1249,22 +496,16 @@ static int at91ether_suspend(struct platform_device *pdev, pm_message_t mesg) static int at91ether_resume(struct platform_device *pdev) { struct net_device *net_dev = platform_get_drvdata(pdev); - struct at91_private *lp = netdev_priv(net_dev); + struct macb *lp = netdev_priv(net_dev); if (netif_running(net_dev)) { - clk_enable(lp->ether_clk); + clk_enable(lp->pclk); netif_device_attach(net_dev); netif_start_queue(net_dev); - - if (gpio_is_valid(lp->board_data.phy_irq_pin)) { - int phy_irq = gpio_to_irq(lp->board_data.phy_irq_pin); - enable_irq(phy_irq); - } } return 0; } - #else #define at91ether_suspend NULL #define at91ether_resume NULL @@ -1275,8 +516,9 @@ static struct platform_driver at91ether_driver = { .suspend = at91ether_suspend, .resume = at91ether_resume, .driver = { - .name = DRV_NAME, + .name = "at91_ether", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(at91ether_dt_ids), }, }; @@ -1296,4 +538,4 @@ module_exit(at91ether_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AT91RM9200 EMAC Ethernet driver"); MODULE_AUTHOR("Andrew Victor"); -MODULE_ALIAS("platform:" DRV_NAME); +MODULE_ALIAS("platform:at91_ether"); diff --git a/drivers/net/ethernet/cadence/at91_ether.h b/drivers/net/ethernet/cadence/at91_ether.h deleted file mode 100644 index 0ef6328fa7f..00000000000 --- a/drivers/net/ethernet/cadence/at91_ether.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Ethernet driver for the Atmel AT91RM9200 (Thunder) - * - * Copyright (C) SAN People (Pty) Ltd - * - * Based on an earlier Atmel EMAC macrocell driver by Atmel and Lineo Inc. - * Initial version by Rick Bronson. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef AT91_ETHERNET -#define AT91_ETHERNET - - -/* Davicom 9161 PHY */ -#define MII_DM9161_ID 0x0181b880 -#define MII_DM9161A_ID 0x0181b8a0 -#define MII_DSCR_REG 16 -#define MII_DSCSR_REG 17 -#define MII_DSINTR_REG 21 - -/* Intel LXT971A PHY */ -#define MII_LXT971A_ID 0x001378E0 -#define MII_ISINTE_REG 18 -#define MII_ISINTS_REG 19 -#define MII_LEDCTRL_REG 20 - -/* Realtek RTL8201 PHY */ -#define MII_RTL8201_ID 0x00008200 - -/* Broadcom BCM5221 PHY */ -#define MII_BCM5221_ID 0x004061e0 -#define MII_BCMINTR_REG 26 - -/* National Semiconductor DP83847 */ -#define MII_DP83847_ID 0x20005c30 - -/* National Semiconductor DP83848 */ -#define MII_DP83848_ID 0x20005c90 -#define MII_DPPHYSTS_REG 16 -#define MII_DPMICR_REG 17 -#define MII_DPMISR_REG 18 - -/* Altima AC101L PHY */ -#define MII_AC101L_ID 0x00225520 - -/* Micrel KS8721 PHY */ -#define MII_KS8721_ID 0x00221610 - -/* Teridian 78Q2123/78Q2133 */ -#define MII_T78Q21x3_ID 0x000e7230 -#define MII_T78Q21INT_REG 17 - -/* SMSC LAN83C185 */ -#define MII_LAN83C185_ID 0x0007C0A0 - -/* ........................................................................ */ - -#define MAX_RBUFF_SZ 0x600 /* 1518 rounded up */ -#define MAX_RX_DESCR 9 /* max number of receive buffers */ - -#define EMAC_DESC_DONE 0x00000001 /* bit for if DMA is done */ -#define EMAC_DESC_WRAP 0x00000002 /* bit for wrap */ - -#define EMAC_BROADCAST 0x80000000 /* broadcast address */ -#define EMAC_MULTICAST 0x40000000 /* multicast address */ -#define EMAC_UNICAST 0x20000000 /* unicast address */ - -struct rbf_t -{ - unsigned int addr; - unsigned long size; -}; - -struct recv_desc_bufs -{ - struct rbf_t descriptors[MAX_RX_DESCR]; /* must be on sizeof (rbf_t) boundary */ - char recv_buf[MAX_RX_DESCR][MAX_RBUFF_SZ]; /* must be on long boundary */ -}; - -struct at91_private -{ - struct mii_if_info mii; /* ethtool support */ - struct macb_platform_data board_data; /* board-specific - * configuration (shared with - * macb for common data */ - void __iomem *emac_base; /* base register address */ - struct clk *ether_clk; /* clock */ - - /* PHY */ - unsigned long phy_type; /* type of PHY (PHY_ID) */ - spinlock_t lock; /* lock for MDI interface */ - short phy_media; /* media interface type */ - unsigned short phy_address; /* 5-bit MDI address of PHY (0..31) */ - struct timer_list check_timer; /* Poll link status */ - - /* Transmit */ - struct sk_buff *skb; /* holds skb until xmit interrupt completes */ - dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ - int skb_length; /* saved skb length for pci_unmap_single */ - - /* Receive */ - int rxBuffIndex; /* index into receive descriptor list */ - struct recv_desc_bufs *dlist; /* descriptor list address */ - struct recv_desc_bufs *dlist_phys; /* descriptor list physical address */ -}; - -#endif diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 033064b7b57..1fac769989a 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -26,37 +26,79 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_net.h> +#include <linux/pinctrl/consumer.h> #include "macb.h" #define RX_BUFFER_SIZE 128 -#define RX_RING_SIZE 512 -#define RX_RING_BYTES (sizeof(struct dma_desc) * RX_RING_SIZE) +#define RX_RING_SIZE 512 /* must be power of 2 */ +#define RX_RING_BYTES (sizeof(struct macb_dma_desc) * RX_RING_SIZE) -/* Make the IP header word-aligned (the ethernet header is 14 bytes) */ -#define RX_OFFSET 2 - -#define TX_RING_SIZE 128 -#define DEF_TX_RING_PENDING (TX_RING_SIZE - 1) -#define TX_RING_BYTES (sizeof(struct dma_desc) * TX_RING_SIZE) - -#define TX_RING_GAP(bp) \ - (TX_RING_SIZE - (bp)->tx_pending) -#define TX_BUFFS_AVAIL(bp) \ - (((bp)->tx_tail <= (bp)->tx_head) ? \ - (bp)->tx_tail + (bp)->tx_pending - (bp)->tx_head : \ - (bp)->tx_tail - (bp)->tx_head - TX_RING_GAP(bp)) -#define NEXT_TX(n) (((n) + 1) & (TX_RING_SIZE - 1)) - -#define NEXT_RX(n) (((n) + 1) & (RX_RING_SIZE - 1)) +#define TX_RING_SIZE 128 /* must be power of 2 */ +#define TX_RING_BYTES (sizeof(struct macb_dma_desc) * TX_RING_SIZE) /* minimum number of free TX descriptors before waking up TX process */ #define MACB_TX_WAKEUP_THRESH (TX_RING_SIZE / 4) #define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ | MACB_BIT(ISR_ROVR)) +#define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ + | MACB_BIT(ISR_RLE) \ + | MACB_BIT(TXERR)) +#define MACB_TX_INT_FLAGS (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP)) + +/* + * Graceful stop timeouts in us. We should allow up to + * 1 frame time (10 Mbits/s, full-duplex, ignoring collisions) + */ +#define MACB_HALT_TIMEOUT 1230 + +/* Ring buffer accessors */ +static unsigned int macb_tx_ring_wrap(unsigned int index) +{ + return index & (TX_RING_SIZE - 1); +} + +static unsigned int macb_tx_ring_avail(struct macb *bp) +{ + return (bp->tx_tail - bp->tx_head) & (TX_RING_SIZE - 1); +} + +static struct macb_dma_desc *macb_tx_desc(struct macb *bp, unsigned int index) +{ + return &bp->tx_ring[macb_tx_ring_wrap(index)]; +} + +static struct macb_tx_skb *macb_tx_skb(struct macb *bp, unsigned int index) +{ + return &bp->tx_skb[macb_tx_ring_wrap(index)]; +} + +static dma_addr_t macb_tx_dma(struct macb *bp, unsigned int index) +{ + dma_addr_t offset; + + offset = macb_tx_ring_wrap(index) * sizeof(struct macb_dma_desc); + + return bp->tx_ring_dma + offset; +} + +static unsigned int macb_rx_ring_wrap(unsigned int index) +{ + return index & (RX_RING_SIZE - 1); +} + +static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index) +{ + return &bp->rx_ring[macb_rx_ring_wrap(index)]; +} -static void __macb_set_hwaddr(struct macb *bp) +static void *macb_rx_buffer(struct macb *bp, unsigned int index) +{ + return bp->rx_buffers + RX_BUFFER_SIZE * macb_rx_ring_wrap(index); +} + +void macb_set_hwaddr(struct macb *bp) { u32 bottom; u16 top; @@ -66,30 +108,49 @@ static void __macb_set_hwaddr(struct macb *bp) top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4))); macb_or_gem_writel(bp, SA1T, top); } +EXPORT_SYMBOL_GPL(macb_set_hwaddr); -static void __init macb_get_hwaddr(struct macb *bp) +void macb_get_hwaddr(struct macb *bp) { + struct macb_platform_data *pdata; u32 bottom; u16 top; u8 addr[6]; + int i; - bottom = macb_or_gem_readl(bp, SA1B); - top = macb_or_gem_readl(bp, SA1T); + pdata = bp->pdev->dev.platform_data; - addr[0] = bottom & 0xff; - addr[1] = (bottom >> 8) & 0xff; - addr[2] = (bottom >> 16) & 0xff; - addr[3] = (bottom >> 24) & 0xff; - addr[4] = top & 0xff; - addr[5] = (top >> 8) & 0xff; + /* Check all 4 address register for vaild address */ + for (i = 0; i < 4; i++) { + bottom = macb_or_gem_readl(bp, SA1B + i * 8); + top = macb_or_gem_readl(bp, SA1T + i * 8); + + if (pdata && pdata->rev_eth_addr) { + addr[5] = bottom & 0xff; + addr[4] = (bottom >> 8) & 0xff; + addr[3] = (bottom >> 16) & 0xff; + addr[2] = (bottom >> 24) & 0xff; + addr[1] = top & 0xff; + addr[0] = (top & 0xff00) >> 8; + } else { + addr[0] = bottom & 0xff; + addr[1] = (bottom >> 8) & 0xff; + addr[2] = (bottom >> 16) & 0xff; + addr[3] = (bottom >> 24) & 0xff; + addr[4] = top & 0xff; + addr[5] = (top >> 8) & 0xff; + } - if (is_valid_ether_addr(addr)) { - memcpy(bp->dev->dev_addr, addr, sizeof(addr)); - } else { - netdev_info(bp->dev, "invalid hw address, using random\n"); - eth_hw_addr_random(bp->dev); + if (is_valid_ether_addr(addr)) { + memcpy(bp->dev->dev_addr, addr, sizeof(addr)); + return; + } } + + netdev_info(bp->dev, "invalid hw address, using random\n"); + eth_hw_addr_random(bp->dev); } +EXPORT_SYMBOL_GPL(macb_get_hwaddr); static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { @@ -152,13 +213,17 @@ static void macb_handle_link_change(struct net_device *dev) reg = macb_readl(bp, NCFGR); reg &= ~(MACB_BIT(SPD) | MACB_BIT(FD)); + if (macb_is_gem(bp)) + reg &= ~GEM_BIT(GBE); if (phydev->duplex) reg |= MACB_BIT(FD); if (phydev->speed == SPEED_100) reg |= MACB_BIT(SPD); + if (phydev->speed == SPEED_1000) + reg |= GEM_BIT(GBE); - macb_writel(bp, NCFGR, reg); + macb_or_gem_writel(bp, NCFGR, reg); bp->speed = phydev->speed; bp->duplex = phydev->duplex; @@ -216,7 +281,10 @@ static int macb_mii_probe(struct net_device *dev) } /* mask with MAC supported features */ - phydev->supported &= PHY_BASIC_FEATURES; + if (macb_is_gem(bp)) + phydev->supported &= PHY_GBIT_FEATURES; + else + phydev->supported &= PHY_BASIC_FEATURES; phydev->advertising = phydev->supported; @@ -228,7 +296,7 @@ static int macb_mii_probe(struct net_device *dev) return 0; } -static int macb_mii_init(struct macb *bp) +int macb_mii_init(struct macb *bp) { struct macb_platform_data *pdata; int err = -ENXIO, i; @@ -284,6 +352,7 @@ err_out_free_mdiobus: err_out: return err; } +EXPORT_SYMBOL_GPL(macb_mii_init); static void macb_update_stats(struct macb *bp) { @@ -297,93 +366,147 @@ static void macb_update_stats(struct macb *bp) *p += __raw_readl(reg); } -static void macb_tx(struct macb *bp) +static int macb_halt_tx(struct macb *bp) { - unsigned int tail; - unsigned int head; - u32 status; + unsigned long halt_time, timeout; + u32 status; - status = macb_readl(bp, TSR); - macb_writel(bp, TSR, status); + macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(THALT)); - netdev_dbg(bp->dev, "macb_tx status = %02lx\n", (unsigned long)status); + timeout = jiffies + usecs_to_jiffies(MACB_HALT_TIMEOUT); + do { + halt_time = jiffies; + status = macb_readl(bp, TSR); + if (!(status & MACB_BIT(TGO))) + return 0; - if (status & (MACB_BIT(UND) | MACB_BIT(TSR_RLE))) { - int i; - netdev_err(bp->dev, "TX %s, resetting buffers\n", - status & MACB_BIT(UND) ? - "underrun" : "retry limit exceeded"); + usleep_range(10, 250); + } while (time_before(halt_time, timeout)); - /* Transfer ongoing, disable transmitter, to avoid confusion */ - if (status & MACB_BIT(TGO)) - macb_writel(bp, NCR, macb_readl(bp, NCR) & ~MACB_BIT(TE)); + return -ETIMEDOUT; +} - head = bp->tx_head; +static void macb_tx_error_task(struct work_struct *work) +{ + struct macb *bp = container_of(work, struct macb, tx_error_task); + struct macb_tx_skb *tx_skb; + struct sk_buff *skb; + unsigned int tail; - /*Mark all the buffer as used to avoid sending a lost buffer*/ - for (i = 0; i < TX_RING_SIZE; i++) - bp->tx_ring[i].ctrl = MACB_BIT(TX_USED); + netdev_vdbg(bp->dev, "macb_tx_error_task: t = %u, h = %u\n", + bp->tx_tail, bp->tx_head); - /* Add wrap bit */ - bp->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); + /* Make sure nobody is trying to queue up new packets */ + netif_stop_queue(bp->dev); - /* free transmit buffer in upper layer*/ - for (tail = bp->tx_tail; tail != head; tail = NEXT_TX(tail)) { - struct ring_info *rp = &bp->tx_skb[tail]; - struct sk_buff *skb = rp->skb; + /* + * Stop transmission now + * (in case we have just queued new packets) + */ + if (macb_halt_tx(bp)) + /* Just complain for now, reinitializing TX path can be good */ + netdev_err(bp->dev, "BUG: halt tx timed out\n"); - BUG_ON(skb == NULL); + /* No need for the lock here as nobody will interrupt us anymore */ - rmb(); + /* + * Treat frames in TX queue including the ones that caused the error. + * Free transmit buffers in upper layer. + */ + for (tail = bp->tx_tail; tail != bp->tx_head; tail++) { + struct macb_dma_desc *desc; + u32 ctrl; + + desc = macb_tx_desc(bp, tail); + ctrl = desc->ctrl; + tx_skb = macb_tx_skb(bp, tail); + skb = tx_skb->skb; + + if (ctrl & MACB_BIT(TX_USED)) { + netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n", + macb_tx_ring_wrap(tail), skb->data); + bp->stats.tx_packets++; + bp->stats.tx_bytes += skb->len; + } else { + /* + * "Buffers exhausted mid-frame" errors may only happen + * if the driver is buggy, so complain loudly about those. + * Statistics are updated by hardware. + */ + if (ctrl & MACB_BIT(TX_BUF_EXHAUSTED)) + netdev_err(bp->dev, + "BUG: TX buffers exhausted mid-frame\n"); - dma_unmap_single(&bp->pdev->dev, rp->mapping, skb->len, - DMA_TO_DEVICE); - rp->skb = NULL; - dev_kfree_skb_irq(skb); + desc->ctrl = ctrl | MACB_BIT(TX_USED); } - bp->tx_head = bp->tx_tail = 0; - - /* Enable the transmitter again */ - if (status & MACB_BIT(TGO)) - macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TE)); + dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, + DMA_TO_DEVICE); + tx_skb->skb = NULL; + dev_kfree_skb(skb); } - if (!(status & MACB_BIT(COMP))) - /* - * This may happen when a buffer becomes complete - * between reading the ISR and scanning the - * descriptors. Nothing to worry about. - */ - return; + /* Make descriptor updates visible to hardware */ + wmb(); + + /* Reinitialize the TX desc queue */ + macb_writel(bp, TBQP, bp->tx_ring_dma); + /* Make TX ring reflect state of hardware */ + bp->tx_head = bp->tx_tail = 0; + + /* Now we are ready to start transmission again */ + netif_wake_queue(bp->dev); + + /* Housework before enabling TX IRQ */ + macb_writel(bp, TSR, macb_readl(bp, TSR)); + macb_writel(bp, IER, MACB_TX_INT_FLAGS); +} + +static void macb_tx_interrupt(struct macb *bp) +{ + unsigned int tail; + unsigned int head; + u32 status; + + status = macb_readl(bp, TSR); + macb_writel(bp, TSR, status); + + netdev_vdbg(bp->dev, "macb_tx_interrupt status = 0x%03lx\n", + (unsigned long)status); head = bp->tx_head; - for (tail = bp->tx_tail; tail != head; tail = NEXT_TX(tail)) { - struct ring_info *rp = &bp->tx_skb[tail]; - struct sk_buff *skb = rp->skb; - u32 bufstat; + for (tail = bp->tx_tail; tail != head; tail++) { + struct macb_tx_skb *tx_skb; + struct sk_buff *skb; + struct macb_dma_desc *desc; + u32 ctrl; - BUG_ON(skb == NULL); + desc = macb_tx_desc(bp, tail); + /* Make hw descriptor updates visible to CPU */ rmb(); - bufstat = bp->tx_ring[tail].ctrl; - if (!(bufstat & MACB_BIT(TX_USED))) + ctrl = desc->ctrl; + + if (!(ctrl & MACB_BIT(TX_USED))) break; - netdev_dbg(bp->dev, "skb %u (data %p) TX complete\n", - tail, skb->data); - dma_unmap_single(&bp->pdev->dev, rp->mapping, skb->len, + tx_skb = macb_tx_skb(bp, tail); + skb = tx_skb->skb; + + netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n", + macb_tx_ring_wrap(tail), skb->data); + dma_unmap_single(&bp->pdev->dev, tx_skb->mapping, skb->len, DMA_TO_DEVICE); bp->stats.tx_packets++; bp->stats.tx_bytes += skb->len; - rp->skb = NULL; + tx_skb->skb = NULL; dev_kfree_skb_irq(skb); } bp->tx_tail = tail; - if (netif_queue_stopped(bp->dev) && - TX_BUFFS_AVAIL(bp) > MACB_TX_WAKEUP_THRESH) + if (netif_queue_stopped(bp->dev) + && macb_tx_ring_avail(bp) > MACB_TX_WAKEUP_THRESH) netif_wake_queue(bp->dev); } @@ -392,31 +515,48 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, { unsigned int len; unsigned int frag; - unsigned int offset = 0; + unsigned int offset; struct sk_buff *skb; + struct macb_dma_desc *desc; - len = MACB_BFEXT(RX_FRMLEN, bp->rx_ring[last_frag].ctrl); + desc = macb_rx_desc(bp, last_frag); + len = MACB_BFEXT(RX_FRMLEN, desc->ctrl); - netdev_dbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", - first_frag, last_frag, len); + netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n", + macb_rx_ring_wrap(first_frag), + macb_rx_ring_wrap(last_frag), len); - skb = netdev_alloc_skb(bp->dev, len + RX_OFFSET); + /* + * The ethernet header starts NET_IP_ALIGN bytes into the + * first buffer. Since the header is 14 bytes, this makes the + * payload word-aligned. + * + * Instead of calling skb_reserve(NET_IP_ALIGN), we just copy + * the two padding bytes into the skb so that we avoid hitting + * the slowpath in memcpy(), and pull them off afterwards. + */ + skb = netdev_alloc_skb(bp->dev, len + NET_IP_ALIGN); if (!skb) { bp->stats.rx_dropped++; - for (frag = first_frag; ; frag = NEXT_RX(frag)) { - bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED); + for (frag = first_frag; ; frag++) { + desc = macb_rx_desc(bp, frag); + desc->addr &= ~MACB_BIT(RX_USED); if (frag == last_frag) break; } + + /* Make descriptor updates visible to hardware */ wmb(); + return 1; } - skb_reserve(skb, RX_OFFSET); + offset = 0; + len += NET_IP_ALIGN; skb_checksum_none_assert(skb); skb_put(skb, len); - for (frag = first_frag; ; frag = NEXT_RX(frag)) { + for (frag = first_frag; ; frag++) { unsigned int frag_len = RX_BUFFER_SIZE; if (offset + frag_len > len) { @@ -424,22 +564,24 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag, frag_len = len - offset; } skb_copy_to_linear_data_offset(skb, offset, - (bp->rx_buffers + - (RX_BUFFER_SIZE * frag)), - frag_len); + macb_rx_buffer(bp, frag), frag_len); offset += RX_BUFFER_SIZE; - bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED); - wmb(); + desc = macb_rx_desc(bp, frag); + desc->addr &= ~MACB_BIT(RX_USED); if (frag == last_frag) break; } + /* Make descriptor updates visible to hardware */ + wmb(); + + __skb_pull(skb, NET_IP_ALIGN); skb->protocol = eth_type_trans(skb, bp->dev); bp->stats.rx_packets++; - bp->stats.rx_bytes += len; - netdev_dbg(bp->dev, "received skb of length %u, csum: %08x\n", + bp->stats.rx_bytes += skb->len; + netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", skb->len, skb->csum); netif_receive_skb(skb); @@ -452,8 +594,12 @@ static void discard_partial_frame(struct macb *bp, unsigned int begin, { unsigned int frag; - for (frag = begin; frag != end; frag = NEXT_RX(frag)) - bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED); + for (frag = begin; frag != end; frag++) { + struct macb_dma_desc *desc = macb_rx_desc(bp, frag); + desc->addr &= ~MACB_BIT(RX_USED); + } + + /* Make descriptor updates visible to hardware */ wmb(); /* @@ -466,15 +612,18 @@ static void discard_partial_frame(struct macb *bp, unsigned int begin, static int macb_rx(struct macb *bp, int budget) { int received = 0; - unsigned int tail = bp->rx_tail; + unsigned int tail; int first_frag = -1; - for (; budget > 0; tail = NEXT_RX(tail)) { + for (tail = bp->rx_tail; budget > 0; tail++) { + struct macb_dma_desc *desc = macb_rx_desc(bp, tail); u32 addr, ctrl; + /* Make hw descriptor updates visible to CPU */ rmb(); - addr = bp->rx_ring[tail].addr; - ctrl = bp->rx_ring[tail].ctrl; + + addr = desc->addr; + ctrl = desc->ctrl; if (!(addr & MACB_BIT(RX_USED))) break; @@ -517,7 +666,7 @@ static int macb_poll(struct napi_struct *napi, int budget) work_done = 0; - netdev_dbg(bp->dev, "poll: status = %08lx, budget = %d\n", + netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); work_done = macb_rx(bp, budget); @@ -552,10 +701,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) while (status) { /* close possible race with dev_close */ if (unlikely(!netif_running(dev))) { - macb_writel(bp, IDR, ~0UL); + macb_writel(bp, IDR, -1); break; } + netdev_vdbg(bp->dev, "isr = 0x%08lx\n", (unsigned long)status); + if (status & MACB_RX_INT_FLAGS) { /* * There's no point taking any more interrupts @@ -567,14 +718,19 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) macb_writel(bp, IDR, MACB_RX_INT_FLAGS); if (napi_schedule_prep(&bp->napi)) { - netdev_dbg(bp->dev, "scheduling RX softirq\n"); + netdev_vdbg(bp->dev, "scheduling RX softirq\n"); __napi_schedule(&bp->napi); } } - if (status & (MACB_BIT(TCOMP) | MACB_BIT(ISR_TUND) | - MACB_BIT(ISR_RLE))) - macb_tx(bp); + if (unlikely(status & (MACB_TX_ERR_FLAGS))) { + macb_writel(bp, IDR, MACB_TX_INT_FLAGS); + schedule_work(&bp->tx_error_task); + break; + } + + if (status & MACB_BIT(TCOMP)) + macb_tx_interrupt(bp); /* * Link change detection isn't possible with RMII, so we'll @@ -626,11 +782,13 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) struct macb *bp = netdev_priv(dev); dma_addr_t mapping; unsigned int len, entry; + struct macb_dma_desc *desc; + struct macb_tx_skb *tx_skb; u32 ctrl; unsigned long flags; -#ifdef DEBUG - netdev_dbg(bp->dev, +#if defined(DEBUG) && defined(VERBOSE_DEBUG) + netdev_vdbg(bp->dev, "start_xmit: len %u head %p data %p tail %p end %p\n", skb->len, skb->head, skb->data, skb_tail_pointer(skb), skb_end_pointer(skb)); @@ -642,7 +800,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_lock_irqsave(&bp->lock, flags); /* This is a hard error, log it. */ - if (TX_BUFFS_AVAIL(bp) < 1) { + if (macb_tx_ring_avail(bp) < 1) { netif_stop_queue(dev); spin_unlock_irqrestore(&bp->lock, flags); netdev_err(bp->dev, "BUG! Tx Ring full when queue awake!\n"); @@ -651,13 +809,16 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_BUSY; } - entry = bp->tx_head; - netdev_dbg(bp->dev, "Allocated ring entry %u\n", entry); + entry = macb_tx_ring_wrap(bp->tx_head); + bp->tx_head++; + netdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); mapping = dma_map_single(&bp->pdev->dev, skb->data, len, DMA_TO_DEVICE); - bp->tx_skb[entry].skb = skb; - bp->tx_skb[entry].mapping = mapping; - netdev_dbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n", + + tx_skb = &bp->tx_skb[entry]; + tx_skb->skb = skb; + tx_skb->mapping = mapping; + netdev_vdbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n", skb->data, (unsigned long)mapping); ctrl = MACB_BF(TX_FRMLEN, len); @@ -665,18 +826,18 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (entry == (TX_RING_SIZE - 1)) ctrl |= MACB_BIT(TX_WRAP); - bp->tx_ring[entry].addr = mapping; - bp->tx_ring[entry].ctrl = ctrl; - wmb(); + desc = &bp->tx_ring[entry]; + desc->addr = mapping; + desc->ctrl = ctrl; - entry = NEXT_TX(entry); - bp->tx_head = entry; + /* Make newly initialized descriptor visible to hardware */ + wmb(); skb_tx_timestamp(skb); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - if (TX_BUFFS_AVAIL(bp) < 1) + if (macb_tx_ring_avail(bp) < 1) netif_stop_queue(dev); spin_unlock_irqrestore(&bp->lock, flags); @@ -712,7 +873,7 @@ static int macb_alloc_consistent(struct macb *bp) { int size; - size = TX_RING_SIZE * sizeof(struct ring_info); + size = TX_RING_SIZE * sizeof(struct macb_tx_skb); bp->tx_skb = kmalloc(size, GFP_KERNEL); if (!bp->tx_skb) goto out_err; @@ -775,9 +936,6 @@ static void macb_init_rings(struct macb *bp) static void macb_reset_hw(struct macb *bp) { - /* Make sure we have the write buffer for ourselves */ - wmb(); - /* * Disable RX and TX (XXX: Should we halt the transmission * more gracefully?) @@ -788,11 +946,11 @@ static void macb_reset_hw(struct macb *bp) macb_writel(bp, NCR, MACB_BIT(CLRSTAT)); /* Clear all status flags */ - macb_writel(bp, TSR, ~0UL); - macb_writel(bp, RSR, ~0UL); + macb_writel(bp, TSR, -1); + macb_writel(bp, RSR, -1); /* Disable all interrupts */ - macb_writel(bp, IDR, ~0UL); + macb_writel(bp, IDR, -1); macb_readl(bp, ISR); } @@ -879,9 +1037,10 @@ static void macb_init_hw(struct macb *bp) u32 config; macb_reset_hw(bp); - __macb_set_hwaddr(bp); + macb_set_hwaddr(bp); config = macb_mdc_clk_div(bp); + config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ config |= MACB_BIT(BIG); /* Receive oversized frames */ @@ -891,6 +1050,8 @@ static void macb_init_hw(struct macb *bp) config |= MACB_BIT(NBC); /* No BroadCast */ config |= macb_dbw(bp); macb_writel(bp, NCFGR, config); + bp->speed = SPEED_10; + bp->duplex = DUPLEX_HALF; macb_configure_dma(bp); @@ -902,13 +1063,8 @@ static void macb_init_hw(struct macb *bp) macb_writel(bp, NCR, MACB_BIT(RE) | MACB_BIT(TE) | MACB_BIT(MPE)); /* Enable interrupts */ - macb_writel(bp, IER, (MACB_BIT(RCOMP) - | MACB_BIT(RXUBR) - | MACB_BIT(ISR_TUND) - | MACB_BIT(ISR_RLE) - | MACB_BIT(TXERR) - | MACB_BIT(TCOMP) - | MACB_BIT(ISR_ROVR) + macb_writel(bp, IER, (MACB_RX_INT_FLAGS + | MACB_TX_INT_FLAGS | MACB_BIT(HRESP))); } @@ -996,7 +1152,7 @@ static void macb_sethashtable(struct net_device *dev) /* * Enable/Disable promiscuous and multicast modes. */ -static void macb_set_rx_mode(struct net_device *dev) +void macb_set_rx_mode(struct net_device *dev) { unsigned long cfg; struct macb *bp = netdev_priv(dev); @@ -1028,6 +1184,7 @@ static void macb_set_rx_mode(struct net_device *dev) macb_writel(bp, NCFGR, cfg); } +EXPORT_SYMBOL_GPL(macb_set_rx_mode); static int macb_open(struct net_device *dev) { @@ -1135,7 +1292,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) return nstat; } -static struct net_device_stats *macb_get_stats(struct net_device *dev) +struct net_device_stats *macb_get_stats(struct net_device *dev) { struct macb *bp = netdev_priv(dev); struct net_device_stats *nstat = &bp->stats; @@ -1181,6 +1338,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) return nstat; } +EXPORT_SYMBOL_GPL(macb_get_stats); static int macb_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -1204,25 +1362,55 @@ static int macb_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) return phy_ethtool_sset(phydev, cmd); } -static void macb_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) +static int macb_get_regs_len(struct net_device *netdev) +{ + return MACB_GREGS_NBR * sizeof(u32); +} + +static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *p) { struct macb *bp = netdev_priv(dev); + unsigned int tail, head; + u32 *regs_buff = p; + + regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1)) + | MACB_GREGS_VERSION; + + tail = macb_tx_ring_wrap(bp->tx_tail); + head = macb_tx_ring_wrap(bp->tx_head); + + regs_buff[0] = macb_readl(bp, NCR); + regs_buff[1] = macb_or_gem_readl(bp, NCFGR); + regs_buff[2] = macb_readl(bp, NSR); + regs_buff[3] = macb_readl(bp, TSR); + regs_buff[4] = macb_readl(bp, RBQP); + regs_buff[5] = macb_readl(bp, TBQP); + regs_buff[6] = macb_readl(bp, RSR); + regs_buff[7] = macb_readl(bp, IMR); - strcpy(info->driver, bp->pdev->dev.driver->name); - strcpy(info->version, "$Revision: 1.14 $"); - strcpy(info->bus_info, dev_name(&bp->pdev->dev)); + regs_buff[8] = tail; + regs_buff[9] = head; + regs_buff[10] = macb_tx_dma(bp, tail); + regs_buff[11] = macb_tx_dma(bp, head); + + if (macb_is_gem(bp)) { + regs_buff[12] = gem_readl(bp, USRIO); + regs_buff[13] = gem_readl(bp, DMACFG); + } } -static const struct ethtool_ops macb_ethtool_ops = { +const struct ethtool_ops macb_ethtool_ops = { .get_settings = macb_get_settings, .set_settings = macb_set_settings, - .get_drvinfo = macb_get_drvinfo, + .get_regs_len = macb_get_regs_len, + .get_regs = macb_get_regs, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, }; +EXPORT_SYMBOL_GPL(macb_ethtool_ops); -static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct macb *bp = netdev_priv(dev); struct phy_device *phydev = bp->phy_dev; @@ -1235,6 +1423,7 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return phy_mii_ioctl(phydev, rq, cmd); } +EXPORT_SYMBOL_GPL(macb_ioctl); static const struct net_device_ops macb_netdev_ops = { .ndo_open = macb_open, @@ -1306,6 +1495,7 @@ static int __init macb_probe(struct platform_device *pdev) struct phy_device *phydev; u32 config; int err = -ENXIO; + struct pinctrl *pinctrl; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) { @@ -1313,6 +1503,15 @@ static int __init macb_probe(struct platform_device *pdev) goto err_out; } + pinctrl = devm_pinctrl_get_select_default(&pdev->dev); + if (IS_ERR(pinctrl)) { + err = PTR_ERR(pinctrl); + if (err == -EPROBE_DEFER) + goto err_out; + + dev_warn(&pdev->dev, "No pinctrl provided\n"); + } + err = -ENOMEM; dev = alloc_etherdev(sizeof(*bp)); if (!dev) @@ -1328,6 +1527,7 @@ static int __init macb_probe(struct platform_device *pdev) bp->dev = dev; spin_lock_init(&bp->lock); + INIT_WORK(&bp->tx_error_task, macb_tx_error_task); bp->pclk = clk_get(&pdev->dev, "pclk"); if (IS_ERR(bp->pclk)) { @@ -1384,7 +1584,9 @@ static int __init macb_probe(struct platform_device *pdev) bp->phy_interface = err; } - if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) + if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII) + macb_or_gem_writel(bp, USRIO, GEM_BIT(RGMII)); + else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) #if defined(CONFIG_ARCH_AT91) macb_or_gem_writel(bp, USRIO, (MACB_BIT(RMII) | MACB_BIT(CLKEN))); @@ -1398,8 +1600,6 @@ static int __init macb_probe(struct platform_device *pdev) macb_or_gem_writel(bp, USRIO, MACB_BIT(MII)); #endif - bp->tx_pending = DEF_TX_RING_PENDING; - err = register_netdev(dev); if (err) { dev_err(&pdev->dev, "Cannot register net device, aborting.\n"); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 335e288f531..864e38042b2 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -10,10 +10,15 @@ #ifndef _MACB_H #define _MACB_H +#define MACB_GREGS_NBR 16 +#define MACB_GREGS_VERSION 1 + /* MACB register offsets */ #define MACB_NCR 0x0000 #define MACB_NCFGR 0x0004 #define MACB_NSR 0x0008 +#define MACB_TAR 0x000c /* AT91RM9200 only */ +#define MACB_TCR 0x0010 /* AT91RM9200 only */ #define MACB_TSR 0x0014 #define MACB_RBQP 0x0018 #define MACB_TBQP 0x001c @@ -133,6 +138,8 @@ #define MACB_RTY_SIZE 1 #define MACB_PAE_OFFSET 13 #define MACB_PAE_SIZE 1 +#define MACB_RM9200_RMII_OFFSET 13 /* AT91RM9200 only */ +#define MACB_RM9200_RMII_SIZE 1 /* AT91RM9200 only */ #define MACB_RBOF_OFFSET 14 #define MACB_RBOF_SIZE 2 #define MACB_RLCE_OFFSET 16 @@ -145,6 +152,8 @@ #define MACB_IRXFCS_SIZE 1 /* GEM specific NCFGR bitfields. */ +#define GEM_GBE_OFFSET 10 +#define GEM_GBE_SIZE 1 #define GEM_CLK_OFFSET 18 #define GEM_CLK_SIZE 3 #define GEM_DBW_OFFSET 21 @@ -178,6 +187,8 @@ #define MACB_TGO_SIZE 1 #define MACB_BEX_OFFSET 4 #define MACB_BEX_SIZE 1 +#define MACB_RM9200_BNQ_OFFSET 4 /* AT91RM9200 only */ +#define MACB_RM9200_BNQ_SIZE 1 /* AT91RM9200 only */ #define MACB_COMP_OFFSET 5 #define MACB_COMP_SIZE 1 #define MACB_UND_OFFSET 6 @@ -246,6 +257,8 @@ /* Bitfields in USRIO (AT91) */ #define MACB_RMII_OFFSET 0 #define MACB_RMII_SIZE 1 +#define GEM_RGMII_OFFSET 0 /* GEM gigabit mode */ +#define GEM_RGMII_SIZE 1 #define MACB_CLKEN_OFFSET 1 #define MACB_CLKEN_SIZE 1 @@ -352,7 +365,12 @@ __v; \ }) -struct dma_desc { +/** + * struct macb_dma_desc - Hardware DMA descriptor + * @addr: DMA address of data buffer + * @ctrl: Control and status bits + */ +struct macb_dma_desc { u32 addr; u32 ctrl; }; @@ -417,7 +435,12 @@ struct dma_desc { #define MACB_TX_USED_OFFSET 31 #define MACB_TX_USED_SIZE 1 -struct ring_info { +/** + * struct macb_tx_skb - data about an skb which is being transmitted + * @skb: skb currently being transmitted + * @mapping: DMA address of the skb's data buffer + */ +struct macb_tx_skb { struct sk_buff *skb; dma_addr_t mapping; }; @@ -502,12 +525,12 @@ struct macb { void __iomem *regs; unsigned int rx_tail; - struct dma_desc *rx_ring; + struct macb_dma_desc *rx_ring; void *rx_buffers; unsigned int tx_head, tx_tail; - struct dma_desc *tx_ring; - struct ring_info *tx_skb; + struct macb_dma_desc *tx_ring; + struct macb_tx_skb *tx_skb; spinlock_t lock; struct platform_device *pdev; @@ -515,6 +538,7 @@ struct macb { struct clk *hclk; struct net_device *dev; struct napi_struct napi; + struct work_struct tx_error_task; struct net_device_stats stats; union { struct macb_stats macb; @@ -525,8 +549,6 @@ struct macb { dma_addr_t tx_ring_dma; dma_addr_t rx_buffers_dma; - unsigned int rx_pending, tx_pending; - struct mii_bus *mii_bus; struct phy_device *phy_dev; unsigned int link; @@ -534,8 +556,22 @@ struct macb { unsigned int duplex; phy_interface_t phy_interface; + + /* AT91RM9200 transmit */ + struct sk_buff *skb; /* holds skb until xmit interrupt completes */ + dma_addr_t skb_physaddr; /* phys addr from pci_map_single */ + int skb_length; /* saved skb length for pci_unmap_single */ }; +extern const struct ethtool_ops macb_ethtool_ops; + +int macb_mii_init(struct macb *bp); +int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +struct net_device_stats *macb_get_stats(struct net_device *dev); +void macb_set_rx_mode(struct net_device *dev); +void macb_set_hwaddr(struct macb *bp); +void macb_get_hwaddr(struct macb *bp); + static inline bool macb_is_gem(struct macb *bp) { return MACB_BFEXT(IDNUM, macb_readl(bp, MID)) == 0x2; diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 16814b34d4b..b407043ce9b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -191,6 +191,7 @@ #define DMA_CONTROL_ST 0x00002000 /* Start/Stop Transmission */ #define DMA_CONTROL_SR 0x00000002 /* Start/Stop Receive */ #define DMA_CONTROL_DFF 0x01000000 /* Disable flush of rx frames */ +#define DMA_CONTROL_OSF 0x00000004 /* Operate on 2nd tx frame */ /* DMA Normal interrupt */ #define DMA_INTR_ENA_NIE 0x00010000 /* Normal Summary */ @@ -210,7 +211,7 @@ #define DMA_INTR_ENA_TIE 0x00000001 /* Transmit Interrupt */ #define DMA_INTR_NORMAL (DMA_INTR_ENA_NIE | DMA_INTR_ENA_RIE | \ - DMA_INTR_ENA_TUE) + DMA_INTR_ENA_TUE | DMA_INTR_ENA_TIE) #define DMA_INTR_ABNORMAL (DMA_INTR_ENA_AIE | DMA_INTR_ENA_FBE | \ DMA_INTR_ENA_RWE | DMA_INTR_ENA_RSE | \ @@ -373,6 +374,7 @@ struct xgmac_priv { struct sk_buff **tx_skbuff; unsigned int tx_head; unsigned int tx_tail; + int tx_irq_cnt; void __iomem *base; unsigned int dma_buf_sz; @@ -663,6 +665,7 @@ static void xgmac_rx_refill(struct xgmac_priv *priv) { struct xgmac_dma_desc *p; dma_addr_t paddr; + int bufsz = priv->dev->mtu + ETH_HLEN + ETH_FCS_LEN; while (dma_ring_space(priv->rx_head, priv->rx_tail, DMA_RX_RING_SZ) > 1) { int entry = priv->rx_head; @@ -671,13 +674,13 @@ static void xgmac_rx_refill(struct xgmac_priv *priv) p = priv->dma_rx + entry; if (priv->rx_skbuff[entry] == NULL) { - skb = netdev_alloc_skb(priv->dev, priv->dma_buf_sz); + skb = netdev_alloc_skb_ip_align(priv->dev, bufsz); if (unlikely(skb == NULL)) break; priv->rx_skbuff[entry] = skb; paddr = dma_map_single(priv->device, skb->data, - priv->dma_buf_sz, DMA_FROM_DEVICE); + bufsz, DMA_FROM_DEVICE); desc_set_buf_addr(p, paddr, priv->dma_buf_sz); } @@ -701,10 +704,10 @@ static int xgmac_dma_desc_rings_init(struct net_device *dev) unsigned int bfsize; /* Set the Buffer size according to the MTU; - * indeed, in case of jumbo we need to bump-up the buffer sizes. + * The total buffer size including any IP offset must be a multiple + * of 8 bytes. */ - bfsize = ALIGN(dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN + 64, - 64); + bfsize = ALIGN(dev->mtu + ETH_HLEN + ETH_FCS_LEN + NET_IP_ALIGN, 8); netdev_dbg(priv->dev, "mtu [%d] bfsize [%d]\n", dev->mtu, bfsize); @@ -845,9 +848,6 @@ static void xgmac_free_dma_desc_rings(struct xgmac_priv *priv) static void xgmac_tx_complete(struct xgmac_priv *priv) { int i; - void __iomem *ioaddr = priv->base; - - writel(DMA_STATUS_TU | DMA_STATUS_NIS, ioaddr + XGMAC_DMA_STATUS); while (dma_ring_cnt(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ)) { unsigned int entry = priv->tx_tail; @@ -888,7 +888,7 @@ static void xgmac_tx_complete(struct xgmac_priv *priv) } if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) > - TX_THRESH) + MAX_SKB_FRAGS) netif_wake_queue(priv->dev); } @@ -965,8 +965,7 @@ static int xgmac_hw_init(struct net_device *dev) ctrl |= XGMAC_CONTROL_IPC; writel(ctrl, ioaddr + XGMAC_CONTROL); - value = DMA_CONTROL_DFF; - writel(value, ioaddr + XGMAC_DMA_CONTROL); + writel(DMA_CONTROL_OSF, ioaddr + XGMAC_DMA_CONTROL); /* Set the HW DMA mode and the COE */ writel(XGMAC_OMR_TSF | XGMAC_OMR_RFD | XGMAC_OMR_RFA | @@ -1060,19 +1059,15 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) struct xgmac_priv *priv = netdev_priv(dev); unsigned int entry; int i; + u32 irq_flag; int nfrags = skb_shinfo(skb)->nr_frags; struct xgmac_dma_desc *desc, *first; unsigned int desc_flags; unsigned int len; dma_addr_t paddr; - if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) < - (nfrags + 1)) { - writel(DMA_INTR_DEFAULT_MASK | DMA_INTR_ENA_TIE, - priv->base + XGMAC_DMA_INTR_ENA); - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } + priv->tx_irq_cnt = (priv->tx_irq_cnt + 1) & (DMA_TX_RING_SZ/4 - 1); + irq_flag = priv->tx_irq_cnt ? 0 : TXDESC_INTERRUPT; desc_flags = (skb->ip_summed == CHECKSUM_PARTIAL) ? TXDESC_CSUM_ALL : 0; @@ -1113,9 +1108,9 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Interrupt on completition only for the latest segment */ if (desc != first) desc_set_tx_owner(desc, desc_flags | - TXDESC_LAST_SEG | TXDESC_INTERRUPT); + TXDESC_LAST_SEG | irq_flag); else - desc_flags |= TXDESC_LAST_SEG | TXDESC_INTERRUPT; + desc_flags |= TXDESC_LAST_SEG | irq_flag; /* Set owner on first desc last to avoid race condition */ wmb(); @@ -1124,6 +1119,9 @@ static netdev_tx_t xgmac_xmit(struct sk_buff *skb, struct net_device *dev) priv->tx_head = dma_ring_incr(entry, DMA_TX_RING_SZ); writel(1, priv->base + XGMAC_DMA_TX_POLL); + if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) < + MAX_SKB_FRAGS) + netif_stop_queue(dev); return NETDEV_TX_OK; } @@ -1139,9 +1137,6 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit) struct sk_buff *skb; int frame_len; - writel(DMA_STATUS_RI | DMA_STATUS_NIS, - priv->base + XGMAC_DMA_STATUS); - entry = priv->rx_tail; p = priv->dma_rx + entry; if (desc_get_owner(p)) @@ -1180,8 +1175,6 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit) xgmac_rx_refill(priv); - writel(1, priv->base + XGMAC_DMA_RX_POLL); - return count; } @@ -1205,7 +1198,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete(napi); - writel(DMA_INTR_DEFAULT_MASK, priv->base + XGMAC_DMA_INTR_ENA); + __raw_writel(DMA_INTR_DEFAULT_MASK, priv->base + XGMAC_DMA_INTR_ENA); } return work_done; } @@ -1350,7 +1343,7 @@ static irqreturn_t xgmac_pmt_interrupt(int irq, void *dev_id) struct xgmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = priv->base; - intr_status = readl(ioaddr + XGMAC_INT_STAT); + intr_status = __raw_readl(ioaddr + XGMAC_INT_STAT); if (intr_status & XGMAC_INT_STAT_PMT) { netdev_dbg(priv->dev, "received Magic frame\n"); /* clear the PMT bits 5 and 6 by reading the PMT */ @@ -1368,9 +1361,9 @@ static irqreturn_t xgmac_interrupt(int irq, void *dev_id) struct xgmac_extra_stats *x = &priv->xstats; /* read the status register (CSR5) */ - intr_status = readl(priv->base + XGMAC_DMA_STATUS); - intr_status &= readl(priv->base + XGMAC_DMA_INTR_ENA); - writel(intr_status, priv->base + XGMAC_DMA_STATUS); + intr_status = __raw_readl(priv->base + XGMAC_DMA_STATUS); + intr_status &= __raw_readl(priv->base + XGMAC_DMA_INTR_ENA); + __raw_writel(intr_status, priv->base + XGMAC_DMA_STATUS); /* It displays the DMA process states (CSR5 register) */ /* ABNORMAL interrupts */ @@ -1405,8 +1398,8 @@ static irqreturn_t xgmac_interrupt(int irq, void *dev_id) } /* TX/RX NORMAL interrupts */ - if (intr_status & (DMA_STATUS_RI | DMA_STATUS_TU)) { - writel(DMA_INTR_ABNORMAL, priv->base + XGMAC_DMA_INTR_ENA); + if (intr_status & (DMA_STATUS_RI | DMA_STATUS_TU | DMA_STATUS_TI)) { + __raw_writel(DMA_INTR_ABNORMAL, priv->base + XGMAC_DMA_INTR_ENA); napi_schedule(&priv->napi); } diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h index df01b634324..8c82248ce41 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/common.h +++ b/drivers/net/ethernet/chelsio/cxgb3/common.h @@ -42,10 +42,9 @@ #include <linux/mdio.h> #include "version.h" -#define CH_ERR(adap, fmt, ...) dev_err(&adap->pdev->dev, fmt, ## __VA_ARGS__) -#define CH_WARN(adap, fmt, ...) dev_warn(&adap->pdev->dev, fmt, ## __VA_ARGS__) -#define CH_ALERT(adap, fmt, ...) \ - dev_printk(KERN_ALERT, &adap->pdev->dev, fmt, ## __VA_ARGS__) +#define CH_ERR(adap, fmt, ...) dev_err(&adap->pdev->dev, fmt, ##__VA_ARGS__) +#define CH_WARN(adap, fmt, ...) dev_warn(&adap->pdev->dev, fmt, ##__VA_ARGS__) +#define CH_ALERT(adap, fmt, ...) dev_alert(&adap->pdev->dev, fmt, ##__VA_ARGS__) /* * More powerful macro that selectively prints messages based on msg_enable. diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index a059f0c27e2..2fb01bf1815 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -1758,21 +1758,7 @@ static struct pci_driver rio_driver = { .remove = __devexit_p(rio_remove1), }; -static int __init -rio_init (void) -{ - return pci_register_driver(&rio_driver); -} - -static void __exit -rio_exit (void) -{ - pci_unregister_driver (&rio_driver); -} - -module_init (rio_init); -module_exit (rio_exit); - +module_pci_driver(rio_driver); /* Compile command: diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index cf4c05bdf5f..abf26c7c1d1 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -34,7 +34,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "4.4.31.0u" +#define DRV_VER "4.4.161.0u" #define DRV_NAME "be2net" #define BE_NAME "ServerEngines BladeEngine2 10Gbps NIC" #define BE3_NAME "ServerEngines BladeEngine3 10Gbps NIC" @@ -53,6 +53,7 @@ #define OC_DEVICE_ID3 0xe220 /* Device id for Lancer cards */ #define OC_DEVICE_ID4 0xe228 /* Device id for VF in Lancer */ #define OC_DEVICE_ID5 0x720 /* Device Id for Skyhawk cards */ +#define OC_DEVICE_ID6 0x728 /* Device id for VF in SkyHawk */ #define OC_SUBSYS_DEVICE_ID1 0xE602 #define OC_SUBSYS_DEVICE_ID2 0xE642 #define OC_SUBSYS_DEVICE_ID3 0xE612 @@ -71,6 +72,7 @@ static inline char *nic_name(struct pci_dev *pdev) case BE_DEVICE_ID2: return BE3_NAME; case OC_DEVICE_ID5: + case OC_DEVICE_ID6: return OC_NAME_SH; default: return BE_NAME; @@ -346,7 +348,6 @@ struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; - u8 __iomem *csr; u8 __iomem *db; /* Door Bell */ struct mutex mbox_lock; /* For serializing mbox cmds to BE card */ @@ -374,11 +375,8 @@ struct be_adapter { struct be_rx_obj rx_obj[MAX_RX_QS]; u32 big_page_size; /* Compounded page size shared by rx wrbs */ - u8 eq_next_idx; struct be_drv_stats drv_stats; - u16 vlans_added; - u16 max_vlans; /* Number of vlans supported */ u8 vlan_tag[VLAN_N_VID]; u8 vlan_prio_bmap; /* Available Priority BitMap */ u16 recommended_prio; /* Recommended Priority */ @@ -391,6 +389,7 @@ struct be_adapter { struct delayed_work func_recovery_work; u32 flags; + u32 cmd_privileges; /* Ethtool knobs and info */ char fw_ver[FW_VER_LEN]; int if_handle; /* Used to configure filtering */ @@ -408,10 +407,8 @@ struct be_adapter { u32 rx_fc; /* Rx flow control */ u32 tx_fc; /* Tx flow control */ bool stats_cmd_sent; - u8 generation; /* BladeEngine ASIC generation */ u32 if_type; struct { - u8 __iomem *base; /* Door Bell */ u32 size; u32 total_size; u64 io_addr; @@ -434,10 +431,18 @@ struct be_adapter { struct phy_info phy; u8 wol_cap; bool wol; - u32 max_pmac_cnt; /* Max secondary UC MACs programmable */ u32 uc_macs; /* Count of secondary UC MAC programmed */ u32 msg_enable; int be_get_temp_freq; + u16 max_mcast_mac; + u16 max_tx_queues; + u16 max_rss_queues; + u16 max_rx_queues; + u16 max_pmac_cnt; + u16 max_vlans; + u16 max_event_queues; + u32 if_cap_flags; + u8 pf_number; }; #define be_physfn(adapter) (!adapter->virtfn) @@ -448,21 +453,25 @@ struct be_adapter { for (i = 0, vf_cfg = &adapter->vf_cfg[i]; i < adapter->num_vfs; \ i++, vf_cfg++) -/* BladeEngine Generation numbers */ -#define BE_GEN2 2 -#define BE_GEN3 3 - #define ON 1 #define OFF 0 -#define lancer_chip(adapter) ((adapter->pdev->device == OC_DEVICE_ID3) || \ - (adapter->pdev->device == OC_DEVICE_ID4)) -#define skyhawk_chip(adapter) (adapter->pdev->device == OC_DEVICE_ID5) +#define lancer_chip(adapter) (adapter->pdev->device == OC_DEVICE_ID3 || \ + adapter->pdev->device == OC_DEVICE_ID4) + +#define skyhawk_chip(adapter) (adapter->pdev->device == OC_DEVICE_ID5 || \ + adapter->pdev->device == OC_DEVICE_ID6) + +#define BE3_chip(adapter) (adapter->pdev->device == BE_DEVICE_ID2 || \ + adapter->pdev->device == OC_DEVICE_ID2) +#define BE2_chip(adapter) (adapter->pdev->device == BE_DEVICE_ID1 || \ + adapter->pdev->device == OC_DEVICE_ID1) -#define be_roce_supported(adapter) ((adapter->if_type == SLI_INTF_TYPE_3 || \ - adapter->sli_family == SKYHAWK_SLI_FAMILY) && \ - (adapter->function_mode & RDMA_ENABLED)) +#define BEx_chip(adapter) (BE3_chip(adapter) || BE2_chip(adapter)) + +#define be_roce_supported(adapter) (skyhawk_chip(adapter) && \ + (adapter->function_mode & RDMA_ENABLED)) extern const struct ethtool_ops be_ethtool_ops; @@ -637,12 +646,6 @@ static inline bool be_is_wol_excluded(struct be_adapter *adapter) } } -static inline bool be_type_2_3(struct be_adapter *adapter) -{ - return (adapter->if_type == SLI_INTF_TYPE_2 || - adapter->if_type == SLI_INTF_TYPE_3) ? true : false; -} - extern void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped); extern void be_link_status_update(struct be_adapter *adapter, u8 link_status); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index af60bb26e33..f2875aa4766 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -19,6 +19,55 @@ #include "be.h" #include "be_cmds.h" +static struct be_cmd_priv_map cmd_priv_map[] = { + { + OPCODE_ETH_ACPI_WOL_MAGIC_CONFIG, + CMD_SUBSYSTEM_ETH, + BE_PRIV_LNKMGMT | BE_PRIV_VHADM | + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_COMMON_GET_FLOW_CONTROL, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_LNKQUERY | BE_PRIV_VHADM | + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_COMMON_SET_FLOW_CONTROL, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_LNKMGMT | BE_PRIV_VHADM | + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_ETH_GET_PPORT_STATS, + CMD_SUBSYSTEM_ETH, + BE_PRIV_LNKMGMT | BE_PRIV_VHADM | + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + }, + { + OPCODE_COMMON_GET_PHY_DETAILS, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_LNKMGMT | BE_PRIV_VHADM | + BE_PRIV_DEVCFG | BE_PRIV_DEVSEC + } +}; + +static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, + u8 subsystem) +{ + int i; + int num_entries = sizeof(cmd_priv_map)/sizeof(struct be_cmd_priv_map); + u32 cmd_privileges = adapter->cmd_privileges; + + for (i = 0; i < num_entries; i++) + if (opcode == cmd_priv_map[i].opcode && + subsystem == cmd_priv_map[i].subsystem) + if (!(cmd_privileges & cmd_priv_map[i].priv_mask)) + return false; + + return true; +} + static inline void *embedded_payload(struct be_mcc_wrb *wrb) { return wrb->payload.embedded_payload; @@ -419,14 +468,13 @@ static int be_mbox_notify_wait(struct be_adapter *adapter) static int be_POST_stage_get(struct be_adapter *adapter, u16 *stage) { u32 sem; + u32 reg = skyhawk_chip(adapter) ? SLIPORT_SEMAPHORE_OFFSET_SH : + SLIPORT_SEMAPHORE_OFFSET_BE; - if (lancer_chip(adapter)) - sem = ioread32(adapter->db + MPU_EP_SEMAPHORE_IF_TYPE2_OFFSET); - else - sem = ioread32(adapter->csr + MPU_EP_SEMAPHORE_OFFSET); + pci_read_config_dword(adapter->pdev, reg, &sem); + *stage = sem & POST_STAGE_MASK; - *stage = sem & EP_SEMAPHORE_POST_STAGE_MASK; - if ((sem >> EP_SEMAPHORE_POST_ERR_SHIFT) & EP_SEMAPHORE_POST_ERR_MASK) + if ((sem >> POST_ERR_SHIFT) & POST_ERR_MASK) return -1; else return 0; @@ -452,10 +500,33 @@ int lancer_wait_ready(struct be_adapter *adapter) return status; } +static bool lancer_provisioning_error(struct be_adapter *adapter) +{ + u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; + sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); + if (sliport_status & SLIPORT_STATUS_ERR_MASK) { + sliport_err1 = ioread32(adapter->db + + SLIPORT_ERROR1_OFFSET); + sliport_err2 = ioread32(adapter->db + + SLIPORT_ERROR2_OFFSET); + + if (sliport_err1 == SLIPORT_ERROR_NO_RESOURCE1 && + sliport_err2 == SLIPORT_ERROR_NO_RESOURCE2) + return true; + } + return false; +} + int lancer_test_and_set_rdy_state(struct be_adapter *adapter) { int status; u32 sliport_status, err, reset_needed; + bool resource_error; + + resource_error = lancer_provisioning_error(adapter); + if (resource_error) + return -1; + status = lancer_wait_ready(adapter); if (!status) { sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); @@ -477,6 +548,14 @@ int lancer_test_and_set_rdy_state(struct be_adapter *adapter) status = -1; } } + /* Stop error recovery if error is not recoverable. + * No resource error is temporary errors and will go away + * when PF provisions resources. + */ + resource_error = lancer_provisioning_error(adapter); + if (status == -1 && !resource_error) + adapter->eeh_error = true; + return status; } @@ -601,6 +680,9 @@ static struct be_mcc_wrb *wrb_from_mccq(struct be_adapter *adapter) struct be_queue_info *mccq = &adapter->mcc_obj.q; struct be_mcc_wrb *wrb; + if (!mccq->created) + return NULL; + if (atomic_read(&mccq->used) >= mccq->len) { dev_err(&adapter->pdev->dev, "Out of MCCQ wrbs\n"); return NULL; @@ -1155,8 +1237,7 @@ int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q, req->id = cpu_to_le16(q->id); status = be_mbox_notify_wait(adapter); - if (!status) - q->created = false; + q->created = false; mutex_unlock(&adapter->mbox_lock); return status; @@ -1183,8 +1264,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) req->id = cpu_to_le16(q->id); status = be_mcc_notify_wait(adapter); - if (!status) - q->created = false; + q->created = false; err: spin_unlock_bh(&adapter->mcc_lock); @@ -1281,7 +1361,8 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) be_wrb_cmd_hdr_prepare(hdr, CMD_SUBSYSTEM_ETH, OPCODE_ETH_GET_STATISTICS, nonemb_cmd->size, wrb, nonemb_cmd); - if (adapter->generation == BE_GEN3) + /* version 1 of the cmd is not supported only by BE2 */ + if (!BE2_chip(adapter)) hdr->version = 1; be_mcc_notify(adapter); @@ -1301,6 +1382,10 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, struct lancer_cmd_req_pport_stats *req; int status = 0; + if (!be_cmd_allowed(adapter, OPCODE_ETH_GET_PPORT_STATS, + CMD_SUBSYSTEM_ETH)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -1367,7 +1452,8 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_LINK_STATUS_QUERY, sizeof(*req), wrb, NULL); - if (adapter->generation == BE_GEN3 || lancer_chip(adapter)) + /* version 1 of the cmd is not supported only by BE2 */ + if (!BE2_chip(adapter)) req->hdr.version = 1; req->hdr.domain = dom; @@ -1658,9 +1744,9 @@ int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) /* Reset mcast promisc mode if already set by setting mask * and not setting flags field */ - if (!lancer_chip(adapter) || be_physfn(adapter)) - req->if_flags_mask |= - cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS); + req->if_flags_mask |= + cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS & + adapter->if_cap_flags); req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev)); netdev_for_each_mc_addr(ha, adapter->netdev) @@ -1680,6 +1766,10 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) struct be_cmd_req_set_flow_control *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_SET_FLOW_CONTROL, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -1709,6 +1799,10 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) struct be_cmd_req_get_flow_control *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_FLOW_CONTROL, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -2067,7 +2161,7 @@ int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, int offset) { struct be_mcc_wrb *wrb; - struct be_cmd_write_flashrom *req; + struct be_cmd_read_flash_crc *req; int status; spin_lock_bh(&adapter->mcc_lock); @@ -2080,7 +2174,8 @@ int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, req = embedded_payload(wrb); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_READ_FLASHROM, sizeof(*req)+4, wrb, NULL); + OPCODE_COMMON_READ_FLASHROM, sizeof(*req), + wrb, NULL); req->params.op_type = cpu_to_le32(OPTYPE_REDBOOT); req->params.op_code = cpu_to_le32(FLASHROM_OPER_REPORT); @@ -2089,7 +2184,7 @@ int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, status = be_mcc_notify_wait(adapter); if (!status) - memcpy(flashed_crc, req->params.data_buf, 4); + memcpy(flashed_crc, req->crc, 4); err: spin_unlock_bh(&adapter->mcc_lock); @@ -2275,6 +2370,10 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) struct be_dma_mem cmd; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_PHY_DETAILS, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + spin_lock_bh(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); @@ -2434,6 +2533,42 @@ err: return status; } +/* Get privilege(s) for a function */ +int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, + u32 domain) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_get_fn_privileges *req; + int status; + + spin_lock_bh(&adapter->mcc_lock); + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_GET_FN_PRIVILEGES, sizeof(*req), + wrb, NULL); + + req->hdr.domain = domain; + + status = be_mcc_notify_wait(adapter); + if (!status) { + struct be_cmd_resp_get_fn_privileges *resp = + embedded_payload(wrb); + *privilege = le32_to_cpu(resp->privilege_mask); + } + +err: + spin_unlock_bh(&adapter->mcc_lock); + return status; +} + /* Uses synchronous MCCQ */ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, bool *pmac_id_active, u32 *pmac_id, u8 domain) @@ -2651,6 +2786,10 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter) int payload_len = sizeof(*req); struct be_dma_mem cmd; + if (!be_cmd_allowed(adapter, OPCODE_ETH_ACPI_WOL_MAGIC_CONFIG, + CMD_SUBSYSTEM_ETH)) + return -EPERM; + memset(&cmd, 0, sizeof(struct be_dma_mem)); cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1); cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, @@ -2792,6 +2931,240 @@ err: return status; } +static struct be_nic_resource_desc *be_get_nic_desc(u8 *buf, u32 desc_count, + u32 max_buf_size) +{ + struct be_nic_resource_desc *desc = (struct be_nic_resource_desc *)buf; + int i; + + for (i = 0; i < desc_count; i++) { + desc->desc_len = RESOURCE_DESC_SIZE; + if (((void *)desc + desc->desc_len) > + (void *)(buf + max_buf_size)) { + desc = NULL; + break; + } + + if (desc->desc_type == NIC_RESOURCE_DESC_TYPE_ID) + break; + + desc = (void *)desc + desc->desc_len; + } + + if (!desc || i == MAX_RESOURCE_DESC) + return NULL; + + return desc; +} + +/* Uses Mbox */ +int be_cmd_get_func_config(struct be_adapter *adapter) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_get_func_config *req; + int status; + struct be_dma_mem cmd; + + memset(&cmd, 0, sizeof(struct be_dma_mem)); + cmd.size = sizeof(struct be_cmd_resp_get_func_config); + cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, + &cmd.dma); + if (!cmd.va) { + dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); + return -ENOMEM; + } + if (mutex_lock_interruptible(&adapter->mbox_lock)) + return -1; + + wrb = wrb_from_mbox(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = cmd.va; + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_GET_FUNC_CONFIG, + cmd.size, wrb, &cmd); + + status = be_mbox_notify_wait(adapter); + if (!status) { + struct be_cmd_resp_get_func_config *resp = cmd.va; + u32 desc_count = le32_to_cpu(resp->desc_count); + struct be_nic_resource_desc *desc; + + desc = be_get_nic_desc(resp->func_param, desc_count, + sizeof(resp->func_param)); + if (!desc) { + status = -EINVAL; + goto err; + } + + adapter->pf_number = desc->pf_num; + adapter->max_pmac_cnt = le16_to_cpu(desc->unicast_mac_count); + adapter->max_vlans = le16_to_cpu(desc->vlan_count); + adapter->max_mcast_mac = le16_to_cpu(desc->mcast_mac_count); + adapter->max_tx_queues = le16_to_cpu(desc->txq_count); + adapter->max_rss_queues = le16_to_cpu(desc->rssq_count); + adapter->max_rx_queues = le16_to_cpu(desc->rq_count); + + adapter->max_event_queues = le16_to_cpu(desc->eq_count); + adapter->if_cap_flags = le32_to_cpu(desc->cap_flags); + } +err: + mutex_unlock(&adapter->mbox_lock); + pci_free_consistent(adapter->pdev, cmd.size, + cmd.va, cmd.dma); + return status; +} + + /* Uses sync mcc */ +int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, + u8 domain) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_get_profile_config *req; + int status; + struct be_dma_mem cmd; + + memset(&cmd, 0, sizeof(struct be_dma_mem)); + cmd.size = sizeof(struct be_cmd_resp_get_profile_config); + cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, + &cmd.dma); + if (!cmd.va) { + dev_err(&adapter->pdev->dev, "Memory alloc failure\n"); + return -ENOMEM; + } + + spin_lock_bh(&adapter->mcc_lock); + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = cmd.va; + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_GET_PROFILE_CONFIG, + cmd.size, wrb, &cmd); + + req->type = ACTIVE_PROFILE_TYPE; + req->hdr.domain = domain; + + status = be_mcc_notify_wait(adapter); + if (!status) { + struct be_cmd_resp_get_profile_config *resp = cmd.va; + u32 desc_count = le32_to_cpu(resp->desc_count); + struct be_nic_resource_desc *desc; + + desc = be_get_nic_desc(resp->func_param, desc_count, + sizeof(resp->func_param)); + + if (!desc) { + status = -EINVAL; + goto err; + } + *cap_flags = le32_to_cpu(desc->cap_flags); + } +err: + spin_unlock_bh(&adapter->mcc_lock); + pci_free_consistent(adapter->pdev, cmd.size, + cmd.va, cmd.dma); + return status; +} + +/* Uses sync mcc */ +int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps, + u8 domain) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_req_set_profile_config *req; + int status; + + spin_lock_bh(&adapter->mcc_lock); + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_SET_PROFILE_CONFIG, sizeof(*req), + wrb, NULL); + + req->hdr.domain = domain; + req->desc_count = cpu_to_le32(1); + + req->nic_desc.desc_type = NIC_RESOURCE_DESC_TYPE_ID; + req->nic_desc.desc_len = RESOURCE_DESC_SIZE; + req->nic_desc.flags = (1 << QUN) | (1 << IMM) | (1 << NOSV); + req->nic_desc.pf_num = adapter->pf_number; + req->nic_desc.vf_num = domain; + + /* Mark fields invalid */ + req->nic_desc.unicast_mac_count = 0xFFFF; + req->nic_desc.mcc_count = 0xFFFF; + req->nic_desc.vlan_count = 0xFFFF; + req->nic_desc.mcast_mac_count = 0xFFFF; + req->nic_desc.txq_count = 0xFFFF; + req->nic_desc.rq_count = 0xFFFF; + req->nic_desc.rssq_count = 0xFFFF; + req->nic_desc.lro_count = 0xFFFF; + req->nic_desc.cq_count = 0xFFFF; + req->nic_desc.toe_conn_count = 0xFFFF; + req->nic_desc.eq_count = 0xFFFF; + req->nic_desc.link_param = 0xFF; + req->nic_desc.bw_min = 0xFFFFFFFF; + req->nic_desc.acpi_params = 0xFF; + req->nic_desc.wol_param = 0x0F; + + /* Change BW */ + req->nic_desc.bw_min = cpu_to_le32(bps); + req->nic_desc.bw_max = cpu_to_le32(bps); + status = be_mcc_notify_wait(adapter); +err: + spin_unlock_bh(&adapter->mcc_lock); + return status; +} + +/* Uses sync mcc */ +int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) +{ + struct be_mcc_wrb *wrb; + struct be_cmd_enable_disable_vf *req; + int status; + + if (!lancer_chip(adapter)) + return 0; + + spin_lock_bh(&adapter->mcc_lock); + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_ENABLE_DISABLE_VF, sizeof(*req), + wrb, NULL); + + req->hdr.domain = domain; + req->enable = 1; + status = be_mcc_notify_wait(adapter); +err: + spin_unlock_bh(&adapter->mcc_lock); + return status; +} + int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, int wrb_payload_size, u16 *cmd_status, u16 *ext_status) { diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 0936e21e3cf..d6552e19ffe 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -196,9 +196,14 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_GET_MAC_LIST 147 #define OPCODE_COMMON_SET_MAC_LIST 148 #define OPCODE_COMMON_GET_HSW_CONFIG 152 +#define OPCODE_COMMON_GET_FUNC_CONFIG 160 +#define OPCODE_COMMON_GET_PROFILE_CONFIG 164 +#define OPCODE_COMMON_SET_PROFILE_CONFIG 165 #define OPCODE_COMMON_SET_HSW_CONFIG 153 +#define OPCODE_COMMON_GET_FN_PRIVILEGES 170 #define OPCODE_COMMON_READ_OBJECT 171 #define OPCODE_COMMON_WRITE_OBJECT 172 +#define OPCODE_COMMON_ENABLE_DISABLE_VF 196 #define OPCODE_ETH_RSS_CONFIG 1 #define OPCODE_ETH_ACPI_CONFIG 2 @@ -1151,14 +1156,22 @@ struct flashrom_params { u32 op_type; u32 data_buf_size; u32 offset; - u8 data_buf[4]; }; struct be_cmd_write_flashrom { struct be_cmd_req_hdr hdr; struct flashrom_params params; -}; + u8 data_buf[32768]; + u8 rsvd[4]; +} __packed; +/* cmd to read flash crc */ +struct be_cmd_read_flash_crc { + struct be_cmd_req_hdr hdr; + struct flashrom_params params; + u8 crc[4]; + u8 rsvd[4]; +}; /**************** Lancer Firmware Flash ************/ struct amap_lancer_write_obj_context { u8 write_length[24]; @@ -1429,6 +1442,41 @@ struct be_cmd_resp_set_func_cap { u8 rsvd[212]; }; +/*********************** Function Privileges ***********************/ +enum { + BE_PRIV_DEFAULT = 0x1, + BE_PRIV_LNKQUERY = 0x2, + BE_PRIV_LNKSTATS = 0x4, + BE_PRIV_LNKMGMT = 0x8, + BE_PRIV_LNKDIAG = 0x10, + BE_PRIV_UTILQUERY = 0x20, + BE_PRIV_FILTMGMT = 0x40, + BE_PRIV_IFACEMGMT = 0x80, + BE_PRIV_VHADM = 0x100, + BE_PRIV_DEVCFG = 0x200, + BE_PRIV_DEVSEC = 0x400 +}; +#define MAX_PRIVILEGES (BE_PRIV_VHADM | BE_PRIV_DEVCFG | \ + BE_PRIV_DEVSEC) +#define MIN_PRIVILEGES BE_PRIV_DEFAULT + +struct be_cmd_priv_map { + u8 opcode; + u8 subsystem; + u32 priv_mask; +}; + +struct be_cmd_req_get_fn_privileges { + struct be_cmd_req_hdr hdr; + u32 rsvd; +}; + +struct be_cmd_resp_get_fn_privileges { + struct be_cmd_resp_hdr hdr; + u32 privilege_mask; +}; + + /******************** GET/SET_MACLIST **************************/ #define BE_MAX_MAC 64 struct be_cmd_req_get_mac_list { @@ -1608,33 +1656,6 @@ struct be_cmd_resp_get_stats_v1 { struct be_hw_stats_v1 hw_stats; }; -static inline void *hw_stats_from_cmd(struct be_adapter *adapter) -{ - if (adapter->generation == BE_GEN3) { - struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va; - - return &cmd->hw_stats; - } else { - struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va; - - return &cmd->hw_stats; - } -} - -static inline void *be_erx_stats_from_cmd(struct be_adapter *adapter) -{ - if (adapter->generation == BE_GEN3) { - struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter); - - return &hw_stats->erx; - } else { - struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter); - - return &hw_stats->erx; - } -} - - /************** get fat capabilites *******************/ #define MAX_MODULES 27 #define MAX_MODES 4 @@ -1684,6 +1705,96 @@ struct be_cmd_req_set_ext_fat_caps { struct be_fat_conf_params set_params; }; +#define RESOURCE_DESC_SIZE 72 +#define NIC_RESOURCE_DESC_TYPE_ID 0x41 +#define MAX_RESOURCE_DESC 4 + +/* QOS unit number */ +#define QUN 4 +/* Immediate */ +#define IMM 6 +/* No save */ +#define NOSV 7 + +struct be_nic_resource_desc { + u8 desc_type; + u8 desc_len; + u8 rsvd1; + u8 flags; + u8 vf_num; + u8 rsvd2; + u8 pf_num; + u8 rsvd3; + u16 unicast_mac_count; + u8 rsvd4[6]; + u16 mcc_count; + u16 vlan_count; + u16 mcast_mac_count; + u16 txq_count; + u16 rq_count; + u16 rssq_count; + u16 lro_count; + u16 cq_count; + u16 toe_conn_count; + u16 eq_count; + u32 rsvd5; + u32 cap_flags; + u8 link_param; + u8 rsvd6[3]; + u32 bw_min; + u32 bw_max; + u8 acpi_params; + u8 wol_param; + u16 rsvd7; + u32 rsvd8[3]; +}; + +struct be_cmd_req_get_func_config { + struct be_cmd_req_hdr hdr; +}; + +struct be_cmd_resp_get_func_config { + struct be_cmd_req_hdr hdr; + u32 desc_count; + u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE]; +}; + +#define ACTIVE_PROFILE_TYPE 0x2 +struct be_cmd_req_get_profile_config { + struct be_cmd_req_hdr hdr; + u8 rsvd; + u8 type; + u16 rsvd1; +}; + +struct be_cmd_resp_get_profile_config { + struct be_cmd_req_hdr hdr; + u32 desc_count; + u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE]; +}; + +struct be_cmd_req_set_profile_config { + struct be_cmd_req_hdr hdr; + u32 rsvd; + u32 desc_count; + struct be_nic_resource_desc nic_desc; +}; + +struct be_cmd_resp_set_profile_config { + struct be_cmd_req_hdr hdr; +}; + +struct be_cmd_enable_disable_vf { + struct be_cmd_req_hdr hdr; + u8 enable; + u8 rsvd[3]; +}; + +static inline bool check_privilege(struct be_adapter *adapter, u32 flags) +{ + return flags & adapter->cmd_privileges ? true : false; +} + extern int be_pci_fnum_get(struct be_adapter *adapter); extern int be_fw_wait_ready(struct be_adapter *adapter); extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, @@ -1780,6 +1891,8 @@ extern int be_cmd_get_cntl_attributes(struct be_adapter *adapter); extern int be_cmd_req_native_mode(struct be_adapter *adapter); extern int be_cmd_get_reg_len(struct be_adapter *adapter, u32 *log_size); extern void be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf); +extern int be_cmd_get_fn_privileges(struct be_adapter *adapter, + u32 *privilege, u32 domain); extern int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, bool *pmac_id_active, u32 *pmac_id, u8 domain); @@ -1798,4 +1911,10 @@ extern int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, extern int lancer_wait_ready(struct be_adapter *adapter); extern int lancer_test_and_set_rdy_state(struct be_adapter *adapter); extern int be_cmd_query_port_name(struct be_adapter *adapter, u8 *port_name); +extern int be_cmd_get_func_config(struct be_adapter *adapter); +extern int be_cmd_get_profile_config(struct be_adapter *adapter, u32 *cap_flags, + u8 domain); +extern int be_cmd_set_profile_config(struct be_adapter *adapter, u32 bps, + u8 domain); +extern int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 8e6fb0ba6aa..00454a10f88 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -261,6 +261,9 @@ be_get_reg_len(struct net_device *netdev) struct be_adapter *adapter = netdev_priv(netdev); u32 log_size = 0; + if (!check_privilege(adapter, MAX_PRIVILEGES)) + return 0; + if (be_physfn(adapter)) { if (lancer_chip(adapter)) log_size = lancer_cmd_get_file_len(adapter, @@ -525,6 +528,10 @@ static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) u8 link_status; u16 link_speed = 0; int status; + u32 auto_speeds; + u32 fixed_speeds; + u32 dac_cable_len; + u16 interface_type; if (adapter->phy.link_speed < 0) { status = be_cmd_link_status_query(adapter, &link_speed, @@ -534,39 +541,46 @@ static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) ethtool_cmd_speed_set(ecmd, link_speed); status = be_cmd_get_phy_info(adapter); - if (status) - return status; - - ecmd->supported = - convert_to_et_setting(adapter->phy.interface_type, - adapter->phy.auto_speeds_supported | - adapter->phy.fixed_speeds_supported); - ecmd->advertising = - convert_to_et_setting(adapter->phy.interface_type, - adapter->phy.auto_speeds_supported); - - ecmd->port = be_get_port_type(adapter->phy.interface_type, - adapter->phy.dac_cable_len); - - if (adapter->phy.auto_speeds_supported) { - ecmd->supported |= SUPPORTED_Autoneg; - ecmd->autoneg = AUTONEG_ENABLE; - ecmd->advertising |= ADVERTISED_Autoneg; - } + if (!status) { + interface_type = adapter->phy.interface_type; + auto_speeds = adapter->phy.auto_speeds_supported; + fixed_speeds = adapter->phy.fixed_speeds_supported; + dac_cable_len = adapter->phy.dac_cable_len; + + ecmd->supported = + convert_to_et_setting(interface_type, + auto_speeds | + fixed_speeds); + ecmd->advertising = + convert_to_et_setting(interface_type, + auto_speeds); + + ecmd->port = be_get_port_type(interface_type, + dac_cable_len); + + if (adapter->phy.auto_speeds_supported) { + ecmd->supported |= SUPPORTED_Autoneg; + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->advertising |= ADVERTISED_Autoneg; + } - if (be_pause_supported(adapter)) { ecmd->supported |= SUPPORTED_Pause; - ecmd->advertising |= ADVERTISED_Pause; - } - - switch (adapter->phy.interface_type) { - case PHY_TYPE_KR_10GB: - case PHY_TYPE_KX4_10GB: - ecmd->transceiver = XCVR_INTERNAL; - break; - default: - ecmd->transceiver = XCVR_EXTERNAL; - break; + if (be_pause_supported(adapter)) + ecmd->advertising |= ADVERTISED_Pause; + + switch (adapter->phy.interface_type) { + case PHY_TYPE_KR_10GB: + case PHY_TYPE_KX4_10GB: + ecmd->transceiver = XCVR_INTERNAL; + break; + default: + ecmd->transceiver = XCVR_EXTERNAL; + break; + } + } else { + ecmd->port = PORT_OTHER; + ecmd->autoneg = AUTONEG_DISABLE; + ecmd->transceiver = XCVR_DUMMY1; } /* Save for future use */ @@ -787,6 +801,10 @@ static int be_get_eeprom_len(struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); + + if (!check_privilege(adapter, MAX_PRIVILEGES)) + return 0; + if (lancer_chip(adapter)) { if (be_physfn(adapter)) return lancer_cmd_get_file_len(adapter, diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index b755f7061dc..541d4530d5b 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -31,12 +31,12 @@ #define MPU_EP_CONTROL 0 -/********** MPU semphore ******************/ -#define MPU_EP_SEMAPHORE_OFFSET 0xac -#define MPU_EP_SEMAPHORE_IF_TYPE2_OFFSET 0x400 -#define EP_SEMAPHORE_POST_STAGE_MASK 0x0000FFFF -#define EP_SEMAPHORE_POST_ERR_MASK 0x1 -#define EP_SEMAPHORE_POST_ERR_SHIFT 31 +/********** MPU semphore: used for SH & BE *************/ +#define SLIPORT_SEMAPHORE_OFFSET_BE 0x7c +#define SLIPORT_SEMAPHORE_OFFSET_SH 0x94 +#define POST_STAGE_MASK 0x0000FFFF +#define POST_ERR_MASK 0x1 +#define POST_ERR_SHIFT 31 /* MPU semphore POST stage values */ #define POST_STAGE_AWAITING_HOST_RDY 0x1 /* FW awaiting goahead from host */ @@ -59,6 +59,9 @@ #define PHYSDEV_CONTROL_FW_RESET_MASK 0x00000002 #define PHYSDEV_CONTROL_INP_MASK 0x40000000 +#define SLIPORT_ERROR_NO_RESOURCE1 0x2 +#define SLIPORT_ERROR_NO_RESOURCE2 0x9 + /********* Memory BAR register ************/ #define PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET 0xfc /* Host Interrupt Enable, if set interrupts are enabled although "PCI Interrupt @@ -102,11 +105,6 @@ #define SLI_INTF_TYPE_2 2 #define SLI_INTF_TYPE_3 3 -/* SLI family */ -#define BE_SLI_FAMILY 0x0 -#define LANCER_A0_SLI_FAMILY 0xA -#define SKYHAWK_SLI_FAMILY 0x2 - /********* ISR0 Register offset **********/ #define CEV_ISR0_OFFSET 0xC18 #define CEV_ISR_SIZE 4 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index d1b6cc58763..c365722218f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -44,6 +44,7 @@ static DEFINE_PCI_DEVICE_TABLE(be_dev_ids) = { { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)}, { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)}, { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)}, + { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)}, { 0 } }; MODULE_DEVICE_TABLE(pci, be_dev_ids); @@ -237,23 +238,46 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) int status = 0; u8 current_mac[ETH_ALEN]; u32 pmac_id = adapter->pmac_id[0]; + bool active_mac = true; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - status = be_cmd_mac_addr_query(adapter, current_mac, false, - adapter->if_handle, 0); + /* For BE VF, MAC address is already activated by PF. + * Hence only operation left is updating netdev->devaddr. + * Update it if user is passing the same MAC which was used + * during configuring VF MAC from PF(Hypervisor). + */ + if (!lancer_chip(adapter) && !be_physfn(adapter)) { + status = be_cmd_mac_addr_query(adapter, current_mac, + false, adapter->if_handle, 0); + if (!status && !memcmp(current_mac, addr->sa_data, ETH_ALEN)) + goto done; + else + goto err; + } + + if (!memcmp(addr->sa_data, netdev->dev_addr, ETH_ALEN)) + goto done; + + /* For Lancer check if any MAC is active. + * If active, get its mac id. + */ + if (lancer_chip(adapter) && !be_physfn(adapter)) + be_cmd_get_mac_from_list(adapter, current_mac, &active_mac, + &pmac_id, 0); + + status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data, + adapter->if_handle, + &adapter->pmac_id[0], 0); + if (status) goto err; - if (memcmp(addr->sa_data, current_mac, ETH_ALEN)) { - status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data, - adapter->if_handle, &adapter->pmac_id[0], 0); - if (status) - goto err; - - be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); - } + if (active_mac) + be_cmd_pmac_del(adapter, adapter->if_handle, + pmac_id, 0); +done: memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); return 0; err: @@ -261,7 +285,35 @@ err: return status; } -static void populate_be2_stats(struct be_adapter *adapter) +/* BE2 supports only v0 cmd */ +static void *hw_stats_from_cmd(struct be_adapter *adapter) +{ + if (BE2_chip(adapter)) { + struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va; + + return &cmd->hw_stats; + } else { + struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va; + + return &cmd->hw_stats; + } +} + +/* BE2 supports only v0 cmd */ +static void *be_erx_stats_from_cmd(struct be_adapter *adapter) +{ + if (BE2_chip(adapter)) { + struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter); + + return &hw_stats->erx; + } else { + struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter); + + return &hw_stats->erx; + } +} + +static void populate_be_v0_stats(struct be_adapter *adapter) { struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter); struct be_pmem_stats *pmem_sts = &hw_stats->pmem; @@ -310,7 +362,7 @@ static void populate_be2_stats(struct be_adapter *adapter) adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops; } -static void populate_be3_stats(struct be_adapter *adapter) +static void populate_be_v1_stats(struct be_adapter *adapter) { struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter); struct be_pmem_stats *pmem_sts = &hw_stats->pmem; @@ -412,28 +464,25 @@ void be_parse_stats(struct be_adapter *adapter) struct be_rx_obj *rxo; int i; - if (adapter->generation == BE_GEN3) { - if (lancer_chip(adapter)) - populate_lancer_stats(adapter); - else - populate_be3_stats(adapter); + if (lancer_chip(adapter)) { + populate_lancer_stats(adapter); } else { - populate_be2_stats(adapter); - } - - if (lancer_chip(adapter)) - goto done; + if (BE2_chip(adapter)) + populate_be_v0_stats(adapter); + else + /* for BE3 and Skyhawk */ + populate_be_v1_stats(adapter); - /* as erx_v1 is longer than v0, ok to use v1 defn for v0 access */ - for_all_rx_queues(adapter, rxo, i) { - /* below erx HW counter can actually wrap around after - * 65535. Driver accumulates a 32-bit value - */ - accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, - (u16)erx->rx_drops_no_fragments[rxo->q.id]); + /* as erx_v1 is longer than v0, ok to use v1 for v0 access */ + for_all_rx_queues(adapter, rxo, i) { + /* below erx HW counter can actually wrap around after + * 65535. Driver accumulates a 32-bit value + */ + accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags, + (u16)erx->rx_drops_no_fragments \ + [rxo->q.id]); + } } -done: - return; } static struct rtnl_link_stats64 *be_get_stats64(struct net_device *netdev, @@ -597,16 +646,6 @@ static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, hdr, skb_shinfo(skb)->gso_size); if (skb_is_gso_v6(skb) && !lancer_chip(adapter)) AMAP_SET_BITS(struct amap_eth_hdr_wrb, lso6, hdr, 1); - if (lancer_chip(adapter) && adapter->sli_family == - LANCER_A0_SLI_FAMILY) { - AMAP_SET_BITS(struct amap_eth_hdr_wrb, ipcs, hdr, 1); - if (is_tcp_pkt(skb)) - AMAP_SET_BITS(struct amap_eth_hdr_wrb, - tcpcs, hdr, 1); - else if (is_udp_pkt(skb)) - AMAP_SET_BITS(struct amap_eth_hdr_wrb, - udpcs, hdr, 1); - } } else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (is_tcp_pkt(skb)) AMAP_SET_BITS(struct amap_eth_hdr_wrb, tcpcs, hdr, 1); @@ -856,11 +895,15 @@ static int be_vlan_add_vid(struct net_device *netdev, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; - if (!be_physfn(adapter)) { + if (!lancer_chip(adapter) && !be_physfn(adapter)) { status = -EINVAL; goto ret; } + /* Packets with VID 0 are always received by Lancer by default */ + if (lancer_chip(adapter) && vid == 0) + goto ret; + adapter->vlan_tag[vid] = 1; if (adapter->vlans_added <= (adapter->max_vlans + 1)) status = be_vid_config(adapter); @@ -878,11 +921,15 @@ static int be_vlan_rem_vid(struct net_device *netdev, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; - if (!be_physfn(adapter)) { + if (!lancer_chip(adapter) && !be_physfn(adapter)) { status = -EINVAL; goto ret; } + /* Packets with VID 0 are always received by Lancer by default */ + if (lancer_chip(adapter) && vid == 0) + goto ret; + adapter->vlan_tag[vid] = 0; if (adapter->vlans_added <= adapter->max_vlans) status = be_vid_config(adapter); @@ -917,7 +964,7 @@ static void be_set_rx_mode(struct net_device *netdev) /* Enable multicast promisc if num configured exceeds what we support */ if (netdev->flags & IFF_ALLMULTI || - netdev_mc_count(netdev) > BE_MAX_MC) { + netdev_mc_count(netdev) > adapter->max_mcast_mac) { be_cmd_rx_filter(adapter, IFF_ALLMULTI, ON); goto done; } @@ -962,6 +1009,9 @@ static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; int status; + bool active_mac = false; + u32 pmac_id; + u8 old_mac[ETH_ALEN]; if (!sriov_enabled(adapter)) return -EPERM; @@ -970,6 +1020,12 @@ static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) return -EINVAL; if (lancer_chip(adapter)) { + status = be_cmd_get_mac_from_list(adapter, old_mac, &active_mac, + &pmac_id, vf + 1); + if (!status && active_mac) + be_cmd_pmac_del(adapter, vf_cfg->if_handle, + pmac_id, vf + 1); + status = be_cmd_set_mac_list(adapter, mac, 1, vf + 1); } else { status = be_cmd_pmac_del(adapter, vf_cfg->if_handle, @@ -1062,7 +1118,10 @@ static int be_set_vf_tx_rate(struct net_device *netdev, return -EINVAL; } - status = be_cmd_set_qos(adapter, rate / 10, vf + 1); + if (lancer_chip(adapter)) + status = be_cmd_set_profile_config(adapter, rate / 10, vf + 1); + else + status = be_cmd_set_qos(adapter, rate / 10, vf + 1); if (status) dev_err(&adapter->pdev->dev, @@ -1837,12 +1896,13 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_num_txqs_want(struct be_adapter *adapter) { - if (sriov_want(adapter) || be_is_mc(adapter) || - lancer_chip(adapter) || !be_physfn(adapter) || - adapter->generation == BE_GEN2) + if ((!lancer_chip(adapter) && sriov_want(adapter)) || + be_is_mc(adapter) || + (!lancer_chip(adapter) && !be_physfn(adapter)) || + BE2_chip(adapter)) return 1; else - return MAX_TX_QS; + return adapter->max_tx_queues; } static int be_tx_cqs_create(struct be_adapter *adapter) @@ -2177,9 +2237,11 @@ static void be_msix_disable(struct be_adapter *adapter) static uint be_num_rss_want(struct be_adapter *adapter) { u32 num = 0; + if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && - !sriov_want(adapter) && be_physfn(adapter)) { - num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; + (lancer_chip(adapter) || + (!sriov_want(adapter) && be_physfn(adapter)))) { + num = adapter->max_rss_queues; num = min_t(u32, num, (u32)netif_get_num_default_rss_queues()); } return num; @@ -2579,10 +2641,30 @@ static int be_clear(struct be_adapter *adapter) be_tx_queues_destroy(adapter); be_evt_queues_destroy(adapter); + kfree(adapter->pmac_id); + adapter->pmac_id = NULL; + be_msix_disable(adapter); return 0; } +static void be_get_vf_if_cap_flags(struct be_adapter *adapter, + u32 *cap_flags, u8 domain) +{ + bool profile_present = false; + int status; + + if (lancer_chip(adapter)) { + status = be_cmd_get_profile_config(adapter, cap_flags, domain); + if (!status) + profile_present = true; + } + + if (!profile_present) + *cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | + BE_IF_FLAGS_MULTICAST; +} + static int be_vf_setup_init(struct be_adapter *adapter) { struct be_vf_cfg *vf_cfg; @@ -2634,9 +2716,13 @@ static int be_vf_setup(struct be_adapter *adapter) if (status) goto err; - cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST; for_all_vfs(adapter, vf_cfg, vf) { + be_get_vf_if_cap_flags(adapter, &cap_flags, vf + 1); + + en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED | + BE_IF_FLAGS_BROADCAST | + BE_IF_FLAGS_MULTICAST); + status = be_cmd_if_create(adapter, cap_flags, en_flags, &vf_cfg->if_handle, vf + 1); if (status) @@ -2661,6 +2747,8 @@ static int be_vf_setup(struct be_adapter *adapter) if (status) goto err; vf_cfg->def_vid = def_vlan; + + be_cmd_enable_vf(adapter, vf + 1); } return 0; err: @@ -2674,7 +2762,10 @@ static void be_setup_init(struct be_adapter *adapter) adapter->if_handle = -1; adapter->be3_native = false; adapter->promiscuous = false; - adapter->eq_next_idx = 0; + if (be_physfn(adapter)) + adapter->cmd_privileges = MAX_PRIVILEGES; + else + adapter->cmd_privileges = MIN_PRIVILEGES; } static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle, @@ -2712,12 +2803,93 @@ static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle, return status; } +static void be_get_resources(struct be_adapter *adapter) +{ + int status; + bool profile_present = false; + + if (lancer_chip(adapter)) { + status = be_cmd_get_func_config(adapter); + + if (!status) + profile_present = true; + } + + if (profile_present) { + /* Sanity fixes for Lancer */ + adapter->max_pmac_cnt = min_t(u16, adapter->max_pmac_cnt, + BE_UC_PMAC_COUNT); + adapter->max_vlans = min_t(u16, adapter->max_vlans, + BE_NUM_VLANS_SUPPORTED); + adapter->max_mcast_mac = min_t(u16, adapter->max_mcast_mac, + BE_MAX_MC); + adapter->max_tx_queues = min_t(u16, adapter->max_tx_queues, + MAX_TX_QS); + adapter->max_rss_queues = min_t(u16, adapter->max_rss_queues, + BE3_MAX_RSS_QS); + adapter->max_event_queues = min_t(u16, + adapter->max_event_queues, + BE3_MAX_RSS_QS); + + if (adapter->max_rss_queues && + adapter->max_rss_queues == adapter->max_rx_queues) + adapter->max_rss_queues -= 1; + + if (adapter->max_event_queues < adapter->max_rss_queues) + adapter->max_rss_queues = adapter->max_event_queues; + + } else { + if (be_physfn(adapter)) + adapter->max_pmac_cnt = BE_UC_PMAC_COUNT; + else + adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT; + + if (adapter->function_mode & FLEX10_MODE) + adapter->max_vlans = BE_NUM_VLANS_SUPPORTED/8; + else + adapter->max_vlans = BE_NUM_VLANS_SUPPORTED; + + adapter->max_mcast_mac = BE_MAX_MC; + adapter->max_tx_queues = MAX_TX_QS; + adapter->max_rss_queues = (adapter->be3_native) ? + BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; + adapter->max_event_queues = BE3_MAX_RSS_QS; + + adapter->if_cap_flags = BE_IF_FLAGS_UNTAGGED | + BE_IF_FLAGS_BROADCAST | + BE_IF_FLAGS_MULTICAST | + BE_IF_FLAGS_PASS_L3L4_ERRORS | + BE_IF_FLAGS_MCAST_PROMISCUOUS | + BE_IF_FLAGS_VLAN_PROMISCUOUS | + BE_IF_FLAGS_PROMISCUOUS; + + if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) + adapter->if_cap_flags |= BE_IF_FLAGS_RSS; + } +} + /* Routine to query per function resource limits */ static int be_get_config(struct be_adapter *adapter) { - int pos; + int pos, status; u16 dev_num_vfs; + status = be_cmd_query_fw_cfg(adapter, &adapter->port_num, + &adapter->function_mode, + &adapter->function_caps); + if (status) + goto err; + + be_get_resources(adapter); + + /* primary mac needs 1 pmac entry */ + adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, + sizeof(u32), GFP_KERNEL); + if (!adapter->pmac_id) { + status = -ENOMEM; + goto err; + } + pos = pci_find_ext_capability(adapter->pdev, PCI_EXT_CAP_ID_SRIOV); if (pos) { pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF, @@ -2726,13 +2898,14 @@ static int be_get_config(struct be_adapter *adapter) dev_num_vfs = min_t(u16, dev_num_vfs, MAX_VFS); adapter->dev_num_vfs = dev_num_vfs; } - return 0; +err: + return status; } static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; - u32 cap_flags, en_flags; + u32 en_flags; u32 tx_fc, rx_fc; int status; u8 mac[ETH_ALEN]; @@ -2740,9 +2913,12 @@ static int be_setup(struct be_adapter *adapter) be_setup_init(adapter); - be_get_config(adapter); + if (!lancer_chip(adapter)) + be_cmd_req_native_mode(adapter); - be_cmd_req_native_mode(adapter); + status = be_get_config(adapter); + if (status) + goto err; be_msix_enable(adapter); @@ -2762,24 +2938,22 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; + be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0); + /* In UMC mode FW does not return right privileges. + * Override with correct privilege equivalent to PF. + */ + if (be_is_mc(adapter)) + adapter->cmd_privileges = MAX_PRIVILEGES; + en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS; - cap_flags = en_flags | BE_IF_FLAGS_MCAST_PROMISCUOUS | - BE_IF_FLAGS_VLAN_PROMISCUOUS | BE_IF_FLAGS_PROMISCUOUS; - if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) { - cap_flags |= BE_IF_FLAGS_RSS; + if (adapter->function_caps & BE_FUNCTION_CAPS_RSS) en_flags |= BE_IF_FLAGS_RSS; - } - if (lancer_chip(adapter) && !be_physfn(adapter)) { - en_flags = BE_IF_FLAGS_UNTAGGED | - BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST; - cap_flags = en_flags; - } + en_flags = en_flags & adapter->if_cap_flags; - status = be_cmd_if_create(adapter, cap_flags, en_flags, + status = be_cmd_if_create(adapter, adapter->if_cap_flags, en_flags, &adapter->if_handle, 0); if (status != 0) goto err; @@ -2827,8 +3001,8 @@ static int be_setup(struct be_adapter *adapter) dev_warn(dev, "device doesn't support SRIOV\n"); } - be_cmd_get_phy_info(adapter); - if (be_pause_supported(adapter)) + status = be_cmd_get_phy_info(adapter); + if (!status && be_pause_supported(adapter)) adapter->phy.fc_autoneg = 1; schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); @@ -2895,7 +3069,7 @@ static bool is_comp_in_ufi(struct be_adapter *adapter, int i = 0, img_type = 0; struct flash_section_info_g2 *fsec_g2 = NULL; - if (adapter->generation != BE_GEN3) + if (BE2_chip(adapter)) fsec_g2 = (struct flash_section_info_g2 *)fsec; for (i = 0; i < MAX_FLASH_COMP; i++) { @@ -2928,7 +3102,49 @@ struct flash_section_info *get_fsec_info(struct be_adapter *adapter, return NULL; } -static int be_flash_data(struct be_adapter *adapter, +static int be_flash(struct be_adapter *adapter, const u8 *img, + struct be_dma_mem *flash_cmd, int optype, int img_size) +{ + u32 total_bytes = 0, flash_op, num_bytes = 0; + int status = 0; + struct be_cmd_write_flashrom *req = flash_cmd->va; + + total_bytes = img_size; + while (total_bytes) { + num_bytes = min_t(u32, 32*1024, total_bytes); + + total_bytes -= num_bytes; + + if (!total_bytes) { + if (optype == OPTYPE_PHY_FW) + flash_op = FLASHROM_OPER_PHY_FLASH; + else + flash_op = FLASHROM_OPER_FLASH; + } else { + if (optype == OPTYPE_PHY_FW) + flash_op = FLASHROM_OPER_PHY_SAVE; + else + flash_op = FLASHROM_OPER_SAVE; + } + + memcpy(req->data_buf, img, num_bytes); + img += num_bytes; + status = be_cmd_write_flashrom(adapter, flash_cmd, optype, + flash_op, num_bytes); + if (status) { + if (status == ILLEGAL_IOCTL_REQ && + optype == OPTYPE_PHY_FW) + break; + dev_err(&adapter->pdev->dev, + "cmd to write to flash rom failed.\n"); + return status; + } + } + return 0; +} + +/* For BE2 and BE3 */ +static int be_flash_BEx(struct be_adapter *adapter, const struct firmware *fw, struct be_dma_mem *flash_cmd, int num_of_images) @@ -2936,12 +3152,9 @@ static int be_flash_data(struct be_adapter *adapter, { int status = 0, i, filehdr_size = 0; int img_hdrs_size = (num_of_images * sizeof(struct image_hdr)); - u32 total_bytes = 0, flash_op; - int num_bytes; const u8 *p = fw->data; - struct be_cmd_write_flashrom *req = flash_cmd->va; const struct flash_comp *pflashcomp; - int num_comp, hdr_size; + int num_comp, redboot; struct flash_section_info *fsec = NULL; struct flash_comp gen3_flash_types[] = { @@ -2986,7 +3199,7 @@ static int be_flash_data(struct be_adapter *adapter, FLASH_IMAGE_MAX_SIZE_g2, IMAGE_FIRMWARE_BACKUP_FCoE} }; - if (adapter->generation == BE_GEN3) { + if (BE3_chip(adapter)) { pflashcomp = gen3_flash_types; filehdr_size = sizeof(struct flash_file_hdr_g3); num_comp = ARRAY_SIZE(gen3_flash_types); @@ -2995,6 +3208,7 @@ static int be_flash_data(struct be_adapter *adapter, filehdr_size = sizeof(struct flash_file_hdr_g2); num_comp = ARRAY_SIZE(gen2_flash_types); } + /* Get flash section info*/ fsec = get_fsec_info(adapter, filehdr_size + img_hdrs_size, fw); if (!fsec) { @@ -3010,70 +3224,105 @@ static int be_flash_data(struct be_adapter *adapter, memcmp(adapter->fw_ver, "3.102.148.0", 11) < 0) continue; - if (pflashcomp[i].optype == OPTYPE_PHY_FW) { - if (!phy_flashing_required(adapter)) + if (pflashcomp[i].optype == OPTYPE_PHY_FW && + !phy_flashing_required(adapter)) continue; - } - - hdr_size = filehdr_size + - (num_of_images * sizeof(struct image_hdr)); - if ((pflashcomp[i].optype == OPTYPE_REDBOOT) && - (!be_flash_redboot(adapter, fw->data, pflashcomp[i].offset, - pflashcomp[i].size, hdr_size))) - continue; + if (pflashcomp[i].optype == OPTYPE_REDBOOT) { + redboot = be_flash_redboot(adapter, fw->data, + pflashcomp[i].offset, pflashcomp[i].size, + filehdr_size + img_hdrs_size); + if (!redboot) + continue; + } - /* Flash the component */ p = fw->data; p += filehdr_size + pflashcomp[i].offset + img_hdrs_size; if (p + pflashcomp[i].size > fw->data + fw->size) return -1; - total_bytes = pflashcomp[i].size; - while (total_bytes) { - if (total_bytes > 32*1024) - num_bytes = 32*1024; - else - num_bytes = total_bytes; - total_bytes -= num_bytes; - if (!total_bytes) { - if (pflashcomp[i].optype == OPTYPE_PHY_FW) - flash_op = FLASHROM_OPER_PHY_FLASH; - else - flash_op = FLASHROM_OPER_FLASH; - } else { - if (pflashcomp[i].optype == OPTYPE_PHY_FW) - flash_op = FLASHROM_OPER_PHY_SAVE; - else - flash_op = FLASHROM_OPER_SAVE; - } - memcpy(req->params.data_buf, p, num_bytes); - p += num_bytes; - status = be_cmd_write_flashrom(adapter, flash_cmd, - pflashcomp[i].optype, flash_op, num_bytes); - if (status) { - if ((status == ILLEGAL_IOCTL_REQ) && - (pflashcomp[i].optype == - OPTYPE_PHY_FW)) - break; - dev_err(&adapter->pdev->dev, - "cmd to write to flash rom failed.\n"); - return -1; - } + + status = be_flash(adapter, p, flash_cmd, pflashcomp[i].optype, + pflashcomp[i].size); + if (status) { + dev_err(&adapter->pdev->dev, + "Flashing section type %d failed.\n", + pflashcomp[i].img_type); + return status; } } return 0; } -static int get_ufigen_type(struct flash_file_hdr_g2 *fhdr) +static int be_flash_skyhawk(struct be_adapter *adapter, + const struct firmware *fw, + struct be_dma_mem *flash_cmd, int num_of_images) { - if (fhdr == NULL) - return 0; - if (fhdr->build[0] == '3') - return BE_GEN3; - else if (fhdr->build[0] == '2') - return BE_GEN2; - else - return 0; + int status = 0, i, filehdr_size = 0; + int img_offset, img_size, img_optype, redboot; + int img_hdrs_size = num_of_images * sizeof(struct image_hdr); + const u8 *p = fw->data; + struct flash_section_info *fsec = NULL; + + filehdr_size = sizeof(struct flash_file_hdr_g3); + fsec = get_fsec_info(adapter, filehdr_size + img_hdrs_size, fw); + if (!fsec) { + dev_err(&adapter->pdev->dev, + "Invalid Cookie. UFI corrupted ?\n"); + return -1; + } + + for (i = 0; i < le32_to_cpu(fsec->fsec_hdr.num_images); i++) { + img_offset = le32_to_cpu(fsec->fsec_entry[i].offset); + img_size = le32_to_cpu(fsec->fsec_entry[i].pad_size); + + switch (le32_to_cpu(fsec->fsec_entry[i].type)) { + case IMAGE_FIRMWARE_iSCSI: + img_optype = OPTYPE_ISCSI_ACTIVE; + break; + case IMAGE_BOOT_CODE: + img_optype = OPTYPE_REDBOOT; + break; + case IMAGE_OPTION_ROM_ISCSI: + img_optype = OPTYPE_BIOS; + break; + case IMAGE_OPTION_ROM_PXE: + img_optype = OPTYPE_PXE_BIOS; + break; + case IMAGE_OPTION_ROM_FCoE: + img_optype = OPTYPE_FCOE_BIOS; + break; + case IMAGE_FIRMWARE_BACKUP_iSCSI: + img_optype = OPTYPE_ISCSI_BACKUP; + break; + case IMAGE_NCSI: + img_optype = OPTYPE_NCSI_FW; + break; + default: + continue; + } + + if (img_optype == OPTYPE_REDBOOT) { + redboot = be_flash_redboot(adapter, fw->data, + img_offset, img_size, + filehdr_size + img_hdrs_size); + if (!redboot) + continue; + } + + p = fw->data; + p += filehdr_size + img_offset + img_hdrs_size; + if (p + img_size > fw->data + fw->size) + return -1; + + status = be_flash(adapter, p, flash_cmd, img_optype, img_size); + if (status) { + dev_err(&adapter->pdev->dev, + "Flashing section type %d failed.\n", + fsec->fsec_entry[i].type); + return status; + } + } + return 0; } static int lancer_wait_idle(struct be_adapter *adapter) @@ -3207,6 +3456,28 @@ lancer_fw_exit: return status; } +#define UFI_TYPE2 2 +#define UFI_TYPE3 3 +#define UFI_TYPE4 4 +static int be_get_ufi_type(struct be_adapter *adapter, + struct flash_file_hdr_g2 *fhdr) +{ + if (fhdr == NULL) + goto be_get_ufi_exit; + + if (skyhawk_chip(adapter) && fhdr->build[0] == '4') + return UFI_TYPE4; + else if (BE3_chip(adapter) && fhdr->build[0] == '3') + return UFI_TYPE3; + else if (BE2_chip(adapter) && fhdr->build[0] == '2') + return UFI_TYPE2; + +be_get_ufi_exit: + dev_err(&adapter->pdev->dev, + "UFI and Interface are not compatible for flashing\n"); + return -1; +} + static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) { struct flash_file_hdr_g2 *fhdr; @@ -3214,12 +3485,9 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) struct image_hdr *img_hdr_ptr = NULL; struct be_dma_mem flash_cmd; const u8 *p; - int status = 0, i = 0, num_imgs = 0; + int status = 0, i = 0, num_imgs = 0, ufi_type = 0; - p = fw->data; - fhdr = (struct flash_file_hdr_g2 *) p; - - flash_cmd.size = sizeof(struct be_cmd_write_flashrom) + 32*1024; + flash_cmd.size = sizeof(struct be_cmd_write_flashrom); flash_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, flash_cmd.size, &flash_cmd.dma, GFP_KERNEL); if (!flash_cmd.va) { @@ -3229,27 +3497,32 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw) goto be_fw_exit; } - if ((adapter->generation == BE_GEN3) && - (get_ufigen_type(fhdr) == BE_GEN3)) { - fhdr3 = (struct flash_file_hdr_g3 *) fw->data; - num_imgs = le32_to_cpu(fhdr3->num_imgs); - for (i = 0; i < num_imgs; i++) { - img_hdr_ptr = (struct image_hdr *) (fw->data + - (sizeof(struct flash_file_hdr_g3) + - i * sizeof(struct image_hdr))); - if (le32_to_cpu(img_hdr_ptr->imageid) == 1) - status = be_flash_data(adapter, fw, &flash_cmd, - num_imgs); + p = fw->data; + fhdr = (struct flash_file_hdr_g2 *)p; + + ufi_type = be_get_ufi_type(adapter, fhdr); + + fhdr3 = (struct flash_file_hdr_g3 *)fw->data; + num_imgs = le32_to_cpu(fhdr3->num_imgs); + for (i = 0; i < num_imgs; i++) { + img_hdr_ptr = (struct image_hdr *)(fw->data + + (sizeof(struct flash_file_hdr_g3) + + i * sizeof(struct image_hdr))); + if (le32_to_cpu(img_hdr_ptr->imageid) == 1) { + if (ufi_type == UFI_TYPE4) + status = be_flash_skyhawk(adapter, fw, + &flash_cmd, num_imgs); + else if (ufi_type == UFI_TYPE3) + status = be_flash_BEx(adapter, fw, &flash_cmd, + num_imgs); } - } else if ((adapter->generation == BE_GEN2) && - (get_ufigen_type(fhdr) == BE_GEN2)) { - status = be_flash_data(adapter, fw, &flash_cmd, 0); - } else { - dev_err(&adapter->pdev->dev, - "UFI and Interface are not compatible for flashing\n"); - status = -1; } + if (ufi_type == UFI_TYPE2) + status = be_flash_BEx(adapter, fw, &flash_cmd, 0); + else if (ufi_type == -1) + status = -1; + dma_free_coherent(&adapter->pdev->dev, flash_cmd.size, flash_cmd.va, flash_cmd.dma); if (status) { @@ -3344,80 +3617,47 @@ static void be_netdev_init(struct net_device *netdev) static void be_unmap_pci_bars(struct be_adapter *adapter) { - if (adapter->csr) - iounmap(adapter->csr); if (adapter->db) - iounmap(adapter->db); - if (adapter->roce_db.base) - pci_iounmap(adapter->pdev, adapter->roce_db.base); + pci_iounmap(adapter->pdev, adapter->db); } -static int lancer_roce_map_pci_bars(struct be_adapter *adapter) +static int db_bar(struct be_adapter *adapter) { - struct pci_dev *pdev = adapter->pdev; - u8 __iomem *addr; - - addr = pci_iomap(pdev, 2, 0); - if (addr == NULL) - return -ENOMEM; + if (lancer_chip(adapter) || !be_physfn(adapter)) + return 0; + else + return 4; +} - adapter->roce_db.base = addr; - adapter->roce_db.io_addr = pci_resource_start(pdev, 2); - adapter->roce_db.size = 8192; - adapter->roce_db.total_size = pci_resource_len(pdev, 2); +static int be_roce_map_pci_bars(struct be_adapter *adapter) +{ + if (skyhawk_chip(adapter)) { + adapter->roce_db.size = 4096; + adapter->roce_db.io_addr = pci_resource_start(adapter->pdev, + db_bar(adapter)); + adapter->roce_db.total_size = pci_resource_len(adapter->pdev, + db_bar(adapter)); + } return 0; } static int be_map_pci_bars(struct be_adapter *adapter) { u8 __iomem *addr; - int db_reg; + u32 sli_intf; - if (lancer_chip(adapter)) { - if (be_type_2_3(adapter)) { - addr = ioremap_nocache( - pci_resource_start(adapter->pdev, 0), - pci_resource_len(adapter->pdev, 0)); - if (addr == NULL) - return -ENOMEM; - adapter->db = addr; - } - if (adapter->if_type == SLI_INTF_TYPE_3) { - if (lancer_roce_map_pci_bars(adapter)) - goto pci_map_err; - } - return 0; - } - - if (be_physfn(adapter)) { - addr = ioremap_nocache(pci_resource_start(adapter->pdev, 2), - pci_resource_len(adapter->pdev, 2)); - if (addr == NULL) - return -ENOMEM; - adapter->csr = addr; - } + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >> + SLI_INTF_IF_TYPE_SHIFT; - if (adapter->generation == BE_GEN2) { - db_reg = 4; - } else { - if (be_physfn(adapter)) - db_reg = 4; - else - db_reg = 0; - } - addr = ioremap_nocache(pci_resource_start(adapter->pdev, db_reg), - pci_resource_len(adapter->pdev, db_reg)); + addr = pci_iomap(adapter->pdev, db_bar(adapter), 0); if (addr == NULL) goto pci_map_err; adapter->db = addr; - if (adapter->sli_family == SKYHAWK_SLI_FAMILY) { - adapter->roce_db.size = 4096; - adapter->roce_db.io_addr = - pci_resource_start(adapter->pdev, db_reg); - adapter->roce_db.total_size = - pci_resource_len(adapter->pdev, db_reg); - } + + be_roce_map_pci_bars(adapter); return 0; + pci_map_err: be_unmap_pci_bars(adapter); return -ENOMEM; @@ -3437,7 +3677,6 @@ static void be_ctrl_cleanup(struct be_adapter *adapter) if (mem->va) dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, mem->dma); - kfree(adapter->pmac_id); } static int be_ctrl_init(struct be_adapter *adapter) @@ -3445,8 +3684,14 @@ static int be_ctrl_init(struct be_adapter *adapter) struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced; struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem; struct be_dma_mem *rx_filter = &adapter->rx_filter; + u32 sli_intf; int status; + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> + SLI_INTF_FAMILY_SHIFT; + adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; + status = be_map_pci_bars(adapter); if (status) goto done; @@ -3473,13 +3718,6 @@ static int be_ctrl_init(struct be_adapter *adapter) goto free_mbox; } memset(rx_filter->va, 0, rx_filter->size); - - /* primary mac needs 1 pmac entry */ - adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, - sizeof(*adapter->pmac_id), GFP_KERNEL); - if (!adapter->pmac_id) - return -ENOMEM; - mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); @@ -3512,14 +3750,14 @@ static int be_stats_init(struct be_adapter *adapter) { struct be_dma_mem *cmd = &adapter->stats_cmd; - if (adapter->generation == BE_GEN2) { + if (lancer_chip(adapter)) + cmd->size = sizeof(struct lancer_cmd_req_pport_stats); + else if (BE2_chip(adapter)) cmd->size = sizeof(struct be_cmd_req_get_stats_v0); - } else { - if (lancer_chip(adapter)) - cmd->size = sizeof(struct lancer_cmd_req_pport_stats); - else - cmd->size = sizeof(struct be_cmd_req_get_stats_v1); - } + else + /* BE3 and Skyhawk */ + cmd->size = sizeof(struct be_cmd_req_get_stats_v1); + cmd->va = dma_alloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, GFP_KERNEL); if (cmd->va == NULL) @@ -3573,6 +3811,9 @@ u32 be_get_fw_log_level(struct be_adapter *adapter) u32 level = 0; int j; + if (lancer_chip(adapter)) + return 0; + memset(&extfat_cmd, 0, sizeof(struct be_dma_mem)); extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps); extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size, @@ -3598,26 +3839,12 @@ u32 be_get_fw_log_level(struct be_adapter *adapter) err: return level; } + static int be_get_initial_config(struct be_adapter *adapter) { int status; u32 level; - status = be_cmd_query_fw_cfg(adapter, &adapter->port_num, - &adapter->function_mode, &adapter->function_caps); - if (status) - return status; - - if (adapter->function_mode & FLEX10_MODE) - adapter->max_vlans = BE_NUM_VLANS_SUPPORTED/8; - else - adapter->max_vlans = BE_NUM_VLANS_SUPPORTED; - - if (be_physfn(adapter)) - adapter->max_pmac_cnt = BE_UC_PMAC_COUNT; - else - adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT; - status = be_cmd_get_cntl_attributes(adapter); if (status) return status; @@ -3642,55 +3869,6 @@ static int be_get_initial_config(struct be_adapter *adapter) return 0; } -static int be_dev_type_check(struct be_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - u32 sli_intf = 0, if_type; - - switch (pdev->device) { - case BE_DEVICE_ID1: - case OC_DEVICE_ID1: - adapter->generation = BE_GEN2; - break; - case BE_DEVICE_ID2: - case OC_DEVICE_ID2: - adapter->generation = BE_GEN3; - break; - case OC_DEVICE_ID3: - case OC_DEVICE_ID4: - pci_read_config_dword(pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >> - SLI_INTF_IF_TYPE_SHIFT; - if_type = (sli_intf & SLI_INTF_IF_TYPE_MASK) >> - SLI_INTF_IF_TYPE_SHIFT; - if (((sli_intf & SLI_INTF_VALID_MASK) != SLI_INTF_VALID) || - !be_type_2_3(adapter)) { - dev_err(&pdev->dev, "SLI_INTF reg val is not valid\n"); - return -EINVAL; - } - adapter->sli_family = ((sli_intf & SLI_INTF_FAMILY_MASK) >> - SLI_INTF_FAMILY_SHIFT); - adapter->generation = BE_GEN3; - break; - case OC_DEVICE_ID5: - pci_read_config_dword(pdev, SLI_INTF_REG_OFFSET, &sli_intf); - if ((sli_intf & SLI_INTF_VALID_MASK) != SLI_INTF_VALID) { - dev_err(&pdev->dev, "SLI_INTF reg val is not valid\n"); - return -EINVAL; - } - adapter->sli_family = ((sli_intf & SLI_INTF_FAMILY_MASK) >> - SLI_INTF_FAMILY_SHIFT); - adapter->generation = BE_GEN3; - break; - default: - adapter->generation = 0; - } - - pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; - return 0; -} - static int lancer_recover_func(struct be_adapter *adapter) { int status; @@ -3721,8 +3899,9 @@ static int lancer_recover_func(struct be_adapter *adapter) "Adapter SLIPORT recovery succeeded\n"); return 0; err: - dev_err(&adapter->pdev->dev, - "Adapter SLIPORT recovery failed\n"); + if (adapter->eeh_error) + dev_err(&adapter->pdev->dev, + "Adapter SLIPORT recovery failed\n"); return status; } @@ -3845,11 +4024,6 @@ static int __devinit be_probe(struct pci_dev *pdev, adapter = netdev_priv(netdev); adapter->pdev = pdev; pci_set_drvdata(pdev, adapter); - - status = be_dev_type_check(adapter); - if (status) - goto free_netdev; - adapter->netdev = netdev; SET_NETDEV_DEV(netdev, &pdev->dev); @@ -4023,9 +4197,6 @@ static void be_shutdown(struct pci_dev *pdev) netif_device_detach(adapter->netdev); - if (adapter->wol) - be_setup_wol(adapter, true); - be_cmd_reset_function(adapter); pci_disable_device(pdev); @@ -4061,9 +4232,13 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, /* The error could cause the FW to trigger a flash debug dump. * Resetting the card while flash dump is in progress - * can cause it not to recover; wait for it to finish + * can cause it not to recover; wait for it to finish. + * Wait only for first function as it is needed only once per + * adapter. */ - ssleep(30); + if (pdev->devfn == 0) + ssleep(30); + return PCI_ERS_RESULT_NEED_RESET; } diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index deecc44b361..55d32aa0a09 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -47,10 +47,7 @@ static void _be_roce_dev_add(struct be_adapter *adapter) dev_info.dpp_unmapped_len = 0; } dev_info.pdev = adapter->pdev; - if (adapter->sli_family == SKYHAWK_SLI_FAMILY) - dev_info.db = adapter->db; - else - dev_info.db = adapter->roce_db.base; + dev_info.db = adapter->db; dev_info.unmapped_db = adapter->roce_db.io_addr; dev_info.db_page_size = adapter->roce_db.size; dev_info.db_total_size = adapter->roce_db.total_size; diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index feff51664dc..5ba6e1cbd34 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -92,4 +92,13 @@ config GIANFAR This driver supports the Gigabit TSEC on the MPC83xx, MPC85xx, and MPC86xx family of chips, and the FEC on the 8540. +config FEC_PTP + bool "PTP Hardware Clock (PHC)" + depends on FEC && ARCH_MXC + select PTP_1588_CLOCK + default y if SOC_IMX6Q + --help--- + Say Y here if you want to use PTP Hardware Clock (PHC) in the + driver. Only the basic clock operations have been implemented. + endif # NET_VENDOR_FREESCALE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 3d1839afff6..d4d19b3d00a 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_FEC) += fec.o +obj-$(CONFIG_FEC_PTP) += fec_ptp.o obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx.o ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y) obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c index fffd20528b5..2665162ff4e 100644 --- a/drivers/net/ethernet/freescale/fec.c +++ b/drivers/net/ethernet/freescale/fec.c @@ -140,21 +140,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #endif #endif /* CONFIG_M5272 */ -/* The number of Tx and Rx buffers. These are allocated from the page - * pool. The code may assume these are power of two, so it it best - * to keep them that size. - * We don't need to allocate pages for the transmitter. We just use - * the skbuffer directly. - */ -#define FEC_ENET_RX_PAGES 8 -#define FEC_ENET_RX_FRSIZE 2048 -#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) -#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) -#define FEC_ENET_TX_FRSIZE 2048 -#define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) -#define TX_RING_SIZE 16 /* Must be power of two */ -#define TX_RING_MOD_MASK 15 /* for this to work */ - #if (((RX_RING_SIZE + TX_RING_SIZE) * 8) > PAGE_SIZE) #error "FEC: descriptor ring size constants too large" #endif @@ -179,9 +164,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define PKT_MINBUF_SIZE 64 #define PKT_MAXBLR_SIZE 1520 -/* This device has up to three irqs on some platforms */ -#define FEC_IRQ_NUM 3 - /* * The 5270/5271/5280/5282/532x RX control register also contains maximum frame * size bits. Other FEC hardware does not, so we need to take that into @@ -194,61 +176,6 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define OPT_FRAME_SIZE 0 #endif -/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and - * tx_bd_base always point to the base of the buffer descriptors. The - * cur_rx and cur_tx point to the currently available buffer. - * The dirty_tx tracks the current buffer that is being sent by the - * controller. The cur_tx and dirty_tx are equal under both completely - * empty and completely full conditions. The empty/ready indicator in - * the buffer descriptor determines the actual condition. - */ -struct fec_enet_private { - /* Hardware registers of the FEC device */ - void __iomem *hwp; - - struct net_device *netdev; - - struct clk *clk_ipg; - struct clk *clk_ahb; - - /* The saved address of a sent-in-place packet/buffer, for skfree(). */ - unsigned char *tx_bounce[TX_RING_SIZE]; - struct sk_buff* tx_skbuff[TX_RING_SIZE]; - struct sk_buff* rx_skbuff[RX_RING_SIZE]; - ushort skb_cur; - ushort skb_dirty; - - /* CPM dual port RAM relative addresses */ - dma_addr_t bd_dma; - /* Address of Rx and Tx buffers */ - struct bufdesc *rx_bd_base; - struct bufdesc *tx_bd_base; - /* The next free ring entry */ - struct bufdesc *cur_rx, *cur_tx; - /* The ring entries to be free()ed */ - struct bufdesc *dirty_tx; - - uint tx_full; - /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ - spinlock_t hw_lock; - - struct platform_device *pdev; - - int opened; - int dev_id; - - /* Phylib and MDIO interface */ - struct mii_bus *mii_bus; - struct phy_device *phy_dev; - int mii_timeout; - uint phy_speed; - phy_interface_t phy_interface; - int link; - int full_duplex; - struct completion mdio_done; - int irq[FEC_IRQ_NUM]; -}; - /* FEC MII MMFR bits definition */ #define FEC_MMFR_ST (1 << 30) #define FEC_MMFR_OP_READ (2 << 28) @@ -353,6 +280,17 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | BD_ENET_TX_LAST | BD_ENET_TX_TC); bdp->cbd_sc = status; +#ifdef CONFIG_FEC_PTP + bdp->cbd_bdu = 0; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + fep->hwts_tx_en)) { + bdp->cbd_esc = (BD_ENET_TX_TS | BD_ENET_TX_INT); + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } else { + + bdp->cbd_esc = BD_ENET_TX_INT; + } +#endif /* Trigger transmission start */ writel(0, fep->hwp + FEC_X_DES_ACTIVE); @@ -510,10 +448,17 @@ fec_restart(struct net_device *ndev, int duplex) writel(1 << 8, fep->hwp + FEC_X_WMRK); } +#ifdef CONFIG_FEC_PTP + ecntl |= (1 << 4); +#endif + /* And last, enable the transmit and receive processing */ writel(ecntl, fep->hwp + FEC_ECNTRL); writel(0, fep->hwp + FEC_R_DES_ACTIVE); +#ifdef CONFIG_FEC_PTP + fec_ptp_start_cyclecounter(ndev); +#endif /* Enable interrupts we wish to service */ writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); } @@ -599,6 +544,19 @@ fec_enet_tx(struct net_device *ndev) ndev->stats.tx_packets++; } +#ifdef CONFIG_FEC_PTP + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + struct skb_shared_hwtstamps shhwtstamps; + unsigned long flags; + + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + spin_lock_irqsave(&fep->tmreg_lock, flags); + shhwtstamps.hwtstamp = ns_to_ktime( + timecounter_cyc2time(&fep->tc, bdp->ts)); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + skb_tstamp_tx(skb, &shhwtstamps); + } +#endif if (status & BD_ENET_TX_READY) printk("HEY! Enet xmit interrupt and TX_READY.\n"); @@ -725,6 +683,21 @@ fec_enet_rx(struct net_device *ndev) skb_put(skb, pkt_len - 4); /* Make room */ skb_copy_to_linear_data(skb, data, pkt_len - 4); skb->protocol = eth_type_trans(skb, ndev); +#ifdef CONFIG_FEC_PTP + /* Get receive timestamp from the skb */ + if (fep->hwts_rx_en) { + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + unsigned long flags; + + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + + spin_lock_irqsave(&fep->tmreg_lock, flags); + shhwtstamps->hwtstamp = ns_to_ktime( + timecounter_cyc2time(&fep->tc, bdp->ts)); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + } +#endif if (!skb_defer_rx_timestamp(skb)) netif_rx(skb); } @@ -739,6 +712,12 @@ rx_processing_done: status |= BD_ENET_RX_EMPTY; bdp->cbd_sc = status; +#ifdef CONFIG_FEC_PTP + bdp->cbd_esc = BD_ENET_RX_INT; + bdp->cbd_prot = 0; + bdp->cbd_bdu = 0; +#endif + /* Update BD pointer to next entry */ if (status & BD_ENET_RX_WRAP) bdp = fep->rx_bd_base; @@ -1178,6 +1157,10 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) if (!phydev) return -ENODEV; +#ifdef CONFIG_FEC_PTP + if (cmd == SIOCSHWTSTAMP) + return fec_ptp_ioctl(ndev, rq, cmd); +#endif return phy_mii_ioctl(phydev, rq, cmd); } @@ -1224,6 +1207,9 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, skb->data, FEC_ENET_RX_FRSIZE, DMA_FROM_DEVICE); bdp->cbd_sc = BD_ENET_RX_EMPTY; +#ifdef CONFIG_FEC_PTP + bdp->cbd_esc = BD_ENET_RX_INT; +#endif bdp++; } @@ -1237,6 +1223,10 @@ static int fec_enet_alloc_buffers(struct net_device *ndev) bdp->cbd_sc = 0; bdp->cbd_bufaddr = 0; + +#ifdef CONFIG_FEC_PTP + bdp->cbd_esc = BD_ENET_RX_INT; +#endif bdp++; } @@ -1638,9 +1628,19 @@ fec_probe(struct platform_device *pdev) goto failed_clk; } +#ifdef CONFIG_FEC_PTP + fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp"); + if (IS_ERR(fep->clk_ptp)) { + ret = PTR_ERR(fep->clk_ptp); + goto failed_clk; + } +#endif + clk_prepare_enable(fep->clk_ahb); clk_prepare_enable(fep->clk_ipg); - +#ifdef CONFIG_FEC_PTP + clk_prepare_enable(fep->clk_ptp); +#endif reg_phy = devm_regulator_get(&pdev->dev, "phy"); if (!IS_ERR(reg_phy)) { ret = regulator_enable(reg_phy); @@ -1668,6 +1668,10 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_register; +#ifdef CONFIG_FEC_PTP + fec_ptp_init(ndev, pdev); +#endif + return 0; failed_register: @@ -1677,6 +1681,9 @@ failed_init: failed_regulator: clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); +#ifdef CONFIG_FEC_PTP + clk_disable_unprepare(fep->clk_ptp); +#endif failed_pin: failed_clk: for (i = 0; i < FEC_IRQ_NUM; i++) { @@ -1709,6 +1716,12 @@ fec_drv_remove(struct platform_device *pdev) if (irq > 0) free_irq(irq, ndev); } +#ifdef CONFIG_FEC_PTP + del_timer_sync(&fep->time_keep); + clk_disable_unprepare(fep->clk_ptp); + if (fep->ptp_clock) + ptp_clock_unregister(fep->ptp_clock); +#endif clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); iounmap(fep->hwp); diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 8408c627b19..c5a3bc1475c 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -13,6 +13,12 @@ #define FEC_H /****************************************************************************/ +#ifdef CONFIG_FEC_PTP +#include <linux/clocksource.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_clock_kernel.h> +#endif + #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ defined(CONFIG_ARCH_MXC) || defined(CONFIG_SOC_IMX28) @@ -88,6 +94,13 @@ struct bufdesc { unsigned short cbd_datlen; /* Data length */ unsigned short cbd_sc; /* Control and status info */ unsigned long cbd_bufaddr; /* Buffer address */ +#ifdef CONFIG_FEC_PTP + unsigned long cbd_esc; + unsigned long cbd_prot; + unsigned long cbd_bdu; + unsigned long ts; + unsigned short res0[4]; +#endif }; #else struct bufdesc { @@ -147,6 +160,112 @@ struct bufdesc { #define BD_ENET_TX_CSL ((ushort)0x0001) #define BD_ENET_TX_STATS ((ushort)0x03ff) /* All status bits */ +/*enhanced buffer desciptor control/status used by Ethernet transmit*/ +#define BD_ENET_TX_INT 0x40000000 +#define BD_ENET_TX_TS 0x20000000 + + +/* This device has up to three irqs on some platforms */ +#define FEC_IRQ_NUM 3 + +/* The number of Tx and Rx buffers. These are allocated from the page + * pool. The code may assume these are power of two, so it it best + * to keep them that size. + * We don't need to allocate pages for the transmitter. We just use + * the skbuffer directly. + */ + +#define FEC_ENET_RX_PAGES 8 +#define FEC_ENET_RX_FRSIZE 2048 +#define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) +#define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) +#define FEC_ENET_TX_FRSIZE 2048 +#define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) +#define TX_RING_SIZE 16 /* Must be power of two */ +#define TX_RING_MOD_MASK 15 /* for this to work */ + +#define BD_ENET_RX_INT 0x00800000 +#define BD_ENET_RX_PTP ((ushort)0x0400) + +/* The FEC buffer descriptors track the ring buffers. The rx_bd_base and + * tx_bd_base always point to the base of the buffer descriptors. The + * cur_rx and cur_tx point to the currently available buffer. + * The dirty_tx tracks the current buffer that is being sent by the + * controller. The cur_tx and dirty_tx are equal under both completely + * empty and completely full conditions. The empty/ready indicator in + * the buffer descriptor determines the actual condition. + */ +struct fec_enet_private { + /* Hardware registers of the FEC device */ + void __iomem *hwp; + + struct net_device *netdev; + + struct clk *clk_ipg; + struct clk *clk_ahb; +#ifdef CONFIG_FEC_PTP + struct clk *clk_ptp; +#endif + + /* The saved address of a sent-in-place packet/buffer, for skfree(). */ + unsigned char *tx_bounce[TX_RING_SIZE]; + struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct sk_buff *rx_skbuff[RX_RING_SIZE]; + ushort skb_cur; + ushort skb_dirty; + + /* CPM dual port RAM relative addresses */ + dma_addr_t bd_dma; + /* Address of Rx and Tx buffers */ + struct bufdesc *rx_bd_base; + struct bufdesc *tx_bd_base; + /* The next free ring entry */ + struct bufdesc *cur_rx, *cur_tx; + /* The ring entries to be free()ed */ + struct bufdesc *dirty_tx; + + uint tx_full; + /* hold while accessing the HW like ringbuffer for tx/rx but not MAC */ + spinlock_t hw_lock; + + struct platform_device *pdev; + + int opened; + int dev_id; + + /* Phylib and MDIO interface */ + struct mii_bus *mii_bus; + struct phy_device *phy_dev; + int mii_timeout; + uint phy_speed; + phy_interface_t phy_interface; + int link; + int full_duplex; + struct completion mdio_done; + int irq[FEC_IRQ_NUM]; + +#ifdef CONFIG_FEC_PTP + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_caps; + unsigned long last_overflow_check; + spinlock_t tmreg_lock; + struct cyclecounter cc; + struct timecounter tc; + int rx_hwtstamp_filter; + u32 base_incval; + u32 cycle_speed; + int hwts_rx_en; + int hwts_tx_en; + struct timer_list time_keep; +#endif + +}; + +#ifdef CONFIG_FEC_PTP +void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev); +void fec_ptp_start_cyclecounter(struct net_device *ndev); +int fec_ptp_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd); +#endif /****************************************************************************/ #endif /* FEC_H */ diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c new file mode 100644 index 00000000000..c40526c78c2 --- /dev/null +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -0,0 +1,383 @@ +/* + * Fast Ethernet Controller (ENET) PTP driver for MX6x. + * + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/ptrace.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/clk.h> +#include <linux/platform_device.h> +#include <linux/phy.h> +#include <linux/fec.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_gpio.h> +#include <linux/of_net.h> + +#include "fec.h" + +/* FEC 1588 register bits */ +#define FEC_T_CTRL_SLAVE 0x00002000 +#define FEC_T_CTRL_CAPTURE 0x00000800 +#define FEC_T_CTRL_RESTART 0x00000200 +#define FEC_T_CTRL_PERIOD_RST 0x00000030 +#define FEC_T_CTRL_PERIOD_EN 0x00000010 +#define FEC_T_CTRL_ENABLE 0x00000001 + +#define FEC_T_INC_MASK 0x0000007f +#define FEC_T_INC_OFFSET 0 +#define FEC_T_INC_CORR_MASK 0x00007f00 +#define FEC_T_INC_CORR_OFFSET 8 + +#define FEC_ATIME_CTRL 0x400 +#define FEC_ATIME 0x404 +#define FEC_ATIME_EVT_OFFSET 0x408 +#define FEC_ATIME_EVT_PERIOD 0x40c +#define FEC_ATIME_CORR 0x410 +#define FEC_ATIME_INC 0x414 +#define FEC_TS_TIMESTAMP 0x418 + +#define FEC_CC_MULT (1 << 31) +/** + * fec_ptp_read - read raw cycle counter (to be used by time counter) + * @cc: the cyclecounter structure + * + * this function reads the cyclecounter registers and is called by the + * cyclecounter structure used to construct a ns counter from the + * arbitrary fixed point registers + */ +static cycle_t fec_ptp_read(const struct cyclecounter *cc) +{ + struct fec_enet_private *fep = + container_of(cc, struct fec_enet_private, cc); + u32 tempval; + + tempval = readl(fep->hwp + FEC_ATIME_CTRL); + tempval |= FEC_T_CTRL_CAPTURE; + writel(tempval, fep->hwp + FEC_ATIME_CTRL); + + return readl(fep->hwp + FEC_ATIME); +} + +/** + * fec_ptp_start_cyclecounter - create the cycle counter from hw + * @ndev: network device + * + * this function initializes the timecounter and cyclecounter + * structures for use in generated a ns counter from the arbitrary + * fixed point cycles registers in the hardware. + */ +void fec_ptp_start_cyclecounter(struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + unsigned long flags; + int inc; + + inc = 1000000000 / clk_get_rate(fep->clk_ptp); + + /* grab the ptp lock */ + spin_lock_irqsave(&fep->tmreg_lock, flags); + + /* 1ns counter */ + writel(inc << FEC_T_INC_OFFSET, fep->hwp + FEC_ATIME_INC); + + /* use free running count */ + writel(0, fep->hwp + FEC_ATIME_EVT_PERIOD); + + writel(FEC_T_CTRL_ENABLE, fep->hwp + FEC_ATIME_CTRL); + + memset(&fep->cc, 0, sizeof(fep->cc)); + fep->cc.read = fec_ptp_read; + fep->cc.mask = CLOCKSOURCE_MASK(32); + fep->cc.shift = 31; + fep->cc.mult = FEC_CC_MULT; + + /* reset the ns time counter */ + timecounter_init(&fep->tc, &fep->cc, ktime_to_ns(ktime_get_real())); + + spin_unlock_irqrestore(&fep->tmreg_lock, flags); +} + +/** + * fec_ptp_adjfreq - adjust ptp cycle frequency + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * Adjust the frequency of the ptp cycle counter by the + * indicated ppb from the base frequency. + * + * Because ENET hardware frequency adjust is complex, + * using software method to do that. + */ +static int fec_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +{ + u64 diff; + unsigned long flags; + int neg_adj = 0; + u32 mult = FEC_CC_MULT; + + struct fec_enet_private *fep = + container_of(ptp, struct fec_enet_private, ptp_caps); + + if (ppb < 0) { + ppb = -ppb; + neg_adj = 1; + } + + diff = mult; + diff *= ppb; + diff = div_u64(diff, 1000000000ULL); + + spin_lock_irqsave(&fep->tmreg_lock, flags); + /* + * dummy read to set cycle_last in tc to now. + * So use adjusted mult to calculate when next call + * timercounter_read. + */ + timecounter_read(&fep->tc); + + fep->cc.mult = neg_adj ? mult - diff : mult + diff; + + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + + return 0; +} + +/** + * fec_ptp_adjtime + * @ptp: the ptp clock structure + * @delta: offset to adjust the cycle counter by + * + * adjust the timer by resetting the timecounter structure. + */ +static int fec_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct fec_enet_private *fep = + container_of(ptp, struct fec_enet_private, ptp_caps); + unsigned long flags; + u64 now; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + + now = timecounter_read(&fep->tc); + now += delta; + + /* reset the timecounter */ + timecounter_init(&fep->tc, &fep->cc, now); + + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + + return 0; +} + +/** + * fec_ptp_gettime + * @ptp: the ptp clock structure + * @ts: timespec structure to hold the current time value + * + * read the timecounter and return the correct value on ns, + * after converting it into a struct timespec. + */ +static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + struct fec_enet_private *adapter = + container_of(ptp, struct fec_enet_private, ptp_caps); + u64 ns; + u32 remainder; + unsigned long flags; + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_read(&adapter->tc); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000ULL, &remainder); + ts->tv_nsec = remainder; + + return 0; +} + +/** + * fec_ptp_settime + * @ptp: the ptp clock structure + * @ts: the timespec containing the new time for the cycle counter + * + * reset the timecounter to use a new base value instead of the kernel + * wall timer value. + */ +static int fec_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct fec_enet_private *fep = + container_of(ptp, struct fec_enet_private, ptp_caps); + + u64 ns; + unsigned long flags; + + ns = ts->tv_sec * 1000000000ULL; + ns += ts->tv_nsec; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + timecounter_init(&fep->tc, &fep->cc, ns); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + return 0; +} + +/** + * fec_ptp_enable + * @ptp: the ptp clock structure + * @rq: the requested feature to change + * @on: whether to enable or disable the feature + * + */ +static int fec_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/** + * fec_ptp_hwtstamp_ioctl - control hardware time stamping + * @ndev: pointer to net_device + * @ifreq: ioctl data + * @cmd: particular ioctl requested + */ +int fec_ptp_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + fep->hwts_tx_en = 0; + break; + case HWTSTAMP_TX_ON: + fep->hwts_tx_en = 1; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + if (fep->hwts_rx_en) + fep->hwts_rx_en = 0; + config.rx_filter = HWTSTAMP_FILTER_NONE; + break; + + default: + /* + * register RXMTRL must be set in order to do V1 packets, + * therefore it is not possible to time stamp both V1 Sync and + * Delay_Req messages and hardware does not support + * timestamping all packets => return error + */ + fep->hwts_rx_en = 1; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + } + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +/** + * fec_time_keep - call timecounter_read every second to avoid timer overrun + * because ENET just support 32bit counter, will timeout in 4s + */ +static void fec_time_keep(unsigned long _data) +{ + struct fec_enet_private *fep = (struct fec_enet_private *)_data; + u64 ns; + unsigned long flags; + + spin_lock_irqsave(&fep->tmreg_lock, flags); + ns = timecounter_read(&fep->tc); + spin_unlock_irqrestore(&fep->tmreg_lock, flags); + + mod_timer(&fep->time_keep, jiffies + HZ); +} + +/** + * fec_ptp_init + * @ndev: The FEC network adapter + * + * This function performs the required steps for enabling ptp + * support. If ptp support has already been loaded it simply calls the + * cyclecounter init routine and exits. + */ + +void fec_ptp_init(struct net_device *ndev, struct platform_device *pdev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + fep->ptp_caps.owner = THIS_MODULE; + snprintf(fep->ptp_caps.name, 16, "fec ptp"); + + fep->ptp_caps.max_adj = 250000000; + fep->ptp_caps.n_alarm = 0; + fep->ptp_caps.n_ext_ts = 0; + fep->ptp_caps.n_per_out = 0; + fep->ptp_caps.pps = 0; + fep->ptp_caps.adjfreq = fec_ptp_adjfreq; + fep->ptp_caps.adjtime = fec_ptp_adjtime; + fep->ptp_caps.gettime = fec_ptp_gettime; + fep->ptp_caps.settime = fec_ptp_settime; + fep->ptp_caps.enable = fec_ptp_enable; + + spin_lock_init(&fep->tmreg_lock); + + fec_ptp_start_cyclecounter(ndev); + + init_timer(&fep->time_keep); + fep->time_keep.data = (unsigned long)fep; + fep->time_keep.function = fec_time_keep; + fep->time_keep.expires = jiffies + HZ; + add_timer(&fep->time_keep); + + fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); + if (IS_ERR(fep->ptp_clock)) { + fep->ptp_clock = NULL; + pr_err("ptp_clock_register failed\n"); + } else { + pr_info("registered PHC device on %s\n", ndev->name); + } +} diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 19ac096cb07..bffb2edd685 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -210,7 +210,7 @@ static int gfar_init_bds(struct net_device *ndev) skb = gfar_new_skb(ndev); if (!skb) { netdev_err(ndev, "Can't allocate RX buffers\n"); - goto err_rxalloc_fail; + return -ENOMEM; } rx_queue->rx_skbuff[j] = skb; @@ -223,10 +223,6 @@ static int gfar_init_bds(struct net_device *ndev) } return 0; - -err_rxalloc_fail: - free_skb_resources(priv); - return -ENOMEM; } static int gfar_alloc_skb_resources(struct net_device *ndev) @@ -1359,7 +1355,11 @@ static int gfar_restore(struct device *dev) return 0; } - gfar_init_bds(ndev); + if (gfar_init_bds(ndev)) { + free_skb_resources(priv); + return -ENOMEM; + } + init_registers(ndev); gfar_set_mac_address(ndev); gfar_init_mac(ndev); @@ -1712,6 +1712,7 @@ static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue) tx_queue->tx_skbuff[i] = NULL; } kfree(tx_queue->tx_skbuff); + tx_queue->tx_skbuff = NULL; } static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue) @@ -1735,6 +1736,7 @@ static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue) rxbdp++; } kfree(rx_queue->rx_skbuff); + rx_queue->rx_skbuff = NULL; } /* If there are any tx skbs or rx skbs still around, free them. diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 479e43e2f1e..84c6b6cf9c1 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -738,13 +738,11 @@ static int __devexit mal_remove(struct platform_device *ofdev) /* Synchronize with scheduled polling */ napi_disable(&mal->napi); - if (!list_empty(&mal->list)) { + if (!list_empty(&mal->list)) /* This is *very* bad */ - printk(KERN_EMERG + WARN(1, KERN_EMERG "mal%d: commac list is not empty on remove!\n", mal->index); - WARN_ON(1); - } dev_set_drvdata(&ofdev->dev, NULL); diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 0cafe4fe940..73d28d51b5d 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -93,6 +93,7 @@ config E1000E config IGB tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support" depends on PCI + select PTP_1588_CLOCK ---help--- This driver supports Intel(R) 82575/82576 gigabit ethernet family of adapters. For more information on how to identify your adapter, go @@ -120,19 +121,6 @@ config IGB_DCA driver. DCA is a method for warming the CPU cache before data is used, with the intent of lessening the impact of cache misses. -config IGB_PTP - bool "PTP Hardware Clock (PHC)" - default n - depends on IGB && EXPERIMENTAL - select PPS - select PTP_1588_CLOCK - ---help--- - Say Y here if you want to use PTP Hardware Clock (PHC) in the - driver. Only the basic clock operations have been implemented. - - Every timestamp and clock read operations must consult the - overflow counter to form a correct time value. - config IGBVF tristate "Intel(R) 82576 Virtual Function Ethernet support" depends on PCI @@ -180,6 +168,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI && INET select MDIO + select PTP_1588_CLOCK ---help--- This driver supports Intel(R) 10GbE PCI Express family of adapters. For more information on how to identify your adapter, go @@ -222,19 +211,6 @@ config IXGBE_DCB If unsure, say N. -config IXGBE_PTP - bool "PTP Clock Support" - default n - depends on IXGBE && EXPERIMENTAL - select PPS - select PTP_1588_CLOCK - ---help--- - Say Y here if you want support for 1588 Timestamping with a - PHC device, using the PTP 1588 Clock support. This is - required to enable timestamping support for the device. - - If unsure, say N. - config IXGBEVF tristate "Intel(R) 82599 Virtual Function Ethernet support" depends on PCI_MSI diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index 3d683952876..8fedd245153 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -107,6 +107,7 @@ u16 e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = { }; static DEFINE_SPINLOCK(e1000_eeprom_lock); +static DEFINE_SPINLOCK(e1000_phy_lock); /** * e1000_set_phy_type - Set the phy type member in the hw struct. @@ -2830,19 +2831,25 @@ static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw) s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data) { u32 ret_val; + unsigned long flags; e_dbg("e1000_read_phy_reg"); + spin_lock_irqsave(&e1000_phy_lock, flags); + if ((hw->phy_type == e1000_phy_igp) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (u16) reg_addr); - if (ret_val) + if (ret_val) { + spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; + } } ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); + spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; } @@ -2965,19 +2972,25 @@ static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr, s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 phy_data) { u32 ret_val; + unsigned long flags; e_dbg("e1000_write_phy_reg"); + spin_lock_irqsave(&e1000_phy_lock, flags); + if ((hw->phy_type == e1000_phy_igp) && (reg_addr > MAX_PHY_MULTI_PAGE_REG)) { ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT, (u16) reg_addr); - if (ret_val) + if (ret_val) { + spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; + } } ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr, phy_data); + spin_unlock_irqrestore(&e1000_phy_lock, flags); return ret_val; } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index f444eb0b76d..dadb13be479 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5067,6 +5067,17 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } + /* + * The minimum packet size with TCTL.PSP set is 17 bytes so + * pad skb in order to meet this minimum size requirement + */ + if (unlikely(skb->len < 17)) { + if (skb_pad(skb, 17 - skb->len)) + return NETDEV_TX_OK; + skb->len = 17; + skb_set_tail_pointer(skb, 17); + } + mss = skb_shinfo(skb)->gso_size; if (mss) { u8 hdr_len; diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile index 97c197fd4a8..624476cfa72 100644 --- a/drivers/net/ethernet/intel/igb/Makefile +++ b/drivers/net/ethernet/intel/igb/Makefile @@ -34,6 +34,4 @@ obj-$(CONFIG_IGB) += igb.o igb-objs := igb_main.o igb_ethtool.o e1000_82575.o \ e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \ - e1000_i210.o - -igb-$(CONFIG_IGB_PTP) += igb_ptp.o + e1000_i210.o igb_ptp.o diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index ca4641e2f74..deb05970b9f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -319,6 +319,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) nvm->ops.acquire = igb_acquire_nvm_i210; nvm->ops.release = igb_release_nvm_i210; nvm->ops.read = igb_read_nvm_srrd_i210; + nvm->ops.write = igb_write_nvm_srwr_i210; nvm->ops.valid_led_default = igb_valid_led_default_i210; break; case e1000_i211: @@ -2233,19 +2234,16 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) /* enable or disable per user setting */ if (!(hw->dev_spec._82575.eee_disable)) { - ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | - E1000_IPCNFG_EEE_100M_AN); - eeer |= (E1000_EEER_TX_LPI_EN | - E1000_EEER_RX_LPI_EN | + u32 eee_su = rd32(E1000_EEE_SU); + + ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); + eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); - /* keep the LPI clock running before EEE is enabled */ - if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { - u32 eee_su; - eee_su = rd32(E1000_EEE_SU); - eee_su &= ~E1000_EEE_SU_LPI_CLK_STP; - wr32(E1000_EEE_SU, eee_su); - } + /* This bit should not be set in normal operation. */ + if (eee_su & E1000_EEE_SU_LPI_CLK_STP) + hw_dbg("LPI Clock Stop Bit should not be set!\n"); + } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index e85c453f542..44b76b3b681 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -172,10 +172,13 @@ struct e1000_adv_tx_context_desc { #define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ #define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header enable */ #define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload enable */ +#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx rd Desc Relax Order */ #define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ #define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ +#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ #define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ /* Additional DCA related definitions, note change in position of CPUID */ #define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index de4b41ec3c4..e647cff9a5e 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -636,6 +636,7 @@ /* NVM Word Offsets */ #define NVM_COMPAT 0x0003 #define NVM_ID_LED_SETTINGS 0x0004 /* SERDES output amplitude */ +#define NVM_VERSION 0x0005 #define NVM_INIT_CONTROL2_REG 0x000F #define NVM_INIT_CONTROL3_PORT_B 0x0014 #define NVM_INIT_CONTROL3_PORT_A 0x0024 @@ -653,6 +654,19 @@ #define NVM_LED_1_CFG 0x001C #define NVM_LED_0_2_CFG 0x001F +/* NVM version defines */ +#define NVM_ETRACK_WORD 0x0042 +#define NVM_COMB_VER_OFF 0x0083 +#define NVM_COMB_VER_PTR 0x003d +#define NVM_MAJOR_MASK 0xF000 +#define NVM_MINOR_MASK 0x0FF0 +#define NVM_BUILD_MASK 0x000F +#define NVM_COMB_VER_MASK 0x00FF +#define NVM_MAJOR_SHIFT 12 +#define NVM_MINOR_SHIFT 4 +#define NVM_COMB_VER_SHFT 8 +#define NVM_VER_INVALID 0xFFFF +#define NVM_ETRACK_SHIFT 16 #define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ #define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 77a5f939bc7..41474298d36 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -423,6 +423,100 @@ s32 igb_read_invm_i211(struct e1000_hw *hw, u16 address, u16 *data) } /** + * igb_read_invm_version - Reads iNVM version and image type + * @hw: pointer to the HW structure + * @invm_ver: version structure for the version read + * + * Reads iNVM version and image type. + **/ +s32 igb_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver) { + u32 *record = NULL; + u32 *next_record = NULL; + u32 i = 0; + u32 invm_dword = 0; + u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / + E1000_INVM_RECORD_SIZE_IN_BYTES); + u32 buffer[E1000_INVM_SIZE]; + s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; + u16 version = 0; + + /* Read iNVM memory */ + for (i = 0; i < E1000_INVM_SIZE; i++) { + invm_dword = rd32(E1000_INVM_DATA_REG(i)); + buffer[i] = invm_dword; + } + + /* Read version number */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have first version location used */ + if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { + version = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have second version location used */ + else if ((i == 1) && + ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + /* Check if we have odd version location + * used and it is the last one used + */ + else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && + ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && + (i != 1))) { + version = (*next_record & E1000_INVM_VER_FIELD_TWO) + >> 13; + status = E1000_SUCCESS; + break; + } + /* Check if we have even version location + * used and it is the last one used + */ + else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && + ((*record & 0x3) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + } + + if (status == E1000_SUCCESS) { + invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) + >> E1000_INVM_MAJOR_SHIFT; + invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; + } + /* Read Image Type */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have image type in first location used */ + if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { + invm_ver->invm_img_type = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have image type in first location used */ + else if ((((*record & 0x3) == 0) && + ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || + ((((*record & 0x3) != 0) && (i != 1)))) { + invm_ver->invm_img_type = + (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; + status = E1000_SUCCESS; + break; + } + } + return status; +} + +/** * igb_validate_nvm_checksum_i210 - Validate EEPROM checksum * @hw: pointer to the HW structure * diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h index 5dc2bd3f50b..974d23584d7 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.h +++ b/drivers/net/ethernet/intel/igb/e1000_i210.h @@ -43,6 +43,8 @@ extern void igb_release_nvm_i210(struct e1000_hw *hw); extern s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data); extern s32 igb_read_nvm_i211(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +extern s32 igb_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver); #define E1000_STM_OPCODE 0xDB00 #define E1000_EEPROM_FLASH_SIZE_WORD 0x11 @@ -65,6 +67,15 @@ enum E1000_INVM_STRUCTURE_TYPE { #define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 #define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 +#define E1000_INVM_ULT_BYTES_SIZE 8 +#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 +#define E1000_INVM_VER_FIELD_ONE 0x1FF8 +#define E1000_INVM_VER_FIELD_TWO 0x7FE000 +#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 + +#define E1000_INVM_MAJOR_MASK 0x3F0 +#define E1000_INVM_MINOR_MASK 0xF +#define E1000_INVM_MAJOR_SHIFT 4 #define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ (ID_LED_OFF1_OFF2 << 4) | \ diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 819c145ac76..7acddfe9e6d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -1391,6 +1391,10 @@ s32 igb_validate_mdi_setting(struct e1000_hw *hw) { s32 ret_val = 0; + /* All MDI settings are supported on 82580 and newer. */ + if (hw->mac.type >= e1000_82580) + goto out; + if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { hw_dbg("Invalid MDI setting detected\n"); hw->phy.mdix = 1; diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index cbddc4e51e3..e2b2c4b9c95 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -33,6 +33,7 @@ #include "e1000_phy.h" #include "e1000_nvm.h" #include "e1000_defines.h" +#include "e1000_i210.h" /* * Functions that should not be called directly from drivers but can be used diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index aa5fcdf3f35..7db3f80bcd5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -710,3 +710,74 @@ s32 igb_update_nvm_checksum(struct e1000_hw *hw) out: return ret_val; } + +/** + * igb_get_fw_version - Get firmware version information + * @hw: pointer to the HW structure + * @fw_vers: pointer to output structure + * + * unsupported MAC types will return all 0 version structure + **/ +void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) +{ + u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset; + u16 fw_version; + + memset(fw_vers, 0, sizeof(struct e1000_fw_version)); + + switch (hw->mac.type) { + case e1000_i211: + igb_read_invm_version(hw, fw_vers); + return; + case e1000_82575: + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i210: + break; + default: + return; + } + /* basic eeprom version numbers */ + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); + fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) >> NVM_MAJOR_SHIFT; + fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK); + + /* etrack id */ + hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); + hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); + fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) | eeprom_verl; + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i350: + /* find combo image version */ + hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); + if ((comb_offset != 0x0) && (comb_offset != NVM_VER_INVALID)) { + + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset + + 1), 1, &comb_verh); + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), + 1, &comb_verl); + + /* get Option Rom version if it exists and is valid */ + if ((comb_verh && comb_verl) && + ((comb_verh != NVM_VER_INVALID) && + (comb_verl != NVM_VER_INVALID))) { + + fw_vers->or_valid = true; + fw_vers->or_major = + comb_verl >> NVM_COMB_VER_SHFT; + fw_vers->or_build = + ((comb_verl << NVM_COMB_VER_SHFT) + | (comb_verh >> NVM_COMB_VER_SHFT)); + fw_vers->or_patch = + comb_verh & NVM_COMB_VER_MASK; + } + } + break; + default: + break; + } + return; +} diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h index 825b0228cac..7012d458c6f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.h +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h @@ -40,4 +40,20 @@ s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 igb_validate_nvm_checksum(struct e1000_hw *hw); s32 igb_update_nvm_checksum(struct e1000_hw *hw); +struct e1000_fw_version { + u32 etrack_id; + u16 eep_major; + u16 eep_minor; + + u8 invm_major; + u8 invm_minor; + u8 invm_img_type; + + bool or_valid; + u16 or_major; + u16 or_build; + u16 or_patch; +}; +void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers); + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 3404bc79f4c..fe76004aca4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1207,20 +1207,25 @@ s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw) u16 phy_data; bool link; - /* - * Clear Auto-Crossover to force MDI manually. M88E1000 requires MDI - * forced whenever speed and duplex are forced. - */ - ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); - if (ret_val) - goto out; + /* I210 and I211 devices support Auto-Crossover in forced operation. */ + if (phy->type != e1000_phy_i210) { + /* + * Clear Auto-Crossover to force MDI manually. M88E1000 + * requires MDI forced whenever speed and duplex are forced. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, + &phy_data); + if (ret_val) + goto out; - phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; - ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); - if (ret_val) - goto out; + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, + phy_data); + if (ret_val) + goto out; - hw_dbg("M88E1000 PSCR: %X\n", phy_data); + hw_dbg("M88E1000 PSCR: %X\n", phy_data); + } ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); if (ret_val) @@ -1710,6 +1715,26 @@ s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw) switch (hw->phy.id) { case I210_I_PHY_ID: + /* Get cable length from PHY Cable Diagnostics Control Reg */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + (I347AT4_PCDL + phy->addr), + &phy_data); + if (ret_val) + return ret_val; + + /* Check if the unit of cable length is meters or cm */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + I347AT4_PCDC, &phy_data2); + if (ret_val) + return ret_val; + + is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + + /* Populate the phy structure with cable length in meters */ + phy->min_cable_length = phy_data / (is_cm ? 100 : 1); + phy->max_cable_length = phy_data / (is_cm ? 100 : 1); + phy->cable_length = phy_data / (is_cm ? 100 : 1); + break; case I347AT4_E_PHY_ID: /* Remember the original page select and set it to 7 */ ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 8aad230c059..796db53954d 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -34,11 +34,9 @@ #include "e1000_mac.h" #include "e1000_82575.h" -#ifdef CONFIG_IGB_PTP #include <linux/clocksource.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> -#endif /* CONFIG_IGB_PTP */ #include <linux/bitops.h> #include <linux/if_vlan.h> @@ -132,9 +130,10 @@ struct vf_data_storage { #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 /* Supported Rx Buffer Sizes */ -#define IGB_RXBUFFER_256 256 -#define IGB_RXBUFFER_16384 16384 -#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 +#define IGB_RXBUFFER_256 256 +#define IGB_RXBUFFER_2048 2048 +#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 +#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 /* How many Tx Descriptors do we need to call netif_wake_queue ? */ #define IGB_TX_QUEUE_WAKE 16 @@ -174,11 +173,9 @@ struct igb_tx_buffer { }; struct igb_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; - dma_addr_t page_dma; - u32 page_offset; + unsigned int page_offset; }; struct igb_tx_queue_stats { @@ -205,22 +202,6 @@ struct igb_ring_container { u8 itr; /* current ITR setting for ring */ }; -struct igb_q_vector { - struct igb_adapter *adapter; /* backlink */ - int cpu; /* CPU for DCA */ - u32 eims_value; /* EIMS mask value */ - - struct igb_ring_container rx, tx; - - struct napi_struct napi; - - u16 itr_val; - u8 set_itr; - void __iomem *itr_register; - - char name[IFNAMSIZ + 9]; -}; - struct igb_ring { struct igb_q_vector *q_vector; /* backlink to q_vector */ struct net_device *netdev; /* back pointer to net_device */ @@ -232,15 +213,17 @@ struct igb_ring { void *desc; /* descriptor ring memory */ unsigned long flags; /* ring specific flags */ void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ u16 count; /* number of desc. in the ring */ u8 queue_index; /* logical index of the ring*/ u8 reg_idx; /* physical index of the ring */ - u32 size; /* length of desc. ring in bytes */ /* everything past this point are written often */ - u16 next_to_clean ____cacheline_aligned_in_smp; + u16 next_to_clean; u16 next_to_use; + u16 next_to_alloc; union { /* TX */ @@ -251,12 +234,30 @@ struct igb_ring { }; /* RX */ struct { + struct sk_buff *skb; struct igb_rx_queue_stats rx_stats; struct u64_stats_sync rx_syncp; }; }; - /* Items past this point are only used during ring alloc / free */ - dma_addr_t dma; /* phys address of the ring */ +} ____cacheline_internodealigned_in_smp; + +struct igb_q_vector { + struct igb_adapter *adapter; /* backlink */ + int cpu; /* CPU for DCA */ + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + void __iomem *itr_register; + + struct igb_ring_container rx, tx; + + struct napi_struct napi; + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[IFNAMSIZ + 9]; + + /* for dynamic allocation of rings associated with this q_vector */ + struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; }; enum e1000_ring_flags_t { @@ -373,7 +374,6 @@ struct igb_adapter { u32 wvbr; u32 *shadow_vfta; -#ifdef CONFIG_IGB_PTP struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct delayed_work ptp_overflow_work; @@ -382,7 +382,6 @@ struct igb_adapter { spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; -#endif /* CONFIG_IGB_PTP */ char fw_version[32]; }; @@ -436,18 +435,27 @@ extern bool igb_has_link(struct igb_adapter *adapter); extern void igb_set_ethtool_ops(struct net_device *); extern void igb_power_up_link(struct igb_adapter *); extern void igb_set_fw_version(struct igb_adapter *); -#ifdef CONFIG_IGB_PTP extern void igb_ptp_init(struct igb_adapter *adapter); extern void igb_ptp_stop(struct igb_adapter *adapter); extern void igb_ptp_reset(struct igb_adapter *adapter); extern void igb_ptp_tx_work(struct work_struct *work); extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); -extern void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, - union e1000_adv_rx_desc *rx_desc, +extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); +extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb); +static inline void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) && + !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) + igb_ptp_rx_rgtstamp(q_vector, skb); +} + extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd); -#endif /* CONFIG_IGB_PTP */ static inline s32 igb_reset_phy(struct e1000_hw *hw) { diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 2ea01284982..d8b1bee606c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -37,6 +37,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/pm_runtime.h> +#include <linux/highmem.h> #include "igb.h" @@ -1685,16 +1686,24 @@ static void igb_create_lbtest_frame(struct sk_buff *skb, memset(&skb->data[frame_size + 12], 0xAF, 1); } -static int igb_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size) +static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer, + unsigned int frame_size) { - frame_size /= 2; - if (*(skb->data + 3) == 0xFF) { - if ((*(skb->data + frame_size + 10) == 0xBE) && - (*(skb->data + frame_size + 12) == 0xAF)) { - return 0; - } - } - return 13; + unsigned char *data; + bool match = true; + + frame_size >>= 1; + + data = kmap(rx_buffer->page); + + if (data[3] != 0xFF || + data[frame_size + 10] != 0xBE || + data[frame_size + 12] != 0xAF) + match = false; + + kunmap(rx_buffer->page); + + return match; } static int igb_clean_test_rings(struct igb_ring *rx_ring, @@ -1704,9 +1713,7 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc; struct igb_rx_buffer *rx_buffer_info; struct igb_tx_buffer *tx_buffer_info; - struct netdev_queue *txq; u16 rx_ntc, tx_ntc, count = 0; - unsigned int total_bytes = 0, total_packets = 0; /* initialize next to clean and descriptor values */ rx_ntc = rx_ring->next_to_clean; @@ -1717,21 +1724,24 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, /* check rx buffer */ rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; - /* unmap rx buffer, will be remapped by alloc_rx_buffers */ - dma_unmap_single(rx_ring->dev, - rx_buffer_info->dma, - IGB_RX_HDR_LEN, - DMA_FROM_DEVICE); - rx_buffer_info->dma = 0; + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer_info->dma, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); /* verify contents of skb */ - if (!igb_check_lbtest_frame(rx_buffer_info->skb, size)) + if (igb_check_lbtest_frame(rx_buffer_info, size)) count++; + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer_info->dma, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + /* unmap buffer on tx side */ tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; - total_bytes += tx_buffer_info->bytecount; - total_packets += tx_buffer_info->gso_segs; igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); /* increment rx/tx next to clean counters */ @@ -1746,8 +1756,7 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); } - txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); - netdev_tx_completed_queue(txq, total_packets, total_bytes); + netdev_tx_reset_queue(txring_txq(tx_ring)); /* re-map buffers to ring, store next to clean values */ igb_alloc_rx_buffers(rx_ring, count); @@ -2301,7 +2310,6 @@ static int igb_get_ts_info(struct net_device *dev, struct igb_adapter *adapter = netdev_priv(dev); switch (adapter->hw.mac.type) { -#ifdef CONFIG_IGB_PTP case e1000_82576: case e1000_82580: case e1000_i350: @@ -2337,7 +2345,6 @@ static int igb_get_ts_info(struct net_device *dev, (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); return 0; -#endif /* CONFIG_IGB_PTP */ default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e1ceb37ef12..082ce73dc62 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -61,7 +61,7 @@ #define MAJ 4 #define MIN 0 -#define BUILD 1 +#define BUILD 17 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" char igb_driver_name[] = "igb"; @@ -534,31 +534,27 @@ rx_ring_summary: if (staterr & E1000_RXD_STAT_DD) { /* Descriptor Done */ - pr_info("%s[0x%03X] %016llX %016llX -------" - "--------- %p%s\n", "RWB", i, + pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", + "RWB", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - buffer_info->skb, next_desc); + next_desc); } else { - pr_info("%s[0x%03X] %016llX %016llX %016llX" - " %p%s\n", "R ", i, + pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", + "R ", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), (u64)buffer_info->dma, - buffer_info->skb, next_desc); + next_desc); if (netif_msg_pktdata(adapter) && - buffer_info->dma && buffer_info->skb) { - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, - 16, 1, buffer_info->skb->data, - IGB_RX_HDR_LEN, true); + buffer_info->dma && buffer_info->page) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, page_address(buffer_info->page) + buffer_info->page_offset, - PAGE_SIZE/2, true); + IGB_RX_BUFSZ, true); } } } @@ -656,80 +652,6 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) } } -static void igb_free_queues(struct igb_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); - adapter->rx_ring[i] = NULL; - } - adapter->num_rx_queues = 0; - adapter->num_tx_queues = 0; -} - -/** - * igb_alloc_queues - Allocate memory for all rings - * @adapter: board private structure to initialize - * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. - **/ -static int igb_alloc_queues(struct igb_adapter *adapter) -{ - struct igb_ring *ring; - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); - if (!ring) - goto err; - ring->count = adapter->tx_ring_count; - ring->queue_index = i; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - /* For 82575, context index must be unique per ring. */ - if (adapter->hw.mac.type == e1000_82575) - set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); - adapter->tx_ring[i] = ring; - } - - for (i = 0; i < adapter->num_rx_queues; i++) { - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); - if (!ring) - goto err; - ring->count = adapter->rx_ring_count; - ring->queue_index = i; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - /* set flag indicating ring supports SCTP checksum offload */ - if (adapter->hw.mac.type >= e1000_82576) - set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); - - /* - * On i350, i210, and i211, loopback VLAN packets - * have the tag byte-swapped. - * */ - if (adapter->hw.mac.type >= e1000_i350) - set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); - - adapter->rx_ring[i] = ring; - } - - igb_cache_ring_register(adapter); - - return 0; - -err: - igb_free_queues(adapter); - - return -ENOMEM; -} - /** * igb_write_ivar - configure ivar for given MSI-X vector * @hw: pointer to the HW structure @@ -960,6 +882,35 @@ static void igb_reset_interrupt_capability(struct igb_adapter *adapter) } /** + * igb_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; + + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); + + /* + * ixgbe_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + kfree_rcu(q_vector, rcu); +} + +/** * igb_free_q_vectors - Free memory allocated for interrupt vectors * @adapter: board private structure to initialize * @@ -969,17 +920,14 @@ static void igb_reset_interrupt_capability(struct igb_adapter *adapter) **/ static void igb_free_q_vectors(struct igb_adapter *adapter) { - int v_idx; + int v_idx = adapter->num_q_vectors; - for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; - adapter->q_vector[v_idx] = NULL; - if (!q_vector) - continue; - netif_napi_del(&q_vector->napi); - kfree(q_vector); - } + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); } /** @@ -990,7 +938,6 @@ static void igb_free_q_vectors(struct igb_adapter *adapter) */ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) { - igb_free_queues(adapter); igb_free_q_vectors(adapter); igb_reset_interrupt_capability(adapter); } @@ -1001,7 +948,7 @@ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) * Attempt to configure interrupts using the best available * capabilities of the hardware and kernel. **/ -static int igb_set_interrupt_capability(struct igb_adapter *adapter) +static void igb_set_interrupt_capability(struct igb_adapter *adapter) { int err; int numvecs, i; @@ -1038,7 +985,7 @@ static int igb_set_interrupt_capability(struct igb_adapter *adapter) adapter->msix_entries, numvecs); if (err == 0) - goto out; + return; igb_reset_interrupt_capability(adapter); @@ -1068,105 +1015,183 @@ msi_only: adapter->num_q_vectors = 1; if (!pci_enable_msi(adapter->pdev)) adapter->flags |= IGB_FLAG_HAS_MSI; -out: - /* Notify the stack of the (possibly) reduced queue counts. */ - rtnl_lock(); - netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); - err = netif_set_real_num_rx_queues(adapter->netdev, - adapter->num_rx_queues); - rtnl_unlock(); - return err; +} + +static void igb_add_ring(struct igb_ring *ring, + struct igb_ring_container *head) +{ + head->ring = ring; + head->count++; } /** - * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * igb_alloc_q_vector - Allocate memory for a single interrupt vector * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. + * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int igb_alloc_q_vectors(struct igb_adapter *adapter) +static int igb_alloc_q_vector(struct igb_adapter *adapter, + int v_count, int v_idx, + int txr_count, int txr_idx, + int rxr_count, int rxr_idx) { struct igb_q_vector *q_vector; - struct e1000_hw *hw = &adapter->hw; - int v_idx; + struct igb_ring *ring; + int ring_count, size; - for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - q_vector = kzalloc(sizeof(struct igb_q_vector), - GFP_KERNEL); - if (!q_vector) - goto err_out; - q_vector->adapter = adapter; - q_vector->itr_register = hw->hw_addr + E1000_EITR(0); - q_vector->itr_val = IGB_START_ITR; - netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); - adapter->q_vector[v_idx] = q_vector; + /* igb only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igb_q_vector) + + (sizeof(struct igb_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igb_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_val = IGB_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igb_add_ring(ring, &q_vector->tx); + + /* For 82575, context index must be unique per ring. */ + if (adapter->hw.mac.type == e1000_82575) + set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; } - return 0; + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; -err_out: - igb_free_q_vectors(adapter); - return -ENOMEM; -} + /* configure backlink on ring */ + ring->q_vector = q_vector; -static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter, - int ring_idx, int v_idx) -{ - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + /* update q_vector Rx values */ + igb_add_ring(ring, &q_vector->rx); - q_vector->rx.ring = adapter->rx_ring[ring_idx]; - q_vector->rx.ring->q_vector = q_vector; - q_vector->rx.count++; - q_vector->itr_val = adapter->rx_itr_setting; - if (q_vector->itr_val && q_vector->itr_val <= 3) - q_vector->itr_val = IGB_START_ITR; -} + /* set flag indicating ring supports SCTP checksum offload */ + if (adapter->hw.mac.type >= e1000_82576) + set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); -static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, - int ring_idx, int v_idx) -{ - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + /* + * On i350, i210, and i211, loopback VLAN packets + * have the tag byte-swapped. + * */ + if (adapter->hw.mac.type >= e1000_i350) + set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); - q_vector->tx.ring = adapter->tx_ring[ring_idx]; - q_vector->tx.ring->q_vector = q_vector; - q_vector->tx.count++; - q_vector->itr_val = adapter->tx_itr_setting; - q_vector->tx.work_limit = adapter->tx_work_limit; - if (q_vector->itr_val && q_vector->itr_val <= 3) - q_vector->itr_val = IGB_START_ITR; + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; } + /** - * igb_map_ring_to_vector - maps allocated queues to vectors + * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize * - * This function maps the recently allocated queues to vectors. + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. **/ -static int igb_map_ring_to_vector(struct igb_adapter *adapter) +static int igb_alloc_q_vectors(struct igb_adapter *adapter) { - int i; - int v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; - if ((adapter->num_q_vectors < adapter->num_rx_queues) || - (adapter->num_q_vectors < adapter->num_tx_queues)) - return -ENOMEM; + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); - if (adapter->num_q_vectors >= - (adapter->num_rx_queues + adapter->num_tx_queues)) { - for (i = 0; i < adapter->num_rx_queues; i++) - igb_map_rx_ring_to_vector(adapter, i, v_idx++); - for (i = 0; i < adapter->num_tx_queues; i++) - igb_map_tx_ring_to_vector(adapter, i, v_idx++); - } else { - for (i = 0; i < adapter->num_rx_queues; i++) { - if (i < adapter->num_tx_queues) - igb_map_tx_ring_to_vector(adapter, i, v_idx); - igb_map_rx_ring_to_vector(adapter, i, v_idx++); + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; } - for (; i < adapter->num_tx_queues; i++) - igb_map_tx_ring_to_vector(adapter, i, v_idx++); } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); + + return -ENOMEM; } /** @@ -1179,9 +1204,7 @@ static int igb_init_interrupt_scheme(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; int err; - err = igb_set_interrupt_capability(adapter); - if (err) - return err; + igb_set_interrupt_capability(adapter); err = igb_alloc_q_vectors(adapter); if (err) { @@ -1189,24 +1212,10 @@ static int igb_init_interrupt_scheme(struct igb_adapter *adapter) goto err_alloc_q_vectors; } - err = igb_alloc_queues(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - - err = igb_map_ring_to_vector(adapter); - if (err) { - dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n"); - goto err_map_queues; - } - + igb_cache_ring_register(adapter); return 0; -err_map_queues: - igb_free_queues(adapter); -err_alloc_queues: - igb_free_q_vectors(adapter); + err_alloc_q_vectors: igb_reset_interrupt_capability(adapter); return err; @@ -1229,11 +1238,11 @@ static int igb_request_irq(struct igb_adapter *adapter) if (!err) goto request_done; /* fall back to MSI */ + igb_free_all_tx_resources(adapter); + igb_free_all_rx_resources(adapter); igb_clear_interrupt_scheme(adapter); if (!pci_enable_msi(pdev)) adapter->flags |= IGB_FLAG_HAS_MSI; - igb_free_all_tx_resources(adapter); - igb_free_all_rx_resources(adapter); adapter->num_tx_queues = 1; adapter->num_rx_queues = 1; adapter->num_q_vectors = 1; @@ -1243,13 +1252,6 @@ static int igb_request_irq(struct igb_adapter *adapter) "Unable to allocate memory for vectors\n"); goto request_done; } - err = igb_alloc_queues(adapter); - if (err) { - dev_err(&pdev->dev, - "Unable to allocate memory for queues\n"); - igb_free_q_vectors(adapter); - goto request_done; - } igb_setup_all_tx_resources(adapter); igb_setup_all_rx_resources(adapter); } @@ -1706,10 +1708,8 @@ void igb_reset(struct igb_adapter *adapter) /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); -#ifdef CONFIG_IGB_PTP /* Re-enable PTP, where applicable. */ igb_ptp_reset(adapter); -#endif /* CONFIG_IGB_PTP */ igb_get_phy_info(hw); } @@ -1783,58 +1783,34 @@ static const struct net_device_ops igb_netdev_ops = { void igb_set_fw_version(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset; - u16 major, build, patch, fw_version; - u32 etrack_id; - - hw->nvm.ops.read(hw, 5, 1, &fw_version); - if (adapter->hw.mac.type != e1000_i211) { - hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh); - hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl); - etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl; - - /* combo image version needs to be found */ - hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); - if ((comb_offset != 0x0) && - (comb_offset != IGB_NVM_VER_INVALID)) { - hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset - + 1), 1, &comb_verh); - hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), - 1, &comb_verl); - - /* Only display Option Rom if it exists and is valid */ - if ((comb_verh && comb_verl) && - ((comb_verh != IGB_NVM_VER_INVALID) && - (comb_verl != IGB_NVM_VER_INVALID))) { - major = comb_verl >> IGB_COMB_VER_SHFT; - build = (comb_verl << IGB_COMB_VER_SHFT) | - (comb_verh >> IGB_COMB_VER_SHFT); - patch = comb_verh & IGB_COMB_VER_MASK; - snprintf(adapter->fw_version, - sizeof(adapter->fw_version), - "%d.%d%d, 0x%08x, %d.%d.%d", - (fw_version & IGB_MAJOR_MASK) >> - IGB_MAJOR_SHIFT, - (fw_version & IGB_MINOR_MASK) >> - IGB_MINOR_SHIFT, - (fw_version & IGB_BUILD_MASK), - etrack_id, major, build, patch); - goto out; - } - } - snprintf(adapter->fw_version, sizeof(adapter->fw_version), - "%d.%d%d, 0x%08x", - (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT, - (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT, - (fw_version & IGB_BUILD_MASK), etrack_id); - } else { + struct e1000_fw_version fw; + + igb_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i211: snprintf(adapter->fw_version, sizeof(adapter->fw_version), - "%d.%d%d", - (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT, - (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT, - (fw_version & IGB_BUILD_MASK)); + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, fw.invm_img_type); + break; + + default: + /* if option is rom valid, display its version too */ + if (fw.or_valid) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, fw.etrack_id); + } + break; } -out: return; } @@ -2141,10 +2117,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, #endif -#ifdef CONFIG_IGB_PTP /* do hw tstamp init after resetting */ igb_ptp_init(adapter); -#endif /* CONFIG_IGB_PTP */ dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ @@ -2219,9 +2193,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) struct e1000_hw *hw = &adapter->hw; pm_runtime_get_noresume(&pdev->dev); -#ifdef CONFIG_IGB_PTP igb_ptp_stop(adapter); -#endif /* CONFIG_IGB_PTP */ /* * The watchdog timer may be rescheduled, so explicitly @@ -2531,6 +2503,17 @@ static int __igb_open(struct net_device *netdev, bool resuming) if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(adapter->netdev, + adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(adapter->netdev, + adapter->num_rx_queues); + if (err) + goto err_set_queues; + /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); @@ -2560,6 +2543,8 @@ static int __igb_open(struct net_device *netdev, bool resuming) return 0; +err_set_queues: + igb_free_irq(adapter); err_req_irq: igb_release_hw_control(adapter); igb_power_down_link(adapter); @@ -2637,10 +2622,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -2777,18 +2760,16 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; - /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -3106,16 +3087,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, /* set descriptor configuration */ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; -#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384 - srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT; -#else - srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; -#endif - srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; -#ifdef CONFIG_IGB_PTP + srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; if (hw->mac.type >= e1000_82580) srrctl |= E1000_SRRCTL_TIMESTAMP; -#endif /* CONFIG_IGB_PTP */ /* Only set Drop Enable if we are supporting multiple queues */ if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) srrctl |= E1000_SRRCTL_DROP_EN; @@ -3305,36 +3280,27 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) unsigned long size; u16 i; + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + if (!rx_ring->rx_buffer_info) return; /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; - if (buffer_info->dma) { - dma_unmap_single(rx_ring->dev, - buffer_info->dma, - IGB_RX_HDR_LEN, - DMA_FROM_DEVICE); - buffer_info->dma = 0; - } - if (buffer_info->skb) { - dev_kfree_skb(buffer_info->skb); - buffer_info->skb = NULL; - } - if (buffer_info->page_dma) { - dma_unmap_page(rx_ring->dev, - buffer_info->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - buffer_info->page_dma = 0; - } - if (buffer_info->page) { - put_page(buffer_info->page); - buffer_info->page = NULL; - buffer_info->page_offset = 0; - } + if (!buffer_info->page) + continue; + + dma_unmap_page(rx_ring->dev, + buffer_info->dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(buffer_info->page); + + buffer_info->page = NULL; } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -3343,6 +3309,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -4159,11 +4126,9 @@ static __le32 igb_tx_cmd_type(u32 tx_flags) if (tx_flags & IGB_TX_FLAGS_VLAN) cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); -#ifdef CONFIG_IGB_PTP /* set timestamp bit if present */ if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); -#endif /* CONFIG_IGB_PTP */ /* set segmentation bits for TSO */ if (tx_flags & IGB_TX_FLAGS_TSO) @@ -4372,9 +4337,7 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { -#ifdef CONFIG_IGB_PTP struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); -#endif /* CONFIG_IGB_PTP */ struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; @@ -4397,7 +4360,6 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; -#ifdef CONFIG_IGB_PTP if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && !(adapter->ptp_tx_skb))) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -4407,7 +4369,6 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, if (adapter->hw.mac.type == e1000_82576) schedule_work(&adapter->ptp_tx_work); } -#endif /* CONFIG_IGB_PTP */ if (vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; @@ -4467,10 +4428,11 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, * The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (skb->len < 17) { - if (skb_padto(skb, 17)) + if (unlikely(skb->len < 17)) { + if (skb_pad(skb, 17 - skb->len)) return NETDEV_TX_OK; skb->len = 17; + skb_set_tail_pointer(skb, 17); } return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); @@ -4800,7 +4762,6 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } -#ifdef CONFIG_IGB_PTP if (icr & E1000_ICR_TS) { u32 tsicr = rd32(E1000_TSICR); @@ -4811,7 +4772,6 @@ static irqreturn_t igb_msix_other(int irq, void *data) schedule_work(&adapter->ptp_tx_work); } } -#endif /* CONFIG_IGB_PTP */ wr32(E1000_EIMS, adapter->eims_other); @@ -4851,45 +4811,63 @@ static irqreturn_t igb_msix_ring(int irq, void *data) } #ifdef CONFIG_IGB_DCA +static void igb_update_tx_dca(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); + + if (hw->mac.type != e1000_82575) + txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | + E1000_DCA_TXCTRL_DATA_RRO_EN | + E1000_DCA_TXCTRL_DESC_DCA_EN; + + wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); +} + +static void igb_update_rx_dca(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); + + if (hw->mac.type != e1000_82575) + rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | + E1000_DCA_RXCTRL_DESC_DCA_EN; + + wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); +} + static void igb_update_dca(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; int cpu = get_cpu(); if (q_vector->cpu == cpu) goto out_no_update; - if (q_vector->tx.ring) { - int q = q_vector->tx.ring->reg_idx; - u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); - if (hw->mac.type == e1000_82575) { - dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; - dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); - } else { - dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; - dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << - E1000_DCA_TXCTRL_CPUID_SHIFT; - } - dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; - wr32(E1000_DCA_TXCTRL(q), dca_txctrl); - } - if (q_vector->rx.ring) { - int q = q_vector->rx.ring->reg_idx; - u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); - if (hw->mac.type == e1000_82575) { - dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; - dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); - } else { - dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; - dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << - E1000_DCA_RXCTRL_CPUID_SHIFT; - } - dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; - dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; - dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN; - wr32(E1000_DCA_RXCTRL(q), dca_rxctrl); - } + if (q_vector->tx.ring) + igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); + + if (q_vector->rx.ring) + igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); + q_vector->cpu = cpu; out_no_update: put_cpu(); @@ -5545,7 +5523,6 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } -#ifdef CONFIG_IGB_PTP if (icr & E1000_ICR_TS) { u32 tsicr = rd32(E1000_TSICR); @@ -5556,7 +5533,6 @@ static irqreturn_t igb_intr_msi(int irq, void *data) schedule_work(&adapter->ptp_tx_work); } } -#endif /* CONFIG_IGB_PTP */ napi_schedule(&q_vector->napi); @@ -5599,7 +5575,6 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } -#ifdef CONFIG_IGB_PTP if (icr & E1000_ICR_TS) { u32 tsicr = rd32(E1000_TSICR); @@ -5610,7 +5585,6 @@ static irqreturn_t igb_intr(int irq, void *data) schedule_work(&adapter->ptp_tx_work); } } -#endif /* CONFIG_IGB_PTP */ napi_schedule(&q_vector->napi); @@ -5840,6 +5814,181 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) return !!budget; } +/** + * igb_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void igb_reuse_rx_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *old_buff) +{ + struct igb_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer)); + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, + old_buff->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); +} + +/** + * igb_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + **/ +static bool igb_add_rx_frag(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); + + if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, IGB_RX_BUFSZ); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= IGB_RX_BUFSZ; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unnecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += SKB_DATA_ALIGN(size); + + if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_rx_buffer *rx_buffer; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + page = rx_buffer->page; + prefetchw(page); + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IGB_RX_HDR_LEN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + } + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* pull page into skb */ + if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + + return skb; +} + static inline void igb_rx_checksum(struct igb_ring *ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -5889,224 +6038,386 @@ static inline void igb_rx_hash(struct igb_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } -static void igb_rx_vlan(struct igb_ring *ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) +/** + * igb_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool igb_is_non_eop(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc) { - if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { - u16 vid; - if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && - test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags)) - vid = be16_to_cpu(rx_desc->wb.upper.vlan); - else - vid = le16_to_cpu(rx_desc->wb.upper.vlan); + u32 ntc = rx_ring->next_to_clean + 1; - __vlan_hwaccel_put_tag(skb, vid); - } + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IGB_RX_DESC(rx_ring, ntc)); + + if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) + return false; + + return true; } -static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc) -{ - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. +/** + * igb_get_headlen - determine size of header for LRO/GRO + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in + * + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, and GRO offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. + **/ +static unsigned int igb_get_headlen(unsigned char *data, + unsigned int max_len) +{ + union { + unsigned char *network; + /* l2 headers */ + struct ethhdr *eth; + struct vlan_hdr *vlan; + /* l3 headers */ + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } hdr; + __be16 protocol; + u8 nexthdr = 0; /* default to not TCP */ + u8 hlen; + + /* this should never happen, but better safe than sorry */ + if (max_len < ETH_HLEN) + return max_len; + + /* initialize network frame pointer */ + hdr.network = data; + + /* set first protocol and move network header forward */ + protocol = hdr.eth->h_proto; + hdr.network += ETH_HLEN; + + /* handle any vlan tag if present */ + if (protocol == __constant_htons(ETH_P_8021Q)) { + if ((hdr.network - data) > (max_len - VLAN_HLEN)) + return max_len; + + protocol = hdr.vlan->h_vlan_encapsulated_proto; + hdr.network += VLAN_HLEN; + } + + /* handle L3 protocols */ + if (protocol == __constant_htons(ETH_P_IP)) { + if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) + return max_len; + + /* access ihl as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct iphdr)) + return hdr.network - data; + + /* record next protocol */ + nexthdr = hdr.ipv4->protocol; + hdr.network += hlen; + } else if (protocol == __constant_htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hdr.network += sizeof(struct ipv6hdr); + } else { + return hdr.network - data; + } + + /* finally sort out TCP */ + if (nexthdr == IPPROTO_TCP) { + if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) + return max_len; + + /* access doff as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[12] & 0xF0) >> 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct tcphdr)) + return hdr.network - data; + + hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); + } + + /* + * If everything has gone correctly hdr.network should be the + * data section of the packet and will be the end of the header. + * If not then it probably represents the end of the last recognized + * header. */ - u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & - E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IGB_RX_HDR_LEN) - hlen = IGB_RX_HDR_LEN; - return hlen; + if ((hdr.network - data) < max_len) + return hdr.network - data; + else + return max_len; } -static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +/** + * igb_pull_tail - igb specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being adjusted + * + * This function is an igb specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void igb_pull_tail(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - struct igb_ring *rx_ring = q_vector->rx.ring; - union e1000_adv_rx_desc *rx_desc; - const int current_node = numa_node_id(); - unsigned int total_bytes = 0, total_packets = 0; - u16 cleaned_count = igb_desc_unused(rx_ring); - u16 i = rx_ring->next_to_clean; + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; - rx_desc = IGB_RX_DESC(rx_ring, i); + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); - while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { - struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; - struct sk_buff *skb = buffer_info->skb; - union e1000_adv_rx_desc *next_rxd; + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + /* retrieve timestamp from buffer */ + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); - buffer_info->skb = NULL; - prefetch(skb->data); + /* update pointers to remove timestamp header */ + skb_frag_size_sub(frag, IGB_TS_HDR_LEN); + frag->page_offset += IGB_TS_HDR_LEN; + skb->data_len -= IGB_TS_HDR_LEN; + skb->len -= IGB_TS_HDR_LEN; - i++; - if (i == rx_ring->count) - i = 0; + /* move va to start of packet data */ + va += IGB_TS_HDR_LEN; + } + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); - next_rxd = IGB_RX_DESC(rx_ring, i); - prefetch(next_rxd); + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - /* - * This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we know the - * RXD_STAT_DD bit is set - */ - rmb(); + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} - if (!skb_is_nonlinear(skb)) { - __skb_put(skb, igb_get_hlen(rx_desc)); - dma_unmap_single(rx_ring->dev, buffer_info->dma, - IGB_RX_HDR_LEN, - DMA_FROM_DEVICE); - buffer_info->dma = 0; +/** + * igb_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool igb_cleanup_headers(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; } + } - if (rx_desc->wb.upper.length) { - u16 length = le16_to_cpu(rx_desc->wb.upper.length); + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + igb_pull_tail(rx_ring, rx_desc, skb); - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - buffer_info->page, - buffer_info->page_offset, - length); + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; - skb->len += length; - skb->data_len += length; - skb->truesize += PAGE_SIZE / 2; + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } - if ((page_count(buffer_info->page) != 1) || - (page_to_nid(buffer_info->page) != current_node)) - buffer_info->page = NULL; - else - get_page(buffer_info->page); + return false; +} - dma_unmap_page(rx_ring->dev, buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - buffer_info->page_dma = 0; - } +/** + * igb_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + **/ +static void igb_process_skb_fields(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct net_device *dev = rx_ring->netdev; - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) { - struct igb_rx_buffer *next_buffer; - next_buffer = &rx_ring->rx_buffer_info[i]; - buffer_info->skb = next_buffer->skb; - buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - goto next_desc; - } + igb_rx_hash(rx_ring, rx_desc, skb); - if (unlikely((igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)) - && !(rx_ring->netdev->features & NETIF_F_RXALL))) { - dev_kfree_skb_any(skb); - goto next_desc; - } + igb_rx_checksum(rx_ring, rx_desc, skb); -#ifdef CONFIG_IGB_PTP - igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb); -#endif /* CONFIG_IGB_PTP */ - igb_rx_hash(rx_ring, rx_desc, skb); - igb_rx_checksum(rx_ring, rx_desc, skb); - igb_rx_vlan(rx_ring, rx_desc, skb); + igb_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); - total_bytes += skb->len; - total_packets++; + if ((dev->features & NETIF_F_HW_VLAN_RX) && + igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { + u16 vid; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && + test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) + vid = be16_to_cpu(rx_desc->wb.upper.vlan); + else + vid = le16_to_cpu(rx_desc->wb.upper.vlan); - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + __vlan_hwaccel_put_tag(skb, vid); + } - napi_gro_receive(&q_vector->napi, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); - budget--; -next_desc: - if (!budget) - break; + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +} + +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) +{ + struct igb_ring *rx_ring = q_vector->rx.ring; + struct sk_buff *skb = rx_ring->skb; + unsigned int total_bytes = 0, total_packets = 0; + u16 cleaned_count = igb_desc_unused(rx_ring); + + do { + union e1000_adv_rx_desc *rx_desc; - cleaned_count++; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGB_RX_BUFFER_WRITE) { igb_alloc_rx_buffers(rx_ring, cleaned_count); cleaned_count = 0; } - /* use prefetched values */ - rx_desc = next_rxd; - } + rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); - rx_ring->next_to_clean = i; - u64_stats_update_begin(&rx_ring->rx_syncp); - rx_ring->rx_stats.packets += total_packets; - rx_ring->rx_stats.bytes += total_bytes; - u64_stats_update_end(&rx_ring->rx_syncp); - q_vector->rx.total_packets += total_packets; - q_vector->rx.total_bytes += total_bytes; + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; - if (cleaned_count) - igb_alloc_rx_buffers(rx_ring, cleaned_count); + /* retrieve a buffer from the ring */ + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); - return !!budget; -} + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; -static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, - struct igb_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; + cleaned_count++; - if (dma) - return true; + /* fetch next buffer in frame if non-eop */ + if (igb_is_non_eop(rx_ring, rx_desc)) + continue; - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IGB_RX_HDR_LEN); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_failed++; - return false; + /* verify the packet layout is correct */ + if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; } - /* initialize skb for ring */ - skb_record_rx_queue(skb, rx_ring->queue_index); - } + /* probably a little skewed due to removing CRC */ + total_bytes += skb->len; - dma = dma_map_single(rx_ring->dev, skb->data, - IGB_RX_HDR_LEN, DMA_FROM_DEVICE); + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); - if (dma_mapping_error(rx_ring->dev, dma)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } + napi_gro_receive(&q_vector->napi, skb); - bi->dma = dma; - return true; + /* reset skb pointer */ + skb = NULL; + + /* update budget accounting */ + total_packets++; + } while (likely(total_packets < budget)); + + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + + u64_stats_update_begin(&rx_ring->rx_syncp); + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + u64_stats_update_end(&rx_ring->rx_syncp); + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igb_alloc_rx_buffers(rx_ring, cleaned_count); + + return (total_packets < budget); } static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, struct igb_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t page_dma = bi->page_dma; - unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); + dma_addr_t dma; - if (page_dma) + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) return true; - if (!page) { - page = __skb_alloc_page(GFP_ATOMIC, bi->skb); - bi->page = page; - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } + /* alloc new page for storage */ + page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; } - page_dma = dma_map_page(rx_ring->dev, page, - page_offset, PAGE_SIZE / 2, - DMA_FROM_DEVICE); + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); - if (dma_mapping_error(rx_ring->dev, page_dma)) { rx_ring->rx_stats.alloc_failed++; return false; } - bi->page_dma = page_dma; - bi->page_offset = page_offset; + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + return true; } @@ -6120,22 +6431,23 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) struct igb_rx_buffer *bi; u16 i = rx_ring->next_to_use; + /* nothing to do */ + if (!cleaned_count) + return; + rx_desc = IGB_RX_DESC(rx_ring, i); bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - while (cleaned_count--) { - if (!igb_alloc_mapped_skb(rx_ring, bi)) - break; - - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - + do { if (!igb_alloc_mapped_page(rx_ring, bi)) break; - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -6148,17 +6460,25 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) /* clear the hdr_addr for the next_to_use descriptor */ rx_desc->read.hdr_addr = 0; - } + + cleaned_count--; + } while (cleaned_count); i += rx_ring->count; if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ rx_ring->next_to_use = i; - /* Force memory writes to complete before letting h/w + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + + /* + * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, - * such as IA-64). */ + * such as IA-64). + */ wmb(); writel(i, rx_ring->tail); } @@ -6207,10 +6527,8 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); -#ifdef CONFIG_IGB_PTP case SIOCSHWTSTAMP: return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); -#endif /* CONFIG_IGB_PTP */ default: return -EOPNOTSUPP; } @@ -6492,7 +6810,9 @@ static int igb_resume(struct device *dev) wr32(E1000_WUS, ~0); if (netdev->flags & IFF_UP) { + rtnl_lock(); err = __igb_open(netdev, true); + rtnl_unlock(); if (err) return err; } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index ee21445157a..aa10f69f9f1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -441,18 +441,46 @@ void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) adapter->ptp_tx_skb = NULL; } -void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, - union e1000_adv_rx_desc *rx_desc, +/** + * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * @q_vector: Pointer to interrupt specific structure + * @va: Pointer to address containing Rx buffer + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the first buffer of an + * incoming frame. The value is stored in little endian format starting on + * byte 8. + */ +void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb) +{ + __le64 *regval = (__le64 *)va; + + /* + * The timestamp is recorded in little endian format. + * DWORD: 0 1 2 3 + * Field: Reserved Reserved SYSTIML SYSTIMH + */ + igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval[1])); +} + +/** + * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register + * @q_vector: Pointer to interrupt specific structure + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the internal registers + * of the adapter and store it in the skb. + */ +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; u64 regval; - if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | - E1000_RXDADV_STAT_TS)) - return; - /* * If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so @@ -464,18 +492,11 @@ void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, * If nothing went wrong, then it should have a shared tx_flags that we * can turn into a skb_shared_hwtstamps. */ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - u32 *stamp = (u32 *)skb->data; - regval = le32_to_cpu(*(stamp + 2)); - regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; - skb_pull(skb, IGB_TS_HDR_LEN); - } else { - if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; + if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; - regval = rd32(E1000_RXSTMPL); - regval |= (u64)rd32(E1000_RXSTMPH) << 32; - } + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 0ac11f527a8..4051ec40461 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -184,6 +184,13 @@ static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring, buffer_info->page_offset, PAGE_SIZE / 2, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, + buffer_info->page_dma)) { + __free_page(buffer_info->page); + buffer_info->page = NULL; + dev_err(&pdev->dev, "RX DMA map failed\n"); + break; + } } if (!buffer_info->skb) { @@ -197,6 +204,12 @@ static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring, buffer_info->dma = dma_map_single(&pdev->dev, skb->data, bufsz, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, buffer_info->dma)) { + dev_kfree_skb(buffer_info->skb); + buffer_info->skb = NULL; + dev_err(&pdev->dev, "RX DMA map failed\n"); + goto no_buffers; + } } /* Refresh the desc even if buffer_addrs didn't change because * each write-back erases this info. */ diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 89f40e51fc1..f3a632bf8d9 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -34,11 +34,10 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o\ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ - ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o + ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o ixgbe_ptp.o ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe_dcb_82599.o ixgbe_dcb_nl.o -ixgbe-$(CONFIG_IXGBE_PTP) += ixgbe_ptp.o ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 30efc9f0f47..7ff4c4fdcb0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -36,11 +36,9 @@ #include <linux/aer.h> #include <linux/if_vlan.h> -#ifdef CONFIG_IXGBE_PTP #include <linux/clocksource.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> -#endif /* CONFIG_IXGBE_PTP */ #include "ixgbe_type.h" #include "ixgbe_common.h" @@ -135,6 +133,7 @@ struct vf_data_storage { u16 tx_rate; u16 vlan_count; u8 spoofchk_enabled; + unsigned int vf_api; }; struct vf_macvlans { @@ -482,7 +481,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 7) #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 8) #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 9) -#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 10) +#define IXGBE_FLAG2_PTP_ENABLED (u32)(1 << 10) #define IXGBE_FLAG2_PTP_PPS_ENABLED (u32)(1 << 11) /* Tx fast path data */ @@ -571,7 +570,6 @@ struct ixgbe_adapter { u32 interrupt_event; u32 led_reg; -#ifdef CONFIG_IXGBE_PTP struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; unsigned long last_overflow_check; @@ -580,8 +578,6 @@ struct ixgbe_adapter { struct timecounter tc; int rx_hwtstamp_filter; u32 base_incval; - u32 cycle_speed; -#endif /* CONFIG_IXGBE_PTP */ /* SR-IOV */ DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); @@ -600,6 +596,8 @@ struct ixgbe_adapter { #ifdef CONFIG_DEBUG_FS struct dentry *ixgbe_dbg_adapter; #endif /*CONFIG_DEBUG_FS*/ + + u8 default_up; }; struct ixgbe_fdir_filter { @@ -691,6 +689,7 @@ extern s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, u16 soft_id); extern void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, union ixgbe_atr_input *mask); +extern bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw); extern void ixgbe_set_rx_mode(struct net_device *netdev); #ifdef CONFIG_IXGBE_DCB extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); @@ -739,7 +738,6 @@ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) return netdev_get_tx_queue(ring->netdev, ring->queue_index); } -#ifdef CONFIG_IXGBE_PTP extern void ixgbe_ptp_init(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); @@ -751,7 +749,7 @@ extern void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, extern int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, struct ifreq *ifr, int cmd); extern void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter); +extern void ixgbe_ptp_reset(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr); -#endif /* CONFIG_IXGBE_PTP */ #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index 1077cb2b38d..e75f5a4a2a6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -62,7 +62,6 @@ static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, bool autoneg, bool autoneg_wait_to_complete); static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); -static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw); static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) { @@ -99,9 +98,8 @@ static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) { s32 ret_val = 0; - u32 reg_anlp1 = 0; - u32 i = 0; u16 list_offset, data_offset, data_value; + bool got_lock = false; if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) { ixgbe_init_mac_link_ops_82599(hw); @@ -137,28 +135,36 @@ static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) usleep_range(hw->eeprom.semaphore_delay * 1000, hw->eeprom.semaphore_delay * 2000); - /* Now restart DSP by setting Restart_AN and clearing LMS */ - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw, - IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) | - IXGBE_AUTOC_AN_RESTART)); - - /* Wait for AN to leave state 0 */ - for (i = 0; i < 10; i++) { - usleep_range(4000, 8000); - reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1); - if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK) - break; + /* Need SW/FW semaphore around AUTOC writes if LESM on, + * likewise reset_pipeline requires lock as it also writes + * AUTOC. + */ + if (ixgbe_verify_lesm_fw_enabled_82599(hw)) { + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val) + goto setup_sfp_out; + + got_lock = true; + } + + /* Restart DSP and set SFI mode */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw, + IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL)); + + ret_val = ixgbe_reset_pipeline_82599(hw); + + if (got_lock) { + hw->mac.ops.release_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + got_lock = false; } - if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) { - hw_dbg(hw, "sfp module setup not complete\n"); + + if (ret_val) { + hw_dbg(hw, " sfp module setup not complete\n"); ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; goto setup_sfp_out; } - - /* Restart DSP by setting Restart_AN and return to SFI mode */ - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw, - IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL | - IXGBE_AUTOC_AN_RESTART)); } setup_sfp_out: @@ -394,14 +400,26 @@ static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, u32 links_reg; u32 i; s32 status = 0; + bool got_lock = false; + + if (ixgbe_verify_lesm_fw_enabled_82599(hw)) { + status = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (status) + goto out; + + got_lock = true; + } /* Restart link */ - autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); - autoc_reg |= IXGBE_AUTOC_AN_RESTART; - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + ixgbe_reset_pipeline_82599(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); /* Only poll for autoneg to complete if specified to do so */ if (autoneg_wait_to_complete) { + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == IXGBE_AUTOC_LMS_KX4_KX_KR || (autoc_reg & IXGBE_AUTOC_LMS_MASK) == @@ -425,6 +443,7 @@ static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, /* Add delay to filter out noises during initial link setup */ msleep(50); +out: return status; } @@ -779,6 +798,7 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, u32 links_reg; u32 i; ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; + bool got_lock = false; /* Check to see if speed passed in is supported. */ status = hw->mac.ops.get_link_capabilities(hw, &link_capabilities, @@ -836,9 +856,26 @@ static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, } if (autoc != start_autoc) { + /* Need SW/FW semaphore around AUTOC writes if LESM is on, + * likewise reset_pipeline requires us to hold this lock as + * it also writes to AUTOC. + */ + if (ixgbe_verify_lesm_fw_enabled_82599(hw)) { + status = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (status != 0) + goto out; + + got_lock = true; + } + /* Restart link */ - autoc |= IXGBE_AUTOC_AN_RESTART; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); + ixgbe_reset_pipeline_82599(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); /* Only poll for autoneg to complete if specified to do so */ if (autoneg_wait_to_complete) { @@ -994,9 +1031,28 @@ mac_reset_top: hw->mac.orig_autoc2 = autoc2; hw->mac.orig_link_settings_stored = true; } else { - if (autoc != hw->mac.orig_autoc) - IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc | - IXGBE_AUTOC_AN_RESTART)); + if (autoc != hw->mac.orig_autoc) { + /* Need SW/FW semaphore around AUTOC writes if LESM is + * on, likewise reset_pipeline requires us to hold + * this lock as it also writes to AUTOC. + */ + bool got_lock = false; + if (ixgbe_verify_lesm_fw_enabled_82599(hw)) { + status = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (status) + goto reset_hw_out; + + got_lock = true; + } + + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc); + ixgbe_reset_pipeline_82599(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + } if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) != (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) { @@ -1983,7 +2039,7 @@ fw_version_out: * Returns true if the LESM FW module is present and enabled. Otherwise * returns false. Smart Speed must be disabled if LESM FW module is enabled. **/ -static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw) +bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw) { bool lesm_enabled = false; u16 fw_offset, fw_lesm_param_offset, fw_lesm_state; @@ -2080,6 +2136,50 @@ static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, return ret_val; } +/** + * ixgbe_reset_pipeline_82599 - perform pipeline reset + * + * @hw: pointer to hardware structure + * + * Reset pipeline by asserting Restart_AN together with LMS change to ensure + * full pipeline reset. Note - We must hold the SW/FW semaphore before writing + * to AUTOC, so this function assumes the semaphore is held. + **/ +s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw) +{ + s32 i, autoc_reg, ret_val; + s32 anlp1_reg = 0; + + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + + /* Write AUTOC register with toggled LMS[2] bit and Restart_AN */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg ^ IXGBE_AUTOC_LMS_1G_AN); + + /* Wait for AN to leave state 0 */ + for (i = 0; i < 10; i++) { + usleep_range(4000, 8000); + anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1); + if (anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK) + break; + } + + if (!(anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)) { + hw_dbg(hw, "auto negotiation not completed\n"); + ret_val = IXGBE_ERR_RESET_FAILED; + goto reset_pipeline_out; + } + + ret_val = 0; + +reset_pipeline_out: + /* Write AUTOC register with original LMS field and Restart_AN */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + IXGBE_WRITE_FLUSH(hw); + + return ret_val; +} + static struct ixgbe_mac_operations mac_ops_82599 = { .init_hw = &ixgbe_init_hw_generic, .reset_hw = &ixgbe_reset_hw_82599, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index dbf37e4a45f..8f285edb509 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -90,6 +90,7 @@ static s32 ixgbe_setup_fc(struct ixgbe_hw *hw) s32 ret_val = 0; u32 reg = 0, reg_bp = 0; u16 reg_cu = 0; + bool got_lock = false; /* * Validate the requested mode. Strict IEEE mode does not allow @@ -210,8 +211,29 @@ static s32 ixgbe_setup_fc(struct ixgbe_hw *hw) * */ if (hw->phy.media_type == ixgbe_media_type_backplane) { - reg_bp |= IXGBE_AUTOC_AN_RESTART; + /* Need the SW/FW semaphore around AUTOC writes if 82599 and + * LESM is on, likewise reset_pipeline requries the lock as + * it also writes AUTOC. + */ + if ((hw->mac.type == ixgbe_mac_82599EB) && + ixgbe_verify_lesm_fw_enabled_82599(hw)) { + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val) + goto out; + + got_lock = true; + } + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp); + + if (hw->mac.type == ixgbe_mac_82599EB) + ixgbe_reset_pipeline_82599(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + } else if ((hw->phy.media_type == ixgbe_media_type_copper) && (ixgbe_device_supports_autoneg_fc(hw) == 0)) { hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, @@ -1778,8 +1800,7 @@ s32 ixgbe_validate_mac_addr(u8 *mac_addr) else if (IXGBE_IS_BROADCAST(mac_addr)) status = IXGBE_ERR_INVALID_MAC_ADDR; /* Reject the zero address */ - else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 && - mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) + else if (is_zero_ether_addr(mac_addr)) status = IXGBE_ERR_INVALID_MAC_ADDR; return status; @@ -2617,6 +2638,7 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) bool link_up = false; u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + s32 ret_val = 0; /* * Link must be up to auto-blink the LEDs; @@ -2625,10 +2647,28 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) hw->mac.ops.check_link(hw, &speed, &link_up, false); if (!link_up) { + /* Need the SW/FW semaphore around AUTOC writes if 82599 and + * LESM is on. + */ + bool got_lock = false; + + if ((hw->mac.type == ixgbe_mac_82599EB) && + ixgbe_verify_lesm_fw_enabled_82599(hw)) { + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val) + goto out; + + got_lock = true; + } autoc_reg |= IXGBE_AUTOC_AN_RESTART; autoc_reg |= IXGBE_AUTOC_FLU; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); IXGBE_WRITE_FLUSH(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); usleep_range(10000, 20000); } @@ -2637,7 +2677,8 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); IXGBE_WRITE_FLUSH(hw); - return 0; +out: + return ret_val; } /** @@ -2649,18 +2690,40 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) { u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + s32 ret_val = 0; + bool got_lock = false; + + /* Need the SW/FW semaphore around AUTOC writes if 82599 and + * LESM is on. + */ + if ((hw->mac.type == ixgbe_mac_82599EB) && + ixgbe_verify_lesm_fw_enabled_82599(hw)) { + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val) + goto out; + + got_lock = true; + } autoc_reg &= ~IXGBE_AUTOC_FLU; autoc_reg |= IXGBE_AUTOC_AN_RESTART; IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + if (hw->mac.type == ixgbe_mac_82599EB) + ixgbe_reset_pipeline_82599(hw); + + if (got_lock) + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + led_reg &= ~IXGBE_LED_MODE_MASK(index); led_reg &= ~IXGBE_LED_BLINK(index); led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); IXGBE_WRITE_FLUSH(hw); - return 0; +out: + return ret_val; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h index d813d1188c3..587db472807 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h @@ -107,6 +107,7 @@ void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, int strategy); +s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw); #define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 #define IXGBE_EMC_INTERNAL_DATA 0x00 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 116f0e901be..a545728e100 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -887,24 +887,23 @@ static int ixgbe_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_ring *temp_tx_ring, *temp_rx_ring; + struct ixgbe_ring *temp_ring; int i, err = 0; u32 new_rx_count, new_tx_count; - bool need_update = false; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - new_rx_count = max_t(u32, ring->rx_pending, IXGBE_MIN_RXD); - new_rx_count = min_t(u32, new_rx_count, IXGBE_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); - - new_tx_count = max_t(u32, ring->tx_pending, IXGBE_MIN_TXD); - new_tx_count = min_t(u32, new_tx_count, IXGBE_MAX_TXD); + new_tx_count = clamp_t(u32, ring->tx_pending, + IXGBE_MIN_TXD, IXGBE_MAX_TXD); new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE); - if ((new_tx_count == adapter->tx_ring[0]->count) && - (new_rx_count == adapter->rx_ring[0]->count)) { + new_rx_count = clamp_t(u32, ring->rx_pending, + IXGBE_MIN_RXD, IXGBE_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); + + if ((new_tx_count == adapter->tx_ring_count) && + (new_rx_count == adapter->rx_ring_count)) { /* nothing to do */ return 0; } @@ -922,81 +921,80 @@ static int ixgbe_set_ringparam(struct net_device *netdev, goto clear_reset; } - temp_tx_ring = vmalloc(adapter->num_tx_queues * sizeof(struct ixgbe_ring)); - if (!temp_tx_ring) { + /* allocate temporary buffer to store rings in */ + i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues); + temp_ring = vmalloc(i * sizeof(struct ixgbe_ring)); + + if (!temp_ring) { err = -ENOMEM; goto clear_reset; } + ixgbe_down(adapter); + + /* + * Setup new Tx resources and free the old Tx resources in that order. + * We can then assign the new resources to the rings via a memcpy. + * The advantage to this approach is that we are guaranteed to still + * have resources even in the case of an allocation failure. + */ if (new_tx_count != adapter->tx_ring_count) { for (i = 0; i < adapter->num_tx_queues; i++) { - memcpy(&temp_tx_ring[i], adapter->tx_ring[i], + memcpy(&temp_ring[i], adapter->tx_ring[i], sizeof(struct ixgbe_ring)); - temp_tx_ring[i].count = new_tx_count; - err = ixgbe_setup_tx_resources(&temp_tx_ring[i]); + + temp_ring[i].count = new_tx_count; + err = ixgbe_setup_tx_resources(&temp_ring[i]); if (err) { while (i) { i--; - ixgbe_free_tx_resources(&temp_tx_ring[i]); + ixgbe_free_tx_resources(&temp_ring[i]); } - goto clear_reset; + goto err_setup; } } - need_update = true; - } - temp_rx_ring = vmalloc(adapter->num_rx_queues * sizeof(struct ixgbe_ring)); - if (!temp_rx_ring) { - err = -ENOMEM; - goto err_setup; + for (i = 0; i < adapter->num_tx_queues; i++) { + ixgbe_free_tx_resources(adapter->tx_ring[i]); + + memcpy(adapter->tx_ring[i], &temp_ring[i], + sizeof(struct ixgbe_ring)); + } + + adapter->tx_ring_count = new_tx_count; } + /* Repeat the process for the Rx rings if needed */ if (new_rx_count != adapter->rx_ring_count) { for (i = 0; i < adapter->num_rx_queues; i++) { - memcpy(&temp_rx_ring[i], adapter->rx_ring[i], + memcpy(&temp_ring[i], adapter->rx_ring[i], sizeof(struct ixgbe_ring)); - temp_rx_ring[i].count = new_rx_count; - err = ixgbe_setup_rx_resources(&temp_rx_ring[i]); + + temp_ring[i].count = new_rx_count; + err = ixgbe_setup_rx_resources(&temp_ring[i]); if (err) { while (i) { i--; - ixgbe_free_rx_resources(&temp_rx_ring[i]); + ixgbe_free_rx_resources(&temp_ring[i]); } goto err_setup; } + } - need_update = true; - } - /* if rings need to be updated, here's the place to do it in one shot */ - if (need_update) { - ixgbe_down(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) { + ixgbe_free_rx_resources(adapter->rx_ring[i]); - /* tx */ - if (new_tx_count != adapter->tx_ring_count) { - for (i = 0; i < adapter->num_tx_queues; i++) { - ixgbe_free_tx_resources(adapter->tx_ring[i]); - memcpy(adapter->tx_ring[i], &temp_tx_ring[i], - sizeof(struct ixgbe_ring)); - } - adapter->tx_ring_count = new_tx_count; + memcpy(adapter->rx_ring[i], &temp_ring[i], + sizeof(struct ixgbe_ring)); } - /* rx */ - if (new_rx_count != adapter->rx_ring_count) { - for (i = 0; i < adapter->num_rx_queues; i++) { - ixgbe_free_rx_resources(adapter->rx_ring[i]); - memcpy(adapter->rx_ring[i], &temp_rx_ring[i], - sizeof(struct ixgbe_ring)); - } - adapter->rx_ring_count = new_rx_count; - } - ixgbe_up(adapter); + adapter->rx_ring_count = new_rx_count; } - vfree(temp_rx_ring); err_setup: - vfree(temp_tx_ring); + ixgbe_up(adapter); + vfree(temp_ring); clear_reset: clear_bit(__IXGBE_RESETTING, &adapter->state); return err; @@ -2669,7 +2667,6 @@ static int ixgbe_get_ts_info(struct net_device *dev, struct ixgbe_adapter *adapter = netdev_priv(dev); switch (adapter->hw.mac.type) { -#ifdef CONFIG_IXGBE_PTP case ixgbe_mac_X540: case ixgbe_mac_82599EB: info->so_timestamping = @@ -2695,7 +2692,6 @@ static int ixgbe_get_ts_info(struct net_device *dev, (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); break; -#endif /* CONFIG_IXGBE_PTP */ default: return ethtool_op_get_ts_info(dev, info); break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index ae73ef14fdf..252850d9a3e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -800,6 +800,10 @@ int ixgbe_fcoe_enable(struct net_device *netdev) return -EINVAL; e_info(drv, "Enabling FCoE offload features.\n"); + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + e_warn(probe, "Enabling FCoE on PF will disable legacy VFs\n"); + if (netif_running(netdev)) netdev->netdev_ops->ndo_stop(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 17ecbcedd54..8c74f739011 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -802,10 +802,13 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* setup affinity mask and node */ if (cpu != -1) cpumask_set_cpu(cpu, &q_vector->affinity_mask); - else - cpumask_copy(&q_vector->affinity_mask, cpu_online_mask); q_vector->numa_node = node; +#ifdef CONFIG_IXGBE_DCA + /* initialize CPU for DCA */ + q_vector->cpu = -1; + +#endif /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); @@ -821,6 +824,21 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, /* initialize pointer to rings */ ring = q_vector->ring; + /* intialize ITR */ + if (txr_count && !rxr_count) { + /* tx only vector */ + if (adapter->tx_itr_setting == 1) + q_vector->itr = IXGBE_10K_ITR; + else + q_vector->itr = adapter->tx_itr_setting; + } else { + /* rx or rx/tx vector */ + if (adapter->rx_itr_setting == 1) + q_vector->itr = IXGBE_20K_ITR; + else + q_vector->itr = adapter->rx_itr_setting; + } + while (txr_count) { /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index fa3d552e1f4..690535a0322 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -44,6 +44,7 @@ #include <linux/ethtool.h> #include <linux/if.h> #include <linux/if_vlan.h> +#include <linux/if_bridge.h> #include <linux/prefetch.h> #include <scsi/fc/fc_fcoe.h> @@ -355,13 +356,37 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) /* Transmit Descriptor Formats * - * Advanced Transmit Descriptor + * 82598 Advanced Transmit Descriptor * +--------------------------------------------------------------+ * 0 | Buffer Address [63:0] | * +--------------------------------------------------------------+ - * 8 | PAYLEN | PORTS | IDX | STA | DCMD |DTYP | RSV | DTALEN | + * 8 | PAYLEN | POPTS | IDX | STA | DCMD |DTYP | RSV | DTALEN | * +--------------------------------------------------------------+ * 63 46 45 40 39 36 35 32 31 24 23 20 19 0 + * + * 82598 Advanced Transmit Descriptor (Write-Back Format) + * +--------------------------------------------------------------+ + * 0 | RSV [63:0] | + * +--------------------------------------------------------------+ + * 8 | RSV | STA | NXTSEQ | + * +--------------------------------------------------------------+ + * 63 36 35 32 31 0 + * + * 82599+ Advanced Transmit Descriptor + * +--------------------------------------------------------------+ + * 0 | Buffer Address [63:0] | + * +--------------------------------------------------------------+ + * 8 |PAYLEN |POPTS|CC|IDX |STA |DCMD |DTYP |MAC |RSV |DTALEN | + * +--------------------------------------------------------------+ + * 63 46 45 40 39 38 36 35 32 31 24 23 20 19 18 17 16 15 0 + * + * 82599+ Advanced Transmit Descriptor (Write-Back Format) + * +--------------------------------------------------------------+ + * 0 | RSV [63:0] | + * +--------------------------------------------------------------+ + * 8 | RSV | STA | RSV | + * +--------------------------------------------------------------+ + * 63 36 35 32 31 0 */ for (n = 0; n < adapter->num_tx_queues; n++) { @@ -422,7 +447,9 @@ rx_ring_summary: dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); - /* Advanced Receive Descriptor (Read) Format + /* Receive Descriptor Formats + * + * 82598 Advanced Receive Descriptor (Read) Format * 63 1 0 * +-----------------------------------------------------+ * 0 | Packet Buffer Address [63:1] |A0/NSE| @@ -431,17 +458,40 @@ rx_ring_summary: * +-----------------------------------------------------+ * * - * Advanced Receive Descriptor (Write-Back) Format + * 82598 Advanced Receive Descriptor (Write-Back) Format * * 63 48 47 32 31 30 21 20 16 15 4 3 0 * +------------------------------------------------------+ - * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | - * | Checksum Ident | | | | Type | Type | + * 0 | RSS Hash / |SPH| HDR_LEN | RSV |Packet| RSS | + * | Packet | IP | | | | Type | Type | + * | Checksum | Ident | | | | | | * +------------------------------------------------------+ * 8 | VLAN Tag | Length | Extended Error | Extended Status | * +------------------------------------------------------+ * 63 48 47 32 31 20 19 0 + * + * 82599+ Advanced Receive Descriptor (Read) Format + * 63 1 0 + * +-----------------------------------------------------+ + * 0 | Packet Buffer Address [63:1] |A0/NSE| + * +----------------------------------------------+------+ + * 8 | Header Buffer Address [63:1] | DD | + * +-----------------------------------------------------+ + * + * + * 82599+ Advanced Receive Descriptor (Write-Back) Format + * + * 63 48 47 32 31 30 21 20 17 16 4 3 0 + * +------------------------------------------------------+ + * 0 |RSS / Frag Checksum|SPH| HDR_LEN |RSC- |Packet| RSS | + * |/ RTT / PCoE_PARAM | | | CNT | Type | Type | + * |/ Flow Dir Flt ID | | | | | | + * +------------------------------------------------------+ + * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP | + * +------------------------------------------------------+ + * 63 48 47 32 31 20 19 0 */ + for (n = 0; n < adapter->num_rx_queues; n++) { rx_ring = adapter->rx_ring[n]; pr_info("------------------------------------\n"); @@ -791,10 +841,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; -#ifdef CONFIG_IXGBE_PTP if (unlikely(tx_buffer->tx_flags & IXGBE_TX_FLAGS_TSTAMP)) ixgbe_ptp_tx_hwtstamp(q_vector, tx_buffer->skb); -#endif /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); @@ -1244,6 +1292,7 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, struct vlan_hdr *vlan; /* l3 headers */ struct iphdr *ipv4; + struct ipv6hdr *ipv6; } hdr; __be16 protocol; u8 nexthdr = 0; /* default to not TCP */ @@ -1284,6 +1333,13 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, /* record next protocol */ nexthdr = hdr.ipv4->protocol; hdr.network += hlen; + } else if (protocol == __constant_htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hdr.network += sizeof(struct ipv6hdr); #ifdef IXGBE_FCOE } else if (protocol == __constant_htons(ETH_P_FCOE)) { if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN)) @@ -1294,7 +1350,7 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return hdr.network - data; } - /* finally sort out TCP */ + /* finally sort out TCP/UDP */ if (nexthdr == IPPROTO_TCP) { if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) return max_len; @@ -1307,6 +1363,11 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return hdr.network - data; hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); } /* @@ -1369,9 +1430,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); -#ifdef CONFIG_IXGBE_PTP ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); -#endif if ((dev->features & NETIF_F_HW_VLAN_RX) && ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { @@ -1781,7 +1840,7 @@ dma_sync: **/ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, - int budget) + const int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; #ifdef IXGBE_FCOE @@ -1832,7 +1891,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; - total_rx_packets++; /* populate checksum, timestamp, VLAN, and protocol */ ixgbe_process_skb_fields(rx_ring, rx_desc, skb); @@ -1865,8 +1923,8 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ixgbe_rx_skb(q_vector, skb); /* update budget accounting */ - budget--; - } while (likely(budget)); + total_rx_packets++; + } while (likely(total_rx_packets < budget)); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -1878,7 +1936,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (cleaned_count) ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); - return !!budget; + return (total_rx_packets < budget); } /** @@ -1914,20 +1972,6 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) ixgbe_for_each_ring(ring, q_vector->tx) ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx); - if (q_vector->tx.ring && !q_vector->rx.ring) { - /* tx only vector */ - if (adapter->tx_itr_setting == 1) - q_vector->itr = IXGBE_10K_ITR; - else - q_vector->itr = adapter->tx_itr_setting; - } else { - /* rx or rx/tx vector */ - if (adapter->rx_itr_setting == 1) - q_vector->itr = IXGBE_20K_ITR; - else - q_vector->itr = adapter->rx_itr_setting; - } - ixgbe_write_eitr(q_vector); } @@ -2324,10 +2368,8 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues, break; } -#ifdef CONFIG_IXGBE_PTP if (adapter->hw.mac.type == ixgbe_mac_X540) mask |= IXGBE_EIMS_TIMESYNC; -#endif if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) && !(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT)) @@ -2393,10 +2435,8 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); -#ifdef CONFIG_IXGBE_PTP if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) ixgbe_ptp_check_pps_event(adapter, eicr); -#endif /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) @@ -2588,10 +2628,8 @@ static irqreturn_t ixgbe_intr(int irq, void *data) } ixgbe_check_fan_failure(adapter, eicr); -#ifdef CONFIG_IXGBE_PTP if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) ixgbe_ptp_check_pps_event(adapter, eicr); -#endif /* would disable interrupts here but EIAM disabled it */ napi_schedule(&q_vector->napi); @@ -2699,12 +2737,6 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) { struct ixgbe_q_vector *q_vector = adapter->q_vector[0]; - /* rx/tx vector */ - if (adapter->rx_itr_setting == 1) - q_vector->itr = IXGBE_20K_ITR; - else - q_vector->itr = adapter->rx_itr_setting; - ixgbe_write_eitr(q_vector); ixgbe_set_ivar(adapter, 0, 0, 0); @@ -3211,7 +3243,6 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1); IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), (~0) << vf_shift); IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1); - IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); /* Map PF MAC address in RAR Entry 0 to first pool following VFs */ hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0)); @@ -3234,8 +3265,6 @@ static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); - /* enable Tx loopback for VF/PF communication */ - IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); /* Enable MAC Anti-Spoofing */ hw->mac.ops.set_mac_anti_spoofing(hw, (adapter->num_vfs != 0), @@ -3263,6 +3292,11 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif /* IXGBE_FCOE */ + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN); + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { mhadd &= ~IXGBE_MHADD_MFS_MASK; @@ -3271,9 +3305,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } - /* MHADD will allow an extra 4 bytes past for vlan tagged frames */ - max_frame += VLAN_HLEN; - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -4072,11 +4103,8 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) else ixgbe_configure_msi_and_legacy(adapter); - /* enable the optics for both mult-speed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.enable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* enable the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.enable_tx_laser) hw->mac.ops.enable_tx_laser(hw); clear_bit(__IXGBE_DOWN, &adapter->state); @@ -4192,6 +4220,9 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) /* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index) hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0)); + + if (adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) + ixgbe_ptp_reset(adapter); } /** @@ -4393,11 +4424,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) ixgbe_reset(adapter); - /* power down the optics for multispeed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.disable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* power down the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); ixgbe_clean_all_tx_rings(adapter); @@ -4828,14 +4856,14 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; /* - * For 82599EB we cannot allow PF to change MTU greater than 1500 - * in SR-IOV mode as it may cause buffer overruns in guest VFs that - * don't allocate and chain buffers correctly. + * For 82599EB we cannot allow legacy VFs to enable their receive + * paths when MTU greater than 1500 is configured. So display a + * warning that legacy VFs will be disabled. */ if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (adapter->hw.mac.type == ixgbe_mac_82599EB) && (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) - return -EINVAL; + e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n"); e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -4901,6 +4929,8 @@ static int ixgbe_open(struct net_device *netdev) if (err) goto err_set_queues; + ixgbe_ptp_init(adapter); + ixgbe_up_complete(adapter); return 0; @@ -4932,6 +4962,8 @@ static int ixgbe_close(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + ixgbe_ptp_stop(adapter); + ixgbe_down(adapter); ixgbe_free_irq(adapter); @@ -5022,14 +5054,8 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) if (wufc) { ixgbe_set_rx_mode(netdev); - /* - * enable the optics for both mult-speed fiber and - * 82599 SFP+ fiber as we can WoL. - */ - if (hw->mac.ops.enable_tx_laser && - (hw->phy.multispeed_fiber || - (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber && - hw->mac.type == ixgbe_mac_82599EB))) + /* enable the optics for 82599 SFP+ fiber as we can WoL */ + if (hw->mac.ops.enable_tx_laser) hw->mac.ops.enable_tx_laser(hw); /* turn on all-multi mode if wake on multicast is enabled */ @@ -5442,6 +5468,23 @@ static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) adapter->link_speed = link_speed; } +static void ixgbe_update_default_up(struct ixgbe_adapter *adapter) +{ +#ifdef CONFIG_IXGBE_DCB + struct net_device *netdev = adapter->netdev; + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + }; + u8 up = 0; + + if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) + up = dcb_ieee_getapp_mask(netdev, &app); + + adapter->default_up = (up > 1) ? (ffs(up) - 1) : 0; +#endif +} + /** * ixgbe_watchdog_link_is_up - update netif_carrier status and * print link up message @@ -5482,9 +5525,8 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) break; } -#ifdef CONFIG_IXGBE_PTP - ixgbe_ptp_start_cyclecounter(adapter); -#endif + if (adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) + ixgbe_ptp_start_cyclecounter(adapter); e_info(drv, "NIC Link is Up %s, Flow Control: %s\n", (link_speed == IXGBE_LINK_SPEED_10GB_FULL ? @@ -5501,6 +5543,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) netif_carrier_on(netdev); ixgbe_check_vf_rate_limit(adapter); + /* update the default user priority for VFs */ + ixgbe_update_default_up(adapter); + /* ping all the active vfs to let them know link has changed */ ixgbe_ping_all_vfs(adapter); } @@ -5526,9 +5571,8 @@ static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter) if (ixgbe_is_sfp(hw) && hw->mac.type == ixgbe_mac_82598EB) adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP; -#ifdef CONFIG_IXGBE_PTP - ixgbe_ptp_start_cyclecounter(adapter); -#endif + if (adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) + ixgbe_ptp_start_cyclecounter(adapter); e_info(drv, "NIC Link is Down\n"); netif_carrier_off(netdev); @@ -5833,9 +5877,7 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_watchdog_subtask(adapter); ixgbe_fdir_reinit_subtask(adapter); ixgbe_check_hang_subtask(adapter); -#ifdef CONFIG_IXGBE_PTP ixgbe_ptp_overflow_check(adapter); -#endif ixgbe_service_event_complete(adapter); } @@ -5988,10 +6030,8 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) if (tx_flags & IXGBE_TX_FLAGS_HW_VLAN) cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE); -#ifdef CONFIG_IXGBE_PTP if (tx_flags & IXGBE_TX_FLAGS_TSTAMP) cmd_type |= cpu_to_le32(IXGBE_ADVTXD_MAC_TSTAMP); -#endif /* set segmentation enable bits for TSO/FSO */ #ifdef IXGBE_FCOE @@ -6393,12 +6433,10 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); -#ifdef CONFIG_IXGBE_PTP if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IXGBE_TX_FLAGS_TSTAMP; } -#endif #ifdef CONFIG_PCI_IOV /* @@ -6485,6 +6523,7 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, if (skb_pad(skb, 17 - skb->len)) return NETDEV_TX_OK; skb->len = 17; + skb_set_tail_pointer(skb, 17); } tx_ring = adapter->tx_ring[skb->queue_mapping]; @@ -6547,10 +6586,8 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) struct ixgbe_adapter *adapter = netdev_priv(netdev); switch (cmd) { -#ifdef CONFIG_IXGBE_PTP case SIOCSHWTSTAMP: return ixgbe_ptp_hwtstamp_ioctl(adapter, req, cmd); -#endif default: return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd); } @@ -6916,7 +6953,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (is_unicast_ether_addr(addr)) { + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS; if (netdev_uc_count(dev) < rar_uc_entries) @@ -6974,6 +7011,59 @@ static int ixgbe_ndo_fdb_dump(struct sk_buff *skb, return idx; } +static int ixgbe_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct nlattr *attr, *br_spec; + int rem; + + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) + return -EOPNOTSUPP; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; + u32 reg = 0; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) + reg = 0; + else if (mode == BRIDGE_MODE_VEB) + reg = IXGBE_PFDTXGSWC_VT_LBEN; + else + return -EINVAL; + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC, reg); + + e_info(drv, "enabling bridge mode: %s\n", + mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); + } + + return 0; +} + +static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 mode; + + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) + return 0; + + if (IXGBE_READ_REG(&adapter->hw, IXGBE_PFDTXGSWC) & 1) + mode = BRIDGE_MODE_VEB; + else + mode = BRIDGE_MODE_VEPA; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode); +} + static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, @@ -7013,6 +7103,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_fdb_add = ixgbe_ndo_fdb_add, .ndo_fdb_del = ixgbe_ndo_fdb_del, .ndo_fdb_dump = ixgbe_ndo_fdb_dump, + .ndo_bridge_setlink = ixgbe_ndo_bridge_setlink, + .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, }; /** @@ -7042,6 +7134,7 @@ int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, break; case IXGBE_SUBDEV_ID_82599_SFP: case IXGBE_SUBDEV_ID_82599_RNDC: + case IXGBE_SUBDEV_ID_82599_ECNA_DP: is_wol_supported = 1; break; } @@ -7364,10 +7457,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); -#ifdef CONFIG_IXGBE_PTP - ixgbe_ptp_init(adapter); -#endif /* CONFIG_IXGBE_PTP*/ - /* save off EEPROM version number */ hw->eeprom.ops.read(hw, 0x2e, &adapter->eeprom_verh); hw->eeprom.ops.read(hw, 0x2d, &adapter->eeprom_verl); @@ -7420,11 +7509,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (err) goto err_register; - /* power down the optics for multispeed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.disable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* power down the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); /* carrier off reporting is important to ethtool even BEFORE open */ @@ -7505,9 +7591,6 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) set_bit(__IXGBE_DOWN, &adapter->state); cancel_work_sync(&adapter->service_task); -#ifdef CONFIG_IXGBE_PTP - ixgbe_ptp_stop(adapter); -#endif #ifdef CONFIG_IXGBE_DCA if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index 310bdd96107..42dd65e6ac9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -62,12 +62,39 @@ /* bits 23:16 are used for exra info for certain messages */ #define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) +/* definitions to support mailbox API version negotiation */ + +/* + * Each element denotes a version of the API; existing numbers may not + * change; any additions must go at the end + */ +enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */ + ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ + ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ +}; + +/* mailbox API, legacy requests */ #define IXGBE_VF_RESET 0x01 /* VF requests reset */ #define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ #define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ #define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ -#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ -#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* mailbox API, version 1.0 VF requests */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ +#define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */ + +/* mailbox API, version 1.1 VF requests */ +#define IXGBE_VF_GET_QUEUES 0x09 /* get queue configuration */ + +/* GET_QUEUES return data indices within the mailbox */ +#define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */ +#define IXGBE_VF_RX_QUEUES 2 /* number of Rx queues supported */ +#define IXGBE_VF_TRANS_VLAN 3 /* Indication of port vlan */ +#define IXGBE_VF_DEF_QUEUE 4 /* Default queue offset */ /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index d9291316ee9..01d99af0b9b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -387,6 +387,15 @@ void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) struct ixgbe_hw *hw = &adapter->hw; struct ptp_clock_event event; + event.type = PTP_CLOCK_PPS; + + /* this check is necessary in case the interrupt was enabled via some + * alternative means (ex. debug_fs). Better to check here than + * everywhere that calls this function. + */ + if (!adapter->ptp_clock) + return; + switch (hw->mac.type) { case ixgbe_mac_X540: ptp_clock_event(adapter->ptp_clock, &event); @@ -411,7 +420,7 @@ void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter) unsigned long elapsed_jiffies = adapter->last_overflow_check - jiffies; struct timespec ts; - if ((adapter->flags2 & IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED) && + if ((adapter->flags2 & IXGBE_FLAG2_PTP_ENABLED) && (elapsed_jiffies >= IXGBE_OVERFLOW_PERIOD)) { ixgbe_ptp_gettime(&adapter->ptp_caps, &ts); adapter->last_overflow_check = jiffies; @@ -554,12 +563,14 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, adapter = q_vector->adapter; hw = &adapter->hw; + if (likely(!ixgbe_ptp_match(skb, adapter->rx_hwtstamp_filter))) + return; + tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); /* Check if we have a valid timestamp and make sure the skb should * have been timestamped */ - if (likely(!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID) || - !ixgbe_ptp_match(skb, adapter->rx_hwtstamp_filter))) + if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) return; /* @@ -759,58 +770,20 @@ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw * @adapter: pointer to the adapter structure * - * this function initializes the timecounter and cyclecounter - * structures for use in generated a ns counter from the arbitrary - * fixed point cycles registers in the hardware. - * - * A change in link speed impacts the frequency of the DMA clock on - * the device, which is used to generate the cycle counter - * registers. Therefor this function is called whenever the link speed - * changes. - * - * This function also turns on the SDP pin for clock out feature (X540 - * only), because this is where the shift is first calculated. + * This function should be called to set the proper values for the TIMINCA + * register and tell the cyclecounter structure what the tick rate of SYSTIME + * is. It does not directly modify SYSTIME registers or the timecounter + * structure. It should be called whenever a new TIMINCA value is necessary, + * such as during initialization or when the link speed changes. */ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 incval = 0; - u32 timinca = 0; u32 shift = 0; - u32 cycle_speed; unsigned long flags; /** - * Determine what speed we need to set the cyclecounter - * for. It should be different for 100Mb, 1Gb, and 10Gb. Treat - * unknown speeds as 10Gb. (Hence why we can't just copy the - * link_speed. - */ - switch (adapter->link_speed) { - case IXGBE_LINK_SPEED_100_FULL: - case IXGBE_LINK_SPEED_1GB_FULL: - case IXGBE_LINK_SPEED_10GB_FULL: - cycle_speed = adapter->link_speed; - break; - default: - /* cycle speed should be 10Gb when there is no link */ - cycle_speed = IXGBE_LINK_SPEED_10GB_FULL; - break; - } - - /* - * grab the current TIMINCA value from the register so that it can be - * double checked. If the register value has been cleared, it must be - * reset to the correct value for generating a cyclecounter. If - * TIMINCA is zero, the SYSTIME registers do not increment at all. - */ - timinca = IXGBE_READ_REG(hw, IXGBE_TIMINCA); - - /* Bail if the cycle speed didn't change and TIMINCA is non-zero */ - if (adapter->cycle_speed == cycle_speed && timinca) - return; - - /** * Scale the NIC cycle counter by a large factor so that * relatively small corrections to the frequency can be added * or subtracted. The drawbacks of a large factor include @@ -819,8 +792,12 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) * to nanoseconds using only a multiplier and a right-shift, * and (c) the value must fit within the timinca register space * => math based on internal DMA clock rate and available bits + * + * Note that when there is no link, internal DMA clock is same as when + * link speed is 10Gb. Set the registers correctly even when link is + * down to preserve the clock setting */ - switch (cycle_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: incval = IXGBE_INCVAL_100; shift = IXGBE_INCVAL_SHIFT_100; @@ -830,6 +807,7 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) shift = IXGBE_INCVAL_SHIFT_1GB; break; case IXGBE_LINK_SPEED_10GB_FULL: + default: incval = IXGBE_INCVAL_10GB; shift = IXGBE_INCVAL_SHIFT_10GB; break; @@ -857,18 +835,11 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) return; } - /* reset the system time registers */ - IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); - IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); - IXGBE_WRITE_FLUSH(hw); - - /* store the new cycle speed */ - adapter->cycle_speed = cycle_speed; - + /* update the base incval used to calculate frequency adjustment */ ACCESS_ONCE(adapter->base_incval) = incval; smp_mb(); - /* grab the ptp lock */ + /* need lock to prevent incorrect read while modifying cyclecounter */ spin_lock_irqsave(&adapter->tmreg_lock, flags); memset(&adapter->cc, 0, sizeof(adapter->cc)); @@ -877,6 +848,31 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter) adapter->cc.shift = shift; adapter->cc.mult = 1; + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); +} + +/** + * ixgbe_ptp_reset + * @adapter: the ixgbe private board structure + * + * When the MAC resets, all timesync features are reset. This function should be + * called to re-enable the PTP clock structure. It will re-init the timecounter + * structure based on the kernel time as well as setup the cycle counter data. + */ +void ixgbe_ptp_reset(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + unsigned long flags; + + /* set SYSTIME registers to 0 just in case */ + IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0x00000000); + IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0x00000000); + IXGBE_WRITE_FLUSH(hw); + + ixgbe_ptp_start_cyclecounter(adapter); + + spin_lock_irqsave(&adapter->tmreg_lock, flags); + /* reset the ns time counter */ timecounter_init(&adapter->tc, &adapter->cc, ktime_to_ns(ktime_get_real())); @@ -904,7 +900,7 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) switch (adapter->hw.mac.type) { case ixgbe_mac_X540: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 250000000; adapter->ptp_caps.n_alarm = 0; @@ -918,7 +914,7 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) adapter->ptp_caps.enable = ixgbe_ptp_enable; break; case ixgbe_mac_82599EB: - snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 250000000; adapter->ptp_caps.n_alarm = 0; @@ -942,11 +938,6 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) spin_lock_init(&adapter->tmreg_lock); - ixgbe_ptp_start_cyclecounter(adapter); - - /* (Re)start the overflow check */ - adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED; - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -955,6 +946,11 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) } else e_dev_info("registered PHC device on %s\n", netdev->name); + ixgbe_ptp_reset(adapter); + + /* set the flag that PTP has been enabled */ + adapter->flags2 |= IXGBE_FLAG2_PTP_ENABLED; + return; } @@ -967,7 +963,7 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) void ixgbe_ptp_stop(struct ixgbe_adapter *adapter) { /* stop the overflow check task */ - adapter->flags2 &= ~(IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED | + adapter->flags2 &= ~(IXGBE_FLAG2_PTP_ENABLED | IXGBE_FLAG2_PTP_PPS_ENABLED); ixgbe_ptp_setup_sdp(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index dce48bf64d9..4993642d1ce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -117,6 +117,9 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, } } + /* Initialize default switching mode VEB */ + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); + /* If call to enable VFs succeeded then allocate memory * for per VF control structures. */ @@ -150,16 +153,6 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, adapter->flags2 &= ~(IXGBE_FLAG2_RSC_CAPABLE | IXGBE_FLAG2_RSC_ENABLED); -#ifdef IXGBE_FCOE - /* - * When SR-IOV is enabled 82599 cannot support jumbo frames - * so we must disable FCoE because we cannot support FCoE MTU. - */ - if (adapter->hw.mac.type == ixgbe_mac_82599EB) - adapter->flags &= ~(IXGBE_FLAG_FCOE_ENABLED | - IXGBE_FLAG_FCOE_CAPABLE); -#endif - /* enable spoof checking for all VFs */ for (i = 0; i < adapter->num_vfs; i++) adapter->vfinfo[i].spoofchk_enabled = true; @@ -265,8 +258,11 @@ void ixgbe_disable_sriov(struct ixgbe_adapter *adapter) } static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, - int entries, u16 *hash_list, u32 vf) + u32 *msgbuf, u32 vf) { + int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) + >> IXGBE_VT_MSGINFO_SHIFT; + u16 *hash_list = (u16 *)&msgbuf[1]; struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; struct ixgbe_hw *hw = &adapter->hw; int i; @@ -353,31 +349,89 @@ static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add); } -static void ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf) +static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; - int new_mtu = msgbuf[1]; + int max_frame = msgbuf[1]; u32 max_frs; - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; - /* Only X540 supports jumbo frames in IOV mode */ - if (adapter->hw.mac.type != ixgbe_mac_X540) - return; + /* + * For 82599EB we have to keep all PFs and VFs operating with + * the same max_frame value in order to avoid sending an oversize + * frame to a VF. In order to guarantee this is handled correctly + * for all cases we have several special exceptions to take into + * account before we can enable the VF for receive + */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + struct net_device *dev = adapter->netdev; + int pf_max_frame = dev->mtu + ETH_HLEN; + u32 reg_offset, vf_shift, vfre; + s32 err = 0; + +#ifdef CONFIG_FCOE + if (dev->features & NETIF_F_FCOE_MTU) + pf_max_frame = max_t(int, pf_max_frame, + IXGBE_FCOE_JUMBO_FRAME_SIZE); + +#endif /* CONFIG_FCOE */ + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_11: + /* + * Version 1.1 supports jumbo frames on VFs if PF has + * jumbo frames enabled which means legacy VFs are + * disabled + */ + if (pf_max_frame > ETH_FRAME_LEN) + break; + default: + /* + * If the PF or VF are running w/ jumbo frames enabled + * we need to shut down the VF Rx path as we cannot + * support jumbo frames on legacy VFs + */ + if ((pf_max_frame > ETH_FRAME_LEN) || + (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))) + err = -EINVAL; + break; + } + + /* determine VF receive enable location */ + vf_shift = vf % 32; + reg_offset = vf / 32; + + /* enable or disable receive depending on error */ + vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); + if (err) + vfre &= ~(1 << vf_shift); + else + vfre |= 1 << vf_shift; + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), vfre); + + if (err) { + e_err(drv, "VF max_frame %d out of range\n", max_frame); + return err; + } + } /* MTU < 68 is an error and causes problems on some kernels */ - if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) { - e_err(drv, "VF mtu %d out of range\n", new_mtu); - return; + if (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) { + e_err(drv, "VF max_frame %d out of range\n", max_frame); + return -EINVAL; } - max_frs = (IXGBE_READ_REG(hw, IXGBE_MAXFRS) & - IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT; - if (max_frs < new_mtu) { - max_frs = new_mtu << IXGBE_MHADD_MFS_SHIFT; + /* pull current max frame size from hardware */ + max_frs = IXGBE_READ_REG(hw, IXGBE_MAXFRS); + max_frs &= IXGBE_MHADD_MFS_MASK; + max_frs >>= IXGBE_MHADD_MFS_SHIFT; + + if (max_frs < max_frame) { + max_frs = max_frame << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, max_frs); } - e_info(hw, "VF requests change max MTU to %d\n", new_mtu); + e_info(hw, "VF requests change max MTU to %d\n", max_frame); + + return 0; } static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe) @@ -392,35 +446,47 @@ static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe) IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); } -static void ixgbe_set_vmvir(struct ixgbe_adapter *adapter, u32 vid, u32 vf) +static void ixgbe_set_vmvir(struct ixgbe_adapter *adapter, + u16 vid, u16 qos, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; + u32 vmvir = vid | (qos << VLAN_PRIO_SHIFT) | IXGBE_VMVIR_VLANA_DEFAULT; - if (vid) - IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), - (vid | IXGBE_VMVIR_VLANA_DEFAULT)); - else - IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), 0); + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), vmvir); } +static void ixgbe_clear_vmvir(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), 0); +} static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; + struct vf_data_storage *vfinfo = &adapter->vfinfo[vf]; int rar_entry = hw->mac.num_rar_entries - (vf + 1); + u8 num_tcs = netdev_get_num_tc(adapter->netdev); + + /* add PF assigned VLAN or VLAN 0 */ + ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf); /* reset offloads to defaults */ - if (adapter->vfinfo[vf].pf_vlan) { - ixgbe_set_vf_vlan(adapter, true, - adapter->vfinfo[vf].pf_vlan, vf); - ixgbe_set_vmvir(adapter, - (adapter->vfinfo[vf].pf_vlan | - (adapter->vfinfo[vf].pf_qos << - VLAN_PRIO_SHIFT)), vf); - ixgbe_set_vmolr(hw, vf, false); + ixgbe_set_vmolr(hw, vf, !vfinfo->pf_vlan); + + /* set outgoing tags for VFs */ + if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) { + ixgbe_clear_vmvir(adapter, vf); } else { - ixgbe_set_vf_vlan(adapter, true, 0, vf); - ixgbe_set_vmvir(adapter, 0, vf); - ixgbe_set_vmolr(hw, vf, true); + if (vfinfo->pf_qos || !num_tcs) + ixgbe_set_vmvir(adapter, vfinfo->pf_vlan, + vfinfo->pf_qos, vf); + else + ixgbe_set_vmvir(adapter, vfinfo->pf_vlan, + adapter->default_up, vf); + + if (vfinfo->spoofchk_enabled) + hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); } /* reset multicast table array for vf */ @@ -430,6 +496,9 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) ixgbe_set_rx_mode(adapter->netdev); hw->mac.ops.clear_rar(hw, rar_entry); + + /* reset VF api back to unknown */ + adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10; } static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, @@ -521,30 +590,221 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask) return 0; } -static inline void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) +static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_hw *hw = &adapter->hw; - u32 reg; + unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses; + u32 reg, msgbuf[4]; u32 reg_offset, vf_shift; + u8 *addr = (u8 *)(&msgbuf[1]); + + e_info(probe, "VF Reset msg received from vf %d\n", vf); + + /* reset the filters for the device */ + ixgbe_vf_reset_event(adapter, vf); + + /* set vf mac address */ + ixgbe_set_vf_mac(adapter, vf, vf_mac); vf_shift = vf % 32; reg_offset = vf / 32; - /* enable transmit and receive for vf */ + /* enable transmit for vf */ reg = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); - reg |= (reg | (1 << vf_shift)); + reg |= 1 << vf_shift; IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); + /* enable receive for vf */ reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); - reg |= (reg | (1 << vf_shift)); + reg |= 1 << vf_shift; + /* + * The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. + * For more info take a look at ixgbe_set_vf_lpe + */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + struct net_device *dev = adapter->netdev; + int pf_max_frame = dev->mtu + ETH_HLEN; + +#ifdef CONFIG_FCOE + if (dev->features & NETIF_F_FCOE_MTU) + pf_max_frame = max_t(int, pf_max_frame, + IXGBE_FCOE_JUMBO_FRAME_SIZE); + +#endif /* CONFIG_FCOE */ + if (pf_max_frame > ETH_FRAME_LEN) + reg &= ~(1 << vf_shift); + } IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg); + /* enable VF mailbox for further messages */ + adapter->vfinfo[vf].clear_to_send = true; + /* Enable counting of spoofed packets in the SSVPC register */ reg = IXGBE_READ_REG(hw, IXGBE_VMECM(reg_offset)); reg |= (1 << vf_shift); IXGBE_WRITE_REG(hw, IXGBE_VMECM(reg_offset), reg); - ixgbe_vf_reset_event(adapter, vf); + /* reply to reset with ack and vf mac address */ + msgbuf[0] = IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, ETH_ALEN); + + /* + * Piggyback the multicast filter type so VF can compute the + * correct vectors + */ + msgbuf[3] = hw->mac.mc_filter_type; + ixgbe_write_mbx(hw, msgbuf, IXGBE_VF_PERMADDR_MSG_LEN, vf); + + return 0; +} + +static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u8 *new_mac = ((u8 *)(&msgbuf[1])); + + if (!is_valid_ether_addr(new_mac)) { + e_warn(drv, "VF %d attempted to set invalid mac\n", vf); + return -1; + } + + if (adapter->vfinfo[vf].pf_set_mac && + memcmp(adapter->vfinfo[vf].vf_mac_addresses, new_mac, + ETH_ALEN)) { + e_warn(drv, + "VF %d attempted to override administratively set MAC address\n" + "Reload the VF driver to resume operations\n", + vf); + return -1; + } + + return ixgbe_set_vf_mac(adapter, vf, new_mac) < 0; +} + +static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + int add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); + int err; + u8 tcs = netdev_get_num_tc(adapter->netdev); + + if (adapter->vfinfo[vf].pf_vlan || tcs) { + e_warn(drv, + "VF %d attempted to override administratively set VLAN configuration\n" + "Reload the VF driver to resume operations\n", + vf); + return -1; + } + + if (add) + adapter->vfinfo[vf].vlan_count++; + else if (adapter->vfinfo[vf].vlan_count) + adapter->vfinfo[vf].vlan_count--; + + err = ixgbe_set_vf_vlan(adapter, add, vid, vf); + if (!err && adapter->vfinfo[vf].spoofchk_enabled) + hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); + + return err; +} + +static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u8 *new_mac = ((u8 *)(&msgbuf[1])); + int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> + IXGBE_VT_MSGINFO_SHIFT; + int err; + + if (adapter->vfinfo[vf].pf_set_mac && index > 0) { + e_warn(drv, + "VF %d requested MACVLAN filter but is administratively denied\n", + vf); + return -1; + } + + /* An non-zero index indicates the VF is setting a filter */ + if (index) { + if (!is_valid_ether_addr(new_mac)) { + e_warn(drv, "VF %d attempted to set invalid mac\n", vf); + return -1; + } + + /* + * If the VF is allowed to set MAC filters then turn off + * anti-spoofing to avoid false positives. + */ + if (adapter->vfinfo[vf].spoofchk_enabled) + ixgbe_ndo_set_vf_spoofchk(adapter->netdev, vf, false); + } + + err = ixgbe_set_vf_macvlan(adapter, vf, index, new_mac); + if (err == -ENOSPC) + e_warn(drv, + "VF %d has requested a MACVLAN filter but there is no space for it\n", + vf); + + return err < 0; +} + +static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + int api = msgbuf[1]; + + switch (api) { + case ixgbe_mbox_api_10: + case ixgbe_mbox_api_11: + adapter->vfinfo[vf].vf_api = api; + return 0; + default: + break; + } + + e_info(drv, "VF %d requested invalid api version %u\n", vf, api); + + return -1; +} + +static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + struct net_device *dev = adapter->netdev; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; + unsigned int default_tc = 0; + u8 num_tcs = netdev_get_num_tc(dev); + + /* verify the PF is supporting the correct APIs */ + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_20: + case ixgbe_mbox_api_11: + break; + default: + return -1; + } + + /* only allow 1 Tx queue for bandwidth limiting */ + msgbuf[IXGBE_VF_TX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask); + msgbuf[IXGBE_VF_RX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask); + + /* if TCs > 1 determine which TC belongs to default user priority */ + if (num_tcs > 1) + default_tc = netdev_get_prio_tc_map(dev, adapter->default_up); + + /* notify VF of need for VLAN tag stripping, and correct queue */ + if (num_tcs) + msgbuf[IXGBE_VF_TRANS_VLAN] = num_tcs; + else if (adapter->vfinfo[vf].pf_vlan || adapter->vfinfo[vf].pf_qos) + msgbuf[IXGBE_VF_TRANS_VLAN] = 1; + else + msgbuf[IXGBE_VF_TRANS_VLAN] = 0; + + /* notify VF of default queue */ + msgbuf[IXGBE_VF_DEF_QUEUE] = default_tc; + + return 0; } static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) @@ -553,10 +813,6 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) u32 msgbuf[IXGBE_VFMAILBOX_SIZE]; struct ixgbe_hw *hw = &adapter->hw; s32 retval; - int entries; - u16 *hash_list; - int add, vid, index; - u8 *new_mac; retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf); @@ -572,39 +828,13 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) /* flush the ack before we write any messages back */ IXGBE_WRITE_FLUSH(hw); + if (msgbuf[0] == IXGBE_VF_RESET) + return ixgbe_vf_reset_msg(adapter, vf); + /* * until the vf completes a virtual function reset it should not be * allowed to start any configuration. */ - - if (msgbuf[0] == IXGBE_VF_RESET) { - unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses; - new_mac = (u8 *)(&msgbuf[1]); - e_info(probe, "VF Reset msg received from vf %d\n", vf); - adapter->vfinfo[vf].clear_to_send = false; - ixgbe_vf_reset_msg(adapter, vf); - adapter->vfinfo[vf].clear_to_send = true; - - if (is_valid_ether_addr(new_mac) && - !adapter->vfinfo[vf].pf_set_mac) - ixgbe_set_vf_mac(adapter, vf, vf_mac); - else - ixgbe_set_vf_mac(adapter, - vf, adapter->vfinfo[vf].vf_mac_addresses); - - /* reply to reset with ack and vf mac address */ - msgbuf[0] = IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_ACK; - memcpy(new_mac, vf_mac, ETH_ALEN); - /* - * Piggyback the multicast filter type so VF can compute the - * correct vectors - */ - msgbuf[3] = hw->mac.mc_filter_type; - ixgbe_write_mbx(hw, msgbuf, IXGBE_VF_PERMADDR_MSG_LEN, vf); - - return retval; - } - if (!adapter->vfinfo[vf].clear_to_send) { msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK; ixgbe_write_mbx(hw, msgbuf, 1, vf); @@ -613,70 +843,25 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) switch ((msgbuf[0] & 0xFFFF)) { case IXGBE_VF_SET_MAC_ADDR: - new_mac = ((u8 *)(&msgbuf[1])); - if (is_valid_ether_addr(new_mac) && - !adapter->vfinfo[vf].pf_set_mac) { - ixgbe_set_vf_mac(adapter, vf, new_mac); - } else if (memcmp(adapter->vfinfo[vf].vf_mac_addresses, - new_mac, ETH_ALEN)) { - e_warn(drv, "VF %d attempted to override " - "administratively set MAC address\nReload " - "the VF driver to resume operations\n", vf); - retval = -1; - } + retval = ixgbe_set_vf_mac_addr(adapter, msgbuf, vf); break; case IXGBE_VF_SET_MULTICAST: - entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) - >> IXGBE_VT_MSGINFO_SHIFT; - hash_list = (u16 *)&msgbuf[1]; - retval = ixgbe_set_vf_multicasts(adapter, entries, - hash_list, vf); - break; - case IXGBE_VF_SET_LPE: - ixgbe_set_vf_lpe(adapter, msgbuf); + retval = ixgbe_set_vf_multicasts(adapter, msgbuf, vf); break; case IXGBE_VF_SET_VLAN: - add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) - >> IXGBE_VT_MSGINFO_SHIFT; - vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK); - if (adapter->vfinfo[vf].pf_vlan) { - e_warn(drv, "VF %d attempted to override " - "administratively set VLAN configuration\n" - "Reload the VF driver to resume operations\n", - vf); - retval = -1; - } else { - if (add) - adapter->vfinfo[vf].vlan_count++; - else if (adapter->vfinfo[vf].vlan_count) - adapter->vfinfo[vf].vlan_count--; - retval = ixgbe_set_vf_vlan(adapter, add, vid, vf); - if (!retval && adapter->vfinfo[vf].spoofchk_enabled) - hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); - } + retval = ixgbe_set_vf_vlan_msg(adapter, msgbuf, vf); + break; + case IXGBE_VF_SET_LPE: + retval = ixgbe_set_vf_lpe(adapter, msgbuf, vf); break; case IXGBE_VF_SET_MACVLAN: - index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> - IXGBE_VT_MSGINFO_SHIFT; - if (adapter->vfinfo[vf].pf_set_mac && index > 0) { - e_warn(drv, "VF %d requested MACVLAN filter but is " - "administratively denied\n", vf); - retval = -1; - break; - } - /* - * If the VF is allowed to set MAC filters then turn off - * anti-spoofing to avoid false positives. An index - * greater than 0 will indicate the VF is setting a - * macvlan MAC filter. - */ - if (index > 0 && adapter->vfinfo[vf].spoofchk_enabled) - ixgbe_ndo_set_vf_spoofchk(adapter->netdev, vf, false); - retval = ixgbe_set_vf_macvlan(adapter, vf, index, - (unsigned char *)(&msgbuf[1])); - if (retval == -ENOSPC) - e_warn(drv, "VF %d has requested a MACVLAN filter " - "but there is no space for it\n", vf); + retval = ixgbe_set_vf_macvlan_msg(adapter, msgbuf, vf); + break; + case IXGBE_VF_API_NEGOTIATE: + retval = ixgbe_negotiate_vf_api(adapter, msgbuf, vf); + break; + case IXGBE_VF_GET_QUEUES: + retval = ixgbe_get_vf_queues(adapter, msgbuf, vf); break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); @@ -692,7 +877,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS; - ixgbe_write_mbx(hw, msgbuf, 1, vf); + ixgbe_write_mbx(hw, msgbuf, mbx_size, vf); return retval; } @@ -783,7 +968,7 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) err = ixgbe_set_vf_vlan(adapter, true, vlan, vf); if (err) goto out; - ixgbe_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + ixgbe_set_vmvir(adapter, vlan, qos, vf); ixgbe_set_vmolr(hw, vf, false); if (adapter->vfinfo[vf].spoofchk_enabled) hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf); @@ -803,7 +988,7 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) } else { err = ixgbe_set_vf_vlan(adapter, false, adapter->vfinfo[vf].pf_vlan, vf); - ixgbe_set_vmvir(adapter, vlan, vf); + ixgbe_clear_vmvir(adapter, vf); ixgbe_set_vmolr(hw, vf, true); hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf); if (adapter->vfinfo[vf].vlan_count) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 0722f336809..21915e20399 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -56,6 +56,7 @@ #define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 #define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 #define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 #define IXGBE_DEV_ID_82599_SFP_EM 0x1507 #define IXGBE_DEV_ID_82599_SFP_SF2 0x154D #define IXGBE_DEV_ID_82599EN_SFP 0x1557 diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index da17ccf5c09..3147795bd13 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -33,8 +33,11 @@ #define IXGBE_DEV_ID_X540_VF 0x1515 #define IXGBE_VF_IRQ_CLEAR_MASK 7 -#define IXGBE_VF_MAX_TX_QUEUES 1 -#define IXGBE_VF_MAX_RX_QUEUES 1 +#define IXGBE_VF_MAX_TX_QUEUES 8 +#define IXGBE_VF_MAX_RX_QUEUES 8 + +/* DCB define */ +#define IXGBE_VF_MAX_TRAFFIC_CLASS 8 /* Link speed */ typedef u32 ixgbe_link_speed; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 4a9c9c28568..2323ccd211c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -89,8 +89,8 @@ struct ixgbevf_ring { /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ -#define MAX_RX_QUEUES 1 -#define MAX_TX_QUEUES 1 +#define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES +#define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES #define IXGBEVF_DEFAULT_TXD 1024 #define IXGBEVF_DEFAULT_RXD 512 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index de1ad506665..f3d3947ae96 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -58,7 +58,7 @@ const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; -#define DRV_VERSION "2.6.0-k" +#define DRV_VERSION "2.7.12-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = "Copyright (c) 2009 - 2012 Intel Corporation."; @@ -99,6 +99,7 @@ MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); /* forward decls */ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); +static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); static inline void ixgbevf_release_rx_desc(struct ixgbe_hw *hw, struct ixgbevf_ring *rx_ring, @@ -358,6 +359,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter, bi->dma = dma_map_single(&pdev->dev, skb->data, rx_ring->rx_buf_len, DMA_FROM_DEVICE); + if (dma_mapping_error(&pdev->dev, bi->dma)) { + dev_kfree_skb(skb); + bi->skb = NULL; + dev_err(&pdev->dev, "RX DMA map failed\n"); + break; + } } rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); @@ -471,6 +478,16 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } skb->protocol = eth_type_trans(skb, rx_ring->netdev); + /* Workaround hardware that can't do proper VEPA multicast + * source pruning. + */ + if ((skb->pkt_type & (PACKET_BROADCAST | PACKET_MULTICAST)) && + !(compare_ether_addr(adapter->netdev->dev_addr, + eth_hdr(skb)->h_source))) { + dev_kfree_skb_irq(skb); + goto next_desc; + } + ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc); next_desc: @@ -1131,12 +1148,12 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) if (!hw->mac.ops.set_vfta) return -EOPNOTSUPP; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); /* add VID to filter table */ err = hw->mac.ops.set_vfta(hw, vid, 0, true); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); /* translate error return types so error makes sense */ if (err == IXGBE_ERR_MBX) @@ -1156,13 +1173,13 @@ static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) struct ixgbe_hw *hw = &adapter->hw; int err = -EOPNOTSUPP; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); /* remove VID from filter table */ if (hw->mac.ops.set_vfta) err = hw->mac.ops.set_vfta(hw, vid, 0, false); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); clear_bit(vid, adapter->active_vlans); @@ -1218,7 +1235,7 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); /* reprogram multicast list */ if (hw->mac.ops.update_mc_addr_list) @@ -1226,7 +1243,7 @@ static void ixgbevf_set_rx_mode(struct net_device *netdev) ixgbevf_write_uc_addr_list(netdev); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); } static void ixgbevf_napi_enable_all(struct ixgbevf_adapter *adapter) @@ -1335,11 +1352,12 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int api[] = { ixgbe_mbox_api_10, + int api[] = { ixgbe_mbox_api_11, + ixgbe_mbox_api_10, ixgbe_mbox_api_unknown }; int err = 0, idx = 0; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); while (api[idx] != ixgbe_mbox_api_unknown) { err = ixgbevf_negotiate_api_version(hw, api[idx]); @@ -1348,7 +1366,7 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) idx++; } - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); } static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) @@ -1389,7 +1407,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) ixgbevf_configure_msix(adapter); - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); if (hw->mac.ops.set_rar) { if (is_valid_ether_addr(hw->mac.addr)) @@ -1398,7 +1416,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); } - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -1413,12 +1431,87 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) mod_timer(&adapter->watchdog_timer, jiffies); } +static int ixgbevf_reset_queues(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbevf_ring *rx_ring; + unsigned int def_q = 0; + unsigned int num_tcs = 0; + unsigned int num_rx_queues = 1; + int err, i; + + spin_lock_bh(&adapter->mbx_lock); + + /* fetch queue configuration from the PF */ + err = ixgbevf_get_queues(hw, &num_tcs, &def_q); + + spin_unlock_bh(&adapter->mbx_lock); + + if (err) + return err; + + if (num_tcs > 1) { + /* update default Tx ring register index */ + adapter->tx_ring[0].reg_idx = def_q; + + /* we need as many queues as traffic classes */ + num_rx_queues = num_tcs; + } + + /* nothing to do if we have the correct number of queues */ + if (adapter->num_rx_queues == num_rx_queues) + return 0; + + /* allocate new rings */ + rx_ring = kcalloc(num_rx_queues, + sizeof(struct ixgbevf_ring), GFP_KERNEL); + if (!rx_ring) + return -ENOMEM; + + /* setup ring fields */ + for (i = 0; i < num_rx_queues; i++) { + rx_ring[i].count = adapter->rx_ring_count; + rx_ring[i].queue_index = i; + rx_ring[i].reg_idx = i; + rx_ring[i].dev = &adapter->pdev->dev; + rx_ring[i].netdev = adapter->netdev; + + /* allocate resources on the ring */ + err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]); + if (err) { + while (i) { + i--; + ixgbevf_free_rx_resources(adapter, &rx_ring[i]); + } + kfree(rx_ring); + return err; + } + } + + /* free the existing rings and queues */ + ixgbevf_free_all_rx_resources(adapter); + adapter->num_rx_queues = 0; + kfree(adapter->rx_ring); + + /* move new rings into position on the adapter struct */ + adapter->rx_ring = rx_ring; + adapter->num_rx_queues = num_rx_queues; + + /* reset ring to vector mapping */ + ixgbevf_reset_q_vectors(adapter); + ixgbevf_map_rings_to_vectors(adapter); + + return 0; +} + void ixgbevf_up(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; ixgbevf_negotiate_api(adapter); + ixgbevf_reset_queues(adapter); + ixgbevf_configure(adapter); ixgbevf_up_complete(adapter); @@ -1611,14 +1704,14 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); if (hw->mac.ops.reset_hw(hw)) hw_dbg(hw, "PF still resetting\n"); else hw->mac.ops.init_hw(hw); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); if (is_valid_ether_addr(adapter->hw.mac.addr)) { memcpy(netdev->dev_addr, adapter->hw.mac.addr, @@ -1717,6 +1810,7 @@ static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) { adapter->tx_ring[i].count = adapter->tx_ring_count; adapter->tx_ring[i].queue_index = i; + /* reg_idx may be remapped later by DCB config */ adapter->tx_ring[i].reg_idx = i; adapter->tx_ring[i].dev = &adapter->pdev->dev; adapter->tx_ring[i].netdev = adapter->netdev; @@ -1834,18 +1928,13 @@ err_out: **/ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors; - int napi_vectors; - - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - napi_vectors = adapter->num_rx_queues; + int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx]; adapter->q_vector[q_idx] = NULL; - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); + netif_napi_del(&q_vector->napi); kfree(q_vector); } } @@ -1950,8 +2039,11 @@ static int __devinit ixgbevf_sw_init(struct ixgbevf_adapter *adapter) hw->subsystem_device_id = pdev->subsystem_device; hw->mbx.ops.init_params(hw); - hw->mac.max_tx_queues = MAX_TX_QUEUES; - hw->mac.max_rx_queues = MAX_RX_QUEUES; + + /* assume legacy case in which PF would only give VF 2 queues */ + hw->mac.max_tx_queues = 2; + hw->mac.max_rx_queues = 2; + err = hw->mac.ops.reset_hw(hw); if (err) { dev_info(&pdev->dev, @@ -2113,12 +2205,12 @@ static void ixgbevf_watchdog_task(struct work_struct *work) if (hw->mac.ops.check_link) { s32 need_reset; - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); need_reset = hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); if (need_reset) { adapter->link_up = link_up; @@ -2377,6 +2469,63 @@ static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter) &adapter->rx_ring[i]); } +static int ixgbevf_setup_queues(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbevf_ring *rx_ring; + unsigned int def_q = 0; + unsigned int num_tcs = 0; + unsigned int num_rx_queues = 1; + int err, i; + + spin_lock_bh(&adapter->mbx_lock); + + /* fetch queue configuration from the PF */ + err = ixgbevf_get_queues(hw, &num_tcs, &def_q); + + spin_unlock_bh(&adapter->mbx_lock); + + if (err) + return err; + + if (num_tcs > 1) { + /* update default Tx ring register index */ + adapter->tx_ring[0].reg_idx = def_q; + + /* we need as many queues as traffic classes */ + num_rx_queues = num_tcs; + } + + /* nothing to do if we have the correct number of queues */ + if (adapter->num_rx_queues == num_rx_queues) + return 0; + + /* allocate new rings */ + rx_ring = kcalloc(num_rx_queues, + sizeof(struct ixgbevf_ring), GFP_KERNEL); + if (!rx_ring) + return -ENOMEM; + + /* setup ring fields */ + for (i = 0; i < num_rx_queues; i++) { + rx_ring[i].count = adapter->rx_ring_count; + rx_ring[i].queue_index = i; + rx_ring[i].reg_idx = i; + rx_ring[i].dev = &adapter->pdev->dev; + rx_ring[i].netdev = adapter->netdev; + } + + /* free the existing ring and queues */ + adapter->num_rx_queues = 0; + kfree(adapter->rx_ring); + + /* move new rings into position on the adapter struct */ + adapter->rx_ring = rx_ring; + adapter->num_rx_queues = num_rx_queues; + + return 0; +} + /** * ixgbevf_open - Called when a network interface is made active * @netdev: network interface device structure @@ -2413,6 +2562,11 @@ static int ixgbevf_open(struct net_device *netdev) ixgbevf_negotiate_api(adapter); + /* setup queue reg_idx and Rx queue count */ + err = ixgbevf_setup_queues(adapter); + if (err) + goto err_setup_queues; + /* allocate transmit descriptors */ err = ixgbevf_setup_all_tx_resources(adapter); if (err) @@ -2451,6 +2605,7 @@ err_setup_rx: ixgbevf_free_all_rx_resources(adapter); err_setup_tx: ixgbevf_free_all_tx_resources(adapter); +err_setup_queues: ixgbevf_reset(adapter); err_setup_reset: @@ -2678,10 +2833,10 @@ static int ixgbevf_tx_map(struct ixgbevf_ring *tx_ring, tx_buffer_info->dma = skb_frag_dma_map(tx_ring->dev, frag, offset, size, DMA_TO_DEVICE); - tx_buffer_info->mapped_as_page = true; if (dma_mapping_error(tx_ring->dev, tx_buffer_info->dma)) goto dma_error; + tx_buffer_info->mapped_as_page = true; tx_buffer_info->next_to_watch = i; len -= size; @@ -2823,6 +2978,11 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD unsigned short f; #endif + u8 *dst_mac = skb_header_pointer(skb, 0, 0, NULL); + if (!dst_mac || is_link_local_ether_addr(dst_mac)) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } tx_ring = &adapter->tx_ring[r_idx]; @@ -2902,12 +3062,12 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - spin_lock(&adapter->mbx_lock); + spin_lock_bh(&adapter->mbx_lock); if (hw->mac.ops.set_rar) hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0); - spin_unlock(&adapter->mbx_lock); + spin_unlock_bh(&adapter->mbx_lock); return 0; } @@ -2925,8 +3085,15 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE; - if (adapter->hw.mac.type == ixgbe_mac_X540_vf) + switch (adapter->hw.api_version) { + case ixgbe_mbox_api_11: max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; + break; + default: + if (adapter->hw.mac.type == ixgbe_mac_X540_vf) + max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; + break; + } /* MTU < 68 is an error and causes problems on some kernels */ if ((new_mtu < 68) || (max_frame > max_possible_frame)) diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index 946ce86f337..0bc30058ff8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -85,6 +85,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */ ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ + ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -100,6 +101,15 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ #define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */ +/* mailbox API, version 1.1 VF requests */ +#define IXGBE_VF_GET_QUEUE 0x09 /* get queue configuration */ + +/* GET_QUEUES return data indices within the mailbox */ +#define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */ +#define IXGBE_VF_RX_QUEUES 2 /* number of Rx queues supported */ +#define IXGBE_VF_TRANS_VLAN 3 /* Indication of port vlan */ +#define IXGBE_VF_DEF_QUEUE 4 /* Default queue offset */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 0c7447e6fcc..0c94557b53d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -331,6 +331,9 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, netdev_for_each_mc_addr(ha, netdev) { if (i == cnt) break; + if (is_link_local_ether_addr(ha->addr)) + continue; + vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr); } @@ -513,6 +516,64 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) return err; } +int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, + unsigned int *default_tc) +{ + int err; + u32 msg[5]; + + /* do nothing if API doesn't support ixgbevf_get_queues */ + switch (hw->api_version) { + case ixgbe_mbox_api_11: + break; + default: + return 0; + } + + /* Fetch queue configuration from the PF */ + msg[0] = IXGBE_VF_GET_QUEUE; + msg[1] = msg[2] = msg[3] = msg[4] = 0; + err = hw->mbx.ops.write_posted(hw, msg, 5); + + if (!err) + err = hw->mbx.ops.read_posted(hw, msg, 5); + + if (!err) { + msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; + + /* + * if we we didn't get an ACK there must have been + * some sort of mailbox error so we should treat it + * as such + */ + if (msg[0] != (IXGBE_VF_GET_QUEUE | IXGBE_VT_MSGTYPE_ACK)) + return IXGBE_ERR_MBX; + + /* record and validate values from message */ + hw->mac.max_tx_queues = msg[IXGBE_VF_TX_QUEUES]; + if (hw->mac.max_tx_queues == 0 || + hw->mac.max_tx_queues > IXGBE_VF_MAX_TX_QUEUES) + hw->mac.max_tx_queues = IXGBE_VF_MAX_TX_QUEUES; + + hw->mac.max_rx_queues = msg[IXGBE_VF_RX_QUEUES]; + if (hw->mac.max_rx_queues == 0 || + hw->mac.max_rx_queues > IXGBE_VF_MAX_RX_QUEUES) + hw->mac.max_rx_queues = IXGBE_VF_MAX_RX_QUEUES; + + *num_tcs = msg[IXGBE_VF_TRANS_VLAN]; + /* in case of unknown state assume we cannot tag frames */ + if (*num_tcs > hw->mac.max_rx_queues) + *num_tcs = 1; + + *default_tc = msg[IXGBE_VF_DEF_QUEUE]; + /* default to queue 0 on out-of-bounds queue number */ + if (*default_tc >= hw->mac.max_tx_queues) + *default_tc = 0; + } + + return err; +} + static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .init_hw = ixgbevf_init_hw_vf, .reset_hw = ixgbevf_reset_hw_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 47f11a584d8..7b1f502d171 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -174,5 +174,7 @@ struct ixgbevf_info { void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); +int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, + unsigned int *default_tc); #endif /* __IXGBE_VF_H__ */ diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 59489722e89..10d678d3dd0 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1131,7 +1131,7 @@ static int pxa168_eth_open(struct net_device *dev) err = request_irq(dev->irq, pxa168_eth_int_handler, IRQF_DISABLED, dev->name, dev); if (err) { - dev_printk(KERN_ERR, &dev->dev, "can't assign irq\n"); + dev_err(&dev->dev, "can't assign irq\n"); return -EAGAIN; } pep->rx_resource_err = 0; @@ -1201,9 +1201,8 @@ static int pxa168_eth_change_mtu(struct net_device *dev, int mtu) */ pxa168_eth_stop(dev); if (pxa168_eth_open(dev)) { - dev_printk(KERN_ERR, &dev->dev, - "fatal error on re-opening device after " - "MTU change\n"); + dev_err(&dev->dev, + "fatal error on re-opening device after MTU change\n"); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edd9cb8d3e1..2b23ca21b32 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -870,7 +870,7 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to - * satisfy our coelsing target. + * satisfy our coalescing target. * - moder_time is set to a fixed value. */ priv->rx_frames = MLX4_EN_RX_COAL_TARGET; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9d27e42264e..8a5e70d6889 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -126,7 +126,7 @@ enum { #define MLX4_EN_RX_COAL_TIME 0x10 #define MLX4_EN_TX_COAL_PKTS 16 -#define MLX4_EN_TX_COAL_TIME 0x80 +#define MLX4_EN_TX_COAL_TIME 0x10 #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index e558edd1cb6..e4ba868e232 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -7251,18 +7251,7 @@ static struct pci_driver pci_device_driver = { .remove = pcidev_exit }; -static int __init ksz884x_init_module(void) -{ - return pci_register_driver(&pci_device_driver); -} - -static void __exit ksz884x_cleanup_module(void) -{ - pci_unregister_driver(&pci_device_driver); -} - -module_init(ksz884x_init_module); -module_exit(ksz884x_cleanup_module); +module_pci_driver(pci_device_driver); MODULE_DESCRIPTION("KSZ8841/2 PCI network driver"); MODULE_AUTHOR("Tristram Ha <Tristram.Ha@micrel.com>"); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index de50547c187..c98decb19ce 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8239,7 +8239,8 @@ static int __init s2io_starter(void) /** * s2io_closer - Cleanup routine for the driver - * Description: This function is the cleanup routine for the driver. It unregist * ers the driver. + * Description: This function is the cleanup routine for the driver. It + * unregisters the driver. */ static __exit void s2io_closer(void) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig index 5296cc8d3cb..00bc4fc968c 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig @@ -20,19 +20,3 @@ config PCH_GBE purpose use. ML7223/ML7831 is companion chip for Intel Atom E6xx series. ML7223/ML7831 is completely compatible for Intel EG20T PCH. - -if PCH_GBE - -config PCH_PTP - bool "PCH PTP clock support" - default n - depends on EXPERIMENTAL - select PPS - select PTP_1588_CLOCK - select PTP_1588_CLOCK_PCH - ---help--- - Say Y here if you want to use Precision Time Protocol (PTP) in the - driver. PTP is a method to precisely synchronize distributed clocks - over Ethernet networks. - -endif # PCH_GBE diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h index b07311eaa69..7fb7e178c74 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h @@ -649,7 +649,6 @@ extern void pch_gbe_free_tx_resources(struct pch_gbe_adapter *adapter, extern void pch_gbe_free_rx_resources(struct pch_gbe_adapter *adapter, struct pch_gbe_rx_ring *rx_ring); extern void pch_gbe_update_stats(struct pch_gbe_adapter *adapter); -#ifdef CONFIG_PCH_PTP extern u32 pch_ch_control_read(struct pci_dev *pdev); extern void pch_ch_control_write(struct pci_dev *pdev, u32 val); extern u32 pch_ch_event_read(struct pci_dev *pdev); @@ -659,7 +658,6 @@ extern u32 pch_src_uuid_hi_read(struct pci_dev *pdev); extern u64 pch_rx_snap_read(struct pci_dev *pdev); extern u64 pch_tx_snap_read(struct pci_dev *pdev); extern int pch_set_station_address(u8 *addr, struct pci_dev *pdev); -#endif /* pch_gbe_param.c */ extern void pch_gbe_check_options(struct pch_gbe_adapter *adapter); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 4c4fe5b1a29..39ab4d09faa 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -21,10 +21,8 @@ #include "pch_gbe.h" #include "pch_gbe_api.h" #include <linux/module.h> -#ifdef CONFIG_PCH_PTP #include <linux/net_tstamp.h> #include <linux/ptp_classify.h> -#endif #define DRV_VERSION "1.01" const char pch_driver_version[] = DRV_VERSION; @@ -98,7 +96,6 @@ const char pch_driver_version[] = DRV_VERSION; #define PCH_GBE_INT_DISABLE_ALL 0 -#ifdef CONFIG_PCH_PTP /* Macros for ieee1588 */ /* 0x40 Time Synchronization Channel Control Register Bits */ #define MASTER_MODE (1<<0) @@ -113,7 +110,6 @@ const char pch_driver_version[] = DRV_VERSION; #define PTP_L4_MULTICAST_SA "01:00:5e:00:01:81" #define PTP_L2_MULTICAST_SA "01:1b:19:00:00:00" -#endif static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT; @@ -122,7 +118,6 @@ static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg, int data); static void pch_gbe_set_multi(struct net_device *netdev); -#ifdef CONFIG_PCH_PTP static struct sock_filter ptp_filter[] = { PTP_FILTER }; @@ -291,7 +286,6 @@ static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } -#endif inline void pch_gbe_mac_load_mac_addr(struct pch_gbe_hw *hw) { @@ -1244,9 +1238,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, (int)sizeof(struct pch_gbe_tx_desc) * ring_num, &hw->reg->TX_DSC_SW_P); -#ifdef CONFIG_PCH_PTP pch_tx_timestamp(adapter, skb); -#endif dev_kfree_skb_any(skb); } @@ -1730,9 +1722,7 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter, /* Write meta date of skb */ skb_put(skb, length); -#ifdef CONFIG_PCH_PTP pch_rx_timestamp(adapter, skb); -#endif skb->protocol = eth_type_trans(skb, netdev); if (tcp_ip_status & PCH_GBE_RXD_ACC_STAT_TCPIPOK) @@ -2334,10 +2324,8 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) pr_debug("cmd : 0x%04x\n", cmd); -#ifdef CONFIG_PCH_PTP if (cmd == SIOCSHWTSTAMP) return hwtstamp_ioctl(netdev, ifr, cmd); -#endif return generic_mii_ioctl(&adapter->mii, if_mii(ifr), cmd, NULL); } @@ -2623,14 +2611,12 @@ static int pch_gbe_probe(struct pci_dev *pdev, goto err_free_netdev; } -#ifdef CONFIG_PCH_PTP adapter->ptp_pdev = pci_get_bus_and_slot(adapter->pdev->bus->number, PCI_DEVFN(12, 4)); if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) { pr_err("Bad ptp filter\n"); return -EINVAL; } -#endif netdev->netdev_ops = &pch_gbe_netdev_ops; netdev->watchdog_timeo = PCH_GBE_WATCHDOG_PERIOD; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c index 10468e7932d..4ca2c196c98 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c @@ -218,7 +218,7 @@ skip: check_sfp_module = netif_running(dev) && adapter->has_link_events; } else { - ecmd->supported |= (SUPPORTED_TP |SUPPORTED_Autoneg); + ecmd->supported |= (SUPPORTED_TP | SUPPORTED_Autoneg); ecmd->advertising |= (ADVERTISED_TP | ADVERTISED_Autoneg); ecmd->port = PORT_TP; @@ -381,7 +381,7 @@ static u32 netxen_nic_test_link(struct net_device *dev) static int netxen_nic_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, - u8 * bytes) + u8 *bytes) { struct netxen_adapter *adapter = netdev_priv(dev); int offset; @@ -488,6 +488,8 @@ netxen_nic_get_pauseparam(struct net_device *dev, __u32 val; int port = adapter->physical_port; + pause->autoneg = 0; + if (adapter->ahw.port_type == NETXEN_NIC_GBE) { if ((port < 0) || (port >= NETXEN_NIU_MAX_GBE_PORTS)) return; @@ -496,19 +498,19 @@ netxen_nic_get_pauseparam(struct net_device *dev, pause->rx_pause = netxen_gb_get_rx_flowctl(val); val = NXRD32(adapter, NETXEN_NIU_GB_PAUSE_CTL); switch (port) { - case 0: - pause->tx_pause = !(netxen_gb_get_gb0_mask(val)); - break; - case 1: - pause->tx_pause = !(netxen_gb_get_gb1_mask(val)); - break; - case 2: - pause->tx_pause = !(netxen_gb_get_gb2_mask(val)); - break; - case 3: - default: - pause->tx_pause = !(netxen_gb_get_gb3_mask(val)); - break; + case 0: + pause->tx_pause = !(netxen_gb_get_gb0_mask(val)); + break; + case 1: + pause->tx_pause = !(netxen_gb_get_gb1_mask(val)); + break; + case 2: + pause->tx_pause = !(netxen_gb_get_gb2_mask(val)); + break; + case 3: + default: + pause->tx_pause = !(netxen_gb_get_gb3_mask(val)); + break; } } else if (adapter->ahw.port_type == NETXEN_NIC_XGBE) { if ((port < 0) || (port >= NETXEN_NIU_MAX_XG_PORTS)) @@ -532,6 +534,11 @@ netxen_nic_set_pauseparam(struct net_device *dev, struct netxen_adapter *adapter = netdev_priv(dev); __u32 val; int port = adapter->physical_port; + + /* not supported */ + if (pause->autoneg) + return -EINVAL; + /* read mode */ if (adapter->ahw.port_type == NETXEN_NIC_GBE) { if ((port < 0) || (port >= NETXEN_NIU_MAX_GBE_PORTS)) @@ -549,31 +556,31 @@ netxen_nic_set_pauseparam(struct net_device *dev, /* set autoneg */ val = NXRD32(adapter, NETXEN_NIU_GB_PAUSE_CTL); switch (port) { - case 0: - if (pause->tx_pause) - netxen_gb_unset_gb0_mask(val); - else - netxen_gb_set_gb0_mask(val); - break; - case 1: - if (pause->tx_pause) - netxen_gb_unset_gb1_mask(val); - else - netxen_gb_set_gb1_mask(val); - break; - case 2: - if (pause->tx_pause) - netxen_gb_unset_gb2_mask(val); - else - netxen_gb_set_gb2_mask(val); - break; - case 3: - default: - if (pause->tx_pause) - netxen_gb_unset_gb3_mask(val); - else - netxen_gb_set_gb3_mask(val); - break; + case 0: + if (pause->tx_pause) + netxen_gb_unset_gb0_mask(val); + else + netxen_gb_set_gb0_mask(val); + break; + case 1: + if (pause->tx_pause) + netxen_gb_unset_gb1_mask(val); + else + netxen_gb_set_gb1_mask(val); + break; + case 2: + if (pause->tx_pause) + netxen_gb_unset_gb2_mask(val); + else + netxen_gb_set_gb2_mask(val); + break; + case 3: + default: + if (pause->tx_pause) + netxen_gb_unset_gb3_mask(val); + else + netxen_gb_set_gb3_mask(val); + break; } NXWR32(adapter, NETXEN_NIU_GB_PAUSE_CTL, val); } else if (adapter->ahw.port_type == NETXEN_NIC_XGBE) { @@ -636,7 +643,7 @@ static int netxen_get_sset_count(struct net_device *dev, int sset) static void netxen_nic_diag_test(struct net_device *dev, struct ethtool_test *eth_test, - u64 * data) + u64 *data) { memset(data, 0, sizeof(uint64_t) * NETXEN_NIC_TEST_LEN); if ((data[0] = netxen_nic_reg_test(dev))) @@ -647,7 +654,7 @@ netxen_nic_diag_test(struct net_device *dev, struct ethtool_test *eth_test, } static void -netxen_nic_get_strings(struct net_device *dev, u32 stringset, u8 * data) +netxen_nic_get_strings(struct net_device *dev, u32 stringset, u8 *data) { int index; @@ -668,7 +675,7 @@ netxen_nic_get_strings(struct net_device *dev, u32 stringset, u8 * data) static void netxen_nic_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 * data) + struct ethtool_stats *stats, u64 *data) { struct netxen_adapter *adapter = netdev_priv(dev); int index; diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 6407d0d77e8..12d1f2470d5 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -1920,7 +1920,6 @@ static void ql_process_mac_tx_intr(struct ql3_adapter *qdev, { struct ql_tx_buf_cb *tx_cb; int i; - int retval = 0; if (mac_rsp->flags & OB_MAC_IOCB_RSP_S) { netdev_warn(qdev->ndev, @@ -1935,7 +1934,6 @@ static void ql_process_mac_tx_intr(struct ql3_adapter *qdev, "Frame too short to be legal, frame not sent\n"); qdev->ndev->stats.tx_errors++; - retval = -EIO; goto frame_not_sent; } @@ -1944,7 +1942,6 @@ static void ql_process_mac_tx_intr(struct ql3_adapter *qdev, mac_rsp->transaction_id); qdev->ndev->stats.tx_errors++; - retval = -EIO; goto invalid_seg_count; } @@ -3958,15 +3955,4 @@ static struct pci_driver ql3xxx_driver = { .remove = __devexit_p(ql3xxx_remove), }; -static int __init ql3xxx_init_module(void) -{ - return pci_register_driver(&ql3xxx_driver); -} - -static void __exit ql3xxx_exit(void) -{ - pci_unregister_driver(&ql3xxx_driver); -} - -module_init(ql3xxx_init_module); -module_exit(ql3xxx_exit); +module_pci_driver(ql3xxx_driver); diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c index 58185b604b7..10093f0c4c0 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_dbg.c @@ -86,7 +86,7 @@ exit: } /* Read out the SERDES registers */ -static int ql_read_serdes_reg(struct ql_adapter *qdev, u32 reg, u32 * data) +static int ql_read_serdes_reg(struct ql_adapter *qdev, u32 reg, u32 *data) { int status; @@ -364,7 +364,7 @@ exit: /* Read the 400 xgmac control/statistics registers * skipping unused locations. */ -static int ql_get_xgmac_regs(struct ql_adapter *qdev, u32 * buf, +static int ql_get_xgmac_regs(struct ql_adapter *qdev, u32 *buf, unsigned int other_function) { int status = 0; @@ -405,7 +405,7 @@ static int ql_get_xgmac_regs(struct ql_adapter *qdev, u32 * buf, return status; } -static int ql_get_ets_regs(struct ql_adapter *qdev, u32 * buf) +static int ql_get_ets_regs(struct ql_adapter *qdev, u32 *buf) { int status = 0; int i; @@ -423,7 +423,7 @@ static int ql_get_ets_regs(struct ql_adapter *qdev, u32 * buf) return status; } -static void ql_get_intr_states(struct ql_adapter *qdev, u32 * buf) +static void ql_get_intr_states(struct ql_adapter *qdev, u32 *buf) { int i; @@ -434,7 +434,7 @@ static void ql_get_intr_states(struct ql_adapter *qdev, u32 * buf) } } -static int ql_get_cam_entries(struct ql_adapter *qdev, u32 * buf) +static int ql_get_cam_entries(struct ql_adapter *qdev, u32 *buf) { int i, status; u32 value[3]; @@ -471,7 +471,7 @@ err: return status; } -static int ql_get_routing_entries(struct ql_adapter *qdev, u32 * buf) +static int ql_get_routing_entries(struct ql_adapter *qdev, u32 *buf) { int status; u32 value, i; @@ -496,7 +496,7 @@ err: } /* Read the MPI Processor shadow registers */ -static int ql_get_mpi_shadow_regs(struct ql_adapter *qdev, u32 * buf) +static int ql_get_mpi_shadow_regs(struct ql_adapter *qdev, u32 *buf) { u32 i; int status; @@ -515,7 +515,7 @@ end: } /* Read the MPI Processor core registers */ -static int ql_get_mpi_regs(struct ql_adapter *qdev, u32 * buf, +static int ql_get_mpi_regs(struct ql_adapter *qdev, u32 *buf, u32 offset, u32 count) { int i, status = 0; diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index e02f04d7f3a..9f2d416de75 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -175,8 +175,7 @@ struct net_local { unsigned int tx_unit_busy:1; unsigned char re_tx, /* Number of packet retransmissions. */ addr_mode, /* Current Rx filter e.g. promiscuous, etc. */ - pac_cnt_in_tx_buf, - chip_type; + pac_cnt_in_tx_buf; }; /* This code, written by wwc@super.org, resets the adapter every @@ -339,7 +338,6 @@ static int __init atp_probe1(long ioaddr) write_reg_high(ioaddr, CMR1, CMR1h_RESET | CMR1h_MUX); lp = netdev_priv(dev); - lp->chip_type = RTL8002; lp->addr_mode = CMR2h_Normal; spin_lock_init(&lp->lock); @@ -852,7 +850,7 @@ net_close(struct net_device *dev) * Set or clear the multicast filter for this adapter. */ -static void set_rx_mode_8002(struct net_device *dev) +static void set_rx_mode(struct net_device *dev) { struct net_local *lp = netdev_priv(dev); long ioaddr = dev->base_addr; @@ -864,58 +862,6 @@ static void set_rx_mode_8002(struct net_device *dev) write_reg_high(ioaddr, CMR2, lp->addr_mode); } -static void set_rx_mode_8012(struct net_device *dev) -{ - struct net_local *lp = netdev_priv(dev); - long ioaddr = dev->base_addr; - unsigned char new_mode, mc_filter[8]; /* Multicast hash filter */ - int i; - - if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ - new_mode = CMR2h_PROMISC; - } else if ((netdev_mc_count(dev) > 1000) || - (dev->flags & IFF_ALLMULTI)) { - /* Too many to filter perfectly -- accept all multicasts. */ - memset(mc_filter, 0xff, sizeof(mc_filter)); - new_mode = CMR2h_Normal; - } else { - struct netdev_hw_addr *ha; - - memset(mc_filter, 0, sizeof(mc_filter)); - netdev_for_each_mc_addr(ha, dev) { - int filterbit = ether_crc_le(ETH_ALEN, ha->addr) & 0x3f; - mc_filter[filterbit >> 5] |= 1 << (filterbit & 31); - } - new_mode = CMR2h_Normal; - } - lp->addr_mode = new_mode; - write_reg(ioaddr, CMR2, CMR2_IRQOUT | 0x04); /* Switch to page 1. */ - for (i = 0; i < 8; i++) - write_reg_byte(ioaddr, i, mc_filter[i]); - if (net_debug > 2 || 1) { - lp->addr_mode = 1; - printk(KERN_DEBUG "%s: Mode %d, setting multicast filter to", - dev->name, lp->addr_mode); - for (i = 0; i < 8; i++) - printk(" %2.2x", mc_filter[i]); - printk(".\n"); - } - - write_reg_high(ioaddr, CMR2, lp->addr_mode); - write_reg(ioaddr, CMR2, CMR2_IRQOUT); /* Switch back to page 0 */ -} - -static void set_rx_mode(struct net_device *dev) -{ - struct net_local *lp = netdev_priv(dev); - - if (lp->chip_type == RTL8002) - return set_rx_mode_8002(dev); - else - return set_rx_mode_8012(dev); -} - - static int __init atp_init_module(void) { if (debug) /* Emit version even if no cards detected. */ printk(KERN_INFO "%s", version); diff --git a/drivers/net/ethernet/realtek/atp.h b/drivers/net/ethernet/realtek/atp.h index 0edc642c2c2..040b1373994 100644 --- a/drivers/net/ethernet/realtek/atp.h +++ b/drivers/net/ethernet/realtek/atp.h @@ -16,8 +16,6 @@ struct rx_header { #define PAR_STATUS 1 #define PAR_CONTROL 2 -enum chip_type { RTL8002, RTL8012 }; - #define Ctrl_LNibRead 0x08 /* LP_PSELECP */ #define Ctrl_HNibRead 0 #define Ctrl_LNibWrite 0x08 /* LP_PSELECP */ diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 927aa33d434..50a55fb1036 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -78,7 +78,6 @@ static const int multicast_filter_limit = 32; #define MAX_READ_REQUEST_SHIFT 12 #define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */ -#define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */ #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ #define R8169_REGS_SIZE 256 @@ -456,6 +455,7 @@ enum rtl8168_registers { #define PWM_EN (1 << 22) #define RXDV_GATED_EN (1 << 19) #define EARLY_TALLY_EN (1 << 16) +#define FORCE_CLK (1 << 15) /* force clock request */ }; enum rtl_register_content { @@ -519,6 +519,7 @@ enum rtl_register_content { PMEnable = (1 << 0), /* Power Management Enable */ /* Config2 register p. 25 */ + ClkReqEn = (1 << 7), /* Clock Request Enable */ MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */ PCI_Clock_66MHz = 0x01, PCI_Clock_33MHz = 0x00, @@ -539,6 +540,7 @@ enum rtl_register_content { Spi_en = (1 << 3), LanWake = (1 << 1), /* LanWake enable/disable */ PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */ + ASPM_en = (1 << 0), /* ASPM enable */ /* TBICSR p.28 */ TBIReset = 0x80000000, @@ -687,6 +689,7 @@ enum features { RTL_FEATURE_WOL = (1 << 0), RTL_FEATURE_MSI = (1 << 1), RTL_FEATURE_GMII = (1 << 2), + RTL_FEATURE_FW_LOADED = (1 << 3), }; struct rtl8169_counters { @@ -2394,8 +2397,10 @@ static void rtl_apply_firmware(struct rtl8169_private *tp) struct rtl_fw *rtl_fw = tp->rtl_fw; /* TODO: release firmware once rtl_phy_write_fw signals failures. */ - if (!IS_ERR_OR_NULL(rtl_fw)) + if (!IS_ERR_OR_NULL(rtl_fw)) { rtl_phy_write_fw(tp, rtl_fw); + tp->features |= RTL_FEATURE_FW_LOADED; + } } static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) @@ -2406,6 +2411,31 @@ static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) rtl_apply_firmware(tp); } +static void r810x_aldps_disable(struct rtl8169_private *tp) +{ + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, 0x18, 0x0310); + msleep(100); +} + +static void r810x_aldps_enable(struct rtl8169_private *tp) +{ + if (!(tp->features & RTL_FEATURE_FW_LOADED)) + return; + + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, 0x18, 0x8310); +} + +static void r8168_aldps_enable_1(struct rtl8169_private *tp) +{ + if (!(tp->features & RTL_FEATURE_FW_LOADED)) + return; + + rtl_writephy(tp, 0x1f, 0x0000); + rtl_w1w0_phy(tp, 0x15, 0x1000, 0x0000); +} + static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) { static const struct phy_reg phy_reg_init[] = { @@ -3178,6 +3208,8 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001); rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); rtl_writephy(tp, 0x1f, 0x0000); + + r8168_aldps_enable_1(tp); } static void rtl8168f_hw_phy_config(struct rtl8169_private *tp) @@ -3250,6 +3282,8 @@ static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x05, 0x8b85); rtl_w1w0_phy(tp, 0x06, 0x4000, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); + + r8168_aldps_enable_1(tp); } static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) @@ -3257,6 +3291,8 @@ static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) rtl_apply_firmware(tp); rtl8168f_hw_phy_config(tp); + + r8168_aldps_enable_1(tp); } static void rtl8411_hw_phy_config(struct rtl8169_private *tp) @@ -3354,6 +3390,8 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001); rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); rtl_writephy(tp, 0x1f, 0x0000); + + r8168_aldps_enable_1(tp); } static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) @@ -3439,21 +3477,19 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp) }; /* Disable ALDPS before ram code */ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x18, 0x0310); - msleep(100); + r810x_aldps_disable(tp); rtl_apply_firmware(tp); rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + + r810x_aldps_enable(tp); } static void rtl8402_hw_phy_config(struct rtl8169_private *tp) { /* Disable ALDPS before setting firmware */ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x18, 0x0310); - msleep(20); + r810x_aldps_disable(tp); rtl_apply_firmware(tp); @@ -3463,6 +3499,8 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x10, 0x401f); rtl_writephy(tp, 0x19, 0x7030); rtl_writephy(tp, 0x1f, 0x0000); + + r810x_aldps_enable(tp); } static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) @@ -3475,9 +3513,7 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) }; /* Disable ALDPS before ram code */ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x18, 0x0310); - msleep(100); + r810x_aldps_disable(tp); rtl_apply_firmware(tp); @@ -3485,6 +3521,8 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); + + r810x_aldps_enable(tp); } static void rtl_hw_phy_config(struct net_device *dev) @@ -5015,8 +5053,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(pdev); - RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); @@ -5025,7 +5061,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); RTL_W32(MISC, RTL_R32(MISC) | PWM_EN); - RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en); + RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); } static void rtl_hw_start_8168f(struct rtl8169_private *tp) @@ -5050,13 +5087,12 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(pdev); - RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); - RTL_W32(MISC, RTL_R32(MISC) | PWM_EN); - RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en); + RTL_W32(MISC, RTL_R32(MISC) | PWM_EN | FORCE_CLK); + RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); } static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) @@ -5113,8 +5149,10 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); - RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); + RTL_W32(MISC, (RTL_R32(MISC) | FORCE_CLK) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); + RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); @@ -5330,6 +5368,9 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp) RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); + RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); + RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK); rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); } @@ -5355,6 +5396,9 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); + RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); + RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK); rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); @@ -5376,7 +5420,10 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) /* Force LAN exit from ASPM if Rx/Tx are not idle */ RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800); - RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN); + RTL_W32(MISC, + (RTL_R32(MISC) | DISABLE_LAN_EN | FORCE_CLK) & ~EARLY_TALLY_EN); + RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); + RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN); } @@ -6992,15 +7039,4 @@ static struct pci_driver rtl8169_pci_driver = { .driver.pm = RTL8169_PM_OPS, }; -static int __init rtl8169_init_module(void) -{ - return pci_register_driver(&rtl8169_pci_driver); -} - -static void __exit rtl8169_cleanup_module(void) -{ - pci_unregister_driver(&rtl8169_pci_driver); -} - -module_init(rtl8169_init_module); -module_exit(rtl8169_cleanup_module); +module_pci_driver(rtl8169_pci_driver); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c8bfea0524d..3d705862bd7 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2286,7 +2286,7 @@ static int sh_mdio_init(struct net_device *ndev, int id, for (i = 0; i < PHY_MAX_ADDR; i++) mdp->mii_bus->irq[i] = PHY_POLL; - /* regist mdio bus */ + /* register mdio bus */ ret = mdiobus_register(mdp->mii_bus); if (ret) goto out_free_irq; diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 25906c1d1b1..3ab2c4289a4 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -5,6 +5,7 @@ config SFC select CRC32 select I2C select I2C_ALGOBIT + select PTP_1588_CLOCK ---help--- This driver supports 10-gigabit Ethernet cards based on the Solarflare SFC4000 and SFC9000-family controllers. @@ -34,10 +35,3 @@ config SFC_SRIOV This enables support for the SFC9000 I/O Virtualization features, allowing accelerated network performance in virtualized environments. -config SFC_PTP - bool "Solarflare SFC9000-family PTP support" - depends on SFC && PTP_1588_CLOCK && !(SFC=y && PTP_1588_CLOCK=m) - default y - ---help--- - This enables support for the Precision Time Protocol (PTP) - on SFC9000-family NICs diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index e11f2ecf69d..945bf06e69e 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -2,9 +2,8 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \ falcon_xmac.o mcdi_mac.o \ selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ - mcdi.o mcdi_phy.o mcdi_mon.o + mcdi.o mcdi_phy.o mcdi_mon.o ptp.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o -sfc-$(CONFIG_SFC_PTP) += ptp.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 576a3109116..2487f582ab0 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -868,9 +868,7 @@ struct efx_nic { struct work_struct peer_work; #endif -#ifdef CONFIG_SFC_PTP struct efx_ptp_data *ptp_data; -#endif /* The following fields may be written more often */ diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 438cef11f72..7a9647a3c56 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -252,7 +252,6 @@ extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, bool spoofchk); struct ethtool_ts_info; -#ifdef CONFIG_SFC_PTP extern void efx_ptp_probe(struct efx_nic *efx); extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd); extern int efx_ptp_get_ts_info(struct net_device *net_dev, @@ -260,31 +259,6 @@ extern int efx_ptp_get_ts_info(struct net_device *net_dev, extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev); -#else -static inline void efx_ptp_probe(struct efx_nic *efx) {} -static inline int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd) -{ - return -EOPNOTSUPP; -} -static inline int efx_ptp_get_ts_info(struct net_device *net_dev, - struct ethtool_ts_info *ts_info) -{ - ts_info->so_timestamping = (SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_RX_SOFTWARE); - ts_info->phc_index = -1; - - return 0; -} -static inline bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) -{ - return false; -} -static inline int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) -{ - return NETDEV_TX_OK; -} -static inline void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) {} -#endif extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 2c41894d547..48fcb5e3bd3 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -60,6 +60,14 @@ config TI_CPSW To compile this driver as a module, choose M here: the module will be called cpsw. +config TI_CPTS + boolean "TI Common Platform Time Sync (CPTS) Support" + select PTP_1588_CLOCK + ---help--- + This driver supports the Common Platform Time Sync unit of + the CPSW Ethernet Switch. The unit can time stamp PTP UDP/IPv4 + and Layer 2 packets, and the driver offers a PTP Hardware Clock. + config TLAN tristate "TI ThunderLAN support" depends on (PCI || EISA) diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 91bd8bba78f..c65148e8aa1 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_TI_DAVINCI_EMAC) += davinci_emac.o obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o obj-$(CONFIG_TI_CPSW) += ti_cpsw.o -ti_cpsw-y := cpsw_ale.o cpsw.o +ti_cpsw-y := cpsw_ale.o cpsw.o cpts.o diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index df55e240374..7654a62ab75 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -24,6 +24,7 @@ #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> +#include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/workqueue.h> #include <linux/delay.h> @@ -35,6 +36,7 @@ #include <linux/platform_data/cpsw.h> #include "cpsw_ale.h" +#include "cpts.h" #include "davinci_cpdma.h" #define CPSW_DEBUG (NETIF_MSG_HW | NETIF_MSG_WOL | \ @@ -70,10 +72,14 @@ do { \ dev_notice(priv->dev, format, ## __VA_ARGS__); \ } while (0) +#define ALE_ALL_PORTS 0x7 + #define CPSW_MAJOR_VERSION(reg) (reg >> 8 & 0x7) #define CPSW_MINOR_VERSION(reg) (reg & 0xff) #define CPSW_RTL_VERSION(reg) ((reg >> 11) & 0x1f) +#define CPSW_VERSION_1 0x19010a +#define CPSW_VERSION_2 0x19010c #define CPDMA_RXTHRESH 0x0c0 #define CPDMA_RXFREE 0x0e0 #define CPDMA_TXHDP 0x00 @@ -129,7 +135,7 @@ static int rx_packet_max = CPSW_MAX_PACKET_SIZE; module_param(rx_packet_max, int, 0); MODULE_PARM_DESC(rx_packet_max, "maximum receive packet size (bytes)"); -struct cpsw_ss_regs { +struct cpsw_wr_regs { u32 id_ver; u32 soft_reset; u32 control; @@ -140,26 +146,98 @@ struct cpsw_ss_regs { u32 misc_en; }; -struct cpsw_regs { +struct cpsw_ss_regs { u32 id_ver; u32 control; u32 soft_reset; u32 stat_port_en; u32 ptype; + u32 soft_idle; + u32 thru_rate; + u32 gap_thresh; + u32 tx_start_wds; + u32 flow_control; + u32 vlan_ltype; + u32 ts_ltype; + u32 dlr_ltype; }; -struct cpsw_slave_regs { - u32 max_blks; - u32 blk_cnt; - u32 flow_thresh; - u32 port_vlan; - u32 tx_pri_map; - u32 ts_ctl; - u32 ts_seq_ltype; - u32 ts_vlan; - u32 sa_lo; - u32 sa_hi; -}; +/* CPSW_PORT_V1 */ +#define CPSW1_MAX_BLKS 0x00 /* Maximum FIFO Blocks */ +#define CPSW1_BLK_CNT 0x04 /* FIFO Block Usage Count (Read Only) */ +#define CPSW1_TX_IN_CTL 0x08 /* Transmit FIFO Control */ +#define CPSW1_PORT_VLAN 0x0c /* VLAN Register */ +#define CPSW1_TX_PRI_MAP 0x10 /* Tx Header Priority to Switch Pri Mapping */ +#define CPSW1_TS_CTL 0x14 /* Time Sync Control */ +#define CPSW1_TS_SEQ_LTYPE 0x18 /* Time Sync Sequence ID Offset and Msg Type */ +#define CPSW1_TS_VLAN 0x1c /* Time Sync VLAN1 and VLAN2 */ + +/* CPSW_PORT_V2 */ +#define CPSW2_CONTROL 0x00 /* Control Register */ +#define CPSW2_MAX_BLKS 0x08 /* Maximum FIFO Blocks */ +#define CPSW2_BLK_CNT 0x0c /* FIFO Block Usage Count (Read Only) */ +#define CPSW2_TX_IN_CTL 0x10 /* Transmit FIFO Control */ +#define CPSW2_PORT_VLAN 0x14 /* VLAN Register */ +#define CPSW2_TX_PRI_MAP 0x18 /* Tx Header Priority to Switch Pri Mapping */ +#define CPSW2_TS_SEQ_MTYPE 0x1c /* Time Sync Sequence ID Offset and Msg Type */ + +/* CPSW_PORT_V1 and V2 */ +#define SA_LO 0x20 /* CPGMAC_SL Source Address Low */ +#define SA_HI 0x24 /* CPGMAC_SL Source Address High */ +#define SEND_PERCENT 0x28 /* Transmit Queue Send Percentages */ + +/* CPSW_PORT_V2 only */ +#define RX_DSCP_PRI_MAP0 0x30 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP1 0x34 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP2 0x38 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP3 0x3c /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP4 0x40 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP5 0x44 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP6 0x48 /* Rx DSCP Priority to Rx Packet Mapping */ +#define RX_DSCP_PRI_MAP7 0x4c /* Rx DSCP Priority to Rx Packet Mapping */ + +/* Bit definitions for the CPSW2_CONTROL register */ +#define PASS_PRI_TAGGED (1<<24) /* Pass Priority Tagged */ +#define VLAN_LTYPE2_EN (1<<21) /* VLAN LTYPE 2 enable */ +#define VLAN_LTYPE1_EN (1<<20) /* VLAN LTYPE 1 enable */ +#define DSCP_PRI_EN (1<<16) /* DSCP Priority Enable */ +#define TS_320 (1<<14) /* Time Sync Dest Port 320 enable */ +#define TS_319 (1<<13) /* Time Sync Dest Port 319 enable */ +#define TS_132 (1<<12) /* Time Sync Dest IP Addr 132 enable */ +#define TS_131 (1<<11) /* Time Sync Dest IP Addr 131 enable */ +#define TS_130 (1<<10) /* Time Sync Dest IP Addr 130 enable */ +#define TS_129 (1<<9) /* Time Sync Dest IP Addr 129 enable */ +#define TS_BIT8 (1<<8) /* ts_ttl_nonzero? */ +#define TS_ANNEX_D_EN (1<<4) /* Time Sync Annex D enable */ +#define TS_LTYPE2_EN (1<<3) /* Time Sync LTYPE 2 enable */ +#define TS_LTYPE1_EN (1<<2) /* Time Sync LTYPE 1 enable */ +#define TS_TX_EN (1<<1) /* Time Sync Transmit Enable */ +#define TS_RX_EN (1<<0) /* Time Sync Receive Enable */ + +#define CTRL_TS_BITS \ + (TS_320 | TS_319 | TS_132 | TS_131 | TS_130 | TS_129 | TS_BIT8 | \ + TS_ANNEX_D_EN | TS_LTYPE1_EN) + +#define CTRL_ALL_TS_MASK (CTRL_TS_BITS | TS_TX_EN | TS_RX_EN) +#define CTRL_TX_TS_BITS (CTRL_TS_BITS | TS_TX_EN) +#define CTRL_RX_TS_BITS (CTRL_TS_BITS | TS_RX_EN) + +/* Bit definitions for the CPSW2_TS_SEQ_MTYPE register */ +#define TS_SEQ_ID_OFFSET_SHIFT (16) /* Time Sync Sequence ID Offset */ +#define TS_SEQ_ID_OFFSET_MASK (0x3f) +#define TS_MSG_TYPE_EN_SHIFT (0) /* Time Sync Message Type Enable */ +#define TS_MSG_TYPE_EN_MASK (0xffff) + +/* The PTP event messages - Sync, Delay_Req, Pdelay_Req, and Pdelay_Resp. */ +#define EVENT_MSG_BITS ((1<<0) | (1<<1) | (1<<2) | (1<<3)) + +/* Bit definitions for the CPSW1_TS_CTL register */ +#define CPSW_V1_TS_RX_EN BIT(0) +#define CPSW_V1_TS_TX_EN BIT(4) +#define CPSW_V1_MSG_TYPE_OFS 16 + +/* Bit definitions for the CPSW1_TS_SEQ_LTYPE register */ +#define CPSW_V1_SEQ_ID_OFS_SHIFT 16 struct cpsw_host_regs { u32 max_blks; @@ -185,7 +263,7 @@ struct cpsw_sliver_regs { }; struct cpsw_slave { - struct cpsw_slave_regs __iomem *regs; + void __iomem *regs; struct cpsw_sliver_regs __iomem *sliver; int slave_num; u32 mac_control; @@ -193,19 +271,30 @@ struct cpsw_slave { struct phy_device *phy; }; +static inline u32 slave_read(struct cpsw_slave *slave, u32 offset) +{ + return __raw_readl(slave->regs + offset); +} + +static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) +{ + __raw_writel(val, slave->regs + offset); +} + struct cpsw_priv { spinlock_t lock; struct platform_device *pdev; struct net_device *ndev; struct resource *cpsw_res; - struct resource *cpsw_ss_res; + struct resource *cpsw_wr_res; struct napi_struct napi; struct device *dev; struct cpsw_platform_data data; - struct cpsw_regs __iomem *regs; - struct cpsw_ss_regs __iomem *ss_regs; + struct cpsw_ss_regs __iomem *regs; + struct cpsw_wr_regs __iomem *wr_regs; struct cpsw_host_regs __iomem *host_port_regs; u32 msg_enable; + u32 version; struct net_device_stats stats; int rx_packet_max; int host_port; @@ -218,6 +307,7 @@ struct cpsw_priv { /* snapshot of IRQ numbers */ u32 irqs_table[4]; u32 num_irqs; + struct cpts cpts; }; #define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi) @@ -228,10 +318,34 @@ struct cpsw_priv { (func)((priv)->slaves + idx, ##arg); \ } while (0) +static void cpsw_ndo_set_rx_mode(struct net_device *ndev) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + if (ndev->flags & IFF_PROMISC) { + /* Enable promiscuous mode */ + dev_err(priv->dev, "Ignoring Promiscuous mode\n"); + return; + } + + /* Clear all mcast from ALE */ + cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS << priv->host_port); + + if (!netdev_mc_empty(ndev)) { + struct netdev_hw_addr *ha; + + /* program multicast address list into ALE register */ + netdev_for_each_mc_addr(ha, ndev) { + cpsw_ale_add_mcast(priv->ale, (u8 *)ha->addr, + ALE_ALL_PORTS << priv->host_port, 0, 0); + } + } +} + static void cpsw_intr_enable(struct cpsw_priv *priv) { - __raw_writel(0xFF, &priv->ss_regs->tx_en); - __raw_writel(0xFF, &priv->ss_regs->rx_en); + __raw_writel(0xFF, &priv->wr_regs->tx_en); + __raw_writel(0xFF, &priv->wr_regs->rx_en); cpdma_ctlr_int_ctrl(priv->dma, true); return; @@ -239,8 +353,8 @@ static void cpsw_intr_enable(struct cpsw_priv *priv) static void cpsw_intr_disable(struct cpsw_priv *priv) { - __raw_writel(0, &priv->ss_regs->tx_en); - __raw_writel(0, &priv->ss_regs->rx_en); + __raw_writel(0, &priv->wr_regs->tx_en); + __raw_writel(0, &priv->wr_regs->rx_en); cpdma_ctlr_int_ctrl(priv->dma, false); return; @@ -254,6 +368,7 @@ void cpsw_tx_handler(void *token, int len, int status) if (unlikely(netif_queue_stopped(ndev))) netif_start_queue(ndev); + cpts_tx_timestamp(&priv->cpts, skb); priv->stats.tx_packets++; priv->stats.tx_bytes += len; dev_kfree_skb_any(skb); @@ -274,6 +389,7 @@ void cpsw_rx_handler(void *token, int len, int status) } if (likely(status >= 0)) { skb_put(skb, len); + cpts_rx_timestamp(&priv->cpts, skb); skb->protocol = eth_type_trans(skb, ndev); netif_receive_skb(skb); priv->stats.rx_bytes += len; @@ -359,8 +475,8 @@ static inline void soft_reset(const char *module, void __iomem *reg) static void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv) { - __raw_writel(mac_hi(priv->mac_addr), &slave->regs->sa_hi); - __raw_writel(mac_lo(priv->mac_addr), &slave->regs->sa_lo); + slave_write(slave, mac_hi(priv->mac_addr), SA_HI); + slave_write(slave, mac_lo(priv->mac_addr), SA_LO); } static void _cpsw_adjust_link(struct cpsw_slave *slave, @@ -446,7 +562,15 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) /* setup priority mapping */ __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map); - __raw_writel(TX_PRIORITY_MAPPING, &slave->regs->tx_pri_map); + + switch (priv->version) { + case CPSW_VERSION_1: + slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP); + break; + case CPSW_VERSION_2: + slave_write(slave, TX_PRIORITY_MAPPING, CPSW2_TX_PRI_MAP); + break; + } /* setup max packet size, and mac address */ __raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen); @@ -506,6 +630,7 @@ static int cpsw_ndo_open(struct net_device *ndev) pm_runtime_get_sync(&priv->pdev->dev); reg = __raw_readl(&priv->regs->id_ver); + priv->version = reg; dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n", CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg), @@ -592,6 +717,11 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && priv->cpts.tx_enable) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + skb_tx_timestamp(skb); + ret = cpdma_chan_submit(priv->txch, skb, skb->data, skb->len, GFP_KERNEL); if (unlikely(ret != 0)) { @@ -629,6 +759,130 @@ static void cpsw_ndo_change_rx_flags(struct net_device *ndev, int flags) dev_err(&ndev->dev, "multicast traffic cannot be filtered!\n"); } +#ifdef CONFIG_TI_CPTS + +static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) +{ + struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + u32 ts_en, seq_id; + + if (!priv->cpts.tx_enable && !priv->cpts.rx_enable) { + slave_write(slave, 0, CPSW1_TS_CTL); + return; + } + + seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588; + ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS; + + if (priv->cpts.tx_enable) + ts_en |= CPSW_V1_TS_TX_EN; + + if (priv->cpts.rx_enable) + ts_en |= CPSW_V1_TS_RX_EN; + + slave_write(slave, ts_en, CPSW1_TS_CTL); + slave_write(slave, seq_id, CPSW1_TS_SEQ_LTYPE); +} + +static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) +{ + struct cpsw_slave *slave = &priv->slaves[priv->data.cpts_active_slave]; + u32 ctrl, mtype; + + ctrl = slave_read(slave, CPSW2_CONTROL); + ctrl &= ~CTRL_ALL_TS_MASK; + + if (priv->cpts.tx_enable) + ctrl |= CTRL_TX_TS_BITS; + + if (priv->cpts.rx_enable) + ctrl |= CTRL_RX_TS_BITS; + + mtype = (30 << TS_SEQ_ID_OFFSET_SHIFT) | EVENT_MSG_BITS; + + slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE); + slave_write(slave, ctrl, CPSW2_CONTROL); + __raw_writel(ETH_P_1588, &priv->regs->ts_ltype); +} + +static int cpsw_hwtstamp_ioctl(struct cpsw_priv *priv, struct ifreq *ifr) +{ + struct cpts *cpts = &priv->cpts; + struct hwtstamp_config cfg; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* reserved for future extensions */ + if (cfg.flags) + return -EINVAL; + + switch (cfg.tx_type) { + case HWTSTAMP_TX_OFF: + cpts->tx_enable = 0; + break; + case HWTSTAMP_TX_ON: + cpts->tx_enable = 1; + break; + default: + return -ERANGE; + } + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + cpts->rx_enable = 0; + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + return -ERANGE; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + cpts->rx_enable = 1; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + break; + default: + return -ERANGE; + } + + switch (priv->version) { + case CPSW_VERSION_1: + cpsw_hwtstamp_v1(priv); + break; + case CPSW_VERSION_2: + cpsw_hwtstamp_v2(priv); + break; + default: + return -ENOTSUPP; + } + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +#endif /*CONFIG_TI_CPTS*/ + +static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) +{ + struct cpsw_priv *priv = netdev_priv(dev); + + if (!netif_running(dev)) + return -EINVAL; + +#ifdef CONFIG_TI_CPTS + if (cmd == SIOCSHWTSTAMP) + return cpsw_hwtstamp_ioctl(priv, req); +#endif + return -ENOTSUPP; +} + static void cpsw_ndo_tx_timeout(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); @@ -669,10 +923,12 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_change_rx_flags = cpsw_ndo_change_rx_flags, + .ndo_do_ioctl = cpsw_ndo_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_get_stats = cpsw_ndo_get_stats, + .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = cpsw_ndo_poll_controller, #endif @@ -699,11 +955,44 @@ static void cpsw_set_msglevel(struct net_device *ndev, u32 value) priv->msg_enable = value; } +static int cpsw_get_ts_info(struct net_device *ndev, + struct ethtool_ts_info *info) +{ +#ifdef CONFIG_TI_CPTS + struct cpsw_priv *priv = netdev_priv(ndev); + + info->so_timestamping = + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = priv->cpts.phc_index; + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + info->rx_filters = + (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); +#else + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + info->tx_types = 0; + info->rx_filters = 0; +#endif + return 0; +} + static const struct ethtool_ops cpsw_ethtool_ops = { .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, .set_msglevel = cpsw_set_msglevel, .get_link = ethtool_op_get_link, + .get_ts_info = cpsw_get_ts_info, }; static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv) @@ -734,6 +1023,27 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->slaves = prop; + if (of_property_read_u32(node, "cpts_active_slave", &prop)) { + pr_err("Missing cpts_active_slave property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpts_active_slave = prop; + + if (of_property_read_u32(node, "cpts_clock_mult", &prop)) { + pr_err("Missing cpts_clock_mult property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpts_clock_mult = prop; + + if (of_property_read_u32(node, "cpts_clock_shift", &prop)) { + pr_err("Missing cpts_clock_shift property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpts_clock_shift = prop; + data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) * data->slaves, GFP_KERNEL); if (!data->slave_data) { @@ -799,6 +1109,13 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } data->hw_stats_reg_ofs = prop; + if (of_property_read_u32(node, "cpts_reg_ofs", &prop)) { + pr_err("Missing cpts_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpts_reg_ofs = prop; + if (of_property_read_u32(node, "bd_ram_ofs", &prop)) { pr_err("Missing bd_ram_ofs property in the DT.\n"); ret = -EINVAL; @@ -935,14 +1252,12 @@ static int __devinit cpsw_probe(struct platform_device *pdev) ret = -ENOENT; goto clean_clk_ret; } - if (!request_mem_region(priv->cpsw_res->start, resource_size(priv->cpsw_res), ndev->name)) { dev_err(priv->dev, "failed request i/o region\n"); ret = -ENXIO; goto clean_clk_ret; } - regs = ioremap(priv->cpsw_res->start, resource_size(priv->cpsw_res)); if (!regs) { dev_err(priv->dev, "unable to map i/o region\n"); @@ -951,28 +1266,27 @@ static int __devinit cpsw_probe(struct platform_device *pdev) priv->regs = regs; priv->host_port = data->host_port_num; priv->host_port_regs = regs + data->host_port_reg_ofs; + priv->cpts.reg = regs + data->cpts_reg_ofs; - priv->cpsw_ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!priv->cpsw_ss_res) { + priv->cpsw_wr_res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!priv->cpsw_wr_res) { dev_err(priv->dev, "error getting i/o resource\n"); ret = -ENOENT; - goto clean_clk_ret; + goto clean_iomap_ret; } - - if (!request_mem_region(priv->cpsw_ss_res->start, - resource_size(priv->cpsw_ss_res), ndev->name)) { + if (!request_mem_region(priv->cpsw_wr_res->start, + resource_size(priv->cpsw_wr_res), ndev->name)) { dev_err(priv->dev, "failed request i/o region\n"); ret = -ENXIO; - goto clean_clk_ret; + goto clean_iomap_ret; } - - regs = ioremap(priv->cpsw_ss_res->start, - resource_size(priv->cpsw_ss_res)); + regs = ioremap(priv->cpsw_wr_res->start, + resource_size(priv->cpsw_wr_res)); if (!regs) { dev_err(priv->dev, "unable to map i/o region\n"); - goto clean_cpsw_ss_iores_ret; + goto clean_cpsw_wr_iores_ret; } - priv->ss_regs = regs; + priv->wr_regs = regs; for_each_slave(priv, cpsw_slave_init, priv); @@ -1008,7 +1322,7 @@ static int __devinit cpsw_probe(struct platform_device *pdev) if (!priv->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; - goto clean_iomap_ret; + goto clean_wr_iomap_ret; } priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0), @@ -1072,6 +1386,10 @@ static int __devinit cpsw_probe(struct platform_device *pdev) goto clean_irq_ret; } + if (cpts_register(&pdev->dev, &priv->cpts, + data->cpts_clock_mult, data->cpts_clock_shift)) + dev_err(priv->dev, "error registering cpts device\n"); + cpsw_notice(priv, probe, "initialized device (regs %x, irq %d)\n", priv->cpsw_res->start, ndev->irq); @@ -1085,11 +1403,13 @@ clean_dma_ret: cpdma_chan_destroy(priv->txch); cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); +clean_wr_iomap_ret: + iounmap(priv->wr_regs); +clean_cpsw_wr_iores_ret: + release_mem_region(priv->cpsw_wr_res->start, + resource_size(priv->cpsw_wr_res)); clean_iomap_ret: iounmap(priv->regs); -clean_cpsw_ss_iores_ret: - release_mem_region(priv->cpsw_ss_res->start, - resource_size(priv->cpsw_ss_res)); clean_cpsw_iores_ret: release_mem_region(priv->cpsw_res->start, resource_size(priv->cpsw_res)); @@ -1111,6 +1431,7 @@ static int __devexit cpsw_remove(struct platform_device *pdev) pr_info("removing device"); platform_set_drvdata(pdev, NULL); + cpts_unregister(&priv->cpts); free_irq(ndev->irq, priv); cpsw_ale_destroy(priv->ale); cpdma_chan_destroy(priv->txch); @@ -1119,8 +1440,9 @@ static int __devexit cpsw_remove(struct platform_device *pdev) iounmap(priv->regs); release_mem_region(priv->cpsw_res->start, resource_size(priv->cpsw_res)); - release_mem_region(priv->cpsw_ss_res->start, - resource_size(priv->cpsw_ss_res)); + iounmap(priv->wr_regs); + release_mem_region(priv->cpsw_wr_res->start, + resource_size(priv->cpsw_wr_res)); pm_runtime_disable(&pdev->dev); clk_put(priv->clk); kfree(priv->slaves); diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index ca0d48a7e50..0e9ccc2cf91 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -20,6 +20,7 @@ #include <linux/io.h> #include <linux/stat.h> #include <linux/sysfs.h> +#include <linux/etherdevice.h> #include "cpsw_ale.h" @@ -211,10 +212,34 @@ static void cpsw_ale_flush_mcast(struct cpsw_ale *ale, u32 *ale_entry, mask &= ~port_mask; /* free if only remaining port is host port */ - if (mask == BIT(ale->params.ale_ports)) - cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); - else + if (mask) cpsw_ale_set_port_mask(ale_entry, mask); + else + cpsw_ale_set_entry_type(ale_entry, ALE_TYPE_FREE); +} + +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask) +{ + u32 ale_entry[ALE_ENTRY_WORDS]; + int ret, idx; + + for (idx = 0; idx < ale->params.ale_entries; idx++) { + cpsw_ale_read(ale, idx, ale_entry); + ret = cpsw_ale_get_entry_type(ale_entry); + if (ret != ALE_TYPE_ADDR && ret != ALE_TYPE_VLAN_ADDR) + continue; + + if (cpsw_ale_get_mcast(ale_entry)) { + u8 addr[6]; + + cpsw_ale_get_addr(ale_entry, addr); + if (!is_broadcast_ether_addr(addr)) + cpsw_ale_flush_mcast(ale, ale_entry, port_mask); + } + + cpsw_ale_write(ale, idx, ale_entry); + } + return 0; } static void cpsw_ale_flush_ucast(struct cpsw_ale *ale, u32 *ale_entry, diff --git a/drivers/net/ethernet/ti/cpsw_ale.h b/drivers/net/ethernet/ti/cpsw_ale.h index a95b37beb02..2bd09cbce52 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.h +++ b/drivers/net/ethernet/ti/cpsw_ale.h @@ -80,6 +80,7 @@ void cpsw_ale_stop(struct cpsw_ale *ale); int cpsw_ale_set_ageout(struct cpsw_ale *ale, int ageout); int cpsw_ale_flush(struct cpsw_ale *ale, int port_mask); +int cpsw_ale_flush_multicast(struct cpsw_ale *ale, int port_mask); int cpsw_ale_add_ucast(struct cpsw_ale *ale, u8 *addr, int port, int flags); int cpsw_ale_del_ucast(struct cpsw_ale *ale, u8 *addr, int port); int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask, diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c new file mode 100644 index 00000000000..337766738ec --- /dev/null +++ b/drivers/net/ethernet/ti/cpts.c @@ -0,0 +1,427 @@ +/* + * TI Common Platform Time Sync + * + * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <linux/err.h> +#include <linux/if.h> +#include <linux/hrtimer.h> +#include <linux/module.h> +#include <linux/net_tstamp.h> +#include <linux/ptp_classify.h> +#include <linux/time.h> +#include <linux/uaccess.h> +#include <linux/workqueue.h> + +#include <plat/clock.h> + +#include "cpts.h" + +#ifdef CONFIG_TI_CPTS + +static struct sock_filter ptp_filter[] = { + PTP_FILTER +}; + +#define cpts_read32(c, r) __raw_readl(&c->reg->r) +#define cpts_write32(c, v, r) __raw_writel(v, &c->reg->r) + +static int event_expired(struct cpts_event *event) +{ + return time_after(jiffies, event->tmo); +} + +static int event_type(struct cpts_event *event) +{ + return (event->high >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; +} + +static int cpts_fifo_pop(struct cpts *cpts, u32 *high, u32 *low) +{ + u32 r = cpts_read32(cpts, intstat_raw); + + if (r & TS_PEND_RAW) { + *high = cpts_read32(cpts, event_high); + *low = cpts_read32(cpts, event_low); + cpts_write32(cpts, EVENT_POP, event_pop); + return 0; + } + return -1; +} + +/* + * Returns zero if matching event type was found. + */ +static int cpts_fifo_read(struct cpts *cpts, int match) +{ + int i, type = -1; + u32 hi, lo; + struct cpts_event *event; + + for (i = 0; i < CPTS_FIFO_DEPTH; i++) { + if (cpts_fifo_pop(cpts, &hi, &lo)) + break; + if (list_empty(&cpts->pool)) { + pr_err("cpts: event pool is empty\n"); + return -1; + } + event = list_first_entry(&cpts->pool, struct cpts_event, list); + event->tmo = jiffies + 2; + event->high = hi; + event->low = lo; + type = event_type(event); + switch (type) { + case CPTS_EV_PUSH: + case CPTS_EV_RX: + case CPTS_EV_TX: + list_del_init(&event->list); + list_add_tail(&event->list, &cpts->events); + break; + case CPTS_EV_ROLL: + case CPTS_EV_HALF: + case CPTS_EV_HW: + break; + default: + pr_err("cpts: unkown event type\n"); + break; + } + if (type == match) + break; + } + return type == match ? 0 : -1; +} + +static cycle_t cpts_systim_read(const struct cyclecounter *cc) +{ + u64 val = 0; + struct cpts_event *event; + struct list_head *this, *next; + struct cpts *cpts = container_of(cc, struct cpts, cc); + + cpts_write32(cpts, TS_PUSH, ts_push); + if (cpts_fifo_read(cpts, CPTS_EV_PUSH)) + pr_err("cpts: unable to obtain a time stamp\n"); + + list_for_each_safe(this, next, &cpts->events) { + event = list_entry(this, struct cpts_event, list); + if (event_type(event) == CPTS_EV_PUSH) { + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + val = event->low; + break; + } + } + + return val; +} + +/* PTP clock operations */ + +static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +{ + u64 adj; + u32 diff, mult; + int neg_adj = 0; + unsigned long flags; + struct cpts *cpts = container_of(ptp, struct cpts, info); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + mult = cpts->cc_mult; + adj = mult; + adj *= ppb; + diff = div_u64(adj, 1000000000ULL); + + spin_lock_irqsave(&cpts->lock, flags); + + timecounter_read(&cpts->tc); + + cpts->cc.mult = neg_adj ? mult - diff : mult + diff; + + spin_unlock_irqrestore(&cpts->lock, flags); + + return 0; +} + +static int cpts_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + s64 now; + unsigned long flags; + struct cpts *cpts = container_of(ptp, struct cpts, info); + + spin_lock_irqsave(&cpts->lock, flags); + now = timecounter_read(&cpts->tc); + now += delta; + timecounter_init(&cpts->tc, &cpts->cc, now); + spin_unlock_irqrestore(&cpts->lock, flags); + + return 0; +} + +static int cpts_ptp_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + u64 ns; + u32 remainder; + unsigned long flags; + struct cpts *cpts = container_of(ptp, struct cpts, info); + + spin_lock_irqsave(&cpts->lock, flags); + ns = timecounter_read(&cpts->tc); + spin_unlock_irqrestore(&cpts->lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); + ts->tv_nsec = remainder; + + return 0; +} + +static int cpts_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + u64 ns; + unsigned long flags; + struct cpts *cpts = container_of(ptp, struct cpts, info); + + ns = ts->tv_sec * 1000000000ULL; + ns += ts->tv_nsec; + + spin_lock_irqsave(&cpts->lock, flags); + timecounter_init(&cpts->tc, &cpts->cc, ns); + spin_unlock_irqrestore(&cpts->lock, flags); + + return 0; +} + +static int cpts_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +static struct ptp_clock_info cpts_info = { + .owner = THIS_MODULE, + .name = "CTPS timer", + .max_adj = 1000000, + .n_ext_ts = 0, + .pps = 0, + .adjfreq = cpts_ptp_adjfreq, + .adjtime = cpts_ptp_adjtime, + .gettime = cpts_ptp_gettime, + .settime = cpts_ptp_settime, + .enable = cpts_ptp_enable, +}; + +static void cpts_overflow_check(struct work_struct *work) +{ + struct timespec ts; + struct cpts *cpts = container_of(work, struct cpts, overflow_work.work); + + cpts_write32(cpts, CPTS_EN, control); + cpts_write32(cpts, TS_PEND_EN, int_enable); + cpts_ptp_gettime(&cpts->info, &ts); + pr_debug("cpts overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); + schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD); +} + +#define CPTS_REF_CLOCK_NAME "cpsw_cpts_rft_clk" + +static void cpts_clk_init(struct cpts *cpts) +{ + cpts->refclk = clk_get(NULL, CPTS_REF_CLOCK_NAME); + if (IS_ERR(cpts->refclk)) { + pr_err("Failed to clk_get %s\n", CPTS_REF_CLOCK_NAME); + cpts->refclk = NULL; + return; + } + clk_enable(cpts->refclk); + cpts->freq = cpts->refclk->recalc(cpts->refclk); +} + +static void cpts_clk_release(struct cpts *cpts) +{ + clk_disable(cpts->refclk); + clk_put(cpts->refclk); +} + +static int cpts_match(struct sk_buff *skb, unsigned int ptp_class, + u16 ts_seqid, u8 ts_msgtype) +{ + u16 *seqid; + unsigned int offset; + u8 *msgtype, *data = skb->data; + + switch (ptp_class) { + case PTP_CLASS_V1_IPV4: + case PTP_CLASS_V2_IPV4: + offset = ETH_HLEN + IPV4_HLEN(data) + UDP_HLEN; + break; + case PTP_CLASS_V1_IPV6: + case PTP_CLASS_V2_IPV6: + offset = OFF_PTP6; + break; + case PTP_CLASS_V2_L2: + offset = ETH_HLEN; + break; + case PTP_CLASS_V2_VLAN: + offset = ETH_HLEN + VLAN_HLEN; + break; + default: + return 0; + } + + if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid)) + return 0; + + if (unlikely(ptp_class & PTP_CLASS_V1)) + msgtype = data + offset + OFF_PTP_CONTROL; + else + msgtype = data + offset; + + seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID); + + return (ts_msgtype == (*msgtype & 0xf) && ts_seqid == ntohs(*seqid)); +} + +static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type) +{ + u64 ns = 0; + struct cpts_event *event; + struct list_head *this, *next; + unsigned int class = sk_run_filter(skb, ptp_filter); + unsigned long flags; + u16 seqid; + u8 mtype; + + if (class == PTP_CLASS_NONE) + return 0; + + spin_lock_irqsave(&cpts->lock, flags); + cpts_fifo_read(cpts, CPTS_EV_PUSH); + list_for_each_safe(this, next, &cpts->events) { + event = list_entry(this, struct cpts_event, list); + if (event_expired(event)) { + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + continue; + } + mtype = (event->high >> MESSAGE_TYPE_SHIFT) & MESSAGE_TYPE_MASK; + seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK; + if (ev_type == event_type(event) && + cpts_match(skb, class, seqid, mtype)) { + ns = timecounter_cyc2time(&cpts->tc, event->low); + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + break; + } + } + spin_unlock_irqrestore(&cpts->lock, flags); + + return ns; +} + +void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ + u64 ns; + struct skb_shared_hwtstamps *ssh; + + if (!cpts->rx_enable) + return; + ns = cpts_find_ts(cpts, skb, CPTS_EV_RX); + if (!ns) + return; + ssh = skb_hwtstamps(skb); + memset(ssh, 0, sizeof(*ssh)); + ssh->hwtstamp = ns_to_ktime(ns); +} + +void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ + u64 ns; + struct skb_shared_hwtstamps ssh; + + if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + return; + ns = cpts_find_ts(cpts, skb, CPTS_EV_TX); + if (!ns) + return; + memset(&ssh, 0, sizeof(ssh)); + ssh.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &ssh); +} + +#endif /*CONFIG_TI_CPTS*/ + +int cpts_register(struct device *dev, struct cpts *cpts, + u32 mult, u32 shift) +{ +#ifdef CONFIG_TI_CPTS + int err, i; + unsigned long flags; + + if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) { + pr_err("cpts: bad ptp filter\n"); + return -EINVAL; + } + cpts->info = cpts_info; + cpts->clock = ptp_clock_register(&cpts->info, dev); + if (IS_ERR(cpts->clock)) { + err = PTR_ERR(cpts->clock); + cpts->clock = NULL; + return err; + } + spin_lock_init(&cpts->lock); + + cpts->cc.read = cpts_systim_read; + cpts->cc.mask = CLOCKSOURCE_MASK(32); + cpts->cc_mult = mult; + cpts->cc.mult = mult; + cpts->cc.shift = shift; + + INIT_LIST_HEAD(&cpts->events); + INIT_LIST_HEAD(&cpts->pool); + for (i = 0; i < CPTS_MAX_EVENTS; i++) + list_add(&cpts->pool_data[i].list, &cpts->pool); + + cpts_clk_init(cpts); + cpts_write32(cpts, CPTS_EN, control); + cpts_write32(cpts, TS_PEND_EN, int_enable); + + spin_lock_irqsave(&cpts->lock, flags); + timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real())); + spin_unlock_irqrestore(&cpts->lock, flags); + + INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check); + schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD); + + cpts->phc_index = ptp_clock_index(cpts->clock); +#endif + return 0; +} + +void cpts_unregister(struct cpts *cpts) +{ +#ifdef CONFIG_TI_CPTS + if (cpts->clock) { + ptp_clock_unregister(cpts->clock); + cancel_delayed_work_sync(&cpts->overflow_work); + } + if (cpts->refclk) + cpts_clk_release(cpts); +#endif +} diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h new file mode 100644 index 00000000000..e1bba3a496b --- /dev/null +++ b/drivers/net/ethernet/ti/cpts.h @@ -0,0 +1,146 @@ +/* + * TI Common Platform Time Sync + * + * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef _TI_CPTS_H_ +#define _TI_CPTS_H_ + +#include <linux/clk.h> +#include <linux/clkdev.h> +#include <linux/clocksource.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/skbuff.h> + +struct cpsw_cpts { + u32 idver; /* Identification and version */ + u32 control; /* Time sync control */ + u32 res1; + u32 ts_push; /* Time stamp event push */ + u32 ts_load_val; /* Time stamp load value */ + u32 ts_load_en; /* Time stamp load enable */ + u32 res2[2]; + u32 intstat_raw; /* Time sync interrupt status raw */ + u32 intstat_masked; /* Time sync interrupt status masked */ + u32 int_enable; /* Time sync interrupt enable */ + u32 res3; + u32 event_pop; /* Event interrupt pop */ + u32 event_low; /* 32 Bit Event Time Stamp */ + u32 event_high; /* Event Type Fields */ +}; + +/* Bit definitions for the IDVER register */ +#define TX_IDENT_SHIFT (16) /* TX Identification Value */ +#define TX_IDENT_MASK (0xffff) +#define RTL_VER_SHIFT (11) /* RTL Version Value */ +#define RTL_VER_MASK (0x1f) +#define MAJOR_VER_SHIFT (8) /* Major Version Value */ +#define MAJOR_VER_MASK (0x7) +#define MINOR_VER_SHIFT (0) /* Minor Version Value */ +#define MINOR_VER_MASK (0xff) + +/* Bit definitions for the CONTROL register */ +#define HW4_TS_PUSH_EN (1<<11) /* Hardware push 4 enable */ +#define HW3_TS_PUSH_EN (1<<10) /* Hardware push 3 enable */ +#define HW2_TS_PUSH_EN (1<<9) /* Hardware push 2 enable */ +#define HW1_TS_PUSH_EN (1<<8) /* Hardware push 1 enable */ +#define INT_TEST (1<<1) /* Interrupt Test */ +#define CPTS_EN (1<<0) /* Time Sync Enable */ + +/* + * Definitions for the single bit resisters: + * TS_PUSH TS_LOAD_EN INTSTAT_RAW INTSTAT_MASKED INT_ENABLE EVENT_POP + */ +#define TS_PUSH (1<<0) /* Time stamp event push */ +#define TS_LOAD_EN (1<<0) /* Time Stamp Load */ +#define TS_PEND_RAW (1<<0) /* int read (before enable) */ +#define TS_PEND (1<<0) /* masked interrupt read (after enable) */ +#define TS_PEND_EN (1<<0) /* masked interrupt enable */ +#define EVENT_POP (1<<0) /* writing discards one event */ + +/* Bit definitions for the EVENT_HIGH register */ +#define PORT_NUMBER_SHIFT (24) /* Indicates Ethernet port or HW pin */ +#define PORT_NUMBER_MASK (0x1f) +#define EVENT_TYPE_SHIFT (20) /* Time sync event type */ +#define EVENT_TYPE_MASK (0xf) +#define MESSAGE_TYPE_SHIFT (16) /* PTP message type */ +#define MESSAGE_TYPE_MASK (0xf) +#define SEQUENCE_ID_SHIFT (0) /* PTP message sequence ID */ +#define SEQUENCE_ID_MASK (0xffff) + +enum { + CPTS_EV_PUSH, /* Time Stamp Push Event */ + CPTS_EV_ROLL, /* Time Stamp Rollover Event */ + CPTS_EV_HALF, /* Time Stamp Half Rollover Event */ + CPTS_EV_HW, /* Hardware Time Stamp Push Event */ + CPTS_EV_RX, /* Ethernet Receive Event */ + CPTS_EV_TX, /* Ethernet Transmit Event */ +}; + +/* This covers any input clock up to about 500 MHz. */ +#define CPTS_OVERFLOW_PERIOD (HZ * 8) + +#define CPTS_FIFO_DEPTH 16 +#define CPTS_MAX_EVENTS 32 + +struct cpts_event { + struct list_head list; + unsigned long tmo; + u32 high; + u32 low; +}; + +struct cpts { + struct cpsw_cpts __iomem *reg; + int tx_enable; + int rx_enable; +#ifdef CONFIG_TI_CPTS + struct ptp_clock_info info; + struct ptp_clock *clock; + spinlock_t lock; /* protects time registers */ + u32 cc_mult; /* for the nominal frequency */ + struct cyclecounter cc; + struct timecounter tc; + struct delayed_work overflow_work; + int phc_index; + struct clk *refclk; + unsigned long freq; + struct list_head events; + struct list_head pool; + struct cpts_event pool_data[CPTS_MAX_EVENTS]; +#endif +}; + +#ifdef CONFIG_TI_CPTS +extern void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb); +extern void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb); +#else +static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ +} +static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb) +{ +} +#endif + +extern int cpts_register(struct device *dev, struct cpts *cpts, + u32 mult, u32 shift); +extern void cpts_unregister(struct cpts *cpts); + +#endif diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 928148cc322..7fdeb528133 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -363,11 +363,6 @@ static void rndis_filter_receive_data(struct rndis_device *dev, rndis_pkt = &msg->msg.pkt; - /* - * FIXME: Handle multiple rndis pkt msgs that maybe enclosed in this - * netvsc packet (ie TotalDataBufferLength != MessageLength) - */ - /* Remove the rndis header and pass it back up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; diff --git a/drivers/net/irda/sh_irda.c b/drivers/net/irda/sh_irda.c index 4b746d9bd8e..945360a99cb 100644 --- a/drivers/net/irda/sh_irda.c +++ b/drivers/net/irda/sh_irda.c @@ -33,11 +33,7 @@ #define DRIVER_NAME "sh_irda" -#if defined(CONFIG_ARCH_SH7367) || defined(CONFIG_ARCH_SH7377) -#define __IRDARAM_LEN 0x13FF -#else #define __IRDARAM_LEN 0x1039 -#endif #define IRTMR 0x1F00 /* Transfer mode */ #define IRCFR 0x1F02 /* Configuration */ diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index b3321129a83..6989ebe2bc7 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -56,6 +56,10 @@ static char config[MAX_PARAM_LENGTH]; module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0); MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr]"); +static bool oops_only = false; +module_param(oops_only, bool, 0600); +MODULE_PARM_DESC(oops_only, "Only log oops messages"); + #ifndef MODULE static int __init option_setup(char *opt) { @@ -683,6 +687,8 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) struct netconsole_target *nt; const char *tmp; + if (oops_only && !oops_in_progress) + return; /* Avoid taking lock and disabling interrupts unnecessarily */ if (list_empty(&target_list)) return; diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index c1ef3000ea6..044b5326459 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -431,10 +431,24 @@ static struct dev_pm_ops mdio_bus_pm_ops = { #endif /* CONFIG_PM */ +static ssize_t +phy_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct phy_device *phydev = to_phy_device(dev); + + return sprintf(buf, "0x%.8lx\n", (unsigned long)phydev->phy_id); +} + +static struct device_attribute mdio_dev_attrs[] = { + __ATTR_RO(phy_id), + __ATTR_NULL +}; + struct bus_type mdio_bus_type = { .name = "mdio_bus", .match = mdio_bus_match, .pm = MDIO_BUS_PM_OPS, + .dev_attrs = mdio_dev_attrs, }; EXPORT_SYMBOL(mdio_bus_type); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index eb3f5cefeba..0b2706abe3e 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1034,7 +1034,7 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; } -struct rtnl_link_stats64* +static struct rtnl_link_stats64* ppp_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) { struct ppp *ppp = netdev_priv(dev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0873cdcf39b..b44d7b79cdd 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -68,7 +68,6 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> -#include <net/cls_cgroup.h> #include <asm/uaccess.h> @@ -110,16 +109,56 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; +/* 1024 is probably a high enough limit: modern hypervisors seem to support on + * the order of 100-200 CPUs so this leaves us some breathing space if we want + * to match a queue per guest CPU. + */ +#define MAX_TAP_QUEUES 1024 + +#define TUN_FLOW_EXPIRE (3 * HZ) + +/* A tun_file connects an open character device to a tuntap netdevice. It + * also contains all socket related strctures (except sock_fprog and tap_filter) + * to serve as one transmit queue for tuntap device. The sock_fprog and + * tap_filter were kept in tun_struct since they were used for filtering for the + * netdevice not for a specific queue (at least I didn't see the reqirement for + * this). + * + * RCU usage: + * The tun_file and tun_struct are loosely coupled, the pointer from on to the + * other can only be read while rcu_read_lock or rtnl_lock is held. + */ struct tun_file { - atomic_t count; - struct tun_struct *tun; + struct sock sk; + struct socket socket; + struct socket_wq wq; + struct tun_struct __rcu *tun; struct net *net; + struct fasync_struct *fasync; + /* only used for fasnyc */ + unsigned int flags; + u16 queue_index; +}; + +struct tun_flow_entry { + struct hlist_node hash_link; + struct rcu_head rcu; + struct tun_struct *tun; + + u32 rxhash; + int queue_index; + unsigned long updated; }; -struct tun_sock; +#define TUN_NUM_FLOW_ENTRIES 1024 +/* Since the socket were moved to tun_file, to preserve the behavior of persist + * device, socket fileter, sndbuf and vnet header size were restore when the + * file were attached to a persist device. + */ struct tun_struct { - struct tun_file *tfile; + struct tun_file __rcu *tfiles[MAX_TAP_QUEUES]; + unsigned int numqueues; unsigned int flags; kuid_t owner; kgid_t group; @@ -128,88 +167,351 @@ struct tun_struct { netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ NETIF_F_TSO6|NETIF_F_UFO) - struct fasync_struct *fasync; - - struct tap_filter txflt; - struct socket socket; - struct socket_wq wq; int vnet_hdr_sz; - + int sndbuf; + struct tap_filter txflt; + struct sock_fprog fprog; + /* protected by rtnl lock */ + bool filter_attached; #ifdef TUN_DEBUG int debug; #endif + spinlock_t lock; + struct kmem_cache *flow_cache; + struct hlist_head flows[TUN_NUM_FLOW_ENTRIES]; + struct timer_list flow_gc_timer; + unsigned long ageing_time; }; -struct tun_sock { - struct sock sk; - struct tun_struct *tun; -}; +static inline u32 tun_hashfn(u32 rxhash) +{ + return rxhash & 0x3ff; +} -static inline struct tun_sock *tun_sk(struct sock *sk) +static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash) { - return container_of(sk, struct tun_sock, sk); + struct tun_flow_entry *e; + struct hlist_node *n; + + hlist_for_each_entry_rcu(e, n, head, hash_link) { + if (e->rxhash == rxhash) + return e; + } + return NULL; } -static int tun_attach(struct tun_struct *tun, struct file *file) +static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, + struct hlist_head *head, + u32 rxhash, u16 queue_index) { - struct tun_file *tfile = file->private_data; - int err; + struct tun_flow_entry *e = kmem_cache_alloc(tun->flow_cache, + GFP_ATOMIC); + if (e) { + tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n", + rxhash, queue_index); + e->updated = jiffies; + e->rxhash = rxhash; + e->queue_index = queue_index; + e->tun = tun; + hlist_add_head_rcu(&e->hash_link, head); + } + return e; +} - ASSERT_RTNL(); +static void tun_flow_free(struct rcu_head *head) +{ + struct tun_flow_entry *e + = container_of(head, struct tun_flow_entry, rcu); + kmem_cache_free(e->tun->flow_cache, e); +} - netif_tx_lock_bh(tun->dev); +static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) +{ + tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", + e->rxhash, e->queue_index); + hlist_del_rcu(&e->hash_link); + call_rcu(&e->rcu, tun_flow_free); +} - err = -EINVAL; - if (tfile->tun) - goto out; +static void tun_flow_flush(struct tun_struct *tun) +{ + int i; - err = -EBUSY; - if (tun->tfile) - goto out; + spin_lock_bh(&tun->lock); + for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { + struct tun_flow_entry *e; + struct hlist_node *h, *n; - err = 0; - tfile->tun = tun; - tun->tfile = tfile; - tun->socket.file = file; - netif_carrier_on(tun->dev); - dev_hold(tun->dev); - sock_hold(tun->socket.sk); - atomic_inc(&tfile->count); + hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) + tun_flow_delete(tun, e); + } + spin_unlock_bh(&tun->lock); +} -out: - netif_tx_unlock_bh(tun->dev); - return err; +static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) +{ + int i; + + spin_lock_bh(&tun->lock); + for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { + struct tun_flow_entry *e; + struct hlist_node *h, *n; + + hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + if (e->queue_index == queue_index) + tun_flow_delete(tun, e); + } + } + spin_unlock_bh(&tun->lock); } -static void __tun_detach(struct tun_struct *tun) +static void tun_flow_cleanup(unsigned long data) { - /* Detach from net device */ - netif_tx_lock_bh(tun->dev); - netif_carrier_off(tun->dev); - tun->tfile = NULL; - netif_tx_unlock_bh(tun->dev); + struct tun_struct *tun = (struct tun_struct *)data; + unsigned long delay = tun->ageing_time; + unsigned long next_timer = jiffies + delay; + unsigned long count = 0; + int i; - /* Drop read queue */ - skb_queue_purge(&tun->socket.sk->sk_receive_queue); + tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n"); + + spin_lock_bh(&tun->lock); + for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) { + struct tun_flow_entry *e; + struct hlist_node *h, *n; + + hlist_for_each_entry_safe(e, h, n, &tun->flows[i], hash_link) { + unsigned long this_timer; + count++; + this_timer = e->updated + delay; + if (time_before_eq(this_timer, jiffies)) + tun_flow_delete(tun, e); + else if (time_before(this_timer, next_timer)) + next_timer = this_timer; + } + } - /* Drop the extra count on the net device */ - dev_put(tun->dev); + if (count) + mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer)); + spin_unlock_bh(&tun->lock); +} + +static void tun_flow_update(struct tun_struct *tun, struct sk_buff *skb, + u16 queue_index) +{ + struct hlist_head *head; + struct tun_flow_entry *e; + unsigned long delay = tun->ageing_time; + u32 rxhash = skb_get_rxhash(skb); + + if (!rxhash) + return; + else + head = &tun->flows[tun_hashfn(rxhash)]; + + rcu_read_lock(); + + if (tun->numqueues == 1) + goto unlock; + + e = tun_flow_find(head, rxhash); + if (likely(e)) { + /* TODO: keep queueing to old queue until it's empty? */ + e->queue_index = queue_index; + e->updated = jiffies; + } else { + spin_lock_bh(&tun->lock); + if (!tun_flow_find(head, rxhash)) + tun_flow_create(tun, head, rxhash, queue_index); + + if (!timer_pending(&tun->flow_gc_timer)) + mod_timer(&tun->flow_gc_timer, + round_jiffies_up(jiffies + delay)); + spin_unlock_bh(&tun->lock); + } + +unlock: + rcu_read_unlock(); +} + +/* We try to identify a flow through its rxhash first. The reason that + * we do not check rxq no. is becuase some cards(e.g 82599), chooses + * the rxq based on the txq where the last packet of the flow comes. As + * the userspace application move between processors, we may get a + * different rxq no. here. If we could not get rxhash, then we would + * hope the rxq no. may help here. + */ +static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + struct tun_struct *tun = netdev_priv(dev); + struct tun_flow_entry *e; + u32 txq = 0; + u32 numqueues = 0; + + rcu_read_lock(); + numqueues = tun->numqueues; + + txq = skb_get_rxhash(skb); + if (txq) { + e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); + if (e) + txq = e->queue_index; + else + /* use multiply and shift instead of expensive divide */ + txq = ((u64)txq * numqueues) >> 32; + } else if (likely(skb_rx_queue_recorded(skb))) { + txq = skb_get_rx_queue(skb); + while (unlikely(txq >= numqueues)) + txq -= numqueues; + } + + rcu_read_unlock(); + return txq; +} + +static inline bool tun_not_capable(struct tun_struct *tun) +{ + const struct cred *cred = current_cred(); + + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !capable(CAP_NET_ADMIN); +} + +static void tun_set_real_num_queues(struct tun_struct *tun) +{ + netif_set_real_num_tx_queues(tun->dev, tun->numqueues); + netif_set_real_num_rx_queues(tun->dev, tun->numqueues); +} + +static void __tun_detach(struct tun_file *tfile, bool clean) +{ + struct tun_file *ntfile; + struct tun_struct *tun; + struct net_device *dev; + + tun = rcu_dereference_protected(tfile->tun, + lockdep_rtnl_is_held()); + if (tun) { + u16 index = tfile->queue_index; + BUG_ON(index >= tun->numqueues); + dev = tun->dev; + + rcu_assign_pointer(tun->tfiles[index], + tun->tfiles[tun->numqueues - 1]); + rcu_assign_pointer(tfile->tun, NULL); + ntfile = rcu_dereference_protected(tun->tfiles[index], + lockdep_rtnl_is_held()); + ntfile->queue_index = index; + + --tun->numqueues; + sock_put(&tfile->sk); + + synchronize_net(); + tun_flow_delete_by_queue(tun, tun->numqueues + 1); + /* Drop read queue */ + skb_queue_purge(&tfile->sk.sk_receive_queue); + tun_set_real_num_queues(tun); + + if (tun->numqueues == 0 && !(tun->flags & TUN_PERSIST)) + if (dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(dev); + } + + if (clean) { + BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, + &tfile->socket.flags)); + sk_release_kernel(&tfile->sk); + } } -static void tun_detach(struct tun_struct *tun) +static void tun_detach(struct tun_file *tfile, bool clean) { rtnl_lock(); - __tun_detach(tun); + __tun_detach(tfile, clean); rtnl_unlock(); } +static void tun_detach_all(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile; + int i, n = tun->numqueues; + + for (i = 0; i < n; i++) { + tfile = rcu_dereference_protected(tun->tfiles[i], + lockdep_rtnl_is_held()); + BUG_ON(!tfile); + wake_up_all(&tfile->wq.wait); + rcu_assign_pointer(tfile->tun, NULL); + --tun->numqueues; + } + BUG_ON(tun->numqueues != 0); + + synchronize_net(); + for (i = 0; i < n; i++) { + tfile = rcu_dereference_protected(tun->tfiles[i], + lockdep_rtnl_is_held()); + /* Drop read queue */ + skb_queue_purge(&tfile->sk.sk_receive_queue); + sock_put(&tfile->sk); + } +} + +static int tun_attach(struct tun_struct *tun, struct file *file) +{ + struct tun_file *tfile = file->private_data; + int err; + + err = -EINVAL; + if (rcu_dereference_protected(tfile->tun, lockdep_rtnl_is_held())) + goto out; + + err = -EBUSY; + if (!(tun->flags & TUN_TAP_MQ) && tun->numqueues == 1) + goto out; + + err = -E2BIG; + if (tun->numqueues == MAX_TAP_QUEUES) + goto out; + + err = 0; + + /* Re-attach the filter to presist device */ + if (tun->filter_attached == true) { + err = sk_attach_filter(&tun->fprog, tfile->socket.sk); + if (!err) + goto out; + } + tfile->queue_index = tun->numqueues; + rcu_assign_pointer(tfile->tun, tun); + rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); + sock_hold(&tfile->sk); + tun->numqueues++; + + tun_set_real_num_queues(tun); + + if (tun->numqueues == 1) + netif_carrier_on(tun->dev); + + /* device is allowed to go away first, so no need to hold extra + * refcnt. + */ + +out: + return err; +} + static struct tun_struct *__tun_get(struct tun_file *tfile) { - struct tun_struct *tun = NULL; + struct tun_struct *tun; - if (atomic_inc_not_zero(&tfile->count)) - tun = tfile->tun; + rcu_read_lock(); + tun = rcu_dereference(tfile->tun); + if (tun) + dev_hold(tun->dev); + rcu_read_unlock(); return tun; } @@ -221,10 +523,7 @@ static struct tun_struct *tun_get(struct file *file) static void tun_put(struct tun_struct *tun) { - struct tun_file *tfile = tun->tfile; - - if (atomic_dec_and_test(&tfile->count)) - tun_detach(tfile->tun); + dev_put(tun->dev); } /* TAP filtering */ @@ -344,38 +643,20 @@ static const struct ethtool_ops tun_ethtool_ops; /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { - struct tun_struct *tun = netdev_priv(dev); - struct tun_file *tfile = tun->tfile; - - /* Inform the methods they need to stop using the dev. - */ - if (tfile) { - wake_up_all(&tun->wq.wait); - if (atomic_dec_and_test(&tfile->count)) - __tun_detach(tun); - } -} - -static void tun_free_netdev(struct net_device *dev) -{ - struct tun_struct *tun = netdev_priv(dev); - - BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags)); - - sk_release_kernel(tun->socket.sk); + tun_detach_all(dev); } /* Net device open. */ static int tun_net_open(struct net_device *dev) { - netif_start_queue(dev); + netif_tx_start_all_queues(dev); return 0; } /* Net device close. */ static int tun_net_close(struct net_device *dev) { - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); return 0; } @@ -383,28 +664,39 @@ static int tun_net_close(struct net_device *dev) static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + int txq = skb->queue_mapping; + struct tun_file *tfile; - tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); + rcu_read_lock(); + tfile = rcu_dereference(tun->tfiles[txq]); /* Drop packet if interface is not attached */ - if (!tun->tfile) + if (txq >= tun->numqueues) goto drop; + tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); + + BUG_ON(!tfile); + /* Drop if the filter does not like it. * This is a noop if the filter is disabled. * Filter can be enabled only for the TAP devices. */ if (!check_filter(&tun->txflt, skb)) goto drop; - if (tun->socket.sk->sk_filter && - sk_filter(tun->socket.sk, skb)) + if (tfile->socket.sk->sk_filter && + sk_filter(tfile->socket.sk, skb)) goto drop; - if (skb_queue_len(&tun->socket.sk->sk_receive_queue) >= dev->tx_queue_len) { + /* Limit the number of packets queued by divining txq length with the + * number of queues. + */ + if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) + >= dev->tx_queue_len / tun->numqueues){ if (!(tun->flags & TUN_ONE_QUEUE)) { /* Normal queueing mode. */ /* Packet scheduler handles dropping of further packets. */ - netif_stop_queue(dev); + netif_stop_subqueue(dev, txq); /* We won't see all dropped packets individually, so overrun * error is more appropriate. */ @@ -423,18 +715,22 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) skb_orphan(skb); /* Enqueue packet */ - skb_queue_tail(&tun->socket.sk->sk_receive_queue, skb); + skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); /* Notify and wake up reader process */ - if (tun->flags & TUN_FASYNC) - kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tun->wq.wait, POLLIN | + if (tfile->flags & TUN_FASYNC) + kill_fasync(&tfile->fasync, SIGIO, POLL_IN); + wake_up_interruptible_poll(&tfile->wq.wait, POLLIN | POLLRDNORM | POLLRDBAND); + + rcu_read_unlock(); return NETDEV_TX_OK; drop: dev->stats.tx_dropped++; + skb_tx_error(skb); kfree_skb(skb); + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -490,6 +786,7 @@ static const struct net_device_ops tun_netdev_ops = { .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, .ndo_fix_features = tun_net_fix_features, + .ndo_select_queue = tun_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tun_poll_controller, #endif @@ -505,11 +802,43 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_set_rx_mode = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, + .ndo_select_queue = tun_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tun_poll_controller, #endif }; +static int tun_flow_init(struct tun_struct *tun) +{ + int i; + + tun->flow_cache = kmem_cache_create("tun_flow_cache", + sizeof(struct tun_flow_entry), 0, 0, + NULL); + if (!tun->flow_cache) + return -ENOMEM; + + for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) + INIT_HLIST_HEAD(&tun->flows[i]); + + tun->ageing_time = TUN_FLOW_EXPIRE; + setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun); + mod_timer(&tun->flow_gc_timer, + round_jiffies_up(jiffies + tun->ageing_time)); + + return 0; +} + +static void tun_flow_uninit(struct tun_struct *tun) +{ + del_timer_sync(&tun->flow_gc_timer); + tun_flow_flush(tun); + + /* Wait for completion of call_rcu()'s */ + rcu_barrier(); + kmem_cache_destroy(tun->flow_cache); +} + /* Initialize net device. */ static void tun_net_init(struct net_device *dev) { @@ -546,7 +875,7 @@ static void tun_net_init(struct net_device *dev) /* Character device part */ /* Poll */ -static unsigned int tun_chr_poll(struct file *file, poll_table * wait) +static unsigned int tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; struct tun_struct *tun = __tun_get(tfile); @@ -556,11 +885,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) if (!tun) return POLLERR; - sk = tun->socket.sk; + sk = tfile->socket.sk; tun_debug(KERN_INFO, tun, "tun_chr_poll\n"); - poll_wait(file, &tun->wq.wait, wait); + poll_wait(file, &tfile->wq.wait, wait); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -579,16 +908,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) /* prepad is the amount to reserve at front. len is length after that. * linear is a hint as to how much to copy (usually headers). */ -static struct sk_buff *tun_alloc_skb(struct tun_struct *tun, +static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, size_t prepad, size_t len, size_t linear, int noblock) { - struct sock *sk = tun->socket.sk; + struct sock *sk = tfile->socket.sk; struct sk_buff *skb; int err; - sock_update_classid(sk); - /* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE || !linear) linear = len; @@ -685,9 +1012,9 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, } /* Get packet from user space buffer */ -static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, - const struct iovec *iv, size_t total_len, - size_t count, int noblock) +static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, + void *msg_control, const struct iovec *iv, + size_t total_len, size_t count, int noblock) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; @@ -757,7 +1084,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, } else copylen = len; - skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; @@ -854,6 +1181,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, tun->dev->stats.rx_packets++; tun->dev->stats.rx_bytes += len; + tun_flow_update(tun, skb, tfile->queue_index); return total_len; } @@ -862,6 +1190,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, { struct file *file = iocb->ki_filp; struct tun_struct *tun = tun_get(file); + struct tun_file *tfile = file->private_data; ssize_t result; if (!tun) @@ -869,8 +1198,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); - result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count, - file->f_flags & O_NONBLOCK); + result = tun_get_user(tun, tfile, NULL, iv, iov_length(iv, count), + count, file->f_flags & O_NONBLOCK); tun_put(tun); return result; @@ -878,6 +1207,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, /* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, + struct tun_file *tfile, struct sk_buff *skb, const struct iovec *iv, int len) { @@ -957,7 +1287,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, return total; } -static ssize_t tun_do_read(struct tun_struct *tun, +static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct kiocb *iocb, const struct iovec *iv, ssize_t len, int noblock) { @@ -968,12 +1298,12 @@ static ssize_t tun_do_read(struct tun_struct *tun, tun_debug(KERN_INFO, tun, "tun_chr_read\n"); if (unlikely(!noblock)) - add_wait_queue(&tun->wq.wait, &wait); + add_wait_queue(&tfile->wq.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ - if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { + if (!(skb = skb_dequeue(&tfile->socket.sk->sk_receive_queue))) { if (noblock) { ret = -EAGAIN; break; @@ -991,16 +1321,16 @@ static ssize_t tun_do_read(struct tun_struct *tun, schedule(); continue; } - netif_wake_queue(tun->dev); + netif_wake_subqueue(tun->dev, tfile->queue_index); - ret = tun_put_user(tun, skb, iv, len); + ret = tun_put_user(tun, tfile, skb, iv, len); kfree_skb(skb); break; } current->state = TASK_RUNNING; if (unlikely(!noblock)) - remove_wait_queue(&tun->wq.wait, &wait); + remove_wait_queue(&tfile->wq.wait, &wait); return ret; } @@ -1021,13 +1351,22 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, goto out; } - ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = tun_do_read(tun, tfile, iocb, iv, len, + file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); out: tun_put(tun); return ret; } +static void tun_free_netdev(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + + tun_flow_uninit(tun); + free_netdev(dev); +} + static void tun_setup(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -1056,7 +1395,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { static void tun_sock_write_space(struct sock *sk) { - struct tun_struct *tun; + struct tun_file *tfile; wait_queue_head_t *wqueue; if (!sock_writeable(sk)) @@ -1070,37 +1409,46 @@ static void tun_sock_write_space(struct sock *sk) wake_up_interruptible_sync_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); - tun = tun_sk(sk)->tun; - kill_fasync(&tun->fasync, SIGIO, POLL_OUT); -} - -static void tun_sock_destruct(struct sock *sk) -{ - free_netdev(tun_sk(sk)->tun->dev); + tfile = container_of(sk, struct tun_file, sk); + kill_fasync(&tfile->fasync, SIGIO, POLL_OUT); } static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { - struct tun_struct *tun = container_of(sock, struct tun_struct, socket); - return tun_get_user(tun, m->msg_control, m->msg_iov, total_len, - m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); + int ret; + struct tun_file *tfile = container_of(sock, struct tun_file, socket); + struct tun_struct *tun = __tun_get(tfile); + + if (!tun) + return -EBADFD; + ret = tun_get_user(tun, tfile, m->msg_control, m->msg_iov, total_len, + m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); + tun_put(tun); + return ret; } + static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len, int flags) { - struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + struct tun_file *tfile = container_of(sock, struct tun_file, socket); + struct tun_struct *tun = __tun_get(tfile); int ret; + + if (!tun) + return -EBADFD; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) return -EINVAL; - ret = tun_do_read(tun, iocb, m->msg_iov, total_len, + ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len, flags & MSG_DONTWAIT); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; } + tun_put(tun); return ret; } @@ -1121,7 +1469,7 @@ static const struct proto_ops tun_socket_ops = { static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, - .obj_size = sizeof(struct tun_sock), + .obj_size = sizeof(struct tun_file), }; static int tun_flags(struct tun_struct *tun) @@ -1142,6 +1490,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_VNET_HDR) flags |= IFF_VNET_HDR; + if (tun->flags & TUN_TAP_MQ) + flags |= IFF_MULTI_QUEUE; + return flags; } @@ -1178,15 +1529,13 @@ static DEVICE_ATTR(group, 0444, tun_show_group, NULL); static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { - struct sock *sk; struct tun_struct *tun; + struct tun_file *tfile = file->private_data; struct net_device *dev; int err; dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { - const struct cred *cred = current_cred(); - if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) @@ -1196,11 +1545,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; - if (((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || - (gid_valid(tun->group) && !in_egroup_p(tun->group))) && - !capable(CAP_NET_ADMIN)) + if (tun_not_capable(tun)) return -EPERM; - err = security_tun_dev_attach(tun->socket.sk); + err = security_tun_dev_attach(tfile->socket.sk); if (err < 0) return err; @@ -1233,8 +1580,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (*ifr->ifr_name) name = ifr->ifr_name; - dev = alloc_netdev(sizeof(struct tun_struct), name, - tun_setup); + dev = alloc_netdev_mqs(sizeof(struct tun_struct), name, + tun_setup, + MAX_TAP_QUEUES, MAX_TAP_QUEUES); if (!dev) return -ENOMEM; @@ -1246,46 +1594,35 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->flags = flags; tun->txflt.count = 0; tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); - set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags); - - err = -ENOMEM; - sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto); - if (!sk) - goto err_free_dev; - sk_change_net(sk, net); - tun->socket.wq = &tun->wq; - init_waitqueue_head(&tun->wq.wait); - tun->socket.ops = &tun_socket_ops; - sock_init_data(&tun->socket, sk); - sk->sk_write_space = tun_sock_write_space; - sk->sk_sndbuf = INT_MAX; - sock_set_flag(sk, SOCK_ZEROCOPY); + tun->filter_attached = false; + tun->sndbuf = tfile->socket.sk->sk_sndbuf; - tun_sk(sk)->tun = tun; + spin_lock_init(&tun->lock); - security_tun_dev_post_create(sk); + security_tun_dev_post_create(&tfile->sk); tun_net_init(dev); + if (tun_flow_init(tun)) + goto err_free_dev; + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES; dev->features = dev->hw_features; err = register_netdevice(tun->dev); if (err < 0) - goto err_free_sk; + goto err_free_dev; if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) || device_create_file(&tun->dev->dev, &dev_attr_owner) || device_create_file(&tun->dev->dev, &dev_attr_group)) pr_err("Failed to create tun sysfs files\n"); - sk->sk_destruct = tun_sock_destruct; - err = tun_attach(tun, file); if (err < 0) - goto failed; + goto err_free_dev; } tun_debug(KERN_INFO, tun, "tun_set_iff\n"); @@ -1305,20 +1642,22 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else tun->flags &= ~TUN_VNET_HDR; + if (ifr->ifr_flags & IFF_MULTI_QUEUE) + tun->flags |= TUN_TAP_MQ; + else + tun->flags &= ~TUN_TAP_MQ; + /* Make sure persistent devices do not get stuck in * xoff state. */ if (netif_running(tun->dev)) - netif_wake_queue(tun->dev); + netif_tx_wake_all_queues(tun->dev); strcpy(ifr->ifr_name, tun->dev->name); return 0; - err_free_sk: - tun_free_netdev(dev); err_free_dev: free_netdev(dev); - failed: return err; } @@ -1373,13 +1712,91 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) return 0; } +static void tun_detach_filter(struct tun_struct *tun, int n) +{ + int i; + struct tun_file *tfile; + + for (i = 0; i < n; i++) { + tfile = rcu_dereference_protected(tun->tfiles[i], + lockdep_rtnl_is_held()); + sk_detach_filter(tfile->socket.sk); + } + + tun->filter_attached = false; +} + +static int tun_attach_filter(struct tun_struct *tun) +{ + int i, ret = 0; + struct tun_file *tfile; + + for (i = 0; i < tun->numqueues; i++) { + tfile = rcu_dereference_protected(tun->tfiles[i], + lockdep_rtnl_is_held()); + ret = sk_attach_filter(&tun->fprog, tfile->socket.sk); + if (ret) { + tun_detach_filter(tun, i); + return ret; + } + } + + tun->filter_attached = true; + return ret; +} + +static void tun_set_sndbuf(struct tun_struct *tun) +{ + struct tun_file *tfile; + int i; + + for (i = 0; i < tun->numqueues; i++) { + tfile = rcu_dereference_protected(tun->tfiles[i], + lockdep_rtnl_is_held()); + tfile->socket.sk->sk_sndbuf = tun->sndbuf; + } +} + +static int tun_set_queue(struct file *file, struct ifreq *ifr) +{ + struct tun_file *tfile = file->private_data; + struct tun_struct *tun; + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + + if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { + dev = __dev_get_by_name(tfile->net, ifr->ifr_name); + if (!dev) { + ret = -EINVAL; + goto unlock; + } + + tun = netdev_priv(dev); + if (dev->netdev_ops != &tap_netdev_ops && + dev->netdev_ops != &tun_netdev_ops) + ret = -EINVAL; + else if (tun_not_capable(tun)) + ret = -EPERM; + else + ret = tun_attach(tun, file); + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) + __tun_detach(tfile, false); + else + ret = -EINVAL; + +unlock: + rtnl_unlock(); + return ret; +} + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { struct tun_file *tfile = file->private_data; struct tun_struct *tun; void __user* argp = (void __user*)arg; - struct sock_fprog fprog; struct ifreq ifr; kuid_t owner; kgid_t group; @@ -1387,7 +1804,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int vnet_hdr_sz; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -1398,10 +1815,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, * This is needed because we never checked for invalid flags on * TUNSETIFF. */ return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, + IFF_VNET_HDR | IFF_MULTI_QUEUE, (unsigned int __user*)argp); - } + } else if (cmd == TUNSETQUEUE) + return tun_set_queue(file, &ifr); + ret = 0; rtnl_lock(); tun = __tun_get(tfile); @@ -1422,7 +1841,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if (!tun) goto unlock; - tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %d\n", cmd); + tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd); ret = 0; switch (cmd) { @@ -1444,11 +1863,16 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; case TUNSETPERSIST: - /* Disable/Enable persist mode */ - if (arg) + /* Disable/Enable persist mode. Keep an extra reference to the + * module to prevent the module being unprobed. + */ + if (arg) { tun->flags |= TUN_PERSIST; - else + __module_get(THIS_MODULE); + } else { tun->flags &= ~TUN_PERSIST; + module_put(THIS_MODULE); + } tun_debug(KERN_INFO, tun, "persist %s\n", arg ? "enabled" : "disabled"); @@ -1462,7 +1886,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } tun->owner = owner; - tun_debug(KERN_INFO, tun, "owner set to %d\n", + tun_debug(KERN_INFO, tun, "owner set to %u\n", from_kuid(&init_user_ns, tun->owner)); break; @@ -1474,7 +1898,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } tun->group = group; - tun_debug(KERN_INFO, tun, "group set to %d\n", + tun_debug(KERN_INFO, tun, "group set to %u\n", from_kgid(&init_user_ns, tun->group)); break; @@ -1526,7 +1950,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; case TUNGETSNDBUF: - sndbuf = tun->socket.sk->sk_sndbuf; + sndbuf = tfile->socket.sk->sk_sndbuf; if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) ret = -EFAULT; break; @@ -1537,7 +1961,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; } - tun->socket.sk->sk_sndbuf = sndbuf; + tun->sndbuf = sndbuf; + tun_set_sndbuf(tun); break; case TUNGETVNETHDRSZ: @@ -1565,10 +1990,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) break; ret = -EFAULT; - if (copy_from_user(&fprog, argp, sizeof(fprog))) + if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog))) break; - ret = sk_attach_filter(&fprog, tun->socket.sk); + ret = tun_attach_filter(tun); break; case TUNDETACHFILTER: @@ -1576,7 +2001,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) break; - ret = sk_detach_filter(tun->socket.sk); + ret = 0; + tun_detach_filter(tun, tun->numqueues); break; default: @@ -1628,27 +2054,21 @@ static long tun_chr_compat_ioctl(struct file *file, static int tun_chr_fasync(int fd, struct file *file, int on) { - struct tun_struct *tun = tun_get(file); + struct tun_file *tfile = file->private_data; int ret; - if (!tun) - return -EBADFD; - - tun_debug(KERN_INFO, tun, "tun_chr_fasync %d\n", on); - - if ((ret = fasync_helper(fd, file, on, &tun->fasync)) < 0) + if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0) goto out; if (on) { ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); if (ret) goto out; - tun->flags |= TUN_FASYNC; + tfile->flags |= TUN_FASYNC; } else - tun->flags &= ~TUN_FASYNC; + tfile->flags &= ~TUN_FASYNC; ret = 0; out: - tun_put(tun); return ret; } @@ -1658,44 +2078,39 @@ static int tun_chr_open(struct inode *inode, struct file * file) DBG1(KERN_INFO, "tunX: tun_chr_open\n"); - tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); + tfile = (struct tun_file *)sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, + &tun_proto); if (!tfile) return -ENOMEM; - atomic_set(&tfile->count, 0); - tfile->tun = NULL; + rcu_assign_pointer(tfile->tun, NULL); tfile->net = get_net(current->nsproxy->net_ns); + tfile->flags = 0; + + rcu_assign_pointer(tfile->socket.wq, &tfile->wq); + init_waitqueue_head(&tfile->wq.wait); + + tfile->socket.file = file; + tfile->socket.ops = &tun_socket_ops; + + sock_init_data(&tfile->socket, &tfile->sk); + sk_change_net(&tfile->sk, tfile->net); + + tfile->sk.sk_write_space = tun_sock_write_space; + tfile->sk.sk_sndbuf = INT_MAX; + file->private_data = tfile; + set_bit(SOCK_EXTERNALLY_ALLOCATED, &tfile->socket.flags); + return 0; } static int tun_chr_close(struct inode *inode, struct file *file) { struct tun_file *tfile = file->private_data; - struct tun_struct *tun; + struct net *net = tfile->net; - tun = __tun_get(tfile); - if (tun) { - struct net_device *dev = tun->dev; - - tun_debug(KERN_INFO, tun, "tun_chr_close\n"); - - __tun_detach(tun); - - /* If desirable, unregister the netdevice. */ - if (!(tun->flags & TUN_PERSIST)) { - rtnl_lock(); - if (dev->reg_state == NETREG_REGISTERED) - unregister_netdevice(dev); - rtnl_unlock(); - } - } - - tun = tfile->tun; - if (tun) - sock_put(tun->socket.sk); - - put_net(tfile->net); - kfree(tfile); + tun_detach(tfile, true); + put_net(net); return 0; } @@ -1822,14 +2237,13 @@ static void tun_cleanup(void) * holding a reference to the file for as long as the socket is in use. */ struct socket *tun_get_socket(struct file *file) { - struct tun_struct *tun; + struct tun_file *tfile; if (file->f_op != &tun_fops) return ERR_PTR(-EINVAL); - tun = tun_get(file); - if (!tun) + tfile = file->private_data; + if (!tfile) return ERR_PTR(-EBADFD); - tun_put(tun); - return &tun->socket; + return &tfile->socket; } EXPORT_SYMBOL_GPL(tun_get_socket); diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index c1ae76968f4..ef976215b64 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -219,6 +219,24 @@ config USB_NET_CDC_NCM * ST-Ericsson M343 HSPA Mobile Broadband Modem (reference design) * Ericsson F5521gw Mobile Broadband Module +config USB_NET_CDC_MBIM + tristate "CDC MBIM support" + depends on USB_USBNET + select USB_WDM + select USB_NET_CDC_NCM + help + This driver provides support for CDC MBIM (Mobile Broadband + Interface Model) devices. The CDC MBIM specification is + available from <http://www.usb.org/>. + + MBIM devices require configuration using the management + protocol defined by the MBIM specification. This driver + provides unfiltered access to the MBIM control channel + through the associated /dev/cdc-wdmx character device. + + To compile this driver as a module, choose M here: the + module will be called cdc_mbim. + config USB_NET_DM9601 tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" depends on USB_USBNET @@ -230,6 +248,8 @@ config USB_NET_DM9601 config USB_NET_SMSC75XX tristate "SMSC LAN75XX based USB 2.0 gigabit ethernet devices" depends on USB_USBNET + select BITREVERSE + select CRC16 select CRC32 help This option adds support for SMSC LAN95XX based USB 2.0 @@ -238,6 +258,8 @@ config USB_NET_SMSC75XX config USB_NET_SMSC95XX tristate "SMSC LAN95XX based USB 2.0 10/100 ethernet devices" depends on USB_USBNET + select BITREVERSE + select CRC16 select CRC32 help This option adds support for SMSC LAN95XX based USB 2.0 diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index bf063008c1a..478691326f3 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -31,4 +31,5 @@ obj-$(CONFIG_USB_NET_CX82310_ETH) += cx82310_eth.o obj-$(CONFIG_USB_NET_CDC_NCM) += cdc_ncm.o obj-$(CONFIG_USB_VL600) += lg-vl600.o obj-$(CONFIG_USB_NET_QMI_WWAN) += qmi_wwan.o +obj-$(CONFIG_USB_NET_CDC_MBIM) += cdc_mbim.o diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 774d9ce2daf..50d167330d3 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -25,121 +25,30 @@ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { - void *buf; - int err = -ENOMEM; - - netdev_dbg(dev->net, "asix_read_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n", - cmd, value, index, size); - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) - goto out; - - err = usb_control_msg( - dev->udev, - usb_rcvctrlpipe(dev->udev, 0), - cmd, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, - index, - buf, - size, - USB_CTRL_GET_TIMEOUT); - if (err == size) - memcpy(data, buf, size); - else if (err >= 0) - err = -EINVAL; - kfree(buf); + int ret; + ret = usbnet_read_cmd(dev, cmd, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); -out: - return err; + if (ret != size && ret >= 0) + return -EINVAL; + return ret; } int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { - void *buf = NULL; - int err = -ENOMEM; - - netdev_dbg(dev->net, "asix_write_cmd() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n", - cmd, value, index, size); - - if (data) { - buf = kmemdup(data, size, GFP_KERNEL); - if (!buf) - goto out; - } - - err = usb_control_msg( - dev->udev, - usb_sndctrlpipe(dev->udev, 0), - cmd, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, - index, - buf, - size, - USB_CTRL_SET_TIMEOUT); - kfree(buf); - -out: - return err; -} - -static void asix_async_cmd_callback(struct urb *urb) -{ - struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; - int status = urb->status; - - if (status < 0) - printk(KERN_DEBUG "asix_async_cmd_callback() failed with %d", - status); - - kfree(req); - usb_free_urb(urb); + return usbnet_write_cmd(dev, cmd, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); } void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { - struct usb_ctrlrequest *req; - int status; - struct urb *urb; - - netdev_dbg(dev->net, "asix_write_cmd_async() cmd=0x%02x value=0x%04x index=0x%04x size=%d\n", - cmd, value, index, size); - - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->net, "Error allocating URB in write_cmd_async!\n"); - return; - } - - req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC); - if (!req) { - netdev_err(dev->net, "Failed to allocate memory for control request\n"); - usb_free_urb(urb); - return; - } - - req->bRequestType = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; - req->bRequest = cmd; - req->wValue = cpu_to_le16(value); - req->wIndex = cpu_to_le16(index); - req->wLength = cpu_to_le16(size); - - usb_fill_control_urb(urb, dev->udev, - usb_sndctrlpipe(dev->udev, 0), - (void *)req, data, size, - asix_async_cmd_callback, req); - - status = usb_submit_urb(urb, GFP_ATOMIC); - if (status < 0) { - netdev_err(dev->net, "Error submitting the control message: status=%d\n", - status); - kfree(req); - usb_free_urb(urb); - } + usbnet_write_cmd_async(dev, cmd, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); } int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c new file mode 100644 index 00000000000..42f51c71ec1 --- /dev/null +++ b/drivers/net/usb/cdc_mbim.c @@ -0,0 +1,412 @@ +/* + * Copyright (c) 2012 Smith Micro Software, Inc. + * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> + * + * This driver is based on and reuse most of cdc_ncm, which is + * Copyright (C) ST-Ericsson 2010-2012 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/mii.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/cdc-wdm.h> +#include <linux/usb/cdc_ncm.h> + +/* driver specific data - must match cdc_ncm usage */ +struct cdc_mbim_state { + struct cdc_ncm_ctx *ctx; + atomic_t pmcount; + struct usb_driver *subdriver; + struct usb_interface *control; + struct usb_interface *data; +}; + +/* using a counter to merge subdriver requests with our own into a combined state */ +static int cdc_mbim_manage_power(struct usbnet *dev, int on) +{ + struct cdc_mbim_state *info = (void *)&dev->data; + int rv = 0; + + dev_dbg(&dev->intf->dev, "%s() pmcount=%d, on=%d\n", __func__, atomic_read(&info->pmcount), on); + + if ((on && atomic_add_return(1, &info->pmcount) == 1) || (!on && atomic_dec_and_test(&info->pmcount))) { + /* need autopm_get/put here to ensure the usbcore sees the new value */ + rv = usb_autopm_get_interface(dev->intf); + if (rv < 0) + goto err; + dev->intf->needs_remote_wakeup = on; + usb_autopm_put_interface(dev->intf); + } +err: + return rv; +} + +static int cdc_mbim_wdm_manage_power(struct usb_interface *intf, int status) +{ + struct usbnet *dev = usb_get_intfdata(intf); + + /* can be called while disconnecting */ + if (!dev) + return 0; + + return cdc_mbim_manage_power(dev, status); +} + + +static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) +{ + struct cdc_ncm_ctx *ctx; + struct usb_driver *subdriver = ERR_PTR(-ENODEV); + int ret = -ENODEV; + u8 data_altsetting = CDC_NCM_DATA_ALTSETTING_NCM; + struct cdc_mbim_state *info = (void *)&dev->data; + + /* see if interface supports MBIM alternate setting */ + if (intf->num_altsetting == 2) { + if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + CDC_NCM_COMM_ALTSETTING_MBIM); + data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM; + } + + /* Probably NCM, defer for cdc_ncm_bind */ + if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + goto err; + + ret = cdc_ncm_bind_common(dev, intf, data_altsetting); + if (ret) + goto err; + + ctx = info->ctx; + + /* The MBIM descriptor and the status endpoint are required */ + if (ctx->mbim_desc && dev->status) + subdriver = usb_cdc_wdm_register(ctx->control, + &dev->status->desc, + le16_to_cpu(ctx->mbim_desc->wMaxControlMessage), + cdc_mbim_wdm_manage_power); + if (IS_ERR(subdriver)) { + ret = PTR_ERR(subdriver); + cdc_ncm_unbind(dev, intf); + goto err; + } + + /* can't let usbnet use the interrupt endpoint */ + dev->status = NULL; + info->subdriver = subdriver; + + /* MBIM cannot do ARP */ + dev->net->flags |= IFF_NOARP; + + /* no need to put the VLAN tci in the packet headers */ + dev->net->features |= NETIF_F_HW_VLAN_TX; +err: + return ret; +} + +static void cdc_mbim_unbind(struct usbnet *dev, struct usb_interface *intf) +{ + struct cdc_mbim_state *info = (void *)&dev->data; + struct cdc_ncm_ctx *ctx = info->ctx; + + /* disconnect subdriver from control interface */ + if (info->subdriver && info->subdriver->disconnect) + info->subdriver->disconnect(ctx->control); + info->subdriver = NULL; + + /* let NCM unbind clean up both control and data interface */ + cdc_ncm_unbind(dev, intf); +} + + +static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) +{ + struct sk_buff *skb_out; + struct cdc_mbim_state *info = (void *)&dev->data; + struct cdc_ncm_ctx *ctx = info->ctx; + __le32 sign = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); + u16 tci = 0; + u8 *c; + + if (!ctx) + goto error; + + if (skb) { + if (skb->len <= sizeof(ETH_HLEN)) + goto error; + + /* mapping VLANs to MBIM sessions: + * no tag => IPS session <0> + * 1 - 255 => IPS session <vlanid> + * 256 - 511 => DSS session <vlanid - 256> + * 512 - 4095 => unsupported, drop + */ + vlan_get_tag(skb, &tci); + + switch (tci & 0x0f00) { + case 0x0000: /* VLAN ID 0 - 255 */ + /* verify that datagram is IPv4 or IPv6 */ + skb_reset_mac_header(skb); + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + break; + default: + goto error; + } + c = (u8 *)&sign; + c[3] = tci; + break; + case 0x0100: /* VLAN ID 256 - 511 */ + sign = cpu_to_le32(USB_CDC_MBIM_NDP16_DSS_SIGN); + c = (u8 *)&sign; + c[3] = tci; + break; + default: + netif_err(dev, tx_err, dev->net, + "unsupported tci=0x%04x\n", tci); + goto error; + } + skb_pull(skb, ETH_HLEN); + } + + spin_lock_bh(&ctx->mtx); + skb_out = cdc_ncm_fill_tx_frame(ctx, skb, sign); + spin_unlock_bh(&ctx->mtx); + return skb_out; + +error: + if (skb) + dev_kfree_skb_any(skb); + + return NULL; +} + +static struct sk_buff *cdc_mbim_process_dgram(struct usbnet *dev, u8 *buf, size_t len, u16 tci) +{ + __be16 proto = htons(ETH_P_802_3); + struct sk_buff *skb = NULL; + + if (tci < 256) { /* IPS session? */ + if (len < sizeof(struct iphdr)) + goto err; + + switch (*buf & 0xf0) { + case 0x40: + proto = htons(ETH_P_IP); + break; + case 0x60: + proto = htons(ETH_P_IPV6); + break; + default: + goto err; + } + } + + skb = netdev_alloc_skb_ip_align(dev->net, len + ETH_HLEN); + if (!skb) + goto err; + + /* add an ethernet header */ + skb_put(skb, ETH_HLEN); + skb_reset_mac_header(skb); + eth_hdr(skb)->h_proto = proto; + memset(eth_hdr(skb)->h_source, 0, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); + + /* add datagram */ + memcpy(skb_put(skb, len), buf, len); + + /* map MBIM session to VLAN */ + if (tci) + vlan_put_tag(skb, tci); +err: + return skb; +} + +static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) +{ + struct sk_buff *skb; + struct cdc_mbim_state *info = (void *)&dev->data; + struct cdc_ncm_ctx *ctx = info->ctx; + int len; + int nframes; + int x; + int offset; + struct usb_cdc_ncm_ndp16 *ndp16; + struct usb_cdc_ncm_dpe16 *dpe16; + int ndpoffset; + int loopcount = 50; /* arbitrary max preventing infinite loop */ + u8 *c; + u16 tci; + + ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in); + if (ndpoffset < 0) + goto error; + +next_ndp: + nframes = cdc_ncm_rx_verify_ndp16(skb_in, ndpoffset); + if (nframes < 0) + goto error; + + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); + + switch (ndp16->dwSignature & cpu_to_le32(0x00ffffff)) { + case cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN): + c = (u8 *)&ndp16->dwSignature; + tci = c[3]; + break; + case cpu_to_le32(USB_CDC_MBIM_NDP16_DSS_SIGN): + c = (u8 *)&ndp16->dwSignature; + tci = c[3] + 256; + break; + default: + netif_dbg(dev, rx_err, dev->net, + "unsupported NDP signature <0x%08x>\n", + le32_to_cpu(ndp16->dwSignature)); + goto err_ndp; + + } + + dpe16 = ndp16->dpe16; + for (x = 0; x < nframes; x++, dpe16++) { + offset = le16_to_cpu(dpe16->wDatagramIndex); + len = le16_to_cpu(dpe16->wDatagramLength); + + /* + * CDC NCM ch. 3.7 + * All entries after first NULL entry are to be ignored + */ + if ((offset == 0) || (len == 0)) { + if (!x) + goto err_ndp; /* empty NTB */ + break; + } + + /* sanity checking */ + if (((offset + len) > skb_in->len) || (len > ctx->rx_max)) { + netif_dbg(dev, rx_err, dev->net, + "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n", + x, offset, len, skb_in); + if (!x) + goto err_ndp; + break; + } else { + skb = cdc_mbim_process_dgram(dev, skb_in->data + offset, len, tci); + if (!skb) + goto error; + usbnet_skb_return(dev, skb); + } + } +err_ndp: + /* are there more NDPs to process? */ + ndpoffset = le16_to_cpu(ndp16->wNextNdpIndex); + if (ndpoffset && loopcount--) + goto next_ndp; + + return 1; +error: + return 0; +} + +static int cdc_mbim_suspend(struct usb_interface *intf, pm_message_t message) +{ + int ret = 0; + struct usbnet *dev = usb_get_intfdata(intf); + struct cdc_mbim_state *info = (void *)&dev->data; + struct cdc_ncm_ctx *ctx = info->ctx; + + if (ctx == NULL) { + ret = -1; + goto error; + } + + ret = usbnet_suspend(intf, message); + if (ret < 0) + goto error; + + if (intf == ctx->control && info->subdriver && info->subdriver->suspend) + ret = info->subdriver->suspend(intf, message); + if (ret < 0) + usbnet_resume(intf); + +error: + return ret; +} + +static int cdc_mbim_resume(struct usb_interface *intf) +{ + int ret = 0; + struct usbnet *dev = usb_get_intfdata(intf); + struct cdc_mbim_state *info = (void *)&dev->data; + struct cdc_ncm_ctx *ctx = info->ctx; + bool callsub = (intf == ctx->control && info->subdriver && info->subdriver->resume); + + if (callsub) + ret = info->subdriver->resume(intf); + if (ret < 0) + goto err; + ret = usbnet_resume(intf); + if (ret < 0 && callsub && info->subdriver->suspend) + info->subdriver->suspend(intf, PMSG_SUSPEND); +err: + return ret; +} + +static const struct driver_info cdc_mbim_info = { + .description = "CDC MBIM", + .flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN, + .bind = cdc_mbim_bind, + .unbind = cdc_mbim_unbind, + .manage_power = cdc_mbim_manage_power, + .rx_fixup = cdc_mbim_rx_fixup, + .tx_fixup = cdc_mbim_tx_fixup, +}; + +static const struct usb_device_id mbim_devs[] = { + /* This duplicate NCM entry is intentional. MBIM devices can + * be disguised as NCM by default, and this is necessary to + * allow us to bind the correct driver_info to such devices. + * + * bind() will sort out this for us, selecting the correct + * entry and reject the other + */ + { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info, + }, + { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info, + }, + { + }, +}; +MODULE_DEVICE_TABLE(usb, mbim_devs); + +static struct usb_driver cdc_mbim_driver = { + .name = "cdc_mbim", + .id_table = mbim_devs, + .probe = usbnet_probe, + .disconnect = usbnet_disconnect, + .suspend = cdc_mbim_suspend, + .resume = cdc_mbim_resume, + .reset_resume = cdc_mbim_resume, + .supports_autosuspend = 1, + .disable_hub_initiated_lpm = 1, +}; +module_usb_driver(cdc_mbim_driver); + +MODULE_AUTHOR("Greg Suarez <gsuarez@smithmicro.com>"); +MODULE_AUTHOR("Bjørn Mork <bjorn@mork.no>"); +MODULE_DESCRIPTION("USB CDC MBIM host driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 4cd582a4f62..ddc7b8880f6 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -51,90 +51,10 @@ #include <linux/atomic.h> #include <linux/usb/usbnet.h> #include <linux/usb/cdc.h> +#include <linux/usb/cdc_ncm.h> #define DRIVER_VERSION "14-Mar-2012" -/* CDC NCM subclass 3.2.1 */ -#define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 - -/* Maximum NTB length */ -#define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ -#define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ - -/* Minimum value for MaxDatagramSize, ch. 6.2.9 */ -#define CDC_NCM_MIN_DATAGRAM_SIZE 1514 /* bytes */ - -#define CDC_NCM_MIN_TX_PKT 512 /* bytes */ - -/* Default value for MaxDatagramSize */ -#define CDC_NCM_MAX_DATAGRAM_SIZE 8192 /* bytes */ - -/* - * Maximum amount of datagrams in NCM Datagram Pointer Table, not counting - * the last NULL entry. - */ -#define CDC_NCM_DPT_DATAGRAMS_MAX 40 - -/* Restart the timer, if amount of datagrams is less than given value */ -#define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3 -#define CDC_NCM_TIMER_PENDING_CNT 2 -#define CDC_NCM_TIMER_INTERVAL (400UL * NSEC_PER_USEC) - -/* The following macro defines the minimum header space */ -#define CDC_NCM_MIN_HDR_SIZE \ - (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \ - (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) - -struct cdc_ncm_data { - struct usb_cdc_ncm_nth16 nth16; - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16[CDC_NCM_DPT_DATAGRAMS_MAX + 1]; -}; - -struct cdc_ncm_ctx { - struct cdc_ncm_data tx_ncm; - struct usb_cdc_ncm_ntb_parameters ncm_parm; - struct hrtimer tx_timer; - struct tasklet_struct bh; - - const struct usb_cdc_ncm_desc *func_desc; - const struct usb_cdc_header_desc *header_desc; - const struct usb_cdc_union_desc *union_desc; - const struct usb_cdc_ether_desc *ether_desc; - - struct net_device *netdev; - struct usb_device *udev; - struct usb_host_endpoint *in_ep; - struct usb_host_endpoint *out_ep; - struct usb_host_endpoint *status_ep; - struct usb_interface *intf; - struct usb_interface *control; - struct usb_interface *data; - - struct sk_buff *tx_curr_skb; - struct sk_buff *tx_rem_skb; - - spinlock_t mtx; - atomic_t stop; - - u32 tx_timer_pending; - u32 tx_curr_offset; - u32 tx_curr_last_offset; - u32 tx_curr_frame_num; - u32 rx_speed; - u32 tx_speed; - u32 rx_max; - u32 tx_max; - u32 max_datagram_size; - u16 tx_max_datagrams; - u16 tx_remainder; - u16 tx_modulus; - u16 tx_ndp_modulus; - u16 tx_seq; - u16 rx_seq; - u16 connected; -}; - static void cdc_ncm_txpath_bh(unsigned long param); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); @@ -158,17 +78,19 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) u8 flags; u8 iface_no; int err; + int eth_hlen; u16 ntb_fmt_supported; + u32 min_dgram_size; + u32 min_hdr_size; + struct usbnet *dev = netdev_priv(ctx->netdev); iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; - err = usb_control_msg(ctx->udev, - usb_rcvctrlpipe(ctx->udev, 0), - USB_CDC_GET_NTB_PARAMETERS, - USB_TYPE_CLASS | USB_DIR_IN - | USB_RECIP_INTERFACE, - 0, iface_no, &ctx->ncm_parm, - sizeof(ctx->ncm_parm), 10000); + err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS, + USB_TYPE_CLASS | USB_DIR_IN + |USB_RECIP_INTERFACE, + 0, iface_no, &ctx->ncm_parm, + sizeof(ctx->ncm_parm)); if (err < 0) { pr_debug("failed GET_NTB_PARAMETERS\n"); return 1; @@ -184,10 +106,19 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams); ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported); - if (ctx->func_desc != NULL) + eth_hlen = ETH_HLEN; + min_dgram_size = CDC_NCM_MIN_DATAGRAM_SIZE; + min_hdr_size = CDC_NCM_MIN_HDR_SIZE; + if (ctx->mbim_desc != NULL) { + flags = ctx->mbim_desc->bmNetworkCapabilities; + eth_hlen = 0; + min_dgram_size = CDC_MBIM_MIN_DATAGRAM_SIZE; + min_hdr_size = 0; + } else if (ctx->func_desc != NULL) { flags = ctx->func_desc->bmNetworkCapabilities; - else + } else { flags = 0; + } pr_debug("dwNtbInMaxSize=%u dwNtbOutMaxSize=%u " "wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u " @@ -215,49 +146,19 @@ static u8 cdc_ncm_setup(struct cdc_ncm_ctx *ctx) /* inform device about NTB input size changes */ if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { + __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); - if (flags & USB_CDC_NCM_NCAP_NTB_INPUT_SIZE) { - struct usb_cdc_ncm_ndp_input_size *ndp_in_sz; - - ndp_in_sz = kzalloc(sizeof(*ndp_in_sz), GFP_KERNEL); - if (!ndp_in_sz) { - err = -ENOMEM; - goto size_err; - } - - err = usb_control_msg(ctx->udev, - usb_sndctrlpipe(ctx->udev, 0), - USB_CDC_SET_NTB_INPUT_SIZE, - USB_TYPE_CLASS | USB_DIR_OUT - | USB_RECIP_INTERFACE, - 0, iface_no, ndp_in_sz, 8, 1000); - kfree(ndp_in_sz); - } else { - __le32 *dwNtbInMaxSize; - dwNtbInMaxSize = kzalloc(sizeof(*dwNtbInMaxSize), - GFP_KERNEL); - if (!dwNtbInMaxSize) { - err = -ENOMEM; - goto size_err; - } - *dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); - - err = usb_control_msg(ctx->udev, - usb_sndctrlpipe(ctx->udev, 0), - USB_CDC_SET_NTB_INPUT_SIZE, - USB_TYPE_CLASS | USB_DIR_OUT - | USB_RECIP_INTERFACE, - 0, iface_no, dwNtbInMaxSize, 4, 1000); - kfree(dwNtbInMaxSize); - } -size_err: + err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + 0, iface_no, &dwNtbInMaxSize, 4); if (err < 0) pr_debug("Setting NTB Input Size failed\n"); } /* verify maximum size of transmitted NTB in bytes */ if ((ctx->tx_max < - (CDC_NCM_MIN_HDR_SIZE + CDC_NCM_MIN_DATAGRAM_SIZE)) || + (min_hdr_size + min_dgram_size)) || (ctx->tx_max > CDC_NCM_NTB_MAX_SIZE_TX)) { pr_debug("Using default maximum transmit length=%d\n", CDC_NCM_NTB_MAX_SIZE_TX); @@ -299,93 +200,85 @@ size_err: } /* adjust TX-remainder according to NCM specification. */ - ctx->tx_remainder = ((ctx->tx_remainder - ETH_HLEN) & - (ctx->tx_modulus - 1)); + ctx->tx_remainder = ((ctx->tx_remainder - eth_hlen) & + (ctx->tx_modulus - 1)); /* additional configuration */ /* set CRC Mode */ if (flags & USB_CDC_NCM_NCAP_CRC_MODE) { - err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), - USB_CDC_SET_CRC_MODE, - USB_TYPE_CLASS | USB_DIR_OUT - | USB_RECIP_INTERFACE, - USB_CDC_NCM_CRC_NOT_APPENDED, - iface_no, NULL, 0, 1000); + err = usbnet_write_cmd(dev, USB_CDC_SET_CRC_MODE, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + USB_CDC_NCM_CRC_NOT_APPENDED, + iface_no, NULL, 0); if (err < 0) pr_debug("Setting CRC mode off failed\n"); } /* set NTB format, if both formats are supported */ if (ntb_fmt_supported & USB_CDC_NCM_NTH32_SIGN) { - err = usb_control_msg(ctx->udev, usb_sndctrlpipe(ctx->udev, 0), - USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS - | USB_DIR_OUT | USB_RECIP_INTERFACE, - USB_CDC_NCM_NTB16_FORMAT, - iface_no, NULL, 0, 1000); + err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, + USB_TYPE_CLASS | USB_DIR_OUT + | USB_RECIP_INTERFACE, + USB_CDC_NCM_NTB16_FORMAT, + iface_no, NULL, 0); if (err < 0) pr_debug("Setting NTB format to 16-bit failed\n"); } - ctx->max_datagram_size = CDC_NCM_MIN_DATAGRAM_SIZE; + ctx->max_datagram_size = min_dgram_size; /* set Max Datagram Size (MTU) */ if (flags & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE) { - __le16 *max_datagram_size; - u16 eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); - - max_datagram_size = kzalloc(sizeof(*max_datagram_size), - GFP_KERNEL); - if (!max_datagram_size) { - err = -ENOMEM; + __le16 max_datagram_size; + u16 eth_max_sz; + if (ctx->ether_desc != NULL) + eth_max_sz = le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); + else if (ctx->mbim_desc != NULL) + eth_max_sz = le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize); + else goto max_dgram_err; - } - err = usb_control_msg(ctx->udev, usb_rcvctrlpipe(ctx->udev, 0), - USB_CDC_GET_MAX_DATAGRAM_SIZE, - USB_TYPE_CLASS | USB_DIR_IN - | USB_RECIP_INTERFACE, - 0, iface_no, max_datagram_size, - 2, 1000); + err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, + USB_TYPE_CLASS | USB_DIR_IN + | USB_RECIP_INTERFACE, + 0, iface_no, &max_datagram_size, 2); if (err < 0) { pr_debug("GET_MAX_DATAGRAM_SIZE failed, use size=%u\n", - CDC_NCM_MIN_DATAGRAM_SIZE); + min_dgram_size); } else { ctx->max_datagram_size = - le16_to_cpu(*max_datagram_size); + le16_to_cpu(max_datagram_size); /* Check Eth descriptor value */ if (ctx->max_datagram_size > eth_max_sz) ctx->max_datagram_size = eth_max_sz; if (ctx->max_datagram_size > CDC_NCM_MAX_DATAGRAM_SIZE) - ctx->max_datagram_size = - CDC_NCM_MAX_DATAGRAM_SIZE; + ctx->max_datagram_size = CDC_NCM_MAX_DATAGRAM_SIZE; - if (ctx->max_datagram_size < CDC_NCM_MIN_DATAGRAM_SIZE) - ctx->max_datagram_size = - CDC_NCM_MIN_DATAGRAM_SIZE; + if (ctx->max_datagram_size < min_dgram_size) + ctx->max_datagram_size = min_dgram_size; /* if value changed, update device */ if (ctx->max_datagram_size != - le16_to_cpu(*max_datagram_size)) { - err = usb_control_msg(ctx->udev, - usb_sndctrlpipe(ctx->udev, 0), + le16_to_cpu(max_datagram_size)) { + err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, 0, - iface_no, max_datagram_size, - 2, 1000); + iface_no, &max_datagram_size, + 2); if (err < 0) pr_debug("SET_MAX_DGRAM_SIZE failed\n"); } } - kfree(max_datagram_size); } max_dgram_err: - if (ctx->netdev->mtu != (ctx->max_datagram_size - ETH_HLEN)) - ctx->netdev->mtu = ctx->max_datagram_size - ETH_HLEN; + if (ctx->netdev->mtu != (ctx->max_datagram_size - eth_hlen)) + ctx->netdev->mtu = ctx->max_datagram_size - eth_hlen; return 0; } @@ -451,7 +344,7 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = { .nway_reset = usbnet_nway_reset, }; -static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) { struct cdc_ncm_ctx *ctx; struct usb_driver *driver; @@ -525,6 +418,13 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) ctx->func_desc = (const struct usb_cdc_ncm_desc *)buf; break; + case USB_CDC_MBIM_TYPE: + if (buf[0] < sizeof(*(ctx->mbim_desc))) + break; + + ctx->mbim_desc = (const struct usb_cdc_mbim_desc *)buf; + break; + default: break; } @@ -537,7 +437,7 @@ advance: /* check if we got everything */ if ((ctx->control == NULL) || (ctx->data == NULL) || - (ctx->ether_desc == NULL) || (ctx->control != intf)) + ((!ctx->mbim_desc) && ((ctx->ether_desc == NULL) || (ctx->control != intf)))) goto error; /* claim interfaces, if any */ @@ -557,7 +457,7 @@ advance: goto error2; /* configure data interface */ - temp = usb_set_interface(dev->udev, iface_no, 1); + temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) goto error2; @@ -574,11 +474,13 @@ advance: usb_set_intfdata(ctx->control, dev); usb_set_intfdata(ctx->intf, dev); - temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); - if (temp) - goto error2; + if (ctx->ether_desc) { + temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); + if (temp) + goto error2; + dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr); + } - dev_info(&dev->udev->dev, "MAC-Address: %pM\n", dev->net->dev_addr); dev->in = usb_rcvbulkpipe(dev->udev, ctx->in_ep->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); @@ -587,13 +489,6 @@ advance: dev->status = ctx->status_ep; dev->rx_urb_size = ctx->rx_max; - /* - * We should get an event when network connection is "connected" or - * "disconnected". Set network connection in "disconnected" state - * (carrier is OFF) during attach, so the IP network stack does not - * start IPv6 negotiation and more. - */ - netif_carrier_off(dev->net); ctx->tx_speed = ctx->rx_speed = 0; return 0; @@ -607,8 +502,9 @@ error: dev_info(&dev->udev->dev, "bind() failure\n"); return -ENODEV; } +EXPORT_SYMBOL_GPL(cdc_ncm_bind_common); -static void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) +void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; struct usb_driver *driver = driver_of(intf); @@ -638,52 +534,121 @@ static void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) usb_set_intfdata(ctx->intf, NULL); cdc_ncm_free(ctx); } +EXPORT_SYMBOL_GPL(cdc_ncm_unbind); -static void cdc_ncm_zero_fill(u8 *ptr, u32 first, u32 end, u32 max) +static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) { - if (first >= max) - return; - if (first >= end) - return; - if (end > max) - end = max; - memset(ptr + first, 0, end - first); + int ret; + + /* The MBIM spec defines a NCM compatible default altsetting, + * which we may have matched: + * + * "Functions that implement both NCM 1.0 and MBIM (an + * “NCM/MBIM function”) according to this recommendation + * shall provide two alternate settings for the + * Communication Interface. Alternate setting 0, and the + * associated class and endpoint descriptors, shall be + * constructed according to the rules given for the + * Communication Interface in section 5 of [USBNCM10]. + * Alternate setting 1, and the associated class and + * endpoint descriptors, shall be constructed according to + * the rules given in section 6 (USB Device Model) of this + * specification." + * + * Do not bind to such interfaces, allowing cdc_mbim to handle + * them + */ +#if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) + if ((intf->num_altsetting == 2) && + !usb_set_interface(dev->udev, + intf->cur_altsetting->desc.bInterfaceNumber, + CDC_NCM_COMM_ALTSETTING_MBIM) && + cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) + return -ENODEV; +#endif + + /* NCM data altsetting is always 1 */ + ret = cdc_ncm_bind_common(dev, intf, 1); + + /* + * We should get an event when network connection is "connected" or + * "disconnected". Set network connection in "disconnected" state + * (carrier is OFF) during attach, so the IP network stack does not + * start IPv6 negotiation and more. + */ + netif_carrier_off(dev->net); + return ret; } -static struct sk_buff * -cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) +static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max) { + size_t align = ALIGN(skb->len, modulus) - skb->len + remainder; + + if (skb->len + align > max) + align = max - skb->len; + if (align && skb_tailroom(skb) >= align) + memset(skb_put(skb, align), 0, align); +} + +/* return a pointer to a valid struct usb_cdc_ncm_ndp16 of type sign, possibly + * allocating a new one within skb + */ +static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign, size_t reserve) +{ + struct usb_cdc_ncm_ndp16 *ndp16 = NULL; + struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; + size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); + + /* follow the chain of NDPs, looking for a match */ + while (ndpoffset) { + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); + if (ndp16->dwSignature == sign) + return ndp16; + ndpoffset = le16_to_cpu(ndp16->wNextNdpIndex); + } + + /* align new NDP */ + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + + /* verify that there is room for the NDP and the datagram (reserve) */ + if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE) + return NULL; + + /* link to it */ + if (ndp16) + ndp16->wNextNdpIndex = cpu_to_le16(skb->len); + else + nth16->wNdpIndex = cpu_to_le16(skb->len); + + /* push a new empty NDP */ + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE); + ndp16->dwSignature = sign; + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); + return ndp16; +} + +struct sk_buff * +cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign) +{ + struct usb_cdc_ncm_nth16 *nth16; + struct usb_cdc_ncm_ndp16 *ndp16; struct sk_buff *skb_out; - u32 rem; - u32 offset; - u32 last_offset; - u16 n = 0, index; + u16 n = 0, index, ndplen; u8 ready2send = 0; /* if there is a remaining skb, it gets priority */ - if (skb != NULL) + if (skb != NULL) { swap(skb, ctx->tx_rem_skb); - else + swap(sign, ctx->tx_rem_sign); + } else { ready2send = 1; - - /* - * +----------------+ - * | skb_out | - * +----------------+ - * ^ offset - * ^ last_offset - */ + } /* check if we are resuming an OUT skb */ - if (ctx->tx_curr_skb != NULL) { - /* pop variables */ - skb_out = ctx->tx_curr_skb; - offset = ctx->tx_curr_offset; - last_offset = ctx->tx_curr_last_offset; - n = ctx->tx_curr_frame_num; + skb_out = ctx->tx_curr_skb; - } else { - /* reset variables */ + /* allocate a new OUT skb */ + if (!skb_out) { skb_out = alloc_skb((ctx->tx_max + 1), GFP_ATOMIC); if (skb_out == NULL) { if (skb != NULL) { @@ -692,35 +657,21 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) } goto exit_no_skb; } + /* fill out the initial 16-bit NTB header */ + nth16 = (struct usb_cdc_ncm_nth16 *)memset(skb_put(skb_out, sizeof(struct usb_cdc_ncm_nth16)), 0, sizeof(struct usb_cdc_ncm_nth16)); + nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); + nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + nth16->wSequence = cpu_to_le16(ctx->tx_seq++); - /* make room for NTH and NDP */ - offset = ALIGN(sizeof(struct usb_cdc_ncm_nth16), - ctx->tx_ndp_modulus) + - sizeof(struct usb_cdc_ncm_ndp16) + - (ctx->tx_max_datagrams + 1) * - sizeof(struct usb_cdc_ncm_dpe16); - - /* store last valid offset before alignment */ - last_offset = offset; - /* align first Datagram offset correctly */ - offset = ALIGN(offset, ctx->tx_modulus) + ctx->tx_remainder; - /* zero buffer till the first IP datagram */ - cdc_ncm_zero_fill(skb_out->data, 0, offset, offset); - n = 0; + /* count total number of frames in this NTB */ ctx->tx_curr_frame_num = 0; } - for (; n < ctx->tx_max_datagrams; n++) { - /* check if end of transmit buffer is reached */ - if (offset >= ctx->tx_max) { - ready2send = 1; - break; - } - /* compute maximum buffer size */ - rem = ctx->tx_max - offset; - + for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) { + /* send any remaining skb first */ if (skb == NULL) { skb = ctx->tx_rem_skb; + sign = ctx->tx_rem_sign; ctx->tx_rem_skb = NULL; /* check for end of skb */ @@ -728,7 +679,14 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) break; } - if (skb->len > rem) { + /* get the appropriate NDP for this skb */ + ndp16 = cdc_ncm_ndp(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder); + + /* align beginning of next frame */ + cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); + + /* check if we had enough room left for both NDP and frame */ + if (!ndp16 || skb_out->len + skb->len > ctx->tx_max) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -741,31 +699,30 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) ctx->netdev->stats.tx_dropped++; } ctx->tx_rem_skb = skb; + ctx->tx_rem_sign = sign; skb = NULL; ready2send = 1; } break; } - memcpy(((u8 *)skb_out->data) + offset, skb->data, skb->len); + /* calculate frame number withing this NDP */ + ndplen = le16_to_cpu(ndp16->wLength); + index = (ndplen - sizeof(struct usb_cdc_ncm_ndp16)) / sizeof(struct usb_cdc_ncm_dpe16) - 1; - ctx->tx_ncm.dpe16[n].wDatagramLength = cpu_to_le16(skb->len); - ctx->tx_ncm.dpe16[n].wDatagramIndex = cpu_to_le16(offset); - - /* update offset */ - offset += skb->len; - - /* store last valid offset before alignment */ - last_offset = offset; - - /* align offset correctly */ - offset = ALIGN(offset, ctx->tx_modulus) + ctx->tx_remainder; - - /* zero padding */ - cdc_ncm_zero_fill(skb_out->data, last_offset, offset, - ctx->tx_max); + /* OK, add this skb */ + ndp16->dpe16[index].wDatagramLength = cpu_to_le16(skb->len); + ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len); + ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16)); + memcpy(skb_put(skb_out, skb->len), skb->data, skb->len); dev_kfree_skb_any(skb); skb = NULL; + + /* send now if this NDP is full */ + if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) { + ready2send = 1; + break; + } } /* free up any dangling skb */ @@ -781,16 +738,12 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; - ctx->tx_curr_offset = offset; - ctx->tx_curr_last_offset = last_offset; goto exit_no_skb; } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; - ctx->tx_curr_offset = offset; - ctx->tx_curr_last_offset = last_offset; /* set the pending count */ if (n < CDC_NCM_RESTART_TIMER_DATAGRAM_CNT) ctx->tx_timer_pending = CDC_NCM_TIMER_PENDING_CNT; @@ -801,75 +754,24 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) /* variables will be reset at next call */ } - /* check for overflow */ - if (last_offset > ctx->tx_max) - last_offset = ctx->tx_max; - - /* revert offset */ - offset = last_offset; - /* * If collected data size is less or equal CDC_NCM_MIN_TX_PKT bytes, * we send buffers as it is. If we get more data, it would be more * efficient for USB HS mobile device with DMA engine to receive a full * size NTB, than canceling DMA transfer and receiving a short packet. */ - if (offset > CDC_NCM_MIN_TX_PKT) - offset = ctx->tx_max; - - /* final zero padding */ - cdc_ncm_zero_fill(skb_out->data, last_offset, offset, ctx->tx_max); - - /* store last offset */ - last_offset = offset; - - if (((last_offset < ctx->tx_max) && ((last_offset % - le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) || - (((last_offset == ctx->tx_max) && ((ctx->tx_max % - le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) && - (ctx->tx_max < le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)))) { - /* force short packet */ - *(((u8 *)skb_out->data) + last_offset) = 0; - last_offset++; - } + if (skb_out->len > CDC_NCM_MIN_TX_PKT) + /* final zero padding */ + memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0, ctx->tx_max - skb_out->len); - /* zero the rest of the DPEs plus the last NULL entry */ - for (; n <= CDC_NCM_DPT_DATAGRAMS_MAX; n++) { - ctx->tx_ncm.dpe16[n].wDatagramLength = 0; - ctx->tx_ncm.dpe16[n].wDatagramIndex = 0; - } - - /* fill out 16-bit NTB header */ - ctx->tx_ncm.nth16.dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); - ctx->tx_ncm.nth16.wHeaderLength = - cpu_to_le16(sizeof(ctx->tx_ncm.nth16)); - ctx->tx_ncm.nth16.wSequence = cpu_to_le16(ctx->tx_seq); - ctx->tx_ncm.nth16.wBlockLength = cpu_to_le16(last_offset); - index = ALIGN(sizeof(struct usb_cdc_ncm_nth16), ctx->tx_ndp_modulus); - ctx->tx_ncm.nth16.wNdpIndex = cpu_to_le16(index); - - memcpy(skb_out->data, &(ctx->tx_ncm.nth16), sizeof(ctx->tx_ncm.nth16)); - ctx->tx_seq++; - - /* fill out 16-bit NDP table */ - ctx->tx_ncm.ndp16.dwSignature = - cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN); - rem = sizeof(ctx->tx_ncm.ndp16) + ((ctx->tx_curr_frame_num + 1) * - sizeof(struct usb_cdc_ncm_dpe16)); - ctx->tx_ncm.ndp16.wLength = cpu_to_le16(rem); - ctx->tx_ncm.ndp16.wNextNdpIndex = 0; /* reserved */ + /* do we need to prevent a ZLP? */ + if (((skb_out->len % le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0) && + (skb_out->len < le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)) && skb_tailroom(skb_out)) + *skb_put(skb_out, 1) = 0; /* force short packet */ - memcpy(((u8 *)skb_out->data) + index, - &(ctx->tx_ncm.ndp16), - sizeof(ctx->tx_ncm.ndp16)); - - memcpy(((u8 *)skb_out->data) + index + sizeof(ctx->tx_ncm.ndp16), - &(ctx->tx_ncm.dpe16), - (ctx->tx_curr_frame_num + 1) * - sizeof(struct usb_cdc_ncm_dpe16)); - - /* set frame length */ - skb_put(skb_out, last_offset); + /* set final frame length */ + nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; + nth16->wBlockLength = cpu_to_le16(skb_out->len); /* return skb */ ctx->tx_curr_skb = NULL; @@ -882,6 +784,7 @@ exit_no_skb: cdc_ncm_tx_timeout_start(ctx); return NULL; } +EXPORT_SYMBOL_GPL(cdc_ncm_fill_tx_frame); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx) { @@ -916,6 +819,8 @@ static void cdc_ncm_txpath_bh(unsigned long param) netif_tx_lock_bh(ctx->netdev); usbnet_start_xmit(NULL, ctx->netdev); netif_tx_unlock_bh(ctx->netdev); + } else { + spin_unlock_bh(&ctx->mtx); } } @@ -936,7 +841,7 @@ cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) goto error; spin_lock_bh(&ctx->mtx); - skb_out = cdc_ncm_fill_tx_frame(ctx, skb); + skb_out = cdc_ncm_fill_tx_frame(ctx, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); spin_unlock_bh(&ctx->mtx); return skb_out; @@ -947,17 +852,12 @@ error: return NULL; } -static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) +/* verify NTB header and return offset of first NDP, or negative error */ +int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) { - struct sk_buff *skb; - struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; - int len; - int nframes; - int x; - int offset; struct usb_cdc_ncm_nth16 *nth16; - struct usb_cdc_ncm_ndp16 *ndp16; - struct usb_cdc_ncm_dpe16 *dpe16; + int len; + int ret = -EINVAL; if (ctx == NULL) goto error; @@ -991,20 +891,23 @@ static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) } ctx->rx_seq = le16_to_cpu(nth16->wSequence); - len = le16_to_cpu(nth16->wNdpIndex); - if ((len + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { - pr_debug("invalid DPT16 index <%u>\n", - le16_to_cpu(nth16->wNdpIndex)); - goto error; - } + ret = le16_to_cpu(nth16->wNdpIndex); +error: + return ret; +} +EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_nth16); - ndp16 = (struct usb_cdc_ncm_ndp16 *)(((u8 *)skb_in->data) + len); +/* verify NDP header and return number of datagrams, or negative error */ +int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) +{ + struct usb_cdc_ncm_ndp16 *ndp16; + int ret = -EINVAL; - if (le32_to_cpu(ndp16->dwSignature) != USB_CDC_NCM_NDP16_NOCRC_SIGN) { - pr_debug("invalid DPT16 signature <%u>\n", - le32_to_cpu(ndp16->dwSignature)); + if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { + pr_debug("invalid NDP offset <%u>\n", ndpoffset); goto error; } + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { pr_debug("invalid DPT16 length <%u>\n", @@ -1012,20 +915,52 @@ static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) goto error; } - nframes = ((le16_to_cpu(ndp16->wLength) - + ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) / sizeof(struct usb_cdc_ncm_dpe16)); - nframes--; /* we process NDP entries except for the last one */ - - len += sizeof(struct usb_cdc_ncm_ndp16); + ret--; /* we process NDP entries except for the last one */ - if ((len + nframes * (sizeof(struct usb_cdc_ncm_dpe16))) > + if ((sizeof(struct usb_cdc_ncm_ndp16) + ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len) { - pr_debug("Invalid nframes = %d\n", nframes); - goto error; + pr_debug("Invalid nframes = %d\n", ret); + ret = -EINVAL; } - dpe16 = (struct usb_cdc_ncm_dpe16 *)(((u8 *)skb_in->data) + len); +error: + return ret; +} +EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16); + +static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) +{ + struct sk_buff *skb; + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int len; + int nframes; + int x; + int offset; + struct usb_cdc_ncm_ndp16 *ndp16; + struct usb_cdc_ncm_dpe16 *dpe16; + int ndpoffset; + int loopcount = 50; /* arbitrary max preventing infinite loop */ + + ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in); + if (ndpoffset < 0) + goto error; + +next_ndp: + nframes = cdc_ncm_rx_verify_ndp16(skb_in, ndpoffset); + if (nframes < 0) + goto error; + + ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); + + if (le32_to_cpu(ndp16->dwSignature) != USB_CDC_NCM_NDP16_NOCRC_SIGN) { + pr_debug("invalid DPT16 signature <%u>\n", + le32_to_cpu(ndp16->dwSignature)); + goto err_ndp; + } + dpe16 = ndp16->dpe16; for (x = 0; x < nframes; x++, dpe16++) { offset = le16_to_cpu(dpe16->wDatagramIndex); @@ -1037,7 +972,7 @@ static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) */ if ((offset == 0) || (len == 0)) { if (!x) - goto error; /* empty NTB */ + goto err_ndp; /* empty NTB */ break; } @@ -1048,7 +983,7 @@ static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) "offset[%u]=%u, length=%u, skb=%p\n", x, offset, len, skb_in); if (!x) - goto error; + goto err_ndp; break; } else { @@ -1061,6 +996,12 @@ static int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) usbnet_skb_return(dev, skb); } } +err_ndp: + /* are there more NDPs to process? */ + ndpoffset = le16_to_cpu(ndp16->wNextNdpIndex); + if (ndpoffset && loopcount--) + goto next_ndp; + return 1; error: return 0; @@ -1125,7 +1066,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - ctx->connected = event->wValue; + ctx->connected = le16_to_cpu(event->wValue); printk(KERN_INFO KBUILD_MODNAME ": %s: network connection:" " %sconnected\n", diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index e0433ce6ced..3f554c1149f 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -56,27 +56,12 @@ static int dm_read(struct usbnet *dev, u8 reg, u16 length, void *data) { - void *buf; - int err = -ENOMEM; - - netdev_dbg(dev->net, "dm_read() reg=0x%02x length=%d\n", reg, length); - - buf = kmalloc(length, GFP_KERNEL); - if (!buf) - goto out; - - err = usb_control_msg(dev->udev, - usb_rcvctrlpipe(dev->udev, 0), - DM_READ_REGS, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, reg, buf, length, USB_CTRL_SET_TIMEOUT); - if (err == length) - memcpy(data, buf, length); - else if (err >= 0) + int err; + err = usbnet_read_cmd(dev, DM_READ_REGS, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, reg, data, length); + if(err != length && err >= 0) err = -EINVAL; - kfree(buf); - - out: return err; } @@ -87,91 +72,29 @@ static int dm_read_reg(struct usbnet *dev, u8 reg, u8 *value) static int dm_write(struct usbnet *dev, u8 reg, u16 length, void *data) { - void *buf = NULL; - int err = -ENOMEM; - - netdev_dbg(dev->net, "dm_write() reg=0x%02x, length=%d\n", reg, length); + int err; + err = usbnet_write_cmd(dev, DM_WRITE_REGS, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, reg, data, length); - if (data) { - buf = kmemdup(data, length, GFP_KERNEL); - if (!buf) - goto out; - } - - err = usb_control_msg(dev->udev, - usb_sndctrlpipe(dev->udev, 0), - DM_WRITE_REGS, - USB_DIR_OUT | USB_TYPE_VENDOR |USB_RECIP_DEVICE, - 0, reg, buf, length, USB_CTRL_SET_TIMEOUT); - kfree(buf); if (err >= 0 && err < length) err = -EINVAL; - out: return err; } static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value) { - netdev_dbg(dev->net, "dm_write_reg() reg=0x%02x, value=0x%02x\n", - reg, value); - return usb_control_msg(dev->udev, - usb_sndctrlpipe(dev->udev, 0), - DM_WRITE_REG, - USB_DIR_OUT | USB_TYPE_VENDOR |USB_RECIP_DEVICE, - value, reg, NULL, 0, USB_CTRL_SET_TIMEOUT); -} - -static void dm_write_async_callback(struct urb *urb) -{ - struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; - int status = urb->status; - - if (status < 0) - printk(KERN_DEBUG "dm_write_async_callback() failed with %d\n", - status); - - kfree(req); - usb_free_urb(urb); + return usbnet_write_cmd(dev, DM_WRITE_REGS, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, reg, NULL, 0); } static void dm_write_async_helper(struct usbnet *dev, u8 reg, u8 value, u16 length, void *data) { - struct usb_ctrlrequest *req; - struct urb *urb; - int status; - - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->net, "Error allocating URB in dm_write_async_helper!\n"); - return; - } - - req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC); - if (!req) { - netdev_err(dev->net, "Failed to allocate memory for control request\n"); - usb_free_urb(urb); - return; - } - - req->bRequestType = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; - req->bRequest = length ? DM_WRITE_REGS : DM_WRITE_REG; - req->wValue = cpu_to_le16(value); - req->wIndex = cpu_to_le16(reg); - req->wLength = cpu_to_le16(length); - - usb_fill_control_urb(urb, dev->udev, - usb_sndctrlpipe(dev->udev, 0), - (void *)req, data, length, - dm_write_async_callback, req); - - status = usb_submit_urb(urb, GFP_ATOMIC); - if (status < 0) { - netdev_err(dev->net, "Error submitting the control message: status=%d\n", - status); - kfree(req); - usb_free_urb(urb); - } + usbnet_write_cmd_async(dev, DM_WRITE_REGS, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, reg, data, length); } static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, void *data) diff --git a/drivers/net/usb/int51x1.c b/drivers/net/usb/int51x1.c index 8de641713d5..ace9e74ffbd 100644 --- a/drivers/net/usb/int51x1.c +++ b/drivers/net/usb/int51x1.c @@ -116,23 +116,8 @@ static struct sk_buff *int51x1_tx_fixup(struct usbnet *dev, return skb; } -static void int51x1_async_cmd_callback(struct urb *urb) -{ - struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; - int status = urb->status; - - if (status < 0) - dev_warn(&urb->dev->dev, "async callback failed with %d\n", status); - - kfree(req); - usb_free_urb(urb); -} - static void int51x1_set_multicast(struct net_device *netdev) { - struct usb_ctrlrequest *req; - int status; - struct urb *urb; struct usbnet *dev = netdev_priv(netdev); u16 filter = PACKET_TYPE_DIRECTED | PACKET_TYPE_BROADCAST; @@ -149,40 +134,9 @@ static void int51x1_set_multicast(struct net_device *netdev) netdev_dbg(dev->net, "receive own packets only\n"); } - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_warn(dev->net, "Error allocating URB\n"); - return; - } - - req = kmalloc(sizeof(*req), GFP_ATOMIC); - if (!req) { - netdev_warn(dev->net, "Error allocating control msg\n"); - goto out; - } - - req->bRequestType = USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; - req->bRequest = SET_ETHERNET_PACKET_FILTER; - req->wValue = cpu_to_le16(filter); - req->wIndex = 0; - req->wLength = 0; - - usb_fill_control_urb(urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), - (void *)req, NULL, 0, - int51x1_async_cmd_callback, - (void *)req); - - status = usb_submit_urb(urb, GFP_ATOMIC); - if (status < 0) { - netdev_warn(dev->net, "Error submitting control msg, sts=%d\n", - status); - goto out1; - } - return; -out1: - kfree(req); -out: - usb_free_urb(urb); + usbnet_write_cmd_async(dev, SET_ETHERNET_PACKET_FILTER, + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, + filter, 0, NULL, 0); } static const struct net_device_ops int51x1_netdev_ops = { diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index cc7e72010ac..3f3f566afa0 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -124,93 +124,20 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { - struct usb_device *xdev = dev->udev; - int ret; - void *buffer; - - buffer = kmalloc(size, GFP_NOIO); - if (buffer == NULL) - return -ENOMEM; - - ret = usb_control_msg(xdev, usb_rcvctrlpipe(xdev, 0), MCS7830_RD_BREQ, - MCS7830_RD_BMREQ, 0x0000, index, buffer, - size, MCS7830_CTRL_TIMEOUT); - memcpy(data, buffer, size); - kfree(buffer); - - return ret; + return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, + 0x0000, index, data, size); } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) { - struct usb_device *xdev = dev->udev; - int ret; - void *buffer; - - buffer = kmemdup(data, size, GFP_NOIO); - if (buffer == NULL) - return -ENOMEM; - - ret = usb_control_msg(xdev, usb_sndctrlpipe(xdev, 0), MCS7830_WR_BREQ, - MCS7830_WR_BMREQ, 0x0000, index, buffer, - size, MCS7830_CTRL_TIMEOUT); - kfree(buffer); - return ret; -} - -static void mcs7830_async_cmd_callback(struct urb *urb) -{ - struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; - int status = urb->status; - - if (status < 0) - printk(KERN_DEBUG "%s() failed with %d\n", - __func__, status); - - kfree(req); - usb_free_urb(urb); + return usbnet_write_cmd(dev, MCS7830_WR_BREQ, MCS7830_WR_BMREQ, + 0x0000, index, data, size); } static void mcs7830_set_reg_async(struct usbnet *dev, u16 index, u16 size, void *data) { - struct usb_ctrlrequest *req; - int ret; - struct urb *urb; - - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - dev_dbg(&dev->udev->dev, - "Error allocating URB in write_cmd_async!\n"); - return; - } - - req = kmalloc(sizeof *req, GFP_ATOMIC); - if (!req) { - dev_err(&dev->udev->dev, - "Failed to allocate memory for control request\n"); - goto out; - } - req->bRequestType = MCS7830_WR_BMREQ; - req->bRequest = MCS7830_WR_BREQ; - req->wValue = 0; - req->wIndex = cpu_to_le16(index); - req->wLength = cpu_to_le16(size); - - usb_fill_control_urb(urb, dev->udev, - usb_sndctrlpipe(dev->udev, 0), - (void *)req, data, size, - mcs7830_async_cmd_callback, req); - - ret = usb_submit_urb(urb, GFP_ATOMIC); - if (ret < 0) { - dev_err(&dev->udev->dev, - "Error submitting the control message: ret=%d\n", ret); - goto out; - } - return; -out: - kfree(req); - usb_free_urb(urb); + usbnet_write_cmd_async(dev, MCS7830_WR_BREQ, MCS7830_WR_BMREQ, + 0x0000, index, data, size); } static int mcs7830_hif_get_mac_address(struct usbnet *dev, unsigned char *addr) diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index c062a3e8295..93e0716a118 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -109,13 +109,11 @@ struct nc_trailer { static int nc_vendor_read(struct usbnet *dev, u8 req, u8 regnum, u16 *retval_ptr) { - int status = usb_control_msg(dev->udev, - usb_rcvctrlpipe(dev->udev, 0), - req, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, regnum, - retval_ptr, sizeof *retval_ptr, - USB_CTRL_GET_TIMEOUT); + int status = usbnet_read_cmd(dev, req, + USB_DIR_IN | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, + 0, regnum, retval_ptr, + sizeof *retval_ptr); if (status > 0) status = 0; if (!status) @@ -133,13 +131,9 @@ nc_register_read(struct usbnet *dev, u8 regnum, u16 *retval_ptr) static void nc_vendor_write(struct usbnet *dev, u8 req, u8 regnum, u16 value) { - usb_control_msg(dev->udev, - usb_sndctrlpipe(dev->udev, 0), - req, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, regnum, - NULL, 0, // data is in setup packet - USB_CTRL_SET_TIMEOUT); + usbnet_write_cmd(dev, req, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, regnum, NULL, 0); } static inline void @@ -288,37 +282,34 @@ static inline void nc_dump_ttl(struct usbnet *dev, u16 ttl) static int net1080_reset(struct usbnet *dev) { u16 usbctl, status, ttl; - u16 *vp = kmalloc(sizeof (u16), GFP_KERNEL); + u16 vp; int retval; - if (!vp) - return -ENOMEM; - // nc_dump_registers(dev); - if ((retval = nc_register_read(dev, REG_STATUS, vp)) < 0) { + if ((retval = nc_register_read(dev, REG_STATUS, &vp)) < 0) { netdev_dbg(dev->net, "can't read %s-%s status: %d\n", dev->udev->bus->bus_name, dev->udev->devpath, retval); goto done; } - status = *vp; + status = vp; nc_dump_status(dev, status); - if ((retval = nc_register_read(dev, REG_USBCTL, vp)) < 0) { + if ((retval = nc_register_read(dev, REG_USBCTL, &vp)) < 0) { netdev_dbg(dev->net, "can't read USBCTL, %d\n", retval); goto done; } - usbctl = *vp; + usbctl = vp; nc_dump_usbctl(dev, usbctl); nc_register_write(dev, REG_USBCTL, USBCTL_FLUSH_THIS | USBCTL_FLUSH_OTHER); - if ((retval = nc_register_read(dev, REG_TTL, vp)) < 0) { + if ((retval = nc_register_read(dev, REG_TTL, &vp)) < 0) { netdev_dbg(dev->net, "can't read TTL, %d\n", retval); goto done; } - ttl = *vp; + ttl = vp; // nc_dump_ttl(dev, ttl); nc_register_write(dev, REG_TTL, @@ -331,7 +322,6 @@ static int net1080_reset(struct usbnet *dev) retval = 0; done: - kfree(vp); return retval; } @@ -339,13 +329,10 @@ static int net1080_check_connect(struct usbnet *dev) { int retval; u16 status; - u16 *vp = kmalloc(sizeof (u16), GFP_KERNEL); + u16 vp; - if (!vp) - return -ENOMEM; - retval = nc_register_read(dev, REG_STATUS, vp); - status = *vp; - kfree(vp); + retval = nc_register_read(dev, REG_STATUS, &vp); + status = vp; if (retval != 0) { netdev_dbg(dev->net, "net1080_check_conn read - %d\n", retval); return retval; @@ -355,59 +342,22 @@ static int net1080_check_connect(struct usbnet *dev) return 0; } -static void nc_flush_complete(struct urb *urb) -{ - kfree(urb->context); - usb_free_urb(urb); -} - static void nc_ensure_sync(struct usbnet *dev) { - dev->frame_errors++; - if (dev->frame_errors > 5) { - struct urb *urb; - struct usb_ctrlrequest *req; - int status; - - /* Send a flush */ - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) - return; - - req = kmalloc(sizeof *req, GFP_ATOMIC); - if (!req) { - usb_free_urb(urb); - return; - } + if (++dev->frame_errors <= 5) + return; - req->bRequestType = USB_DIR_OUT - | USB_TYPE_VENDOR - | USB_RECIP_DEVICE; - req->bRequest = REQUEST_REGISTER; - req->wValue = cpu_to_le16(USBCTL_FLUSH_THIS - | USBCTL_FLUSH_OTHER); - req->wIndex = cpu_to_le16(REG_USBCTL); - req->wLength = cpu_to_le16(0); - - /* queue an async control request, we don't need - * to do anything when it finishes except clean up. - */ - usb_fill_control_urb(urb, dev->udev, - usb_sndctrlpipe(dev->udev, 0), - (unsigned char *) req, - NULL, 0, - nc_flush_complete, req); - status = usb_submit_urb(urb, GFP_ATOMIC); - if (status) { - kfree(req); - usb_free_urb(urb); - return; - } + if (usbnet_write_cmd_async(dev, REQUEST_REGISTER, + USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, + USBCTL_FLUSH_THIS | + USBCTL_FLUSH_OTHER, + REG_USBCTL, NULL, 0)) + return; - netif_dbg(dev, rx_err, dev->net, - "flush net1080; too many framing errors\n"); - dev->frame_errors = 0; - } + netif_dbg(dev, rx_err, dev->net, + "flush net1080; too many framing errors\n"); + dev->frame_errors = 0; } static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 4584b9a805b..0fcc8e65a06 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -71,13 +71,10 @@ static inline int pl_vendor_req(struct usbnet *dev, u8 req, u8 val, u8 index) { - return usb_control_msg(dev->udev, - usb_rcvctrlpipe(dev->udev, 0), - req, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - val, index, - NULL, 0, - USB_CTRL_GET_TIMEOUT); + return usbnet_read_cmd(dev, req, + USB_DIR_IN | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, + val, index, NULL, 0); } static inline int diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c index c27d27701ae..18dd4257ab1 100644 --- a/drivers/net/usb/sierra_net.c +++ b/drivers/net/usb/sierra_net.c @@ -311,10 +311,9 @@ static int sierra_net_send_cmd(struct usbnet *dev, struct sierra_net_data *priv = sierra_net_get_private(dev); int status; - status = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_CDC_SEND_ENCAPSULATED_COMMAND, - USB_DIR_OUT|USB_TYPE_CLASS|USB_RECIP_INTERFACE, 0, - priv->ifnum, cmd, cmdlen, USB_CTRL_SET_TIMEOUT); + status = usbnet_write_cmd(dev, USB_CDC_SEND_ENCAPSULATED_COMMAND, + USB_DIR_OUT|USB_TYPE_CLASS|USB_RECIP_INTERFACE, + 0, priv->ifnum, cmd, cmdlen); if (status != cmdlen && status != -ENODEV) netdev_err(dev->net, "Submit %s failed %d\n", cmd_name, status); @@ -340,7 +339,7 @@ static void sierra_net_set_ctx_index(struct sierra_net_data *priv, u8 ctx_ix) dev_dbg(&(priv->usbnet->udev->dev), "%s %d", __func__, ctx_ix); priv->tx_hdr_template[0] = 0x3F; priv->tx_hdr_template[1] = ctx_ix; - *((u16 *)&priv->tx_hdr_template[2]) = + *((__be16 *)&priv->tx_hdr_template[2]) = cpu_to_be16(SIERRA_NET_HIP_EXT_IP_OUT_ID); } @@ -632,32 +631,22 @@ static int sierra_net_change_mtu(struct net_device *net, int new_mtu) static int sierra_net_get_fw_attr(struct usbnet *dev, u16 *datap) { int result = 0; - u16 *attrdata; - - attrdata = kmalloc(sizeof(*attrdata), GFP_KERNEL); - if (!attrdata) - return -ENOMEM; - - result = usb_control_msg( - dev->udev, - usb_rcvctrlpipe(dev->udev, 0), - /* _u8 vendor specific request */ - SWI_USB_REQUEST_GET_FW_ATTR, - USB_DIR_IN | USB_TYPE_VENDOR, /* __u8 request type */ - 0x0000, /* __u16 value not used */ - 0x0000, /* __u16 index not used */ - attrdata, /* char *data */ - sizeof(*attrdata), /* __u16 size */ - USB_CTRL_SET_TIMEOUT); /* int timeout */ - - if (result < 0) { - kfree(attrdata); + __le16 attrdata; + + result = usbnet_read_cmd(dev, + /* _u8 vendor specific request */ + SWI_USB_REQUEST_GET_FW_ATTR, + USB_DIR_IN | USB_TYPE_VENDOR, /* __u8 request type */ + 0x0000, /* __u16 value not used */ + 0x0000, /* __u16 index not used */ + &attrdata, /* char *data */ + sizeof(attrdata) /* __u16 size */ + ); + + if (result < 0) return -EIO; - } - - *datap = le16_to_cpu(*attrdata); - kfree(attrdata); + *datap = le16_to_cpu(attrdata); return result; } diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index b77ae76f4aa..c5353cfc9c8 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -26,6 +26,8 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> +#include <linux/bitrev.h> +#include <linux/crc16.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> @@ -52,7 +54,8 @@ #define USB_PRODUCT_ID_LAN7500 (0x7500) #define USB_PRODUCT_ID_LAN7505 (0x7505) #define RXW_PADDING 2 -#define SUPPORTED_WAKE (WAKE_MAGIC) +#define SUPPORTED_WAKE (WAKE_UCAST | WAKE_BCAST | \ + WAKE_MCAST | WAKE_ARP | WAKE_MAGIC) #define check_warn(ret, fmt, args...) \ ({ if (ret < 0) netdev_warn(dev->net, fmt, ##args); }) @@ -82,71 +85,92 @@ static bool turbo_mode = true; module_param(turbo_mode, bool, 0644); MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); -static int __must_check smsc75xx_read_reg(struct usbnet *dev, u32 index, - u32 *data) +static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index, + u32 *data, int in_pm) { - u32 *buf = kmalloc(4, GFP_KERNEL); + u32 buf; int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); - if (!buf) - return -ENOMEM; - - ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), - USB_VENDOR_REQUEST_READ_REGISTER, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 00, index, buf, 4, USB_CTRL_GET_TIMEOUT); + if (!in_pm) + fn = usbnet_read_cmd; + else + fn = usbnet_read_cmd_nopm; + ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN + | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, index, &buf, 4); if (unlikely(ret < 0)) netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d", index, ret); - le32_to_cpus(buf); - *data = *buf; - kfree(buf); + le32_to_cpus(&buf); + *data = buf; return ret; } -static int __must_check smsc75xx_write_reg(struct usbnet *dev, u32 index, - u32 data) +static int __must_check __smsc75xx_write_reg(struct usbnet *dev, u32 index, + u32 data, int in_pm) { - u32 *buf = kmalloc(4, GFP_KERNEL); + u32 buf; int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); - if (!buf) - return -ENOMEM; - - *buf = data; - cpu_to_le32s(buf); + if (!in_pm) + fn = usbnet_write_cmd; + else + fn = usbnet_write_cmd_nopm; - ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_VENDOR_REQUEST_WRITE_REGISTER, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 00, index, buf, 4, USB_CTRL_SET_TIMEOUT); + buf = data; + cpu_to_le32s(&buf); + ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT + | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, index, &buf, 4); if (unlikely(ret < 0)) netdev_warn(dev->net, "Failed to write reg index 0x%08x: %d", index, ret); - kfree(buf); - return ret; } +static int __must_check smsc75xx_read_reg_nopm(struct usbnet *dev, u32 index, + u32 *data) +{ + return __smsc75xx_read_reg(dev, index, data, 1); +} + +static int __must_check smsc75xx_write_reg_nopm(struct usbnet *dev, u32 index, + u32 data) +{ + return __smsc75xx_write_reg(dev, index, data, 1); +} + +static int __must_check smsc75xx_read_reg(struct usbnet *dev, u32 index, + u32 *data) +{ + return __smsc75xx_read_reg(dev, index, data, 0); +} + +static int __must_check smsc75xx_write_reg(struct usbnet *dev, u32 index, + u32 data) +{ + return __smsc75xx_write_reg(dev, index, data, 0); +} + static int smsc75xx_set_feature(struct usbnet *dev, u32 feature) { if (WARN_ON_ONCE(!dev)) return -EINVAL; - cpu_to_le32s(&feature); - - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); + return usbnet_write_cmd_nopm(dev, USB_REQ_SET_FEATURE, + USB_DIR_OUT | USB_RECIP_DEVICE, + feature, 0, NULL, 0); } static int smsc75xx_clear_feature(struct usbnet *dev, u32 feature) @@ -154,11 +178,9 @@ static int smsc75xx_clear_feature(struct usbnet *dev, u32 feature) if (WARN_ON_ONCE(!dev)) return -EINVAL; - cpu_to_le32s(&feature); - - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); + return usbnet_write_cmd_nopm(dev, USB_REQ_CLEAR_FEATURE, + USB_DIR_OUT | USB_RECIP_DEVICE, + feature, 0, NULL, 0); } /* Loop until the read is completed with timeout @@ -804,13 +826,16 @@ static int smsc75xx_set_features(struct net_device *netdev, return 0; } -static int smsc75xx_wait_ready(struct usbnet *dev) +static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) { int timeout = 0; do { u32 buf; - int ret = smsc75xx_read_reg(dev, PMT_CTL, &buf); + int ret; + + ret = __smsc75xx_read_reg(dev, PMT_CTL, &buf, in_pm); + check_warn_return(ret, "Failed to read PMT_CTL: %d", ret); if (buf & PMT_CTL_DEV_RDY) @@ -832,7 +857,7 @@ static int smsc75xx_reset(struct usbnet *dev) netif_dbg(dev, ifup, dev->net, "entering smsc75xx_reset"); - ret = smsc75xx_wait_ready(dev); + ret = smsc75xx_wait_ready(dev, 0); check_warn_return(ret, "device not ready in smsc75xx_reset"); ret = smsc75xx_read_reg(dev, HW_CFG, &buf); @@ -1154,6 +1179,36 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf) } } +static u16 smsc_crc(const u8 *buffer, size_t len) +{ + return bitrev16(crc16(0xFFFF, buffer, len)); +} + +static int smsc75xx_write_wuff(struct usbnet *dev, int filter, u32 wuf_cfg, + u32 wuf_mask1) +{ + int cfg_base = WUF_CFGX + filter * 4; + int mask_base = WUF_MASKX + filter * 16; + int ret; + + ret = smsc75xx_write_reg(dev, cfg_base, wuf_cfg); + check_warn_return(ret, "Error writing WUF_CFGX"); + + ret = smsc75xx_write_reg(dev, mask_base, wuf_mask1); + check_warn_return(ret, "Error writing WUF_MASKX"); + + ret = smsc75xx_write_reg(dev, mask_base + 4, 0); + check_warn_return(ret, "Error writing WUF_MASKX"); + + ret = smsc75xx_write_reg(dev, mask_base + 8, 0); + check_warn_return(ret, "Error writing WUF_MASKX"); + + ret = smsc75xx_write_reg(dev, mask_base + 12, 0); + check_warn_return(ret, "Error writing WUF_MASKX"); + + return 0; +} + static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1169,101 +1224,156 @@ static int smsc75xx_suspend(struct usb_interface *intf, pm_message_t message) netdev_info(dev->net, "entering SUSPEND2 mode"); /* disable energy detect (link up) & wake up events */ - ret = smsc75xx_read_reg(dev, WUCSR, &val); + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); val &= ~(WUCSR_MPEN | WUCSR_WUEN); - ret = smsc75xx_write_reg(dev, WUCSR, val); + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); + ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); check_warn_return(ret, "Error reading PMT_CTL"); val &= ~(PMT_CTL_ED_EN | PMT_CTL_WOL_EN); - ret = smsc75xx_write_reg(dev, PMT_CTL, val); + ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); check_warn_return(ret, "Error writing PMT_CTL"); /* enter suspend2 mode */ - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); + ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); check_warn_return(ret, "Error reading PMT_CTL"); val &= ~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST); val |= PMT_CTL_SUS_MODE_2; - ret = smsc75xx_write_reg(dev, PMT_CTL, val); + ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); check_warn_return(ret, "Error writing PMT_CTL"); return 0; } - if (pdata->wolopts & WAKE_MAGIC) { - /* clear any pending magic packet status */ - ret = smsc75xx_read_reg(dev, WUCSR, &val); + if (pdata->wolopts & (WAKE_MCAST | WAKE_ARP)) { + int i, filter = 0; + + /* disable all filters */ + for (i = 0; i < WUF_NUM; i++) { + ret = smsc75xx_write_reg_nopm(dev, WUF_CFGX + i * 4, 0); + check_warn_return(ret, "Error writing WUF_CFGX"); + } + + if (pdata->wolopts & WAKE_MCAST) { + const u8 mcast[] = {0x01, 0x00, 0x5E}; + netdev_info(dev->net, "enabling multicast detection"); + + val = WUF_CFGX_EN | WUF_CFGX_ATYPE_MULTICAST + | smsc_crc(mcast, 3); + ret = smsc75xx_write_wuff(dev, filter++, val, 0x0007); + check_warn_return(ret, "Error writing wakeup filter"); + } + + if (pdata->wolopts & WAKE_ARP) { + const u8 arp[] = {0x08, 0x06}; + netdev_info(dev->net, "enabling ARP detection"); + + val = WUF_CFGX_EN | WUF_CFGX_ATYPE_ALL | (0x0C << 16) + | smsc_crc(arp, 2); + ret = smsc75xx_write_wuff(dev, filter++, val, 0x0003); + check_warn_return(ret, "Error writing wakeup filter"); + } + + /* clear any pending pattern match packet status */ + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); - val |= WUCSR_MPR; + val |= WUCSR_WUFR; - ret = smsc75xx_write_reg(dev, WUCSR, val); + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + + netdev_info(dev->net, "enabling packet match detection"); + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); + + val |= WUCSR_WUEN; + + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + } else { + netdev_info(dev->net, "disabling packet match detection"); + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); + + val &= ~WUCSR_WUEN; + + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); } - /* enable/disable magic packup wake */ - ret = smsc75xx_read_reg(dev, WUCSR, &val); + /* disable magic, bcast & unicast wakeup sources */ + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); + val &= ~(WUCSR_MPEN | WUCSR_BCST_EN | WUCSR_PFDA_EN); + + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + if (pdata->wolopts & WAKE_MAGIC) { netdev_info(dev->net, "enabling magic packet wakeup"); - val |= WUCSR_MPEN; - } else { - netdev_info(dev->net, "disabling magic packet wakeup"); - val &= ~WUCSR_MPEN; + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); + + /* clear any pending magic packet status */ + val |= WUCSR_MPR | WUCSR_MPEN; + + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); } - ret = smsc75xx_write_reg(dev, WUCSR, val); - check_warn_return(ret, "Error writing WUCSR"); + if (pdata->wolopts & WAKE_BCAST) { + netdev_info(dev->net, "enabling broadcast detection"); + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); - /* enable wol wakeup source */ - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); - check_warn_return(ret, "Error reading PMT_CTL"); + val |= WUCSR_BCAST_FR | WUCSR_BCST_EN; - val |= PMT_CTL_WOL_EN; + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + } - ret = smsc75xx_write_reg(dev, PMT_CTL, val); - check_warn_return(ret, "Error writing PMT_CTL"); + if (pdata->wolopts & WAKE_UCAST) { + netdev_info(dev->net, "enabling unicast detection"); + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); + + val |= WUCSR_WUFR | WUCSR_PFDA_EN; + + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + } - /* enable receiver */ - ret = smsc75xx_read_reg(dev, MAC_RX, &val); + /* enable receiver to enable frame reception */ + ret = smsc75xx_read_reg_nopm(dev, MAC_RX, &val); check_warn_return(ret, "Failed to read MAC_RX: %d", ret); val |= MAC_RX_RXEN; - ret = smsc75xx_write_reg(dev, MAC_RX, val); + ret = smsc75xx_write_reg_nopm(dev, MAC_RX, val); check_warn_return(ret, "Failed to write MAC_RX: %d", ret); /* some wol options are enabled, so enter SUSPEND0 */ netdev_info(dev->net, "entering SUSPEND0 mode"); - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); + ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); check_warn_return(ret, "Error reading PMT_CTL"); - val &= (~(PMT_CTL_SUS_MODE | PMT_CTL_WUPS | PMT_CTL_PHY_RST)); - val |= PMT_CTL_SUS_MODE_0; + val &= (~(PMT_CTL_SUS_MODE | PMT_CTL_PHY_RST)); + val |= PMT_CTL_SUS_MODE_0 | PMT_CTL_WOL_EN | PMT_CTL_WUPS; - ret = smsc75xx_write_reg(dev, PMT_CTL, val); + ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); check_warn_return(ret, "Error writing PMT_CTL"); - /* clear wol status */ - val &= ~PMT_CTL_WUPS; - val |= PMT_CTL_WUPS_WOL; - ret = smsc75xx_write_reg(dev, PMT_CTL, val); - check_warn_return(ret, "Error writing PMT_CTL"); - - /* read back PMT_CTL */ - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); - check_warn_return(ret, "Error reading PMT_CTL"); - smsc75xx_set_feature(dev, USB_DEVICE_REMOTE_WAKEUP); return 0; @@ -1276,42 +1386,43 @@ static int smsc75xx_resume(struct usb_interface *intf) int ret; u32 val; - if (pdata->wolopts & WAKE_MAGIC) { + if (pdata->wolopts) { netdev_info(dev->net, "resuming from SUSPEND0"); smsc75xx_clear_feature(dev, USB_DEVICE_REMOTE_WAKEUP); - /* Disable magic packup wake */ - ret = smsc75xx_read_reg(dev, WUCSR, &val); + /* Disable wakeup sources */ + ret = smsc75xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); - val &= ~WUCSR_MPEN; + val &= ~(WUCSR_WUEN | WUCSR_MPEN | WUCSR_PFDA_EN + | WUCSR_BCST_EN); - ret = smsc75xx_write_reg(dev, WUCSR, val); + ret = smsc75xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); /* clear wake-up status */ - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); + ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); check_warn_return(ret, "Error reading PMT_CTL"); val &= ~PMT_CTL_WOL_EN; val |= PMT_CTL_WUPS; - ret = smsc75xx_write_reg(dev, PMT_CTL, val); + ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); check_warn_return(ret, "Error writing PMT_CTL"); } else { netdev_info(dev->net, "resuming from SUSPEND2"); - ret = smsc75xx_read_reg(dev, PMT_CTL, &val); + ret = smsc75xx_read_reg_nopm(dev, PMT_CTL, &val); check_warn_return(ret, "Error reading PMT_CTL"); val |= PMT_CTL_PHY_PWRUP; - ret = smsc75xx_write_reg(dev, PMT_CTL, val); + ret = smsc75xx_write_reg_nopm(dev, PMT_CTL, val); check_warn_return(ret, "Error writing PMT_CTL"); } - ret = smsc75xx_wait_ready(dev); + ret = smsc75xx_wait_ready(dev, 1); check_warn_return(ret, "device not ready in smsc75xx_resume"); return usbnet_resume(intf); diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 3286166415b..e07f70b5f39 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -26,6 +26,8 @@ #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> +#include <linux/bitrev.h> +#include <linux/crc16.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include <linux/slab.h> @@ -46,7 +48,8 @@ #define SMSC95XX_INTERNAL_PHY_ID (1) #define SMSC95XX_TX_OVERHEAD (8) #define SMSC95XX_TX_OVERHEAD_CSUM (12) -#define SUPPORTED_WAKE (WAKE_MAGIC) +#define SUPPORTED_WAKE (WAKE_UCAST | WAKE_BCAST | \ + WAKE_MCAST | WAKE_ARP | WAKE_MAGIC) #define check_warn(ret, fmt, args...) \ ({ if (ret < 0) netdev_warn(dev->net, fmt, ##args); }) @@ -63,80 +66,98 @@ struct smsc95xx_priv { u32 hash_lo; u32 wolopts; spinlock_t mac_cr_lock; -}; - -struct usb_context { - struct usb_ctrlrequest req; - struct usbnet *dev; + int wuff_filter_count; }; static bool turbo_mode = true; module_param(turbo_mode, bool, 0644); MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); -static int __must_check smsc95xx_read_reg(struct usbnet *dev, u32 index, - u32 *data) +static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index, + u32 *data, int in_pm) { - u32 *buf = kmalloc(4, GFP_KERNEL); + u32 buf; int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); - if (!buf) - return -ENOMEM; - - ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), - USB_VENDOR_REQUEST_READ_REGISTER, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 00, index, buf, 4, USB_CTRL_GET_TIMEOUT); + if (!in_pm) + fn = usbnet_read_cmd; + else + fn = usbnet_read_cmd_nopm; + ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN + | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, index, &buf, 4); if (unlikely(ret < 0)) - netdev_warn(dev->net, "Failed to read register index 0x%08x\n", index); + netdev_warn(dev->net, + "Failed to read reg index 0x%08x: %d", index, ret); - le32_to_cpus(buf); - *data = *buf; - kfree(buf); + le32_to_cpus(&buf); + *data = buf; return ret; } -static int __must_check smsc95xx_write_reg(struct usbnet *dev, u32 index, - u32 data) +static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index, + u32 data, int in_pm) { - u32 *buf = kmalloc(4, GFP_KERNEL); + u32 buf; int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); - if (!buf) - return -ENOMEM; - - *buf = data; - cpu_to_le32s(buf); + if (!in_pm) + fn = usbnet_write_cmd; + else + fn = usbnet_write_cmd_nopm; - ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_VENDOR_REQUEST_WRITE_REGISTER, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 00, index, buf, 4, USB_CTRL_SET_TIMEOUT); + buf = data; + cpu_to_le32s(&buf); + ret = fn(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, USB_DIR_OUT + | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0, index, &buf, 4); if (unlikely(ret < 0)) - netdev_warn(dev->net, "Failed to write register index 0x%08x\n", index); - - kfree(buf); + netdev_warn(dev->net, + "Failed to write reg index 0x%08x: %d", index, ret); return ret; } +static int __must_check smsc95xx_read_reg_nopm(struct usbnet *dev, u32 index, + u32 *data) +{ + return __smsc95xx_read_reg(dev, index, data, 1); +} + +static int __must_check smsc95xx_write_reg_nopm(struct usbnet *dev, u32 index, + u32 data) +{ + return __smsc95xx_write_reg(dev, index, data, 1); +} + +static int __must_check smsc95xx_read_reg(struct usbnet *dev, u32 index, + u32 *data) +{ + return __smsc95xx_read_reg(dev, index, data, 0); +} + +static int __must_check smsc95xx_write_reg(struct usbnet *dev, u32 index, + u32 data) +{ + return __smsc95xx_write_reg(dev, index, data, 0); +} static int smsc95xx_set_feature(struct usbnet *dev, u32 feature) { if (WARN_ON_ONCE(!dev)) return -EINVAL; - cpu_to_le32s(&feature); - - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); + return usbnet_write_cmd_nopm(dev, USB_REQ_SET_FEATURE, + USB_RECIP_DEVICE, feature, 0, + NULL, 0); } static int smsc95xx_clear_feature(struct usbnet *dev, u32 feature) @@ -144,11 +165,9 @@ static int smsc95xx_clear_feature(struct usbnet *dev, u32 feature) if (WARN_ON_ONCE(!dev)) return -EINVAL; - cpu_to_le32s(&feature); - - return usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), - USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, - USB_CTRL_SET_TIMEOUT); + return usbnet_write_cmd_nopm(dev, USB_REQ_CLEAR_FEATURE, + USB_RECIP_DEVICE, feature, + 0, NULL, 0); } /* Loop until the read is completed with timeout @@ -350,60 +369,20 @@ static int smsc95xx_write_eeprom(struct usbnet *dev, u32 offset, u32 length, return 0; } -static void smsc95xx_async_cmd_callback(struct urb *urb) -{ - struct usb_context *usb_context = urb->context; - struct usbnet *dev = usb_context->dev; - int status = urb->status; - - check_warn(status, "async callback failed with %d\n", status); - - kfree(usb_context); - usb_free_urb(urb); -} - static int __must_check smsc95xx_write_reg_async(struct usbnet *dev, u16 index, u32 *data) { - struct usb_context *usb_context; - int status; - struct urb *urb; const u16 size = 4; + int ret; - urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_warn(dev->net, "Error allocating URB\n"); - return -ENOMEM; - } - - usb_context = kmalloc(sizeof(struct usb_context), GFP_ATOMIC); - if (usb_context == NULL) { - netdev_warn(dev->net, "Error allocating control msg\n"); - usb_free_urb(urb); - return -ENOMEM; - } - - usb_context->req.bRequestType = - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE; - usb_context->req.bRequest = USB_VENDOR_REQUEST_WRITE_REGISTER; - usb_context->req.wValue = 00; - usb_context->req.wIndex = cpu_to_le16(index); - usb_context->req.wLength = cpu_to_le16(size); - - usb_fill_control_urb(urb, dev->udev, usb_sndctrlpipe(dev->udev, 0), - (void *)&usb_context->req, data, size, - smsc95xx_async_cmd_callback, - (void *)usb_context); - - status = usb_submit_urb(urb, GFP_ATOMIC); - if (status < 0) { - netdev_warn(dev->net, "Error submitting control msg, sts=%d\n", - status); - kfree(usb_context); - usb_free_urb(urb); - } - - return status; + ret = usbnet_write_cmd_async(dev, USB_VENDOR_REQUEST_WRITE_REGISTER, + USB_DIR_OUT | USB_TYPE_VENDOR | + USB_RECIP_DEVICE, + 0, index, data, size); + if (ret < 0) + netdev_warn(dev->net, "Error write async cmd, sts=%d\n", + ret); + return ret; } /* returns hash bit number for given MAC address @@ -765,7 +744,7 @@ static int smsc95xx_start_tx_path(struct usbnet *dev) } /* Starts the Receive path */ -static int smsc95xx_start_rx_path(struct usbnet *dev) +static int smsc95xx_start_rx_path(struct usbnet *dev, int in_pm) { struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); unsigned long flags; @@ -775,7 +754,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev) pdata->mac_cr |= MAC_CR_RXEN_; spin_unlock_irqrestore(&pdata->mac_cr_lock, flags); - ret = smsc95xx_write_reg(dev, MAC_CR, pdata->mac_cr); + ret = __smsc95xx_write_reg(dev, MAC_CR, pdata->mac_cr, in_pm); check_warn_return(ret, "Failed to write MAC_CR: %d\n", ret); return 0; @@ -994,7 +973,7 @@ static int smsc95xx_reset(struct usbnet *dev) ret = smsc95xx_start_tx_path(dev); check_warn_return(ret, "Failed to start TX path"); - ret = smsc95xx_start_rx_path(dev); + ret = smsc95xx_start_rx_path(dev, 0); check_warn_return(ret, "Failed to start RX path"); netif_dbg(dev, ifup, dev->net, "smsc95xx_reset, return 0\n"); @@ -1017,6 +996,7 @@ static const struct net_device_ops smsc95xx_netdev_ops = { static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata = NULL; + u32 val; int ret; printk(KERN_INFO SMSC_CHIPNAME " v" SMSC_DRIVER_VERSION "\n"); @@ -1047,6 +1027,15 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) /* Init all registers */ ret = smsc95xx_reset(dev); + /* detect device revision as different features may be available */ + ret = smsc95xx_read_reg(dev, ID_REV, &val); + check_warn_return(ret, "Failed to read ID_REV: %d\n", ret); + val >>= 16; + if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9512_)) + pdata->wuff_filter_count = LAN9500A_WUFF_NUM; + else + pdata->wuff_filter_count = LAN9500_WUFF_NUM; + dev->net->netdev_ops = &smsc95xx_netdev_ops; dev->net->ethtool_ops = &smsc95xx_ethtool_ops; dev->net->flags |= IFF_MULTICAST; @@ -1066,6 +1055,11 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) } } +static u16 smsc_crc(const u8 *buffer, size_t len, int filter) +{ + return bitrev16(crc16(0xFFFF, buffer, len)) << ((filter % 2) * 16); +} + static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1081,50 +1075,153 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) netdev_info(dev->net, "entering SUSPEND2 mode"); /* disable energy detect (link up) & wake up events */ - ret = smsc95xx_read_reg(dev, WUCSR, &val); + ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); val &= ~(WUCSR_MPEN_ | WUCSR_WAKE_EN_); - ret = smsc95xx_write_reg(dev, WUCSR, val); + ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); val &= ~(PM_CTL_ED_EN_ | PM_CTL_WOL_EN_); - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); /* enter suspend2 mode */ - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); val &= ~(PM_CTL_SUS_MODE_ | PM_CTL_WUPS_ | PM_CTL_PHY_RST_); val |= PM_CTL_SUS_MODE_2; - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); return 0; } + if (pdata->wolopts & (WAKE_BCAST | WAKE_MCAST | WAKE_ARP | WAKE_UCAST)) { + u32 *filter_mask = kzalloc(32, GFP_KERNEL); + u32 command[2]; + u32 offset[2]; + u32 crc[4]; + int i, filter = 0; + + memset(command, 0, sizeof(command)); + memset(offset, 0, sizeof(offset)); + memset(crc, 0, sizeof(crc)); + + if (pdata->wolopts & WAKE_BCAST) { + const u8 bcast[] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + netdev_info(dev->net, "enabling broadcast detection"); + filter_mask[filter * 4] = 0x003F; + filter_mask[filter * 4 + 1] = 0x00; + filter_mask[filter * 4 + 2] = 0x00; + filter_mask[filter * 4 + 3] = 0x00; + command[filter/4] |= 0x05UL << ((filter % 4) * 8); + offset[filter/4] |= 0x00 << ((filter % 4) * 8); + crc[filter/2] |= smsc_crc(bcast, 6, filter); + filter++; + } + + if (pdata->wolopts & WAKE_MCAST) { + const u8 mcast[] = {0x01, 0x00, 0x5E}; + netdev_info(dev->net, "enabling multicast detection"); + filter_mask[filter * 4] = 0x0007; + filter_mask[filter * 4 + 1] = 0x00; + filter_mask[filter * 4 + 2] = 0x00; + filter_mask[filter * 4 + 3] = 0x00; + command[filter/4] |= 0x09UL << ((filter % 4) * 8); + offset[filter/4] |= 0x00 << ((filter % 4) * 8); + crc[filter/2] |= smsc_crc(mcast, 3, filter); + filter++; + } + + if (pdata->wolopts & WAKE_ARP) { + const u8 arp[] = {0x08, 0x06}; + netdev_info(dev->net, "enabling ARP detection"); + filter_mask[filter * 4] = 0x0003; + filter_mask[filter * 4 + 1] = 0x00; + filter_mask[filter * 4 + 2] = 0x00; + filter_mask[filter * 4 + 3] = 0x00; + command[filter/4] |= 0x05UL << ((filter % 4) * 8); + offset[filter/4] |= 0x0C << ((filter % 4) * 8); + crc[filter/2] |= smsc_crc(arp, 2, filter); + filter++; + } + + if (pdata->wolopts & WAKE_UCAST) { + netdev_info(dev->net, "enabling unicast detection"); + filter_mask[filter * 4] = 0x003F; + filter_mask[filter * 4 + 1] = 0x00; + filter_mask[filter * 4 + 2] = 0x00; + filter_mask[filter * 4 + 3] = 0x00; + command[filter/4] |= 0x01UL << ((filter % 4) * 8); + offset[filter/4] |= 0x00 << ((filter % 4) * 8); + crc[filter/2] |= smsc_crc(dev->net->dev_addr, ETH_ALEN, filter); + filter++; + } + + for (i = 0; i < (pdata->wuff_filter_count * 4); i++) { + ret = smsc95xx_write_reg_nopm(dev, WUFF, filter_mask[i]); + if (ret < 0) + kfree(filter_mask); + check_warn_return(ret, "Error writing WUFF"); + } + kfree(filter_mask); + + for (i = 0; i < (pdata->wuff_filter_count / 4); i++) { + ret = smsc95xx_write_reg_nopm(dev, WUFF, command[i]); + check_warn_return(ret, "Error writing WUFF"); + } + + for (i = 0; i < (pdata->wuff_filter_count / 4); i++) { + ret = smsc95xx_write_reg_nopm(dev, WUFF, offset[i]); + check_warn_return(ret, "Error writing WUFF"); + } + + for (i = 0; i < (pdata->wuff_filter_count / 2); i++) { + ret = smsc95xx_write_reg_nopm(dev, WUFF, crc[i]); + check_warn_return(ret, "Error writing WUFF"); + } + + /* clear any pending pattern match packet status */ + ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); + check_warn_return(ret, "Error reading WUCSR"); + + val |= WUCSR_WUFR_; + + ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); + check_warn_return(ret, "Error writing WUCSR"); + } + if (pdata->wolopts & WAKE_MAGIC) { /* clear any pending magic packet status */ - ret = smsc95xx_read_reg(dev, WUCSR, &val); + ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); val |= WUCSR_MPR_; - ret = smsc95xx_write_reg(dev, WUCSR, val); + ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); } - /* enable/disable magic packup wake */ - ret = smsc95xx_read_reg(dev, WUCSR, &val); + /* enable/disable wakeup sources */ + ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); + if (pdata->wolopts & (WAKE_BCAST | WAKE_MCAST | WAKE_ARP | WAKE_UCAST)) { + netdev_info(dev->net, "enabling pattern match wakeup"); + val |= WUCSR_WAKE_EN_; + } else { + netdev_info(dev->net, "disabling pattern match wakeup"); + val &= ~WUCSR_WAKE_EN_; + } + if (pdata->wolopts & WAKE_MAGIC) { netdev_info(dev->net, "enabling magic packet wakeup"); val |= WUCSR_MPEN_; @@ -1133,41 +1230,41 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) val &= ~WUCSR_MPEN_; } - ret = smsc95xx_write_reg(dev, WUCSR, val); + ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); /* enable wol wakeup source */ - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); val |= PM_CTL_WOL_EN_; - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); - /* enable receiver */ - smsc95xx_start_rx_path(dev); + /* enable receiver to enable frame reception */ + smsc95xx_start_rx_path(dev, 1); /* some wol options are enabled, so enter SUSPEND0 */ netdev_info(dev->net, "entering SUSPEND0 mode"); - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); val &= (~(PM_CTL_SUS_MODE_ | PM_CTL_WUPS_ | PM_CTL_PHY_RST_)); val |= PM_CTL_SUS_MODE_0; - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); /* clear wol status */ val &= ~PM_CTL_WUPS_; val |= PM_CTL_WUPS_WOL_; - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); /* read back PM_CTRL */ - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); smsc95xx_set_feature(dev, USB_DEVICE_REMOTE_WAKEUP); @@ -1184,26 +1281,26 @@ static int smsc95xx_resume(struct usb_interface *intf) BUG_ON(!dev); - if (pdata->wolopts & WAKE_MAGIC) { + if (pdata->wolopts) { smsc95xx_clear_feature(dev, USB_DEVICE_REMOTE_WAKEUP); - /* Disable magic packup wake */ - ret = smsc95xx_read_reg(dev, WUCSR, &val); + /* clear wake-up sources */ + ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); check_warn_return(ret, "Error reading WUCSR"); - val &= ~WUCSR_MPEN_; + val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_); - ret = smsc95xx_write_reg(dev, WUCSR, val); + ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); check_warn_return(ret, "Error writing WUCSR"); /* clear wake-up status */ - ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); check_warn_return(ret, "Error reading PM_CTRL"); val &= ~PM_CTL_WOL_EN_; val |= PM_CTL_WUPS_; - ret = smsc95xx_write_reg(dev, PM_CTRL, val); + ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); check_warn_return(ret, "Error writing PM_CTRL"); } diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h index 2ff9815aa27..1f862693dd7 100644 --- a/drivers/net/usb/smsc95xx.h +++ b/drivers/net/usb/smsc95xx.h @@ -53,6 +53,8 @@ #define ID_REV_CHIP_ID_MASK_ (0xFFFF0000) #define ID_REV_CHIP_REV_MASK_ (0x0000FFFF) #define ID_REV_CHIP_ID_9500_ (0x9500) +#define ID_REV_CHIP_ID_9500A_ (0x9E00) +#define ID_REV_CHIP_ID_9512_ (0xEC00) #define INT_STS (0x08) #define INT_STS_TX_STOP_ (0x00020000) @@ -203,8 +205,11 @@ #define VLAN2 (0x124) #define WUFF (0x128) +#define LAN9500_WUFF_NUM (4) +#define LAN9500A_WUFF_NUM (8) #define WUCSR (0x12C) +#define WUCSR_WFF_PTR_RST_ (0x80000000) #define WUCSR_GUE_ (0x00000200) #define WUCSR_WUFR_ (0x00000040) #define WUCSR_MPR_ (0x00000020) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index edb81ed0695..c04110ba677 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1616,6 +1616,202 @@ void usbnet_device_suggests_idle(struct usbnet *dev) EXPORT_SYMBOL(usbnet_device_suggests_idle); /*-------------------------------------------------------------------------*/ +static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, void *data, u16 size) +{ + void *buf = NULL; + int err = -ENOMEM; + + netdev_dbg(dev->net, "usbnet_read_cmd cmd=0x%02x reqtype=%02x" + " value=0x%04x index=0x%04x size=%d\n", + cmd, reqtype, value, index, size); + + if (data) { + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + goto out; + } + + err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), + cmd, reqtype, value, index, buf, size, + USB_CTRL_GET_TIMEOUT); + if (err > 0 && err <= size) + memcpy(data, buf, err); + kfree(buf); +out: + return err; +} + +static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, + u16 size) +{ + void *buf = NULL; + int err = -ENOMEM; + + netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x" + " value=0x%04x index=0x%04x size=%d\n", + cmd, reqtype, value, index, size); + + if (data) { + buf = kmemdup(data, size, GFP_KERNEL); + if (!buf) + goto out; + } + + err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), + cmd, reqtype, value, index, buf, size, + USB_CTRL_SET_TIMEOUT); + kfree(buf); + +out: + return err; +} + +/* + * The function can't be called inside suspend/resume callback, + * otherwise deadlock will be caused. + */ +int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, void *data, u16 size) +{ + int ret; + + if (usb_autopm_get_interface(dev->intf) < 0) + return -ENODEV; + ret = __usbnet_read_cmd(dev, cmd, reqtype, value, index, + data, size); + usb_autopm_put_interface(dev->intf); + return ret; +} +EXPORT_SYMBOL_GPL(usbnet_read_cmd); + +/* + * The function can't be called inside suspend/resume callback, + * otherwise deadlock will be caused. + */ +int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, u16 size) +{ + int ret; + + if (usb_autopm_get_interface(dev->intf) < 0) + return -ENODEV; + ret = __usbnet_write_cmd(dev, cmd, reqtype, value, index, + data, size); + usb_autopm_put_interface(dev->intf); + return ret; +} +EXPORT_SYMBOL_GPL(usbnet_write_cmd); + +/* + * The function can be called inside suspend/resume callback safely + * and should only be called by suspend/resume callback generally. + */ +int usbnet_read_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, void *data, u16 size) +{ + return __usbnet_read_cmd(dev, cmd, reqtype, value, index, + data, size); +} +EXPORT_SYMBOL_GPL(usbnet_read_cmd_nopm); + +/* + * The function can be called inside suspend/resume callback safely + * and should only be called by suspend/resume callback generally. + */ +int usbnet_write_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, + u16 size) +{ + return __usbnet_write_cmd(dev, cmd, reqtype, value, index, + data, size); +} +EXPORT_SYMBOL_GPL(usbnet_write_cmd_nopm); + +static void usbnet_async_cmd_cb(struct urb *urb) +{ + struct usb_ctrlrequest *req = (struct usb_ctrlrequest *)urb->context; + int status = urb->status; + + if (status < 0) + dev_dbg(&urb->dev->dev, "%s failed with %d", + __func__, status); + + kfree(req); + usb_free_urb(urb); +} + +/* + * The caller must make sure that device can't be put into suspend + * state until the control URB completes. + */ +int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, u16 size) +{ + struct usb_ctrlrequest *req = NULL; + struct urb *urb; + int err = -ENOMEM; + void *buf = NULL; + + netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x" + " value=0x%04x index=0x%04x size=%d\n", + cmd, reqtype, value, index, size); + + urb = usb_alloc_urb(0, GFP_ATOMIC); + if (!urb) { + netdev_err(dev->net, "Error allocating URB in" + " %s!\n", __func__); + goto fail; + } + + if (data) { + buf = kmemdup(data, size, GFP_ATOMIC); + if (!buf) { + netdev_err(dev->net, "Error allocating buffer" + " in %s!\n", __func__); + goto fail_free; + } + } + + req = kmalloc(sizeof(struct usb_ctrlrequest), GFP_ATOMIC); + if (!req) { + netdev_err(dev->net, "Failed to allocate memory for %s\n", + __func__); + goto fail_free_buf; + } + + req->bRequestType = reqtype; + req->bRequest = cmd; + req->wValue = cpu_to_le16(value); + req->wIndex = cpu_to_le16(index); + req->wLength = cpu_to_le16(size); + + usb_fill_control_urb(urb, dev->udev, + usb_sndctrlpipe(dev->udev, 0), + (void *)req, buf, size, + usbnet_async_cmd_cb, req); + urb->transfer_flags |= URB_FREE_BUFFER; + + err = usb_submit_urb(urb, GFP_ATOMIC); + if (err < 0) { + netdev_err(dev->net, "Error submitting the control" + " message: status=%d\n", err); + goto fail_free; + } + return 0; + +fail_free_buf: + kfree(buf); +fail_free: + kfree(req); + usb_free_urb(urb); +fail: + return err; + +} +EXPORT_SYMBOL_GPL(usbnet_write_cmd_async); +/*-------------------------------------------------------------------------*/ static int __init usbnet_init(void) { diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e522ff70444..24f6b27e169 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -264,6 +264,7 @@ static void veth_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; dev->netdev_ops = &veth_netdev_ops; dev->ethtool_ops = &veth_ethtool_ops; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index cbf8b062535..26c502e4b87 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -212,8 +212,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, * the case of a broken device. */ if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { - if (net_ratelimit()) - pr_debug("%s: too much data\n", skb->dev->name); + net_dbg_ratelimited("%s: too much data\n", skb->dev->name); dev_kfree_skb(skb); return NULL; } @@ -333,9 +332,8 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len) skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; default: - if (net_ratelimit()) - printk(KERN_WARNING "%s: bad gso type %u.\n", - dev->name, hdr->hdr.gso_type); + net_warn_ratelimited("%s: bad gso type %u.\n", + dev->name, hdr->hdr.gso_type); goto frame_err; } @@ -344,9 +342,7 @@ static void receive_buf(struct net_device *dev, void *buf, unsigned int len) skb_shinfo(skb)->gso_size = hdr->hdr.gso_size; if (skb_shinfo(skb)->gso_size == 0) { - if (net_ratelimit()) - printk(KERN_WARNING "%s: zero gso size.\n", - dev->name); + net_warn_ratelimited("%s: zero gso size.\n", dev->name); goto frame_err; } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 0ae1bcc6da7..7e9622fea0a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1922,7 +1922,7 @@ vmxnet3_free_irqs(struct vmxnet3_adapter *adapter) free_irq(adapter->pdev->irq, adapter->netdev); break; default: - BUG_ON(true); + BUG(); } } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7b4adde93c0..8aca888734d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1107,6 +1107,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (data[IFLA_VXLAN_TOS]) vxlan->tos = nla_get_u8(data[IFLA_VXLAN_TOS]); + if (data[IFLA_VXLAN_TTL]) + vxlan->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); + if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) vxlan->learn = true; diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index eac709bed7a..df70248e2fd 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -52,9 +52,9 @@ endif quiet_cmd_build_wanxlfw = BLD FW $@ cmd_build_wanxlfw = \ - $(CPP) -Wp,-MD,$(depfile) -I$(srctree)/include $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ + $(CPP) -D__ASSEMBLY__ -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ $(LD68K) --oformat binary -Ttext 0x1000 $(obj)/wanxlfw.o -o $(obj)/wanxlfw.bin; \ - hexdump -ve '"\n" 16/1 "0x%02X,"' $(obj)/wanxlfw.bin | sed 's/0x ,//g;1s/^/static u8 firmware[]={/;$$s/,$$/\n};\n/' >$(obj)/wanxlfw.inc; \ + hexdump -ve '"\n" 16/1 "0x%02X,"' $(obj)/wanxlfw.bin | sed 's/0x ,//g;1s/^/static const u8 firmware[]={/;$$s/,$$/\n};\n/' >$(obj)/wanxlfw.inc; \ rm -f $(obj)/wanxlfw.bin $(obj)/wanxlfw.o $(obj)/wanxlfw.inc: $(src)/wanxlfw.S diff --git a/drivers/net/wan/wanxlfw.S b/drivers/net/wan/wanxlfw.S index 73aae2bf2f1..21565d59ec7 100644 --- a/drivers/net/wan/wanxlfw.S +++ b/drivers/net/wan/wanxlfw.S @@ -35,6 +35,7 @@ */ #include <linux/hdlc.h> +#include <linux/hdlc/ioctl.h> #include "wanxl.h" /* memory addresses and offsets */ diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig index 258ca596e1b..982d16b5a84 100644 --- a/drivers/pps/Kconfig +++ b/drivers/pps/Kconfig @@ -6,7 +6,6 @@ menu "PPS support" config PPS tristate "PPS support" - depends on EXPERIMENTAL ---help--- PPS (Pulse Per Second) is a special pulse provided by some GPS antennae. Userland can use it to get a high-precision time diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index ffdf712f9a6..70c5836ebfc 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -4,13 +4,9 @@ menu "PTP clock support" -comment "Enable Device Drivers -> PPS to see the PTP clock options." - depends on PPS=n - config PTP_1588_CLOCK tristate "PTP clock support" - depends on EXPERIMENTAL - depends on PPS + select PPS help The IEEE 1588 standard defines a method to precisely synchronize distributed clocks over Ethernet networks. The @@ -29,8 +25,9 @@ config PTP_1588_CLOCK config PTP_1588_CLOCK_GIANFAR tristate "Freescale eTSEC as PTP clock" - depends on PTP_1588_CLOCK depends on GIANFAR + select PTP_1588_CLOCK + default y help This driver adds support for using the eTSEC as a PTP clock. This clock is only useful if your PTP programs are @@ -42,8 +39,9 @@ config PTP_1588_CLOCK_GIANFAR config PTP_1588_CLOCK_IXP46X tristate "Intel IXP46x as PTP clock" - depends on PTP_1588_CLOCK depends on IXP4XX_ETH + select PTP_1588_CLOCK + default y help This driver adds support for using the IXP46X as a PTP clock. This clock is only useful if your PTP programs are @@ -54,13 +52,13 @@ config PTP_1588_CLOCK_IXP46X will be called ptp_ixp46x. comment "Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks." - depends on PTP_1588_CLOCK && (PHYLIB=n || NETWORK_PHY_TIMESTAMPING=n) + depends on PHYLIB=n || NETWORK_PHY_TIMESTAMPING=n config DP83640_PHY tristate "Driver for the National Semiconductor DP83640 PHYTER" - depends on PTP_1588_CLOCK depends on NETWORK_PHY_TIMESTAMPING depends on PHYLIB + select PTP_1588_CLOCK ---help--- Supports the DP83640 PHYTER with IEEE 1588 features. @@ -74,8 +72,9 @@ config DP83640_PHY config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" - depends on PTP_1588_CLOCK depends on PCH_GBE + select PTP_1588_CLOCK + default y help This driver adds support for using the PCH EG20T as a PTP clock. The hardware supports time stamping of PTP packets diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index e7f301da290..4f8ae8057a7 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -33,9 +33,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) { struct ptp_clock_caps caps; struct ptp_clock_request req; + struct ptp_sys_offset sysoff; struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_time *pct; + struct timespec ts; int enable, err = 0; + unsigned int i; switch (cmd) { @@ -88,6 +92,34 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = ops->enable(ops, &req, enable); break; + case PTP_SYS_OFFSET: + if (copy_from_user(&sysoff, (void __user *)arg, + sizeof(sysoff))) { + err = -EFAULT; + break; + } + if (sysoff.n_samples > PTP_MAX_SAMPLES) { + err = -EINVAL; + break; + } + pct = &sysoff.ts[0]; + for (i = 0; i < sysoff.n_samples; i++) { + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + ptp->info->gettime(ptp->info, &ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + } + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + if (copy_to_user((void __user *)arg, &sysoff, sizeof(sysoff))) + err = -EFAULT; + break; + default: err = -ENOTTY; break; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7f93f34b7f9..67898fa9c44 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -42,6 +42,21 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Experimental Zero Copy TX"); #define VHOST_MAX_PEND 128 #define VHOST_GOODCOPY_LEN 256 +/* + * For transmit, used buffer len is unused; we override it to track buffer + * status internally; used for zerocopy tx only. + */ +/* Lower device DMA failed */ +#define VHOST_DMA_FAILED_LEN 3 +/* Lower device DMA done */ +#define VHOST_DMA_DONE_LEN 2 +/* Lower device DMA in progress */ +#define VHOST_DMA_IN_PROGRESS 1 +/* Buffer unused */ +#define VHOST_DMA_CLEAR_LEN 0 + +#define VHOST_DMA_IS_DONE(len) ((len) >= VHOST_DMA_DONE_LEN) + enum { VHOST_NET_VQ_RX = 0, VHOST_NET_VQ_TX = 1, @@ -62,8 +77,33 @@ struct vhost_net { * We only do this when socket buffer fills up. * Protected by tx vq lock. */ enum vhost_net_poll_state tx_poll_state; + /* Number of TX recently submitted. + * Protected by tx vq lock. */ + unsigned tx_packets; + /* Number of times zerocopy TX recently failed. + * Protected by tx vq lock. */ + unsigned tx_zcopy_err; }; +static void vhost_net_tx_packet(struct vhost_net *net) +{ + ++net->tx_packets; + if (net->tx_packets < 1024) + return; + net->tx_packets = 0; + net->tx_zcopy_err = 0; +} + +static void vhost_net_tx_err(struct vhost_net *net) +{ + ++net->tx_zcopy_err; +} + +static bool vhost_net_tx_select_zcopy(struct vhost_net *net) +{ + return net->tx_packets / 64 >= net->tx_zcopy_err; +} + static bool vhost_sock_zcopy(struct socket *sock) { return unlikely(experimental_zcopytx) && @@ -126,6 +166,55 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock) net->tx_poll_state = VHOST_NET_POLL_STARTED; } +/* In case of DMA done not in order in lower device driver for some reason. + * upend_idx is used to track end of used idx, done_idx is used to track head + * of used idx. Once lower device DMA done contiguously, we will signal KVM + * guest used idx. + */ +static int vhost_zerocopy_signal_used(struct vhost_net *net, + struct vhost_virtqueue *vq) +{ + int i; + int j = 0; + + for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { + if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) + vhost_net_tx_err(net); + if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { + vq->heads[i].len = VHOST_DMA_CLEAR_LEN; + vhost_add_used_and_signal(vq->dev, vq, + vq->heads[i].id, 0); + ++j; + } else + break; + } + if (j) + vq->done_idx = i; + return j; +} + +static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) +{ + struct vhost_ubuf_ref *ubufs = ubuf->ctx; + struct vhost_virtqueue *vq = ubufs->vq; + int cnt = atomic_read(&ubufs->kref.refcount); + + /* + * Trigger polling thread if guest stopped submitting new buffers: + * in this case, the refcount after decrement will eventually reach 1 + * so here it is 2. + * We also trigger polling periodically after each 16 packets + * (the value 16 here is more or less arbitrary, it's tuned to trigger + * less than 10% of times). + */ + if (cnt <= 2 || !(cnt % 16)) + vhost_poll_queue(&vq->poll); + /* set len to mark this desc buffers done DMA */ + vq->heads[ubuf->desc].len = success ? + VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN; + vhost_ubuf_put(ubufs); +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -172,7 +261,7 @@ static void handle_tx(struct vhost_net *net) for (;;) { /* Release DMAs done buffers first */ if (zcopy) - vhost_zerocopy_signal_used(vq); + vhost_zerocopy_signal_used(net, vq); head = vhost_get_vq_desc(&net->dev, vq, vq->iov, ARRAY_SIZE(vq->iov), @@ -227,7 +316,8 @@ static void handle_tx(struct vhost_net *net) /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy) { vq->heads[vq->upend_idx].id = head; - if (len < VHOST_GOODCOPY_LEN) { + if (!vhost_net_tx_select_zcopy(net) || + len < VHOST_GOODCOPY_LEN) { /* copy don't need to wait for DMA done */ vq->heads[vq->upend_idx].len = VHOST_DMA_DONE_LEN; @@ -237,7 +327,8 @@ static void handle_tx(struct vhost_net *net) } else { struct ubuf_info *ubuf = &vq->ubuf_info[head]; - vq->heads[vq->upend_idx].len = len; + vq->heads[vq->upend_idx].len = + VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; ubuf->ctx = vq->ubufs; ubuf->desc = vq->upend_idx; @@ -268,8 +359,9 @@ static void handle_tx(struct vhost_net *net) if (!zcopy) vhost_add_used_and_signal(&net->dev, vq, head, 0); else - vhost_zerocopy_signal_used(vq); + vhost_zerocopy_signal_used(net, vq); total_len += len; + vhost_net_tx_packet(net); if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); break; @@ -594,9 +686,18 @@ static int vhost_net_release(struct inode *inode, struct file *f) struct vhost_net *n = f->private_data; struct socket *tx_sock; struct socket *rx_sock; + int i; vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); + vhost_dev_stop(&n->dev); + for (i = 0; i < n->dev.nvqs; ++i) { + /* Wait for all lower device DMAs done. */ + if (n->dev.vqs[i].ubufs) + vhost_ubuf_put_and_wait(n->dev.vqs[i].ubufs); + + vhost_zerocopy_signal_used(n, &n->dev.vqs[i]); + } vhost_dev_cleanup(&n->dev, false); if (tx_sock) fput(tx_sock->file); @@ -729,7 +830,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) if (oldubufs) { vhost_ubuf_put_and_wait(oldubufs); mutex_lock(&vq->mutex); - vhost_zerocopy_signal_used(vq); + vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); } diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index aa31692064d..23c138fdc19 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -895,6 +895,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_scsi_clear_endpoint(s, &backend); } + vhost_dev_stop(&s->dev); vhost_dev_cleanup(&s->dev, false); kfree(s); return 0; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 99ac2cb08b4..ef8f5988f85 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -26,10 +26,6 @@ #include <linux/kthread.h> #include <linux/cgroup.h> -#include <linux/net.h> -#include <linux/if_packet.h> -#include <linux/if_arp.h> - #include "vhost.h" enum { @@ -414,28 +410,16 @@ long vhost_dev_reset_owner(struct vhost_dev *dev) return 0; } -/* In case of DMA done not in order in lower device driver for some reason. - * upend_idx is used to track end of used idx, done_idx is used to track head - * of used idx. Once lower device DMA done contiguously, we will signal KVM - * guest used idx. - */ -int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq) +void vhost_dev_stop(struct vhost_dev *dev) { int i; - int j = 0; - - for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { - if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) { - vq->heads[i].len = VHOST_DMA_CLEAR_LEN; - vhost_add_used_and_signal(vq->dev, vq, - vq->heads[i].id, 0); - ++j; - } else - break; + + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } } - if (j) - vq->done_idx = i; - return j; } /* Caller should have device mutex if and only if locked is set */ @@ -444,17 +428,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { - vhost_poll_stop(&dev->vqs[i].poll); - vhost_poll_flush(&dev->vqs[i].poll); - } - /* Wait for all lower device DMAs done. */ - if (dev->vqs[i].ubufs) - vhost_ubuf_put_and_wait(dev->vqs[i].ubufs); - - /* Signal guest as appropriate. */ - vhost_zerocopy_signal_used(&dev->vqs[i]); - if (dev->vqs[i].error_ctx) eventfd_ctx_put(dev->vqs[i].error_ctx); if (dev->vqs[i].error) @@ -1599,14 +1572,3 @@ void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); kfree(ubufs); } - -void vhost_zerocopy_callback(struct ubuf_info *ubuf) -{ - struct vhost_ubuf_ref *ubufs = ubuf->ctx; - struct vhost_virtqueue *vq = ubufs->vq; - - vhost_poll_queue(&vq->poll); - /* set len = 1 to mark this desc buffers done DMA */ - vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; - kref_put(&ubufs->kref, vhost_zerocopy_done_signal); -} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 1125af3d27d..5e19e3d5db8 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -7,17 +7,11 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/file.h> -#include <linux/skbuff.h> #include <linux/uio.h> #include <linux/virtio_config.h> #include <linux/virtio_ring.h> #include <linux/atomic.h> -/* This is for zerocopy, used buffer len is set to 1 when lower device DMA - * done */ -#define VHOST_DMA_DONE_LEN 1 -#define VHOST_DMA_CLEAR_LEN 0 - struct vhost_device; struct vhost_work; @@ -70,6 +64,8 @@ struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy); void vhost_ubuf_put(struct vhost_ubuf_ref *); void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *); +struct ubuf_info; + /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -167,6 +163,7 @@ long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); long vhost_dev_check_owner(struct vhost_dev *); long vhost_dev_reset_owner(struct vhost_dev *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); +void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); int vhost_vq_access_ok(struct vhost_virtqueue *vq); int vhost_log_access_ok(struct vhost_dev *); @@ -191,8 +188,6 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); -void vhost_zerocopy_callback(struct ubuf_info *); -int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq); #define vq_err(vq, fmt, ...) do { \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index b006ba0a9f4..243eea1e33d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -51,6 +51,26 @@ extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) +/* Reserved Ethernet Addresses per IEEE 802.1Q */ +static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = +{ 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; + +/** + * is_link_local_ether_addr - Determine if given Ethernet address is link-local + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per + * IEEE 802.1Q 8.6.3 Frame filtering. + */ +static inline bool is_link_local_ether_addr(const u8 *addr) +{ + __be16 *a = (__be16 *)addr; + static const __be16 *b = (const __be16 *)eth_reserved_addr_base; + static const __be16 m = cpu_to_be16(0xfff0); + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; +} + /** * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address diff --git a/include/linux/filter.h b/include/linux/filter.h index 24d251f3bab..c45eabc135e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -45,6 +45,7 @@ extern void sk_unattached_filter_destroy(struct sk_filter *fp); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); +extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len); #ifdef CONFIG_BPF_JIT extern void bpf_jit_compile(struct sk_filter *fp); @@ -123,6 +124,8 @@ enum { BPF_S_ANC_CPU, BPF_S_ANC_ALU_XOR_X, BPF_S_ANC_SECCOMP_LD_W, + BPF_S_ANC_VLAN_TAG, + BPF_S_ANC_VLAN_TAG_PRESENT, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild index 1fb26448faa..e69de29bb2d 100644 --- a/include/linux/hdlc/Kbuild +++ b/include/linux/hdlc/Kbuild @@ -1 +0,0 @@ -header-y += ioctl.h diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 06177ba10a1..e83512f63df 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -282,6 +282,25 @@ static inline int ktime_equal(const ktime_t cmp1, const ktime_t cmp2) return cmp1.tv64 == cmp2.tv64; } +/** + * ktime_compare - Compares two ktime_t variables for less, greater or equal + * @cmp1: comparable1 + * @cmp2: comparable2 + * + * Returns ... + * cmp1 < cmp2: return <0 + * cmp1 == cmp2: return 0 + * cmp1 > cmp2: return >0 + */ +static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2) +{ + if (cmp1.tv64 < cmp2.tv64) + return -1; + if (cmp1.tv64 > cmp2.tv64) + return 1; + return 0; +} + static inline s64 ktime_to_us(const ktime_t kt) { struct timeval tv = ktime_to_timeval(kt); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f8eda0276f0..7bf867c9704 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -887,6 +887,10 @@ struct netdev_fcoe_hbainfo { * struct net_device *dev, int idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. + * + * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) + * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, + * struct net_device *dev) */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -998,6 +1002,12 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, int idx); + + int (*ndo_bridge_setlink)(struct net_device *dev, + struct nlmsghdr *nlh); + int (*ndo_bridge_getlink)(struct sk_buff *skb, + u32 pid, u32 seq, + struct net_device *dev); }; /* diff --git a/include/linux/platform_data/cpsw.h b/include/linux/platform_data/cpsw.h index c4e23d02949..b5c16c3df45 100644 --- a/include/linux/platform_data/cpsw.h +++ b/include/linux/platform_data/cpsw.h @@ -33,6 +33,9 @@ struct cpsw_platform_data { u32 slaves; /* number of slave cpgmac ports */ struct cpsw_slave_data *slave_data; + u32 cpts_active_slave; /* time stamping slave */ + u32 cpts_clock_mult; /* convert input clock ticks to nanoseconds */ + u32 cpts_clock_shift; /* convert input clock ticks to nanoseconds */ u32 ale_reg_ofs; /* address lookup engine reg offset */ u32 ale_entries; /* ale table size */ @@ -41,6 +44,7 @@ struct cpsw_platform_data { u32 host_port_num; /* The port number for the host port */ u32 hw_stats_reg_ofs; /* cpsw hardware statistics counters */ + u32 cpts_reg_ofs; /* cpts registers */ u32 bd_ram_ofs; /* embedded buffer descriptor RAM offset*/ u32 bd_ram_size; /*buffer descriptor ram size */ diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index b081c7245ec..044a124bfbb 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -12,6 +12,7 @@ struct macb_platform_data { u32 phy_mask; int phy_irq_pin; /* PHY IRQ */ u8 is_rmii; /* using RMII interface? */ + u8 rev_eth_addr; /* reverse Ethernet address byte order */ }; #endif /* __MACB_PDATA_H__ */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 7002bbfd5d4..489dd7bb28e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -69,4 +69,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int idx); + +extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u16 mode); #endif /* __LINUX_RTNETLINK_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6a2c34e6d96..f2af494330a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -235,11 +235,13 @@ enum { /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. + * The zerocopy_success argument is true if zero copy transmit occurred, + * false on data copy or out of memory error caused by data copy attempt. * The ctx field is used to track device context. * The desc field is used to track userspace buffer index. */ struct ubuf_info { - void (*callback)(struct ubuf_info *); + void (*callback)(struct ubuf_info *, bool zerocopy_success); void *ctx; unsigned long desc; }; @@ -566,6 +568,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) } extern void kfree_skb(struct sk_buff *skb); +extern void skb_tx_error(struct sk_buff *skb); extern void consume_skb(struct sk_buff *skb); extern void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; @@ -643,7 +646,7 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, extern void __skb_get_rxhash(struct sk_buff *skb); static inline __u32 skb_get_rxhash(struct sk_buff *skb) { - if (!skb->rxhash) + if (!skb->l4_rxhash) __skb_get_rxhash(skb); return skb->rxhash; diff --git a/include/linux/timecompare.h b/include/linux/timecompare.h deleted file mode 100644 index 546e2234e4b..00000000000 --- a/include/linux/timecompare.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Utility code which helps transforming between two different time - * bases, called "source" and "target" time in this code. - * - * Source time has to be provided via the timecounter API while target - * time is accessed via a function callback whose prototype - * intentionally matches ktime_get() and ktime_get_real(). These - * interfaces where chosen like this so that the code serves its - * initial purpose without additional glue code. - * - * This purpose is synchronizing a hardware clock in a NIC with system - * time, in order to implement the Precision Time Protocol (PTP, - * IEEE1588) with more accurate hardware assisted time stamping. In - * that context only synchronization against system time (= - * ktime_get_real()) is currently needed. But this utility code might - * become useful in other situations, which is why it was written as - * general purpose utility code. - * - * The source timecounter is assumed to return monotonically - * increasing time (but this code does its best to compensate if that - * is not the case) whereas target time may jump. - * - * The target time corresponding to a source time is determined by - * reading target time, reading source time, reading target time - * again, then assuming that average target time corresponds to source - * time. In other words, the assumption is that reading the source - * time is slow and involves equal time for sending the request and - * receiving the reply, whereas reading target time is assumed to be - * fast. - * - * Copyright (C) 2009 Intel Corporation. - * Author: Patrick Ohly <patrick.ohly@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ -#ifndef _LINUX_TIMECOMPARE_H -#define _LINUX_TIMECOMPARE_H - -#include <linux/clocksource.h> -#include <linux/ktime.h> - -/** - * struct timecompare - stores state and configuration for the two clocks - * - * Initialize to zero, then set source/target/num_samples. - * - * Transformation between source time and target time is done with: - * target_time = source_time + offset + - * (source_time - last_update) * skew / - * TIMECOMPARE_SKEW_RESOLUTION - * - * @source: used to get source time stamps via timecounter_read() - * @target: function returning target time (for example, ktime_get - * for monotonic time, or ktime_get_real for wall clock) - * @num_samples: number of times that source time and target time are to - * be compared when determining their offset - * @offset: (target time - source time) at the time of the last update - * @skew: average (target time - source time) / delta source time * - * TIMECOMPARE_SKEW_RESOLUTION - * @last_update: last source time stamp when time offset was measured - */ -struct timecompare { - struct timecounter *source; - ktime_t (*target)(void); - int num_samples; - - s64 offset; - s64 skew; - u64 last_update; -}; - -/** - * timecompare_transform - transform source time stamp into target time base - * @sync: context for time sync - * @source_tstamp: the result of timecounter_read() or - * timecounter_cyc2time() - */ -extern ktime_t timecompare_transform(struct timecompare *sync, - u64 source_tstamp); - -/** - * timecompare_offset - measure current (target time - source time) offset - * @sync: context for time sync - * @offset: average offset during sample period returned here - * @source_tstamp: average source time during sample period returned here - * - * Returns number of samples used. Might be zero (= no result) in the - * unlikely case that target time was monotonically decreasing for all - * samples (= broken). - */ -extern int timecompare_offset(struct timecompare *sync, - s64 *offset, - u64 *source_tstamp); - -extern void __timecompare_update(struct timecompare *sync, - u64 source_tstamp); - -/** - * timecompare_update - update offset and skew by measuring current offset - * @sync: context for time sync - * @source_tstamp: the result of timecounter_read() or - * timecounter_cyc2time(), pass zero to force update - * - * Updates are only done at most once per second. - */ -static inline void timecompare_update(struct timecompare *sync, - u64 source_tstamp) -{ - if (!source_tstamp || - (s64)(source_tstamp - sync->last_update) >= NSEC_PER_SEC) - __timecompare_update(sync, source_tstamp); -} - -#endif /* _LINUX_TIMECOMPARE_H */ diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h new file mode 100644 index 00000000000..3b8f9d4fc3f --- /dev/null +++ b/include/linux/usb/cdc_ncm.h @@ -0,0 +1,134 @@ +/* + * Copyright (C) ST-Ericsson 2010-2012 + * Contact: Alexey Orishko <alexey.orishko@stericsson.com> + * Original author: Hans Petter Selasky <hans.petter.selasky@stericsson.com> + * + * USB Host Driver for Network Control Model (NCM) + * http://www.usb.org/developers/devclass_docs/NCM10.zip + * + * The NCM encoding, decoding and initialization logic + * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h + * + * This software is available to you under a choice of one of two + * licenses. You may choose this file to be licensed under the terms + * of the GNU General Public License (GPL) Version 2 or the 2-clause + * BSD license listed below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define CDC_NCM_COMM_ALTSETTING_NCM 0 +#define CDC_NCM_COMM_ALTSETTING_MBIM 1 + +#define CDC_NCM_DATA_ALTSETTING_NCM 1 +#define CDC_NCM_DATA_ALTSETTING_MBIM 2 + +/* CDC NCM subclass 3.2.1 */ +#define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 + +/* Maximum NTB length */ +#define CDC_NCM_NTB_MAX_SIZE_TX 32768 /* bytes */ +#define CDC_NCM_NTB_MAX_SIZE_RX 32768 /* bytes */ + +/* Minimum value for MaxDatagramSize, ch. 6.2.9 */ +#define CDC_NCM_MIN_DATAGRAM_SIZE 1514 /* bytes */ + +/* Minimum value for MaxDatagramSize, ch. 8.1.3 */ +#define CDC_MBIM_MIN_DATAGRAM_SIZE 2048 /* bytes */ + +#define CDC_NCM_MIN_TX_PKT 512 /* bytes */ + +/* Default value for MaxDatagramSize */ +#define CDC_NCM_MAX_DATAGRAM_SIZE 8192 /* bytes */ + +/* + * Maximum amount of datagrams in NCM Datagram Pointer Table, not counting + * the last NULL entry. + */ +#define CDC_NCM_DPT_DATAGRAMS_MAX 40 + +/* Restart the timer, if amount of datagrams is less than given value */ +#define CDC_NCM_RESTART_TIMER_DATAGRAM_CNT 3 +#define CDC_NCM_TIMER_PENDING_CNT 2 +#define CDC_NCM_TIMER_INTERVAL (400UL * NSEC_PER_USEC) + +/* The following macro defines the minimum header space */ +#define CDC_NCM_MIN_HDR_SIZE \ + (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \ + (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) + +#define CDC_NCM_NDP_SIZE \ + (sizeof(struct usb_cdc_ncm_ndp16) + \ + (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16)) + +#define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ + (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) +#define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) + +struct cdc_ncm_ctx { + struct usb_cdc_ncm_ntb_parameters ncm_parm; + struct hrtimer tx_timer; + struct tasklet_struct bh; + + const struct usb_cdc_ncm_desc *func_desc; + const struct usb_cdc_mbim_desc *mbim_desc; + const struct usb_cdc_header_desc *header_desc; + const struct usb_cdc_union_desc *union_desc; + const struct usb_cdc_ether_desc *ether_desc; + + struct net_device *netdev; + struct usb_device *udev; + struct usb_host_endpoint *in_ep; + struct usb_host_endpoint *out_ep; + struct usb_host_endpoint *status_ep; + struct usb_interface *intf; + struct usb_interface *control; + struct usb_interface *data; + + struct sk_buff *tx_curr_skb; + struct sk_buff *tx_rem_skb; + __le32 tx_rem_sign; + + spinlock_t mtx; + atomic_t stop; + + u32 tx_timer_pending; + u32 tx_curr_frame_num; + u32 rx_speed; + u32 tx_speed; + u32 rx_max; + u32 tx_max; + u32 max_datagram_size; + u16 tx_max_datagrams; + u16 tx_remainder; + u16 tx_modulus; + u16 tx_ndp_modulus; + u16 tx_seq; + u16 rx_seq; + u16 connected; +}; + +extern int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +extern void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); +extern struct sk_buff *cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign); +extern int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); +extern int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index ddbbb7de894..9bbeabf66c5 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -163,6 +163,16 @@ extern int usbnet_resume(struct usb_interface *); extern void usbnet_disconnect(struct usb_interface *); extern void usbnet_device_suggests_idle(struct usbnet *dev); +extern int usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, void *data, u16 size); +extern int usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, u16 size); +extern int usbnet_read_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, void *data, u16 size); +extern int usbnet_write_cmd_nopm(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, u16 size); +extern int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, + u16 value, u16 index, const void *data, u16 size); /* Drivers that reuse some of the standard USB CDC infrastructure * (notably, using multiple interfaces according to the CDC diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b5f8988e428..0a996a3517e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -53,7 +53,6 @@ struct unix_sock { struct path path; struct mutex readlock; struct sock *peer; - struct sock *other; struct list_head link; atomic_long_t inflight; spinlock_t lock; diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index b6a6eeb3905..2581638f4a3 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -24,12 +24,12 @@ struct cgroup_cls_state u32 classid; }; -extern void sock_update_classid(struct sock *sk); +extern void sock_update_classid(struct sock *sk, struct task_struct *task); #if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) static inline u32 task_cls_classid(struct task_struct *p) { - int classid; + u32 classid; if (in_interrupt()) return 0; @@ -61,7 +61,7 @@ static inline u32 task_cls_classid(struct task_struct *p) } #endif #else /* !CGROUP_NET_CLS_CGROUP */ -static inline void sock_update_classid(struct sock *sk) +static inline void sock_update_classid(struct sock *sk, struct task_struct *task) { } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8a2a203eb15..fdc48a94a06 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -47,6 +47,8 @@ struct fib6_config { unsigned long fc_expires; struct nlattr *fc_mx; int fc_mx_len; + int fc_mp_len; + struct nlattr *fc_mp; struct nl_info fc_nlinfo; }; @@ -99,6 +101,14 @@ struct rt6_info { struct in6_addr rt6i_gateway; + /* Multipath routes: + * siblings is a list of rt6_info that have the the same metric/weight, + * destination, but not the same gateway. nsiblings is just a cache + * to speed up lookup. + */ + struct list_head rt6i_siblings; + unsigned int rt6i_nsiblings; + atomic_t rt6i_ref; /* These are in a separate cache line. */ @@ -107,7 +117,6 @@ struct rt6_info { struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; u32 rt6i_metric; - u32 rt6i_peer_genid; struct inet6_dev *rt6i_idev; unsigned long _rt6i_peer; @@ -203,6 +212,15 @@ static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) dst_hold(new); } +static inline void ip6_rt_put(struct rt6_info *rt) +{ + /* dst_release() accepts a NULL parameter. + * We rely on dst being first structure in struct rt6_info + */ + BUILD_BUG_ON(offsetof(struct rt6_info, dst) != 0); + dst_release(&rt->dst); +} + struct fib6_walker_t { struct list_head lh; struct fib6_node *root, *node; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ee75ccdf518..68c69d54d39 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -22,7 +22,10 @@ #include <linux/ip.h> #include <linux/ipv6.h> /* for struct ipv6hdr */ #include <net/ipv6.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_IP_VS_IPV6) +#include <linux/netfilter_ipv6/ip6_tables.h> +#endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif #include <net/net_namespace.h> /* Netw namespace */ @@ -103,30 +106,117 @@ static inline struct net *seq_file_single_net(struct seq_file *seq) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; - struct ip_vs_iphdr { - int len; - __u8 protocol; + __u32 len; /* IPv4 simply where L4 starts + IPv6 where L4 Transport Header starts */ + __u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */ + __u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/ + __s16 protocol; + __s32 flags; union nf_inet_addr saddr; union nf_inet_addr daddr; }; +/* Dependency to module: nf_defrag_ipv6 */ +#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE) +static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) +{ + return skb->nfct_reasm; +} +static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, + int len, void *buffer, + const struct ip_vs_iphdr *ipvsh) +{ + if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) + return skb_header_pointer(skb_nfct_reasm(skb), + ipvsh->thoff_reasm, len, buffer); + + return skb_header_pointer(skb, offset, len, buffer); +} +#else +static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb) +{ + return NULL; +} +static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset, + int len, void *buffer, + const struct ip_vs_iphdr *ipvsh) +{ + return skb_header_pointer(skb, offset, len, buffer); +} +#endif + static inline void -ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr) +ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr) +{ + const struct iphdr *iph = nh; + + iphdr->len = iph->ihl * 4; + iphdr->fragoffs = 0; + iphdr->protocol = iph->protocol; + iphdr->saddr.ip = iph->saddr; + iphdr->daddr.ip = iph->daddr; +} + +/* This function handles filling *ip_vs_iphdr, both for IPv4 and IPv6. + * IPv6 requires some extra work, as finding proper header position, + * depend on the IPv6 extension headers. + */ +static inline void +ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - const struct ipv6hdr *iph = nh; - iphdr->len = sizeof(struct ipv6hdr); - iphdr->protocol = iph->nexthdr; + const struct ipv6hdr *iph = + (struct ipv6hdr *)skb_network_header(skb); iphdr->saddr.in6 = iph->saddr; iphdr->daddr.in6 = iph->daddr; + /* ipv6_find_hdr() updates len, flags, thoff_reasm */ + iphdr->thoff_reasm = 0; + iphdr->len = 0; + iphdr->flags = 0; + iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1, + &iphdr->fragoffs, + &iphdr->flags); + /* get proto from re-assembled packet and it's offset */ + if (skb_nfct_reasm(skb)) + iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb), + &iphdr->thoff_reasm, + -1, NULL, NULL); + } else #endif { - const struct iphdr *iph = nh; - iphdr->len = iph->ihl * 4; - iphdr->protocol = iph->protocol; + const struct iphdr *iph = + (struct iphdr *)skb_network_header(skb); + iphdr->len = iph->ihl * 4; + iphdr->fragoffs = 0; + iphdr->protocol = iph->protocol; + iphdr->saddr.ip = iph->saddr; + iphdr->daddr.ip = iph->daddr; + } +} + +/* This function is a faster version of ip_vs_fill_iph_skb(). + * Where we only populate {s,d}addr (and avoid calling ipv6_find_hdr()). + * This is used by the some of the ip_vs_*_schedule() functions. + * (Mostly done to avoid ABI breakage of external schedulers) + */ +static inline void +ip_vs_fill_iph_addr_only(int af, const struct sk_buff *skb, + struct ip_vs_iphdr *iphdr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + const struct ipv6hdr *iph = + (struct ipv6hdr *)skb_network_header(skb); + iphdr->saddr.in6 = iph->saddr; + iphdr->daddr.in6 = iph->daddr; + } else +#endif + { + const struct iphdr *iph = + (struct iphdr *)skb_network_header(skb); iphdr->saddr.ip = iph->saddr; iphdr->daddr.ip = iph->daddr; } @@ -165,7 +255,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]", + len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6c]", &addr->in6) + 1; else #endif @@ -398,27 +488,26 @@ struct ip_vs_protocol { int (*conn_schedule)(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp); + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph); struct ip_vs_conn * (*conn_in_get)(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse); struct ip_vs_conn * (*conn_out_get)(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse); - int (*snat_handler)(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp); + int (*snat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); - int (*dnat_handler)(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp); + int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph); int (*csum_check)(int af, struct sk_buff *skb, struct ip_vs_protocol *pp); @@ -518,7 +607,7 @@ struct ip_vs_conn { NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp); + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); /* Note: we can group the following members into a structure, in order to save more space, and the following members are @@ -769,13 +858,11 @@ struct ip_vs_app { struct ip_vs_conn * (*conn_in_get)(const struct sk_buff *skb, struct ip_vs_app *app, - const struct iphdr *iph, unsigned int proto_off, - int inverse); + const struct iphdr *iph, int inverse); struct ip_vs_conn * (*conn_out_get)(const struct sk_buff *skb, struct ip_vs_app *app, - const struct iphdr *iph, unsigned int proto_off, - int inverse); + const struct iphdr *iph, int inverse); int (*state_transition)(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, @@ -1074,14 +1161,12 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse); struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse); /* put back the conn without restarting its timer */ @@ -1254,9 +1339,10 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd, int *ignored); + struct ip_vs_proto_data *pd, int *ignored, + struct ip_vs_iphdr *iph); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd); + struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph); extern void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg); @@ -1315,33 +1401,38 @@ extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst, /* * Various IPVS packet transmitters (from ip_vs_xmit.c) */ -extern int ip_vs_null_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_bypass_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_nat_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_tunnel_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_dr_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_icmp_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - int offset, unsigned int hooknum); +extern int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); +extern int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + struct ip_vs_iphdr *iph); +extern int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); +extern int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + struct ip_vs_iphdr *iph); +extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); +extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, int offset, + unsigned int hooknum, struct ip_vs_iphdr *iph); extern void ip_vs_dst_reset(struct ip_vs_dest *dest); #ifdef CONFIG_IP_VS_IPV6 -extern int ip_vs_bypass_xmit_v6 -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_nat_xmit_v6 -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_tunnel_xmit_v6 -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_dr_xmit_v6 -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); -extern int ip_vs_icmp_xmit_v6 -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - int offset, unsigned int hooknum); +extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + struct ip_vs_iphdr *iph); +extern int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + struct ip_vs_iphdr *iph); +extern int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + struct ip_vs_iphdr *iph); +extern int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph); +extern int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, int offset, + unsigned int hooknum, struct ip_vs_iphdr *iph); #endif #ifdef CONFIG_SYSCTL diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index 5e5eb1f9f14..3573a81815a 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -62,6 +62,9 @@ struct netns_sctp { /* Whether Cookie Preservative is enabled(1) or not(0) */ int cookie_preserve_enable; + /* The namespace default hmac alg */ + char *sctp_hmac_alg; + /* Valid.Cookie.Life - 60 seconds */ unsigned int valid_cookie_life; diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b01d8dd9ee7..a51dbd17c2d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -49,13 +49,16 @@ struct request_sock_ops { struct request_sock *req); }; +extern int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req); + /* struct request_sock - mini sock to represent a connection request */ struct request_sock { struct request_sock *dl_next; /* Must be first member! */ u16 mss; - u8 retrans; - u8 cookie_ts; /* syncookie: encode tcpopts in timestamp */ + u8 num_retrans; /* number of retransmits */ + u8 cookie_ts:1; /* syncookie: encode tcpopts in timestamp */ + u8 num_timeout:7; /* number of timeouts */ /* The following two fields can be easily recomputed I think -AK */ u32 window_clamp; /* window clamp at creation time */ u32 rcv_wnd; /* rcv_wnd offered first time */ @@ -231,7 +234,7 @@ static inline int reqsk_queue_removed(struct request_sock_queue *queue, { struct listen_sock *lopt = queue->listen_opt; - if (req->retrans == 0) + if (req->num_timeout == 0) --lopt->qlen_young; return --lopt->qlen; @@ -269,7 +272,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, struct listen_sock *lopt = queue->listen_opt; req->expires = jiffies + timeout; - req->retrans = 0; + req->num_retrans = 0; + req->num_timeout = 0; req->sk = NULL; req->dl_next = lopt->syn_table[hash]; diff --git a/include/net/route.h b/include/net/route.h index bc40b633a5c..2ea40c1b5e0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -198,10 +198,13 @@ struct in_ifaddr; extern void fib_add_ifaddr(struct in_ifaddr *); extern void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); -static inline void ip_rt_put(struct rtable * rt) +static inline void ip_rt_put(struct rtable *rt) { - if (rt) - dst_release(&rt->dst); + /* dst_release() accepts a NULL parameter. + * We rely on dst being first structure in struct rtable + */ + BUILD_BUG_ON(offsetof(struct rtable, dst) != 0); + dst_release(&rt->dst); } #define IPTOS_RT_MASK (IPTOS_TOS_MASK & ~3) diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 712b3bebeda..35247271e55 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -130,8 +130,6 @@ typedef union { __be16 err; sctp_state_t state; sctp_event_timeout_t to; - unsigned long zero; - void *ptr; struct sctp_chunk *chunk; struct sctp_association *asoc; struct sctp_transport *transport; @@ -154,23 +152,15 @@ typedef union { * which takes an __s32 and returns a sctp_arg_t containing the * __s32. So, after foo = SCTP_I32(arg), foo.i32 == arg. */ -static inline sctp_arg_t SCTP_NULL(void) -{ - sctp_arg_t retval; retval.ptr = NULL; return retval; -} -static inline sctp_arg_t SCTP_NOFORCE(void) -{ - sctp_arg_t retval = {.zero = 0UL}; retval.i32 = 0; return retval; -} -static inline sctp_arg_t SCTP_FORCE(void) -{ - sctp_arg_t retval = {.zero = 0UL}; retval.i32 = 1; return retval; -} #define SCTP_ARG_CONSTRUCTOR(name, type, elt) \ static inline sctp_arg_t \ SCTP_## name (type arg) \ -{ sctp_arg_t retval = {.zero = 0UL}; retval.elt = arg; return retval; } +{ sctp_arg_t retval;\ + memset(&retval, 0, sizeof(sctp_arg_t));\ + retval.elt = arg;\ + return retval;\ +} SCTP_ARG_CONSTRUCTOR(I32, __s32, i32) SCTP_ARG_CONSTRUCTOR(U32, __u32, u32) @@ -181,7 +171,6 @@ SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(PERR, __be16, err) /* protocol error */ SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) -SCTP_ARG_CONSTRUCTOR(PTR, void *, ptr) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) SCTP_ARG_CONSTRUCTOR(ASOC, struct sctp_association *, asoc) SCTP_ARG_CONSTRUCTOR(TRANSPORT, struct sctp_transport *, transport) @@ -192,6 +181,23 @@ SCTP_ARG_CONSTRUCTOR(PACKET, struct sctp_packet *, packet) SCTP_ARG_CONSTRUCTOR(SACKH, sctp_sackhdr_t *, sackh) SCTP_ARG_CONSTRUCTOR(DATAMSG, struct sctp_datamsg *, msg) +static inline sctp_arg_t SCTP_FORCE(void) +{ + return SCTP_I32(1); +} + +static inline sctp_arg_t SCTP_NOFORCE(void) +{ + return SCTP_I32(0); +} + +static inline sctp_arg_t SCTP_NULL(void) +{ + sctp_arg_t retval; + memset(&retval, 0, sizeof(sctp_arg_t)); + return retval; +} + typedef struct { sctp_arg_t obj; sctp_verb_t verb; diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index d053d2e9987..c29707d654c 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,14 +312,6 @@ enum { SCTP_MAX_GABS = 16 }; * functions simpler to write. */ -#if defined (CONFIG_SCTP_HMAC_MD5) -#define SCTP_COOKIE_HMAC_ALG "hmac(md5)" -#elif defined (CONFIG_SCTP_HMAC_SHA1) -#define SCTP_COOKIE_HMAC_ALG "hmac(sha1)" -#else -#define SCTP_COOKIE_HMAC_ALG NULL -#endif - /* These return values describe the success or failure of a number of * routines which form the lower interface to SCTP_outqueue. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 64158aa1bb5..2b2f61dd403 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -177,6 +177,7 @@ struct sctp_sock { /* Access to HMAC transform. */ struct crypto_hash *hmac; + char *sctp_hmac_alg; /* What is our base endpointer? */ struct sctp_endpoint *ep; diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 2e5ee0d8458..ff1b8ba73ab 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -72,7 +72,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *, struct sctp_ulpevent *ev); void sctp_ulpq_renege(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); /* Perform partial delivery. */ -void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, gfp_t); +void sctp_ulpq_partial_delivery(struct sctp_ulpq *, gfp_t); /* Abort the partial delivery. */ void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t); diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index b1bea03274d..2d32d073a6f 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -43,6 +43,7 @@ /* Socket filtering */ #define SO_ATTACH_FILTER 26 #define SO_DETACH_FILTER 27 +#define SO_GET_FILTER SO_ATTACH_FILTER #define SO_PEERNAME 28 #define SO_TIMESTAMP 29 diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 3d7922433ab..9cfde694109 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -127,7 +127,9 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_RXHASH 32 #define SKF_AD_CPU 36 #define SKF_AD_ALU_XOR_X 40 -#define SKF_AD_MAX 44 +#define SKF_AD_VLAN_TAG 44 +#define SKF_AD_VLAN_TAG_PRESENT 48 +#define SKF_AD_MAX 52 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild index aafaa5aa54d..8c1d2cb75e3 100644 --- a/include/uapi/linux/hdlc/Kbuild +++ b/include/uapi/linux/hdlc/Kbuild @@ -1 +1,2 @@ # UAPI Header export list +header-y += ioctl.h diff --git a/include/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h index 58397236435..04bc0274a18 100644 --- a/include/linux/hdlc/ioctl.h +++ b/include/uapi/linux/hdlc/ioctl.h @@ -34,13 +34,15 @@ #define LMI_CCITT 3 /* ITU-T Annex A */ #define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ -typedef struct { +#ifndef __ASSEMBLY__ + +typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ unsigned short loopback; } sync_serial_settings; /* V.35, V.24, X.21 */ -typedef struct { +typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ unsigned short loopback; @@ -78,4 +80,5 @@ typedef struct { /* PPP doesn't need any info now - supply length = 0 to ioctl */ +#endif /* __ASSEMBLY__ */ #endif /* __HDLC_IOCTL_H__ */ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a8fe9549ddb..b3885791e11 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -97,5 +97,23 @@ struct __fdb_entry { __u16 unused; }; +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0343e1f0582..67fb87ca109 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -48,6 +48,7 @@ #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DEC 0x6000 /* DEC Assigned proto */ #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index f3799295d23..f9a60375f0d 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -50,6 +50,7 @@ struct sockaddr_ll { #define PACKET_TX_TIMESTAMP 16 #define PACKET_TIMESTAMP 17 #define PACKET_FANOUT 18 +#define PACKET_TX_HAS_OFF 19 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 25a585ce23e..958497ad5bb 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -34,6 +34,7 @@ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 +#define TUN_TAP_MQ 0x0400 /* Ioctl defines */ #define TUNSETNOCSUM _IOW('T', 200, int) @@ -53,6 +54,7 @@ #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 @@ -61,6 +63,9 @@ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 5db5942575f..c1bf0b5a8da 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -37,6 +37,20 @@ struct ip_tunnel_parm { struct iphdr iph; }; +enum { + IFLA_IPTUN_UNSPEC, + IFLA_IPTUN_LINK, + IFLA_IPTUN_LOCAL, + IFLA_IPTUN_REMOTE, + IFLA_IPTUN_TTL, + IFLA_IPTUN_TOS, + IFLA_IPTUN_ENCAP_LIMIT, + IFLA_IPTUN_FLOWINFO, + IFLA_IPTUN_FLAGS, + __IFLA_IPTUN_MAX, +}; +#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) + /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 8c469af939a..bbde90fa583 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -109,9 +109,10 @@ enum { INET_DIAG_TOS, INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, + INET_DIAG_SHUTDOWN, }; -#define INET_DIAG_MAX INET_DIAG_SKMEMINFO +#define INET_DIAG_MAX INET_DIAG_SHUTDOWN /* INET_DIAG_MEM */ diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h new file mode 100644 index 00000000000..75dcbc587fb --- /dev/null +++ b/include/uapi/linux/netconf.h @@ -0,0 +1,23 @@ +#ifndef _UAPI_LINUX_NETCONF_H_ +#define _UAPI_LINUX_NETCONF_H_ + +#include <linux/types.h> +#include <linux/netlink.h> + +struct netconfmsg { + __u8 ncm_family; +}; + +enum { + NETCONFA_UNSPEC, + NETCONFA_IFINDEX, + NETCONFA_FORWARDING, + NETCONFA_RP_FILTER, + __NETCONFA_MAX +}; +#define NETCONFA_MAX (__NETCONFA_MAX - 1) + +#define NETCONFA_IFINDEX_ALL -1 +#define NETCONFA_IFINDEX_DEFAULT -2 + +#endif /* _UAPI_LINUX_NETCONF_H_ */ diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 94e981f810a..b65c834f83e 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -67,12 +67,26 @@ struct ptp_perout_request { unsigned int rsv[4]; /* Reserved for future use. */ }; +#define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement samples. */ + +struct ptp_sys_offset { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of interleaved system/phc time stamps. The kernel + * will provide 2*n_samples + 1 time stamps, with the last + * one as a system time stamp. + */ + struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; +}; + #define PTP_CLK_MAGIC '=' #define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) #define PTP_EXTTS_REQUEST _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request) #define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) #define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) +#define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fcd768b09f6..3dee071770d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -120,6 +120,11 @@ enum { RTM_SETDCB, #define RTM_SETDCB RTM_SETDCB + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; @@ -587,6 +592,10 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b1d2bf16b33..b8a24941db2 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -37,6 +37,7 @@ enum { UNIX_DIAG_ICONS, UNIX_DIAG_RQLEN, UNIX_DIAG_MEMINFO, + UNIX_DIAG_SHUTDOWN, UNIX_DIAG_MAX, }; diff --git a/include/uapi/linux/usb/cdc.h b/include/uapi/linux/usb/cdc.h index 81a927930bf..f35aa0a338c 100644 --- a/include/uapi/linux/usb/cdc.h +++ b/include/uapi/linux/usb/cdc.h @@ -19,6 +19,7 @@ #define USB_CDC_SUBCLASS_OBEX 0x0b #define USB_CDC_SUBCLASS_EEM 0x0c #define USB_CDC_SUBCLASS_NCM 0x0d +#define USB_CDC_SUBCLASS_MBIM 0x0e #define USB_CDC_PROTO_NONE 0 @@ -33,6 +34,7 @@ #define USB_CDC_PROTO_EEM 7 #define USB_CDC_NCM_PROTO_NTB 1 +#define USB_CDC_MBIM_PROTO_NTB 2 /*-------------------------------------------------------------------------*/ @@ -53,6 +55,7 @@ #define USB_CDC_DMM_TYPE 0x14 #define USB_CDC_OBEX_TYPE 0x15 #define USB_CDC_NCM_TYPE 0x1a +#define USB_CDC_MBIM_TYPE 0x1b /* "Header Functional Descriptor" from CDC spec 5.2.3.1 */ struct usb_cdc_header_desc { @@ -187,6 +190,21 @@ struct usb_cdc_ncm_desc { __le16 bcdNcmVersion; __u8 bmNetworkCapabilities; } __attribute__ ((packed)); + +/* "MBIM Control Model Functional Descriptor" */ +struct usb_cdc_mbim_desc { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + + __le16 bcdMBIMVersion; + __le16 wMaxControlMessage; + __u8 bNumberFilters; + __u8 bMaxFilterSize; + __le16 wMaxSegmentSize; + __u8 bmNetworkCapabilities; +} __attribute__ ((packed)); + /*-------------------------------------------------------------------------*/ /* @@ -332,6 +350,11 @@ struct usb_cdc_ncm_nth32 { #define USB_CDC_NCM_NDP32_CRC_SIGN 0x316D636E /* ncm1 */ #define USB_CDC_NCM_NDP32_NOCRC_SIGN 0x306D636E /* ncm0 */ +#define USB_CDC_MBIM_NDP16_IPS_SIGN 0x00535049 /* IPS<sessionID> : IPS0 for now */ +#define USB_CDC_MBIM_NDP32_IPS_SIGN 0x00737069 /* ips<sessionID> : ips0 for now */ +#define USB_CDC_MBIM_NDP16_DSS_SIGN 0x00535344 /* DSS<sessionID> */ +#define USB_CDC_MBIM_NDP32_DSS_SIGN 0x00737364 /* dss<sessionID> */ + /* 16-bit NCM Datagram Pointer Entry */ struct usb_cdc_ncm_dpe16 { __le16 wDatagramIndex; diff --git a/kernel/time/Makefile b/kernel/time/Makefile index e2fd74b8e8c..ff7d9d2ab50 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,4 +1,4 @@ -obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o +obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c deleted file mode 100644 index a9ae369925c..00000000000 --- a/kernel/time/timecompare.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (C) 2009 Intel Corporation. - * Author: Patrick Ohly <patrick.ohly@intel.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/timecompare.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/math64.h> -#include <linux/kernel.h> - -/* - * fixed point arithmetic scale factor for skew - * - * Usually one would measure skew in ppb (parts per billion, 1e9), but - * using a factor of 2 simplifies the math. - */ -#define TIMECOMPARE_SKEW_RESOLUTION (((s64)1)<<30) - -ktime_t timecompare_transform(struct timecompare *sync, - u64 source_tstamp) -{ - u64 nsec; - - nsec = source_tstamp + sync->offset; - nsec += (s64)(source_tstamp - sync->last_update) * sync->skew / - TIMECOMPARE_SKEW_RESOLUTION; - - return ns_to_ktime(nsec); -} -EXPORT_SYMBOL_GPL(timecompare_transform); - -int timecompare_offset(struct timecompare *sync, - s64 *offset, - u64 *source_tstamp) -{ - u64 start_source = 0, end_source = 0; - struct { - s64 offset; - s64 duration_target; - } buffer[10], sample, *samples; - int counter = 0, i; - int used; - int index; - int num_samples = sync->num_samples; - - if (num_samples > ARRAY_SIZE(buffer)) { - samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC); - if (!samples) { - samples = buffer; - num_samples = ARRAY_SIZE(buffer); - } - } else { - samples = buffer; - } - - /* run until we have enough valid samples, but do not try forever */ - i = 0; - counter = 0; - while (1) { - u64 ts; - ktime_t start, end; - - start = sync->target(); - ts = timecounter_read(sync->source); - end = sync->target(); - - if (!i) - start_source = ts; - - /* ignore negative durations */ - sample.duration_target = ktime_to_ns(ktime_sub(end, start)); - if (sample.duration_target >= 0) { - /* - * assume symetric delay to and from source: - * average target time corresponds to measured - * source time - */ - sample.offset = - (ktime_to_ns(end) + ktime_to_ns(start)) / 2 - - ts; - - /* simple insertion sort based on duration */ - index = counter - 1; - while (index >= 0) { - if (samples[index].duration_target < - sample.duration_target) - break; - samples[index + 1] = samples[index]; - index--; - } - samples[index + 1] = sample; - counter++; - } - - i++; - if (counter >= num_samples || i >= 100000) { - end_source = ts; - break; - } - } - - *source_tstamp = (end_source + start_source) / 2; - - /* remove outliers by only using 75% of the samples */ - used = counter * 3 / 4; - if (!used) - used = counter; - if (used) { - /* calculate average */ - s64 off = 0; - for (index = 0; index < used; index++) - off += samples[index].offset; - *offset = div_s64(off, used); - } - - if (samples && samples != buffer) - kfree(samples); - - return used; -} -EXPORT_SYMBOL_GPL(timecompare_offset); - -void __timecompare_update(struct timecompare *sync, - u64 source_tstamp) -{ - s64 offset; - u64 average_time; - - if (!timecompare_offset(sync, &offset, &average_time)) - return; - - if (!sync->last_update) { - sync->last_update = average_time; - sync->offset = offset; - sync->skew = 0; - } else { - s64 delta_nsec = average_time - sync->last_update; - - /* avoid division by negative or small deltas */ - if (delta_nsec >= 10000) { - s64 delta_offset_nsec = offset - sync->offset; - s64 skew; /* delta_offset_nsec * - TIMECOMPARE_SKEW_RESOLUTION / - delta_nsec */ - u64 divisor; - - /* div_s64() is limited to 32 bit divisor */ - skew = delta_offset_nsec * TIMECOMPARE_SKEW_RESOLUTION; - divisor = delta_nsec; - while (unlikely(divisor >= ((s64)1) << 32)) { - /* divide both by 2; beware, right shift - of negative value has undefined - behavior and can only be used for - the positive divisor */ - skew = div_s64(skew, 2); - divisor >>= 1; - } - skew = div_s64(skew, divisor); - - /* - * Calculate new overall skew as 4/16 the - * old value and 12/16 the new one. This is - * a rather arbitrary tradeoff between - * only using the latest measurement (0/16 and - * 16/16) and even more weight on past measurements. - */ -#define TIMECOMPARE_NEW_SKEW_PER_16 12 - sync->skew = - div_s64((16 - TIMECOMPARE_NEW_SKEW_PER_16) * - sync->skew + - TIMECOMPARE_NEW_SKEW_PER_16 * skew, - 16); - sync->last_update = average_time; - sync->offset = offset; - } - } -} -EXPORT_SYMBOL_GPL(__timecompare_update); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ee070722a3a..30ee4bc0f7c 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -294,7 +294,7 @@ static void vlan_transfer_features(struct net_device *dev, else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 402442402af..4a6d31a082b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -409,7 +409,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) return err; } -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc) { @@ -531,6 +531,10 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static struct device_type vlan_type = { + .name = "vlan", +}; + static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) @@ -564,7 +568,7 @@ static int vlan_dev_init(struct net_device *dev) if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len); -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) dev->fcoe_ddp_xid = real_dev->fcoe_ddp_xid; #endif @@ -579,6 +583,8 @@ static int vlan_dev_init(struct net_device *dev) dev->netdev_ops = &vlan_netdev_ops; + SET_NETDEV_DEVTYPE(dev, &vlan_type); + if (is_vlan_dev(real_dev)) subclass = 1; @@ -741,7 +747,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_do_ioctl = vlan_dev_ioctl, .ndo_neigh_setup = vlan_dev_neigh_setup, .ndo_get_stats64 = vlan_dev_get_stats64, -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) .ndo_fcoe_ddp_setup = vlan_dev_fcoe_ddp_setup, .ndo_fcoe_ddp_done = vlan_dev_fcoe_ddp_done, .ndo_fcoe_enable = vlan_dev_fcoe_enable, diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 53f5244e28f..250e0b58109 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -25,6 +25,16 @@ config BATMAN_ADV_BLA more than one mesh node in the same LAN, you can safely remove this feature and save some space. +config BATMAN_ADV_DAT + bool "Distributed ARP Table" + depends on BATMAN_ADV && INET + default n + help + This option enables DAT (Distributed ARP Table), a DHT based + mechanism that increases ARP reliability on sparse wireless + mesh networks. If you think that your network does not need + this option you can safely remove it and save some space. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 8676d2b1d57..e45e3b4e32e 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -23,6 +23,7 @@ batman-adv-y += bat_iv_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o batman-adv-y += debugfs.o +batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b02b75dae3a..9f3925a85aa 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -57,20 +57,22 @@ out: static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff; uint32_t random_seqno; int res = -ENOMEM; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); - atomic_set(&hard_iface->seqno, random_seqno); + atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); - hard_iface->packet_len = BATADV_OGM_HLEN; - hard_iface->packet_buff = kmalloc(hard_iface->packet_len, GFP_ATOMIC); - - if (!hard_iface->packet_buff) + hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; + ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); + if (!ogm_buff) goto out; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + hard_iface->bat_iv.ogm_buff = ogm_buff; + + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->header.packet_type = BATADV_IV_OGM; batadv_ogm_packet->header.version = BATADV_COMPAT_VERSION; batadv_ogm_packet->header.ttl = 2; @@ -87,15 +89,16 @@ out: static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { - kfree(hard_iface->packet_buff); - hard_iface->packet_buff = NULL; + kfree(hard_iface->bat_iv.ogm_buff); + hard_iface->bat_iv.ogm_buff = NULL; } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; memcpy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(batadv_ogm_packet->prev_sender, @@ -106,8 +109,9 @@ static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; + unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->flags = BATADV_PRIMARIES_FIRST_HOP; batadv_ogm_packet->header.ttl = BATADV_TTL; } @@ -407,9 +411,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, if ((atomic_read(&bat_priv->aggregated_ogms)) && (packet_len < BATADV_MAX_AGGREGATION_BYTES)) - skb_size = BATADV_MAX_AGGREGATION_BYTES + ETH_HLEN; + skb_size = BATADV_MAX_AGGREGATION_BYTES; else - skb_size = packet_len + ETH_HLEN; + skb_size = packet_len; + + skb_size += ETH_HLEN + NET_IP_ALIGN; forw_packet_aggr->skb = dev_alloc_skb(skb_size); if (!forw_packet_aggr->skb) { @@ -418,7 +424,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); INIT_HLIST_NODE(&forw_packet_aggr->list); @@ -590,8 +596,10 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if; + int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; int vis_server, tt_num_changes = 0; uint32_t seqno; uint8_t bandwidth; @@ -600,17 +608,16 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) - tt_num_changes = batadv_tt_append_diff(bat_priv, - &hard_iface->packet_buff, - &hard_iface->packet_len, + tt_num_changes = batadv_tt_append_diff(bat_priv, ogm_buff, + ogm_buff_len, BATADV_OGM_HLEN); - batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff; + batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); /* change sequence number to network order */ - seqno = (uint32_t)atomic_read(&hard_iface->seqno); + seqno = (uint32_t)atomic_read(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->seqno = htonl(seqno); - atomic_inc(&hard_iface->seqno); + atomic_inc(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn); batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc); @@ -631,8 +638,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) } batadv_slide_own_bcast_window(hard_iface); - batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, - hard_iface->packet_len, hard_iface, 1, + batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, + hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); if (primary_if) @@ -1015,7 +1022,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; /* could be changed by schedule_own_packet() */ - if_incoming_seqno = atomic_read(&if_incoming->seqno); + if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); if (batadv_ogm_packet->flags & BATADV_DIRECTLINK) has_directlink_flag = 1; diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index aea174cdbfb..5453b17d8df 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -79,20 +79,17 @@ int batadv_bit_get_packet(void *priv, unsigned long *seq_bits, * or the old packet got delayed somewhere in the network. The * packet should be dropped without calling this function if the * seqno window is protected. + * + * seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE + * or + * seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE */ - if (seq_num_diff <= -BATADV_TQ_LOCAL_WINDOW_SIZE || - seq_num_diff >= BATADV_EXPECTED_SEQNO_RANGE) { + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Other host probably restarted!\n"); - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Other host probably restarted!\n"); - - bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); - if (set_mark) - batadv_set_bit(seq_bits, 0); - - return 1; - } + bitmap_zero(seq_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); + if (set_mark) + batadv_set_bit(seq_bits, 0); - /* never reached */ - return 0; + return 1; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index fd8d5afec0d..29a5542aac7 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1585,23 +1585,11 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) struct hlist_head *head; uint32_t i; bool is_own; - int ret = 0; uint8_t *primary_addr; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, @@ -1628,7 +1616,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) @@ -1643,23 +1631,11 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) int secs, msecs; uint32_t i; bool is_own; - int ret = 0; uint8_t *primary_addr; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } primary_addr = primary_if->net_dev->dev_addr; seq_printf(seq, @@ -1693,5 +1669,5 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 391d4fb2026..3f679cb2d0e 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include "vis.h" #include "icmp_socket.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" static struct dentry *batadv_debugfs; @@ -99,15 +100,17 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) static int batadv_log_open(struct inode *inode, struct file *file) { + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + nonseekable_open(inode, file); file->private_data = inode->i_private; - batadv_inc_module_count(); return 0; } static int batadv_log_release(struct inode *inode, struct file *file) { - batadv_dec_module_count(); + module_put(THIS_MODULE); return 0; } @@ -278,6 +281,19 @@ static int batadv_bla_backbone_table_open(struct inode *inode, #endif +#ifdef CONFIG_BATMAN_ADV_DAT +/** + * batadv_dat_cache_open - Prepare file handler for reads from dat_chache + * @inode: inode which was opened + * @file: file handle to be initialized + */ +static int batadv_dat_cache_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_dat_cache_seq_print_text, net_dev); +} +#endif + static int batadv_transtable_local_open(struct inode *inode, struct file *file) { struct net_device *net_dev = (struct net_device *)inode->i_private; @@ -317,6 +333,9 @@ static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open); static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO, batadv_bla_backbone_table_open); #endif +#ifdef CONFIG_BATMAN_ADV_DAT +static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); +#endif static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); @@ -329,6 +348,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_bla_claim_table, &batadv_debuginfo_bla_backbone_table, #endif +#ifdef CONFIG_BATMAN_ADV_DAT + &batadv_debuginfo_dat_cache, +#endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, NULL, diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c new file mode 100644 index 00000000000..8e1d89d2b1c --- /dev/null +++ b/net/batman-adv/distributed-arp-table.c @@ -0,0 +1,1066 @@ +/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: + * + * Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <net/arp.h> + +#include "main.h" +#include "hash.h" +#include "distributed-arp-table.h" +#include "hard-interface.h" +#include "originator.h" +#include "send.h" +#include "types.h" +#include "translation-table.h" +#include "unicast.h" + +static void batadv_dat_purge(struct work_struct *work); + +/** + * batadv_dat_start_timer - initialise the DAT periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_dat_start_timer(struct batadv_priv *bat_priv) +{ + INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge); + queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work, + msecs_to_jiffies(10000)); +} + +/** + * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * free it + * @dat_entry: the oentry to free + */ +static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) +{ + if (atomic_dec_and_test(&dat_entry->refcount)) + kfree_rcu(dat_entry, rcu); +} + +/** + * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * @dat_entry: the entry to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) +{ + return batadv_has_timed_out(dat_entry->last_update, + BATADV_DAT_ENTRY_TIMEOUT); +} + +/** + * __batadv_dat_purge - delete entries from the DAT local storage + * @bat_priv: the bat priv with all the soft interface information + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the dat_entry as argument and has to + * returns a boolean value: true is the entry has to be deleted, + * false otherwise + * + * Loops over each entry in the DAT local storage and delete it if and only if + * the to_purge function passed as argument returns true + */ +static void __batadv_dat_purge(struct batadv_priv *bat_priv, + bool (*to_purge)(struct batadv_dat_entry *)) +{ + spinlock_t *list_lock; /* protects write access to the hash lists */ + struct batadv_dat_entry *dat_entry; + struct hlist_node *node, *node_tmp; + struct hlist_head *head; + uint32_t i; + + if (!bat_priv->dat.hash) + return; + + for (i = 0; i < bat_priv->dat.hash->size; i++) { + head = &bat_priv->dat.hash->table[i]; + list_lock = &bat_priv->dat.hash->list_locks[i]; + + spin_lock_bh(list_lock); + hlist_for_each_entry_safe(dat_entry, node, node_tmp, head, + hash_entry) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(dat_entry)) + continue; + + hlist_del_rcu(node); + batadv_dat_entry_free_ref(dat_entry); + } + spin_unlock_bh(list_lock); + } +} + +/** + * batadv_dat_purge - periodic task that deletes old entries from the local DAT + * hash table + * @work: kernel work struct + */ +static void batadv_dat_purge(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_dat *priv_dat; + struct batadv_priv *bat_priv; + + delayed_work = container_of(work, struct delayed_work, work); + priv_dat = container_of(delayed_work, struct batadv_priv_dat, work); + bat_priv = container_of(priv_dat, struct batadv_priv, dat); + + __batadv_dat_purge(bat_priv, batadv_dat_to_purge); + batadv_dat_start_timer(bat_priv); +} + +/** + * batadv_compare_dat - comparing function used in the local DAT hash table + * @node: node in the local table + * @data2: second object to compare the node to + * + * Returns 1 if the two entry are the same, 0 otherwise + */ +static int batadv_compare_dat(const struct hlist_node *node, const void *data2) +{ + const void *data1 = container_of(node, struct batadv_dat_entry, + hash_entry); + + return (memcmp(data1, data2, sizeof(__be32)) == 0 ? 1 : 0); +} + +/** + * batadv_arp_hw_src - extract the hw_src field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the hw_src field in the ARP packet + */ +static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) +{ + uint8_t *addr; + + addr = (uint8_t *)(skb->data + hdr_size); + addr += ETH_HLEN + sizeof(struct arphdr); + + return addr; +} + +/** + * batadv_arp_ip_src - extract the ip_src field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the ip_src field in the ARP packet + */ +static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) +{ + return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN); +} + +/** + * batadv_arp_hw_dst - extract the hw_dst field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the hw_dst field in the ARP packet + */ +static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) +{ + return batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN + 4; +} + +/** + * batadv_arp_ip_dst - extract the ip_dst field from an ARP packet + * @skb: ARP packet + * @hdr_size: size of the possible header before the ARP packet + * + * Returns the value of the ip_dst field in the ARP packet + */ +static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) +{ + return *(__be32 *)(batadv_arp_hw_src(skb, hdr_size) + ETH_ALEN * 2 + 4); +} + +/** + * batadv_hash_dat - compute the hash value for an IP address + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data + */ +static uint32_t batadv_hash_dat(const void *data, uint32_t size) +{ + const unsigned char *key = data; + uint32_t hash = 0; + size_t i; + + for (i = 0; i < 4; i++) { + hash += key[i]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * table + * @bat_priv: the bat priv with all the soft interface information + * @ip: search key + * + * Returns the dat_entry if found, NULL otherwise + */ +static struct batadv_dat_entry * +batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) +{ + struct hlist_head *head; + struct hlist_node *node; + struct batadv_dat_entry *dat_entry, *dat_entry_tmp = NULL; + struct batadv_hashtable *hash = bat_priv->dat.hash; + uint32_t index; + + if (!hash) + return NULL; + + index = batadv_hash_dat(&ip, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + if (dat_entry->ip != ip) + continue; + + if (!atomic_inc_not_zero(&dat_entry->refcount)) + continue; + + dat_entry_tmp = dat_entry; + break; + } + rcu_read_unlock(); + + return dat_entry_tmp; +} + +/** + * batadv_dat_entry_add - add a new dat entry or update it if already exists + * @bat_priv: the bat priv with all the soft interface information + * @ip: ipv4 to add/edit + * @mac_addr: mac address to assign to the given ipv4 + */ +static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, + uint8_t *mac_addr) +{ + struct batadv_dat_entry *dat_entry; + int hash_added; + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip); + /* if this entry is already known, just update it */ + if (dat_entry) { + if (!batadv_compare_eth(dat_entry->mac_addr, mac_addr)) + memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + dat_entry->last_update = jiffies; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "Entry updated: %pI4 %pM\n", &dat_entry->ip, + dat_entry->mac_addr); + goto out; + } + + dat_entry = kmalloc(sizeof(*dat_entry), GFP_ATOMIC); + if (!dat_entry) + goto out; + + dat_entry->ip = ip; + memcpy(dat_entry->mac_addr, mac_addr, ETH_ALEN); + dat_entry->last_update = jiffies; + atomic_set(&dat_entry->refcount, 2); + + hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, + batadv_hash_dat, &dat_entry->ip, + &dat_entry->hash_entry); + + if (unlikely(hash_added != 0)) { + /* remove the reference for the hash */ + batadv_dat_entry_free_ref(dat_entry); + goto out; + } + + batadv_dbg(BATADV_DBG_DAT, bat_priv, "New entry added: %pI4 %pM\n", + &dat_entry->ip, dat_entry->mac_addr); + +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); +} + +#ifdef CONFIG_BATMAN_ADV_DEBUG + +/** + * batadv_dbg_arp - print a debug message containing all the ARP packet details + * @bat_priv: the bat priv with all the soft interface information + * @skb: ARP packet + * @type: ARP type + * @hdr_size: size of the possible header before the ARP packet + * @msg: message to print together with the debugging information + */ +static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, + uint16_t type, int hdr_size, char *msg) +{ + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + struct batadv_bcast_packet *bcast_pkt; + uint8_t *orig_addr; + __be32 ip_src, ip_dst; + + if (msg) + batadv_dbg(BATADV_DBG_DAT, bat_priv, "%s\n", msg); + + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP MSG = [src: %pM-%pI4 dst: %pM-%pI4]\n", + batadv_arp_hw_src(skb, hdr_size), &ip_src, + batadv_arp_hw_dst(skb, hdr_size), &ip_dst); + + if (hdr_size == 0) + return; + + /* if the ARP packet is encapsulated in a batman packet, let's print + * some debug messages + */ + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + + switch (unicast_4addr_packet->u.header.packet_type) { + case BATADV_UNICAST: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a UNICAST packet\n"); + break; + case BATADV_UNICAST_4ADDR: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a UNICAST_4ADDR packet (src: %pM)\n", + unicast_4addr_packet->src); + switch (unicast_4addr_packet->subtype) { + case BATADV_P_DAT_DHT_PUT: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_PUT\n"); + break; + case BATADV_P_DAT_DHT_GET: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DAT_DHT_GET\n"); + break; + case BATADV_P_DAT_CACHE_REPLY: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* type: DAT_CACHE_REPLY\n"); + break; + case BATADV_P_DATA: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: DATA\n"); + break; + default: + batadv_dbg(BATADV_DBG_DAT, bat_priv, "* type: Unknown (%u)!\n", + unicast_4addr_packet->u.header.packet_type); + } + break; + case BATADV_BCAST: + bcast_pkt = (struct batadv_bcast_packet *)unicast_4addr_packet; + orig_addr = bcast_pkt->orig; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within a BCAST packet (src: %pM)\n", + orig_addr); + break; + default: + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "* encapsulated within an unknown packet type (0x%x)\n", + unicast_4addr_packet->u.header.packet_type); + } +} + +#else + +static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, + uint16_t type, int hdr_size, char *msg) +{ +} + +#endif /* CONFIG_BATMAN_ADV_DEBUG */ + +/** + * batadv_is_orig_node_eligible - check whether a node can be a DHT candidate + * @res: the array with the already selected candidates + * @select: number of already selected candidates + * @tmp_max: address of the currently evaluated node + * @max: current round max address + * @last_max: address of the last selected candidate + * @candidate: orig_node under evaluation + * @max_orig_node: last selected candidate + * + * Returns true if the node has been elected as next candidate or false othrwise + */ +static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, + int select, batadv_dat_addr_t tmp_max, + batadv_dat_addr_t max, + batadv_dat_addr_t last_max, + struct batadv_orig_node *candidate, + struct batadv_orig_node *max_orig_node) +{ + bool ret = false; + int j; + + /* Check if this node has already been selected... */ + for (j = 0; j < select; j++) + if (res[j].orig_node == candidate) + break; + /* ..and possibly skip it */ + if (j < select) + goto out; + /* sanity check: has it already been selected? This should not happen */ + if (tmp_max > last_max) + goto out; + /* check if during this iteration an originator with a closer dht + * address has already been found + */ + if (tmp_max < max) + goto out; + /* this is an hash collision with the temporary selected node. Choose + * the one with the lowest address + */ + if ((tmp_max == max) && + (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) + goto out; + + ret = true; +out: + return ret; +} + +/** + * batadv_choose_next_candidate - select the next DHT candidate + * @bat_priv: the bat priv with all the soft interface information + * @cands: candidates array + * @select: number of candidates already present in the array + * @ip_key: key to look up in the DHT + * @last_max: pointer where the address of the selected candidate will be saved + */ +static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, + struct batadv_dat_candidate *cands, + int select, batadv_dat_addr_t ip_key, + batadv_dat_addr_t *last_max) +{ + batadv_dat_addr_t max = 0, tmp_max = 0; + struct batadv_orig_node *orig_node, *max_orig_node = NULL; + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_node *node; + struct hlist_head *head; + int i; + + /* if no node is eligible as candidate, leave the candidate type as + * NOT_FOUND + */ + cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; + + /* iterate over the originator list and find the node with closest + * dat_address which has not been selected yet + */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, node, head, hash_entry) { + /* the dht space is a ring and addresses are unsigned */ + tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + + ip_key; + + if (!batadv_is_orig_node_eligible(cands, select, + tmp_max, max, + *last_max, orig_node, + max_orig_node)) + continue; + + if (!atomic_inc_not_zero(&orig_node->refcount)) + continue; + + max = tmp_max; + if (max_orig_node) + batadv_orig_node_free_ref(max_orig_node); + max_orig_node = orig_node; + } + rcu_read_unlock(); + } + if (max_orig_node) { + cands[select].type = BATADV_DAT_CANDIDATE_ORIG; + cands[select].orig_node = max_orig_node; + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "dat_select_candidates() %d: selected %pM addr=%u dist=%u\n", + select, max_orig_node->orig, max_orig_node->dat_addr, + max); + } + *last_max = max; +} + +/** + * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * be sent to + * @bat_priv: the bat priv with all the soft interface information + * @ip_dst: ipv4 to look up in the DHT + * + * An originator O is selected if and only if its DHT_ID value is one of three + * closest values (from the LEFT, with wrap around if needed) then the hash + * value of the key. ip_dst is the key. + * + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + */ +static struct batadv_dat_candidate * +batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) +{ + int select; + batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key; + struct batadv_dat_candidate *res; + + if (!bat_priv->orig_hash) + return NULL; + + res = kmalloc(BATADV_DAT_CANDIDATES_NUM * sizeof(*res), GFP_ATOMIC); + if (!res) + return NULL; + + ip_key = (batadv_dat_addr_t)batadv_hash_dat(&ip_dst, + BATADV_DAT_ADDR_MAX); + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "dat_select_candidates(): IP=%pI4 hash(IP)=%u\n", &ip_dst, + ip_key); + + for (select = 0; select < BATADV_DAT_CANDIDATES_NUM; select++) + batadv_choose_next_candidate(bat_priv, res, select, ip_key, + &last_max); + + return res; +} + +/** + * batadv_dat_send_data - send a payload to the selected candidates + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @ip: the DHT key + * @packet_subtype: unicast4addr packet subtype to use + * + * In this function the skb is copied by means of pskb_copy() and is sent as + * unicast packet to each of the selected candidates + * + * Returns true if the packet is sent to at least one candidate, false otherwise + */ +static bool batadv_dat_send_data(struct batadv_priv *bat_priv, + struct sk_buff *skb, __be32 ip, + int packet_subtype) +{ + int i; + bool ret = false; + int send_status; + struct batadv_neigh_node *neigh_node = NULL; + struct sk_buff *tmp_skb; + struct batadv_dat_candidate *cand; + + cand = batadv_dat_select_candidates(bat_priv, ip); + if (!cand) + goto out; + + batadv_dbg(BATADV_DBG_DAT, bat_priv, "DHT_SEND for %pI4\n", &ip); + + for (i = 0; i < BATADV_DAT_CANDIDATES_NUM; i++) { + if (cand[i].type == BATADV_DAT_CANDIDATE_NOT_FOUND) + continue; + + neigh_node = batadv_orig_node_get_router(cand[i].orig_node); + if (!neigh_node) + goto free_orig; + + tmp_skb = pskb_copy(skb, GFP_ATOMIC); + if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb, + cand[i].orig_node, + packet_subtype)) { + kfree_skb(tmp_skb); + goto free_neigh; + } + + send_status = batadv_send_skb_packet(tmp_skb, + neigh_node->if_incoming, + neigh_node->addr); + if (send_status == NET_XMIT_SUCCESS) { + /* count the sent packet */ + switch (packet_subtype) { + case BATADV_P_DAT_DHT_GET: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_GET_TX); + break; + case BATADV_P_DAT_DHT_PUT: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_PUT_TX); + break; + } + + /* packet sent to a candidate: return true */ + ret = true; + } +free_neigh: + batadv_neigh_node_free_ref(neigh_node); +free_orig: + batadv_orig_node_free_ref(cand[i].orig_node); + } + +out: + kfree(cand); + return ret; +} + +/** + * batadv_dat_hash_free - free the local DAT hash table + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_dat_hash_free(struct batadv_priv *bat_priv) +{ + if (!bat_priv->dat.hash) + return; + + __batadv_dat_purge(bat_priv, NULL); + + batadv_hash_destroy(bat_priv->dat.hash); + + bat_priv->dat.hash = NULL; +} + +/** + * batadv_dat_init - initialise the DAT internals + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_dat_init(struct batadv_priv *bat_priv) +{ + if (bat_priv->dat.hash) + return 0; + + bat_priv->dat.hash = batadv_hash_new(1024); + + if (!bat_priv->dat.hash) + return -ENOMEM; + + batadv_dat_start_timer(bat_priv); + + return 0; +} + +/** + * batadv_dat_free - free the DAT internals + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_dat_free(struct batadv_priv *bat_priv) +{ + cancel_delayed_work_sync(&bat_priv->dat.work); + + batadv_dat_hash_free(bat_priv); +} + +/** + * batadv_dat_cache_seq_print_text - print the local DAT hash table + * @seq: seq file to print on + * @offset: not used + */ +int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->dat.hash; + struct batadv_dat_entry *dat_entry; + struct batadv_hard_iface *primary_if; + struct hlist_node *node; + struct hlist_head *head; + unsigned long last_seen_jiffies; + int last_seen_msecs, last_seen_secs, last_seen_mins; + uint32_t i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + seq_printf(seq, "Distributed ARP Table (%s):\n", net_dev->name); + seq_printf(seq, " %-7s %-13s %5s\n", "IPv4", "MAC", + "last-seen"); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(dat_entry, node, head, hash_entry) { + last_seen_jiffies = jiffies - dat_entry->last_update; + last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); + last_seen_mins = last_seen_msecs / 60000; + last_seen_msecs = last_seen_msecs % 60000; + last_seen_secs = last_seen_msecs / 1000; + + seq_printf(seq, " * %15pI4 %14pM %6i:%02i\n", + &dat_entry->ip, dat_entry->mac_addr, + last_seen_mins, last_seen_secs); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_arp_get_type - parse an ARP packet and gets the type + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to analyse + * @hdr_size: size of the possible header before the ARP packet in the skb + * + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + */ +static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + struct arphdr *arphdr; + struct ethhdr *ethhdr; + __be32 ip_src, ip_dst; + uint16_t type = 0; + + /* pull the ethernet header */ + if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN))) + goto out; + + ethhdr = (struct ethhdr *)(skb->data + hdr_size); + + if (ethhdr->h_proto != htons(ETH_P_ARP)) + goto out; + + /* pull the ARP payload */ + if (unlikely(!pskb_may_pull(skb, hdr_size + ETH_HLEN + + arp_hdr_len(skb->dev)))) + goto out; + + arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); + + /* Check whether the ARP packet carries a valid + * IP information + */ + if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) + goto out; + + if (arphdr->ar_pro != htons(ETH_P_IP)) + goto out; + + if (arphdr->ar_hln != ETH_ALEN) + goto out; + + if (arphdr->ar_pln != 4) + goto out; + + /* Check for bad reply/request. If the ARP message is not sane, DAT + * will simply ignore it + */ + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + if (ipv4_is_loopback(ip_src) || ipv4_is_multicast(ip_src) || + ipv4_is_loopback(ip_dst) || ipv4_is_multicast(ip_dst)) + goto out; + + type = ntohs(arphdr->ar_op); +out: + return type; +} + +/** + * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to + * answer using DAT + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * + * Returns true if the message has been sent to the dht candidates, false + * otherwise. In case of true the message has to be enqueued to permit the + * fallback + */ +bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + uint16_t type = 0; + __be32 ip_dst, ip_src; + uint8_t *hw_src; + bool ret = false; + struct batadv_dat_entry *dat_entry = NULL; + struct sk_buff *skb_new; + struct batadv_hard_iface *primary_if = NULL; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, 0); + /* If the node gets an ARP_REQUEST it has to send a DHT_GET unicast + * message to the selected DHT candidates + */ + if (type != ARPOP_REQUEST) + goto out; + + batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REQUEST"); + + ip_src = batadv_arp_ip_src(skb, 0); + hw_src = batadv_arp_hw_src(skb, 0); + ip_dst = batadv_arp_ip_dst(skb, 0); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + if (dat_entry) { + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, + primary_if->soft_iface, ip_dst, hw_src, + dat_entry->mac_addr, hw_src); + if (!skb_new) + goto out; + + skb_reset_mac_header(skb_new); + skb_new->protocol = eth_type_trans(skb_new, + primary_if->soft_iface); + bat_priv->stats.rx_packets++; + bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; + primary_if->soft_iface->last_rx = jiffies; + + netif_rx(skb_new); + batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); + ret = true; + } else { + /* Send the request on the DHT */ + ret = batadv_dat_send_data(bat_priv, skb, ip_dst, + BATADV_P_DAT_DHT_GET); + } +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} + +/** + * batadv_dat_snoop_incoming_arp_request - snoop the ARP request and try to + * answer using the local DAT storage + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of the encapsulation header + * + * Returns true if the request has been answered, false otherwise + */ +bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src; + struct sk_buff *skb_new; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_dat_entry *dat_entry = NULL; + bool ret = false; + int err; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); + if (type != ARPOP_REQUEST) + goto out; + + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_src = batadv_arp_ip_src(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing incoming ARP REQUEST"); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + if (!dat_entry) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, + primary_if->soft_iface, ip_dst, hw_src, + dat_entry->mac_addr, hw_src); + + if (!skb_new) + goto out; + + /* to preserve backwards compatibility, here the node has to answer + * using the same packet type it received for the request. This is due + * to that if a node is not using the 4addr packet format it may not + * support it. + */ + if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) + err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, + BATADV_P_DAT_CACHE_REPLY); + else + err = batadv_unicast_send_skb(bat_priv, skb_new); + + if (!err) { + batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); + ret = true; + } +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + if (primary_if) + batadv_hardif_free_ref(primary_if); + if (ret) + kfree_skb(skb); + return ret; +} + +/** + * batadv_dat_snoop_outgoing_arp_reply - snoop the ARP reply and fill the DHT + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + */ +void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + return; + + type = batadv_arp_get_type(bat_priv, skb, 0); + if (type != ARPOP_REPLY) + return; + + batadv_dbg_arp(bat_priv, skb, type, 0, "Parsing outgoing ARP REPLY"); + + hw_src = batadv_arp_hw_src(skb, 0); + ip_src = batadv_arp_ip_src(skb, 0); + hw_dst = batadv_arp_hw_dst(skb, 0); + ip_dst = batadv_arp_ip_dst(skb, 0); + + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + + /* Send the ARP reply to the candidates for both the IP addresses that + * the node got within the ARP reply + */ + batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); + batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); +} +/** + * batadv_dat_snoop_incoming_arp_reply - snoop the ARP reply and fill the local + * DAT storage only + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: siaze of the encapsulation header + */ +bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + uint16_t type; + __be32 ip_src, ip_dst; + uint8_t *hw_src, *hw_dst; + bool ret = false; + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + type = batadv_arp_get_type(bat_priv, skb, hdr_size); + if (type != ARPOP_REPLY) + goto out; + + batadv_dbg_arp(bat_priv, skb, type, hdr_size, + "Parsing incoming ARP REPLY"); + + hw_src = batadv_arp_hw_src(skb, hdr_size); + ip_src = batadv_arp_ip_src(skb, hdr_size); + hw_dst = batadv_arp_hw_dst(skb, hdr_size); + ip_dst = batadv_arp_ip_dst(skb, hdr_size); + + /* Update our internal cache with both the IP addresses the node got + * within the ARP reply + */ + batadv_dat_entry_add(bat_priv, ip_src, hw_src); + batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); + + /* if this REPLY is directed to a client of mine, let's deliver the + * packet to the interface + */ + ret = !batadv_is_my_client(bat_priv, hw_dst); +out: + /* if ret == false -> packet has to be delivered to the interface */ + return ret; +} + +/** + * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped + * (because the node has already got the reply via DAT) or not + * @bat_priv: the bat priv with all the soft interface information + * @forw_packet: the broadcast packet + * + * Returns true if the node can drop the packet, false otherwise + */ +bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet) +{ + uint16_t type; + __be32 ip_dst; + struct batadv_dat_entry *dat_entry = NULL; + bool ret = false; + const size_t bcast_len = sizeof(struct batadv_bcast_packet); + + if (!atomic_read(&bat_priv->distributed_arp_table)) + goto out; + + /* If this packet is an ARP_REQUEST and the node already has the + * information that it is going to ask, then the packet can be dropped + */ + if (forw_packet->num_packets) + goto out; + + type = batadv_arp_get_type(bat_priv, forw_packet->skb, bcast_len); + if (type != ARPOP_REQUEST) + goto out; + + ip_dst = batadv_arp_ip_dst(forw_packet->skb, bcast_len); + dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); + /* check if the node already got this entry */ + if (!dat_entry) { + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP Request for %pI4: fallback\n", &ip_dst); + goto out; + } + + batadv_dbg(BATADV_DBG_DAT, bat_priv, + "ARP Request for %pI4: fallback prevented\n", &ip_dst); + ret = true; + +out: + if (dat_entry) + batadv_dat_entry_free_ref(dat_entry); + return ret; +} diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h new file mode 100644 index 00000000000..d060c033e7d --- /dev/null +++ b/net/batman-adv/distributed-arp-table.h @@ -0,0 +1,167 @@ +/* Copyright (C) 2011-2012 B.A.T.M.A.N. contributors: + * + * Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_ARP_H_ +#define _NET_BATMAN_ADV_ARP_H_ + +#ifdef CONFIG_BATMAN_ADV_DAT + +#include "types.h" +#include "originator.h" + +#include <linux/if_arp.h> + +#define BATADV_DAT_ADDR_MAX ((batadv_dat_addr_t)~(batadv_dat_addr_t)0) + +bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb); +bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size); +void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb); +bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size); +bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet); + +/** + * batadv_dat_init_orig_node_addr - assign a DAT address to the orig_node + * @orig_node: the node to assign the DAT address to + */ +static inline void +batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) +{ + uint32_t addr; + + addr = batadv_choose_orig(orig_node->orig, BATADV_DAT_ADDR_MAX); + orig_node->dat_addr = (batadv_dat_addr_t)addr; +} + +/** + * batadv_dat_init_own_addr - assign a DAT address to the node itself + * @bat_priv: the bat priv with all the soft interface information + * @primary_if: a pointer to the primary interface + */ +static inline void +batadv_dat_init_own_addr(struct batadv_priv *bat_priv, + struct batadv_hard_iface *primary_if) +{ + uint32_t addr; + + addr = batadv_choose_orig(primary_if->net_dev->dev_addr, + BATADV_DAT_ADDR_MAX); + + bat_priv->dat.addr = (batadv_dat_addr_t)addr; +} + +int batadv_dat_init(struct batadv_priv *bat_priv); +void batadv_dat_free(struct batadv_priv *bat_priv); +int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset); + +/** + * batadv_dat_inc_counter - increment the correct DAT packet counter + * @bat_priv: the bat priv with all the soft interface information + * @subtype: the 4addr subtype of the packet to be counted + * + * Updates the ethtool statistics for the received packet if it is a DAT subtype + */ +static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, + uint8_t subtype) +{ + switch (subtype) { + case BATADV_P_DAT_DHT_GET: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_GET_RX); + break; + case BATADV_P_DAT_DHT_PUT: + batadv_inc_counter(bat_priv, + BATADV_CNT_DAT_PUT_RX); + break; + } +} + +#else + +static inline bool +batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return false; +} + +static inline bool +batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + return false; +} + +static inline bool +batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return false; +} + +static inline bool +batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, + struct sk_buff *skb, int hdr_size) +{ + return false; +} + +static inline bool +batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, + struct batadv_forw_packet *forw_packet) +{ + return false; +} + +static inline void +batadv_dat_init_orig_node_addr(struct batadv_orig_node *orig_node) +{ +} + +static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv, + struct batadv_hard_iface *iface) +{ +} + +static inline void batadv_arp_change_timeout(struct net_device *soft_iface, + const char *name) +{ +} + +static inline int batadv_dat_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_dat_free(struct batadv_priv *bat_priv) +{ +} + +static inline void batadv_dat_inc_counter(struct batadv_priv *bat_priv, + uint8_t subtype) +{ +} + +#endif /* CONFIG_BATMAN_ADV_DAT */ + +#endif /* _NET_BATMAN_ADV_ARP_H_ */ diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 15d67abc10a..dd07c7e3654 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -477,22 +477,11 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) struct batadv_hard_iface *primary_if; struct batadv_gw_node *gw_node; struct hlist_node *node; - int gw_count = 0, ret = 0; + int gw_count = 0; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); - goto out; - } seq_printf(seq, " %-12s (%s/%i) %17s [%10s]: gw_class ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", @@ -519,7 +508,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index d112fd6750b..6b7a5d3eeb7 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "hard-interface.h" #include "soft-interface.h" #include "send.h" @@ -109,6 +110,8 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, if (!primary_if) goto out; + batadv_dat_init_own_addr(bat_priv, primary_if); + skb = bat_priv->vis.my_info->skb_packet; vis_packet = (struct batadv_vis_packet *)skb->data; memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN); @@ -450,8 +453,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) /* This can't be called via a bat_priv callback because * we have no bat_priv yet. */ - atomic_set(&hard_iface->seqno, 1); - hard_iface->packet_buff = NULL; + atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); + hard_iface->bat_iv.ogm_buff = NULL; return hard_iface; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index bde3cf74750..87ca8095b01 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -42,12 +42,16 @@ static int batadv_socket_open(struct inode *inode, struct file *file) unsigned int i; struct batadv_socket_client *socket_client; + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + nonseekable_open(inode, file); socket_client = kmalloc(sizeof(*socket_client), GFP_KERNEL); - - if (!socket_client) + if (!socket_client) { + module_put(THIS_MODULE); return -ENOMEM; + } for (i = 0; i < ARRAY_SIZE(batadv_socket_client_hash); i++) { if (!batadv_socket_client_hash[i]) { @@ -59,6 +63,7 @@ static int batadv_socket_open(struct inode *inode, struct file *file) if (i == ARRAY_SIZE(batadv_socket_client_hash)) { pr_err("Error - can't add another packet client: maximum number of clients reached\n"); kfree(socket_client); + module_put(THIS_MODULE); return -EXFULL; } @@ -71,7 +76,6 @@ static int batadv_socket_open(struct inode *inode, struct file *file) file->private_data = socket_client; - batadv_inc_module_count(); return 0; } @@ -96,7 +100,7 @@ static int batadv_socket_release(struct inode *inode, struct file *file) spin_unlock_bh(&socket_client->lock); kfree(socket_client); - batadv_dec_module_count(); + module_put(THIS_MODULE); return 0; } @@ -173,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN); + skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index b4aa470bc4a..dc33a0c484a 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -29,6 +29,7 @@ #include "hard-interface.h" #include "gateway_client.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" #include "vis.h" #include "hash.h" #include "bat_algo.h" @@ -128,6 +129,10 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; + ret = batadv_dat_init(bat_priv); + if (ret < 0) + goto err; + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -155,21 +160,13 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_bla_free(bat_priv); + batadv_dat_free(bat_priv); + free_percpu(bat_priv->bat_counters); atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); } -void batadv_inc_module_count(void) -{ - try_module_get(THIS_MODULE); -} - -void batadv_dec_module_count(void) -{ - module_put(THIS_MODULE); -} - int batadv_is_my_mac(const uint8_t *addr) { const struct batadv_hard_iface *hard_iface; @@ -188,6 +185,42 @@ int batadv_is_my_mac(const uint8_t *addr) return 0; } +/** + * batadv_seq_print_text_primary_if_get - called from debugfs table printing + * function that requires the primary interface + * @seq: debugfs table seq_file struct + * + * Returns primary interface if found or NULL otherwise. + */ +struct batadv_hard_iface * +batadv_seq_print_text_primary_if_get(struct seq_file *seq) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + + primary_if = batadv_primary_if_get_selected(bat_priv); + + if (!primary_if) { + seq_printf(seq, + "BATMAN mesh %s disabled - please specify interfaces to enable it\n", + net_dev->name); + goto out; + } + + if (primary_if->if_status == BATADV_IF_ACTIVE) + goto out; + + seq_printf(seq, + "BATMAN mesh %s disabled - primary interface not active\n", + net_dev->name); + batadv_hardif_free_ref(primary_if); + primary_if = NULL; + +out: + return primary_if; +} + static int batadv_recv_unhandled_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { @@ -274,6 +307,8 @@ static void batadv_recv_handler_init(void) /* batman icmp packet */ batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; + /* unicast with 4 addresses packet */ + batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; /* unicast packet */ batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; /* fragmented unicast packet */ diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index d57b746219d..240c74ffeb9 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -44,6 +44,7 @@ #define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */ #define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */ #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */ +#define BATADV_DAT_ENTRY_TIMEOUT (5*60000) /* 5 mins in milliseconds */ /* sliding packet range of received originator messages in sequence numbers * (should be a multiple of our word size) */ @@ -73,6 +74,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ +#define ARP_REQ_DELAY 250 +/* numbers of originator to contact for any PUT/GET DHT operation */ +#define BATADV_DAT_CANDIDATES_NUM 3 + #define BATADV_VIS_INTERVAL 5000 /* 5 seconds */ /* how much worse secondary interfaces may be to be considered as bonding @@ -117,6 +123,9 @@ enum batadv_uev_type { #define BATADV_GW_THRESHOLD 50 +#define BATADV_DAT_CANDIDATE_NOT_FOUND 0 +#define BATADV_DAT_CANDIDATE_ORIG 1 + /* Debug Messages */ #ifdef pr_fmt #undef pr_fmt @@ -150,9 +159,9 @@ extern struct workqueue_struct *batadv_event_workqueue; int batadv_mesh_init(struct net_device *soft_iface); void batadv_mesh_free(struct net_device *soft_iface); -void batadv_inc_module_count(void); -void batadv_dec_module_count(void); int batadv_is_my_mac(const uint8_t *addr); +struct batadv_hard_iface * +batadv_seq_print_text_primary_if_get(struct seq_file *seq); int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev); @@ -165,13 +174,22 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); -/* all messages related to routing / flooding / broadcasting / etc */ +/** + * enum batadv_dbg_level - available log levels + * @BATADV_DBG_BATMAN: OGM and TQ computations related messages + * @BATADV_DBG_ROUTES: route added / changed / deleted + * @BATADV_DBG_TT: translation table messages + * @BATADV_DBG_BLA: bridge loop avoidance messages + * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_ALL: the union of all the above log levels + */ enum batadv_dbg_level { BATADV_DBG_BATMAN = BIT(0), - BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */ - BATADV_DBG_TT = BIT(2), /* translation table operations */ - BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */ - BATADV_DBG_ALL = 15, + BATADV_DBG_ROUTES = BIT(1), + BATADV_DBG_TT = BIT(2), + BATADV_DBG_BLA = BIT(3), + BATADV_DBG_DAT = BIT(4), + BATADV_DBG_ALL = 31, }; #ifdef CONFIG_BATMAN_ADV_DEBUG diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ac9bdf8f80a..84930a4f536 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "originator.h" #include "hash.h" #include "translation-table.h" @@ -223,6 +224,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, orig_node->tt_poss_change = false; orig_node->bat_priv = bat_priv; memcpy(orig_node->orig, addr, ETH_ALEN); + batadv_dat_init_orig_node_addr(orig_node); orig_node->router = NULL; orig_node->tt_crc = 0; atomic_set(&orig_node->last_ttvn, 0); @@ -415,23 +417,10 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) int last_seen_msecs; unsigned long last_seen_jiffies; uint32_t i; - int ret = 0; - primary_if = batadv_primary_if_get_selected(bat_priv); - - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, @@ -485,7 +474,7 @@ next: out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static int batadv_orig_node_add_if(struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 2d23a14c220..df548ed196d 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -23,14 +23,29 @@ #define BATADV_ETH_P_BATMAN 0x4305 /* unofficial/not registered Ethertype */ enum batadv_packettype { - BATADV_IV_OGM = 0x01, - BATADV_ICMP = 0x02, - BATADV_UNICAST = 0x03, - BATADV_BCAST = 0x04, - BATADV_VIS = 0x05, - BATADV_UNICAST_FRAG = 0x06, - BATADV_TT_QUERY = 0x07, - BATADV_ROAM_ADV = 0x08, + BATADV_IV_OGM = 0x01, + BATADV_ICMP = 0x02, + BATADV_UNICAST = 0x03, + BATADV_BCAST = 0x04, + BATADV_VIS = 0x05, + BATADV_UNICAST_FRAG = 0x06, + BATADV_TT_QUERY = 0x07, + BATADV_ROAM_ADV = 0x08, + BATADV_UNICAST_4ADDR = 0x09, +}; + +/** + * enum batadv_subtype - packet subtype for unicast4addr + * @BATADV_P_DATA: user payload + * @BATADV_P_DAT_DHT_GET: DHT request message + * @BATADV_P_DAT_DHT_PUT: DHT store message + * @BATADV_P_DAT_CACHE_REPLY: ARP reply generated by DAT + */ +enum batadv_subtype { + BATADV_P_DATA = 0x01, + BATADV_P_DAT_DHT_GET = 0x02, + BATADV_P_DAT_DHT_PUT = 0x03, + BATADV_P_DAT_CACHE_REPLY = 0x04, }; /* this file is included by batctl which needs these defines */ @@ -106,13 +121,16 @@ struct batadv_bla_claim_dst { uint8_t magic[3]; /* FF:43:05 */ uint8_t type; /* bla_claimframe */ __be16 group; /* group id */ -} __packed; +}; struct batadv_header { uint8_t packet_type; uint8_t version; /* batman version field */ uint8_t ttl; -} __packed; + /* the parent struct has to add a byte after the header to make + * everything 4 bytes aligned again + */ +}; struct batadv_ogm_packet { struct batadv_header header; @@ -137,7 +155,7 @@ struct batadv_icmp_packet { __be16 seqno; uint8_t uid; uint8_t reserved; -} __packed; +}; #define BATADV_RR_LEN 16 @@ -153,13 +171,32 @@ struct batadv_icmp_packet_rr { uint8_t uid; uint8_t rr_cur; uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; -} __packed; +}; struct batadv_unicast_packet { struct batadv_header header; uint8_t ttvn; /* destination translation table version number */ uint8_t dest[ETH_ALEN]; -} __packed; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; + +/** + * struct batadv_unicast_4addr_packet - extended unicast packet + * @u: common unicast packet header + * @src: address of the source + * @subtype: packet subtype + */ +struct batadv_unicast_4addr_packet { + struct batadv_unicast_packet u; + uint8_t src[ETH_ALEN]; + uint8_t subtype; + uint8_t reserved; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ +}; struct batadv_unicast_frag_packet { struct batadv_header header; @@ -176,6 +213,9 @@ struct batadv_bcast_packet { uint8_t reserved; __be32 seqno; uint8_t orig[ETH_ALEN]; + /* "4 bytes boundary + 2 bytes" long to make the payload after the + * following ethernet header again 4 bytes boundary aligned + */ } __packed; struct batadv_vis_packet { @@ -187,7 +227,7 @@ struct batadv_vis_packet { uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */ uint8_t target_orig[ETH_ALEN]; /* who should receive this packet */ uint8_t sender_orig[ETH_ALEN]; /* who sent or forwarded this packet */ -} __packed; +}; struct batadv_tt_query_packet { struct batadv_header header; diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 376b4cc6ca8..32aa4d460e1 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -28,6 +28,7 @@ #include "vis.h" #include "unicast.h" #include "bridge_loop_avoidance.h" +#include "distributed-arp-table.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -549,25 +550,18 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, if (tmp_neigh_node->if_incoming == recv_if) continue; - if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + if (router && tmp_neigh_node->tq_avg <= router->tq_avg) continue; - /* if we don't have a router yet - * or this one is better, choose it. - */ - if ((!router) || - (tmp_neigh_node->tq_avg > router->tq_avg)) { - /* decrement refcount of - * previously selected router - */ - if (router) - batadv_neigh_node_free_ref(router); + if (!atomic_inc_not_zero(&tmp_neigh_node->refcount)) + continue; - router = tmp_neigh_node; - atomic_inc_not_zero(&router->refcount); - } + /* decrement refcount of previously selected router */ + if (router) + batadv_neigh_node_free_ref(router); - batadv_neigh_node_free_ref(tmp_neigh_node); + /* we found a better router (or at least one valid router) */ + router = tmp_neigh_node; } /* use the first candidate if nothing was found. */ @@ -687,21 +681,8 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if) struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_roam_adv_packet *roam_adv_packet; struct batadv_orig_node *orig_node; - struct ethhdr *ethhdr; - /* drop packet if it has not necessary minimum size */ - if (unlikely(!pskb_may_pull(skb, - sizeof(struct batadv_roam_adv_packet)))) - goto out; - - ethhdr = (struct ethhdr *)skb_mac_header(skb); - - /* packet with unicast indication but broadcast recipient */ - if (is_broadcast_ether_addr(ethhdr->h_dest)) - goto out; - - /* packet with broadcast sender address */ - if (is_broadcast_ether_addr(ethhdr->h_source)) + if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); @@ -928,8 +909,12 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, bool tt_poss_change; int is_old_ttvn; - /* I could need to modify it */ - if (skb_cow(skb, sizeof(struct batadv_unicast_packet)) < 0) + /* check if there is enough data before accessing it */ + if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0) + return 0; + + /* create a copy of the skb (in case of for re-routing) to modify it. */ + if (skb_cow(skb, sizeof(*unicast_packet)) < 0) return 0; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -985,10 +970,10 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, batadv_orig_node_free_ref(orig_node); } - batadv_dbg(BATADV_DBG_ROUTES, bat_priv, - "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n", - unicast_packet->ttvn, curr_ttvn, ethhdr->h_dest, - unicast_packet->dest); + net_ratelimited_function(batadv_dbg, BATADV_DBG_TT, bat_priv, + "TTVN mismatch (old_ttvn %u new_ttvn %u)! Rerouting unicast packet (for %pM) to %pM\n", + unicast_packet->ttvn, curr_ttvn, + ethhdr->h_dest, unicast_packet->dest); unicast_packet->ttvn = curr_ttvn; } @@ -1000,7 +985,19 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + uint8_t *orig_addr; + struct batadv_orig_node *orig_node = NULL; int hdr_size = sizeof(*unicast_packet); + bool is4addr; + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + + is4addr = unicast_packet->header.packet_type == BATADV_UNICAST_4ADDR; + /* the caller function should have already pulled 2 bytes */ + if (is4addr) + hdr_size = sizeof(*unicast_4addr_packet); if (batadv_check_unicast_packet(skb, hdr_size) < 0) return NET_RX_DROP; @@ -1008,12 +1005,28 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb)) return NET_RX_DROP; - unicast_packet = (struct batadv_unicast_packet *)skb->data; - /* packet for me */ if (batadv_is_my_mac(unicast_packet->dest)) { + if (is4addr) { + batadv_dat_inc_counter(bat_priv, + unicast_4addr_packet->subtype); + orig_addr = unicast_4addr_packet->src; + orig_node = batadv_orig_hash_find(bat_priv, orig_addr); + } + + if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, + hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, + hdr_size)) + goto rx_success; + batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, - NULL); + orig_node); + +rx_success: + if (orig_node) + batadv_orig_node_free_ref(orig_node); return NET_RX_SUCCESS; } @@ -1050,8 +1063,17 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb, if (!new_skb) return NET_RX_SUCCESS; + if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb, + hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb, + hdr_size)) + goto rx_success; + batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if, sizeof(struct batadv_unicast_packet), NULL); + +rx_success: return NET_RX_SUCCESS; } @@ -1143,9 +1165,16 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size)) goto out; + if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size)) + goto rx_success; + if (batadv_dat_snoop_incoming_arp_reply(bat_priv, skb, hdr_size)) + goto rx_success; + /* broadcast for me */ batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size, orig_node); + +rx_success: ret = NET_RX_SUCCESS; goto out; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 570a8bce036..660d9bf7d21 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -18,6 +18,7 @@ */ #include "main.h" +#include "distributed-arp-table.h" #include "send.h" #include "routing.h" #include "translation-table.h" @@ -209,6 +210,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; + if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) + goto out; + /* rebroadcast packet */ rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index b9a28d2dd3e..c283d87c4cc 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -20,6 +20,7 @@ #include "main.h" #include "soft-interface.h" #include "hard-interface.h" +#include "distributed-arp-table.h" #include "routing.h" #include "send.h" #include "debugfs.h" @@ -146,13 +147,16 @@ static int batadv_interface_tx(struct sk_buff *skb, struct batadv_bcast_packet *bcast_packet; struct vlan_ethhdr *vhdr; __be16 ethertype = __constant_htons(BATADV_ETH_P_BATMAN); - static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, - 0x00}; + static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, + 0x00, 0x00}; + static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, + 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; short vid __maybe_unused = -1; bool do_bcast = false; uint32_t seqno; + unsigned long brd_delay = 1; if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@ -180,10 +184,16 @@ static int batadv_interface_tx(struct sk_buff *skb, /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... + * + * The same goes for ECTP sent at least by some Cisco Switches, + * it might confuse the mesh when used with bridge loop avoidance. */ if (batadv_compare_eth(ethhdr->h_dest, stp_addr)) goto dropped; + if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) + goto dropped; + if (is_multicast_ether_addr(ethhdr->h_dest)) { do_bcast = true; @@ -216,6 +226,13 @@ static int batadv_interface_tx(struct sk_buff *skb, if (!primary_if) goto dropped; + /* in case of ARP request, we do not immediately broadcasti the + * packet, instead we first wait for DAT to try to retrieve the + * correct ARP entry + */ + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) + brd_delay = msecs_to_jiffies(ARP_REQ_DELAY); + if (batadv_skb_head_push(skb, sizeof(*bcast_packet)) < 0) goto dropped; @@ -237,7 +254,7 @@ static int batadv_interface_tx(struct sk_buff *skb, seqno = atomic_inc_return(&bat_priv->bcast_seqno); bcast_packet->seqno = htonl(seqno); - batadv_add_bcast_packet_to_list(bat_priv, skb, 1); + batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay); /* a copy is stored in the bcast list, therefore removing * the original skb. @@ -252,7 +269,12 @@ static int batadv_interface_tx(struct sk_buff *skb, goto dropped; } - ret = batadv_unicast_send_skb(skb, bat_priv); + if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) + goto dropped; + + batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); + + ret = batadv_unicast_send_skb(bat_priv, skb); if (ret != 0) goto dropped_freed; } @@ -347,7 +369,51 @@ out: return; } +/* batman-adv network devices have devices nesting below it and are a special + * "super class" of normal network devices; split their locks off into a + * separate class since they always nest. + */ +static struct lock_class_key batadv_netdev_xmit_lock_key; +static struct lock_class_key batadv_netdev_addr_lock_key; + +/** + * batadv_set_lockdep_class_one - Set lockdep class for a single tx queue + * @dev: device which owns the tx queue + * @txq: tx queue to modify + * @_unused: always NULL + */ +static void batadv_set_lockdep_class_one(struct net_device *dev, + struct netdev_queue *txq, + void *_unused) +{ + lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key); +} + +/** + * batadv_set_lockdep_class - Set txq and addr_list lockdep class + * @dev: network device to modify + */ +static void batadv_set_lockdep_class(struct net_device *dev) +{ + lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key); + netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); +} + +/** + * batadv_softif_init - Late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init(struct net_device *dev) +{ + batadv_set_lockdep_class(dev); + + return 0; +} + static const struct net_device_ops batadv_netdev_ops = { + .ndo_init = batadv_softif_init, .ndo_open = batadv_interface_open, .ndo_stop = batadv_interface_release, .ndo_get_stats = batadv_interface_stats, @@ -414,6 +480,9 @@ struct net_device *batadv_softif_create(const char *name) atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); atomic_set(&bat_priv->bridge_loop_avoidance, 0); +#ifdef CONFIG_BATMAN_ADV_DAT + atomic_set(&bat_priv->distributed_arp_table, 1); +#endif atomic_set(&bat_priv->ap_isolation, 0); atomic_set(&bat_priv->vis_mode, BATADV_VIS_TYPE_CLIENT_UPDATE); atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); @@ -556,6 +625,13 @@ static const struct { { "tt_response_rx" }, { "tt_roam_adv_tx" }, { "tt_roam_adv_rx" }, +#ifdef CONFIG_BATMAN_ADV_DAT + { "dat_get_tx" }, + { "dat_get_rx" }, + { "dat_put_tx" }, + { "dat_put_rx" }, + { "dat_cached_reply_tx" }, +#endif }; static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 66518c75c21..fa3cc1af091 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -20,6 +20,7 @@ #include "main.h" #include "sysfs.h" #include "translation-table.h" +#include "distributed-arp-table.h" #include "originator.h" #include "hard-interface.h" #include "gateway_common.h" @@ -122,55 +123,6 @@ ssize_t batadv_show_##_name(struct kobject *kobj, \ batadv_store_##_name) -#define BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func) \ -ssize_t batadv_store_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff, \ - size_t count) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - ssize_t length; \ - \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = __batadv_store_uint_attr(buff, count, _min, _max, \ - _post_func, attr, \ - &hard_iface->_name, net_dev); \ - \ - batadv_hardif_free_ref(hard_iface); \ - return length; \ -} - -#define BATADV_ATTR_HIF_SHOW_UINT(_name) \ -ssize_t batadv_show_##_name(struct kobject *kobj, \ - struct attribute *attr, char *buff) \ -{ \ - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); \ - struct batadv_hard_iface *hard_iface; \ - ssize_t length; \ - \ - hard_iface = batadv_hardif_get_by_netdev(net_dev); \ - if (!hard_iface) \ - return 0; \ - \ - length = sprintf(buff, "%i\n", atomic_read(&hard_iface->_name));\ - \ - batadv_hardif_free_ref(hard_iface); \ - return length; \ -} - -/* Use this, if you are going to set [name] in hard_iface to an - * unsigned integer value - */ -#define BATADV_ATTR_HIF_UINT(_name, _mode, _min, _max, _post_func) \ - static BATADV_ATTR_HIF_STORE_UINT(_name, _min, _max, _post_func)\ - static BATADV_ATTR_HIF_SHOW_UINT(_name) \ - static BATADV_ATTR(_name, _mode, batadv_show_##_name, \ - batadv_store_##_name) - - static int batadv_store_bool_attr(char *buff, size_t count, struct net_device *net_dev, const char *attr_name, atomic_t *attr) @@ -469,6 +421,9 @@ BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL); #ifdef CONFIG_BATMAN_ADV_BLA BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR, NULL); #endif +#ifdef CONFIG_BATMAN_ADV_DAT +BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR, NULL); +#endif BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu); BATADV_ATTR_SIF_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL); static BATADV_ATTR(vis_mode, S_IRUGO | S_IWUSR, batadv_show_vis_mode, @@ -494,6 +449,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_BLA &batadv_attr_bridge_loop_avoidance, #endif +#ifdef CONFIG_BATMAN_ADV_DAT + &batadv_attr_distributed_arp_table, +#endif &batadv_attr_fragmentation, &batadv_attr_ap_isolation, &batadv_attr_vis_mode, diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 112edd371b2..f8b9c32c29a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -434,22 +434,10 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct hlist_node *node; struct hlist_head *head; uint32_t i; - int ret = 0; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n", @@ -479,7 +467,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } static void @@ -501,24 +489,39 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, tt_local_entry->common.addr, message); } -void batadv_tt_local_remove(struct batadv_priv *bat_priv, const uint8_t *addr, - const char *message, bool roaming) +/** + * batadv_tt_local_remove - logically remove an entry from the local table + * @bat_priv: the bat priv with all the soft interface information + * @addr: the MAC address of the client to remove + * @message: message to append to the log on deletion + * @roaming: true if the deletion is due to a roaming event + * + * Returns the flags assigned to the local entry before being deleted + */ +uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, + const uint8_t *addr, const char *message, + bool roaming) { struct batadv_tt_local_entry *tt_local_entry = NULL; - uint16_t flags; + uint16_t flags, curr_flags = BATADV_NO_FLAGS; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr); if (!tt_local_entry) goto out; + curr_flags = tt_local_entry->common.flags; + flags = BATADV_TT_CLIENT_DEL; if (roaming) flags |= BATADV_TT_CLIENT_ROAM; batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message); + out: if (tt_local_entry) batadv_tt_local_entry_free_ref(tt_local_entry); + + return curr_flags; } static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, @@ -725,6 +728,7 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, int ret = 0; int hash_added; struct batadv_tt_common_entry *common; + uint16_t local_flags; tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr); @@ -738,6 +742,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, common->flags = flags; tt_global_entry->roam_at = 0; + /* node must store current time in case of roaming. This is + * needed to purge this entry out on timeout (if nobody claims + * it) + */ + if (flags & BATADV_TT_CLIENT_ROAM) + tt_global_entry->roam_at = jiffies; atomic_set(&common->refcount, 2); common->added_at = jiffies; @@ -788,13 +798,16 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (via %pM)\n", tt_global_entry->common.addr, orig_node->orig); + ret = 1; out_remove: + /* remove address from local hash if present */ - batadv_tt_local_remove(bat_priv, tt_global_entry->common.addr, - "global tt received", - flags & BATADV_TT_CLIENT_ROAM); - ret = 1; + local_flags = batadv_tt_local_remove(bat_priv, tt_addr, + "global tt received", + flags & BATADV_TT_CLIENT_ROAM); + tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; + out: if (tt_global_entry) batadv_tt_global_entry_free_ref(tt_global_entry); @@ -842,22 +855,10 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) struct hlist_node *node; struct hlist_head *head; uint32_t i; - int ret = 0; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - please specify interfaces to enable it\n", - net_dev->name); - goto out; - } - if (primary_if->if_status != BATADV_IF_ACTIVE) { - ret = seq_printf(seq, - "BATMAN mesh %s disabled - primary interface not active\n", - net_dev->name); + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) goto out; - } seq_printf(seq, "Globally announced TT entries received via the mesh %s\n", @@ -881,7 +882,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset) out: if (primary_if) batadv_hardif_free_ref(primary_if); - return ret; + return 0; } /* deletes the orig list of a tt_global_entry */ @@ -1471,11 +1472,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1537,11 +1538,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN); + skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1652,11 +1653,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1779,11 +1780,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN); + skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -2117,11 +2118,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN); + skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN); + skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); @@ -2438,7 +2439,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, if (!tt_global_entry) goto out; - ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; + ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); batadv_tt_global_entry_free_ref(tt_global_entry); out: return ret; diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 811fffd4760..9fa4fe41c86 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -24,9 +24,9 @@ int batadv_tt_len(int changes_num); int batadv_tt_init(struct batadv_priv *bat_priv); void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int ifindex); -void batadv_tt_local_remove(struct batadv_priv *bat_priv, - const uint8_t *addr, const char *message, - bool roaming); +uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, + const uint8_t *addr, const char *message, + bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ac1e07a8045..8ce16c1cbaf 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,20 +28,41 @@ (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \ sizeof(struct batadv_bcast_packet))) +#ifdef CONFIG_BATMAN_ADV_DAT + +/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed, + * BATADV_DAT_ADDR_MAX is changed as well. + * + * *Please be careful: batadv_dat_addr_t must be UNSIGNED* + */ +#define batadv_dat_addr_t uint16_t + +#endif /* CONFIG_BATMAN_ADV_DAT */ + +/** + * struct batadv_hard_iface_bat_iv - per hard interface B.A.T.M.A.N. IV data + * @ogm_buff: buffer holding the OGM packet + * @ogm_buff_len: length of the OGM packet buffer + * @ogm_seqno: OGM sequence number - used to identify each OGM + */ +struct batadv_hard_iface_bat_iv { + unsigned char *ogm_buff; + int ogm_buff_len; + atomic_t ogm_seqno; +}; + struct batadv_hard_iface { struct list_head list; int16_t if_num; char if_status; struct net_device *net_dev; - atomic_t seqno; atomic_t frag_seqno; - unsigned char *packet_buff; - int packet_len; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; struct net_device *soft_iface; struct rcu_head rcu; + struct batadv_hard_iface_bat_iv bat_iv; }; /** @@ -63,6 +84,9 @@ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; uint8_t primary_addr[ETH_ALEN]; struct batadv_neigh_node __rcu *router; /* rcu protected pointer */ +#ifdef CONFIG_BATMAN_ADV_DAT + batadv_dat_addr_t dat_addr; +#endif unsigned long *bcast_own; uint8_t *bcast_own_sum; unsigned long last_seen; @@ -162,6 +186,13 @@ enum batadv_counters { BATADV_CNT_TT_RESPONSE_RX, BATADV_CNT_TT_ROAM_ADV_TX, BATADV_CNT_TT_ROAM_ADV_RX, +#ifdef CONFIG_BATMAN_ADV_DAT + BATADV_CNT_DAT_GET_TX, + BATADV_CNT_DAT_GET_RX, + BATADV_CNT_DAT_PUT_TX, + BATADV_CNT_DAT_PUT_RX, + BATADV_CNT_DAT_CACHED_REPLY_TX, +#endif BATADV_CNT_NUM, }; @@ -228,6 +259,20 @@ struct batadv_priv_vis { struct batadv_vis_info *my_info; }; +/** + * struct batadv_priv_dat - per mesh interface DAT private data + * @addr: node DAT address + * @hash: hashtable representing the local ARP cache + * @work: work queue callback item for cache purging + */ +#ifdef CONFIG_BATMAN_ADV_DAT +struct batadv_priv_dat { + batadv_dat_addr_t addr; + struct batadv_hashtable *hash; + struct delayed_work work; +}; +#endif + struct batadv_priv { atomic_t mesh_state; struct net_device_stats stats; @@ -237,6 +282,9 @@ struct batadv_priv { atomic_t fragmentation; /* boolean */ atomic_t ap_isolation; /* boolean */ atomic_t bridge_loop_avoidance; /* boolean */ +#ifdef CONFIG_BATMAN_ADV_DAT + atomic_t distributed_arp_table; /* boolean */ +#endif atomic_t vis_mode; /* VIS_TYPE_* */ atomic_t gw_mode; /* GW_MODE_* */ atomic_t gw_sel_class; /* uint */ @@ -265,6 +313,9 @@ struct batadv_priv { struct batadv_priv_gw gw; struct batadv_priv_tt tt; struct batadv_priv_vis vis; +#ifdef CONFIG_BATMAN_ADV_DAT + struct batadv_priv_dat dat; +#endif }; struct batadv_socket_client { @@ -437,4 +488,36 @@ struct batadv_algo_ops { void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); }; +/** + * struct batadv_dat_entry - it is a single entry of batman-adv ARP backend. It + * is used to stored ARP entries needed for the global DAT cache + * @ip: the IPv4 corresponding to this DAT/ARP entry + * @mac_addr: the MAC address associated to the stored IPv4 + * @last_update: time in jiffies when this entry was refreshed last time + * @hash_entry: hlist node for batadv_priv_dat::hash + * @refcount: number of contexts the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_dat_entry { + __be32 ip; + uint8_t mac_addr[ETH_ALEN]; + unsigned long last_update; + struct hlist_node hash_entry; + atomic_t refcount; + struct rcu_head rcu; +}; + +/** + * struct batadv_dat_candidate - candidate destination for DAT operations + * @type: the type of the selected candidate. It can one of the following: + * - BATADV_DAT_CANDIDATE_NOT_FOUND + * - BATADV_DAT_CANDIDATE_ORIG + * @orig_node: if type is BATADV_DAT_CANDIDATE_ORIG this field points to the + * corresponding originator node structure + */ +struct batadv_dat_candidate { + int type; + struct batadv_orig_node *orig_node; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index f39723281ca..c9a1f6523c3 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -291,7 +291,111 @@ out: return ret; } -int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv) +/** + * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the + * common fields for unicast packets + * @skb: packet + * @hdr_size: amount of bytes to push at the beginning of the skb + * @orig_node: the destination node + * + * Returns false if the buffer extension was not possible or true otherwise + */ +static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size, + struct batadv_orig_node *orig_node) +{ + struct batadv_unicast_packet *unicast_packet; + uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); + + if (batadv_skb_head_push(skb, hdr_size) < 0) + return false; + + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + /* batman packet type: unicast */ + unicast_packet->header.packet_type = BATADV_UNICAST; + /* set unicast ttl */ + unicast_packet->header.ttl = BATADV_TTL; + /* copy the destination for faster routing */ + memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); + /* set the destination tt version number */ + unicast_packet->ttvn = ttvn; + + return true; +} + +/** + * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * + * Returns false if the payload could not be encapsulated or true otherwise + */ +static bool batadv_unicast_prepare_skb(struct sk_buff *skb, + struct batadv_orig_node *orig_node) +{ + size_t uni_size = sizeof(struct batadv_unicast_packet); + return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node); +} + +/** + * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr + * header + * @bat_priv: the bat priv with all the soft interface information + * @skb: the skb containing the payload to encapsulate + * @orig_node: the destination node + * @packet_subtype: the batman 4addr packet subtype to use + * + * Returns false if the payload could not be encapsulated or true otherwise + */ +bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig, + int packet_subtype) +{ + struct batadv_hard_iface *primary_if; + struct batadv_unicast_4addr_packet *unicast_4addr_packet; + bool ret = false; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if) + goto out; + + /* pull the header space and fill the unicast_packet substructure. + * We can do that because the first member of the unicast_4addr_packet + * is of type struct unicast_packet + */ + if (!batadv_unicast_push_and_fill_skb(skb, + sizeof(*unicast_4addr_packet), + orig)) + goto out; + + unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; + unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR; + memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr, + ETH_ALEN); + unicast_4addr_packet->subtype = packet_subtype; + unicast_4addr_packet->reserved = 0; + + ret = true; +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return ret; +} + +/** + * batadv_unicast_generic_send_skb - send an skb as unicast + * @bat_priv: the bat priv with all the soft interface information + * @skb: payload to send + * @packet_type: the batman unicast packet type to use + * @packet_subtype: the batman packet subtype. It is ignored if packet_type is + * not BATADV_UNICAT_4ADDR + * + * Returns 1 in case of error or 0 otherwise + */ +int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype) { struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_unicast_packet *unicast_packet; @@ -324,21 +428,23 @@ find_router: if (!neigh_node) goto out; - if (batadv_skb_head_push(skb, sizeof(*unicast_packet)) < 0) + switch (packet_type) { + case BATADV_UNICAST: + batadv_unicast_prepare_skb(skb, orig_node); + break; + case BATADV_UNICAST_4ADDR: + batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node, + packet_subtype); + break; + default: + /* this function supports UNICAST and UNICAST_4ADDR only. It + * should never be invoked with any other packet type + */ goto out; + } unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_packet->header.version = BATADV_COMPAT_VERSION; - /* batman packet type: unicast */ - unicast_packet->header.packet_type = BATADV_UNICAST; - /* set unicast ttl */ - unicast_packet->header.ttl = BATADV_TTL; - /* copy the destination for faster routing */ - memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN); - /* set the destination tt version number */ - unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn); - /* inform the destination node that we are still missing a correct route * for this client. The destination will receive this packet and will * try to reroute it because the ttvn contained in the header is less @@ -348,7 +454,9 @@ find_router: unicast_packet->ttvn = unicast_packet->ttvn - 1; dev_mtu = neigh_node->if_incoming->net_dev->mtu; - if (atomic_read(&bat_priv->fragmentation) && + /* fragmentation mechanism only works for UNICAST (now) */ + if (packet_type == BATADV_UNICAST && + atomic_read(&bat_priv->fragmentation) && data_len + sizeof(*unicast_packet) > dev_mtu) { /* send frag skb decreases ttl */ unicast_packet->header.ttl++; @@ -360,7 +468,6 @@ find_router: batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = 0; - goto out; out: if (neigh_node) diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h index 1c46e2eb1ef..61abba58bd8 100644 --- a/net/batman-adv/unicast.h +++ b/net/batman-adv/unicast.h @@ -29,10 +29,44 @@ int batadv_frag_reassemble_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, struct sk_buff **new_skb); void batadv_frag_list_free(struct list_head *head); -int batadv_unicast_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv); int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface, const uint8_t dstaddr[]); +bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct batadv_orig_node *orig_node, + int packet_subtype); +int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, int packet_type, + int packet_subtype); + + +/** + * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + */ +static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST, + 0); +} + +/** + * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet + * @bat_priv: the bat priv with all the soft interface information + * @skb: the payload to send + * @packet_subtype: the batman 4addr packet subtype to use + */ +static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv, + struct sk_buff *skb, + int packet_subtype) +{ + return batadv_unicast_generic_send_skb(bat_priv, skb, + BATADV_UNICAST_4ADDR, + packet_subtype); +} static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu) { diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 5abd1454fb0..ad14a6c91d6 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -396,12 +396,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN); + info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN); + skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -873,12 +873,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; + len += ETH_HLEN + NET_IP_ALIGN; bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 070e8a68cfc..7c78e264019 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -313,6 +313,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, + .ndo_bridge_getlink = br_getlink, + .ndo_bridge_setlink = br_setlink, }; static void br_dev_free(struct net_device *dev) @@ -356,7 +358,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00; - memcpy(br->group_addr, br_group_address, ETH_ALEN); + memcpy(br->group_addr, eth_reserved_addr_base, ETH_ALEN); br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 76f15fda021..4b34207419b 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -19,9 +19,6 @@ #include <linux/export.h> #include "br_private.h" -/* Bridge group multicast address 802.1d (pg 51). */ -const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; - /* Hook for brouter */ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); @@ -127,18 +124,6 @@ static int br_handle_local_finish(struct sk_buff *skb) return 0; /* process further */ } -/* Does address match the link local multicast address. - * 01:80:c2:00:00:0X - */ -static inline int is_link_local(const unsigned char *dest) -{ - __be16 *a = (__be16 *)dest; - static const __be16 *b = (const __be16 *)br_group_address; - static const __be16 m = cpu_to_be16(0xfff0); - - return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; -} - /* * Return NULL if skb is handled * note: already called with rcu_read_lock @@ -162,7 +147,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) p = br_port_get_rcu(skb->dev); - if (unlikely(is_link_local(dest))) { + if (unlikely(is_link_local_ether_addr(dest))) { /* * See IEEE 802.1D Table 7-10 Reserved addresses * diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 093f527276a..14b065cbd21 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -111,54 +111,33 @@ errout: /* * Dump information about all ports, in response to GETLINK */ -static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) { - struct net *net = sock_net(skb->sk); - struct net_device *dev; - int idx; - - idx = 0; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - struct net_bridge_port *port = br_port_get_rcu(dev); - - /* not a bridge port */ - if (!port || idx < cb->args[0]) - goto skip; - - if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWLINK, - NLM_F_MULTI) < 0) - break; -skip: - ++idx; - } - rcu_read_unlock(); - cb->args[0] = idx; + int err = 0; + struct net_bridge_port *port = br_port_get_rcu(dev); + + /* not a bridge port */ + if (!port) + goto out; - return skb->len; + err = br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI); +out: + return err; } /* * Change state of port (ie from forwarding to blocking etc) * Used by spanning tree in user space. */ -static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct nlattr *protinfo; - struct net_device *dev; struct net_bridge_port *p; u8 new_state; - if (nlmsg_len(nlh) < sizeof(*ifm)) - return -EINVAL; - ifm = nlmsg_data(nlh); - if (ifm->ifi_family != AF_BRIDGE) - return -EPFNOSUPPORT; protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); if (!protinfo || nla_len(protinfo) < sizeof(u8)) @@ -168,10 +147,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(net, ifm->ifi_index); - if (!dev) - return -ENODEV; - p = br_port_get_rtnl(dev); if (!p) return -EINVAL; @@ -191,8 +166,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) br_port_state_selection(p->br); spin_unlock_bh(&p->br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); - return 0; } @@ -218,29 +191,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = { int __init br_netlink_init(void) { - int err; - - err = rtnl_link_register(&br_link_ops); - if (err < 0) - goto err1; - - err = __rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, - br_dump_ifinfo, NULL); - if (err) - goto err2; - err = __rtnl_register(PF_BRIDGE, RTM_SETLINK, - br_rtm_setlink, NULL, NULL); - if (err) - goto err3; - - return 0; - -err3: - rtnl_unregister_all(PF_BRIDGE); -err2: - rtnl_link_unregister(&br_link_ops); -err1: - return err; + return rtnl_link_register(&br_link_ops); } void __exit br_netlink_fini(void) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b278c4ebee..22111ffd68d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,7 +158,9 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + struct net_bridge_port *port = + rcu_dereference_rtnl(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; } @@ -288,7 +290,6 @@ struct br_input_skb_cb { pr_debug("%s: " format, (br)->dev->name, ##args) extern struct notifier_block br_device_notifier; -extern const u8 br_group_address[ETH_ALEN]; /* called under bridge lock */ static inline int br_is_root_bridge(const struct net_bridge *br) @@ -553,6 +554,9 @@ extern struct rtnl_link_ops br_link_ops; extern int br_netlink_init(void); extern void br_netlink_fini(void); extern void br_ifinfo_notify(int event, struct net_bridge_port *port); +extern int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); +extern int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev); #ifdef CONFIG_SYSFS /* br_sysfs_if.c */ @@ -566,10 +570,10 @@ extern void br_sysfs_delbr(struct net_device *dev); #else -#define br_sysfs_addif(p) (0) -#define br_sysfs_renameif(p) (0) -#define br_sysfs_addbr(dev) (0) -#define br_sysfs_delbr(dev) do { } while(0) +static inline int br_sysfs_addif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_renameif(struct net_bridge_port *p) { return 0; } +static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } +static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ #endif diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index c5c059333ea..cffb76e2161 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -14,6 +14,7 @@ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/if_bridge.h> #include <linux/rtnetlink.h> #include <linux/spinlock.h> @@ -297,23 +298,18 @@ static ssize_t store_group_addr(struct device *d, const char *buf, size_t len) { struct net_bridge *br = to_bridge(d); - unsigned int new_addr[6]; + u8 new_addr[6]; int i; if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (sscanf(buf, "%x:%x:%x:%x:%x:%x", + if (sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &new_addr[0], &new_addr[1], &new_addr[2], &new_addr[3], &new_addr[4], &new_addr[5]) != 6) return -EINVAL; - /* Must be 01:80:c2:00:00:0X */ - for (i = 0; i < 5; i++) - if (new_addr[i] != br_group_address[i]) - return -EINVAL; - - if (new_addr[5] & ~0xf) + if (!is_link_local_ether_addr(new_addr)) return -EINVAL; if (new_addr[5] == 1 || /* 802.3x Pause address */ diff --git a/net/core/dev.c b/net/core/dev.c index bda6d004f9f..83232a1be1e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6264,7 +6264,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; /* Ensure the device has been registrered */ - err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) goto out; diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbc..c23543cba13 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include <linux/reciprocal_div.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> +#include <linux/if_vlan.h> /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; @@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk) return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); + +static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) +{ + static const u16 decodes[] = { + [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, + [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, + [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, + [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, + [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, + [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, + [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, + [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, + [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, + [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, + [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, + [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, + [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, + [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, + [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, + [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, + [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, + [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, + [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, + [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, + [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, + [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, + [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, + [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, + [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, + [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, + [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, + [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, + [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, + [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, + [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, + [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, + [BPF_S_RET_K] = BPF_RET|BPF_K, + [BPF_S_RET_A] = BPF_RET|BPF_A, + [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, + [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, + [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, + [BPF_S_ST] = BPF_ST, + [BPF_S_STX] = BPF_STX, + [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, + [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, + [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, + [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, + [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, + [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, + [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, + [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, + [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, + }; + u16 code; + + code = filt->code; + + to->code = decodes[code]; + to->jt = filt->jt; + to->jf = filt->jf; + + if (code == BPF_S_ALU_DIV_K) { + /* + * When loaded this rule user gave us X, which was + * translated into R = r(X). Now we calculate the + * RR = r(R) and report it back. If next time this + * value is loaded and RRR = r(RR) is calculated + * then the R == RRR will be true. + * + * One exception. X == 1 translates into R == 0 and + * we can't calculate RR out of it with r(). + */ + + if (filt->k == 0) + to->k = 1; + else + to->k = reciprocal_value(filt->k); + + BUG_ON(reciprocal_value(to->k) != filt->k); + } else + to->k = filt->k; +} + +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +{ + struct sk_filter *filter; + int i, ret; + + lock_sock(sk); + filter = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + ret = 0; + if (!filter) + goto out; + ret = filter->len; + if (!len) + goto out; + ret = -EINVAL; + if (len < filter->len) + goto out; + + ret = -EFAULT; + for (i = 0; i < filter->len; i++) { + struct sock_filter fb; + + sk_decode_filter(&filter->insns[i], &fb); + if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) + goto out; + } + + ret = filter->len; +out: + release_sock(sk); + return ret; +} diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a36035..847c02b197b 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -248,7 +248,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; void *v; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1dc14c2aac..b29dacf900f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -419,20 +419,6 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -static inline ktime_t ktime_now(void) -{ - struct timespec ts; - ktime_get_ts(&ts); - - return timespec_to_ktime(ts); -} - -/* This works even if 32 bit because of careful byte order choice */ -static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) -{ - return cmp1.tv64 < cmp2.tv64; -} - static const char version[] = "Packet Generator for packet performance testing. " "Version: " VERSION "\n"; @@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ - stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; + stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at; idle = pkt_dev->idle_acc; do_div(idle, NSEC_PER_USEC); @@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) return; } - start_time = ktime_now(); + start_time = ktime_get(); if (remaining < 100000) { /* for small delays (<100us), just loop until limit is reached */ do { - end_time = ktime_now(); - } while (ktime_lt(end_time, spin_until)); + end_time = ktime_get(); + } while (ktime_compare(end_time, spin_until) < 0); } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); - end_time = ktime_now(); + end_time = ktime_get(); } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); @@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } else { /* IPV6 * */ - if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ; - else { + if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) { int i; /* Only random destinations yet */ @@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t) pktgen_clear_counters(pkt_dev); pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; - pkt_dev->started_at = - pkt_dev->next_tx = ktime_now(); + pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); @@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; - pkt_dev->stopped_at = ktime_now(); + pkt_dev->stopped_at = ktime_get(); pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) continue; if (best == NULL) best = pkt_dev; - else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) + else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } if_unlock(t); @@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); schedule(); - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) @@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) else cpu_relax(); } - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_xmit(struct pktgen_dev *pkt_dev) @@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) * "never transmit" */ if (unlikely(pkt_dev->delay == ULLONG_MAX)) { - pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); + pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX); return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fad649ae4de..a810f6a6137 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].doit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].doit : NULL; + return tab[msgindex].doit; } static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) @@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].dumpit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].dumpit : NULL; + return tab[msgindex].dumpit; } static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) @@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].calcit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].calcit : NULL; + return tab[msgindex].calcit; } /** @@ -2253,6 +2253,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u16 mode) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + struct nlattr *br_afspec; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_BRIDGE; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = 0; + + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->master && + nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; + + br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!br_afspec) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) || + nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + nla_nest_end(skb, br_afspec); + + return nlmsg_end(skb, nlh); +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ndo_dflt_bridge_getlink); + +static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + u32 portid = NETLINK_CB(cb->skb).portid; + u32 seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *master = dev->master; + + if (master && master->netdev_ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + master->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev) < 0) + break; + idx++; + } + + if (ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + break; + idx++; + } + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && + master && master->netdev_ops->ndo_bridge_getlink) { + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + if ((flags & BRIDGE_FLAGS_SELF) && + dev->netdev_ops->ndo_bridge_getlink) { + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2434,5 +2639,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4007c1437fd..880722e22cc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb) uarg = skb_shinfo(skb)->destructor_arg; if (uarg->callback) - uarg->callback(uarg); + uarg->callback(uarg, true); } if (skb_has_frag_list(skb)) @@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(kfree_skb); /** + * skb_tx_error - report an sk_buff xmit error + * @skb: buffer that triggered an error + * + * Report xmit error if a device callback is tracking this skb. + * skb must be freed afterwards. + */ +void skb_tx_error(struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg, false); + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } +} +EXPORT_SYMBOL(skb_tx_error); + +/** * consume_skb - free an skbuff * @skb: buffer to free * @@ -797,7 +817,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg); + uarg->callback(uarg, false); /* skb frags point to kernel buffers */ for (i = num_frags - 1; i >= 0; i--) { diff --git a/net/core/sock.c b/net/core/sock.c index 8a146cfcc36..06286006a2c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1074,6 +1074,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + case SO_BINDTODEVICE: + v.val = sk->sk_bound_dev_if; + break; + case SO_GET_FILTER: + len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + if (len < 0) + return len; + + goto lenout; default: return -ENOPROTOOPT; } @@ -1214,13 +1223,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) #ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) +void sock_update_classid(struct sock *sk, struct task_struct *task) { u32 classid; - rcu_read_lock(); /* doing current task, which cannot vanish. */ - classid = task_cls_classid(current); - rcu_read_unlock(); + classid = task_cls_classid(task); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1263,7 +1270,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); + sock_update_classid(sk, current); sock_update_netprioidx(sk, current); } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ea850ce35d4..662071b249c 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -174,8 +174,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, * To protect against Request floods, increment retrans * counter (backoff, monitored by dccp_response_timer). */ - req->retrans++; - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ return NULL; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2a6abc163ed..f6db227c1fd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -55,6 +55,7 @@ #include <linux/sysctl.h> #endif #include <linux/kmod.h> +#include <linux/netconf.h> #include <net/arp.h> #include <net/ip.h> @@ -1442,6 +1443,149 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return 0; } +static int inet_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) + size += nla_total_size(4); + if (type == -1 || type == NETCONFA_RP_FILTER) + size += nla_total_size(4); + + return size; +} + +static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv4_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && + nla_put_s32(skb, NETCONFA_FORWARDING, + IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + goto nla_put_failure; + if ((type == -1 || type == NETCONFA_RP_FILTER) && + nla_put_s32(skb, NETCONFA_RP_FILTER, + IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); +} + +static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, +}; + +static int inet_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv4_devconf *devconf; + struct in_device *in_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv4_policy); + if (err < 0) + goto errout; + + err = EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv4.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv4.devconf_dflt; + break; + default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in_dev = __in_dev_get_rtnl(dev); + if (in_dev == NULL) + goto errout; + devconf = &in_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1467,6 +1611,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; @@ -1474,8 +1624,11 @@ static void inet_forward_change(struct net *net) dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) + if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + dev->ifindex, &in_dev->cnf); + } rcu_read_unlock(); } } @@ -1501,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && + new_value != old_value) { + int ifindex; + + if (cnf == net->ipv4.devconf_dflt) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + ifindex = NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev = + container_of(cnf, struct in_device, + cnf); + ifindex = idev->dev->ifindex; + } + inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + ifindex, cnf); + } } return ret; @@ -1527,15 +1697,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); - } else if (*valp) { + } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); - dev_disable_lro(idev->dev); + if (*valp) + dev_disable_lro(idev->dev); + inet_netconf_notify_devconf(net, + NETCONFA_FORWARDING, + idev->dev->ifindex, + cnf); } rtnl_unlock(); rt_cache_flush(net); - } + } else + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); } return ret; @@ -1809,5 +1987,7 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); + rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, + NULL, NULL); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71b125cd5db..4797a800faf 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -803,7 +803,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) unsigned int bytes; if (!new_size) - new_size = 1; + new_size = 16; bytes = new_size * sizeof(struct hlist_head *); new_info_hash = fib_info_hash_alloc(bytes); new_laddrhash = fib_info_hash_alloc(bytes); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index d34ce2972c8..2026542d683 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -521,21 +521,31 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int *expire, int *resend) { if (!rskq_defer_accept) { - *expire = req->retrans >= thresh; + *expire = req->num_timeout >= thresh; *resend = 1; return; } - *expire = req->retrans >= thresh && - (!inet_rsk(req)->acked || req->retrans >= max_retries); + *expire = req->num_timeout >= thresh && + (!inet_rsk(req)->acked || req->num_timeout >= max_retries); /* * Do not resend while waiting for data after ACK, * start to resend on end of deferring period to give * last chance for data or ACK to create established socket. */ *resend = !inet_rsk(req)->acked || - req->retrans >= rskq_defer_accept - 1; + req->num_timeout >= rskq_defer_accept - 1; } +int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) +{ + int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + + if (!err) + req->num_retrans++; + return err; +} +EXPORT_SYMBOL(inet_rtx_syn_ack); + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -599,13 +609,14 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || - !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || + !inet_rtx_syn_ack(parent, req) || inet_rsk(req)->acked)) { unsigned long timeo; - if (req->retrans++ == 0) + if (req->num_timeout++ == 0) lopt->qlen_young--; - timeo = min((timeout << req->retrans), max_rto); + timeo = min(timeout << req->num_timeout, + max_rto); req->expires = now + timeo; reqp = &req->dl_next; continue; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 0c34bfabc11..cb98cbed197 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. */ @@ -617,7 +620,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_family = sk->sk_family; r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; - r->idiag_retrans = req->retrans; + r->idiag_retrans = req->num_retrans; r->id.idiag_if = sk->sk_bound_dev_if; sock_diag_save_cookie(req, r->id.idiag_cookie); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 798358b1071..d763701cff1 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1500,8 +1500,10 @@ static int __init ip_auto_config(void) * Clue in the operator. */ pr_info("IP-Config: Complete:\n"); - pr_info(" device=%s, addr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, &ic_myaddr, &ic_netmask, &ic_gateway); + + pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", + ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); pr_info(" bootserver=%pI4, rootserver=%pI4, rootpath=%s", diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e15b45297c0..720855e4110 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -138,6 +138,7 @@ struct ipip_net { static int ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static void ipip_dev_free(struct net_device *dev); +static struct rtnl_link_ops ipip_link_ops __read_mostly; /* * Locking : hash tables are protected by RCU and RTNL @@ -305,6 +306,7 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &ipip_link_ops; dev_hold(dev); ipip_tunnel_link(ipn, nt); @@ -479,6 +481,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->protocol != htons(ETH_P_IP)) goto tx_error; + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb)) + goto tx_error; + if (tos & 1) tos = old_iph->tos; @@ -773,6 +779,11 @@ static void ipip_dev_free(struct net_device *dev) free_netdev(dev); } +#define IPIP_FEATURES (NETIF_F_SG | \ + NETIF_F_FRAGLIST | \ + NETIF_F_HIGHDMA | \ + NETIF_F_HW_CSUM) + static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; @@ -787,6 +798,9 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + + dev->features |= IPIP_FEATURES; + dev->hw_features |= IPIP_FEATURES; } static int ipip_tunnel_init(struct net_device *dev) @@ -829,6 +843,47 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t ipip_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ipip_link_ops __read_mostly = { + .kind = "ipip", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = ipip_get_size, + .fill_info = ipip_fill_info, +}; + static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip_err, @@ -925,14 +980,26 @@ static int __init ipip_init(void) return err; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { - unregister_pernet_device(&ipip_net_ops); pr_info("%s: can't register tunnel\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&ipip_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + unregister_pernet_device(&ipip_net_ops); + goto out; } static void __exit ipip_fini(void) { + rtnl_link_unregister(&ipip_link_ops); if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) pr_info("%s: can't deregister tunnel\n", __func__); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ba48e799b03..b236ef04914 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -340,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } req->expires = 0UL; - req->retrans = 0; + req->num_retrans = 0; /* * We need to lookup the route here to get at the correct diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 197c0008503..733f48593ec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -536,13 +536,14 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct tcp_sock *tp = tcp_sk(sk); int answ; + bool slow; switch (cmd) { case SIOCINQ: if (sk->sk_state == TCP_LISTEN) return -EINVAL; - lock_sock(sk); + slow = lock_sock_fast(sk); if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) answ = 0; else if (sock_flag(sk, SOCK_URGINLINE) || @@ -557,7 +558,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) answ--; } else answ = tp->urg_seq - tp->copied_seq; - release_sock(sk); + unlock_sock_fast(sk, slow); break; case SIOCATMARK: answ = tp->urg_data && tp->urg_seq == tp->copied_seq; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2c2b13a999e..7839d51fb65 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3552,6 +3552,24 @@ static bool tcp_process_frto(struct sock *sk, int flag) return false; } +/* RFC 5961 7 [ACK Throttling] */ +static void tcp_send_challenge_ack(struct sock *sk) +{ + /* unprotected vars, we dont care of overwrites */ + static u32 challenge_timestamp; + static unsigned int challenge_count; + u32 now = jiffies / HZ; + + if (now != challenge_timestamp) { + challenge_timestamp = now; + challenge_count = 0; + } + if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + tcp_send_ack(sk); + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3571,8 +3589,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* If the ack is older than previous acks * then we can probably ignore it. */ - if (before(ack, prior_snd_una)) + if (before(ack, prior_snd_una)) { + /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ + if (before(ack, prior_snd_una - tp->max_window)) { + tcp_send_challenge_ack(sk); + return -1; + } goto old_ack; + } /* If the ack includes data we haven't sent yet, discard * this segment (RFC793 Section 3.9). @@ -5244,23 +5268,6 @@ out: } #endif /* CONFIG_NET_DMA */ -static void tcp_send_challenge_ack(struct sock *sk) -{ - /* unprotected vars, we dont care of overwrites */ - static u32 challenge_timestamp; - static unsigned int challenge_count; - u32 now = jiffies / HZ; - - if (now != challenge_timestamp) { - challenge_timestamp = now; - challenge_count = 0; - } - if (++challenge_count <= sysctl_tcp_challenge_ack_limit) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); - tcp_send_ack(sk); - } -} - /* Does PAWS and seqno based validation of an incoming segment, flags will * play significant role here. */ @@ -5988,7 +5995,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (req) { tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->retrans; + tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0c4a6435560..9dd5b34eb11 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -877,10 +877,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, } static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } /* @@ -1070,7 +1073,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) } EXPORT_SYMBOL(tcp_md5_do_del); -void tcp_clear_md5_list(struct sock *sk) +static void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1386,7 +1389,8 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sock *child; int err; - req->retrans = 0; + req->num_retrans = 0; + req->num_timeout = 0; req->sk = NULL; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); @@ -1741,7 +1745,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ @@ -1919,7 +1923,6 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); void tcp_v4_early_demux(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; @@ -1927,16 +1930,16 @@ void tcp_v4_early_demux(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) return; - if (!pskb_may_pull(skb, ip_hdrlen(skb) + sizeof(struct tcphdr))) + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) return; iph = ip_hdr(skb); - th = (struct tcphdr *) ((char *)iph + ip_hdrlen(skb)); + th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; - sk = __inet_lookup_established(net, &tcp_hashinfo, + sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif); @@ -2640,7 +2643,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_delta_to_clock_t(delta), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a7302d974f3..f35f2dfb640 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -553,7 +553,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); + tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout); paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -582,7 +582,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Note that even if there is new data in the SYN packet * they will be thrown away too. */ - req->rsk_ops->rtx_syn_ack(sk, req, NULL); + inet_rtx_syn_ack(sk, req); return NULL; } @@ -696,7 +696,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */ if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr) tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr; - else if (req->retrans) /* don't take RTT sample if retrans && ~TS */ + else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */ tcp_rsk(req)->snt_synack = 0; /* For Fast Open no more processing is needed (sk is the @@ -706,7 +706,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return sk; /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ - if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + if (req->num_timeout < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d47c1b4421a..b78aac30c49 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -318,7 +318,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) req = tcp_sk(sk)->fastopen_rsk; req->rsk_ops->syn_ack_timeout(sk, req); - if (req->retrans >= max_retries) { + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; } @@ -327,10 +327,10 @@ static void tcp_fastopen_synack_timer(struct sock *sk) * regular retransmit because if the child socket has been accepted * it's not good to give up too easily. */ - req->rsk_ops->rtx_syn_ack(sk, req, NULL); - req->retrans++; + inet_rtx_syn_ack(sk, req); + req->num_timeout++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); + TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } /* diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0424e4e2741..fab23db8ee7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include <net/pkt_sched.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> +#include <linux/netconf.h> #ifdef CONFIG_IPV6_PRIVACY #include <linux/random.h> @@ -401,7 +402,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) ndev->cnf.accept_dad = -1; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) { pr_info("%s: Disabled Multicast RS\n", dev->name); ndev->cnf.rtr_solicits = 0; @@ -460,6 +461,141 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) return idev; } +static int inet6_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + /* type -1 is used for ALL */ + if (type == -1 || type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv6_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET6; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + /* type -1 is used for ALL */ + if ((type == -1 || type == NETCONFA_FORWARDING) && + nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); +} + +static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { + [NETCONFA_IFINDEX] = { .len = sizeof(int) }, + [NETCONFA_FORWARDING] = { .len = sizeof(int) }, +}; + +static int inet6_netconf_get_devconf(struct sk_buff *in_skb, + struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(in_skb->sk); + struct nlattr *tb[NETCONFA_MAX+1]; + struct netconfmsg *ncm; + struct sk_buff *skb; + struct ipv6_devconf *devconf; + struct inet6_dev *in6_dev; + struct net_device *dev; + int ifindex; + int err; + + err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, + devconf_ipv6_policy); + if (err < 0) + goto errout; + + err = EINVAL; + if (!tb[NETCONFA_IFINDEX]) + goto errout; + + ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); + switch (ifindex) { + case NETCONFA_IFINDEX_ALL: + devconf = net->ipv6.devconf_all; + break; + case NETCONFA_IFINDEX_DEFAULT: + devconf = net->ipv6.devconf_dflt; + break; + default: + dev = __dev_get_by_index(net, ifindex); + if (dev == NULL) + goto errout; + in6_dev = __in6_dev_get(dev); + if (in6_dev == NULL) + goto errout; + devconf = &in6_dev->cnf; + break; + } + + err = -ENOBUFS; + skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, RTM_NEWNETCONF, 0, + -1); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); +errout: + return err; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -471,7 +607,7 @@ static void dev_forward_change(struct inet6_dev *idev) dev = idev->dev; if (idev->cnf.forwarding) dev_disable_lro(dev); - if (dev && (dev->flags & IFF_MULTICAST)) { + if (dev->flags & IFF_MULTICAST) { if (idev->cnf.forwarding) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); else @@ -486,6 +622,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + dev->ifindex, &idev->cnf); } @@ -518,6 +656,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) *p = newf; if (p == &net->ipv6.devconf_dflt->forwarding) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } @@ -525,6 +667,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); @@ -553,7 +699,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - dst_release(&ifp->rt->dst); + ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -805,7 +951,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) rt6_set_expires(rt, expires); } } - dst_release(&rt->dst); + ip6_rt_put(rt); } /* clean up prefsrc entries */ @@ -1692,7 +1838,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) cfg.fc_flags |= RTF_NONEXTHOP; #endif @@ -1752,7 +1898,7 @@ static void addrconf_add_mroute(struct net_device *dev) ip6_route_add(&cfg); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_route_add(struct net_device *dev) { struct fib6_config cfg = { @@ -1881,8 +2027,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, dev, expires, flags); } - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); } /* Try to figure out our local address for this prefix */ @@ -2104,7 +2249,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) if (dev == NULL) goto err_exit; -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) if (dev->type == ARPHRD_SIT) { const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; @@ -2315,7 +2460,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void sit_add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; @@ -2434,7 +2579,7 @@ static void addrconf_dev_config(struct net_device *dev) addrconf_add_linklocal(idev, &addr); } -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) static void addrconf_sit_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2471,7 +2616,7 @@ static void addrconf_sit_config(struct net_device *dev) } #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if IS_ENABLED(CONFIG_NET_IPGRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -2601,12 +2746,12 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } switch (dev->type) { -#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +#if IS_ENABLED(CONFIG_IPV6_SIT) case ARPHRD_SIT: addrconf_sit_config(dev); break; #endif -#if defined(CONFIG_NET_IPGRE) || defined(CONFIG_NET_IPGRE_MODULE) +#if IS_ENABLED(CONFIG_NET_IPGRE) case ARPHRD_IPGRE: addrconf_gre_config(dev); break; @@ -3194,7 +3339,7 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { @@ -4784,6 +4929,8 @@ int __init addrconf_init(void) inet6_dump_ifmcaddr, NULL); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); + __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, + NULL, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7e6139508ee..ecc35b93314 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -44,7 +44,7 @@ #define IPV6HDR_BASELEN 8 struct tmp_ext { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) struct in6_addr saddr; #endif struct in6_addr daddr; @@ -152,7 +152,7 @@ bad: return false; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) /** * ipv6_rearrange_destopt - rearrange IPv6 destination options header * @iph: IPv6 header @@ -320,7 +320,7 @@ static void ah6_output_done(struct crypto_async_request *base, int err) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); @@ -385,7 +385,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(iph_base, top_iph, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(iph_ext, &top_iph->saddr, extlen); #else memcpy(iph_ext, &top_iph->daddr, extlen); @@ -434,7 +434,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, iph_base, IPV6HDR_BASELEN); if (extlen) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) memcpy(&top_iph->saddr, iph_ext, extlen); #else memcpy(&top_iph->daddr, iph_ext, extlen); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index cdf02be5f19..4963c769a13 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index be2b67d631e..93cbad2c0aa 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -769,7 +769,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index fa3d9c32809..f005acc58b2 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,7 +43,7 @@ #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif @@ -224,7 +224,7 @@ bad: Destination options header. *****************************/ -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; @@ -288,7 +288,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) #endif static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) { .type = IPV6_TLV_HAO, .func = ipv6_dest_hao, @@ -300,7 +300,7 @@ static const struct tlvtype_proc tlvprocdestopt_lst[] = { static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); @@ -315,14 +315,14 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) } opt->lastopt = opt->dst1 = skb_network_header_len(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; @@ -378,7 +378,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own @@ -404,7 +404,7 @@ looped_back: } switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) goto unknown_rh; @@ -461,7 +461,7 @@ looped_back: addr += i - 1; switch (hdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index d9fb9110f60..2e1a432867c 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -100,7 +100,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } again: - dst_release(&rt->dst); + ip6_rt_put(rt); rt = NULL; goto out; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 24d69dbca4d..b4a9fd51dae 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -280,7 +280,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 24995a93ef8..710cafd2e1a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -672,6 +672,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, iter->rt6i_idev == rt->rt6i_idev && ipv6_addr_equal(&iter->rt6i_gateway, &rt->rt6i_gateway)) { + if (rt->rt6i_nsiblings) + rt->rt6i_nsiblings = 0; if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->rt6i_flags & RTF_EXPIRES)) @@ -680,6 +682,21 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_set_expires(iter, rt->dst.expires); return -EEXIST; } + /* If we have the same destination and the same metric, + * but not the same gateway, then the route we try to + * add is sibling to this route, increment our counter + * of siblings, and later we will add our route to the + * list. + * Only static routes (which don't have flag + * RTF_EXPIRES) are used for ECMPv6. + * + * To avoid long list, we only had siblings if the + * route have a gateway. + */ + if (rt->rt6i_flags & RTF_GATEWAY && + !(rt->rt6i_flags & RTF_EXPIRES) && + !(iter->rt6i_flags & RTF_EXPIRES)) + rt->rt6i_nsiblings++; } if (iter->rt6i_metric > rt->rt6i_metric) @@ -692,6 +709,35 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (ins == &fn->leaf) fn->rr_ptr = NULL; + /* Link this route to others same route. */ + if (rt->rt6i_nsiblings) { + unsigned int rt6i_nsiblings; + struct rt6_info *sibling, *temp_sibling; + + /* Find the first route that have the same metric */ + sibling = fn->leaf; + while (sibling) { + if (sibling->rt6i_metric == rt->rt6i_metric) { + list_add_tail(&rt->rt6i_siblings, + &sibling->rt6i_siblings); + break; + } + sibling = sibling->dst.rt6_next; + } + /* For each sibling in the list, increment the counter of + * siblings. BUG() if counters does not match, list of siblings + * is broken! + */ + rt6i_nsiblings = 0; + list_for_each_entry_safe(sibling, temp_sibling, + &rt->rt6i_siblings, rt6i_siblings) { + sibling->rt6i_nsiblings++; + BUG_ON(sibling->rt6i_nsiblings != rt->rt6i_nsiblings); + rt6i_nsiblings++; + } + BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + } + /* * insert node */ @@ -1193,6 +1239,17 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (fn->rr_ptr == rt) fn->rr_ptr = NULL; + /* Remove this entry from other siblings */ + if (rt->rt6i_nsiblings) { + struct rt6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->rt6i_siblings, rt6i_siblings) + sibling->rt6i_nsiblings--; + rt->rt6i_nsiblings = 0; + list_del_init(&rt->rt6i_siblings); + } + /* Adjust walkers */ read_lock(&fib6_walker_lock); FOR_WALKERS(w) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index d5cb3c4e66f..12aa473e979 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1069,7 +1069,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->mtu = IPV6_MIN_MTU; } } - dst_release(&rt->dst); + ip6_rt_put(rt); } t->hlen = addend; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index aece3e792f8..3deaa4e2e8e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -538,8 +538,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) to->nf_trace = from->nf_trace; #endif skb_copy_secmark(to, from); @@ -564,7 +563,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) found_rhdr = 1; break; case NEXTHDR_DEST: -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) break; #endif @@ -756,7 +755,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->dst); + ip6_rt_put(rt); return 0; } @@ -768,7 +767,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->dst); + ip6_rt_put(rt); return err; slow_path_clean: diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index cb7e2ded6f0..424ed45ef12 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { @@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) goto failed_free; strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -663,8 +665,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, icmpv6_send(skb2, rel_type, rel_code, rel_info); - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); kfree_skb(skb2); } @@ -1208,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dst_release(&rt->dst); + ip6_rt_put(rt); } } @@ -1505,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static size_t ip6_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), + &parm->raddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->laddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip6_tnl), + .get_size = ip6_get_size, + .fill_info = ip6_fill_info, +}; + static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, @@ -1613,9 +1663,14 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; return 0; +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: @@ -1630,6 +1685,7 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { + rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ba6d13d1f1e..a7bee6a9133 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -397,7 +397,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 92f8e48e4ba..b19ed51a45b 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -163,7 +163,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); @@ -260,7 +260,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, if (rt) { dev = rt->dst.dev; - dst_release(&rt->dst); + ip6_rt_put(rt); } } else dev = dev_get_by_index_rcu(net, ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2edce30ef73..4f47aa5183a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -905,7 +905,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { - /* XXX: idev->cnf.prixy_ndp */ + /* XXX: idev->cnf.proxy_ndp */ goto out; } @@ -1144,7 +1144,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } } @@ -1169,7 +1169,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - dst_release(&rt->dst); + ip6_rt_put(rt); return; } neigh->flags |= NTF_ROUTER; @@ -1325,8 +1325,7 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - if (rt) - dst_release(&rt->dst); + ip6_rt_put(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d7cb04506c3..10ce76a2cb9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -207,8 +207,7 @@ ip6t_get_target_c(const struct ip6t_entry *e) return ip6t_get_target((struct ip6t_entry *)e); } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* This cries for unification! */ static const char *const hooknames[] = { [NF_INET_PRE_ROUTING] = "PREROUTING", @@ -381,8 +380,7 @@ ip6t_do_table(struct sk_buff *skb, t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) trace_packet(skb, hook, in, out, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 5d1d8b04d69..5060d54199a 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -67,7 +67,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) ret = true; out: - dst_release(&rt->dst); + ip6_rt_put(rt); return ret; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23e61c..ccb5cbe9354 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -295,7 +295,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -346,7 +346,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, .get_l4proto = ipv6_get_l4proto, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 2d54b2061d6..24df3dde007 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -232,7 +232,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> @@ -375,7 +375,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, .error = icmpv6_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, .nlattr_tuple_size = icmpv6_nlattr_tuple_size, .nlattr_to_tuple = icmpv6_nlattr_to_tuple, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index cdd6d045e42..aacd121fe8c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -19,7 +19,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> @@ -35,7 +35,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, { u16 zone = NF_CT_DEFAULT_ZONE; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif @@ -60,7 +60,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, { struct sk_buff *reasm; -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Previously seen (loopback)? */ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c index 5d6da784305..61aaf70f376 100644 --- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -84,7 +84,7 @@ const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { .manip_pkt = icmpv6_manip_pkt, .in_range = icmpv6_in_range, .unique_tuple = icmpv6_unique_tuple, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, #endif }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d8e95c77db9..6cd29b1e8b9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,7 +50,7 @@ #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif #include <linux/mroute6.h> @@ -123,7 +123,7 @@ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) return 1; } -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); static mh_filter_t __rcu *mh_filter __read_mostly; @@ -184,7 +184,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) filtered = icmpv6_filter(sk, skb); break; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: { /* XXX: To validate MH only once for each packet, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b1e6cf0b95f..30458726acc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -57,6 +57,7 @@ #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> +#include <net/nexthop.h> #include <asm/uaccess.h> @@ -289,6 +290,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); rt->rt6i_genid = rt_genid(net); + INIT_LIST_HEAD(&rt->rt6i_siblings); + rt->rt6i_nsiblings = 0; } return rt; } @@ -318,13 +321,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); - -static u32 rt6_peer_genid(void) -{ - return atomic_read(&__rt6_peer_genid); -} - void rt6_bind_peer(struct rt6_info *rt, int create) { struct inet_peer_base *base; @@ -338,8 +334,6 @@ void rt6_bind_peer(struct rt6_info *rt, int create) if (peer) { if (!rt6_set_peer(rt, peer)) inet_putpeer(peer); - else - rt->rt6i_peer_genid = rt6_peer_genid(); } } @@ -385,6 +379,69 @@ static bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } +/* Multipath route selection: + * Hash based function using packet header and flowlabel. + * Adapted from fib_info_hashfn() + */ +static int rt6_info_hash_nhsfn(unsigned int candidate_count, + const struct flowi6 *fl6) +{ + unsigned int val = fl6->flowi6_proto; + + val ^= (__force u32)fl6->daddr.s6_addr32[0]; + val ^= (__force u32)fl6->daddr.s6_addr32[1]; + val ^= (__force u32)fl6->daddr.s6_addr32[2]; + val ^= (__force u32)fl6->daddr.s6_addr32[3]; + + val ^= (__force u32)fl6->saddr.s6_addr32[0]; + val ^= (__force u32)fl6->saddr.s6_addr32[1]; + val ^= (__force u32)fl6->saddr.s6_addr32[2]; + val ^= (__force u32)fl6->saddr.s6_addr32[3]; + + /* Work only if this not encapsulated */ + switch (fl6->flowi6_proto) { + case IPPROTO_UDP: + case IPPROTO_TCP: + case IPPROTO_SCTP: + val ^= (__force u16)fl6->fl6_sport; + val ^= (__force u16)fl6->fl6_dport; + break; + + case IPPROTO_ICMPV6: + val ^= (__force u16)fl6->fl6_icmp_type; + val ^= (__force u16)fl6->fl6_icmp_code; + break; + } + /* RFC6438 recommands to use flowlabel */ + val ^= (__force u32)fl6->flowlabel; + + /* Perhaps, we need to tune, this function? */ + val = val ^ (val >> 7) ^ (val >> 12); + return val % candidate_count; +} + +static struct rt6_info *rt6_multipath_select(struct rt6_info *match, + struct flowi6 *fl6) +{ + struct rt6_info *sibling, *next_sibling; + int route_choosen; + + route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* Don't change the route, if route_choosen == 0 + * (siblings does not include ourself) + */ + if (route_choosen) + list_for_each_entry_safe(sibling, next_sibling, + &match->rt6i_siblings, rt6i_siblings) { + route_choosen--; + if (route_choosen == 0) { + match = sibling; + break; + } + } + return match; +} + /* * Route lookup. Any table->tb6_lock is implied. */ @@ -666,7 +723,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, else rt6_set_expires(rt, jiffies + HZ * lifetime); - dst_release(&rt->dst); + ip6_rt_put(rt); } return 0; } @@ -702,6 +759,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -863,7 +922,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - + if (rt->rt6i_nsiblings && oif == 0) + rt = rt6_multipath_select(rt, fl6); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -879,7 +939,7 @@ restart: else goto out2; - dst_release(&rt->dst); + ip6_rt_put(rt); rt = nrt ? : net->ipv6.ip6_null_entry; dst_hold(&rt->dst); @@ -896,7 +956,7 @@ restart: * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ - dst_release(&rt->dst); + ip6_rt_put(rt); goto relookup; out: @@ -1030,14 +1090,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { - if (rt->rt6i_peer_genid != rt6_peer_genid()) { - if (!rt6_has_peer(rt)) - rt6_bind_peer(rt, 0); - rt->rt6i_peer_genid = rt6_peer_genid(); - } + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) return dst; - } + return NULL; } @@ -1507,7 +1562,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; if (dev) { if (dev != grt->dst.dev) { - dst_release(&grt->dst); + ip6_rt_put(grt); goto out; } } else { @@ -1518,7 +1573,7 @@ int ip6_route_add(struct fib6_config *cfg) } if (!(grt->rt6i_flags & RTF_GATEWAY)) err = 0; - dst_release(&grt->dst); + ip6_rt_put(grt); if (err) goto out; @@ -1604,7 +1659,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) write_unlock_bh(&table->tb6_lock); out: - dst_release(&rt->dst); + ip6_rt_put(rt); return err; } @@ -2249,6 +2304,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2326,11 +2382,71 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (tb[RTA_TABLE]) cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + if (tb[RTA_MULTIPATH]) { + cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); + cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); + } + err = 0; errout: return err; } +static int ip6_route_multipath(struct fib6_config *cfg, int add) +{ + struct fib6_config r_cfg; + struct rtnexthop *rtnh; + int remaining; + int attrlen; + int err = 0, last_err = 0; + +beginning: + rtnh = (struct rtnexthop *)cfg->fc_mp; + remaining = cfg->fc_mp_len; + + /* Parse a Multipath Entry */ + while (rtnh_ok(rtnh, remaining)) { + memcpy(&r_cfg, cfg, sizeof(*cfg)); + if (rtnh->rtnh_ifindex) + r_cfg.fc_ifindex = rtnh->rtnh_ifindex; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla) { + nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_flags |= RTF_GATEWAY; + } + } + err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); + if (err) { + last_err = err; + /* If we are trying to remove a route, do not stop the + * loop when ip6_route_del() fails (because next hop is + * already gone), we should try to remove all next hops. + */ + if (add) { + /* If add fails, we should try to delete all + * next hops that have been already added. + */ + add = 0; + goto beginning; + } + } + /* Because each route is added like a single route we remove + * this flag after the first nexthop (if there is a collision, + * we have already fail to add the first nexthop: + * fib6_add_rt2node() has reject it). + */ + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; + rtnh = rtnh_next(rtnh, &remaining); + } + + return last_err; +} + static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { struct fib6_config cfg; @@ -2340,7 +2456,10 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_del(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 0); + else + return ip6_route_del(&cfg); } static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -2352,7 +2471,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a if (err < 0) return err; - return ip6_route_add(&cfg); + if (cfg.fc_mp) + return ip6_route_multipath(&cfg, 1); + else + return ip6_route_add(&cfg); } static inline size_t rt6_nlmsg_size(void) @@ -2596,7 +2718,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { - dst_release(&rt->dst); + ip6_rt_put(rt); err = -ENOBUFS; goto errout; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3ed54ffd8d5..b543c56cad2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -68,6 +68,7 @@ static int ipip6_tunnel_init(struct net_device *dev); static void ipip6_tunnel_setup(struct net_device *dev); static void ipip6_dev_free(struct net_device *dev); +static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { @@ -282,6 +283,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &sit_link_ops; dev_hold(dev); @@ -1216,6 +1218,47 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t sit_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int sit_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops sit_link_ops __read_mostly = { + .kind = "sit", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = sit_get_size, + .fill_info = sit_fill_info, +}; + static struct xfrm_tunnel sit_handler __read_mostly = { .handler = ipip6_rcv, .err_handler = ipip6_err, @@ -1302,6 +1345,7 @@ static struct pernet_operations sit_net_ops = { static void __exit sit_cleanup(void) { + rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); unregister_pernet_device(&sit_net_ops); @@ -1319,10 +1363,21 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - unregister_pernet_device(&sit_net_ops); pr_info("%s: can't add protocol\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&sit_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&sit_handler, AF_INET6); +xfrm_tunnel_failed: + unregister_pernet_device(&sit_net_ops); + goto out; } module_init(sit_init); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 182ab9a85d6..40161977f7c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq6->iif = inet6_iif(skb); req->expires = 0UL; - req->retrans = 0; + req->num_retrans = 0; ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 26175bffbaa..c73d0ebde9c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { struct flowi6 fl6; + int res; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; @@ -1741,11 +1744,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index f8c4c08ffb6..f3ed8ca59b9 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -182,7 +182,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) fl6->flowi6_proto = nexthdr; return; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 3f2f7c4ab72..d8c70b8efc2 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,7 +101,7 @@ static int __xfrm6_state_sort_cmp(void *p) return 1; else return 3; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; @@ -134,7 +134,7 @@ static int __xfrm6_tmpl_sort_cmp(void *p) switch (v->mode) { case XFRM_MODE_TRANSPORT: return 1; -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) +#if IS_ENABLED(CONFIG_IPV6_MIP6) case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_IN_TRIGGER: return 2; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 6c4cc12c741..bbba3a19e94 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -632,7 +632,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl nla_put_u16(skb, L2TP_ATTR_MRU, session->mru))) goto nla_put_failure; - if ((session->ifname && session->ifname[0] && + if ((session->ifname[0] && nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) || (session->cookie_len && nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len, diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 8b2cffdfdd9..0c3b1670b0d 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -28,12 +28,11 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 + select IP6_NF_IPTABLES ---help--- - Add IPv6 support to IPVS. This is incomplete and might be dangerous. + Add IPv6 support to IPVS. - See http://www.mindbasket.com/ipvs for more information. - - Say N if unsure. + Say Y if unsure. config IP_VS_DEBUG bool "IP virtual server debugging" diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 1548df9a752..30e764ad021 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -308,13 +308,12 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) static int ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse, - struct ip_vs_conn_param *p) + int inverse, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; struct net *net = skb_net(skb); - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return 1; @@ -329,12 +328,11 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -432,12 +430,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) + if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) return NULL; return ip_vs_conn_out_get(&p); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 58918e20f9d..fb45640dc1f 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -222,11 +222,10 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, - struct sk_buff *skb, - __be16 src_port, __be16 dst_port, int *ignored) + struct sk_buff *skb, __be16 src_port, __be16 dst_port, + int *ignored, struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp = NULL; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport = 0; /* destination port to forward */ @@ -236,20 +235,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, union nf_inet_addr snet; /* source network of the client, after masking */ - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); + ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); else #endif - snet.ip = iph.saddr.ip & svc->netmask; + snet.ip = iph->saddr.ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -266,8 +263,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * is created for other persistent services. */ { - int protocol = iph.protocol; - const union nf_inet_addr *vaddr = &iph.daddr; + int protocol = iph->protocol; + const union nf_inet_addr *vaddr = &iph->daddr; __be16 vport = 0; if (dst_port == svc->port) { @@ -342,14 +339,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = dest->port; flags = (svc->flags & IP_VS_SVC_F_ONEPACKET - && iph.protocol == IPPROTO_UDP)? + && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, - src_port, &iph.daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, + src_port, &iph->daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -392,18 +389,20 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd, int *ignored) + struct ip_vs_proto_data *pd, int *ignored, + struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; *ignored = 1; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + /* + * IPv6 frags, only the first hit here. + */ + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return NULL; @@ -423,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -434,7 +433,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); + return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + iph); *ignored = 0; @@ -456,7 +456,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } flags = (svc->flags & IP_VS_SVC_F_ONEPACKET - && iph.protocol == IPPROTO_UDP)? + && iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; /* @@ -465,9 +465,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, - &iph.saddr, pptr[0], &iph.daddr, pptr[1], - &p); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + &iph->saddr, pptr[0], &iph->daddr, + pptr[1], &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); @@ -496,19 +496,16 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. */ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_proto_data *pd) + struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { __be16 _ports[2], *pptr; - struct ip_vs_iphdr iph; #ifdef CONFIG_SYSCTL struct net *net; struct netns_ipvs *ipvs; int unicast; #endif - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - - pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; @@ -519,10 +516,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; + unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; else #endif - unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); + unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create @@ -532,7 +529,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && - iph.protocol == IPPROTO_UDP)? + iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; @@ -542,9 +539,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, - &iph.saddr, pptr[0], - &iph.daddr, pptr[1], &p); + ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); @@ -559,7 +556,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pd->pp); + ret = cp->packet_xmit(skb, cp, pd->pp, iph); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -654,14 +651,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -#ifdef CONFIG_IP_VS_IPV6 -static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) -{ - /* TODO IPv6: Find out what to do here for IPv6 */ - return 0; -} -#endif - static int ip_vs_route_me_harder(int af, struct sk_buff *skb) { #ifdef CONFIG_IP_VS_IPV6 @@ -732,10 +721,19 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout) { struct ipv6hdr *iph = ipv6_hdr(skb); - unsigned int icmp_offset = sizeof(struct ipv6hdr); - struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + - icmp_offset); - struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); + unsigned int icmp_offset = 0; + unsigned int offs = 0; /* header offset*/ + int protocol; + struct icmp6hdr *icmph; + struct ipv6hdr *ciph; + unsigned short fragoffs; + + ipv6_find_hdr(skb, &icmp_offset, IPPROTO_ICMPV6, &fragoffs, NULL); + icmph = (struct icmp6hdr *)(skb_network_header(skb) + icmp_offset); + offs = icmp_offset + sizeof(struct icmp6hdr); + ciph = (struct ipv6hdr *)(skb_network_header(skb) + offs); + + protocol = ipv6_find_hdr(skb, &offs, -1, &fragoffs, NULL); if (inout) { iph->saddr = cp->vaddr.in6; @@ -746,10 +744,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, } /* the TCP/UDP/SCTP port */ - if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || - IPPROTO_SCTP == ciph->nexthdr) { - __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); + if (!fragoffs && (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || + IPPROTO_SCTP == protocol)) { + __be16 *ports = (void *)(skb_network_header(skb) + offs); + IP_VS_DBG(11, "%s() changed port %d to %d\n", __func__, + ntohs(inout ? ports[1] : ports[0]), + ntohs(inout ? cp->vport : cp->dport)); if (inout) ports[1] = cp->vport; else @@ -898,51 +899,35 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - offset += cih->ihl * 4; - - ip_vs_fill_iphdr(AF_INET, cih, &ciph); + ip_vs_fill_ip4hdr(cih, &ciph); + ciph.len += offset; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); if (!cp) return NF_ACCEPT; snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, offset, ihl); + pp, ciph.len, ihl); } #ifdef CONFIG_IP_VS_IPV6 static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum) + unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { - struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ciph, *cih; /* The ip header contained - within the ICMP */ - struct ip_vs_iphdr ciph; + struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ + struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; - unsigned int offset; union nf_inet_addr snet; + unsigned int writable; *related = 1; - - /* reassemble IP fragments */ - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - iph = ipv6_hdr(skb); - offset = sizeof(struct ipv6hdr); - ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n", - ic->icmp6_type, ntohs(icmpv6_id(ic)), - &iph->saddr, &iph->daddr); - /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy @@ -950,42 +935,45 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && - (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && - (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. + */ + if (ipvsh->flags & IP6T_FH_F_FRAG) + return NF_DROP; + + IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + &ipvsh->saddr, &ipvsh->daddr); /* Now find the contained IP header */ - offset += sizeof(_icmph); - cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); - if (cih == NULL) + ciph.len = ipvsh->len + sizeof(_icmph); + ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); + if (ip6h == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - - pp = ip_vs_proto_get(cih->nexthdr); + ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ + ciph.daddr.in6 = ip6h->daddr; + /* skip possible IPv6 exthdrs of contained IPv6 packet */ + ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); + if (ciph.protocol < 0) + return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ + + pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; - /* Is the embedded protocol header present? */ - /* TODO: we don't support fragmentation at the moment anyways */ - if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) - return NF_ACCEPT; - - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, - "Checking outgoing ICMPv6 for"); - - offset += sizeof(struct ipv6hdr); - - ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); if (!cp) return NF_ACCEPT; - snet.in6 = iph->saddr; - return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, - pp, offset, sizeof(struct ipv6hdr)); + snet.in6 = ciph.saddr.in6; + writable = ciph.len; + return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, + pp, writable, sizeof(struct ipv6hdr)); } #endif @@ -1018,17 +1006,17 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, int ihl) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct ip_vs_protocol *pp = pd->pp; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); - if (!skb_make_writable(skb, ihl)) + if (!skb_make_writable(skb, iph->len)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph)) goto drop; #ifdef CONFIG_IP_VS_IPV6 @@ -1115,17 +1103,22 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (!net_ipvs(net)->enable) return NF_ACCEPT; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, - hooknum); + hooknum, &iph); if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } } else #endif @@ -1135,7 +1128,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } pd = ip_vs_proto_data_get(net, iph.protocol); @@ -1145,39 +1137,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, - ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - } else + if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, iph.len); + return handle_response(af, skb, pd, cp, &iph); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph.len, - sizeof(_ports), _ports); + pptr = frag_safe_skb_hp(skb, iph.len, + sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ if (ip_vs_lookup_real_service(net, af, iph.protocol, @@ -1375,13 +1359,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - offset += cih->ihl * 4; - - ip_vs_fill_iphdr(AF_INET, cih, &ciph); + ip_vs_fill_ip4hdr(cih, &ciph); + ciph.len += offset; + offset = ciph.len; /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); if (!cp) return NF_ACCEPT; @@ -1450,7 +1434,7 @@ ignore_ipip: ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); + verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: __ip_vs_conn_put(cp); @@ -1459,38 +1443,24 @@ out: } #ifdef CONFIG_IP_VS_IPV6 -static int -ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) +static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, + unsigned int hooknum, struct ip_vs_iphdr *iph) { struct net *net = NULL; - struct ipv6hdr *iph; + struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ciph, *cih; /* The ip header contained - within the ICMP */ - struct ip_vs_iphdr ciph; + struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offset, verdict; + unsigned int offs_ciph, writable, verdict; *related = 1; - /* reassemble IP fragments */ - if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { - if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum))) - return NF_STOLEN; - } - - iph = ipv6_hdr(skb); - offset = sizeof(struct ipv6hdr); - ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph); if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n", - ic->icmp6_type, ntohs(icmpv6_id(ic)), - &iph->saddr, &iph->daddr); - /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy @@ -1498,47 +1468,71 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ - if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && - (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && - (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + if (ic->icmp6_type & ICMPV6_INFOMSG_MASK) { *related = 0; return NF_ACCEPT; } + /* Fragment header that is before ICMP header tells us that: + * it's not an error message since they can't be fragmented. + */ + if (iph->flags & IP6T_FH_F_FRAG) + return NF_DROP; + + IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + &iph->saddr, &iph->daddr); /* Now find the contained IP header */ - offset += sizeof(_icmph); - cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); - if (cih == NULL) + ciph.len = iph->len + sizeof(_icmph); + offs_ciph = ciph.len; /* Save ip header offset */ + ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); + if (ip6h == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ + ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ + ciph.daddr.in6 = ip6h->daddr; + /* skip possible IPv6 exthdrs of contained IPv6 packet */ + ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); + if (ciph.protocol < 0) + return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ net = skb_net(skb); - pd = ip_vs_proto_data_get(net, cih->nexthdr); + pd = ip_vs_proto_data_get(net, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; - /* Is the embedded protocol header present? */ - /* TODO: we don't support fragmentation at the moment anyways */ - if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + /* Cannot handle fragmented embedded protocol */ + if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, "Checking incoming ICMPv6 for"); - offset += sizeof(struct ipv6hdr); + /* The embedded headers contain source and dest in reverse order + * if not from localhost + */ + cp = pp->conn_in_get(AF_INET6, skb, &ciph, + (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); - ip_vs_fill_iphdr(AF_INET6, cih, &ciph); - /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; + /* VS/TUN, VS/DR and LOCALNODE just let it go */ + if ((hooknum == NF_INET_LOCAL_OUT) && + (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { + __ip_vs_conn_put(cp); + return NF_ACCEPT; + } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || - IPPROTO_SCTP == cih->nexthdr) - offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); + + /* Need to mangle contained IPv6 header in ICMPv6 packet */ + writable = ciph.len; + if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || + IPPROTO_SCTP == ciph.protocol) + writable += 2 * sizeof(__u16); /* Also mangle ports */ + + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); __ip_vs_conn_put(cp); @@ -1574,7 +1568,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1586,7 +1580,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (!net_ipvs(net)->enable) return NF_ACCEPT; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(af, skb, &iph); /* Bad... Do not break raw sockets */ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && @@ -1600,13 +1594,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { + if (!iph.fragoffs && skb_nfct_reasm(skb)) { + struct sk_buff *reasm = skb_nfct_reasm(skb); + /* Save fw mark for coming frags. */ + reasm->ipvs_property = 1; + reasm->mark = skb->mark; + } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); + int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, + &iph); if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } } else #endif @@ -1616,7 +1616,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (related) return verdict; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } /* Protocol supported? */ @@ -1627,12 +1626,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); - - if (unlikely(!cp)) { + cp = pp->conn_in_get(af, skb, &iph, 0); + if (unlikely(!cp) && !iph.fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ int v; - if (!pp->conn_schedule(af, skb, pd, &v, &cp)) + /* Schedule and create new connection entry into &cp */ + if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) return v; } @@ -1640,6 +1642,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); + if (iph.fragoffs && !skb_nfct_reasm(skb)) { + /* Fragment that couldn't be mapped to a conn entry + * and don't have any pointer to a reasm skb + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); + } return NF_ACCEPT; } @@ -1662,7 +1672,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pp, &iph); /* do not touch skb anymore */ else { IP_VS_DBG_RL("warning: packet_xmit is null"); @@ -1724,6 +1734,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 /* + * AF_INET6 fragment handling + * Copy info from first fragment, to the rest of them. + */ +static unsigned int +ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm = skb_nfct_reasm(skb); + struct net *net; + + /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. + * ipvs_property is set when checking first fragment + * in ip_vs_in() and ip_vs_out(). + */ + if (reasm) + IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); + if (!reasm || !reasm->ipvs_property) + return NF_ACCEPT; + + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + + /* Copy stored fw mark, saved in ip_vs_{in,out} */ + skb->mark = reasm->mark; + + return NF_ACCEPT; +} + +/* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients */ @@ -1793,8 +1835,10 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, { int r; struct net *net; + struct ip_vs_iphdr iphdr; - if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; /* ipvs enabled in this netns ? */ @@ -1802,7 +1846,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, if (!net_ipvs(net)->enable) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, hooknum); + return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); } #endif @@ -1860,6 +1904,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 + /* After mangle & nat fetch 2:nd fragment and following */ + { + .hook = ip_vs_preroute_frag6, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST + 1, + }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 8b7dca9ea42..7f3b0cc00b7 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -215,7 +215,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dh_bucket *tbl; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index df646ccf08a..cbd37489ac7 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -479,7 +479,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 570e31ea427..161b67972e3 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -649,7 +649,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest = NULL; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 1aa5cac748c..12475ef88da 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -68,23 +68,31 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { + struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; int retc; - ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(p->af, skb, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) return -EINVAL; + /* todo: IPv6 fragments: + * I think this only should be done for the first fragment. /HS + */ + if (reasm) { + skb = reasm; + dataoff = iph.thoff_reasm + sizeof(struct udphdr); + } else + dataoff = iph.len + sizeof(struct udphdr); - /* No Data ? */ - dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - - if ((retc=skb_linearize(skb)) < 0) + /* todo: Check if this will mess-up the reasm skb !!! /HS */ + retc = skb_linearize(skb); + if (retc < 0) return retc; dptr = skb->data + dataoff; datalen = skb->len - dataoff; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 50d82186da8..939f7fbe9b4 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -280,17 +280,17 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "TRUNCATED"); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr); + sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "TRUNCATED %pI6->%pI6", + sprintf(buf, "TRUNCATED %pI6c->%pI6c", &ih->saddr, &ih->daddr); else - sprintf(buf, "%pI6:%u->%pI6:%u", + sprintf(buf, "%pI6c:%u->%pI6c:%u", &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5b8eb8b12c3..5de3dd312c0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -57,7 +57,7 @@ ah_esp_conn_fill_param_proto(struct net *net, int af, static struct ip_vs_conn * ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, unsigned int proto_off, + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn *cp; @@ -85,9 +85,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - unsigned int proto_off, - int inverse) + const struct ip_vs_iphdr *iph, int inverse) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; @@ -110,7 +108,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { /* * AH/ESP is only related traffic. Pass the packet to IP stack. diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 9f3fb751c49..746048b13ef 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -10,28 +10,26 @@ static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); if (sh == NULL) return 0; - sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), + sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, sh->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -47,10 +45,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -64,20 +62,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, } static int -sctp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; - unsigned int sctphoff; + unsigned int sctphoff = iph->len; struct sk_buff *iter; __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - sctphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - sctphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) @@ -108,20 +104,18 @@ sctp_snat_handler(struct sk_buff *skb, } static int -sctp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; - unsigned int sctphoff; + unsigned int sctphoff = iph->len; struct sk_buff *iter; __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - sctphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - sctphoff = ip_hdrlen(skb); /* csum_check requires unshared skb */ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index cd609cc6272..9af653a7582 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -33,16 +33,14 @@ static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; @@ -50,8 +48,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, th->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { @@ -68,10 +66,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -128,20 +126,18 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph, static int -tcp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct tcphdr *tcph; - unsigned int tcphoff; + unsigned int tcphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - tcphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - tcphoff = ip_hdrlen(skb); oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ @@ -208,20 +204,18 @@ tcp_snat_handler(struct sk_buff *skb, static int -tcp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct tcphdr *tcph; - unsigned int tcphoff; + unsigned int tcphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - tcphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - tcphoff = ip_hdrlen(skb); oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 2fedb2dcb3d..503a842c90d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -30,23 +30,22 @@ static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - int *verdict, struct ip_vs_conn **cpp) + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) { struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; - struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - - uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); if (uh == NULL) { *verdict = NF_DROP; return 0; } net = skb_net(skb); - svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, - &iph.daddr, uh->dest); + svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, + &iph->daddr, uh->dest); if (svc) { int ignored; @@ -64,10 +63,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pd, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pd); + *verdict = ip_vs_leave(svc, skb, pd, iph); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -125,20 +124,18 @@ udp_partial_csum_update(int af, struct udphdr *uhdr, static int -udp_snat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; - unsigned int udphoff; + unsigned int udphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - udphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - udphoff = ip_hdrlen(skb); oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ @@ -210,20 +207,18 @@ udp_snat_handler(struct sk_buff *skb, static int -udp_dnat_handler(struct sk_buff *skb, - struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { struct udphdr *udph; - unsigned int udphoff; + unsigned int udphoff = iph->len; int oldlen; int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 - if (cp->af == AF_INET6) - udphoff = sizeof(struct ipv6hdr); - else + if (cp->af == AF_INET6 && iph->fragoffs) + return 1; #endif - udphoff = ip_hdrlen(skb); oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 08dbdd5bc18..d6bf20d6cdb 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -159,7 +159,7 @@ void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { - IP_VS_ERR_RL("%s: %s [%pI6]:%d - %s\n", + IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", svc->scheduler->name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 05126521743..e3312699462 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -228,7 +228,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_sh_bucket *tbl; struct ip_vs_iphdr iph; - ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index cc4c8095681..12008b47e5c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -338,7 +338,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, local = __ip_vs_is_local_route6(rt); if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & rt_mode)) { - IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", + IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n", local ? "local":"non-local", daddr); dst_release(&rt->dst); return NULL; @@ -346,8 +346,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && !((ort = (struct rt6_info *) skb_dst(skb)) && __ip_vs_is_local_route6(ort))) { - IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " - "requires NAT method, dest: %pI6\n", + IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " + "requires NAT method, dest: %pI6c\n", &ipv6_hdr(skb)->daddr, daddr); dst_release(&rt->dst); return NULL; @@ -355,8 +355,8 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LOOPBACK)) { - IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " - "to non-local address, dest: %pI6\n", + IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " + "to non-local address, dest: %pI6c\n", &ipv6_hdr(skb)->saddr, daddr); dst_release(&rt->dst); return NULL; @@ -427,7 +427,7 @@ do { \ */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { /* we do not touch skb and do not need pskb ptr */ IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); @@ -441,7 +441,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, */ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); @@ -496,16 +496,16 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ - struct ipv6hdr *iph = ipv6_hdr(skb); int mtu; EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, - IP_VS_RT_MODE_NON_LOCAL))) + rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, + IP_VS_RT_MODE_NON_LOCAL); + if (!rt) goto tx_error_icmp; /* MTU checking */ @@ -516,7 +516,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -559,7 +561,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ int ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ int mtu; @@ -629,7 +631,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); @@ -677,7 +679,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -686,10 +688,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); /* check if it is a connection of no-client-port */ - if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { __be16 _pt, *p; - p = skb_header_pointer(skb, sizeof(struct ipv6hdr), - sizeof(_pt), &_pt); + p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; ip_vs_conn_fill_cport(cp, *p); @@ -737,7 +738,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error_put; @@ -751,7 +754,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; @@ -812,7 +815,7 @@ tx_error_put: */ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); struct rtable *rt; /* Route to the other host */ @@ -932,7 +935,7 @@ tx_error_put: #ifdef CONFIG_IP_VS_IPV6 int ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ struct in6_addr saddr; /* Source for tunnel */ @@ -972,7 +975,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!ipvsh->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } @@ -1053,7 +1058,7 @@ tx_error_put: */ int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); @@ -1115,7 +1120,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp) + struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -1139,7 +1144,9 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -1183,7 +1190,8 @@ tx_error: */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset, unsigned int hooknum) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum, + struct ip_vs_iphdr *iph) { struct rtable *rt; /* Route to the other host */ int mtu; @@ -1198,7 +1206,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp); + rc = cp->packet_xmit(skb, cp, pp, iph); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1304,7 +1312,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset, unsigned int hooknum) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum, + struct ip_vs_iphdr *iph) { struct rt6_info *rt; /* Route to the other host */ int mtu; @@ -1319,7 +1328,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) - rc = cp->packet_xmit(skb, cp, pp); + rc = cp->packet_xmit(skb, cp, pp, iph); else rc = NF_ACCEPT; /* do not touch skb anymore */ @@ -1375,7 +1384,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + /* only send ICMP too big on first fragment */ + if (!iph->fragoffs) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index bb10b0717f1..8d47c3780fd 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -67,7 +67,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; } - ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(family, skb, &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 94060edbbd7..f262dbfc7f0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + if (po->tp_tx_has_off) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (sock->type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + data = ph.raw + off; + } else { + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -1907,7 +1935,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, to_write -= dev->hard_header_len; } - err = -EFAULT; offset = offset_in_page(data); len_max = PAGE_SIZE - offset; len = ((to_write > len_max) ? len_max : to_write); @@ -1957,7 +1984,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - err = -EBUSY; if (saddr == NULL) { dev = po->prot_hook.dev; proto = po->num; @@ -3111,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_TX_HAS_OFF: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_tx_has_off = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -3202,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->type << 16)) : 0); break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; + break; default: return -ENOPROTOOPT; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 44945f6b725..e84cab8cb7a 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -109,6 +109,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 2ecde225ae6..709b0fb38a1 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -17,6 +17,7 @@ #include <linux/skbuff.h> #include <linux/cgroup.h> #include <linux/rcupdate.h> +#include <linux/fdtable.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> @@ -53,6 +54,28 @@ static void cgrp_destroy(struct cgroup *cgrp) kfree(cgrp_cls_state(cgrp)); } +static int update_classid(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_classid = (u32)(unsigned long)v; + return 0; +} + +static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +{ + struct task_struct *p; + void *v; + + cgroup_taskset_for_each(p, cgrp, tset) { + task_lock(p); + v = (void *)(unsigned long)task_cls_classid(p); + iterate_fd(p->files, 0, update_classid, v); + task_unlock(p); + } +} + static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) { return cgrp_cls_state(cgrp)->classid; @@ -77,6 +100,7 @@ struct cgroup_subsys net_cls_subsys = { .name = "net_cls", .create = cgrp_create, .destroy = cgrp_destroy, + .attach = cgrp_attach, .subsys_id = net_cls_subsys_id, .base_cftypes = ss_files, .module = THIS_MODULE, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a18d975db59..13cc744a249 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -495,16 +495,15 @@ EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) { - ktime_t time; - if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state)) return; qdisc_throttled(wd->qdisc); - time = ktime_set(0, 0); - time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + + hrtimer_start(&wd->timer, + ns_to_ktime(PSCHED_TICKS2NS(expires)), + HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 564b9fc8efd..0e19948470b 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -509,8 +509,7 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->cpriority = TC_CBQ_MAXPRIO; q->pmask |= (1<<TC_CBQ_MAXPRIO); - expires = ktime_set(0, 0); - expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); + expires = ns_to_ktime(PSCHED_TICKS2NS(sched)); if (hrtimer_try_to_cancel(&q->delay_timer) && ktime_to_ns(ktime_sub( hrtimer_get_expires(&q->delay_timer), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9d75b776131..d2922c0ef57 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -71,6 +71,12 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; +struct htb_rate_cfg { + u64 rate_bps; + u32 mult; + u32 shift; +}; + /* interior & leaf nodes; props specific to leaves are marked L: */ struct htb_class { struct Qdisc_class_common common; @@ -118,11 +124,11 @@ struct htb_class { int filter_cnt; /* token bucket parameters */ - struct qdisc_rate_table *rate; /* rate table of the class itself */ - struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ - long buffer, cbuffer; /* token bucket depth/rate */ + struct htb_rate_cfg rate; + struct htb_rate_cfg ceil; + s64 buffer, cbuffer; /* token bucket depth/rate */ psched_tdiff_t mbuffer; /* max wait time */ - long tokens, ctokens; /* current number of tokens */ + s64 tokens, ctokens; /* current number of tokens */ psched_time_t t_c; /* checkpoint time */ }; @@ -162,6 +168,45 @@ struct htb_sched { struct work_struct work; }; +static u64 l2t_ns(struct htb_rate_cfg *r, unsigned int len) +{ + return ((u64)len * r->mult) >> r->shift; +} + +static void htb_precompute_ratedata(struct htb_rate_cfg *r) +{ + u64 factor; + u64 mult; + int shift; + + r->shift = 0; + r->mult = 1; + /* + * Calibrate mult, shift so that token counting is accurate + * for smallest packet size (64 bytes). Token (time in ns) is + * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will + * work as long as the smallest packet transfer time can be + * accurately represented in nanosec. + */ + if (r->rate_bps > 0) { + /* + * Higher shift gives better accuracy. Find the largest + * shift such that mult fits in 32 bits. + */ + for (shift = 0; shift < 16; shift++) { + r->shift = shift; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + mult = div64_u64(factor, r->rate_bps); + if (mult > UINT_MAX) + break; + } + + r->shift = shift - 1; + factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); + r->mult = div64_u64(factor, r->rate_bps); + } +} + /* find class in global hash table using given handle */ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { @@ -273,7 +318,7 @@ static void htb_add_to_id_tree(struct rb_root *root, * already in the queue. */ static void htb_add_to_wait_tree(struct htb_sched *q, - struct htb_class *cl, long delay) + struct htb_class *cl, s64 delay) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; @@ -441,14 +486,14 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q, cl, mask); } -static inline long htb_lowater(const struct htb_class *cl) +static inline s64 htb_lowater(const struct htb_class *cl) { if (htb_hysteresis) return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; else return 0; } -static inline long htb_hiwater(const struct htb_class *cl) +static inline s64 htb_hiwater(const struct htb_class *cl) { if (htb_hysteresis) return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; @@ -469,9 +514,9 @@ static inline long htb_hiwater(const struct htb_class *cl) * mode transitions per time unit. The speed gain is about 1/6. */ static inline enum htb_cmode -htb_class_mode(struct htb_class *cl, long *diff) +htb_class_mode(struct htb_class *cl, s64 *diff) { - long toks; + s64 toks; if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { *diff = -toks; @@ -495,7 +540,7 @@ htb_class_mode(struct htb_class *cl, long *diff) * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). */ static void -htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) +htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) { enum htb_cmode new_mode = htb_class_mode(cl, diff); @@ -581,26 +626,26 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff) +static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) { - long toks = diff + cl->tokens; + s64 toks = diff + cl->tokens; if (toks > cl->buffer) toks = cl->buffer; - toks -= (long) qdisc_l2t(cl->rate, bytes); + toks -= (s64) l2t_ns(&cl->rate, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; cl->tokens = toks; } -static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff) +static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) { - long toks = diff + cl->ctokens; + s64 toks = diff + cl->ctokens; if (toks > cl->cbuffer) toks = cl->cbuffer; - toks -= (long) qdisc_l2t(cl->ceil, bytes); + toks -= (s64) l2t_ns(&cl->ceil, bytes); if (toks <= -cl->mbuffer) toks = 1 - cl->mbuffer; @@ -623,10 +668,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, { int bytes = qdisc_pkt_len(skb); enum htb_cmode old_mode; - long diff; + s64 diff; while (cl) { - diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); + diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); if (cl->level >= level) { if (cl->level == level) cl->xstats.lends++; @@ -673,7 +718,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; - long diff; + s64 diff; struct rb_node *p = rb_first(&q->wait_pq[level]); if (!p) @@ -684,7 +729,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, return cl->pq_key; htb_safe_rb_erase(p, q->wait_pq + level); - diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); + diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); @@ -871,10 +916,10 @@ ok: if (!sch->q.qlen) goto fin; - q->now = psched_get_time(); + q->now = ktime_to_ns(ktime_get()); start_at = jiffies; - next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; + next_event = q->now + 5 * NSEC_PER_SEC; for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ @@ -884,7 +929,7 @@ ok: if (q->now >= q->near_ev_cache[level]) { event = htb_do_events(q, level, start_at); if (!event) - event = q->now + PSCHED_TICKS_PER_SEC; + event = q->now + NSEC_PER_SEC; q->near_ev_cache[level] = event; } else event = q->near_ev_cache[level]; @@ -903,10 +948,17 @@ ok: } } sch->qstats.overlimits++; - if (likely(next_event > q->now)) - qdisc_watchdog_schedule(&q->watchdog, next_event); - else + if (likely(next_event > q->now)) { + if (!test_bit(__QDISC_STATE_DEACTIVATED, + &qdisc_root_sleeping(q->watchdog.qdisc)->state)) { + ktime_t time = ns_to_ktime(next_event); + qdisc_throttled(q->watchdog.qdisc); + hrtimer_start(&q->watchdog.timer, time, + HRTIMER_MODE_ABS); + } + } else { schedule_work(&q->work); + } fin: return skb; } @@ -1082,9 +1134,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); - opt.rate = cl->rate->rate; + opt.rate.rate = cl->rate.rate_bps >> 3; opt.buffer = cl->buffer; - opt.ceil = cl->ceil->rate; + opt.ceil.rate = cl->ceil.rate_bps >> 3; opt.cbuffer = cl->cbuffer; opt.quantum = cl->quantum; opt.prio = cl->prio; @@ -1203,9 +1255,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_destroy(cl->un.leaf.q); } gen_kill_estimator(&cl->bstats, &cl->rate_est); - qdisc_put_rtab(cl->rate); - qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(&cl->filter_list); kfree(cl); } @@ -1307,7 +1356,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[__TCA_HTB_MAX]; struct tc_htb_opt *hopt; @@ -1326,10 +1374,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); hopt = nla_data(tb[TCA_HTB_PARMS]); - - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); - if (!rtab || !ctab) + if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; if (!cl) { /* new class */ @@ -1439,7 +1484,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = rtab->rate.rate / q->rate2quantum; + cl->quantum = hopt->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->quantum < 1000) { pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", @@ -1460,12 +1505,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->buffer = hopt->buffer; cl->cbuffer = hopt->cbuffer; - if (cl->rate) - qdisc_put_rtab(cl->rate); - cl->rate = rtab; - if (cl->ceil) - qdisc_put_rtab(cl->ceil); - cl->ceil = ctab; + + cl->rate.rate_bps = (u64)hopt->rate.rate << 3; + cl->ceil.rate_bps = (u64)hopt->ceil.rate << 3; + + htb_precompute_ratedata(&cl->rate); + htb_precompute_ratedata(&cl->ceil); + + cl->buffer = hopt->buffer << PSCHED_SHIFT; + cl->cbuffer = hopt->buffer << PSCHED_SHIFT; + sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1474,10 +1523,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, return 0; failure: - if (rtab) - qdisc_put_rtab(rtab); - if (ctab) - qdisc_put_rtab(ctab); return err; } diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 126b014eb79..a9edd2e205f 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -9,7 +9,6 @@ menuconfig IP_SCTP select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 - select CRYPTO_MD5 if SCTP_HMAC_MD5 select LIBCRC32C ---help--- Stream Control Transmission Protocol @@ -68,33 +67,21 @@ config SCTP_DBG_OBJCNT If unsure, say N -choice - prompt "SCTP: Cookie HMAC Algorithm" - default SCTP_HMAC_MD5 +config SCTP_COOKIE_HMAC_MD5 + bool "Enable optional MD5 hmac cookie generation" help - HMAC algorithm to be used during association initialization. It - is strongly recommended to use HMAC-SHA1 or HMAC-MD5. See - configuration for Cryptographic API and enable those algorithms - to make usable by SCTP. - -config SCTP_HMAC_NONE - bool "None" - help - Choosing this disables the use of an HMAC during association - establishment. It is advised to use either HMAC-MD5 or HMAC-SHA1. - -config SCTP_HMAC_SHA1 - bool "HMAC-SHA1" - help - Enable the use of HMAC-SHA1 during association establishment. It - is advised to use either HMAC-MD5 or HMAC-SHA1. - -config SCTP_HMAC_MD5 - bool "HMAC-MD5" + Enable optional MD5 hmac based SCTP cookie generation + default y + select CRYPTO_HMAC if SCTP_COOKIE_HMAC_MD5 + select CRYPTO_MD5 if SCTP_COOKIE_HMAC_MD5 + +config SCTP_COOKIE_HMAC_SHA1 + bool "Enable optional SHA1 hmac cookie generation" help - Enable the use of HMAC-MD5 during association establishment. It is - advised to use either HMAC-MD5 or HMAC-SHA1. + Enable optional SHA1 hmac based SCTP cookie generation + default y + select CRYPTO_HMAC if SCTP_COOKIE_HMAC_SHA1 + select CRYPTO_SHA1 if SCTP_COOKIE_HMAC_SHA1 -endchoice endif # IP_SCTP diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2d518425d59..456bc3dbdd5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1190,6 +1190,15 @@ static int sctp_net_init(struct net *net) /* Whether Cookie Preservative is enabled(1) or not(0) */ net->sctp.cookie_preserve_enable = 1; + /* Default sctp sockets to use md5 as their hmac alg */ +#if defined (CONFIG_CRYPTO_MD5) + net->sctp.sctp_hmac_alg = "md5"; +#elif defined (CONFIG_CRYPTO_SHA1) + net->sctp.sctp_hmac_alg = "sha1"; +#else + net->sctp.sctp_hmac_alg = NULL; +#endif + /* Max.Burst - 4 */ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 6773d780362..6eecf7e6338 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1268,14 +1268,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.ptr; + asoc = cmd->obj.asoc; /* Register with the endpoint. */ sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; case SCTP_CMD_UPDATE_ASSOC: - sctp_assoc_update(asoc, cmd->obj.ptr); + sctp_assoc_update(asoc, cmd->obj.asoc); break; case SCTP_CMD_PURGE_OUTQUEUE: @@ -1315,7 +1315,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_PROCESS_FWDTSN: - sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.ptr); + sctp_cmd_process_fwdtsn(&asoc->ulpq, cmd->obj.chunk); break; case SCTP_CMD_GEN_SACK: @@ -1331,7 +1331,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PROCESS_SACK: /* Process an inbound SACK. */ error = sctp_cmd_process_sack(commands, asoc, - cmd->obj.ptr); + cmd->obj.chunk); break; case SCTP_CMD_GEN_INIT_ACK: @@ -1352,15 +1352,15 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * layer which will bail. */ error = sctp_cmd_process_init(commands, asoc, chunk, - cmd->obj.ptr, gfp); + cmd->obj.init, gfp); break; case SCTP_CMD_GEN_COOKIE_ECHO: /* Generate a COOKIE ECHO chunk. */ new_obj = sctp_make_cookie_echo(asoc, chunk); if (!new_obj) { - if (cmd->obj.ptr) - sctp_chunk_free(cmd->obj.ptr); + if (cmd->obj.chunk) + sctp_chunk_free(cmd->obj.chunk); goto nomem; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -1369,9 +1369,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, /* If there is an ERROR chunk to be sent along with * the COOKIE_ECHO, send it, too. */ - if (cmd->obj.ptr) + if (cmd->obj.chunk) sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, - SCTP_CHUNK(cmd->obj.ptr)); + SCTP_CHUNK(cmd->obj.chunk)); if (new_obj->transport) { new_obj->transport->init_sent_count++; @@ -1417,18 +1417,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "chunk_up:", cmd->obj.ptr, + "chunk_up:", cmd->obj.chunk, "ulpq:", &asoc->ulpq); - sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.ptr, + sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. */ SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "event_up:",cmd->obj.ptr, + "event_up:",cmd->obj.ulpevent, "ulpq:",&asoc->ulpq); - sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ptr); + sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); break; case SCTP_CMD_REPLY: @@ -1438,12 +1438,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. */ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.ptr); + error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk); break; case SCTP_CMD_SEND_PKT: /* Send a full packet to our peer. */ - packet = cmd->obj.ptr; + packet = cmd->obj.packet; sctp_packet_transmit(packet); sctp_ootb_pkt_free(packet); break; @@ -1480,7 +1480,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_SETUP_T2: - sctp_cmd_setup_t2(commands, asoc, cmd->obj.ptr); + sctp_cmd_setup_t2(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_TIMER_START_ONCE: @@ -1514,7 +1514,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_INIT_CHOOSE_TRANSPORT: - chunk = cmd->obj.ptr; + chunk = cmd->obj.chunk; t = sctp_assoc_choose_alter_transport(asoc, asoc->init_last_sent_to); asoc->init_last_sent_to = t; @@ -1665,17 +1665,16 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_PART_DELIVER: - sctp_ulpq_partial_delivery(&asoc->ulpq, cmd->obj.ptr, - GFP_ATOMIC); + sctp_ulpq_partial_delivery(&asoc->ulpq, GFP_ATOMIC); break; case SCTP_CMD_RENEGE: - sctp_ulpq_renege(&asoc->ulpq, cmd->obj.ptr, + sctp_ulpq_renege(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_SETUP_T4: - sctp_cmd_setup_t4(commands, asoc, cmd->obj.ptr); + sctp_cmd_setup_t4(commands, asoc, cmd->obj.chunk); break; case SCTP_CMD_PROCESS_OPERR: @@ -1734,8 +1733,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; default: - pr_warn("Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u\n", + cmd->verb); break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a60d1f8b41c..15379acd9c0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -110,7 +110,6 @@ static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); static void sctp_sock_migrate(struct sock *, struct sock *, struct sctp_association *, sctp_socket_type_t); -static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; extern struct kmem_cache *sctp_bucket_cachep; extern long sysctl_sctp_mem[3]; @@ -3890,6 +3889,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->default_rcv_context = 0; sp->max_burst = net->sctp.max_burst; + sp->sctp_hmac_alg = net->sctp.sctp_hmac_alg; + /* Initialize default setup parameters. These parameters * can be modified with the SCTP_INITMSG socket option or * overridden by the SCTP_INIT CMSG. @@ -5981,13 +5982,15 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct crypto_hash *tfm = NULL; + char alg[32]; /* Allocate HMAC for generating cookie. */ - if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { - tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); + if (!sp->hmac && sp->sctp_hmac_alg) { + sprintf(alg, "hmac(%s)", sp->sctp_hmac_alg); + tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { net_info_ratelimited("failed to load transform for %s: %ld\n", - sctp_hmac_alg, PTR_ERR(tfm)); + sp->sctp_hmac_alg, PTR_ERR(tfm)); return -ENOSYS; } sctp_sk(sk)->hmac = tfm; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 70e3ba5cb50..043889ac86c 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,6 +62,11 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; +static int proc_sctp_do_hmac_alg(ctl_table *ctl, + int write, + void __user *buffer, size_t *lenp, + + loff_t *ppos); static ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -147,6 +152,12 @@ static ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "cookie_hmac_alg", + .maxlen = 8, + .mode = 0644, + .proc_handler = proc_sctp_do_hmac_alg, + }, + { .procname = "valid_cookie_life", .data = &init_net.sctp.valid_cookie_life, .maxlen = sizeof(unsigned int), @@ -289,6 +300,54 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; +static int proc_sctp_do_hmac_alg(ctl_table *ctl, + int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + char tmp[8]; + ctl_table tbl; + int ret; + int changed = 0; + char *none = "none"; + + memset(&tbl, 0, sizeof(struct ctl_table)); + + if (write) { + tbl.data = tmp; + tbl.maxlen = 8; + } else { + tbl.data = net->sctp.sctp_hmac_alg ? : none; + tbl.maxlen = strlen(tbl.data); + } + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + + if (write) { +#ifdef CONFIG_CRYPTO_MD5 + if (!strncmp(tmp, "md5", 3)) { + net->sctp.sctp_hmac_alg = "md5"; + changed = 1; + } +#endif +#ifdef CONFIG_CRYPTO_SHA1 + if (!strncmp(tmp, "sha1", 4)) { + net->sctp.sctp_hmac_alg = "sha1"; + changed = 1; + } +#endif + if (!strncmp(tmp, "none", 4)) { + net->sctp.sctp_hmac_alg = NULL; + changed = 1; + } + + if (!changed) + ret = -EINVAL; + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 360d8697b95..ada17464b65 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -997,7 +997,6 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed) /* Partial deliver the first message as there is pressure on rwnd. */ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, - struct sctp_chunk *chunk, gfp_t gfp) { struct sctp_ulpevent *event; @@ -1060,7 +1059,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport); sctp_ulpq_tail_data(ulpq, chunk, gfp); - sctp_ulpq_partial_delivery(ulpq, chunk, gfp); + sctp_ulpq_partial_delivery(ulpq, gfp); } sk_mem_reclaim(asoc->base.sk); diff --git a/net/socket.c b/net/socket.c index d92c490e66f..2ca51c719ef 100644 --- a/net/socket.c +++ b/net/socket.c @@ -620,8 +620,6 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); - sock_update_classid(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; @@ -784,8 +782,6 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, { struct sock_iocb *si = kiocb_to_siocb(iocb); - sock_update_classid(sock->sk); - si->sock = sock; si->scm = NULL; si->msg = msg; @@ -896,8 +892,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, if (unlikely(!sock->ops->splice_read)) return -EINVAL; - sock_update_classid(sock->sk); - return sock->ops->splice_read(sock, ppos, pipe, len, flags); } @@ -3437,8 +3431,6 @@ EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { - sock_update_classid(sock->sk); - if (sock->ops->sendpage) return sock->ops->sendpage(sock, page, offset, size, flags); diff --git a/net/unix/diag.c b/net/unix/diag.c index 06748f108a5..5ac19dc1d5e 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: |