diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4')
24 files changed, 3106 insertions, 498 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b0297da5030..963dd7e6d54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -76,22 +76,53 @@ void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr) mlx4_bitmap_free_range(bitmap, obj, 1, use_rr); } -u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align) +static unsigned long find_aligned_range(unsigned long *bitmap, + u32 start, u32 nbits, + int len, int align, u32 skip_mask) +{ + unsigned long end, i; + +again: + start = ALIGN(start, align); + + while ((start < nbits) && (test_bit(start, bitmap) || + (start & skip_mask))) + start += align; + + if (start >= nbits) + return -1; + + end = start+len; + if (end > nbits) + return -1; + + for (i = start + 1; i < end; i++) { + if (test_bit(i, bitmap) || ((u32)i & skip_mask)) { + start = i + 1; + goto again; + } + } + + return start; +} + +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, + int align, u32 skip_mask) { u32 obj; - if (likely(cnt == 1 && align == 1)) + if (likely(cnt == 1 && align == 1 && !skip_mask)) return mlx4_bitmap_alloc(bitmap); spin_lock(&bitmap->lock); - obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, - bitmap->last, cnt, align - 1); + obj = find_aligned_range(bitmap->table, bitmap->last, + bitmap->max, cnt, align, skip_mask); if (obj >= bitmap->max) { bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) & bitmap->mask; - obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, - 0, cnt, align - 1); + obj = find_aligned_range(bitmap->table, 0, bitmap->max, + cnt, align, skip_mask); } if (obj < bitmap->max) { @@ -118,6 +149,11 @@ u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap) return bitmap->avail; } +static u32 mlx4_bitmap_masked_value(struct mlx4_bitmap *bitmap, u32 obj) +{ + return obj & (bitmap->max + bitmap->reserved_top - 1); +} + void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, int use_rr) { @@ -147,6 +183,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, bitmap->mask = mask; bitmap->reserved_top = reserved_top; bitmap->avail = num - reserved_top - reserved_bot; + bitmap->effective_len = bitmap->avail; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * sizeof (long), GFP_KERNEL); @@ -163,6 +200,382 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap) kfree(bitmap->table); } +struct mlx4_zone_allocator { + struct list_head entries; + struct list_head prios; + u32 last_uid; + u32 mask; + /* protect the zone_allocator from concurrent accesses */ + spinlock_t lock; + enum mlx4_zone_alloc_flags flags; +}; + +struct mlx4_zone_entry { + struct list_head list; + struct list_head prio_list; + u32 uid; + struct mlx4_zone_allocator *allocator; + struct mlx4_bitmap *bitmap; + int use_rr; + int priority; + int offset; + enum mlx4_zone_flags flags; +}; + +struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags) +{ + struct mlx4_zone_allocator *zones = kmalloc(sizeof(*zones), GFP_KERNEL); + + if (NULL == zones) + return NULL; + + INIT_LIST_HEAD(&zones->entries); + INIT_LIST_HEAD(&zones->prios); + spin_lock_init(&zones->lock); + zones->last_uid = 0; + zones->mask = 0; + zones->flags = flags; + + return zones; +} + +int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, + struct mlx4_bitmap *bitmap, + u32 flags, + int priority, + int offset, + u32 *puid) +{ + u32 mask = mlx4_bitmap_masked_value(bitmap, (u32)-1); + struct mlx4_zone_entry *it; + struct mlx4_zone_entry *zone = kmalloc(sizeof(*zone), GFP_KERNEL); + + if (NULL == zone) + return -ENOMEM; + + zone->flags = flags; + zone->bitmap = bitmap; + zone->use_rr = (flags & MLX4_ZONE_USE_RR) ? MLX4_USE_RR : 0; + zone->priority = priority; + zone->offset = offset; + + spin_lock(&zone_alloc->lock); + + zone->uid = zone_alloc->last_uid++; + zone->allocator = zone_alloc; + + if (zone_alloc->mask < mask) + zone_alloc->mask = mask; + + list_for_each_entry(it, &zone_alloc->prios, prio_list) + if (it->priority >= priority) + break; + + if (&it->prio_list == &zone_alloc->prios || it->priority > priority) + list_add_tail(&zone->prio_list, &it->prio_list); + list_add_tail(&zone->list, &it->list); + + spin_unlock(&zone_alloc->lock); + + *puid = zone->uid; + + return 0; +} + +/* Should be called under a lock */ +static int __mlx4_zone_remove_one_entry(struct mlx4_zone_entry *entry) +{ + struct mlx4_zone_allocator *zone_alloc = entry->allocator; + + if (!list_empty(&entry->prio_list)) { + /* Check if we need to add an alternative node to the prio list */ + if (!list_is_last(&entry->list, &zone_alloc->entries)) { + struct mlx4_zone_entry *next = list_first_entry(&entry->list, + typeof(*next), + list); + + if (next->priority == entry->priority) + list_add_tail(&next->prio_list, &entry->prio_list); + } + + list_del(&entry->prio_list); + } + + list_del(&entry->list); + + if (zone_alloc->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP) { + u32 mask = 0; + struct mlx4_zone_entry *it; + + list_for_each_entry(it, &zone_alloc->prios, prio_list) { + u32 cur_mask = mlx4_bitmap_masked_value(it->bitmap, (u32)-1); + + if (mask < cur_mask) + mask = cur_mask; + } + zone_alloc->mask = mask; + } + + return 0; +} + +void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) +{ + struct mlx4_zone_entry *zone, *tmp; + + spin_lock(&zone_alloc->lock); + + list_for_each_entry_safe(zone, tmp, &zone_alloc->entries, list) { + list_del(&zone->list); + list_del(&zone->prio_list); + kfree(zone); + } + + spin_unlock(&zone_alloc->lock); + kfree(zone_alloc); +} + +/* Should be called under a lock */ +static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count, + int align, u32 skip_mask, u32 *puid) +{ + u32 uid; + u32 res; + struct mlx4_zone_allocator *zone_alloc = zone->allocator; + struct mlx4_zone_entry *curr_node; + + res = mlx4_bitmap_alloc_range(zone->bitmap, count, + align, skip_mask); + + if (res != (u32)-1) { + res += zone->offset; + uid = zone->uid; + goto out; + } + + list_for_each_entry(curr_node, &zone_alloc->prios, prio_list) { + if (unlikely(curr_node->priority == zone->priority)) + break; + } + + if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO) { + struct mlx4_zone_entry *it = curr_node; + + list_for_each_entry_continue_reverse(it, &zone_alloc->entries, list) { + res = mlx4_bitmap_alloc_range(it->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += it->offset; + uid = it->uid; + goto out; + } + } + } + + if (zone->flags & MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO) { + struct mlx4_zone_entry *it = curr_node; + + list_for_each_entry_from(it, &zone_alloc->entries, list) { + if (unlikely(it == zone)) + continue; + + if (unlikely(it->priority != curr_node->priority)) + break; + + res = mlx4_bitmap_alloc_range(it->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += it->offset; + uid = it->uid; + goto out; + } + } + } + + if (zone->flags & MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO) { + if (list_is_last(&curr_node->prio_list, &zone_alloc->prios)) + goto out; + + curr_node = list_first_entry(&curr_node->prio_list, + typeof(*curr_node), + prio_list); + + list_for_each_entry_from(curr_node, &zone_alloc->entries, list) { + res = mlx4_bitmap_alloc_range(curr_node->bitmap, count, + align, skip_mask); + if (res != (u32)-1) { + res += curr_node->offset; + uid = curr_node->uid; + goto out; + } + } + } + +out: + if (NULL != puid && res != (u32)-1) + *puid = uid; + return res; +} + +/* Should be called under a lock */ +static void __mlx4_free_from_zone(struct mlx4_zone_entry *zone, u32 obj, + u32 count) +{ + mlx4_bitmap_free_range(zone->bitmap, obj - zone->offset, count, zone->use_rr); +} + +/* Should be called under a lock */ +static struct mlx4_zone_entry *__mlx4_find_zone_by_uid( + struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + + list_for_each_entry(zone, &zones->entries, list) { + if (zone->uid == uid) + return zone; + } + + return NULL; +} + +struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + struct mlx4_bitmap *bitmap; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + bitmap = zone == NULL ? NULL : zone->bitmap; + + spin_unlock(&zones->lock); + + return bitmap; +} + +int mlx4_zone_remove_one(struct mlx4_zone_allocator *zones, u32 uid) +{ + struct mlx4_zone_entry *zone; + int res; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) { + res = -1; + goto out; + } + + res = __mlx4_zone_remove_one_entry(zone); + +out: + spin_unlock(&zones->lock); + kfree(zone); + + return res; +} + +/* Should be called under a lock */ +static struct mlx4_zone_entry *__mlx4_find_zone_by_uid_unique( + struct mlx4_zone_allocator *zones, u32 obj) +{ + struct mlx4_zone_entry *zone, *zone_candidate = NULL; + u32 dist = (u32)-1; + + /* Search for the smallest zone that this obj could be + * allocated from. This is done in order to handle + * situations when small bitmaps are allocated from bigger + * bitmaps (and the allocated space is marked as reserved in + * the bigger bitmap. + */ + list_for_each_entry(zone, &zones->entries, list) { + if (obj >= zone->offset) { + u32 mobj = (obj - zone->offset) & zones->mask; + + if (mobj < zone->bitmap->max) { + u32 curr_dist = zone->bitmap->effective_len; + + if (curr_dist < dist) { + dist = curr_dist; + zone_candidate = zone; + } + } + } + } + + return zone_candidate; +} + +u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, + int align, u32 skip_mask, u32 *puid) +{ + struct mlx4_zone_entry *zone; + int res = -1; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) + goto out; + + res = __mlx4_alloc_from_zone(zone, count, align, skip_mask, puid); + +out: + spin_unlock(&zones->lock); + + return res; +} + +u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) +{ + struct mlx4_zone_entry *zone; + int res = 0; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid(zones, uid); + + if (NULL == zone) { + res = -1; + goto out; + } + + __mlx4_free_from_zone(zone, obj, count); + +out: + spin_unlock(&zones->lock); + + return res; +} + +u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) +{ + struct mlx4_zone_entry *zone; + int res; + + if (!(zones->flags & MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP)) + return -EFAULT; + + spin_lock(&zones->lock); + + zone = __mlx4_find_zone_by_uid_unique(zones, obj); + + if (NULL == zone) { + res = -1; + goto out; + } + + __mlx4_free_from_zone(zone, obj, count); + res = 0; + +out: + spin_unlock(&zones->lock); + + return res; +} /* * Handling for queue buffers -- we allocate a bunch of memory and * register it in a memory region at HCA virtual address 0. If the diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index b16e1b95566..5c93d1451c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -990,11 +990,11 @@ static struct mlx4_cmd_info cmd_info[] = { { .opcode = MLX4_CMD_CONFIG_DEV, .has_inbox = false, - .has_outbox = false, + .has_outbox = true, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_CMD_EPERM_wrapper + .wrapper = mlx4_CONFIG_DEV_wrapper }, { .opcode = MLX4_CMD_ALLOC_RES, @@ -1338,6 +1338,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_QUERY_IF_STAT_wrapper }, + { + .opcode = MLX4_CMD_ACCESS_REG, + .has_inbox = true, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_ACCESS_REG_wrapper, + }, /* Native multicast commands are not available for guests */ { .opcode = MLX4_CMD_QP_ATTACH, @@ -2108,50 +2117,52 @@ err_vhcr: int mlx4_cmd_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + int flags = 0; + + if (!priv->cmd.initialized) { + mutex_init(&priv->cmd.hcr_mutex); + mutex_init(&priv->cmd.slave_cmd_mutex); + sema_init(&priv->cmd.poll_sem, 1); + priv->cmd.use_events = 0; + priv->cmd.toggle = 1; + priv->cmd.initialized = 1; + flags |= MLX4_CMD_CLEANUP_STRUCT; + } - mutex_init(&priv->cmd.hcr_mutex); - mutex_init(&priv->cmd.slave_cmd_mutex); - sema_init(&priv->cmd.poll_sem, 1); - priv->cmd.use_events = 0; - priv->cmd.toggle = 1; - - priv->cmd.hcr = NULL; - priv->mfunc.vhcr = NULL; - - if (!mlx4_is_slave(dev)) { + if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); - return -ENOMEM; + goto err; } + flags |= MLX4_CMD_CLEANUP_HCR; } - if (mlx4_is_mfunc(dev)) { + if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) - goto err_hcr; + goto err; + + flags |= MLX4_CMD_CLEANUP_VHCR; } - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, - MLX4_MAILBOX_SIZE, - MLX4_MAILBOX_SIZE, 0); - if (!priv->cmd.pool) - goto err_vhcr; + if (!priv->cmd.pool) { + priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + MLX4_MAILBOX_SIZE, + MLX4_MAILBOX_SIZE, 0); + if (!priv->cmd.pool) + goto err; - return 0; + flags |= MLX4_CMD_CLEANUP_POOL; + } -err_vhcr: - if (mlx4_is_mfunc(dev)) - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; + return 0; -err_hcr: - if (!mlx4_is_slave(dev)) - iounmap(priv->cmd.hcr); +err: + mlx4_cmd_cleanup(dev, flags); return -ENOMEM; } @@ -2175,18 +2186,28 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) iounmap(priv->mfunc.comm); } -void mlx4_cmd_cleanup(struct mlx4_dev *dev) +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) { struct mlx4_priv *priv = mlx4_priv(dev); - pci_pool_destroy(priv->cmd.pool); + if (priv->cmd.pool && (cleanup_mask & MLX4_CMD_CLEANUP_POOL)) { + pci_pool_destroy(priv->cmd.pool); + priv->cmd.pool = NULL; + } - if (!mlx4_is_slave(dev)) + if (!mlx4_is_slave(dev) && priv->cmd.hcr && + (cleanup_mask & MLX4_CMD_CLEANUP_HCR)) { iounmap(priv->cmd.hcr); - if (mlx4_is_mfunc(dev)) + priv->cmd.hcr = NULL; + } + if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && + (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; + priv->mfunc.vhcr = NULL; + } + if (priv->cmd.initialized && (cleanup_mask & MLX4_CMD_CLEANUP_STRUCT)) + priv->cmd.initialized = 0; } /* diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 56022d64783..e71f31387ac 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -52,6 +52,51 @@ #define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8) #define MLX4_EQ_STATE_FIRED (10 << 8) +#define TASKLET_MAX_TIME 2 +#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME) + +void mlx4_cq_tasklet_cb(unsigned long data) +{ + unsigned long flags; + unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES; + struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data; + struct mlx4_cq *mcq, *temp; + + spin_lock_irqsave(&ctx->lock, flags); + list_splice_tail_init(&ctx->list, &ctx->process_list); + spin_unlock_irqrestore(&ctx->lock, flags); + + list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { + list_del_init(&mcq->tasklet_ctx.list); + mcq->tasklet_ctx.comp(mcq); + if (atomic_dec_and_test(&mcq->refcount)) + complete(&mcq->free); + if (time_after(jiffies, end)) + break; + } + + if (!list_empty(&ctx->process_list)) + tasklet_schedule(&ctx->task); +} + +static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) +{ + unsigned long flags; + struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + + spin_lock_irqsave(&tasklet_ctx->lock, flags); + /* When migrating CQs between EQs will be implemented, please note + * that you need to sync this point. It is possible that + * while migrating a CQ, completions on the old EQs could + * still arrive. + */ + if (list_empty_careful(&cq->tasklet_ctx.list)) { + atomic_inc(&cq->refcount); + list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + } + spin_unlock_irqrestore(&tasklet_ctx->lock, flags); +} + void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn) { struct mlx4_cq *cq; @@ -292,6 +337,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, cq->uar = uar; atomic_set(&cq->refcount, 1); init_completion(&cq->free); + cq->comp = mlx4_add_cq_to_tasklet; + cq->tasklet_ctx.priv = + &priv->eq_table.eq[cq->vector].tasklet_ctx; + INIT_LIST_HEAD(&cq->tasklet_ctx.list); + cq->irq = priv->eq_table.eq[cq->vector].irq; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 57dda95b67d..999014413b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -35,52 +35,6 @@ #include "mlx4_en.h" -int mlx4_en_timestamp_config(struct net_device *dev, int tx_type, int rx_filter) -{ - struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; - int port_up = 0; - int err = 0; - - if (priv->hwtstamp_config.tx_type == tx_type && - priv->hwtstamp_config.rx_filter == rx_filter) - return 0; - - mutex_lock(&mdev->state_lock); - if (priv->port_up) { - port_up = 1; - mlx4_en_stop_port(dev, 1); - } - - mlx4_en_free_resources(priv); - - en_warn(priv, "Changing Time Stamp configuration\n"); - - priv->hwtstamp_config.tx_type = tx_type; - priv->hwtstamp_config.rx_filter = rx_filter; - - if (rx_filter != HWTSTAMP_FILTER_NONE) - dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; - else - dev->features |= NETIF_F_HW_VLAN_CTAG_RX; - - err = mlx4_en_alloc_resources(priv); - if (err) { - en_err(priv, "Failed reallocating port resources\n"); - goto out; - } - if (port_up) { - err = mlx4_en_start_port(dev); - if (err) - en_err(priv, "Failed starting port\n"); - } - -out: - mutex_unlock(&mdev->state_lock); - netdev_features_change(dev); - return err; -} - /* mlx4_en_read_clock - read raw cycle counter (to be used by time counter) */ static cycle_t mlx4_en_read_clock(const struct cyclecounter *tc) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ae83da9cd18..90e0f045a6b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -35,6 +35,7 @@ #include <linux/ethtool.h> #include <linux/netdevice.h> #include <linux/mlx4/driver.h> +#include <linux/mlx4/device.h> #include <linux/in.h> #include <net/ip.h> @@ -114,7 +115,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "tso_packets", "xmit_more", "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", - "rx_csum_good", "rx_csum_none", "tx_chksum_offload", + "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", /* packet statistics */ "broadcast", "rx_prio_0", "rx_prio_1", "rx_prio_2", "rx_prio_3", @@ -374,7 +375,302 @@ static void mlx4_en_get_strings(struct net_device *dev, } } -static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static u32 mlx4_en_autoneg_get(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + u32 autoneg = AUTONEG_DISABLE; + + if ((mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP) && + (priv->port_state.flags & MLX4_EN_PORT_ANE)) + autoneg = AUTONEG_ENABLE; + + return autoneg; +} + +static u32 ptys_get_supported_port(struct mlx4_ptys_reg *ptys_reg) +{ + u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap); + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T) + | MLX4_PROT_MASK(MLX4_1000BASE_T) + | MLX4_PROT_MASK(MLX4_100BASE_TX))) { + return SUPPORTED_TP; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) + | MLX4_PROT_MASK(MLX4_10GBASE_SR) + | MLX4_PROT_MASK(MLX4_56GBASE_SR4) + | MLX4_PROT_MASK(MLX4_40GBASE_CR4) + | MLX4_PROT_MASK(MLX4_40GBASE_SR4) + | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) { + return SUPPORTED_FIBRE; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) + | MLX4_PROT_MASK(MLX4_40GBASE_KR4) + | MLX4_PROT_MASK(MLX4_20GBASE_KR2) + | MLX4_PROT_MASK(MLX4_10GBASE_KR) + | MLX4_PROT_MASK(MLX4_10GBASE_KX4) + | MLX4_PROT_MASK(MLX4_1000BASE_KX))) { + return SUPPORTED_Backplane; + } + return 0; +} + +static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg) +{ + u32 eth_proto = be32_to_cpu(ptys_reg->eth_proto_oper); + + if (!eth_proto) /* link down */ + eth_proto = be32_to_cpu(ptys_reg->eth_proto_cap); + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_T) + | MLX4_PROT_MASK(MLX4_1000BASE_T) + | MLX4_PROT_MASK(MLX4_100BASE_TX))) { + return PORT_TP; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_SR) + | MLX4_PROT_MASK(MLX4_56GBASE_SR4) + | MLX4_PROT_MASK(MLX4_40GBASE_SR4) + | MLX4_PROT_MASK(MLX4_1000BASE_CX_SGMII))) { + return PORT_FIBRE; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_10GBASE_CR) + | MLX4_PROT_MASK(MLX4_56GBASE_CR4) + | MLX4_PROT_MASK(MLX4_40GBASE_CR4))) { + return PORT_DA; + } + + if (eth_proto & (MLX4_PROT_MASK(MLX4_56GBASE_KR4) + | MLX4_PROT_MASK(MLX4_40GBASE_KR4) + | MLX4_PROT_MASK(MLX4_20GBASE_KR2) + | MLX4_PROT_MASK(MLX4_10GBASE_KR) + | MLX4_PROT_MASK(MLX4_10GBASE_KX4) + | MLX4_PROT_MASK(MLX4_1000BASE_KX))) { + return PORT_NONE; + } + return PORT_OTHER; +} + +#define MLX4_LINK_MODES_SZ \ + (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8) + +enum ethtool_report { + SUPPORTED = 0, + ADVERTISED = 1, + SPEED = 2 +}; + +/* Translates mlx4 link mode to equivalent ethtool Link modes/speed */ +static u32 ptys2ethtool_map[MLX4_LINK_MODES_SZ][3] = { + [MLX4_100BASE_TX] = { + SUPPORTED_100baseT_Full, + ADVERTISED_100baseT_Full, + SPEED_100 + }, + + [MLX4_1000BASE_T] = { + SUPPORTED_1000baseT_Full, + ADVERTISED_1000baseT_Full, + SPEED_1000 + }, + [MLX4_1000BASE_CX_SGMII] = { + SUPPORTED_1000baseKX_Full, + ADVERTISED_1000baseKX_Full, + SPEED_1000 + }, + [MLX4_1000BASE_KX] = { + SUPPORTED_1000baseKX_Full, + ADVERTISED_1000baseKX_Full, + SPEED_1000 + }, + + [MLX4_10GBASE_T] = { + SUPPORTED_10000baseT_Full, + ADVERTISED_10000baseT_Full, + SPEED_10000 + }, + [MLX4_10GBASE_CX4] = { + SUPPORTED_10000baseKX4_Full, + ADVERTISED_10000baseKX4_Full, + SPEED_10000 + }, + [MLX4_10GBASE_KX4] = { + SUPPORTED_10000baseKX4_Full, + ADVERTISED_10000baseKX4_Full, + SPEED_10000 + }, + [MLX4_10GBASE_KR] = { + SUPPORTED_10000baseKR_Full, + ADVERTISED_10000baseKR_Full, + SPEED_10000 + }, + [MLX4_10GBASE_CR] = { + SUPPORTED_10000baseKR_Full, + ADVERTISED_10000baseKR_Full, + SPEED_10000 + }, + [MLX4_10GBASE_SR] = { + SUPPORTED_10000baseKR_Full, + ADVERTISED_10000baseKR_Full, + SPEED_10000 + }, + + [MLX4_20GBASE_KR2] = { + SUPPORTED_20000baseMLD2_Full | SUPPORTED_20000baseKR2_Full, + ADVERTISED_20000baseMLD2_Full | ADVERTISED_20000baseKR2_Full, + SPEED_20000 + }, + + [MLX4_40GBASE_CR4] = { + SUPPORTED_40000baseCR4_Full, + ADVERTISED_40000baseCR4_Full, + SPEED_40000 + }, + [MLX4_40GBASE_KR4] = { + SUPPORTED_40000baseKR4_Full, + ADVERTISED_40000baseKR4_Full, + SPEED_40000 + }, + [MLX4_40GBASE_SR4] = { + SUPPORTED_40000baseSR4_Full, + ADVERTISED_40000baseSR4_Full, + SPEED_40000 + }, + + [MLX4_56GBASE_KR4] = { + SUPPORTED_56000baseKR4_Full, + ADVERTISED_56000baseKR4_Full, + SPEED_56000 + }, + [MLX4_56GBASE_CR4] = { + SUPPORTED_56000baseCR4_Full, + ADVERTISED_56000baseCR4_Full, + SPEED_56000 + }, + [MLX4_56GBASE_SR4] = { + SUPPORTED_56000baseSR4_Full, + ADVERTISED_56000baseSR4_Full, + SPEED_56000 + }, +}; + +static u32 ptys2ethtool_link_modes(u32 eth_proto, enum ethtool_report report) +{ + int i; + u32 link_modes = 0; + + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + if (eth_proto & MLX4_PROT_MASK(i)) + link_modes |= ptys2ethtool_map[i][report]; + } + return link_modes; +} + +static u32 ethtool2ptys_link_modes(u32 link_modes, enum ethtool_report report) +{ + int i; + u32 ptys_modes = 0; + + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + if (ptys2ethtool_map[i][report] & link_modes) + ptys_modes |= 1 << i; + } + return ptys_modes; +} + +/* Convert actual speed (SPEED_XXX) to ptys link modes */ +static u32 speed2ptys_link_modes(u32 speed) +{ + int i; + u32 ptys_modes = 0; + + for (i = 0; i < MLX4_LINK_MODES_SZ; i++) { + if (ptys2ethtool_map[i][SPEED] == speed) + ptys_modes |= 1 << i; + } + return ptys_modes; +} + +static int ethtool_get_ptys_settings(struct net_device *dev, + struct ethtool_cmd *cmd) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_ptys_reg ptys_reg; + u32 eth_proto; + int ret; + + memset(&ptys_reg, 0, sizeof(ptys_reg)); + ptys_reg.local_port = priv->port; + ptys_reg.proto_mask = MLX4_PTYS_EN; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, + MLX4_ACCESS_REG_QUERY, &ptys_reg); + if (ret) { + en_warn(priv, "Failed to run mlx4_ACCESS_PTYS_REG status(%x)", + ret); + return ret; + } + en_dbg(DRV, priv, "ptys_reg.proto_mask %x\n", + ptys_reg.proto_mask); + en_dbg(DRV, priv, "ptys_reg.eth_proto_cap %x\n", + be32_to_cpu(ptys_reg.eth_proto_cap)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_admin %x\n", + be32_to_cpu(ptys_reg.eth_proto_admin)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_oper %x\n", + be32_to_cpu(ptys_reg.eth_proto_oper)); + en_dbg(DRV, priv, "ptys_reg.eth_proto_lp_adv %x\n", + be32_to_cpu(ptys_reg.eth_proto_lp_adv)); + + cmd->supported = 0; + cmd->advertising = 0; + + cmd->supported |= ptys_get_supported_port(&ptys_reg); + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_cap); + cmd->supported |= ptys2ethtool_link_modes(eth_proto, SUPPORTED); + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_admin); + cmd->advertising |= ptys2ethtool_link_modes(eth_proto, ADVERTISED); + + cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + cmd->advertising |= (priv->prof->tx_pause) ? ADVERTISED_Pause : 0; + + cmd->advertising |= (priv->prof->tx_pause ^ priv->prof->rx_pause) ? + ADVERTISED_Asym_Pause : 0; + + cmd->port = ptys_get_active_port(&ptys_reg); + cmd->transceiver = (SUPPORTED_TP & cmd->supported) ? + XCVR_EXTERNAL : XCVR_INTERNAL; + + if (mlx4_en_autoneg_get(dev)) { + cmd->supported |= SUPPORTED_Autoneg; + cmd->advertising |= ADVERTISED_Autoneg; + } + + cmd->autoneg = (priv->port_state.flags & MLX4_EN_PORT_ANC) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + + eth_proto = be32_to_cpu(ptys_reg.eth_proto_lp_adv); + cmd->lp_advertising = ptys2ethtool_link_modes(eth_proto, ADVERTISED); + + cmd->lp_advertising |= (priv->port_state.flags & MLX4_EN_PORT_ANC) ? + ADVERTISED_Autoneg : 0; + + cmd->phy_address = 0; + cmd->mdio_support = 0; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + + return ret; +} + +static void ethtool_get_default_settings(struct net_device *dev, + struct ethtool_cmd *cmd) { struct mlx4_en_priv *priv = netdev_priv(dev); int trans_type; @@ -382,18 +678,7 @@ static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->autoneg = AUTONEG_DISABLE; cmd->supported = SUPPORTED_10000baseT_Full; cmd->advertising = ADVERTISED_10000baseT_Full; - - if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) - return -ENOMEM; - - trans_type = priv->port_state.transciver; - if (netif_carrier_ok(dev)) { - ethtool_cmd_speed_set(cmd, priv->port_state.link_speed); - cmd->duplex = DUPLEX_FULL; - } else { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; - } + trans_type = priv->port_state.transceiver; if (trans_type > 0 && trans_type <= 0xC) { cmd->port = PORT_FIBRE; @@ -409,17 +694,118 @@ static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->port = -1; cmd->transceiver = -1; } +} + +static int mlx4_en_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int ret = -EINVAL; + + if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) + return -ENOMEM; + + en_dbg(DRV, priv, "query port state.flags ANC(%x) ANE(%x)\n", + priv->port_state.flags & MLX4_EN_PORT_ANC, + priv->port_state.flags & MLX4_EN_PORT_ANE); + + if (priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) + ret = ethtool_get_ptys_settings(dev, cmd); + if (ret) /* ETH PROT CRTL is not supported or PTYS CMD failed */ + ethtool_get_default_settings(dev, cmd); + + if (netif_carrier_ok(dev)) { + ethtool_cmd_speed_set(cmd, priv->port_state.link_speed); + cmd->duplex = DUPLEX_FULL; + } else { + ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + cmd->duplex = DUPLEX_UNKNOWN; + } return 0; } +/* Calculate PTYS admin according ethtool speed (SPEED_XXX) */ +static __be32 speed_set_ptys_admin(struct mlx4_en_priv *priv, u32 speed, + __be32 proto_cap) +{ + __be32 proto_admin = 0; + + if (!speed) { /* Speed = 0 ==> Reset Link modes */ + proto_admin = proto_cap; + en_info(priv, "Speed was set to 0, Reset advertised Link Modes to default (%x)\n", + be32_to_cpu(proto_cap)); + } else { + u32 ptys_link_modes = speed2ptys_link_modes(speed); + + proto_admin = cpu_to_be32(ptys_link_modes) & proto_cap; + en_info(priv, "Setting Speed to %d\n", speed); + } + return proto_admin; +} + static int mlx4_en_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - if ((cmd->autoneg == AUTONEG_ENABLE) || - (ethtool_cmd_speed(cmd) != SPEED_10000) || - (cmd->duplex != DUPLEX_FULL)) + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_ptys_reg ptys_reg; + __be32 proto_admin; + int ret; + + u32 ptys_adv = ethtool2ptys_link_modes(cmd->advertising, ADVERTISED); + int speed = ethtool_cmd_speed(cmd); + + en_dbg(DRV, priv, "Set Speed=%d adv=0x%x autoneg=%d duplex=%d\n", + speed, cmd->advertising, cmd->autoneg, cmd->duplex); + + if (!(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL) || + (cmd->duplex == DUPLEX_HALF)) return -EINVAL; - /* Nothing to change */ + memset(&ptys_reg, 0, sizeof(ptys_reg)); + ptys_reg.local_port = priv->port; + ptys_reg.proto_mask = MLX4_PTYS_EN; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, + MLX4_ACCESS_REG_QUERY, &ptys_reg); + if (ret) { + en_warn(priv, "Failed to QUERY mlx4_ACCESS_PTYS_REG status(%x)\n", + ret); + return 0; + } + + proto_admin = cpu_to_be32(ptys_adv); + if (speed >= 0 && speed != priv->port_state.link_speed) + /* If speed was set then speed decides :-) */ + proto_admin = speed_set_ptys_admin(priv, speed, + ptys_reg.eth_proto_cap); + + proto_admin &= ptys_reg.eth_proto_cap; + + if (proto_admin == ptys_reg.eth_proto_admin) + return 0; /* Nothing to change */ + + if (!proto_admin) { + en_warn(priv, "Not supported link mode(s) requested, check supported link modes.\n"); + return -EINVAL; /* nothing to change due to bad input */ + } + + en_dbg(DRV, priv, "mlx4_ACCESS_PTYS_REG SET: ptys_reg.eth_proto_admin = 0x%x\n", + be32_to_cpu(proto_admin)); + + ptys_reg.eth_proto_admin = proto_admin; + ret = mlx4_ACCESS_PTYS_REG(priv->mdev->dev, MLX4_ACCESS_REG_WRITE, + &ptys_reg); + if (ret) { + en_warn(priv, "Failed to write mlx4_ACCESS_PTYS_REG eth_proto_admin(0x%x) status(0x%x)", + be32_to_cpu(ptys_reg.eth_proto_admin), ret); + return ret; + } + + en_warn(priv, "Port link mode changed, restarting port...\n"); + mutex_lock(&priv->mdev->state_lock); + if (priv->port_up) { + mlx4_en_stop_port(dev, 1); + if (mlx4_en_start_port(dev)) + en_err(priv, "Failed restarting port %d\n", priv->port); + } + mutex_unlock(&priv->mdev->state_lock); return 0; } @@ -587,7 +973,34 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) return priv->rx_ring_num; } -static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) +static u32 mlx4_en_get_rxfh_key_size(struct net_device *netdev) +{ + return MLX4_EN_RSS_KEY_SIZE; +} + +static int mlx4_en_check_rxfh_func(struct net_device *dev, u8 hfunc) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + /* check if requested function is supported by the device */ + if ((hfunc == ETH_RSS_HASH_TOP && + !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) || + (hfunc == ETH_RSS_HASH_XOR && + !(priv->mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR))) + return -EINVAL; + + priv->rss_hash_fn = hfunc; + if (hfunc == ETH_RSS_HASH_TOP && !(dev->features & NETIF_F_RXHASH)) + en_warn(priv, + "Toeplitz hash function should be used in conjunction with RX hashing for optimal performance\n"); + if (hfunc == ETH_RSS_HASH_XOR && (dev->features & NETIF_F_RXHASH)) + en_warn(priv, + "Enabling both XOR Hash function and RX Hashing can limit RPS functionality\n"); + return 0; +} + +static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key, + u8 *hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rss_map *rss_map = &priv->rss_map; @@ -596,17 +1009,23 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key) int err = 0; rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; + rss_rings = 1 << ilog2(rss_rings); while (n--) { + if (!ring_index) + break; ring_index[n] = rss_map->qps[n % rss_rings].qpn - rss_map->base_qpn; } - + if (key) + memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE); + if (hfunc) + *hfunc = priv->rss_hash_fn; return err; } static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, - const u8 *key) + const u8 *key, const u8 hfunc) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; @@ -619,6 +1038,8 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, * between rings */ for (i = 0; i < priv->rx_ring_num; i++) { + if (!ring_index) + continue; if (i > 0 && !ring_index[i] && !rss_rings) rss_rings = i; @@ -633,13 +1054,22 @@ static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index, if (!is_power_of_2(rss_rings)) return -EINVAL; + if (hfunc != ETH_RSS_HASH_NO_CHANGE) { + err = mlx4_en_check_rxfh_func(dev, hfunc); + if (err) + return err; + } + mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev, 1); } - priv->prof->rss_rings = rss_rings; + if (ring_index) + priv->prof->rss_rings = rss_rings; + if (key) + memcpy(priv->rss_key, key, MLX4_EN_RSS_KEY_SIZE); if (port_up) { err = mlx4_en_start_port(dev); @@ -1309,6 +1739,86 @@ static int mlx4_en_set_tunable(struct net_device *dev, return ret; } +static int mlx4_en_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int ret; + u8 data[4]; + + /* Read first 2 bytes to get Module & REV ID */ + ret = mlx4_get_module_info(mdev->dev, priv->port, + 0/*offset*/, 2/*size*/, data); + if (ret < 2) + return -EIO; + + switch (data[0] /* identifier */) { + case MLX4_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX4_MODULE_ID_QSFP_PLUS: + if (data[1] >= 0x3) { /* revision id */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX4_MODULE_ID_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + break; + case MLX4_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + return -ENOSYS; + } + + return 0; +} + +static int mlx4_en_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int offset = ee->offset; + int i = 0, ret; + + if (ee->len == 0) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + en_dbg(DRV, priv, + "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", + i, offset, ee->len - i); + + ret = mlx4_get_module_info(mdev->dev, priv->port, + offset, ee->len - i, data + i); + + if (!ret) /* Done reading */ + return 0; + + if (ret < 0) { + en_err(priv, + "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", + i, offset, ee->len - i, ret); + return 0; + } + + i += ret; + offset += ret; + } + return 0; +} const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, @@ -1332,6 +1842,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_rxnfc = mlx4_en_get_rxnfc, .set_rxnfc = mlx4_en_set_rxnfc, .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, + .get_rxfh_key_size = mlx4_en_get_rxfh_key_size, .get_rxfh = mlx4_en_get_rxfh, .set_rxfh = mlx4_en_set_rxfh, .get_channels = mlx4_en_get_channels, @@ -1341,6 +1852,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .get_priv_flags = mlx4_en_get_priv_flags, .get_tunable = mlx4_en_get_tunable, .set_tunable = mlx4_en_set_tunable, + .get_module_info = mlx4_en_get_module_info, + .get_module_eeprom = mlx4_en_get_module_eeprom }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 2091ae88615..9f16f754137 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -221,15 +221,12 @@ static void *mlx4_en_add(struct mlx4_dev *dev) { struct mlx4_en_dev *mdev; int i; - int err; printk_once(KERN_INFO "%s", mlx4_en_version); mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); - if (!mdev) { - err = -ENOMEM; + if (!mdev) goto err_free_res; - } if (mlx4_pd_alloc(dev, &mdev->priv_pdn)) goto err_free_dev; @@ -264,8 +261,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) } /* Build device profile according to supplied module parameters */ - err = mlx4_en_get_profile(mdev); - if (err) { + if (mlx4_en_get_profile(mdev)) { mlx4_err(mdev, "Bad module parameters, aborting\n"); goto err_mr; } @@ -286,10 +282,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) * Note: we cannot use the shared workqueue because of deadlocks caused * by the rtnl lock */ mdev->workqueue = create_singlethread_workqueue("mlx4_en"); - if (!mdev->workqueue) { - err = -ENOMEM; + if (!mdev->workqueue) goto err_mr; - } /* At this stage all non-port specific tasks are complete: * mark the card state as up */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4d69e382b4e..6ff214de111 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -575,7 +575,7 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) struct mlx4_mac_entry *entry; int index = 0; int err = 0; - u64 reg_id; + u64 reg_id = 0; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr); @@ -595,7 +595,7 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv) return 0; } - err = mlx4_qp_reserve_range(dev, 1, 1, qpn); + err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); @@ -1843,8 +1843,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) } local_bh_enable(); - while (test_bit(NAPI_STATE_SCHED, &cq->napi.state)) - msleep(1); + napi_synchronize(&cq->napi); mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); @@ -1894,6 +1893,7 @@ static void mlx4_en_clear_stats(struct net_device *dev) priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; + priv->rx_ring[i]->csum_complete = 0; } } @@ -1974,15 +1974,8 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; - int err; int node; - err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn); - if (err) { - en_err(priv, "failed reserving range for TX rings\n"); - return err; - } - /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { node = cpu_to_node(i % num_online_cpus()); @@ -1991,7 +1984,6 @@ int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], - priv->base_tx_qpn + i, prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; @@ -2157,7 +2149,7 @@ static int mlx4_en_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) return -ERANGE; } - if (mlx4_en_timestamp_config(dev, config.tx_type, config.rx_filter)) { + if (mlx4_en_reset_config(dev, config, dev->features)) { config.tx_type = HWTSTAMP_TX_OFF; config.rx_filter = HWTSTAMP_FILTER_NONE; } @@ -2190,6 +2182,16 @@ static int mlx4_en_set_features(struct net_device *netdev, netdev_features_t features) { struct mlx4_en_priv *priv = netdev_priv(netdev); + int ret = 0; + + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_CTAG_RX)) { + en_info(priv, "Turn %s RX vlan strip offload\n", + (features & NETIF_F_HW_VLAN_CTAG_RX) ? "ON" : "OFF"); + ret = mlx4_en_reset_config(netdev, priv->hwtstamp_config, + features); + if (ret) + return ret; + } if (features & NETIF_F_LOOPBACK) priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); @@ -2249,7 +2251,7 @@ static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_st #define PORT_ID_BYTE_LEN 8 static int mlx4_en_get_phys_port_id(struct net_device *dev, - struct netdev_phys_port_id *ppid) + struct netdev_phys_item_id *ppid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_dev *mdev = priv->mdev->dev; @@ -2455,6 +2457,21 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv = netdev_priv(dev); memset(priv, 0, sizeof(struct mlx4_en_priv)); + spin_lock_init(&priv->stats_lock); + INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); + INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); + INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); + INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); +#ifdef CONFIG_MLX4_EN_VXLAN + INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); + INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); +#endif +#ifdef CONFIG_RFS_ACCEL + INIT_LIST_HEAD(&priv->filters); + spin_lock_init(&priv->filters_lock); +#endif + priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; @@ -2468,6 +2485,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK; + netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key)); priv->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS, GFP_KERNEL); @@ -2486,16 +2504,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->cqe_size = mdev->dev->caps.cqe_size; priv->mac_index = -1; priv->msg_enable = MLX4_EN_MSG_LEVEL; - spin_lock_init(&priv->stats_lock); - INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); - INIT_WORK(&priv->watchdog_task, mlx4_en_restart); - INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); - INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); - INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); -#ifdef CONFIG_MLX4_EN_VXLAN - INIT_WORK(&priv->vxlan_add_task, mlx4_en_add_vxlan_offloads); - INIT_WORK(&priv->vxlan_del_task, mlx4_en_del_vxlan_offloads); -#endif #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) { @@ -2513,6 +2521,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; + if (mdev->dev->caps.rx_checksum_flags_port[priv->port] & + MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP) + priv->flags |= MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP; + /* Set default MAC */ dev->addr_len = ETH_ALEN; mlx4_en_u64_to_mac(dev->dev_addr, mdev->dev->caps.def_mac[priv->port]); @@ -2538,11 +2550,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, if (err) goto out; -#ifdef CONFIG_RFS_ACCEL - INIT_LIST_HEAD(&priv->filters); - spin_lock_init(&priv->filters_lock); -#endif - /* Initialize time stamping config */ priv->hwtstamp_config.flags = 0; priv->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -2583,15 +2590,28 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->features = dev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features |= NETIF_F_LOOPBACK; + dev->hw_features |= NETIF_F_LOOPBACK | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; if (mdev->dev->caps.steering_mode == - MLX4_STEERING_MODE_DEVICE_MANAGED) + MLX4_STEERING_MODE_DEVICE_MANAGED && + mdev->dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC) dev->hw_features |= NETIF_F_NTUPLE; if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0) dev->priv_flags |= IFF_UNICAST_FLT; + /* Setting a default hash function value */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP) { + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } else if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_XOR) { + priv->rss_hash_fn = ETH_RSS_HASH_XOR; + } else { + en_warn(priv, + "No RSS hash capabilities exposed, using Toeplitz\n"); + priv->rss_hash_fn = ETH_RSS_HASH_TOP; + } + mdev->pndev[port] = dev; netif_carrier_off(dev); @@ -2650,3 +2670,79 @@ out: return err; } +int mlx4_en_reset_config(struct net_device *dev, + struct hwtstamp_config ts_config, + netdev_features_t features) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + + if (priv->hwtstamp_config.tx_type == ts_config.tx_type && + priv->hwtstamp_config.rx_filter == ts_config.rx_filter && + !DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) + return 0; /* Nothing to change */ + + if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_CTAG_RX) && + (priv->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE)) { + en_warn(priv, "Can't turn ON rx vlan offload while time-stamping rx filter is ON\n"); + return -EINVAL; + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + mlx4_en_free_resources(priv); + + en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n", + ts_config.rx_filter, !!(features & NETIF_F_HW_VLAN_CTAG_RX)); + + priv->hwtstamp_config.tx_type = ts_config.tx_type; + priv->hwtstamp_config.rx_filter = ts_config.rx_filter; + + if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) { + if (features & NETIF_F_HW_VLAN_CTAG_RX) + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; + else + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } else if (ts_config.rx_filter == HWTSTAMP_FILTER_NONE) { + /* RX time-stamping is OFF, update the RX vlan offload + * to the latest wanted state + */ + if (dev->wanted_features & NETIF_F_HW_VLAN_CTAG_RX) + dev->features |= NETIF_F_HW_VLAN_CTAG_RX; + else + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } + + /* RX vlan offload and RX time-stamping can't co-exist ! + * Regardless of the caller's choice, + * Turn Off RX vlan offload in case of time-stamping is ON + */ + if (ts_config.rx_filter != HWTSTAMP_FILTER_NONE) { + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + en_warn(priv, "Turning off RX vlan offload since RX time-stamping is ON\n"); + dev->features &= ~NETIF_F_HW_VLAN_CTAG_RX; + } + + err = mlx4_en_alloc_resources(priv); + if (err) { + en_err(priv, "Failed reallocating port resources\n"); + goto out; + } + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + +out: + mutex_unlock(&mdev->state_lock); + netdev_features_change(dev); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 0a0261d128b..6cb80072af6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -91,21 +91,37 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port) * already synchronized, no need in locking */ state->link_state = !!(qport_context->link_up & MLX4_EN_LINK_UP_MASK); switch (qport_context->link_speed & MLX4_EN_SPEED_MASK) { + case MLX4_EN_100M_SPEED: + state->link_speed = SPEED_100; + break; case MLX4_EN_1G_SPEED: - state->link_speed = 1000; + state->link_speed = SPEED_1000; break; case MLX4_EN_10G_SPEED_XAUI: case MLX4_EN_10G_SPEED_XFI: - state->link_speed = 10000; + state->link_speed = SPEED_10000; + break; + case MLX4_EN_20G_SPEED: + state->link_speed = SPEED_20000; break; case MLX4_EN_40G_SPEED: - state->link_speed = 40000; + state->link_speed = SPEED_40000; + break; + case MLX4_EN_56G_SPEED: + state->link_speed = SPEED_56000; break; default: state->link_speed = -1; break; } - state->transciver = qport_context->transceiver; + + state->transceiver = qport_context->transceiver; + + state->flags = 0; /* Reset and recalculate the port flags */ + state->flags |= (qport_context->link_up & MLX4_EN_ANC_MASK) ? + MLX4_EN_PORT_ANC : 0; + state->flags |= (qport_context->autoneg & MLX4_EN_AUTONEG_MASK) ? + MLX4_EN_PORT_ANE : 0; out: mlx4_free_cmd_mailbox(mdev->dev, mailbox); @@ -139,11 +155,13 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->rx_bytes = 0; priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; + priv->port_stats.rx_chksum_complete = 0; for (i = 0; i < priv->rx_ring_num; i++) { stats->rx_packets += priv->rx_ring[i]->packets; stats->rx_bytes += priv->rx_ring[i]->bytes; priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok; priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none; + priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete; } stats->tx_packets = 0; stats->tx_bytes = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h index 745090b49d9..040da4b16b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.h +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h @@ -53,22 +53,49 @@ enum { MLX4_MCAST_ENABLE = 2, }; +enum mlx4_link_mode { + MLX4_1000BASE_CX_SGMII = 0, + MLX4_1000BASE_KX = 1, + MLX4_10GBASE_CX4 = 2, + MLX4_10GBASE_KX4 = 3, + MLX4_10GBASE_KR = 4, + MLX4_20GBASE_KR2 = 5, + MLX4_40GBASE_CR4 = 6, + MLX4_40GBASE_KR4 = 7, + MLX4_56GBASE_KR4 = 8, + MLX4_10GBASE_CR = 12, + MLX4_10GBASE_SR = 13, + MLX4_40GBASE_SR4 = 15, + MLX4_56GBASE_CR4 = 17, + MLX4_56GBASE_SR4 = 18, + MLX4_100BASE_TX = 24, + MLX4_1000BASE_T = 25, + MLX4_10GBASE_T = 26, +}; + +#define MLX4_PROT_MASK(link_mode) (1<<link_mode) + enum { - MLX4_EN_1G_SPEED = 0x02, - MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_100M_SPEED = 0x04, MLX4_EN_10G_SPEED_XAUI = 0x00, + MLX4_EN_10G_SPEED_XFI = 0x01, + MLX4_EN_1G_SPEED = 0x02, + MLX4_EN_20G_SPEED = 0x08, MLX4_EN_40G_SPEED = 0x40, + MLX4_EN_56G_SPEED = 0x20, MLX4_EN_OTHER_SPEED = 0x0f, }; struct mlx4_en_query_port_context { u8 link_up; #define MLX4_EN_LINK_UP_MASK 0x80 - u8 reserved; +#define MLX4_EN_ANC_MASK 0x40 + u8 autoneg; +#define MLX4_EN_AUTONEG_MASK 0x80 __be16 mtu; u8 reserved2; u8 link_speed; -#define MLX4_EN_SPEED_MASK 0x43 +#define MLX4_EN_SPEED_MASK 0x6f u16 reserved3[5]; __be64 mac; u8 transceiver; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 01660c595f5..a0474eb94aa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -42,6 +42,10 @@ #include <linux/vmalloc.h> #include <linux/irq.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + #include "mlx4_en.h" static int mlx4_alloc_pages(struct mlx4_en_priv *priv, @@ -74,7 +78,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, page_alloc->page_size = PAGE_SIZE << order; page_alloc->page = page; page_alloc->dma = dma; - page_alloc->page_offset = frag_info->frag_align; + page_alloc->page_offset = 0; /* Not doing get_page() for each frag is a big win * on asymetric workloads. Note we can not use atomic_set(). */ @@ -119,7 +123,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, out: while (i--) { - frag_info = &priv->frag_info[i]; if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, page_alloc[i].page_size, PCI_DMA_FROMDEVICE); @@ -157,7 +160,7 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; if (mlx4_alloc_pages(priv, &ring->page_alloc[i], - frag_info, GFP_KERNEL)) + frag_info, GFP_KERNEL | __GFP_COLD)) goto out; } return 0; @@ -269,7 +272,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) if (mlx4_en_prepare_rx_desc(priv, ring, ring->actual_size, - GFP_KERNEL)) { + GFP_KERNEL | __GFP_COLD)) { if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { en_err(priv, "Failed to allocate enough rx buffers\n"); return -ENOMEM; @@ -636,13 +639,94 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, int index = ring->prod & ring->size_mask; while ((u32) (ring->prod - ring->cons) < ring->actual_size) { - if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC)) + if (mlx4_en_prepare_rx_desc(priv, ring, index, + GFP_ATOMIC | __GFP_COLD)) break; ring->prod++; index = ring->prod & ring->size_mask; } } +/* When hardware doesn't strip the vlan, we need to calculate the checksum + * over it and add it to the hardware's checksum calculation + */ +static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, + struct vlan_hdr *vlanh) +{ + return csum_add(hw_checksum, *(__wsum *)vlanh); +} + +/* Although the stack expects checksum which doesn't include the pseudo + * header, the HW adds it. To address that, we are subtracting the pseudo + * header checksum from the checksum value provided by the HW. + */ +static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) +{ + __u16 length_for_csum = 0; + __wsum csum_pseudo_header = 0; + + length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); + csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, + length_for_csum, iph->protocol, 0); + skb->csum = csum_sub(hw_checksum, csum_pseudo_header); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* In IPv6 packets, besides subtracting the pseudo header checksum, + * we also compute/add the IP header checksum which + * is not added by the HW. + */ +static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, + struct ipv6hdr *ipv6h) +{ + __wsum csum_pseudo_hdr = 0; + + if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + return -1; + hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8)); + + csum_pseudo_hdr = csum_partial(&ipv6h->saddr, + sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + + skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); + skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); + return 0; +} +#endif +static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, + int hwtstamp_rx_filter) +{ + __wsum hw_checksum = 0; + + void *hdr = (u8 *)va + sizeof(struct ethhdr); + + hw_checksum = csum_unfold((__force __sum16)cqe->checksum); + + if (((struct ethhdr *)va)->h_proto == htons(ETH_P_8021Q) && + hwtstamp_rx_filter != HWTSTAMP_FILTER_NONE) { + /* next protocol non IPv4 or IPv6 */ + if (((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IP) && + ((struct vlan_hdr *)hdr)->h_vlan_encapsulated_proto + != htons(ETH_P_IPV6)) + return -1; + hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); + hdr += sizeof(struct vlan_hdr); + } + + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) + get_fixed_ipv4_csum(hw_checksum, skb, hdr); +#if IS_ENABLED(CONFIG_IPV6) + else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + return -1; +#endif + return 0; +} + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -744,73 +828,96 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); if (likely(dev->features & NETIF_F_RXCSUM)) { - if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && - (cqe->checksum == cpu_to_be16(0xffff))) { - ring->csum_ok++; - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - * - no LLS polling in progress - */ - if (!mlx4_en_cq_busy_polling(cq) && - (dev->features & NETIF_F_GRO)) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); - if (!nr) - goto next; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) { + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && + cqe->checksum == cpu_to_be16(0xffff)) { + ip_summed = CHECKSUM_UNNECESSARY; + ring->csum_ok++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } else { + if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | + MLX4_CQE_STATUS_IPV6))) { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } + } + } else { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + } - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* This packet is eligible for GRO if it is: + * - DIX Ethernet (type interpretation) + * - TCP/IP (v4) + * - without IP options + * - not an IP fragment + * - no LLS polling in progress + */ + if (!mlx4_en_cq_busy_polling(cq) && + (dev->features & NETIF_F_GRO)) { + struct sk_buff *gro_skb = napi_get_frags(&cq->napi); + if (!gro_skb) + goto next; + + nr = mlx4_en_complete_rx_desc(priv, + rx_desc, frags, gro_skb, + length); + if (!nr) + goto next; + + if (ip_summed == CHECKSUM_COMPLETE) { + void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); + if (check_csum(cqe, gro_skb, va, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_none++; + ring->csum_complete--; + } + } - if (l2_tunnel) - gro_skb->csum_level = 1; - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = be16_to_cpu(cqe->sl_vid); + skb_shinfo(gro_skb)->nr_frags = nr; + gro_skb->len = length; + gro_skb->data_len = length; + gro_skb->ip_summed = ip_summed; - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } + if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) + gro_skb->csum_level = 1; - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - PKT_HASH_TYPE_L3); + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { + u16 vid = be16_to_cpu(cqe->sl_vid); - skb_record_rx_queue(gro_skb, cq->ring); - skb_mark_napi_id(gro_skb, &cq->napi); + __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); + } - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } + if (dev->features & NETIF_F_RXHASH) + skb_set_hash(gro_skb, + be32_to_cpu(cqe->immed_rss_invalid), + PKT_HASH_TYPE_L3); - napi_gro_frags(&cq->napi); - goto next; - } + skb_record_rx_queue(gro_skb, cq->ring); + skb_mark_napi_id(gro_skb, &cq->napi); - /* GRO not possible, complete processing here */ - ip_summed = CHECKSUM_UNNECESSARY; - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, + skb_hwtstamps(gro_skb), + timestamp); } - } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + + napi_gro_frags(&cq->napi); + goto next; } + /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); if (!skb) { priv->stats.rx_dropped++; @@ -822,6 +929,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud goto next; } + if (ip_summed == CHECKSUM_COMPLETE) { + if (check_csum(cqe, skb, skb->data, ring->hwtstamp_rx_filter)) { + ip_summed = CHECKSUM_NONE; + ring->csum_complete--; + ring->csum_none++; + } + } + skb->ip_summed = ip_summed; skb->protocol = eth_type_trans(skb, dev); skb_record_rx_queue(skb, cq->ring); @@ -879,8 +994,8 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq) struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - if (priv->port_up) - napi_schedule(&cq->napi); + if (likely(priv->port_up)) + napi_schedule_irqoff(&cq->napi); else mlx4_en_arm_cq(priv, cq); } @@ -910,20 +1025,18 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) cpu_curr = smp_processor_id(); aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; - if (unlikely(!cpumask_test_cpu(cpu_curr, aff))) { - /* Current cpu is not according to smp_irq_affinity - - * probably affinity changed. need to stop this NAPI - * poll, and restart it on the right CPU - */ - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); - return 0; - } - } else { - /* Done for now */ - napi_complete(napi); - mlx4_en_arm_cq(priv, cq); + if (likely(cpumask_test_cpu(cpu_curr, aff))) + return budget; + + /* Current cpu is not according to smp_irq_affinity - + * probably affinity changed. need to stop this NAPI + * poll, and restart it on the right CPU + */ + done = 0; } + /* Done for now */ + napi_complete_done(napi, done); + mlx4_en_arm_cq(priv, cq); return done; } @@ -946,15 +1059,8 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; - if (!i) { - priv->frag_info[i].frag_align = NET_IP_ALIGN; - priv->frag_info[i].frag_stride = - ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES); - } else { - priv->frag_info[i].frag_align = 0; - priv->frag_info[i].frag_stride = - ALIGN(frag_sizes[i], SMP_CACHE_BYTES); - } + priv->frag_info[i].frag_stride = ALIGN(frag_sizes[i], + SMP_CACHE_BYTES); buf_size += priv->frag_info[i].frag_size; i++; } @@ -967,11 +1073,10 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_err(priv, - " frag:%d - size:%d prefix:%d align:%d stride:%d\n", + " frag:%d - size:%d prefix:%d stride:%d\n", i, priv->frag_info[i].frag_size, priv->frag_info[i].frag_prefix_size, - priv->frag_info[i].frag_align, priv->frag_info[i].frag_stride); } } @@ -1026,7 +1131,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv) int err; u32 qpn; - err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn); + err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn, + MLX4_RESERVE_A0_QP); if (err) { en_err(priv, "Failed reserving drop qpn\n"); return err; @@ -1065,14 +1171,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) int i, qpn; int err = 0; int good_qps = 0; - static const u32 rsskey[10] = { 0xD181C62C, 0xF7F4DB5B, 0x1983A2FC, - 0x943E1ADB, 0xD9389E6B, 0xD1039C2C, 0xA74499AD, - 0x593D56D9, 0xF3253C06, 0x2ADC1FFC}; en_dbg(DRV, priv, "Configuring rss steering\n"); err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num, priv->rx_ring_num, - &rss_map->base_qpn); + &rss_map->base_qpn, 0); if (err) { en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num); return err; @@ -1122,9 +1225,19 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) rss_context->flags = rss_mask; rss_context->hash_fn = MLX4_RSS_HASH_TOP; - for (i = 0; i < 10; i++) - rss_context->rss_key[i] = cpu_to_be32(rsskey[i]); - + if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) { + rss_context->hash_fn = MLX4_RSS_HASH_XOR; + } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) { + rss_context->hash_fn = MLX4_RSS_HASH_TOP; + memcpy(rss_context->rss_key, priv->rss_key, + MLX4_EN_RSS_KEY_SIZE); + netdev_rss_key_fill(rss_context->rss_key, + MLX4_EN_RSS_KEY_SIZE); + } else { + en_err(priv, "Unknown RSS hash function requested\n"); + err = -EINVAL; + goto indir_err; + } err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context, &rss_map->indir_qp, &rss_map->indir_state); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 49d5afc7cfb..2d8ee66138e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -129,11 +129,15 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv) if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) return -ENOMEM; - /* The device supports 1G, 10G and 40G speeds */ - if (priv->port_state.link_speed != 1000 && - priv->port_state.link_speed != 10000 && - priv->port_state.link_speed != 40000) + /* The device supports 100M, 1G, 10G, 20G, 40G and 56G speed */ + if (priv->port_state.link_speed != SPEED_100 && + priv->port_state.link_speed != SPEED_1000 && + priv->port_state.link_speed != SPEED_10000 && + priv->port_state.link_speed != SPEED_20000 && + priv->port_state.link_speed != SPEED_40000 && + priv->port_state.link_speed != SPEED_56000) return priv->port_state.link_speed; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 454d9fea640..a308d41e4de 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -46,7 +46,7 @@ #include "mlx4_en.h" int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring **pring, int qpn, u32 size, + struct mlx4_en_tx_ring **pring, u32 size, u16 stride, int node, int queue_index) { struct mlx4_en_dev *mdev = priv->mdev; @@ -112,11 +112,17 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring, ring->buf, ring->size, ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); - ring->qpn = qpn; + err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn, + MLX4_RESERVE_ETH_BF_QP); + if (err) { + en_err(priv, "failed reserving qp for TX ring\n"); + goto err_map; + } + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); - goto err_map; + goto err_reserve; } ring->qp.event = mlx4_en_sqp_event; @@ -143,6 +149,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, *pring = ring; return 0; +err_reserve: + mlx4_qp_release_range(mdev->dev, ring->qpn, 1); err_map: mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: @@ -479,8 +487,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - if (priv->port_up) - napi_schedule(&cq->napi); + if (likely(priv->port_up)) + napi_schedule_irqoff(&cq->napi); else mlx4_en_arm_cq(priv, cq); } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 49290a40590..3d275fbaf0e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -450,7 +450,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_eqe *eqe; - int cqn; + int cqn = -1; int eqes_found = 0; int set_ci = 0; int port; @@ -758,6 +758,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq_set_ci(eq, 1); + /* cqn is 24bit wide but is initialized such that its higher bits + * are ones too. Thus, if we got any event, cqn's high bits should be off + * and we need to schedule the tasklet. + */ + if (!(cqn & ~0xffffff)) + tasklet_schedule(&eq->tasklet_ctx.task); + return eqes_found; } @@ -971,6 +978,12 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq->cons_index = 0; + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb, + (unsigned long)&eq->tasklet_ctx); + return err; err_out_free_mtt: @@ -1027,6 +1040,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev, } } synchronize_irq(eq->irq); + tasklet_disable(&eq->tasklet_ctx.task); mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) @@ -1123,8 +1137,12 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) goto err_out_free; } - err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs, - dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0); + err = mlx4_bitmap_init(&priv->eq_table.bitmap, + roundup_pow_of_two(dev->caps.num_eqs), + dev->caps.num_eqs - 1, + dev->caps.reserved_eqs, + roundup_pow_of_two(dev->caps.num_eqs) - + dev->caps.num_eqs); if (err) goto err_out_free; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 2e88a235e26..ef3b95bac2a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -139,7 +139,13 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [10] = "TCP/IP offloads/flow-steering for VXLAN support", [11] = "MAD DEMUX (Secure-Host) support", [12] = "Large cache line (>64B) CQE stride support", - [13] = "Large cache line (>64B) EQE stride support" + [13] = "Large cache line (>64B) EQE stride support", + [14] = "Ethernet protocol control support", + [15] = "Ethernet Backplane autoneg support", + [16] = "CONFIG DEV support", + [17] = "Asymmetric EQs support", + [18] = "More than 80 VFs support", + [19] = "Performance optimized for limited rule configuration flow steering support" }; int i; @@ -174,6 +180,61 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg) return err; } +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 in_modifier; + u8 field; + u16 field16; + int err; + +#define QUERY_FUNC_BUS_OFFSET 0x00 +#define QUERY_FUNC_DEVICE_OFFSET 0x01 +#define QUERY_FUNC_FUNCTION_OFFSET 0x01 +#define QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET 0x03 +#define QUERY_FUNC_RSVD_EQS_OFFSET 0x04 +#define QUERY_FUNC_MAX_EQ_OFFSET 0x06 +#define QUERY_FUNC_RSVD_UARS_OFFSET 0x0b + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + in_modifier = slave; + + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, 0, + MLX4_CMD_QUERY_FUNC, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_FUNC_BUS_OFFSET); + func->bus = field & 0xf; + MLX4_GET(field, outbox, QUERY_FUNC_DEVICE_OFFSET); + func->device = field & 0xf1; + MLX4_GET(field, outbox, QUERY_FUNC_FUNCTION_OFFSET); + func->function = field & 0x7; + MLX4_GET(field, outbox, QUERY_FUNC_PHYSICAL_FUNCTION_OFFSET); + func->physical_function = field & 0xf; + MLX4_GET(field16, outbox, QUERY_FUNC_RSVD_EQS_OFFSET); + func->rsvd_eqs = field16 & 0xffff; + MLX4_GET(field16, outbox, QUERY_FUNC_MAX_EQ_OFFSET); + func->max_eq = field16 & 0xffff; + MLX4_GET(field, outbox, QUERY_FUNC_RSVD_UARS_OFFSET); + func->rsvd_uars = field & 0x0f; + + mlx4_dbg(dev, "Bus: %d, Device: %d, Function: %d, Physical function: %d, Max EQs: %d, Reserved EQs: %d, Reserved UARs: %d\n", + func->bus, func->device, func->function, func->physical_function, + func->max_eq, func->rsvd_eqs, func->rsvd_uars); + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -184,6 +245,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, u8 field, port; u32 size, proxy_qp, qkey; int err = 0; + struct mlx4_func func; #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 @@ -205,10 +267,16 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x64 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x68 +#define QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET 0x6c + #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 #define QUERY_FUNC_CAP_FLAG_ETH 0x80 #define QUERY_FUNC_CAP_FLAG_QUOTAS 0x10 +#define QUERY_FUNC_CAP_FLAG_VALID_MAILBOX 0x04 + +#define QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG (1UL << 31) +#define QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG (1UL << 30) /* when opcode modifier = 1 */ #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET 0x3 @@ -228,6 +296,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = @@ -277,7 +346,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, mlx4_get_active_ports(dev, slave); /* enable rdma and ethernet interfaces, and new quota locations */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA | - QUERY_FUNC_CAP_FLAG_QUOTAS); + QUERY_FUNC_CAP_FLAG_QUOTAS | QUERY_FUNC_CAP_FLAG_VALID_MAILBOX); MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); field = min( @@ -306,11 +375,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_cqs; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET_DEP); - size = dev->caps.num_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - - size = dev->caps.reserved_eqs; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) || + mlx4_QUERY_FUNC(dev, &func, slave)) { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + dev->caps.num_eqs : + rounddown_pow_of_two(dev->caps.num_eqs); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = dev->caps.reserved_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } else { + size = vhcr->in_modifier & + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS ? + func.max_eq : + rounddown_pow_of_two(func.max_eq); + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + size = func.rsvd_eqs; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + } size = priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[slave]; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); @@ -326,13 +408,16 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET_DEP); + size = QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG | + QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); } else err = -EINVAL; return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; @@ -340,14 +425,17 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, u8 field, op_modifier; u32 size, qkey; int err = 0, quotas = 0; + u32 in_modifier; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ + in_modifier = op_modifier ? gen_or_port : + QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + err = mlx4_cmd_box(dev, 0, mailbox->dma, in_modifier, op_modifier, MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) @@ -415,6 +503,19 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); func_cap->reserved_eq = size & 0xFFFFFF; + func_cap->extra_flags = 0; + + /* Mailbox data from 0x6c and onward should only be treated if + * QUERY_FUNC_CAP_FLAG_VALID_MAILBOX is set in func_cap->flags + */ + if (func_cap->flags & QUERY_FUNC_CAP_FLAG_VALID_MAILBOX) { + MLX4_GET(size, outbox, QUERY_FUNC_CAP_EXTRA_FLAGS_OFFSET); + if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_BF_QP_ALLOC_FLAG) + func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_BF_RES_QP; + if (size & QUERY_FUNC_CAP_EXTRA_FLAGS_A0_QP_ALLOC_FLAG) + func_cap->extra_flags |= MLX4_QUERY_FUNC_FLAGS_A0_RES_QP; + } + goto out; } @@ -519,6 +620,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET 0x21 #define QUERY_DEV_CAP_RSVD_MRW_OFFSET 0x22 #define QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET 0x23 +#define QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET 0x26 #define QUERY_DEV_CAP_MAX_AV_OFFSET 0x27 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET 0x29 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET 0x2b @@ -560,6 +662,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77 #define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE 0x7a +#define QUERY_DEV_CAP_ETH_PROT_CTRL_OFFSET 0x7a #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84 @@ -571,11 +674,15 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MTT_ENTRY_SZ_OFFSET 0x90 #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 +#define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 +#define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 +#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET 0xa8 +#define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET 0xac dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -605,7 +712,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET); dev_cap->max_mpts = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET); - dev_cap->reserved_eqs = field & 0xf; + dev_cap->reserved_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET); dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); @@ -616,6 +723,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->reserved_mrws = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); dev_cap->max_mtt_seg = 1 << (field & 0x3f); + MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); + dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); dev_cap->max_requester_per_qp = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET); @@ -737,15 +846,22 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET); dev_cap->max_rq_desc_sz = size; MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE); + if (field & (1 << 5)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_PROT_CTRL; if (field & (1 << 6)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE; if (field & (1 << 7)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; - MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); + if (field & 0x20) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); + MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -763,6 +879,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + MLX4_GET(dev_cap->dmfs_high_rate_qpn_base, outbox, + QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET); + dev_cap->dmfs_high_rate_qpn_base &= MGM_QPN_MASK; + MLX4_GET(dev_cap->dmfs_high_rate_qpn_range, outbox, + QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET); + dev_cap->dmfs_high_rate_qpn_range &= MGM_QPN_MASK; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -770,62 +893,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FSM; + if (field32 & (1 << 21)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_80_VFS; - if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { - for (i = 1; i <= dev_cap->num_ports; ++i) { - MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); - dev_cap->max_vl[i] = field >> 4; - MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); - dev_cap->ib_mtu[i] = field >> 4; - dev_cap->max_port_width[i] = field & 0xf; - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); - dev_cap->max_gids[i] = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); - dev_cap->max_pkeys[i] = 1 << (field & 0xf); - } - } else { -#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 -#define QUERY_PORT_MTU_OFFSET 0x01 -#define QUERY_PORT_ETH_MTU_OFFSET 0x02 -#define QUERY_PORT_WIDTH_OFFSET 0x06 -#define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 -#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a -#define QUERY_PORT_MAX_VL_OFFSET 0x0b -#define QUERY_PORT_MAC_OFFSET 0x10 -#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 -#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c -#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 - - for (i = 1; i <= dev_cap->num_ports; ++i) { - err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - if (err) - goto out; - - MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); - dev_cap->supported_port_types[i] = field & 3; - dev_cap->suggested_type[i] = (field >> 3) & 1; - dev_cap->default_sense[i] = (field >> 4) & 1; - MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); - dev_cap->ib_mtu[i] = field & 0xf; - MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); - dev_cap->max_port_width[i] = field & 0xf; - MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); - dev_cap->max_gids[i] = 1 << (field >> 4); - dev_cap->max_pkeys[i] = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); - dev_cap->max_vl[i] = field & 0xf; - MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); - dev_cap->log_max_macs[i] = field & 0xf; - dev_cap->log_max_vlans[i] = field >> 4; - MLX4_GET(dev_cap->eth_mtu[i], outbox, QUERY_PORT_ETH_MTU_OFFSET); - MLX4_GET(dev_cap->def_mac[i], outbox, QUERY_PORT_MAC_OFFSET); - MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); - dev_cap->trans_type[i] = field32 >> 24; - dev_cap->vendor_oui[i] = field32 & 0xffffff; - MLX4_GET(dev_cap->wavelength[i], outbox, QUERY_PORT_WAVELENGTH_OFFSET); - MLX4_GET(dev_cap->trans_code[i], outbox, QUERY_PORT_TRANS_CODE_OFFSET); - } + for (i = 1; i <= dev_cap->num_ports; i++) { + err = mlx4_QUERY_PORT(dev, i, dev_cap->port_cap + i); + if (err) + goto out; } mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n", @@ -836,8 +910,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) * we can't use any EQs whose doorbell falls on that page, * even if the EQ itself isn't reserved. */ - dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, - dev_cap->reserved_eqs); + if (dev_cap->num_sys_eqs == 0) + dev_cap->reserved_eqs = max(dev_cap->reserved_uars * 4, + dev_cap->reserved_eqs); + else + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS; mlx4_dbg(dev, "Max ICM size %lld MB\n", (unsigned long long) dev_cap->max_icm_sz >> 20); @@ -847,8 +924,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_srqs, dev_cap->reserved_srqs, dev_cap->srq_entry_sz); mlx4_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_cap->max_cqs, dev_cap->reserved_cqs, dev_cap->cqc_entry_sz); - mlx4_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", - dev_cap->max_eqs, dev_cap->reserved_eqs, dev_cap->eqc_entry_sz); + mlx4_dbg(dev, "Num sys EQs: %d, max EQs: %d, reserved EQs: %d, entry size: %d\n", + dev_cap->num_sys_eqs, dev_cap->max_eqs, dev_cap->reserved_eqs, + dev_cap->eqc_entry_sz); mlx4_dbg(dev, "reserved MPTs: %d, reserved MTTs: %d\n", dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", @@ -858,8 +936,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", - dev_cap->local_ca_ack_delay, 128 << dev_cap->ib_mtu[1], - dev_cap->max_port_width[1]); + dev_cap->local_ca_ack_delay, 128 << dev_cap->port_cap[1].ib_mtu, + dev_cap->port_cap[1].max_port_width); mlx4_dbg(dev, "Max SQ desc size: %d, max SQ S/G: %d\n", dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg); mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n", @@ -867,6 +945,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz); mlx4_dbg(dev, "Max counters: %d\n", dev_cap->max_counters); mlx4_dbg(dev, "Max RSS Table size: %d\n", dev_cap->max_rss_tbl_sz); + mlx4_dbg(dev, "DMFS high rate steer QPn base: %d\n", + dev_cap->dmfs_high_rate_qpn_base); + mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n", + dev_cap->dmfs_high_rate_qpn_range); dump_dev_cap_flags(dev, dev_cap->flags); dump_dev_cap_flags2(dev, dev_cap->flags2); @@ -876,6 +958,89 @@ out: return err; } +int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap) +{ + struct mlx4_cmd_mailbox *mailbox; + u32 *outbox; + u8 field; + u32 field32; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; + + if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, + MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET); + port_cap->max_vl = field >> 4; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MTU_WIDTH_OFFSET); + port_cap->ib_mtu = field >> 4; + port_cap->max_port_width = field & 0xf; + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GID_OFFSET); + port_cap->max_gids = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PKEY_OFFSET); + port_cap->max_pkeys = 1 << (field & 0xf); + } else { +#define QUERY_PORT_SUPPORTED_TYPE_OFFSET 0x00 +#define QUERY_PORT_MTU_OFFSET 0x01 +#define QUERY_PORT_ETH_MTU_OFFSET 0x02 +#define QUERY_PORT_WIDTH_OFFSET 0x06 +#define QUERY_PORT_MAX_GID_PKEY_OFFSET 0x07 +#define QUERY_PORT_MAX_MACVLAN_OFFSET 0x0a +#define QUERY_PORT_MAX_VL_OFFSET 0x0b +#define QUERY_PORT_MAC_OFFSET 0x10 +#define QUERY_PORT_TRANS_VENDOR_OFFSET 0x18 +#define QUERY_PORT_WAVELENGTH_OFFSET 0x1c +#define QUERY_PORT_TRANS_CODE_OFFSET 0x20 + + err = mlx4_cmd_box(dev, 0, mailbox->dma, port, 0, MLX4_CMD_QUERY_PORT, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) + goto out; + + MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET); + port_cap->supported_port_types = field & 3; + port_cap->suggested_type = (field >> 3) & 1; + port_cap->default_sense = (field >> 4) & 1; + port_cap->dmfs_optimized_state = (field >> 5) & 1; + MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET); + port_cap->ib_mtu = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET); + port_cap->max_port_width = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_GID_PKEY_OFFSET); + port_cap->max_gids = 1 << (field >> 4); + port_cap->max_pkeys = 1 << (field & 0xf); + MLX4_GET(field, outbox, QUERY_PORT_MAX_VL_OFFSET); + port_cap->max_vl = field & 0xf; + MLX4_GET(field, outbox, QUERY_PORT_MAX_MACVLAN_OFFSET); + port_cap->log_max_macs = field & 0xf; + port_cap->log_max_vlans = field >> 4; + MLX4_GET(port_cap->eth_mtu, outbox, QUERY_PORT_ETH_MTU_OFFSET); + MLX4_GET(port_cap->def_mac, outbox, QUERY_PORT_MAC_OFFSET); + MLX4_GET(field32, outbox, QUERY_PORT_TRANS_VENDOR_OFFSET); + port_cap->trans_type = field32 >> 24; + port_cap->vendor_oui = field32 & 0xffffff; + MLX4_GET(port_cap->wavelength, outbox, QUERY_PORT_WAVELENGTH_OFFSET); + MLX4_GET(port_cap->trans_code, outbox, QUERY_PORT_TRANS_CODE_OFFSET); + } + +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +#define DEV_CAP_EXT_2_FLAG_VLAN_CONTROL (1 << 26) +#define DEV_CAP_EXT_2_FLAG_80_VFS (1 << 21) +#define DEV_CAP_EXT_2_FLAG_FSM (1 << 20) + int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -885,7 +1050,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, u64 flags; int err = 0; u8 field; - u32 bmme_flags; + u32 bmme_flags, field32; int real_port; int slave_port; int first_port; @@ -956,6 +1121,12 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= ~0x80; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + /* turn off host side virt features (VST, FSM, etc) for guests */ + MLX4_GET(field32, outbox->buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + field32 &= ~(DEV_CAP_EXT_2_FLAG_VLAN_CONTROL | DEV_CAP_EXT_2_FLAG_80_VFS | + DEV_CAP_EXT_2_FLAG_FSM); + MLX4_PUT(outbox->buf, field32, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + return 0; } @@ -1374,6 +1545,12 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) struct mlx4_cmd_mailbox *mailbox; __be32 *inbox; int err; + static const u8 a0_dmfs_hw_steering[] = { + [MLX4_STEERING_DMFS_A0_DEFAULT] = 0, + [MLX4_STEERING_DMFS_A0_DYNAMIC] = 1, + [MLX4_STEERING_DMFS_A0_STATIC] = 2, + [MLX4_STEERING_DMFS_A0_DISABLE] = 3 + }; #define INIT_HCA_IN_SIZE 0x200 #define INIT_HCA_VERSION_OFFSET 0x000 @@ -1394,6 +1571,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_AUXC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x50) #define INIT_HCA_EQC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x60) #define INIT_HCA_LOG_EQ_OFFSET (INIT_HCA_QPC_OFFSET + 0x67) +#define INIT_HCA_NUM_SYS_EQS_OFFSET (INIT_HCA_QPC_OFFSET + 0x6a) #define INIT_HCA_RDMARC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x70) #define INIT_HCA_LOG_RD_OFFSET (INIT_HCA_QPC_OFFSET + 0x77) #define INIT_HCA_MCAST_OFFSET 0x0c0 @@ -1406,6 +1584,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_FS_PARAM_OFFSET 0x1d0 #define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00) #define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12) +#define INIT_HCA_FS_A0_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x18) #define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b) #define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21) #define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22) @@ -1497,6 +1676,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, param->auxc_base, INIT_HCA_AUXC_BASE_OFFSET); MLX4_PUT(inbox, param->eqc_base, INIT_HCA_EQC_BASE_OFFSET); MLX4_PUT(inbox, param->log_num_eqs, INIT_HCA_LOG_EQ_OFFSET); + MLX4_PUT(inbox, param->num_sys_eqs, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET); @@ -1515,8 +1695,11 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) /* Enable Ethernet flow steering * with udp unicast and tcp unicast */ - MLX4_PUT(inbox, (u8) (MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), - INIT_HCA_FS_ETH_BITS_OFFSET); + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_STATIC) + MLX4_PUT(inbox, + (u8)(MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN), + INIT_HCA_FS_ETH_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET); /* Enable IPoIB flow steering @@ -1526,6 +1709,13 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) INIT_HCA_FS_IB_BITS_OFFSET); MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR, INIT_HCA_FS_IB_NUM_ADDRS_OFFSET); + + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + MLX4_PUT(inbox, + ((u8)(a0_dmfs_hw_steering[dev->caps.dmfs_high_steer_mode] + << 6)), + INIT_HCA_FS_A0_OFFSET); } else { MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET); MLX4_PUT(inbox, param->log_mc_entry_sz, @@ -1576,6 +1766,12 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, u32 dword_field; int err; u8 byte_field; + static const u8 a0_dmfs_query_hw_steering[] = { + [0] = MLX4_STEERING_DMFS_A0_DEFAULT, + [1] = MLX4_STEERING_DMFS_A0_DYNAMIC, + [2] = MLX4_STEERING_DMFS_A0_STATIC, + [3] = MLX4_STEERING_DMFS_A0_DISABLE + }; #define QUERY_HCA_GLOBAL_CAPS_OFFSET 0x04 #define QUERY_HCA_CORE_CLOCK_OFFSET 0x0c @@ -1607,6 +1803,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->auxc_base, outbox, INIT_HCA_AUXC_BASE_OFFSET); MLX4_GET(param->eqc_base, outbox, INIT_HCA_EQC_BASE_OFFSET); MLX4_GET(param->log_num_eqs, outbox, INIT_HCA_LOG_EQ_OFFSET); + MLX4_GET(param->num_sys_eqs, outbox, INIT_HCA_NUM_SYS_EQS_OFFSET); MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET); MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET); @@ -1627,6 +1824,10 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET); MLX4_GET(param->log_mc_table_sz, outbox, INIT_HCA_FS_LOG_TABLE_SZ_OFFSET); + MLX4_GET(byte_field, outbox, + INIT_HCA_FS_A0_OFFSET); + param->dmfs_high_steer_mode = + a0_dmfs_query_hw_steering[(byte_field >> 6) & 3]; } else { MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET); MLX4_GET(param->log_mc_entry_sz, outbox, @@ -1841,14 +2042,18 @@ int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) struct mlx4_config_dev { __be32 update_flags; - __be32 rsdv1[3]; + __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; + __be32 rsvd3[27]; + __be16 rsvd4; + u8 rsvd5; + u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) -static int mlx4_CONFIG_DEV(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) +static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { int err; struct mlx4_cmd_mailbox *mailbox; @@ -1866,6 +2071,77 @@ static int mlx4_CONFIG_DEV(struct mlx4_dev *dev, struct mlx4_config_dev *config_ return err; } +static int mlx4_CONFIG_DEV_get(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) +{ + int err; + struct mlx4_cmd_mailbox *mailbox; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 1, MLX4_CMD_CONFIG_DEV, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + + if (!err) + memcpy(config_dev, mailbox->buf, sizeof(*config_dev)); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +/* Conversion between the HW values and the actual functionality. + * The value represented by the array index, + * and the functionality determined by the flags. + */ +static const u8 config_dev_csum_flags[] = { + [0] = 0, + [1] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP, + [2] = MLX4_RX_CSUM_MODE_VAL_NON_TCP_UDP | + MLX4_RX_CSUM_MODE_L4, + [3] = MLX4_RX_CSUM_MODE_L4 | + MLX4_RX_CSUM_MODE_IP_OK_IP_NON_TCP_UDP | + MLX4_RX_CSUM_MODE_MULTI_VLAN +}; + +int mlx4_config_dev_retrieval(struct mlx4_dev *dev, + struct mlx4_config_dev_params *params) +{ + struct mlx4_config_dev config_dev; + int err; + u8 csum_mask; + +#define CONFIG_DEV_RX_CSUM_MODE_MASK 0x7 +#define CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET 0 +#define CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET 4 + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CONFIG_DEV)) + return -ENOTSUPP; + + err = mlx4_CONFIG_DEV_get(dev, &config_dev); + if (err) + return err; + + csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & + CONFIG_DEV_RX_CSUM_MODE_MASK; + + if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + return -EINVAL; + params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; + + csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & + CONFIG_DEV_RX_CSUM_MODE_MASK; + + if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + return -EINVAL; + params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; + + params->vxlan_udp_dport = be16_to_cpu(config_dev.vxlan_udp_dport); + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_config_dev_retrieval); + int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) { struct mlx4_config_dev config_dev; @@ -1874,7 +2150,7 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) config_dev.update_flags = cpu_to_be32(MLX4_VXLAN_UDP_DPORT); config_dev.vxlan_udp_dport = udp_port; - return mlx4_CONFIG_DEV(dev, &config_dev); + return mlx4_CONFIG_DEV_set(dev, &config_dev); } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); @@ -2144,3 +2420,142 @@ out: mlx4_free_cmd_mailbox(dev, mailbox); return err; } + +/* Access Reg commands */ +enum mlx4_access_reg_masks { + MLX4_ACCESS_REG_STATUS_MASK = 0x7f, + MLX4_ACCESS_REG_METHOD_MASK = 0x7f, + MLX4_ACCESS_REG_LEN_MASK = 0x7ff +}; + +struct mlx4_access_reg { + __be16 constant1; + u8 status; + u8 resrvd1; + __be16 reg_id; + u8 method; + u8 constant2; + __be32 resrvd2[2]; + __be16 len_const; + __be16 resrvd3; +#define MLX4_ACCESS_REG_HEADER_SIZE (20) + u8 reg_data[MLX4_MAILBOX_SIZE-MLX4_ACCESS_REG_HEADER_SIZE]; +} __attribute__((__packed__)); + +/** + * mlx4_ACCESS_REG - Generic access reg command. + * @dev: mlx4_dev. + * @reg_id: register ID to access. + * @method: Access method Read/Write. + * @reg_len: register length to Read/Write in bytes. + * @reg_data: reg_data pointer to Read/Write From/To. + * + * Access ConnectX registers FW command. + * Returns 0 on success and copies outbox mlx4_access_reg data + * field into reg_data or a negative error code. + */ +static int mlx4_ACCESS_REG(struct mlx4_dev *dev, u16 reg_id, + enum mlx4_access_reg_method method, + u16 reg_len, void *reg_data) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_access_reg *inbuf, *outbuf; + int err; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inbuf = inbox->buf; + outbuf = outbox->buf; + + inbuf->constant1 = cpu_to_be16(0x1<<11 | 0x4); + inbuf->constant2 = 0x1; + inbuf->reg_id = cpu_to_be16(reg_id); + inbuf->method = method & MLX4_ACCESS_REG_METHOD_MASK; + + reg_len = min(reg_len, (u16)(sizeof(inbuf->reg_data))); + inbuf->len_const = + cpu_to_be16(((reg_len/4 + 1) & MLX4_ACCESS_REG_LEN_MASK) | + ((0x3) << 12)); + + memcpy(inbuf->reg_data, reg_data, reg_len); + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, 0, 0, + MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_WRAPPED); + if (err) + goto out; + + if (outbuf->status & MLX4_ACCESS_REG_STATUS_MASK) { + err = outbuf->status & MLX4_ACCESS_REG_STATUS_MASK; + mlx4_err(dev, + "MLX4_CMD_ACCESS_REG(%x) returned REG status (%x)\n", + reg_id, err); + goto out; + } + + memcpy(reg_data, outbuf->reg_data, reg_len); +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return err; +} + +/* ConnectX registers IDs */ +enum mlx4_reg_id { + MLX4_REG_ID_PTYS = 0x5004, +}; + +/** + * mlx4_ACCESS_PTYS_REG - Access PTYs (Port Type and Speed) + * register + * @dev: mlx4_dev. + * @method: Access method Read/Write. + * @ptys_reg: PTYS register data pointer. + * + * Access ConnectX PTYS register, to Read/Write Port Type/Speed + * configuration + * Returns 0 on success or a negative error code. + */ +int mlx4_ACCESS_PTYS_REG(struct mlx4_dev *dev, + enum mlx4_access_reg_method method, + struct mlx4_ptys_reg *ptys_reg) +{ + return mlx4_ACCESS_REG(dev, MLX4_REG_ID_PTYS, + method, sizeof(*ptys_reg), ptys_reg); +} +EXPORT_SYMBOL_GPL(mlx4_ACCESS_PTYS_REG); + +int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_access_reg *inbuf = inbox->buf; + u8 method = inbuf->method & MLX4_ACCESS_REG_METHOD_MASK; + u16 reg_id = be16_to_cpu(inbuf->reg_id); + + if (slave != mlx4_master_func_num(dev) && + method == MLX4_ACCESS_REG_WRITE) + return -EPERM; + + if (reg_id == MLX4_REG_ID_PTYS) { + struct mlx4_ptys_reg *ptys_reg = + (struct mlx4_ptys_reg *)inbuf->reg_data; + + ptys_reg->local_port = + mlx4_slave_convert_port(dev, slave, + ptys_reg->local_port); + } + + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, vhcr->in_modifier, + 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); +} diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 9b835aecac9..794e2826609 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -43,6 +43,26 @@ struct mlx4_mod_stat_cfg { u8 log_pg_sz_m; }; +struct mlx4_port_cap { + u8 supported_port_types; + u8 suggested_type; + u8 default_sense; + u8 log_max_macs; + u8 log_max_vlans; + int ib_mtu; + int max_port_width; + int max_vl; + int max_gids; + int max_pkeys; + u64 def_mac; + u16 eth_mtu; + int trans_type; + int vendor_oui; + u16 wavelength; + u64 trans_code; + u8 dmfs_optimized_state; +}; + struct mlx4_dev_cap { int max_srq_sz; int max_qp_sz; @@ -56,6 +76,7 @@ struct mlx4_dev_cap { int max_mpts; int reserved_eqs; int max_eqs; + int num_sys_eqs; int reserved_mtts; int max_mrw_sz; int reserved_mrws; @@ -66,17 +87,6 @@ struct mlx4_dev_cap { int local_ca_ack_delay; int num_ports; u32 max_msg_sz; - int ib_mtu[MLX4_MAX_PORTS + 1]; - int max_port_width[MLX4_MAX_PORTS + 1]; - int max_vl[MLX4_MAX_PORTS + 1]; - int max_gids[MLX4_MAX_PORTS + 1]; - int max_pkeys[MLX4_MAX_PORTS + 1]; - u64 def_mac[MLX4_MAX_PORTS + 1]; - u16 eth_mtu[MLX4_MAX_PORTS + 1]; - int trans_type[MLX4_MAX_PORTS + 1]; - int vendor_oui[MLX4_MAX_PORTS + 1]; - u16 wavelength[MLX4_MAX_PORTS + 1]; - u64 trans_code[MLX4_MAX_PORTS + 1]; u16 stat_rate_support; int fs_log_max_ucast_qp_range_size; int fs_max_num_qp_per_entry; @@ -114,12 +124,10 @@ struct mlx4_dev_cap { u64 max_icm_sz; int max_gso_sz; int max_rss_tbl_sz; - u8 supported_port_types[MLX4_MAX_PORTS + 1]; - u8 suggested_type[MLX4_MAX_PORTS + 1]; - u8 default_sense[MLX4_MAX_PORTS + 1]; - u8 log_max_macs[MLX4_MAX_PORTS + 1]; - u8 log_max_vlans[MLX4_MAX_PORTS + 1]; u32 max_counters; + u32 dmfs_high_rate_qpn_base; + u32 dmfs_high_rate_qpn_range; + struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; }; struct mlx4_func_cap { @@ -143,6 +151,17 @@ struct mlx4_func_cap { u8 port_flags; u8 flags1; u64 phys_port_id; + u32 extra_flags; +}; + +struct mlx4_func { + int bus; + int device; + int function; + int physical_function; + int rsvd_eqs; + int max_eq; + int rsvd_uars; }; struct mlx4_adapter { @@ -170,6 +189,7 @@ struct mlx4_init_hca_param { u8 log_num_srqs; u8 log_num_cqs; u8 log_num_eqs; + u16 num_sys_eqs; u8 log_rd_per_qp; u8 log_mc_table_sz; u8 log_mpt_sz; @@ -177,6 +197,7 @@ struct mlx4_init_hca_param { u8 mw_enabled; /* Enable memory windows */ u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 steering_mode; /* for QUERY_HCA */ + u8 dmfs_high_steer_mode; /* for QUERY_HCA */ u64 dev_cap_enabled; u16 cqe_size; /* For use only when CQE stride feature enabled */ u16 eqe_size; /* For use only when EQE stride feature enabled */ @@ -204,13 +225,15 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, +int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QUERY_FUNC(struct mlx4_dev *dev, struct mlx4_func *func, int slave); int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm); int mlx4_UNMAP_FA(struct mlx4_dev *dev); int mlx4_RUN_FW(struct mlx4_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 90de6e1ad06..e25436b24ce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -105,7 +105,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)"); #define PF_CONTEXT_BEHAVIOUR_MASK (MLX4_FUNC_CAP_64B_EQE_CQE | \ - MLX4_FUNC_CAP_EQE_CQE_STRIDE) + MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ + MLX4_FUNC_CAP_DMFS_A0_STATIC) static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" @@ -197,6 +198,29 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev) dev->caps.port_mask[i] = dev->caps.port_type[i]; } +enum { + MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0, +}; + +static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) +{ + int err = 0; + struct mlx4_func func; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_QUERY_FUNC(dev, &func, 0); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + return err; + } + dev_cap->max_eqs = func.max_eq; + dev_cap->reserved_eqs = func.rsvd_eqs; + dev_cap->reserved_uars = func.rsvd_uars; + err |= MLX4_QUERY_FUNC_NUM_SYS_EQS; + } + return err; +} + static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) { struct mlx4_caps *dev_cap = &dev->caps; @@ -231,6 +255,46 @@ static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev) } } +static int _mlx4_dev_port(struct mlx4_dev *dev, int port, + struct mlx4_port_cap *port_cap) +{ + dev->caps.vl_cap[port] = port_cap->max_vl; + dev->caps.ib_mtu_cap[port] = port_cap->ib_mtu; + dev->phys_caps.gid_phys_table_len[port] = port_cap->max_gids; + dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys; + /* set gid and pkey table operating lengths by default + * to non-sriov values + */ + dev->caps.gid_table_len[port] = port_cap->max_gids; + dev->caps.pkey_table_len[port] = port_cap->max_pkeys; + dev->caps.port_width_cap[port] = port_cap->max_port_width; + dev->caps.eth_mtu_cap[port] = port_cap->eth_mtu; + dev->caps.def_mac[port] = port_cap->def_mac; + dev->caps.supported_type[port] = port_cap->supported_port_types; + dev->caps.suggested_type[port] = port_cap->suggested_type; + dev->caps.default_sense[port] = port_cap->default_sense; + dev->caps.trans_type[port] = port_cap->trans_type; + dev->caps.vendor_oui[port] = port_cap->vendor_oui; + dev->caps.wavelength[port] = port_cap->wavelength; + dev->caps.trans_code[port] = port_cap->trans_code; + + return 0; +} + +static int mlx4_dev_port(struct mlx4_dev *dev, int port, + struct mlx4_port_cap *port_cap) +{ + int err = 0; + + err = mlx4_QUERY_PORT(dev, port, port_cap); + + if (err) + mlx4_err(dev, "QUERY_PORT command failed.\n"); + + return err; +} + +#define MLX4_A0_STEERING_TABLE_SIZE 256 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { int err; @@ -261,26 +325,16 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) } dev->caps.num_ports = dev_cap->num_ports; - dev->phys_caps.num_phys_eqs = MLX4_MAX_EQ_NUM; + dev->caps.num_sys_eqs = dev_cap->num_sys_eqs; + dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ? + dev->caps.num_sys_eqs : + MLX4_MAX_EQ_NUM; for (i = 1; i <= dev->caps.num_ports; ++i) { - dev->caps.vl_cap[i] = dev_cap->max_vl[i]; - dev->caps.ib_mtu_cap[i] = dev_cap->ib_mtu[i]; - dev->phys_caps.gid_phys_table_len[i] = dev_cap->max_gids[i]; - dev->phys_caps.pkey_phys_table_len[i] = dev_cap->max_pkeys[i]; - /* set gid and pkey table operating lengths by default - * to non-sriov values */ - dev->caps.gid_table_len[i] = dev_cap->max_gids[i]; - dev->caps.pkey_table_len[i] = dev_cap->max_pkeys[i]; - dev->caps.port_width_cap[i] = dev_cap->max_port_width[i]; - dev->caps.eth_mtu_cap[i] = dev_cap->eth_mtu[i]; - dev->caps.def_mac[i] = dev_cap->def_mac[i]; - dev->caps.supported_type[i] = dev_cap->supported_port_types[i]; - dev->caps.suggested_type[i] = dev_cap->suggested_type[i]; - dev->caps.default_sense[i] = dev_cap->default_sense[i]; - dev->caps.trans_type[i] = dev_cap->trans_type[i]; - dev->caps.vendor_oui[i] = dev_cap->vendor_oui[i]; - dev->caps.wavelength[i] = dev_cap->wavelength[i]; - dev->caps.trans_code[i] = dev_cap->trans_code[i]; + err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed, aborting\n"); + return err; + } } dev->caps.uar_page_size = PAGE_SIZE; @@ -389,13 +443,13 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.possible_type[i] = dev->caps.port_type[i]; } - if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) { - dev->caps.log_num_macs = dev_cap->log_max_macs[i]; + if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) { + dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs; mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n", i, 1 << dev->caps.log_num_macs); } - if (dev->caps.log_num_vlans > dev_cap->log_max_vlans[i]) { - dev->caps.log_num_vlans = dev_cap->log_max_vlans[i]; + if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) { + dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans; mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n", i, 1 << dev->caps.log_num_vlans); } @@ -411,6 +465,28 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.num_ports; dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH; + if (dev_cap->dmfs_high_rate_qpn_base > 0 && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) + dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base; + else + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + + if (dev_cap->dmfs_high_rate_qpn_range > 0 && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) { + dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range; + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT; + dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0; + } else { + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED; + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE; + } + + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] = + dev->caps.dmfs_high_rate_qpn_range; + dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + @@ -440,8 +516,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_is_master(dev)) dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE; - if (!mlx4_is_slave(dev)) + if (!mlx4_is_slave(dev)) { mlx4_enable_cqe_eqe_stride(dev); + dev->caps.alloc_res_qp_mask = + (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) | + MLX4_RESERVE_A0_QP; + } else { + dev->caps.alloc_res_qp_mask = 0; + } return 0; } @@ -631,7 +713,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) struct mlx4_dev_cap dev_cap; struct mlx4_func_cap func_cap; struct mlx4_init_hca_param hca_param; - int i; + u8 i; memset(&hca_param, 0, sizeof(hca_param)); err = mlx4_QUERY_HCA(dev, &hca_param); @@ -692,7 +774,8 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { - mlx4_err(dev, "Unknown pf context behaviour\n"); + mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", + func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); return -ENOSYS; } @@ -732,7 +815,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", i, err); @@ -791,6 +874,13 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && + dev->caps.bf_reg_size) + dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; + + if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) + dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; + return 0; err_mem: @@ -901,9 +991,12 @@ static ssize_t set_port_type(struct device *dev, struct mlx4_priv *priv = mlx4_priv(mdev); enum mlx4_port_type types[MLX4_MAX_PORTS]; enum mlx4_port_type new_types[MLX4_MAX_PORTS]; + static DEFINE_MUTEX(set_port_type_mutex); int i; int err = 0; + mutex_lock(&set_port_type_mutex); + if (!strcmp(buf, "ib\n")) info->tmp_type = MLX4_PORT_TYPE_IB; else if (!strcmp(buf, "eth\n")) @@ -912,7 +1005,8 @@ static ssize_t set_port_type(struct device *dev, info->tmp_type = MLX4_PORT_TYPE_AUTO; else { mlx4_err(mdev, "%s is not supported port type\n", buf); - return -EINVAL; + err = -EINVAL; + goto err_out; } mlx4_stop_sense(mdev); @@ -958,6 +1052,9 @@ static ssize_t set_port_type(struct device *dev, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); +err_out: + mutex_unlock(&set_port_type_mutex); + return err ? err : count; } @@ -1123,8 +1220,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base, if (err) goto err_srq; - num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table, cmpt_base + ((u64) (MLX4_CMPT_TYPE_EQ * @@ -1186,8 +1282,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, } - num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs : - dev->caps.num_eqs; + num_eqs = dev->phys_caps.num_phys_eqs; err = mlx4_init_icm_table(dev, &priv->eq_table.table, init_hca->eqc_base, dev_cap->eqc_entry_sz, num_eqs, num_eqs, 0, 0); @@ -1466,6 +1561,12 @@ static void mlx4_close_hca(struct mlx4_dev *dev) else { mlx4_CLOSE_HCA(dev, 0); mlx4_free_icms(dev); + } +} + +static void mlx4_close_fw(struct mlx4_dev *dev) +{ + if (!mlx4_is_slave(dev)) { mlx4_UNMAP_FA(dev); mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0); } @@ -1561,10 +1662,46 @@ static int choose_log_fs_mgm_entry_size(int qp_per_entry) return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1; } +static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode) +{ + switch (dmfs_high_steer_mode) { + case MLX4_STEERING_DMFS_A0_DEFAULT: + return "default performance"; + + case MLX4_STEERING_DMFS_A0_DYNAMIC: + return "dynamic hybrid mode"; + + case MLX4_STEERING_DMFS_A0_STATIC: + return "performance optimized for limited rule configuration (static)"; + + case MLX4_STEERING_DMFS_A0_DISABLE: + return "disabled performance optimized steering"; + + case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED: + return "performance optimized steering not supported"; + + default: + return "Unrecognized mode"; + } +} + +#define MLX4_DMFS_A0_STEERING (1UL << 2) + static void choose_steering_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { - if (mlx4_log_num_mgm_entry_size == -1 && + if (mlx4_log_num_mgm_entry_size <= 0) { + if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) { + if (dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + mlx4_err(dev, "DMFS high rate mode not supported\n"); + else + dev->caps.dmfs_high_steer_mode = + MLX4_STEERING_DMFS_A0_STATIC; + } + } + + if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && @@ -1577,6 +1714,9 @@ static void choose_steering_mode(struct mlx4_dev *dev, dev->caps.fs_log_max_ucast_qp_range_size = dev_cap->fs_log_max_ucast_qp_range_size; } else { + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER && dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) dev->caps.steering_mode = MLX4_STEERING_MODE_B0; @@ -1603,7 +1743,8 @@ static void choose_tunnel_offload_mode(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) { if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED && - dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS) + dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS && + dev->caps.dmfs_high_steer_mode != MLX4_STEERING_DMFS_A0_STATIC) dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN; else dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE; @@ -1612,16 +1753,39 @@ static void choose_tunnel_offload_mode(struct mlx4_dev *dev, == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none"); } -static int mlx4_init_hca(struct mlx4_dev *dev) +static int mlx4_validate_optimized_steering(struct mlx4_dev *dev) +{ + int i; + struct mlx4_port_cap port_cap; + + if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) + return -EINVAL; + + for (i = 1; i <= dev->caps.num_ports; i++) { + if (mlx4_dev_port(dev, i, &port_cap)) { + mlx4_err(dev, + "QUERY_DEV_CAP command failed, can't veify DMFS high rate steering.\n"); + } else if ((dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_DEFAULT) && + (port_cap.dmfs_optimized_state == + !!(dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_DISABLE))) { + mlx4_err(dev, + "DMFS high rate steer mode differ, driver requested %s but %s in FW.\n", + dmfs_high_rate_steering_mode_str( + dev->caps.dmfs_high_steer_mode), + (port_cap.dmfs_optimized_state ? + "enabled" : "disabled")); + } + } + + return 0; +} + +static int mlx4_init_fw(struct mlx4_dev *dev) { - struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_adapter adapter; - struct mlx4_dev_cap dev_cap; struct mlx4_mod_stat_cfg mlx4_cfg; - struct mlx4_profile profile; - struct mlx4_init_hca_param init_hca; - u64 icm_size; - int err; + int err = 0; if (!mlx4_is_slave(dev)) { err = mlx4_QUERY_FW(dev); @@ -1644,7 +1808,23 @@ static int mlx4_init_hca(struct mlx4_dev *dev) err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg); if (err) mlx4_warn(dev, "Failed to override log_pg_sz parameter\n"); + } + + return err; +} + +static int mlx4_init_hca(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_adapter adapter; + struct mlx4_dev_cap dev_cap; + struct mlx4_profile profile; + struct mlx4_init_hca_param init_hca; + u64 icm_size; + struct mlx4_config_dev_params params; + int err; + if (!mlx4_is_slave(dev)) { err = mlx4_dev_cap(dev, &dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); @@ -1654,6 +1834,10 @@ static int mlx4_init_hca(struct mlx4_dev *dev) choose_steering_mode(dev, &dev_cap); choose_tunnel_offload_mode(dev, &dev_cap); + if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC && + mlx4_is_master(dev)) + dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC; + err = mlx4_get_phys_port_id(dev); if (err) mlx4_err(dev, "Fail to get physical port id\n"); @@ -1696,6 +1880,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev) mlx4_err(dev, "INIT_HCA command failed, aborting\n"); goto err_free_icm; } + + if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + err = mlx4_query_func(dev, &dev_cap); + if (err < 0) { + mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n"); + goto err_stop_fw; + } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) { + dev->caps.num_eqs = dev_cap.max_eqs; + dev->caps.reserved_eqs = dev_cap.reserved_eqs; + dev->caps.reserved_uars = dev_cap.reserved_uars; + } + } + /* * If TS is supported by FW * read HCA frequency by QUERY_HCA command @@ -1727,6 +1924,24 @@ static int mlx4_init_hca(struct mlx4_dev *dev) mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n"); } } + + if (dev->caps.dmfs_high_steer_mode != + MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) { + if (mlx4_validate_optimized_steering(dev)) + mlx4_warn(dev, "Optimized steering validation failed\n"); + + if (dev->caps.dmfs_high_steer_mode == + MLX4_STEERING_DMFS_A0_DISABLE) { + dev->caps.dmfs_high_rate_qpn_base = + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + dev->caps.dmfs_high_rate_qpn_range = + MLX4_A0_STEERING_TABLE_SIZE; + } + + mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n", + dmfs_high_rate_steering_mode_str( + dev->caps.dmfs_high_steer_mode)); + } } else { err = mlx4_init_slave(dev); if (err) { @@ -1755,6 +1970,14 @@ static int mlx4_init_hca(struct mlx4_dev *dev) goto unmap_bf; } + /* Query CONFIG_DEV parameters */ + err = mlx4_config_dev_retrieval(dev, ¶ms); + if (err && err != -ENOTSUPP) { + mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n"); + } else if (!err) { + dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1; + dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; + } priv->eq_table.inta_pin = adapter.inta_pin; memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); @@ -2054,12 +2277,11 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct msix_entry *entries; - int nreq = min_t(int, dev->caps.num_ports * - min_t(int, num_online_cpus() + 1, - MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX); int i; if (msi_x) { + int nreq = dev->caps.num_ports * num_online_cpus() + MSIX_LEGACY_SZ; + nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq); @@ -2259,6 +2481,71 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) iounmap(owner); } +#define SRIOV_VALID_STATE(flags) (!!((flags) & MLX4_FLAG_SRIOV) ==\ + !!((flags) & MLX4_FLAG_MASTER)) + +static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, + u8 total_vfs, int existing_vfs) +{ + u64 dev_flags = dev->flags; + + dev->dev_vfs = kzalloc( + total_vfs * sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (NULL == dev->dev_vfs) { + mlx4_err(dev, "Failed to allocate memory for VFs\n"); + goto disable_sriov; + } else if (!(dev->flags & MLX4_FLAG_SRIOV)) { + int err = 0; + + atomic_inc(&pf_loading); + if (existing_vfs) { + if (existing_vfs != total_vfs) + mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", + existing_vfs, total_vfs); + } else { + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs); + err = pci_enable_sriov(pdev, total_vfs); + } + if (err) { + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", + err); + atomic_dec(&pf_loading); + goto disable_sriov; + } else { + mlx4_warn(dev, "Running in master mode\n"); + dev_flags |= MLX4_FLAG_SRIOV | + MLX4_FLAG_MASTER; + dev_flags &= ~MLX4_FLAG_SLAVE; + dev->num_vfs = total_vfs; + } + } + return dev_flags; + +disable_sriov: + dev->num_vfs = 0; + kfree(dev->dev_vfs); + return dev_flags & ~MLX4_FLAG_MASTER; +} + +enum { + MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64 = -1, +}; + +static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, + int *nvfs) +{ + int requested_vfs = nvfs[0] + nvfs[1] + nvfs[2]; + /* Checking for 64 VFs as a limitation of CX2 */ + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_80_VFS) && + requested_vfs >= 64) { + mlx4_err(dev, "Requested %d VFs, but FW does not support more than 64\n", + requested_vfs); + return MLX4_DEV_CAP_CHECK_NUM_VFS_ABOVE_64; + } + return 0; +} + static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int total_vfs, int *nvfs, struct mlx4_priv *priv) { @@ -2267,6 +2554,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, int err; int port; int i; + struct mlx4_dev_cap *dev_cap = NULL; int existing_vfs = 0; dev = &priv->dev; @@ -2303,40 +2591,6 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } } - if (total_vfs) { - mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", - total_vfs); - dev->dev_vfs = kzalloc( - total_vfs * sizeof(*dev->dev_vfs), - GFP_KERNEL); - if (NULL == dev->dev_vfs) { - mlx4_err(dev, "Failed to allocate memory for VFs\n"); - err = -ENOMEM; - goto err_free_own; - } else { - atomic_inc(&pf_loading); - existing_vfs = pci_num_vf(pdev); - if (existing_vfs) { - err = 0; - if (existing_vfs != total_vfs) - mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n", - existing_vfs, total_vfs); - } else { - err = pci_enable_sriov(pdev, total_vfs); - } - if (err) { - mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n", - err); - atomic_dec(&pf_loading); - } else { - mlx4_warn(dev, "Running in master mode\n"); - dev->flags |= MLX4_FLAG_SRIOV | - MLX4_FLAG_MASTER; - dev->num_vfs = total_vfs; - } - } - } - atomic_set(&priv->opreq_count, 0); INIT_WORK(&priv->opreq_task, mlx4_opreq_action); @@ -2350,6 +2604,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, mlx4_err(dev, "Failed to reset HCA, aborting\n"); goto err_sriov; } + + if (total_vfs) { + existing_vfs = pci_num_vf(pdev); + dev->flags = MLX4_FLAG_MASTER; + dev->num_vfs = total_vfs; + } } slave_start: @@ -2363,9 +2623,10 @@ slave_start: * before posting commands. Also, init num_slaves before calling * mlx4_init_hca */ if (mlx4_is_mfunc(dev)) { - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev)) { dev->num_slaves = MLX4_MAX_NUM_SLAVES; - else { + + } else { dev->num_slaves = 0; err = mlx4_multi_func_init(dev); if (err) { @@ -2375,17 +2636,109 @@ slave_start: } } + err = mlx4_init_fw(dev); + if (err) { + mlx4_err(dev, "Failed to init fw, aborting.\n"); + goto err_mfunc; + } + + if (mlx4_is_master(dev)) { + if (!dev_cap) { + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + + if (!dev_cap) { + err = -ENOMEM; + goto err_fw; + } + + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; + + if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs); + + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + dev->flags = dev_flags; + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + goto err_sriov; + } + err = mlx4_reset(dev); + if (err) { + mlx4_err(dev, "Failed to reset HCA, aborting.\n"); + goto err_sriov; + } + goto slave_start; + } + } else { + /* Legacy mode FW requires SRIOV to be enabled before + * doing QUERY_DEV_CAP, since max_eq's value is different if + * SRIOV is enabled. + */ + memset(dev_cap, 0, sizeof(*dev_cap)); + err = mlx4_QUERY_DEV_CAP(dev, dev_cap); + if (err) { + mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n"); + goto err_fw; + } + + if (mlx4_check_dev_cap(dev, dev_cap, nvfs)) + goto err_fw; + } + } + err = mlx4_init_hca(dev); if (err) { if (err == -EACCES) { /* Not primary Physical function * Running in slave mode */ - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); + /* We're not a PF */ + if (dev->flags & MLX4_FLAG_SRIOV) { + if (!existing_vfs) + pci_disable_sriov(pdev); + if (mlx4_is_master(dev)) + atomic_dec(&pf_loading); + dev->flags &= ~MLX4_FLAG_SRIOV; + } + if (!mlx4_is_slave(dev)) + mlx4_free_ownership(dev); dev->flags |= MLX4_FLAG_SLAVE; dev->flags &= ~MLX4_FLAG_MASTER; goto slave_start; } else - goto err_mfunc; + goto err_fw; + } + + if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + + if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); + dev->flags = dev_flags; + err = mlx4_cmd_init(dev); + if (err) { + /* Only VHCR is cleaned up, so could still + * send FW commands + */ + mlx4_err(dev, "Failed to init VHCR command interface, aborting\n"); + goto err_close; + } + } else { + dev->flags = dev_flags; + } + + if (!SRIOV_VALID_STATE(dev->flags)) { + mlx4_err(dev, "Invalid SRIOV state\n"); + goto err_close; + } } /* check if the device is functioning at its maximum possible speed. @@ -2540,12 +2893,15 @@ err_master_mfunc: err_close: mlx4_close_hca(dev); +err_fw: + mlx4_close_fw(dev); + err_mfunc: if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); err_cmd: - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) @@ -2556,10 +2912,10 @@ err_sriov: kfree(priv->dev.dev_vfs); -err_free_own: if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + kfree(dev_cap); return err; } @@ -2787,15 +3143,17 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (mlx4_is_master(dev)) mlx4_multi_func_cleanup(dev); mlx4_close_hca(dev); + mlx4_close_fw(dev); if (mlx4_is_slave(dev)) mlx4_multi_func_cleanup(dev); - mlx4_cmd_cleanup(dev); + mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; dev->num_vfs = 0; } @@ -2956,10 +3314,11 @@ static int __init mlx4_verify_params(void) port_type_array[0] = true; } - if (mlx4_log_num_mgm_entry_size != -1 && - (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || - mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE)) { - pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-1 or %d..%d)\n", + if (mlx4_log_num_mgm_entry_size < -7 || + (mlx4_log_num_mgm_entry_size > 0 && + (mlx4_log_num_mgm_entry_size < MLX4_MIN_MGM_LOG_ENTRY_SIZE || + mlx4_log_num_mgm_entry_size > MLX4_MAX_MGM_LOG_ENTRY_SIZE))) { + pr_warn("mlx4_core: mlx4_log_num_mgm_entry_size (%d) not in legal range (-7..0 or %d..%d)\n", mlx4_log_num_mgm_entry_size, MLX4_MIN_MGM_LOG_ENTRY_SIZE, MLX4_MAX_MGM_LOG_ENTRY_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 872843179f4..a3867e7ef88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -999,12 +999,27 @@ int mlx4_flow_attach(struct mlx4_dev *dev, } ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id); - if (ret == -ENOMEM) + if (ret == -ENOMEM) { mlx4_err_rule(dev, "mcg table is full. Fail to register network rule\n", rule); - else if (ret) - mlx4_err_rule(dev, "Fail to register network rule\n", rule); + } else if (ret) { + if (ret == -ENXIO) { + if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) + mlx4_err_rule(dev, + "DMFS is not enabled, " + "failed to register network rule.\n", + rule); + else + mlx4_err_rule(dev, + "Rule exceeds the dmfs_high_rate_mode limitations, " + "failed to register network rule.\n", + rule); + + } else { + mlx4_err_rule(dev, "Fail to register network rule.\n", rule); + } + } mlx4_free_cmd_mailbox(dev, mailbox); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index de10dbb2e6e..bdd4eea2247 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -43,6 +43,8 @@ #include <linux/timer.h> #include <linux/semaphore.h> #include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> #include <linux/mlx4/device.h> #include <linux/mlx4/driver.h> @@ -243,6 +245,7 @@ struct mlx4_bitmap { u32 reserved_top; u32 mask; u32 avail; + u32 effective_len; spinlock_t lock; unsigned long *table; }; @@ -373,6 +376,14 @@ struct mlx4_srq_context { __be64 db_rec_addr; }; +struct mlx4_eq_tasklet { + struct list_head list; + struct list_head process_list; + struct tasklet_struct task; + /* lock on completion tasklet list */ + spinlock_t lock; +}; + struct mlx4_eq { struct mlx4_dev *dev; void __iomem *doorbell; @@ -383,6 +394,7 @@ struct mlx4_eq { int nent; struct mlx4_buf_list *page_list; struct mlx4_mtt mtt; + struct mlx4_eq_tasklet tasklet_ctx; }; struct mlx4_slave_eqe { @@ -606,6 +618,7 @@ struct mlx4_cmd { u8 use_events; u8 toggle; u8 comm_toggle; + u8 initialized; }; enum { @@ -669,8 +682,17 @@ struct mlx4_srq_table { struct mlx4_icm_table cmpt_table; }; +enum mlx4_qp_table_zones { + MLX4_QP_TABLE_ZONE_GENERAL, + MLX4_QP_TABLE_ZONE_RSS, + MLX4_QP_TABLE_ZONE_RAW_ETH, + MLX4_QP_TABLE_ZONE_NUM +}; + struct mlx4_qp_table { - struct mlx4_bitmap bitmap; + struct mlx4_bitmap *bitmap_gen; + struct mlx4_zone_allocator *zones; + u32 zones_uids[MLX4_QP_TABLE_ZONE_NUM]; u32 rdmarc_base; int rdmarc_shift; spinlock_t lock; @@ -872,7 +894,8 @@ extern struct workqueue_struct *mlx4_wq; u32 mlx4_bitmap_alloc(struct mlx4_bitmap *bitmap); void mlx4_bitmap_free(struct mlx4_bitmap *bitmap, u32 obj, int use_rr); -u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, int align); +u32 mlx4_bitmap_alloc_range(struct mlx4_bitmap *bitmap, int cnt, + int align, u32 skip_mask); void mlx4_bitmap_free_range(struct mlx4_bitmap *bitmap, u32 obj, int cnt, int use_rr); u32 mlx4_bitmap_avail(struct mlx4_bitmap *bitmap); @@ -947,13 +970,18 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base); + int *base, u8 flags); void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt); int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac); void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); @@ -1121,8 +1149,16 @@ int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); +enum { + MLX4_CMD_CLEANUP_STRUCT = 1UL << 0, + MLX4_CMD_CLEANUP_POOL = 1UL << 1, + MLX4_CMD_CLEANUP_HCR = 1UL << 2, + MLX4_CMD_CLEANUP_VHCR = 1UL << 3, + MLX4_CMD_CLEANUP_ALL = (MLX4_CMD_CLEANUP_VHCR << 1) - 1 +}; + int mlx4_cmd_init(struct mlx4_dev *dev); -void mlx4_cmd_cleanup(struct mlx4_dev *dev); +void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); @@ -1132,6 +1168,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, unsigned long timeout); +void mlx4_cq_tasklet_cb(unsigned long data); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type); @@ -1273,6 +1310,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_get_mgm_entry_size(struct mlx4_dev *dev); int mlx4_get_qp_per_mgm(struct mlx4_dev *dev); @@ -1313,4 +1355,72 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); +enum mlx4_zone_flags { + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO = 1UL << 1, + MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO = 1UL << 2, + MLX4_ZONE_USE_RR = 1UL << 3, +}; + +enum mlx4_zone_alloc_flags { + /* No two objects could overlap between zones. UID + * could be left unused. If this flag is given and + * two overlapped zones are used, an object will be free'd + * from the smallest possible matching zone. + */ + MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP = 1UL << 0, +}; + +struct mlx4_zone_allocator; + +/* Create a new zone allocator */ +struct mlx4_zone_allocator *mlx4_zone_allocator_create(enum mlx4_zone_alloc_flags flags); + +/* Attach a mlx4_bitmap <bitmap> of priority <priority> to the zone allocator + * <zone_alloc>. Allocating an object from this zone adds an offset <offset>. + * Similarly, when searching for an object to free, this offset it taken into + * account. The use_rr mlx4_ib parameter for allocating objects from this <bitmap> + * is given through the MLX4_ZONE_USE_RR flag in <flags>. + * When an allocation fails, <zone_alloc> tries to allocate from other zones + * according to the policy set by <flags>. <puid> is the unique identifier + * received to this zone. + */ +int mlx4_zone_add_one(struct mlx4_zone_allocator *zone_alloc, + struct mlx4_bitmap *bitmap, + u32 flags, + int priority, + int offset, + u32 *puid); + +/* Remove bitmap indicated by <uid> from <zone_alloc> */ +int mlx4_zone_remove_one(struct mlx4_zone_allocator *zone_alloc, u32 uid); + +/* Delete the zone allocator <zone_alloc. This function doesn't destroy + * the attached bitmaps. + */ +void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); + +/* Allocate <count> objects with align <align> and skip_mask <skip_mask> + * from the mlx4_bitmap whose uid is <uid>. The bitmap which we actually + * allocated from is returned in <puid>. If the allocation fails, a negative + * number is returned. Otherwise, the offset of the first object is returned. + */ +u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, + int align, u32 skip_mask, u32 *puid); + +/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator + * <zones>. + */ +u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, + u32 uid, u32 obj, u32 count); + +/* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of + * specifying the uid when freeing an object, zone allocator could figure it by + * itself. Other parameters are similar to mlx4_zone_free. + */ +u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count); + +/* Returns a pointer to mlx4_bitmap that was attached to <zones> with <uid> */ +struct mlx4_bitmap *mlx4_zone_get_bitmap(struct mlx4_zone_allocator *zones, u32 uid); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 8fef65840b3..944a112dff3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -326,6 +326,7 @@ struct mlx4_en_rx_ring { #endif unsigned long csum_ok; unsigned long csum_none; + unsigned long csum_complete; int hwtstamp_rx_filter; cpumask_var_t affinity_mask; }; @@ -375,7 +376,6 @@ struct mlx4_en_port_profile { }; struct mlx4_en_profile { - int rss_xor; int udp_rss; u8 rss_mask; u32 active_ports; @@ -421,10 +421,16 @@ struct mlx4_en_rss_map { enum mlx4_qp_state indir_state; }; +enum mlx4_en_port_flag { + MLX4_EN_PORT_ANC = 1<<0, /* Auto-negotiation complete */ + MLX4_EN_PORT_ANE = 1<<1, /* Auto-negotiation enabled */ +}; + struct mlx4_en_port_state { int link_state; int link_speed; - int transciver; + int transceiver; + u32 flags; }; struct mlx4_en_pkt_stats { @@ -443,6 +449,7 @@ struct mlx4_en_port_stats { unsigned long rx_alloc_failed; unsigned long rx_chksum_good; unsigned long rx_chksum_none; + unsigned long rx_chksum_complete; unsigned long tx_chksum_offload; #define NUM_PORT_STATS 9 }; @@ -475,7 +482,6 @@ struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; u16 frag_stride; - u16 frag_align; }; #ifdef CONFIG_MLX4_EN_DCB @@ -502,7 +508,8 @@ enum { MLX4_EN_FLAG_ENABLE_HW_LOOPBACK = (1 << 2), /* whether we need to drop packets that hardware loopback-ed */ MLX4_EN_FLAG_RX_FILTER_NEEDED = (1 << 3), - MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4) + MLX4_EN_FLAG_FORCE_PROMISC = (1 << 4), + MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP = (1 << 5), }; #define MLX4_EN_MAC_HASH_SIZE (1 << BITS_PER_BYTE) @@ -610,6 +617,8 @@ struct mlx4_en_priv { __be16 vxlan_port; u32 pflags; + u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; + u8 rss_hash_fn; }; enum mlx4_en_wol { @@ -769,7 +778,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, - int qpn, u32 size, u16 stride, + u32 size, u16 stride, int node, int queue_index); void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring); @@ -829,6 +838,13 @@ void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv); void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf); void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); +#define DEV_FEATURE_CHANGED(dev, new_features, feature) \ + ((dev->features & feature) ^ (new_features & feature)) + +int mlx4_en_reset_config(struct net_device *dev, + struct hwtstamp_config ts_config, + netdev_features_t new_features); + /* * Functions for time stamping */ @@ -838,9 +854,6 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, u64 timestamp); void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev); void mlx4_en_remove_timestamp(struct mlx4_en_dev *mdev); -int mlx4_en_timestamp_config(struct net_device *dev, - int tx_type, - int rx_filter); /* Globals */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 193a6adb5d0..d6f549685c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -130,10 +130,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) err_out_free: for (i = 0; i <= buddy->max_order; ++i) - if (buddy->bits[i] && is_vmalloc_addr(buddy->bits[i])) - vfree(buddy->bits[i]); - else - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); err_out: kfree(buddy->bits); @@ -147,10 +144,7 @@ static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy) int i; for (i = 0; i <= buddy->max_order; ++i) - if (is_vmalloc_addr(buddy->bits[i])) - vfree(buddy->bits[i]); - else - kfree(buddy->bits[i]); + kvfree(buddy->bits[i]); kfree(buddy->bits); kfree(buddy->num_free); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 94eeb2c7d7e..30eb1ead0fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1311,3 +1311,159 @@ int mlx4_get_roce_gid_from_slave(struct mlx4_dev *dev, int port, int slave_id, return 0; } EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave); + +/* Cable Module Info */ +#define MODULE_INFO_MAX_READ 48 + +#define I2C_ADDR_LOW 0x50 +#define I2C_ADDR_HIGH 0x51 +#define I2C_PAGE_SIZE 256 + +/* Module Info Data */ +struct mlx4_cable_info { + u8 i2c_addr; + u8 page_num; + __be16 dev_mem_address; + __be16 reserved1; + __be16 size; + __be32 reserved2[2]; + u8 data[MODULE_INFO_MAX_READ]; +}; + +enum cable_info_err { + CABLE_INF_INV_PORT = 0x1, + CABLE_INF_OP_NOSUP = 0x2, + CABLE_INF_NOT_CONN = 0x3, + CABLE_INF_NO_EEPRM = 0x4, + CABLE_INF_PAGE_ERR = 0x5, + CABLE_INF_INV_ADDR = 0x6, + CABLE_INF_I2C_ADDR = 0x7, + CABLE_INF_QSFP_VIO = 0x8, + CABLE_INF_I2C_BUSY = 0x9, +}; + +#define MAD_STATUS_2_CABLE_ERR(mad_status) ((mad_status >> 8) & 0xFF) + +static inline const char *cable_info_mad_err_str(u16 mad_status) +{ + u8 err = MAD_STATUS_2_CABLE_ERR(mad_status); + + switch (err) { + case CABLE_INF_INV_PORT: + return "invalid port selected"; + case CABLE_INF_OP_NOSUP: + return "operation not supported for this port (the port is of type CX4 or internal)"; + case CABLE_INF_NOT_CONN: + return "cable is not connected"; + case CABLE_INF_NO_EEPRM: + return "the connected cable has no EPROM (passive copper cable)"; + case CABLE_INF_PAGE_ERR: + return "page number is greater than 15"; + case CABLE_INF_INV_ADDR: + return "invalid device_address or size (that is, size equals 0 or address+size is greater than 256)"; + case CABLE_INF_I2C_ADDR: + return "invalid I2C slave address"; + case CABLE_INF_QSFP_VIO: + return "at least one cable violates the QSFP specification and ignores the modsel signal"; + case CABLE_INF_I2C_BUSY: + return "I2C bus is constantly busy"; + } + return "Unknown Error"; +} + +/** + * mlx4_get_module_info - Read cable module eeprom data + * @dev: mlx4_dev. + * @port: port number. + * @offset: byte offset in eeprom to start reading data from. + * @size: num of bytes to read. + * @data: output buffer to put the requested data into. + * + * Reads cable module eeprom data, puts the outcome data into + * data pointer paramer. + * Returns num of read bytes on success or a negative error + * code. + */ +int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, + u16 offset, u16 size, u8 *data) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_mad_ifc *inmad, *outmad; + struct mlx4_cable_info *cable_info; + u16 i2c_addr; + int ret; + + if (size > MODULE_INFO_MAX_READ) + size = MODULE_INFO_MAX_READ; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inmad = (struct mlx4_mad_ifc *)(inbox->buf); + outmad = (struct mlx4_mad_ifc *)(outbox->buf); + + inmad->method = 0x1; /* Get */ + inmad->class_version = 0x1; + inmad->mgmt_class = 0x1; + inmad->base_version = 0x1; + inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */ + + if (offset < I2C_PAGE_SIZE && offset + size > I2C_PAGE_SIZE) + /* Cross pages reads are not allowed + * read until offset 256 in low page + */ + size -= offset + size - I2C_PAGE_SIZE; + + i2c_addr = I2C_ADDR_LOW; + if (offset >= I2C_PAGE_SIZE) { + /* Reset offset to high page */ + i2c_addr = I2C_ADDR_HIGH; + offset -= I2C_PAGE_SIZE; + } + + cable_info = (struct mlx4_cable_info *)inmad->data; + cable_info->dev_mem_address = cpu_to_be16(offset); + cable_info->page_num = 0; + cable_info->i2c_addr = i2c_addr; + cable_info->size = cpu_to_be16(size); + + ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + if (be16_to_cpu(outmad->status)) { + /* Mad returned with bad status */ + ret = be16_to_cpu(outmad->status); + mlx4_warn(dev, + "MLX4_CMD_MAD_IFC Get Module info attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n", + 0xFF60, port, i2c_addr, offset, size, + ret, cable_info_mad_err_str(ret)); + + if (i2c_addr == I2C_ADDR_HIGH && + MAD_STATUS_2_CABLE_ERR(ret) == CABLE_INF_I2C_ADDR) + /* Some SFP cables do not support i2c slave + * address 0x51 (high page), abort silently. + */ + ret = 0; + else + ret = -ret; + goto out; + } + cable_info = (struct mlx4_cable_info *)outmad->data; + memcpy(data, cable_info->data, size); + ret = size; +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} +EXPORT_SYMBOL(mlx4_get_module_info); diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index 14089d9e166..2bf437aafc5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -126,8 +126,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_AUXC].num = request->num_qp; profile[MLX4_RES_SRQ].num = request->num_srq; profile[MLX4_RES_CQ].num = request->num_cq; - profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? - dev->phys_caps.num_phys_eqs : + profile[MLX4_RES_EQ].num = mlx4_is_mfunc(dev) ? dev->phys_caps.num_phys_eqs : min_t(unsigned, dev_cap->max_eqs, MAX_MSIX); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; @@ -216,10 +215,18 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, init_hca->log_num_cqs = profile[i].log_num; break; case MLX4_RES_EQ: - dev->caps.num_eqs = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs, - MAX_MSIX)); - init_hca->eqc_base = profile[i].start; - init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) { + init_hca->log_num_eqs = 0x1f; + init_hca->eqc_base = profile[i].start; + init_hca->num_sys_eqs = dev_cap->num_sys_eqs; + } else { + dev->caps.num_eqs = roundup_pow_of_two( + min_t(unsigned, + dev_cap->max_eqs, + MAX_MSIX)); + init_hca->eqc_base = profile[i].start; + init_hca->log_num_eqs = ilog2(dev->caps.num_eqs); + } break; case MLX4_RES_DMPT: dev->caps.num_mpts = profile[i].num; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 2301365c79c..1586ecce13c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -42,6 +42,10 @@ #include "mlx4.h" #include "icm.h" +/* QP to support BF should have bits 6,7 cleared */ +#define MLX4_BF_QP_SKIP_MASK 0xc0 +#define MLX4_MAX_BF_QP_RANGE 0x40 + void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; @@ -207,26 +211,45 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, EXPORT_SYMBOL_GPL(mlx4_qp_modify); int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, - int *base) + int *base, u8 flags) { + u32 uid; + int bf_qp = !!(flags & (u8)MLX4_RESERVE_ETH_BF_QP); + struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_qp_table *qp_table = &priv->qp_table; - *base = mlx4_bitmap_alloc_range(&qp_table->bitmap, cnt, align); + if (cnt > MLX4_MAX_BF_QP_RANGE && bf_qp) + return -ENOMEM; + + uid = MLX4_QP_TABLE_ZONE_GENERAL; + if (flags & (u8)MLX4_RESERVE_A0_QP) { + if (bf_qp) + uid = MLX4_QP_TABLE_ZONE_RAW_ETH; + else + uid = MLX4_QP_TABLE_ZONE_RSS; + } + + *base = mlx4_zone_alloc_entries(qp_table->zones, uid, cnt, align, + bf_qp ? MLX4_BF_QP_SKIP_MASK : 0, NULL); if (*base == -1) return -ENOMEM; return 0; } -int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base) +int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, + int *base, u8 flags) { u64 in_param = 0; u64 out_param; int err; + /* Turn off all unsupported QP allocation flags */ + flags &= dev->caps.alloc_res_qp_mask; + if (mlx4_is_mfunc(dev)) { - set_param_l(&in_param, cnt); + set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt); set_param_h(&in_param, align); err = mlx4_cmd_imm(dev, in_param, &out_param, RES_QP, RES_OP_RESERVE, @@ -238,7 +261,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base) *base = get_param_l(&out_param); return 0; } - return __mlx4_qp_reserve_range(dev, cnt, align, base); + return __mlx4_qp_reserve_range(dev, cnt, align, base, flags); } EXPORT_SYMBOL_GPL(mlx4_qp_reserve_range); @@ -249,7 +272,7 @@ void __mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) if (mlx4_is_qp_reserved(dev, (u32) base_qpn)) return; - mlx4_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, MLX4_USE_RR); + mlx4_zone_free_entries_unique(qp_table->zones, base_qpn, cnt); } void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt) @@ -459,28 +482,261 @@ static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn) MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); } +#define MLX4_QP_TABLE_RSS_ETH_PRIORITY 2 +#define MLX4_QP_TABLE_RAW_ETH_PRIORITY 1 +#define MLX4_QP_TABLE_RAW_ETH_SIZE 256 + +static int mlx4_create_zones(struct mlx4_dev *dev, + u32 reserved_bottom_general, + u32 reserved_top_general, + u32 reserved_bottom_rss, + u32 start_offset_rss, + u32 max_table_offset) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_bitmap (*bitmap)[MLX4_QP_TABLE_ZONE_NUM] = NULL; + int bitmap_initialized = 0; + u32 last_offset; + int k; + int err; + + qp_table->zones = mlx4_zone_allocator_create(MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP); + + if (NULL == qp_table->zones) + return -ENOMEM; + + bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL); + + if (NULL == bitmap) { + err = -ENOMEM; + goto free_zone; + } + + err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_GENERAL, dev->caps.num_qps, + (1 << 23) - 1, reserved_bottom_general, + reserved_top_general); + + if (err) + goto free_bitmap; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_GENERAL, + MLX4_ZONE_FALLBACK_TO_HIGHER_PRIO | + MLX4_ZONE_USE_RR, 0, + 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_GENERAL); + + if (err) + goto free_bitmap; + + err = mlx4_bitmap_init(*bitmap + MLX4_QP_TABLE_ZONE_RSS, + reserved_bottom_rss, + reserved_bottom_rss - 1, + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], + reserved_bottom_rss - start_offset_rss); + + if (err) + goto free_bitmap; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + MLX4_QP_TABLE_ZONE_RSS, + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO | + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO | + MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RSS_ETH_PRIORITY, + 0, qp_table->zones_uids + MLX4_QP_TABLE_ZONE_RSS); + + if (err) + goto free_bitmap; + + last_offset = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; + /* We have a single zone for the A0 steering QPs area of the FW. This area + * needs to be split into subareas. One set of subareas is for RSS QPs + * (in which qp number bits 6 and/or 7 are set); the other set of subareas + * is for RAW_ETH QPs, which require that both bits 6 and 7 are zero. + * Currently, the values returned by the FW (A0 steering area starting qp number + * and A0 steering area size) are such that there are only two subareas -- one + * for RSS and one for RAW_ETH. + */ + for (k = MLX4_QP_TABLE_ZONE_RSS + 1; k < sizeof(*bitmap)/sizeof((*bitmap)[0]); + k++) { + int size; + u32 offset = start_offset_rss; + u32 bf_mask; + u32 requested_size; + + /* Assuming MLX4_BF_QP_SKIP_MASK is consecutive ones, this calculates + * a mask of all LSB bits set until (and not including) the first + * set bit of MLX4_BF_QP_SKIP_MASK. For example, if MLX4_BF_QP_SKIP_MASK + * is 0xc0, bf_mask will be 0x3f. + */ + bf_mask = (MLX4_BF_QP_SKIP_MASK & ~(MLX4_BF_QP_SKIP_MASK - 1)) - 1; + requested_size = min((u32)MLX4_QP_TABLE_RAW_ETH_SIZE, bf_mask + 1); + + if (((last_offset & MLX4_BF_QP_SKIP_MASK) && + ((int)(max_table_offset - last_offset)) >= + roundup_pow_of_two(MLX4_BF_QP_SKIP_MASK)) || + (!(last_offset & MLX4_BF_QP_SKIP_MASK) && + !((last_offset + requested_size - 1) & + MLX4_BF_QP_SKIP_MASK))) + size = requested_size; + else { + u32 candidate_offset = + (last_offset | MLX4_BF_QP_SKIP_MASK | bf_mask) + 1; + + if (last_offset & MLX4_BF_QP_SKIP_MASK) + last_offset = candidate_offset; + + /* From this point, the BF bits are 0 */ + + if (last_offset > max_table_offset) { + /* need to skip */ + size = -1; + } else { + size = min3(max_table_offset - last_offset, + bf_mask - (last_offset & bf_mask), + requested_size); + if (size < requested_size) { + int candidate_size; + + candidate_size = min3( + max_table_offset - candidate_offset, + bf_mask - (last_offset & bf_mask), + requested_size); + + /* We will not take this path if last_offset was + * already set above to candidate_offset + */ + if (candidate_size > size) { + last_offset = candidate_offset; + size = candidate_size; + } + } + } + } + + if (size > 0) { + /* mlx4_bitmap_alloc_range will find a contiguous range of "size" + * QPs in which both bits 6 and 7 are zero, because we pass it the + * MLX4_BF_SKIP_MASK). + */ + offset = mlx4_bitmap_alloc_range( + *bitmap + MLX4_QP_TABLE_ZONE_RSS, + size, 1, + MLX4_BF_QP_SKIP_MASK); + + if (offset == (u32)-1) { + err = -ENOMEM; + break; + } + + last_offset = offset + size; + + err = mlx4_bitmap_init(*bitmap + k, roundup_pow_of_two(size), + roundup_pow_of_two(size) - 1, 0, + roundup_pow_of_two(size) - size); + } else { + /* Add an empty bitmap, we'll allocate from different zones (since + * at least one is reserved) + */ + err = mlx4_bitmap_init(*bitmap + k, 1, + MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, + 0); + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + } + + if (err) + break; + + ++bitmap_initialized; + + err = mlx4_zone_add_one(qp_table->zones, *bitmap + k, + MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO | + MLX4_ZONE_ALLOW_ALLOC_FROM_EQ_PRIO | + MLX4_ZONE_USE_RR, MLX4_QP_TABLE_RAW_ETH_PRIORITY, + offset, qp_table->zones_uids + k); + + if (err) + break; + } + + if (err) + goto free_bitmap; + + qp_table->bitmap_gen = *bitmap; + + return err; + +free_bitmap: + for (k = 0; k < bitmap_initialized; k++) + mlx4_bitmap_cleanup(*bitmap + k); + kfree(bitmap); +free_zone: + mlx4_zone_allocator_destroy(qp_table->zones); + return err; +} + +static void mlx4_cleanup_qp_zones(struct mlx4_dev *dev) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + + if (qp_table->zones) { + int i; + + for (i = 0; + i < sizeof(qp_table->zones_uids)/sizeof(qp_table->zones_uids[0]); + i++) { + struct mlx4_bitmap *bitmap = + mlx4_zone_get_bitmap(qp_table->zones, + qp_table->zones_uids[i]); + + mlx4_zone_remove_one(qp_table->zones, qp_table->zones_uids[i]); + if (NULL == bitmap) + continue; + + mlx4_bitmap_cleanup(bitmap); + } + mlx4_zone_allocator_destroy(qp_table->zones); + kfree(qp_table->bitmap_gen); + qp_table->bitmap_gen = NULL; + qp_table->zones = NULL; + } +} + int mlx4_init_qp_table(struct mlx4_dev *dev) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; + int reserved_from_bot; int k; + int fixed_reserved_from_bot_rv = 0; + int bottom_reserved_for_rss_bitmap; + u32 max_table_offset = dev->caps.dmfs_high_rate_qpn_base + + dev->caps.dmfs_high_rate_qpn_range; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); if (mlx4_is_slave(dev)) return 0; - /* - * We reserve 2 extra QPs per port for the special QPs. The + /* We reserve 2 extra QPs per port for the special QPs. The * block of special QPs must be aligned to a multiple of 8, so * round up. * * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->phys_caps.base_sqpn = - ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); + for (k = 0; k <= MLX4_QP_REGION_BOTTOM; k++) + fixed_reserved_from_bot_rv += dev->caps.reserved_qps_cnt[k]; + + if (fixed_reserved_from_bot_rv < max_table_offset) + fixed_reserved_from_bot_rv = max_table_offset; + + /* We reserve at least 1 extra for bitmaps that we don't have enough space for*/ + bottom_reserved_for_rss_bitmap = + roundup_pow_of_two(fixed_reserved_from_bot_rv + 1); + dev->phys_caps.base_sqpn = ALIGN(bottom_reserved_for_rss_bitmap, 8); { int sort[MLX4_NUM_QP_REGION]; @@ -490,8 +746,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) for (i = 1; i < MLX4_NUM_QP_REGION; ++i) sort[i] = i; - for (i = MLX4_NUM_QP_REGION; i > 0; --i) { - for (j = 2; j < i; ++j) { + for (i = MLX4_NUM_QP_REGION; i > MLX4_QP_REGION_BOTTOM; --i) { + for (j = MLX4_QP_REGION_BOTTOM + 2; j < i; ++j) { if (dev->caps.reserved_qps_cnt[sort[j]] > dev->caps.reserved_qps_cnt[sort[j - 1]]) { tmp = sort[j]; @@ -501,13 +757,12 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) } } - for (i = 1; i < MLX4_NUM_QP_REGION; ++i) { + for (i = MLX4_QP_REGION_BOTTOM + 1; i < MLX4_NUM_QP_REGION; ++i) { last_base -= dev->caps.reserved_qps_cnt[sort[i]]; dev->caps.reserved_qps_base[sort[i]] = last_base; reserved_from_top += dev->caps.reserved_qps_cnt[sort[i]]; } - } /* Reserve 8 real SQPs in both native and SRIOV modes. @@ -520,10 +775,17 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * b. All the proxy SQPs (8 per function) * c. All the tunnel QPs (8 per function) */ + reserved_from_bot = mlx4_num_reserved_sqps(dev); + if (reserved_from_bot + reserved_from_top > dev->caps.num_qps) { + mlx4_err(dev, "Number of reserved QPs is higher than number of QPs\n"); + return -EINVAL; + } + + err = mlx4_create_zones(dev, reserved_from_bot, reserved_from_bot, + bottom_reserved_for_rss_bitmap, + fixed_reserved_from_bot_rv, + max_table_offset); - err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, mlx4_num_reserved_sqps(dev), - reserved_from_top); if (err) return err; @@ -559,7 +821,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); if (err) goto err_mem; - return 0; + + return err; err_mem: kfree(dev->caps.qp0_tunnel); @@ -568,6 +831,7 @@ err_mem: kfree(dev->caps.qp1_proxy); dev->caps.qp0_tunnel = dev->caps.qp0_proxy = dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + mlx4_cleanup_qp_zones(dev); return err; } @@ -577,7 +841,8 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev) return; mlx4_CONF_SPECIAL_QP(dev, 0); - mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap); + + mlx4_cleanup_qp_zones(dev); } int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cd5cf6d957c..4efbd1eca61 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1543,16 +1543,21 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, int align; int base; int qpn; + u8 flags; switch (op) { case RES_OP_RESERVE: count = get_param_l(&in_param) & 0xffffff; + /* Turn off all unsupported QP allocation flags that the + * slave tries to set. + */ + flags = (get_param_l(&in_param) >> 24) & dev->caps.alloc_res_qp_mask; align = get_param_h(&in_param); err = mlx4_grant_resource(dev, slave, RES_QP, count, 0); if (err) return err; - err = __mlx4_qp_reserve_range(dev, count, align, &base); + err = __mlx4_qp_reserve_range(dev, count, align, &base, flags); if (err) { mlx4_release_resource(dev, slave, RES_QP, count, 0); return err; @@ -2872,6 +2877,23 @@ out_add: return err; } +int mlx4_CONFIG_DEV_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + u8 get = vhcr->op_modifier; + + if (get != 1) + return -EPERM; + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + + return err; +} + static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, int len, struct res_mtt **res) { |