| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563 |
- // SPDX-License-Identifier: GPL-2.0-only
- /****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2022 Advanced Micro Devices, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
- #include "tc_counters.h"
- #include "tc_encap_actions.h"
- #include "mae_counter_format.h"
- #include "mae.h"
- #include "rx_common.h"
- /* Counter-management hashtables */
- static const struct rhashtable_params efx_tc_counter_id_ht_params = {
- .key_len = offsetof(struct efx_tc_counter_index, linkage),
- .key_offset = 0,
- .head_offset = offsetof(struct efx_tc_counter_index, linkage),
- };
- static const struct rhashtable_params efx_tc_counter_ht_params = {
- .key_len = offsetof(struct efx_tc_counter, linkage),
- .key_offset = 0,
- .head_offset = offsetof(struct efx_tc_counter, linkage),
- };
- static void efx_tc_counter_free(void *ptr, void *__unused)
- {
- struct efx_tc_counter *cnt = ptr;
- WARN_ON(!list_empty(&cnt->users));
- /* We'd like to synchronize_rcu() here, but unfortunately we aren't
- * removing the element from the hashtable (it's not clear that's a
- * safe thing to do in an rhashtable_free_and_destroy free_fn), so
- * threads could still be obtaining new pointers to *cnt if they can
- * race against this function at all.
- */
- flush_work(&cnt->work);
- EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
- kfree(cnt);
- }
- static void efx_tc_counter_id_free(void *ptr, void *__unused)
- {
- struct efx_tc_counter_index *ctr = ptr;
- WARN_ON(refcount_read(&ctr->ref));
- kfree(ctr);
- }
- int efx_tc_init_counters(struct efx_nic *efx)
- {
- int rc;
- rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params);
- if (rc < 0)
- goto fail_counter_id_ht;
- rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params);
- if (rc < 0)
- goto fail_counter_ht;
- return 0;
- fail_counter_ht:
- rhashtable_destroy(&efx->tc->counter_id_ht);
- fail_counter_id_ht:
- return rc;
- }
- /* Only call this in init failure teardown.
- * Normal exit should fini instead as there may be entries in the table.
- */
- void efx_tc_destroy_counters(struct efx_nic *efx)
- {
- rhashtable_destroy(&efx->tc->counter_ht);
- rhashtable_destroy(&efx->tc->counter_id_ht);
- }
- void efx_tc_fini_counters(struct efx_nic *efx)
- {
- rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL);
- rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL);
- }
- static void efx_tc_counter_work(struct work_struct *work)
- {
- struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work);
- struct efx_tc_encap_action *encap;
- struct efx_tc_action_set *act;
- unsigned long touched;
- struct neighbour *n;
- spin_lock_bh(&cnt->lock);
- touched = READ_ONCE(cnt->touched);
- list_for_each_entry(act, &cnt->users, count_user) {
- encap = act->encap_md;
- if (!encap)
- continue;
- if (!encap->neigh) /* can't happen */
- continue;
- if (time_after_eq(encap->neigh->used, touched))
- continue;
- encap->neigh->used = touched;
- /* We have passed traffic using this ARP entry, so
- * indicate to the ARP cache that it's still active
- */
- if (encap->neigh->dst_ip)
- n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip,
- encap->neigh->egdev);
- else
- #if IS_ENABLED(CONFIG_IPV6)
- n = neigh_lookup(ipv6_stub->nd_tbl,
- &encap->neigh->dst_ip6,
- encap->neigh->egdev);
- #else
- n = NULL;
- #endif
- if (!n)
- continue;
- neigh_event_send(n, NULL);
- neigh_release(n);
- }
- spin_unlock_bh(&cnt->lock);
- }
- /* Counter allocation */
- struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
- int type)
- {
- struct efx_tc_counter *cnt;
- int rc, rc2;
- cnt = kzalloc_obj(*cnt, GFP_USER);
- if (!cnt)
- return ERR_PTR(-ENOMEM);
- spin_lock_init(&cnt->lock);
- INIT_WORK(&cnt->work, efx_tc_counter_work);
- cnt->touched = jiffies;
- cnt->type = type;
- rc = efx_mae_allocate_counter(efx, cnt);
- if (rc)
- goto fail1;
- INIT_LIST_HEAD(&cnt->users);
- rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage,
- efx_tc_counter_ht_params);
- if (rc)
- goto fail2;
- return cnt;
- fail2:
- /* If we get here, it implies that we couldn't insert into the table,
- * which in turn probably means that the fw_id was already taken.
- * In that case, it's unclear whether we really 'own' the fw_id; but
- * the firmware seemed to think we did, so it's proper to free it.
- */
- rc2 = efx_mae_free_counter(efx, cnt);
- if (rc2)
- netif_warn(efx, hw, efx->net_dev,
- "Failed to free MAE counter %u, rc %d\n",
- cnt->fw_id, rc2);
- fail1:
- kfree(cnt);
- return ERR_PTR(rc > 0 ? -EIO : rc);
- }
- void efx_tc_flower_release_counter(struct efx_nic *efx,
- struct efx_tc_counter *cnt)
- {
- int rc;
- rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage,
- efx_tc_counter_ht_params);
- rc = efx_mae_free_counter(efx, cnt);
- if (rc)
- netif_warn(efx, hw, efx->net_dev,
- "Failed to free MAE counter %u, rc %d\n",
- cnt->fw_id, rc);
- WARN_ON(!list_empty(&cnt->users));
- /* This doesn't protect counter updates coming in arbitrarily long
- * after we deleted the counter. The RCU just ensures that we won't
- * free the counter while another thread has a pointer to it.
- * Ensuring we don't update the wrong counter if the ID gets re-used
- * is handled by the generation count.
- */
- synchronize_rcu();
- flush_work(&cnt->work);
- EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock));
- kfree(cnt);
- }
- static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id(
- struct efx_nic *efx, int type, u32 fw_id)
- {
- struct efx_tc_counter key = {};
- key.fw_id = fw_id;
- key.type = type;
- return rhashtable_lookup_fast(&efx->tc->counter_ht, &key,
- efx_tc_counter_ht_params);
- }
- /* TC cookie to counter mapping */
- void efx_tc_flower_put_counter_index(struct efx_nic *efx,
- struct efx_tc_counter_index *ctr)
- {
- if (!refcount_dec_and_test(&ctr->ref))
- return; /* still in use */
- rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage,
- efx_tc_counter_id_ht_params);
- efx_tc_flower_release_counter(efx, ctr->cnt);
- kfree(ctr);
- }
- struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
- struct efx_nic *efx, unsigned long cookie,
- enum efx_tc_counter_type type)
- {
- struct efx_tc_counter_index *ctr, *old;
- struct efx_tc_counter *cnt;
- ctr = kzalloc_obj(*ctr, GFP_USER);
- if (!ctr)
- return ERR_PTR(-ENOMEM);
- ctr->cookie = cookie;
- old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht,
- &ctr->linkage,
- efx_tc_counter_id_ht_params);
- if (old) {
- /* don't need our new entry */
- kfree(ctr);
- if (IS_ERR(old)) /* oh dear, it's actually an error */
- return ERR_CAST(old);
- if (!refcount_inc_not_zero(&old->ref))
- return ERR_PTR(-EAGAIN);
- /* existing entry found */
- ctr = old;
- } else {
- cnt = efx_tc_flower_allocate_counter(efx, type);
- if (IS_ERR(cnt)) {
- rhashtable_remove_fast(&efx->tc->counter_id_ht,
- &ctr->linkage,
- efx_tc_counter_id_ht_params);
- kfree(ctr);
- return ERR_CAST(cnt);
- }
- ctr->cnt = cnt;
- refcount_set(&ctr->ref, 1);
- }
- return ctr;
- }
- struct efx_tc_counter_index *efx_tc_flower_find_counter_index(
- struct efx_nic *efx, unsigned long cookie)
- {
- struct efx_tc_counter_index key = {};
- key.cookie = cookie;
- return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key,
- efx_tc_counter_id_ht_params);
- }
- /* TC Channel. Counter updates are delivered on this channel's RXQ. */
- static void efx_tc_handle_no_channel(struct efx_nic *efx)
- {
- netif_warn(efx, drv, efx->net_dev,
- "MAE counters require MSI-X and 1 additional interrupt vector.\n");
- }
- static int efx_tc_probe_channel(struct efx_channel *channel)
- {
- struct efx_rx_queue *rx_queue = &channel->rx_queue;
- channel->irq_moderation_us = 0;
- rx_queue->core_index = 0;
- INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits);
- return 0;
- }
- static int efx_tc_start_channel(struct efx_channel *channel)
- {
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
- struct efx_nic *efx = channel->efx;
- return efx_mae_start_counters(efx, rx_queue);
- }
- static void efx_tc_stop_channel(struct efx_channel *channel)
- {
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
- struct efx_nic *efx = channel->efx;
- int rc;
- rc = efx_mae_stop_counters(efx, rx_queue);
- if (rc)
- netif_warn(efx, drv, efx->net_dev,
- "Failed to stop MAE counters streaming, rc=%d.\n",
- rc);
- rx_queue->grant_credits = false;
- flush_work(&rx_queue->grant_work);
- }
/* .post_remove hook: intentionally does nothing */
static void efx_tc_remove_channel(struct efx_channel *channel)
{
	/* nothing to undo */
}
- static void efx_tc_get_channel_name(struct efx_channel *channel,
- char *buf, size_t len)
- {
- snprintf(buf, len, "%s-mae", channel->efx->name);
- }
- static void efx_tc_counter_update(struct efx_nic *efx,
- enum efx_tc_counter_type counter_type,
- u32 counter_idx, u64 packets, u64 bytes,
- u32 mark)
- {
- struct efx_tc_counter *cnt;
- rcu_read_lock(); /* Protect against deletion of 'cnt' */
- cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx);
- if (!cnt) {
- /* This can legitimately happen when a counter is removed,
- * with updates for the counter still in-flight; however this
- * should be an infrequent occurrence.
- */
- if (net_ratelimit())
- netif_dbg(efx, drv, efx->net_dev,
- "Got update for unwanted MAE counter %u type %u\n",
- counter_idx, counter_type);
- goto out;
- }
- spin_lock_bh(&cnt->lock);
- if ((s32)mark - (s32)cnt->gen < 0) {
- /* This counter update packet is from before the counter was
- * allocated; thus it must be for a previous counter with
- * the same ID that has since been freed, and it should be
- * ignored.
- */
- } else {
- /* Update latest seen generation count. This ensures that
- * even a long-lived counter won't start getting ignored if
- * the generation count wraps around, unless it somehow
- * manages to go 1<<31 generations without an update.
- */
- cnt->gen = mark;
- /* update counter values */
- cnt->packets += packets;
- cnt->bytes += bytes;
- cnt->touched = jiffies;
- }
- spin_unlock_bh(&cnt->lock);
- schedule_work(&cnt->work);
- out:
- rcu_read_unlock();
- }
- static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark)
- {
- u16 n_counters, i;
- /* Header format:
- * + | 0 | 1 | 2 | 3 |
- * 0 |version | reserved |
- * 4 | seq_index | n_counters |
- */
- n_counters = le16_to_cpu(*(const __le16 *)(data + 6));
- /* Counter update entry format:
- * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f |
- * | counter_idx | packet_count | byte_count |
- */
- for (i = 0; i < n_counters; i++) {
- const void *entry = data + 8 + 16 * i;
- u64 packet_count, byte_count;
- u32 counter_idx;
- counter_idx = le32_to_cpu(*(const __le32 *)entry);
- packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) |
- ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32);
- byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) |
- ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16);
- efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx,
- packet_count, byte_count, mark);
- }
- }
/* Accessors for version 2 counter packets.  Each one checks the relevant
 * ERF_SC_PACKETISER_* layout constants at build time.
 */

/* Pointer to header @field, which must start on a byte boundary */
#define TCV2_HDR_PTR(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8)
/* Value of header @field, which must be exactly 8 bits wide */
#define TCV2_HDR_BYTE(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\
	 *TCV2_HDR_PTR(pkt, field))
/* Little-endian value of header @field, which must be exactly 16 bits wide
 * and 16-bit aligned
 */
#define TCV2_HDR_WORD(pkt, field)						\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\
	 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15),	\
	 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field))
/* Pointer to payload @field of entry @i, where the payload starts @poff
 * bytes into the packet.  @poff and @i are parenthesised so that expression
 * arguments (e.g. "a + b") cannot be split by operator precedence.
 */
#define TCV2_PKT_PTR(pkt, poff, i, field)					\
	((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7),	\
	 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN / 8 + (poff) +	\
	 (i) * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE)
- /* Read a little-endian 48-bit field with 16-bit alignment */
- static u64 efx_tc_read48(const __le16 *field)
- {
- u64 out = 0;
- int i;
- for (i = 0; i < 3; i++)
- out |= (u64)le16_to_cpu(field[i]) << (i * 16);
- return out;
- }
- static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx,
- const u8 *data, u32 mark)
- {
- u8 payload_offset, header_offset, ident;
- enum efx_tc_counter_type type;
- u16 n_counters, i;
- ident = TCV2_HDR_BYTE(data, IDENTIFIER);
- switch (ident) {
- case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR:
- type = EFX_TC_COUNTER_TYPE_AR;
- break;
- case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT:
- type = EFX_TC_COUNTER_TYPE_CT;
- break;
- case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR:
- type = EFX_TC_COUNTER_TYPE_OR;
- break;
- default:
- if (net_ratelimit())
- netif_err(efx, drv, efx->net_dev,
- "ignored v2 MAE counter packet (bad identifier %u"
- "), counters may be inaccurate\n", ident);
- return EFX_TC_COUNTER_TYPE_MAX;
- }
- header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET);
- /* mae_counter_format.h implies that this offset is fixed, since it
- * carries on with SOP-based LBNs for the fields in this header
- */
- if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) {
- if (net_ratelimit())
- netif_err(efx, drv, efx->net_dev,
- "choked on v2 MAE counter packet (bad header_offset %u"
- "), counters may be inaccurate\n", header_offset);
- return EFX_TC_COUNTER_TYPE_MAX;
- }
- payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET);
- n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT));
- for (i = 0; i < n_counters; i++) {
- const void *counter_idx_p, *packet_count_p, *byte_count_p;
- u64 packet_count, byte_count;
- u32 counter_idx;
- /* 24-bit field with 32-bit alignment */
- counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31);
- counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff;
- /* 48-bit field with 16-bit alignment */
- packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15);
- packet_count = efx_tc_read48((const __le16 *)packet_count_p);
- /* 48-bit field with 16-bit alignment */
- byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48);
- BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15);
- byte_count = efx_tc_read48((const __le16 *)byte_count_p);
- if (type == EFX_TC_COUNTER_TYPE_CT) {
- /* CT counters are 1-bit saturating counters to update
- * the lastuse time in CT stats. A received CT counter
- * should have packet counter to 0 and only LSB bit on
- * in byte counter.
- */
- if (packet_count || byte_count != 1)
- netdev_warn_once(efx->net_dev,
- "CT counter with inconsistent state (%llu, %llu)\n",
- packet_count, byte_count);
- /* Do not increment the driver's byte counter */
- byte_count = 0;
- }
- efx_tc_counter_update(efx, type, counter_idx, packet_count,
- byte_count, mark);
- }
- return type;
- }
- /* We always swallow the packet, whether successful or not, since it's not
- * a network packet and shouldn't ever be forwarded to the stack.
- * @mark is the generation count for counter allocations.
- */
- static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark)
- {
- struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
- struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue,
- channel->rx_pkt_index);
- const u8 *data = efx_rx_buf_va(rx_buf);
- struct efx_nic *efx = rx_queue->efx;
- enum efx_tc_counter_type type;
- u8 version;
- /* version is always first byte of packet */
- version = *data;
- switch (version) {
- case 1:
- type = EFX_TC_COUNTER_TYPE_AR;
- efx_tc_rx_version_1(efx, data, mark);
- break;
- case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2
- type = efx_tc_rx_version_2(efx, data, mark);
- break;
- default:
- if (net_ratelimit())
- netif_err(efx, drv, efx->net_dev,
- "choked on MAE counter packet (bad version %u"
- "); counters may be inaccurate\n",
- version);
- goto out;
- }
- if (type < EFX_TC_COUNTER_TYPE_MAX) {
- /* Update seen_gen unconditionally, to avoid a missed wakeup if
- * we race with efx_mae_stop_counters().
- */
- efx->tc->seen_gen[type] = mark;
- if (efx->tc->flush_counters &&
- (s32)(efx->tc->flush_gen[type] - mark) <= 0)
- wake_up(&efx->tc->flush_wq);
- }
- out:
- efx_free_rx_buffers(rx_queue, rx_buf, 1);
- channel->rx_pkt_n_frags = 0;
- return true;
- }
- const struct efx_channel_type efx_tc_channel_type = {
- .handle_no_channel = efx_tc_handle_no_channel,
- .pre_probe = efx_tc_probe_channel,
- .start = efx_tc_start_channel,
- .stop = efx_tc_stop_channel,
- .post_remove = efx_tc_remove_channel,
- .get_name = efx_tc_get_channel_name,
- .receive_raw = efx_tc_rx,
- .keep_eventq = true,
- };
|