rx.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /* Copyright (C) 2024-2025 Intel Corporation */
  3. #ifndef __LIBETH_RX_H
  4. #define __LIBETH_RX_H
  5. #include <linux/if_vlan.h>
  6. #include <net/page_pool/helpers.h>
  7. #include <net/xdp.h>
  8. /* Rx buffer management */
  9. /* Space reserved in front of each frame */
  10. #define LIBETH_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
  11. #define LIBETH_XDP_HEADROOM (ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
  12. NET_IP_ALIGN)
  13. /* Maximum headroom for worst-case calculations */
  14. #define LIBETH_MAX_HEADROOM LIBETH_XDP_HEADROOM
  15. /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
  16. #define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
  17. /* Maximum supported L2-L4 header length */
  18. #define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256))
  19. /* Always use order-0 pages */
  20. #define LIBETH_RX_PAGE_ORDER 0
  21. /* Pick a sane buffer stride and align to a cacheline boundary */
  22. #define LIBETH_RX_BUF_STRIDE SKB_DATA_ALIGN(128)
  23. /* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
  24. #define LIBETH_RX_PAGE_LEN(hr) \
  25. ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER), \
  26. LIBETH_RX_BUF_STRIDE)
  27. /**
  28. * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
  29. * @netmem: network memory reference holding the buffer
  30. * @offset: offset from the page start (to the headroom)
  31. * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
  32. *
  33. * Depending on the MTU, API switches between one-page-per-frame and shared
  34. * page model (to conserve memory on bigger-page platforms). In case of the
  35. * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
  36. */
  37. struct libeth_fqe {
  38. netmem_ref netmem;
  39. u32 offset;
  40. u32 truesize;
  41. } __aligned_largest;
  42. /**
  43. * enum libeth_fqe_type - enum representing types of Rx buffers
  44. * @LIBETH_FQE_MTU: buffer size is determined by MTU
  45. * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
  46. * @LIBETH_FQE_HDR: buffer size is ```LIBETH_MAX_HEAD```-sized, for headers
  47. */
  48. enum libeth_fqe_type {
  49. LIBETH_FQE_MTU = 0U,
  50. LIBETH_FQE_SHORT,
  51. LIBETH_FQE_HDR,
  52. };
  53. /**
  54. * struct libeth_fq - structure representing a buffer (fill) queue
  55. * @fp: hotpath part of the structure
  56. * @pp: &page_pool for buffer management
  57. * @fqes: array of Rx buffers
  58. * @truesize: size to allocate per buffer, w/overhead
  59. * @count: number of descriptors/buffers the queue has
  60. * @type: type of the buffers this queue has
  61. * @hsplit: flag whether header split is enabled
  62. * @xdp: flag indicating whether XDP is enabled
  63. * @buf_len: HW-writeable length per each buffer
  64. * @nid: ID of the closest NUMA node with memory
  65. */
  66. struct libeth_fq {
  67. struct_group_tagged(libeth_fq_fp, fp,
  68. struct page_pool *pp;
  69. struct libeth_fqe *fqes;
  70. u32 truesize;
  71. u32 count;
  72. );
  73. /* Cold fields */
  74. enum libeth_fqe_type type:2;
  75. bool hsplit:1;
  76. bool xdp:1;
  77. u32 buf_len;
  78. int nid;
  79. };
  80. int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
  81. void libeth_rx_fq_destroy(struct libeth_fq *fq);
  82. /**
  83. * libeth_rx_alloc - allocate a new Rx buffer
  84. * @fq: fill queue to allocate for
  85. * @i: index of the buffer within the queue
  86. *
  87. * Return: DMA address to be passed to HW for Rx on successful allocation,
  88. * ```DMA_MAPPING_ERROR``` otherwise.
  89. */
  90. static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
  91. {
  92. struct libeth_fqe *buf = &fq->fqes[i];
  93. buf->truesize = fq->truesize;
  94. buf->netmem = page_pool_dev_alloc_netmem(fq->pp, &buf->offset,
  95. &buf->truesize);
  96. if (unlikely(!buf->netmem))
  97. return DMA_MAPPING_ERROR;
  98. return page_pool_get_dma_addr_netmem(buf->netmem) + buf->offset +
  99. fq->pp->p.offset;
  100. }
  101. void libeth_rx_recycle_slow(netmem_ref netmem);
  102. /**
  103. * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
  104. * @fqe: buffer to process
  105. * @len: frame length from the descriptor
  106. *
  107. * Process the buffer after it's written by HW. The regular path is to
  108. * synchronize DMA for CPU, but in case of no data it will be immediately
  109. * recycled back to its PP.
  110. *
  111. * Return: true when there's data to process, false otherwise.
  112. */
  113. static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
  114. u32 len)
  115. {
  116. netmem_ref netmem = fqe->netmem;
  117. /* Very rare, but possible case. The most common reason:
  118. * the last fragment contained FCS only, which was then
  119. * stripped by the HW.
  120. */
  121. if (unlikely(!len)) {
  122. libeth_rx_recycle_slow(netmem);
  123. return false;
  124. }
  125. page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem), netmem,
  126. fqe->offset, len);
  127. return true;
  128. }
  129. /* Converting abstract packet type numbers into a software structure with
  130. * the packet parameters to do O(1) lookup on Rx.
  131. */
  132. enum {
  133. LIBETH_RX_PT_OUTER_L2 = 0U,
  134. LIBETH_RX_PT_OUTER_IPV4,
  135. LIBETH_RX_PT_OUTER_IPV6,
  136. };
  137. enum {
  138. LIBETH_RX_PT_NOT_FRAG = 0U,
  139. LIBETH_RX_PT_FRAG,
  140. };
  141. enum {
  142. LIBETH_RX_PT_TUNNEL_IP_NONE = 0U,
  143. LIBETH_RX_PT_TUNNEL_IP_IP,
  144. LIBETH_RX_PT_TUNNEL_IP_GRENAT,
  145. LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
  146. LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
  147. };
  148. enum {
  149. LIBETH_RX_PT_TUNNEL_END_NONE = 0U,
  150. LIBETH_RX_PT_TUNNEL_END_IPV4,
  151. LIBETH_RX_PT_TUNNEL_END_IPV6,
  152. };
  153. enum {
  154. LIBETH_RX_PT_INNER_NONE = 0U,
  155. LIBETH_RX_PT_INNER_UDP,
  156. LIBETH_RX_PT_INNER_TCP,
  157. LIBETH_RX_PT_INNER_SCTP,
  158. LIBETH_RX_PT_INNER_ICMP,
  159. LIBETH_RX_PT_INNER_TIMESYNC,
  160. };
  161. #define LIBETH_RX_PT_PAYLOAD_NONE PKT_HASH_TYPE_NONE
  162. #define LIBETH_RX_PT_PAYLOAD_L2 PKT_HASH_TYPE_L2
  163. #define LIBETH_RX_PT_PAYLOAD_L3 PKT_HASH_TYPE_L3
  164. #define LIBETH_RX_PT_PAYLOAD_L4 PKT_HASH_TYPE_L4
  165. struct libeth_rx_pt {
  166. u32 outer_ip:2;
  167. u32 outer_frag:1;
  168. u32 tunnel_type:3;
  169. u32 tunnel_end_prot:2;
  170. u32 tunnel_end_frag:1;
  171. u32 inner_prot:3;
  172. enum pkt_hash_types payload_layer:2;
  173. u32 pad:2;
  174. enum xdp_rss_hash_type hash_type:16;
  175. };
  176. /**
  177. * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
  178. * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
  179. * @ipe: IP checksum error
  180. * @eipe: external (outermost) IP header (only for tunels)
  181. * @eudpe: external (outermost) UDP checksum error (only for tunels)
  182. * @ipv6exadd: IPv6 header with extension headers
  183. * @l4e: L4 integrity error
  184. * @pprs: set for packets that skip checksum calculation in the HW pre parser
  185. * @nat: the packet is a UDP tunneled packet
  186. * @raw_csum_valid: set if raw checksum is valid
  187. * @pad: padding to naturally align raw_csum field
  188. * @raw_csum: raw checksum
  189. */
  190. struct libeth_rx_csum {
  191. u32 l3l4p:1;
  192. u32 ipe:1;
  193. u32 eipe:1;
  194. u32 eudpe:1;
  195. u32 ipv6exadd:1;
  196. u32 l4e:1;
  197. u32 pprs:1;
  198. u32 nat:1;
  199. u32 raw_csum_valid:1;
  200. u32 pad:7;
  201. u32 raw_csum:16;
  202. };
  203. /**
  204. * struct libeth_rqe_info - receive queue element info
  205. * @len: packet length
  206. * @ptype: packet type based on types programmed into the device
  207. * @eop: whether it's the last fragment of the packet
  208. * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error
  209. * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
  210. */
  211. struct libeth_rqe_info {
  212. u32 len;
  213. u32 ptype:14;
  214. u32 eop:1;
  215. u32 rxe:1;
  216. u32 vlan:16;
  217. };
  218. void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
  219. /**
  220. * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
  221. * @pt: packet type params
  222. *
  223. * Wrapper to compile out the IPv6 code from the drivers when not supported
  224. * by the kernel.
  225. *
  226. * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
  227. */
  228. static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
  229. {
  230. #if !IS_ENABLED(CONFIG_IPV6)
  231. switch (pt.outer_ip) {
  232. case LIBETH_RX_PT_OUTER_IPV4:
  233. return LIBETH_RX_PT_OUTER_IPV4;
  234. default:
  235. return LIBETH_RX_PT_OUTER_L2;
  236. }
  237. #else
  238. return pt.outer_ip;
  239. #endif
  240. }
  241. /* libeth_has_*() can be used to quickly check whether the HW metadata is
  242. * available to avoid further expensive processing such as descriptor reads.
  243. * They already check for the corresponding netdev feature to be enabled,
  244. * thus can be used as drop-in replacements.
  245. */
  246. static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
  247. struct libeth_rx_pt pt)
  248. {
  249. /* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
  250. * it is enough to check only for the L4 type.
  251. */
  252. return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
  253. (dev->features & NETIF_F_RXCSUM));
  254. }
  255. static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
  256. struct libeth_rx_pt pt)
  257. {
  258. return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
  259. (dev->features & NETIF_F_RXHASH));
  260. }
  261. /**
  262. * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT
  263. * @skb: skb to fill the hash in
  264. * @hash: 32-bit hash value from the descriptor
  265. * @pt: packet type
  266. */
  267. static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
  268. struct libeth_rx_pt pt)
  269. {
  270. skb_set_hash(skb, hash, pt.payload_layer);
  271. }
  272. #endif /* __LIBETH_RX_H */