xdp.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* net/core/xdp.c
  3. *
  4. * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
  5. */
  6. #include <linux/bpf.h>
  7. #include <linux/btf.h>
  8. #include <linux/btf_ids.h>
  9. #include <linux/filter.h>
  10. #include <linux/types.h>
  11. #include <linux/mm.h>
  12. #include <linux/netdevice.h>
  13. #include <linux/slab.h>
  14. #include <linux/idr.h>
  15. #include <linux/rhashtable.h>
  16. #include <linux/bug.h>
  17. #include <net/page_pool/helpers.h>
  18. #include <net/hotdata.h>
  19. #include <net/netdev_lock.h>
  20. #include <net/xdp.h>
  21. #include <net/xdp_priv.h> /* struct xdp_mem_allocator */
  22. #include <trace/events/xdp.h>
  23. #include <net/xdp_sock_drv.h>
  24. #define REG_STATE_NEW 0x0
  25. #define REG_STATE_REGISTERED 0x1
  26. #define REG_STATE_UNREGISTERED 0x2
  27. #define REG_STATE_UNUSED 0x3
  28. static DEFINE_IDA(mem_id_pool);
  29. static DEFINE_MUTEX(mem_id_lock);
  30. #define MEM_ID_MAX 0xFFFE
  31. #define MEM_ID_MIN 1
  32. static int mem_id_next = MEM_ID_MIN;
  33. static bool mem_id_init; /* false */
  34. static struct rhashtable *mem_id_ht;
  35. static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
  36. {
  37. const u32 *k = data;
  38. const u32 key = *k;
  39. BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
  40. != sizeof(u32));
  41. /* Use cyclic increasing ID as direct hash key */
  42. return key;
  43. }
  44. static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
  45. const void *ptr)
  46. {
  47. const struct xdp_mem_allocator *xa = ptr;
  48. u32 mem_id = *(u32 *)arg->key;
  49. return xa->mem.id != mem_id;
  50. }
  51. static const struct rhashtable_params mem_id_rht_params = {
  52. .nelem_hint = 64,
  53. .head_offset = offsetof(struct xdp_mem_allocator, node),
  54. .key_offset = offsetof(struct xdp_mem_allocator, mem.id),
  55. .key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
  56. .max_size = MEM_ID_MAX,
  57. .min_size = 8,
  58. .automatic_shrinking = true,
  59. .hashfn = xdp_mem_id_hashfn,
  60. .obj_cmpfn = xdp_mem_id_cmp,
  61. };
  62. static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
  63. {
  64. struct xdp_mem_allocator *xa;
  65. xa = container_of(rcu, struct xdp_mem_allocator, rcu);
  66. /* Allow this ID to be reused */
  67. ida_free(&mem_id_pool, xa->mem.id);
  68. kfree(xa);
  69. }
  70. static void mem_xa_remove(struct xdp_mem_allocator *xa)
  71. {
  72. trace_mem_disconnect(xa);
  73. if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
  74. call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
  75. }
  76. static void mem_allocator_disconnect(void *allocator)
  77. {
  78. struct xdp_mem_allocator *xa;
  79. struct rhashtable_iter iter;
  80. mutex_lock(&mem_id_lock);
  81. rhashtable_walk_enter(mem_id_ht, &iter);
  82. do {
  83. rhashtable_walk_start(&iter);
  84. while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
  85. if (xa->allocator == allocator)
  86. mem_xa_remove(xa);
  87. }
  88. rhashtable_walk_stop(&iter);
  89. } while (xa == ERR_PTR(-EAGAIN));
  90. rhashtable_walk_exit(&iter);
  91. mutex_unlock(&mem_id_lock);
  92. }
  93. void xdp_unreg_mem_model(struct xdp_mem_info *mem)
  94. {
  95. struct xdp_mem_allocator *xa;
  96. int type = mem->type;
  97. int id = mem->id;
  98. /* Reset mem info to defaults */
  99. mem->id = 0;
  100. mem->type = 0;
  101. if (id == 0)
  102. return;
  103. if (type == MEM_TYPE_PAGE_POOL) {
  104. xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
  105. page_pool_destroy(xa->page_pool);
  106. }
  107. }
  108. EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
  109. void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
  110. {
  111. if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
  112. WARN(1, "Missing register, driver bug");
  113. return;
  114. }
  115. xdp_unreg_mem_model(&xdp_rxq->mem);
  116. }
  117. EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
  118. void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
  119. {
  120. /* Simplify driver cleanup code paths, allow unreg "unused" */
  121. if (xdp_rxq->reg_state == REG_STATE_UNUSED)
  122. return;
  123. xdp_rxq_info_unreg_mem_model(xdp_rxq);
  124. xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
  125. xdp_rxq->dev = NULL;
  126. }
  127. EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
  128. static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
  129. {
  130. memset(xdp_rxq, 0, sizeof(*xdp_rxq));
  131. }
  132. /* Returns 0 on success, negative on failure */
  133. int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
  134. struct net_device *dev, u32 queue_index,
  135. unsigned int napi_id, u32 frag_size)
  136. {
  137. if (!dev) {
  138. WARN(1, "Missing net_device from driver");
  139. return -ENODEV;
  140. }
  141. if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
  142. WARN(1, "Driver promised not to register this");
  143. return -EINVAL;
  144. }
  145. if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
  146. WARN(1, "Missing unregister, handled but fix driver");
  147. xdp_rxq_info_unreg(xdp_rxq);
  148. }
  149. /* State either UNREGISTERED or NEW */
  150. xdp_rxq_info_init(xdp_rxq);
  151. xdp_rxq->dev = dev;
  152. xdp_rxq->queue_index = queue_index;
  153. xdp_rxq->frag_size = frag_size;
  154. xdp_rxq->reg_state = REG_STATE_REGISTERED;
  155. return 0;
  156. }
  157. EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);
  158. void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
  159. {
  160. xdp_rxq->reg_state = REG_STATE_UNUSED;
  161. }
  162. EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
  163. bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
  164. {
  165. return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
  166. }
  167. EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
  168. static int __mem_id_init_hash_table(void)
  169. {
  170. struct rhashtable *rht;
  171. int ret;
  172. if (unlikely(mem_id_init))
  173. return 0;
  174. rht = kzalloc_obj(*rht);
  175. if (!rht)
  176. return -ENOMEM;
  177. ret = rhashtable_init(rht, &mem_id_rht_params);
  178. if (ret < 0) {
  179. kfree(rht);
  180. return ret;
  181. }
  182. mem_id_ht = rht;
  183. smp_mb(); /* mutex lock should provide enough pairing */
  184. mem_id_init = true;
  185. return 0;
  186. }
  187. /* Allocate a cyclic ID that maps to allocator pointer.
  188. * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
  189. *
  190. * Caller must lock mem_id_lock.
  191. */
  192. static int __mem_id_cyclic_get(gfp_t gfp)
  193. {
  194. int retries = 1;
  195. int id;
  196. again:
  197. id = ida_alloc_range(&mem_id_pool, mem_id_next, MEM_ID_MAX - 1, gfp);
  198. if (id < 0) {
  199. if (id == -ENOSPC) {
  200. /* Cyclic allocator, reset next id */
  201. if (retries--) {
  202. mem_id_next = MEM_ID_MIN;
  203. goto again;
  204. }
  205. }
  206. return id; /* errno */
  207. }
  208. mem_id_next = id + 1;
  209. return id;
  210. }
  211. static bool __is_supported_mem_type(enum xdp_mem_type type)
  212. {
  213. if (type == MEM_TYPE_PAGE_POOL)
  214. return is_page_pool_compiled_in();
  215. if (type >= MEM_TYPE_MAX)
  216. return false;
  217. return true;
  218. }
  219. static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
  220. enum xdp_mem_type type,
  221. void *allocator)
  222. {
  223. struct xdp_mem_allocator *xdp_alloc;
  224. gfp_t gfp = GFP_KERNEL;
  225. int id, errno, ret;
  226. void *ptr;
  227. if (!__is_supported_mem_type(type))
  228. return ERR_PTR(-EOPNOTSUPP);
  229. mem->type = type;
  230. if (!allocator) {
  231. if (type == MEM_TYPE_PAGE_POOL)
  232. return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
  233. return NULL;
  234. }
  235. /* Delay init of rhashtable to save memory if feature isn't used */
  236. if (!mem_id_init) {
  237. mutex_lock(&mem_id_lock);
  238. ret = __mem_id_init_hash_table();
  239. mutex_unlock(&mem_id_lock);
  240. if (ret < 0)
  241. return ERR_PTR(ret);
  242. }
  243. xdp_alloc = kzalloc_obj(*xdp_alloc, gfp);
  244. if (!xdp_alloc)
  245. return ERR_PTR(-ENOMEM);
  246. mutex_lock(&mem_id_lock);
  247. id = __mem_id_cyclic_get(gfp);
  248. if (id < 0) {
  249. errno = id;
  250. goto err;
  251. }
  252. mem->id = id;
  253. xdp_alloc->mem = *mem;
  254. xdp_alloc->allocator = allocator;
  255. /* Insert allocator into ID lookup table */
  256. ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
  257. if (IS_ERR(ptr)) {
  258. ida_free(&mem_id_pool, mem->id);
  259. mem->id = 0;
  260. errno = PTR_ERR(ptr);
  261. goto err;
  262. }
  263. if (type == MEM_TYPE_PAGE_POOL)
  264. page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);
  265. mutex_unlock(&mem_id_lock);
  266. return xdp_alloc;
  267. err:
  268. mutex_unlock(&mem_id_lock);
  269. kfree(xdp_alloc);
  270. return ERR_PTR(errno);
  271. }
  272. int xdp_reg_mem_model(struct xdp_mem_info *mem,
  273. enum xdp_mem_type type, void *allocator)
  274. {
  275. struct xdp_mem_allocator *xdp_alloc;
  276. xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
  277. if (IS_ERR(xdp_alloc))
  278. return PTR_ERR(xdp_alloc);
  279. return 0;
  280. }
  281. EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
  282. int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
  283. enum xdp_mem_type type, void *allocator)
  284. {
  285. struct xdp_mem_allocator *xdp_alloc;
  286. if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
  287. WARN(1, "Missing register, driver bug");
  288. return -EFAULT;
  289. }
  290. xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
  291. if (IS_ERR(xdp_alloc))
  292. return PTR_ERR(xdp_alloc);
  293. if (type == MEM_TYPE_XSK_BUFF_POOL && allocator)
  294. xsk_pool_set_rxq_info(allocator, xdp_rxq);
  295. if (trace_mem_connect_enabled() && xdp_alloc)
  296. trace_mem_connect(xdp_alloc, xdp_rxq);
  297. return 0;
  298. }
  299. EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  300. /**
  301. * xdp_reg_page_pool - register &page_pool as a memory provider for XDP
  302. * @pool: &page_pool to register
  303. *
  304. * Can be used to register pools manually without connecting to any XDP RxQ
  305. * info, so that the XDP layer will be aware of them. Then, they can be
  306. * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool().
  307. *
  308. * Return: %0 on success, -errno on error.
  309. */
  310. int xdp_reg_page_pool(struct page_pool *pool)
  311. {
  312. struct xdp_mem_info mem;
  313. return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
  314. }
  315. EXPORT_SYMBOL_GPL(xdp_reg_page_pool);
  316. /**
  317. * xdp_unreg_page_pool - unregister &page_pool from the memory providers list
  318. * @pool: &page_pool to unregister
  319. *
  320. * A shorthand for manual unregistering page pools. If the pool was previously
  321. * attached to an RxQ info, it must be detached first.
  322. */
  323. void xdp_unreg_page_pool(const struct page_pool *pool)
  324. {
  325. struct xdp_mem_info mem = {
  326. .type = MEM_TYPE_PAGE_POOL,
  327. .id = pool->xdp_mem_id,
  328. };
  329. xdp_unreg_mem_model(&mem);
  330. }
  331. EXPORT_SYMBOL_GPL(xdp_unreg_page_pool);
  332. /**
  333. * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info
  334. * @xdp_rxq: XDP RxQ info to attach the pool to
  335. * @pool: pool to attach
  336. *
  337. * If the pool was registered manually, this function must be called instead
  338. * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info.
  339. */
  340. void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq,
  341. const struct page_pool *pool)
  342. {
  343. struct xdp_mem_info mem = {
  344. .type = MEM_TYPE_PAGE_POOL,
  345. .id = pool->xdp_mem_id,
  346. };
  347. xdp_rxq_info_attach_mem_model(xdp_rxq, &mem);
  348. }
  349. EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool);
  350. /* XDP RX runs under NAPI protection, and in different delivery error
  351. * scenarios (e.g. queue full), it is possible to return the xdp_frame
  352. * while still leveraging this protection. The @napi_direct boolean
  353. * is used for those calls sites. Thus, allowing for faster recycling
  354. * of xdp_frames/pages in those cases.
  355. */
  356. void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
  357. bool napi_direct, struct xdp_buff *xdp)
  358. {
  359. switch (mem_type) {
  360. case MEM_TYPE_PAGE_POOL:
  361. netmem = netmem_compound_head(netmem);
  362. if (napi_direct && xdp_return_frame_no_direct())
  363. napi_direct = false;
  364. /* No need to check netmem_is_pp() as mem->type knows this a
  365. * page_pool page
  366. */
  367. page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
  368. napi_direct);
  369. break;
  370. case MEM_TYPE_PAGE_SHARED:
  371. page_frag_free(__netmem_address(netmem));
  372. break;
  373. case MEM_TYPE_PAGE_ORDER0:
  374. put_page(__netmem_to_page(netmem));
  375. break;
  376. case MEM_TYPE_XSK_BUFF_POOL:
  377. /* NB! Only valid from an xdp_buff! */
  378. xsk_buff_free(xdp);
  379. break;
  380. default:
  381. /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
  382. WARN(1, "Incorrect XDP memory type (%d) usage", mem_type);
  383. break;
  384. }
  385. }
  386. void xdp_return_frame(struct xdp_frame *xdpf)
  387. {
  388. struct skb_shared_info *sinfo;
  389. if (likely(!xdp_frame_has_frags(xdpf)))
  390. goto out;
  391. sinfo = xdp_get_shared_info_from_frame(xdpf);
  392. for (u32 i = 0; i < sinfo->nr_frags; i++)
  393. __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
  394. false, NULL);
  395. out:
  396. __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL);
  397. }
  398. EXPORT_SYMBOL_GPL(xdp_return_frame);
  399. void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
  400. {
  401. struct skb_shared_info *sinfo;
  402. if (likely(!xdp_frame_has_frags(xdpf)))
  403. goto out;
  404. sinfo = xdp_get_shared_info_from_frame(xdpf);
  405. for (u32 i = 0; i < sinfo->nr_frags; i++)
  406. __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type,
  407. true, NULL);
  408. out:
  409. __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL);
  410. }
  411. EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
  412. /* XDP bulk APIs introduce a defer/flush mechanism to return
  413. * pages belonging to the same xdp_mem_allocator object
  414. * (identified via the mem.id field) in bulk to optimize
  415. * I-cache and D-cache.
  416. * The bulk queue size is set to 16 to be aligned to how
  417. * XDP_REDIRECT bulking works. The bulk is flushed when
  418. * it is full or when mem.id changes.
  419. * xdp_frame_bulk is usually stored/allocated on the function
  420. * call-stack to avoid locking penalties.
  421. */
  422. /* Must be called with rcu_read_lock held */
  423. void xdp_return_frame_bulk(struct xdp_frame *xdpf,
  424. struct xdp_frame_bulk *bq)
  425. {
  426. if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) {
  427. xdp_return_frame(xdpf);
  428. return;
  429. }
  430. if (bq->count == XDP_BULK_QUEUE_SIZE)
  431. xdp_flush_frame_bulk(bq);
  432. if (unlikely(xdp_frame_has_frags(xdpf))) {
  433. struct skb_shared_info *sinfo;
  434. int i;
  435. sinfo = xdp_get_shared_info_from_frame(xdpf);
  436. for (i = 0; i < sinfo->nr_frags; i++) {
  437. skb_frag_t *frag = &sinfo->frags[i];
  438. bq->q[bq->count++] = skb_frag_netmem(frag);
  439. if (bq->count == XDP_BULK_QUEUE_SIZE)
  440. xdp_flush_frame_bulk(bq);
  441. }
  442. }
  443. bq->q[bq->count++] = virt_to_netmem(xdpf->data);
  444. }
  445. EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
  446. /**
  447. * xdp_return_frag -- free one XDP frag or decrement its refcount
  448. * @netmem: network memory reference to release
  449. * @xdp: &xdp_buff to release the frag for
  450. */
  451. void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp)
  452. {
  453. __xdp_return(netmem, xdp->rxq->mem.type, true, NULL);
  454. }
  455. EXPORT_SYMBOL_GPL(xdp_return_frag);
  456. void xdp_return_buff(struct xdp_buff *xdp)
  457. {
  458. struct skb_shared_info *sinfo;
  459. if (likely(!xdp_buff_has_frags(xdp)))
  460. goto out;
  461. sinfo = xdp_get_shared_info_from_buff(xdp);
  462. for (u32 i = 0; i < sinfo->nr_frags; i++)
  463. __xdp_return(skb_frag_netmem(&sinfo->frags[i]),
  464. xdp->rxq->mem.type, true, xdp);
  465. out:
  466. __xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp);
  467. }
  468. EXPORT_SYMBOL_GPL(xdp_return_buff);
  469. void xdp_attachment_setup(struct xdp_attachment_info *info,
  470. struct netdev_bpf *bpf)
  471. {
  472. if (info->prog)
  473. bpf_prog_put(info->prog);
  474. info->prog = bpf->prog;
  475. info->flags = bpf->flags;
  476. }
  477. EXPORT_SYMBOL_GPL(xdp_attachment_setup);
  478. struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
  479. {
  480. unsigned int metasize, totsize;
  481. void *addr, *data_to_copy;
  482. struct xdp_frame *xdpf;
  483. struct page *page;
  484. /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
  485. metasize = xdp_data_meta_unsupported(xdp) ? 0 :
  486. xdp->data - xdp->data_meta;
  487. totsize = xdp->data_end - xdp->data + metasize;
  488. if (sizeof(*xdpf) + totsize > PAGE_SIZE)
  489. return NULL;
  490. page = dev_alloc_page();
  491. if (!page)
  492. return NULL;
  493. addr = page_to_virt(page);
  494. xdpf = addr;
  495. memset(xdpf, 0, sizeof(*xdpf));
  496. addr += sizeof(*xdpf);
  497. data_to_copy = metasize ? xdp->data_meta : xdp->data;
  498. memcpy(addr, data_to_copy, totsize);
  499. xdpf->data = addr + metasize;
  500. xdpf->len = totsize - metasize;
  501. xdpf->headroom = 0;
  502. xdpf->metasize = metasize;
  503. xdpf->frame_sz = PAGE_SIZE;
  504. xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
  505. xsk_buff_free(xdp);
  506. return xdpf;
  507. }
  508. EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
  509. /* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
  510. void xdp_warn(const char *msg, const char *func, const int line)
  511. {
  512. WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
  513. };
  514. EXPORT_SYMBOL_GPL(xdp_warn);
  515. /**
  516. * xdp_build_skb_from_buff - create an skb from &xdp_buff
  517. * @xdp: &xdp_buff to convert to an skb
  518. *
  519. * Perform common operations to create a new skb to pass up the stack from
  520. * &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize
  521. * skb data pointers and offsets, set the recycle bit if the buff is
  522. * PP-backed, Rx queue index, protocol and update frags info.
  523. *
  524. * Return: new &sk_buff on success, %NULL on error.
  525. */
  526. struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
  527. {
  528. const struct xdp_rxq_info *rxq = xdp->rxq;
  529. const struct skb_shared_info *sinfo;
  530. struct sk_buff *skb;
  531. u32 nr_frags = 0;
  532. int metalen;
  533. if (unlikely(xdp_buff_has_frags(xdp))) {
  534. sinfo = xdp_get_shared_info_from_buff(xdp);
  535. nr_frags = sinfo->nr_frags;
  536. }
  537. skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
  538. if (unlikely(!skb))
  539. return NULL;
  540. skb_reserve(skb, xdp->data - xdp->data_hard_start);
  541. __skb_put(skb, xdp->data_end - xdp->data);
  542. metalen = xdp->data - xdp->data_meta;
  543. if (metalen > 0)
  544. skb_metadata_set(skb, metalen);
  545. if (rxq->mem.type == MEM_TYPE_PAGE_POOL)
  546. skb_mark_for_recycle(skb);
  547. skb_record_rx_queue(skb, rxq->queue_index);
  548. if (unlikely(nr_frags)) {
  549. u32 tsize;
  550. tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
  551. xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
  552. tsize, xdp_buff_get_skb_flags(xdp));
  553. }
  554. skb->protocol = eth_type_trans(skb, rxq->dev);
  555. return skb;
  556. }
  557. EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
  558. /**
  559. * xdp_copy_frags_from_zc - copy frags from XSk buff to skb
  560. * @skb: skb to copy frags to
  561. * @xdp: XSk &xdp_buff from which the frags will be copied
  562. * @pp: &page_pool backing page allocation, if available
  563. *
  564. * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack.
  565. * Allocate a new buffer for each frag, copy it and attach to the skb.
  566. *
  567. * Return: true on success, false on netmem allocation fail.
  568. */
  569. static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb,
  570. const struct xdp_buff *xdp,
  571. struct page_pool *pp)
  572. {
  573. struct skb_shared_info *sinfo = skb_shinfo(skb);
  574. const struct skb_shared_info *xinfo;
  575. u32 nr_frags, tsize = 0;
  576. u32 flags = 0;
  577. xinfo = xdp_get_shared_info_from_buff(xdp);
  578. nr_frags = xinfo->nr_frags;
  579. for (u32 i = 0; i < nr_frags; i++) {
  580. const skb_frag_t *frag = &xinfo->frags[i];
  581. u32 len = skb_frag_size(frag);
  582. u32 offset, truesize = len;
  583. struct page *page;
  584. page = page_pool_dev_alloc(pp, &offset, &truesize);
  585. if (unlikely(!page)) {
  586. sinfo->nr_frags = i;
  587. return false;
  588. }
  589. memcpy(page_address(page) + offset, skb_frag_address(frag),
  590. LARGEST_ALIGN(len));
  591. __skb_fill_page_desc_noacc(sinfo, i, page, offset, len);
  592. tsize += truesize;
  593. if (page_is_pfmemalloc(page))
  594. flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC;
  595. }
  596. xdp_update_skb_frags_info(skb, nr_frags, xinfo->xdp_frags_size, tsize,
  597. flags);
  598. return true;
  599. }
  600. /**
  601. * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff
  602. * @xdp: source XSk buff
  603. *
  604. * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb
  605. * head, new buffer for the head, copy the data and initialize the skb fields.
  606. * If there are frags, allocate new buffers for them and copy.
  607. * Buffers are allocated from the system percpu pools to try recycling them.
  608. * If new skb was built successfully, @xdp is returned to XSk pool's freelist.
  609. * On error, it remains untouched and the caller must take care of this.
  610. *
  611. * Return: new &sk_buff on success, %NULL on error.
  612. */
  613. struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp)
  614. {
  615. const struct xdp_rxq_info *rxq = xdp->rxq;
  616. u32 len = xdp->data_end - xdp->data_meta;
  617. u32 truesize = xdp->frame_sz;
  618. struct sk_buff *skb = NULL;
  619. struct page_pool *pp;
  620. int metalen;
  621. void *data;
  622. if (!IS_ENABLED(CONFIG_PAGE_POOL))
  623. return NULL;
  624. local_lock_nested_bh(&system_page_pool.bh_lock);
  625. pp = this_cpu_read(system_page_pool.pool);
  626. data = page_pool_dev_alloc_va(pp, &truesize);
  627. if (unlikely(!data))
  628. goto out;
  629. skb = napi_build_skb(data, truesize);
  630. if (unlikely(!skb)) {
  631. page_pool_free_va(pp, data, true);
  632. goto out;
  633. }
  634. skb_mark_for_recycle(skb);
  635. skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
  636. memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len));
  637. metalen = xdp->data - xdp->data_meta;
  638. if (metalen > 0) {
  639. skb_metadata_set(skb, metalen);
  640. __skb_pull(skb, metalen);
  641. }
  642. skb_record_rx_queue(skb, rxq->queue_index);
  643. if (unlikely(xdp_buff_has_frags(xdp)) &&
  644. unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) {
  645. napi_consume_skb(skb, true);
  646. skb = NULL;
  647. goto out;
  648. }
  649. xsk_buff_free(xdp);
  650. skb->protocol = eth_type_trans(skb, rxq->dev);
  651. out:
  652. local_unlock_nested_bh(&system_page_pool.bh_lock);
  653. return skb;
  654. }
  655. EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc);
  656. struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
  657. struct sk_buff *skb,
  658. struct net_device *dev)
  659. {
  660. struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
  661. unsigned int headroom, frame_size;
  662. void *hard_start;
  663. u8 nr_frags;
  664. /* xdp frags frame */
  665. if (unlikely(xdp_frame_has_frags(xdpf)))
  666. nr_frags = sinfo->nr_frags;
  667. /* Part of headroom was reserved to xdpf */
  668. headroom = sizeof(*xdpf) + xdpf->headroom;
  669. /* Memory size backing xdp_frame data already have reserved
  670. * room for build_skb to place skb_shared_info in tailroom.
  671. */
  672. frame_size = xdpf->frame_sz;
  673. hard_start = xdpf->data - headroom;
  674. skb = build_skb_around(skb, hard_start, frame_size);
  675. if (unlikely(!skb))
  676. return NULL;
  677. skb_reserve(skb, headroom);
  678. __skb_put(skb, xdpf->len);
  679. if (xdpf->metasize)
  680. skb_metadata_set(skb, xdpf->metasize);
  681. if (unlikely(xdp_frame_has_frags(xdpf)))
  682. xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
  683. nr_frags * xdpf->frame_sz,
  684. xdp_frame_get_skb_flags(xdpf));
  685. /* Essential SKB info: protocol and skb->dev */
  686. skb->protocol = eth_type_trans(skb, dev);
  687. /* Optional SKB info, currently missing:
  688. * - HW checksum info (skb->ip_summed)
  689. * - HW RX hash (skb_set_hash)
  690. * - RX ring dev queue index (skb_record_rx_queue)
  691. */
  692. if (xdpf->mem_type == MEM_TYPE_PAGE_POOL)
  693. skb_mark_for_recycle(skb);
  694. /* Allow SKB to reuse area used by xdp_frame */
  695. xdp_scrub_frame(xdpf);
  696. return skb;
  697. }
  698. EXPORT_SYMBOL_GPL(__xdp_build_skb_from_frame);
  699. struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
  700. struct net_device *dev)
  701. {
  702. struct sk_buff *skb;
  703. skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC);
  704. if (unlikely(!skb))
  705. return NULL;
  706. memset(skb, 0, offsetof(struct sk_buff, tail));
  707. return __xdp_build_skb_from_frame(xdpf, skb, dev);
  708. }
  709. EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
  710. struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
  711. {
  712. unsigned int headroom, totalsize;
  713. struct xdp_frame *nxdpf;
  714. struct page *page;
  715. void *addr;
  716. headroom = xdpf->headroom + sizeof(*xdpf);
  717. totalsize = headroom + xdpf->len;
  718. if (unlikely(totalsize > PAGE_SIZE))
  719. return NULL;
  720. page = dev_alloc_page();
  721. if (!page)
  722. return NULL;
  723. addr = page_to_virt(page);
  724. memcpy(addr, xdpf, totalsize);
  725. nxdpf = addr;
  726. nxdpf->data = addr + headroom;
  727. nxdpf->frame_sz = PAGE_SIZE;
  728. nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
  729. return nxdpf;
  730. }
  731. __bpf_kfunc_start_defs();
  732. /**
  733. * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
  734. * @ctx: XDP context pointer.
  735. * @timestamp: Return value pointer.
  736. *
  737. * Return:
  738. * * Returns 0 on success or ``-errno`` on error.
  739. * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc
  740. * * ``-ENODATA`` : means no RX-timestamp available for this frame
  741. */
  742. __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
  743. {
  744. return -EOPNOTSUPP;
  745. }
  746. /**
  747. * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
  748. * @ctx: XDP context pointer.
  749. * @hash: Return value pointer.
  750. * @rss_type: Return value pointer for RSS type.
  751. *
  752. * The RSS hash type (@rss_type) specifies what portion of packet headers NIC
  753. * hardware used when calculating RSS hash value. The RSS type can be decoded
  754. * via &enum xdp_rss_hash_type either matching on individual L3/L4 bits
  755. * ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types*
  756. * ``XDP_RSS_TYPE_L*``.
  757. *
  758. * Return:
  759. * * Returns 0 on success or ``-errno`` on error.
  760. * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
  761. * * ``-ENODATA`` : means no RX-hash available for this frame
  762. */
  763. __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
  764. enum xdp_rss_hash_type *rss_type)
  765. {
  766. return -EOPNOTSUPP;
  767. }
  768. /**
  769. * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag
  770. * @ctx: XDP context pointer.
  771. * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID).
  772. * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP)
  773. *
  774. * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*,
  775. * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use
  776. * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)**
  777. * and should be used as follows:
  778. * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();``
  779. *
  780. * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag.
  781. * Driver is expected to provide those in **host byte order (usually LE)**,
  782. * so the bpf program should not perform byte conversion.
  783. * According to 802.1Q standard, *VLAN TCI (Tag control information)*
  784. * is a bit field that contains:
  785. * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``,
  786. * *Drop eligible indicator (DEI)* - 1 bit,
  787. * *Priority code point (PCP)* - 3 bits.
  788. * For detailed meaning of DEI and PCP, please refer to other sources.
  789. *
  790. * Return:
  791. * * Returns 0 on success or ``-errno`` on error.
  792. * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
  793. * * ``-ENODATA`` : VLAN tag was not stripped or is not available
  794. */
  795. __bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
  796. __be16 *vlan_proto, u16 *vlan_tci)
  797. {
  798. return -EOPNOTSUPP;
  799. }
  800. __bpf_kfunc_end_defs();
/* BTF ID set of the XDP metadata kfuncs. Each XDP_METADATA_KFUNC_xxx entry
 * (the list itself is defined outside this file) contributes its third
 * argument as a function ID.
 */
BTF_KFUNCS_START(xdp_metadata_kfunc_ids)
#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
BTF_KFUNCS_END(xdp_metadata_kfunc_ids)

/* kfunc ID set handed to register_btf_kfunc_id_set() at init time */
static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
	.owner = THIS_MODULE,
	.set = &xdp_metadata_kfunc_ids,
};
/* The same function IDs as a plain list in XDP_METADATA_KFUNC_xxx order, so
 * that it can be indexed directly by bpf_xdp_metadata_kfunc_id().
 */
BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str)
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
  814. u32 bpf_xdp_metadata_kfunc_id(int id)
  815. {
  816. /* xdp_metadata_kfunc_ids is sorted and can't be used */
  817. return xdp_metadata_kfunc_ids_unsorted[id];
  818. }
  819. bool bpf_dev_bound_kfunc_id(u32 btf_id)
  820. {
  821. return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
  822. }
  823. static int __init xdp_metadata_init(void)
  824. {
  825. return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
  826. }
  827. late_initcall(xdp_metadata_init);
  828. void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val)
  829. {
  830. val &= NETDEV_XDP_ACT_MASK;
  831. if (dev->xdp_features == val)
  832. return;
  833. netdev_assert_locked_or_invisible(dev);
  834. dev->xdp_features = val;
  835. if (dev->reg_state == NETREG_REGISTERED)
  836. call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
  837. }
  838. EXPORT_SYMBOL_GPL(xdp_set_features_flag_locked);
  839. void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
  840. {
  841. netdev_lock(dev);
  842. xdp_set_features_flag_locked(dev, val);
  843. netdev_unlock(dev);
  844. }
  845. EXPORT_SYMBOL_GPL(xdp_set_features_flag);
  846. void xdp_features_set_redirect_target_locked(struct net_device *dev,
  847. bool support_sg)
  848. {
  849. xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT);
  850. if (support_sg)
  851. val |= NETDEV_XDP_ACT_NDO_XMIT_SG;
  852. xdp_set_features_flag_locked(dev, val);
  853. }
  854. EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target_locked);
  855. void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
  856. {
  857. netdev_lock(dev);
  858. xdp_features_set_redirect_target_locked(dev, support_sg);
  859. netdev_unlock(dev);
  860. }
  861. EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
  862. void xdp_features_clear_redirect_target_locked(struct net_device *dev)
  863. {
  864. xdp_features_t val = dev->xdp_features;
  865. val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG);
  866. xdp_set_features_flag_locked(dev, val);
  867. }
  868. EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target_locked);
  869. void xdp_features_clear_redirect_target(struct net_device *dev)
  870. {
  871. netdev_lock(dev);
  872. xdp_features_clear_redirect_target_locked(dev);
  873. netdev_unlock(dev);
  874. }
  875. EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);