page_pool.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354
  1. /* SPDX-License-Identifier: GPL-2.0
  2. *
  3. * page_pool.c
  4. * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com>
  5. * Copyright (C) 2016 Red Hat, Inc.
  6. */
  7. #include <linux/error-injection.h>
  8. #include <linux/types.h>
  9. #include <linux/kernel.h>
  10. #include <linux/slab.h>
  11. #include <linux/device.h>
  12. #include <net/netdev_lock.h>
  13. #include <net/netdev_rx_queue.h>
  14. #include <net/page_pool/helpers.h>
  15. #include <net/page_pool/memory_provider.h>
  16. #include <net/xdp.h>
  17. #include <linux/dma-direction.h>
  18. #include <linux/dma-mapping.h>
  19. #include <linux/page-flags.h>
  20. #include <linux/mm.h> /* for put_page() */
  21. #include <linux/poison.h>
  22. #include <linux/ethtool.h>
  23. #include <linux/netdevice.h>
  24. #include <trace/events/page_pool.h>
  25. #include "dev.h"
  26. #include "mp_dmabuf_devmem.h"
  27. #include "netmem_priv.h"
  28. #include "page_pool_priv.h"
  29. DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
  30. #define DEFER_TIME (msecs_to_jiffies(1000))
  31. #define DEFER_WARN_INTERVAL (60 * HZ)
  32. #define BIAS_MAX (LONG_MAX >> 1)
  33. #ifdef CONFIG_PAGE_POOL_STATS
  34. static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats);
  35. /* alloc_stat_inc is intended to be used in softirq context */
  36. #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++)
  37. /* recycle_stat_inc is safe to use when preemption is possible. */
  38. #define recycle_stat_inc(pool, __stat) \
  39. do { \
  40. struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
  41. this_cpu_inc(s->__stat); \
  42. } while (0)
  43. #define recycle_stat_add(pool, __stat, val) \
  44. do { \
  45. struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \
  46. this_cpu_add(s->__stat, val); \
  47. } while (0)
/* ethtool string table for the page pool counters.
 * The order of the entries must stay in sync with the order in which
 * page_pool_ethtool_stats_get() emits the values: alloc counters first,
 * recycle counters second.
 */
static const char pp_stats[][ETH_GSTRING_LEN] = {
	"rx_pp_alloc_fast",
	"rx_pp_alloc_slow",
	"rx_pp_alloc_slow_ho",
	"rx_pp_alloc_empty",
	"rx_pp_alloc_refill",
	"rx_pp_alloc_waive",
	"rx_pp_recycle_cached",
	"rx_pp_recycle_cache_full",
	"rx_pp_recycle_ring",
	"rx_pp_recycle_ring_full",
	"rx_pp_recycle_released_ref",
};
  61. /**
  62. * page_pool_get_stats() - fetch page pool stats
  63. * @pool: pool from which page was allocated
  64. * @stats: struct page_pool_stats to fill in
  65. *
  66. * Retrieve statistics about the page_pool. This API is only available
  67. * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``.
  68. * A pointer to a caller allocated struct page_pool_stats structure
  69. * is passed to this API which is filled in. The caller can then report
  70. * those stats to the user (perhaps via ethtool, debugfs, etc.).
  71. */
  72. bool page_pool_get_stats(const struct page_pool *pool,
  73. struct page_pool_stats *stats)
  74. {
  75. int cpu = 0;
  76. if (!stats)
  77. return false;
  78. /* The caller is responsible to initialize stats. */
  79. stats->alloc_stats.fast += pool->alloc_stats.fast;
  80. stats->alloc_stats.slow += pool->alloc_stats.slow;
  81. stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order;
  82. stats->alloc_stats.empty += pool->alloc_stats.empty;
  83. stats->alloc_stats.refill += pool->alloc_stats.refill;
  84. stats->alloc_stats.waive += pool->alloc_stats.waive;
  85. for_each_possible_cpu(cpu) {
  86. const struct page_pool_recycle_stats *pcpu =
  87. per_cpu_ptr(pool->recycle_stats, cpu);
  88. stats->recycle_stats.cached += pcpu->cached;
  89. stats->recycle_stats.cache_full += pcpu->cache_full;
  90. stats->recycle_stats.ring += pcpu->ring;
  91. stats->recycle_stats.ring_full += pcpu->ring_full;
  92. stats->recycle_stats.released_refcnt += pcpu->released_refcnt;
  93. }
  94. return true;
  95. }
  96. EXPORT_SYMBOL(page_pool_get_stats);
  97. u8 *page_pool_ethtool_stats_get_strings(u8 *data)
  98. {
  99. int i;
  100. for (i = 0; i < ARRAY_SIZE(pp_stats); i++) {
  101. memcpy(data, pp_stats[i], ETH_GSTRING_LEN);
  102. data += ETH_GSTRING_LEN;
  103. }
  104. return data;
  105. }
  106. EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings);
  107. int page_pool_ethtool_stats_get_count(void)
  108. {
  109. return ARRAY_SIZE(pp_stats);
  110. }
  111. EXPORT_SYMBOL(page_pool_ethtool_stats_get_count);
  112. u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
  113. {
  114. const struct page_pool_stats *pool_stats = stats;
  115. *data++ = pool_stats->alloc_stats.fast;
  116. *data++ = pool_stats->alloc_stats.slow;
  117. *data++ = pool_stats->alloc_stats.slow_high_order;
  118. *data++ = pool_stats->alloc_stats.empty;
  119. *data++ = pool_stats->alloc_stats.refill;
  120. *data++ = pool_stats->alloc_stats.waive;
  121. *data++ = pool_stats->recycle_stats.cached;
  122. *data++ = pool_stats->recycle_stats.cache_full;
  123. *data++ = pool_stats->recycle_stats.ring;
  124. *data++ = pool_stats->recycle_stats.ring_full;
  125. *data++ = pool_stats->recycle_stats.released_refcnt;
  126. return data;
  127. }
  128. EXPORT_SYMBOL(page_pool_ethtool_stats_get);
  129. #else
  130. #define alloc_stat_inc(...) do { } while (0)
  131. #define recycle_stat_inc(...) do { } while (0)
  132. #define recycle_stat_add(...) do { } while (0)
  133. #endif
  134. static bool page_pool_producer_lock(struct page_pool *pool)
  135. __acquires(&pool->ring.producer_lock)
  136. {
  137. bool in_softirq = in_softirq();
  138. if (in_softirq)
  139. spin_lock(&pool->ring.producer_lock);
  140. else
  141. spin_lock_bh(&pool->ring.producer_lock);
  142. return in_softirq;
  143. }
  144. static void page_pool_producer_unlock(struct page_pool *pool,
  145. bool in_softirq)
  146. __releases(&pool->ring.producer_lock)
  147. {
  148. if (in_softirq)
  149. spin_unlock(&pool->ring.producer_lock);
  150. else
  151. spin_unlock_bh(&pool->ring.producer_lock);
  152. }
  153. static void page_pool_struct_check(void)
  154. {
  155. CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
  156. CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
  157. CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
  158. CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag,
  159. PAGE_POOL_FRAG_GROUP_ALIGN);
  160. }
  161. static int page_pool_init(struct page_pool *pool,
  162. const struct page_pool_params *params,
  163. int cpuid)
  164. {
  165. unsigned int ring_qsize = 1024; /* Default */
  166. struct netdev_rx_queue *rxq;
  167. int err;
  168. page_pool_struct_check();
  169. memcpy(&pool->p, &params->fast, sizeof(pool->p));
  170. memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
  171. pool->cpuid = cpuid;
  172. pool->dma_sync_for_cpu = true;
  173. /* Validate only known flags were used */
  174. if (pool->slow.flags & ~PP_FLAG_ALL)
  175. return -EINVAL;
  176. if (pool->p.pool_size)
  177. ring_qsize = min(pool->p.pool_size, 16384);
  178. /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL.
  179. * DMA_BIDIRECTIONAL is for allowing page used for DMA sending,
  180. * which is the XDP_TX use-case.
  181. */
  182. if (pool->slow.flags & PP_FLAG_DMA_MAP) {
  183. if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
  184. (pool->p.dma_dir != DMA_BIDIRECTIONAL))
  185. return -EINVAL;
  186. pool->dma_map = true;
  187. }
  188. if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
  189. /* In order to request DMA-sync-for-device the page
  190. * needs to be mapped
  191. */
  192. if (!(pool->slow.flags & PP_FLAG_DMA_MAP))
  193. return -EINVAL;
  194. if (!pool->p.max_len)
  195. return -EINVAL;
  196. pool->dma_sync = true;
  197. /* pool->p.offset has to be set according to the address
  198. * offset used by the DMA engine to start copying rx data
  199. */
  200. }
  201. pool->has_init_callback = !!pool->slow.init_callback;
  202. #ifdef CONFIG_PAGE_POOL_STATS
  203. if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) {
  204. pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
  205. if (!pool->recycle_stats)
  206. return -ENOMEM;
  207. } else {
  208. /* For system page pool instance we use a singular stats object
  209. * instead of allocating a separate percpu variable for each
  210. * (also percpu) page pool instance.
  211. */
  212. pool->recycle_stats = &pp_system_recycle_stats;
  213. pool->system = true;
  214. }
  215. #endif
  216. if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) {
  217. #ifdef CONFIG_PAGE_POOL_STATS
  218. if (!pool->system)
  219. free_percpu(pool->recycle_stats);
  220. #endif
  221. return -ENOMEM;
  222. }
  223. atomic_set(&pool->pages_state_release_cnt, 0);
  224. /* Driver calling page_pool_create() also call page_pool_destroy() */
  225. refcount_set(&pool->user_cnt, 1);
  226. xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
  227. if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
  228. netdev_assert_locked(pool->slow.netdev);
  229. rxq = __netif_get_rx_queue(pool->slow.netdev,
  230. pool->slow.queue_idx);
  231. pool->mp_priv = rxq->mp_params.mp_priv;
  232. pool->mp_ops = rxq->mp_params.mp_ops;
  233. }
  234. if (pool->mp_ops) {
  235. if (!pool->dma_map || !pool->dma_sync) {
  236. err = -EOPNOTSUPP;
  237. goto free_ptr_ring;
  238. }
  239. if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) {
  240. err = -EFAULT;
  241. goto free_ptr_ring;
  242. }
  243. err = pool->mp_ops->init(pool);
  244. if (err) {
  245. pr_warn("%s() mem-provider init failed %d\n", __func__,
  246. err);
  247. goto free_ptr_ring;
  248. }
  249. static_branch_inc(&page_pool_mem_providers);
  250. } else if (pool->p.order > MAX_PAGE_ORDER) {
  251. err = -EINVAL;
  252. goto free_ptr_ring;
  253. }
  254. return 0;
  255. free_ptr_ring:
  256. ptr_ring_cleanup(&pool->ring, NULL);
  257. xa_destroy(&pool->dma_mapped);
  258. #ifdef CONFIG_PAGE_POOL_STATS
  259. if (!pool->system)
  260. free_percpu(pool->recycle_stats);
  261. #endif
  262. return err;
  263. }
  264. static void page_pool_uninit(struct page_pool *pool)
  265. {
  266. ptr_ring_cleanup(&pool->ring, NULL);
  267. xa_destroy(&pool->dma_mapped);
  268. #ifdef CONFIG_PAGE_POOL_STATS
  269. if (!pool->system)
  270. free_percpu(pool->recycle_stats);
  271. #endif
  272. }
  273. /**
  274. * page_pool_create_percpu() - create a page pool for a given cpu.
  275. * @params: parameters, see struct page_pool_params
  276. * @cpuid: cpu identifier
  277. */
  278. struct page_pool *
  279. page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
  280. {
  281. struct page_pool *pool;
  282. int err;
  283. pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid);
  284. if (!pool)
  285. return ERR_PTR(-ENOMEM);
  286. err = page_pool_init(pool, params, cpuid);
  287. if (err < 0)
  288. goto err_free;
  289. err = page_pool_list(pool);
  290. if (err)
  291. goto err_uninit;
  292. return pool;
  293. err_uninit:
  294. page_pool_uninit(pool);
  295. err_free:
  296. pr_warn("%s() gave up with errno %d\n", __func__, err);
  297. kfree(pool);
  298. return ERR_PTR(err);
  299. }
  300. EXPORT_SYMBOL(page_pool_create_percpu);
  301. /**
  302. * page_pool_create() - create a page pool
  303. * @params: parameters, see struct page_pool_params
  304. */
  305. struct page_pool *page_pool_create(const struct page_pool_params *params)
  306. {
  307. return page_pool_create_percpu(params, -1);
  308. }
  309. EXPORT_SYMBOL(page_pool_create);
  310. static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem);
  311. static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
  312. {
  313. struct ptr_ring *r = &pool->ring;
  314. netmem_ref netmem;
  315. int pref_nid; /* preferred NUMA node */
  316. /* Quicker fallback, avoid locks when ring is empty */
  317. if (__ptr_ring_empty(r)) {
  318. alloc_stat_inc(pool, empty);
  319. return 0;
  320. }
  321. /* Softirq guarantee CPU and thus NUMA node is stable. This,
  322. * assumes CPU refilling driver RX-ring will also run RX-NAPI.
  323. */
  324. #ifdef CONFIG_NUMA
  325. pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
  326. #else
  327. /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
  328. pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
  329. #endif
  330. /* Refill alloc array, but only if NUMA match */
  331. do {
  332. netmem = (__force netmem_ref)__ptr_ring_consume(r);
  333. if (unlikely(!netmem))
  334. break;
  335. if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
  336. pool->alloc.cache[pool->alloc.count++] = netmem;
  337. } else {
  338. /* NUMA mismatch;
  339. * (1) release 1 page to page-allocator and
  340. * (2) break out to fallthrough to alloc_pages_node.
  341. * This limit stress on page buddy alloactor.
  342. */
  343. page_pool_return_netmem(pool, netmem);
  344. alloc_stat_inc(pool, waive);
  345. netmem = 0;
  346. break;
  347. }
  348. } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
  349. /* Return last page */
  350. if (likely(pool->alloc.count > 0)) {
  351. netmem = pool->alloc.cache[--pool->alloc.count];
  352. alloc_stat_inc(pool, refill);
  353. }
  354. return netmem;
  355. }
  356. /* fast path */
  357. static netmem_ref __page_pool_get_cached(struct page_pool *pool)
  358. {
  359. netmem_ref netmem;
  360. /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
  361. if (likely(pool->alloc.count)) {
  362. /* Fast-path */
  363. netmem = pool->alloc.cache[--pool->alloc.count];
  364. alloc_stat_inc(pool, fast);
  365. } else {
  366. netmem = page_pool_refill_alloc_cache(pool);
  367. }
  368. return netmem;
  369. }
  370. static void __page_pool_dma_sync_for_device(const struct page_pool *pool,
  371. netmem_ref netmem,
  372. u32 dma_sync_size)
  373. {
  374. #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
  375. dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
  376. dma_sync_size = min(dma_sync_size, pool->p.max_len);
  377. __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
  378. dma_sync_size, pool->p.dma_dir);
  379. #endif
  380. }
  381. static __always_inline void
  382. page_pool_dma_sync_for_device(const struct page_pool *pool,
  383. netmem_ref netmem,
  384. u32 dma_sync_size)
  385. {
  386. if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) {
  387. rcu_read_lock();
  388. /* re-check under rcu_read_lock() to sync with page_pool_scrub() */
  389. if (pool->dma_sync)
  390. __page_pool_dma_sync_for_device(pool, netmem,
  391. dma_sync_size);
  392. rcu_read_unlock();
  393. }
  394. }
  395. static int page_pool_register_dma_index(struct page_pool *pool,
  396. netmem_ref netmem, gfp_t gfp)
  397. {
  398. int err = 0;
  399. u32 id;
  400. if (unlikely(!PP_DMA_INDEX_BITS))
  401. goto out;
  402. if (in_softirq())
  403. err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
  404. PP_DMA_INDEX_LIMIT, gfp);
  405. else
  406. err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
  407. PP_DMA_INDEX_LIMIT, gfp);
  408. if (err) {
  409. WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
  410. goto out;
  411. }
  412. netmem_set_dma_index(netmem, id);
  413. out:
  414. return err;
  415. }
  416. static int page_pool_release_dma_index(struct page_pool *pool,
  417. netmem_ref netmem)
  418. {
  419. struct page *old, *page = netmem_to_page(netmem);
  420. unsigned long id;
  421. if (unlikely(!PP_DMA_INDEX_BITS))
  422. return 0;
  423. id = netmem_get_dma_index(netmem);
  424. if (!id)
  425. return -1;
  426. if (in_softirq())
  427. old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
  428. else
  429. old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
  430. if (old != page)
  431. return -1;
  432. netmem_set_dma_index(netmem, 0);
  433. return 0;
  434. }
  435. static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
  436. {
  437. dma_addr_t dma;
  438. int err;
  439. /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
  440. * since dma_addr_t can be either 32 or 64 bits and does not always fit
  441. * into page private data (i.e 32bit cpu with 64bit DMA caps)
  442. * This mapping is kept for lifetime of page, until leaving pool.
  443. */
  444. dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0,
  445. (PAGE_SIZE << pool->p.order), pool->p.dma_dir,
  446. DMA_ATTR_SKIP_CPU_SYNC |
  447. DMA_ATTR_WEAK_ORDERING);
  448. if (dma_mapping_error(pool->p.dev, dma))
  449. return false;
  450. if (page_pool_set_dma_addr_netmem(netmem, dma)) {
  451. WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
  452. goto unmap_failed;
  453. }
  454. err = page_pool_register_dma_index(pool, netmem, gfp);
  455. if (err)
  456. goto unset_failed;
  457. page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
  458. return true;
  459. unset_failed:
  460. page_pool_set_dma_addr_netmem(netmem, 0);
  461. unmap_failed:
  462. dma_unmap_page_attrs(pool->p.dev, dma,
  463. PAGE_SIZE << pool->p.order, pool->p.dma_dir,
  464. DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
  465. return false;
  466. }
  467. static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
  468. gfp_t gfp)
  469. {
  470. struct page *page;
  471. gfp |= __GFP_COMP;
  472. page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
  473. if (unlikely(!page))
  474. return NULL;
  475. if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) {
  476. put_page(page);
  477. return NULL;
  478. }
  479. alloc_stat_inc(pool, slow_high_order);
  480. page_pool_set_pp_info(pool, page_to_netmem(page));
  481. /* Track how many pages are held 'in-flight' */
  482. pool->pages_state_hold_cnt++;
  483. trace_page_pool_state_hold(pool, page_to_netmem(page),
  484. pool->pages_state_hold_cnt);
  485. return page;
  486. }
  487. /* slow path */
  488. static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool,
  489. gfp_t gfp)
  490. {
  491. const int bulk = PP_ALLOC_CACHE_REFILL;
  492. unsigned int pp_order = pool->p.order;
  493. bool dma_map = pool->dma_map;
  494. netmem_ref netmem;
  495. int i, nr_pages;
  496. /* Unconditionally set NOWARN if allocating from NAPI.
  497. * Drivers forget to set it, and OOM reports on packet Rx are useless.
  498. */
  499. if ((gfp & GFP_ATOMIC) == GFP_ATOMIC)
  500. gfp |= __GFP_NOWARN;
  501. /* Don't support bulk alloc for high-order pages */
  502. if (unlikely(pp_order))
  503. return page_to_netmem(__page_pool_alloc_page_order(pool, gfp));
  504. /* Unnecessary as alloc cache is empty, but guarantees zero count */
  505. if (unlikely(pool->alloc.count > 0))
  506. return pool->alloc.cache[--pool->alloc.count];
  507. /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */
  508. memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
  509. nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk,
  510. (struct page **)pool->alloc.cache);
  511. if (unlikely(!nr_pages))
  512. return 0;
  513. /* Pages have been filled into alloc.cache array, but count is zero and
  514. * page element have not been (possibly) DMA mapped.
  515. */
  516. for (i = 0; i < nr_pages; i++) {
  517. netmem = pool->alloc.cache[i];
  518. if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) {
  519. put_page(netmem_to_page(netmem));
  520. continue;
  521. }
  522. page_pool_set_pp_info(pool, netmem);
  523. pool->alloc.cache[pool->alloc.count++] = netmem;
  524. /* Track how many pages are held 'in-flight' */
  525. pool->pages_state_hold_cnt++;
  526. trace_page_pool_state_hold(pool, netmem,
  527. pool->pages_state_hold_cnt);
  528. }
  529. /* Return last page */
  530. if (likely(pool->alloc.count > 0)) {
  531. netmem = pool->alloc.cache[--pool->alloc.count];
  532. alloc_stat_inc(pool, slow);
  533. } else {
  534. netmem = 0;
  535. }
  536. /* When page just alloc'ed is should/must have refcnt 1. */
  537. return netmem;
  538. }
  539. /* For using page_pool replace: alloc_pages() API calls, but provide
  540. * synchronization guarantee for allocation side.
  541. */
  542. netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp)
  543. {
  544. netmem_ref netmem;
  545. /* Fast-path: Get a page from cache */
  546. netmem = __page_pool_get_cached(pool);
  547. if (netmem)
  548. return netmem;
  549. /* Slow-path: cache empty, do real allocation */
  550. if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
  551. netmem = pool->mp_ops->alloc_netmems(pool, gfp);
  552. else
  553. netmem = __page_pool_alloc_netmems_slow(pool, gfp);
  554. return netmem;
  555. }
  556. EXPORT_SYMBOL(page_pool_alloc_netmems);
  557. ALLOW_ERROR_INJECTION(page_pool_alloc_netmems, NULL);
  558. struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
  559. {
  560. return netmem_to_page(page_pool_alloc_netmems(pool, gfp));
  561. }
  562. EXPORT_SYMBOL(page_pool_alloc_pages);
  563. /* Calculate distance between two u32 values, valid if distance is below 2^(31)
  564. * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution
  565. */
  566. #define _distance(a, b) (s32)((a) - (b))
  567. s32 page_pool_inflight(const struct page_pool *pool, bool strict)
  568. {
  569. u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
  570. u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
  571. s32 inflight;
  572. inflight = _distance(hold_cnt, release_cnt);
  573. if (strict) {
  574. trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
  575. WARN(inflight < 0, "Negative(%d) inflight packet-pages",
  576. inflight);
  577. } else {
  578. inflight = max(0, inflight);
  579. }
  580. return inflight;
  581. }
  582. void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
  583. {
  584. netmem_set_pp(netmem, pool);
  585. netmem_or_pp_magic(netmem, PP_SIGNATURE);
  586. /* Ensuring all pages have been split into one fragment initially:
  587. * page_pool_set_pp_info() is only called once for every page when it
  588. * is allocated from the page allocator and page_pool_fragment_page()
  589. * is dirtying the same cache line as the page->pp_magic above, so
  590. * the overhead is negligible.
  591. */
  592. page_pool_fragment_netmem(netmem, 1);
  593. if (pool->has_init_callback)
  594. pool->slow.init_callback(netmem, pool->slow.init_arg);
  595. }
  596. void page_pool_clear_pp_info(netmem_ref netmem)
  597. {
  598. netmem_clear_pp_magic(netmem);
  599. netmem_set_pp(netmem, NULL);
  600. }
  601. static __always_inline void __page_pool_release_netmem_dma(struct page_pool *pool,
  602. netmem_ref netmem)
  603. {
  604. dma_addr_t dma;
  605. if (!pool->dma_map)
  606. /* Always account for inflight pages, even if we didn't
  607. * map them
  608. */
  609. return;
  610. if (page_pool_release_dma_index(pool, netmem))
  611. return;
  612. dma = page_pool_get_dma_addr_netmem(netmem);
  613. /* When page is unmapped, it cannot be returned to our pool */
  614. dma_unmap_page_attrs(pool->p.dev, dma,
  615. PAGE_SIZE << pool->p.order, pool->p.dma_dir,
  616. DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
  617. page_pool_set_dma_addr_netmem(netmem, 0);
  618. }
  619. /* Disconnects a page (from a page_pool). API users can have a need
  620. * to disconnect a page (from a page_pool), to allow it to be used as
  621. * a regular page (that will eventually be returned to the normal
  622. * page-allocator via put_page).
  623. */
  624. static void page_pool_return_netmem(struct page_pool *pool, netmem_ref netmem)
  625. {
  626. int count;
  627. bool put;
  628. put = true;
  629. if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
  630. put = pool->mp_ops->release_netmem(pool, netmem);
  631. else
  632. __page_pool_release_netmem_dma(pool, netmem);
  633. /* This may be the last page returned, releasing the pool, so
  634. * it is not safe to reference pool afterwards.
  635. */
  636. count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
  637. trace_page_pool_state_release(pool, netmem, count);
  638. if (put) {
  639. page_pool_clear_pp_info(netmem);
  640. put_page(netmem_to_page(netmem));
  641. }
  642. /* An optimization would be to call __free_pages(page, pool->p.order)
  643. * knowing page is not part of page-cache (thus avoiding a
  644. * __page_cache_release() call).
  645. */
  646. }
  647. static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
  648. {
  649. bool in_softirq, ret;
  650. /* BH protection not needed if current is softirq */
  651. in_softirq = page_pool_producer_lock(pool);
  652. ret = !__ptr_ring_produce(&pool->ring, (__force void *)netmem);
  653. if (ret)
  654. recycle_stat_inc(pool, ring);
  655. page_pool_producer_unlock(pool, in_softirq);
  656. return ret;
  657. }
  658. /* Only allow direct recycling in special circumstances, into the
  659. * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case.
  660. *
  661. * Caller must provide appropriate safe context.
  662. */
  663. static bool page_pool_recycle_in_cache(netmem_ref netmem,
  664. struct page_pool *pool)
  665. {
  666. if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
  667. recycle_stat_inc(pool, cache_full);
  668. return false;
  669. }
  670. /* Caller MUST have verified/know (page_ref_count(page) == 1) */
  671. pool->alloc.cache[pool->alloc.count++] = netmem;
  672. recycle_stat_inc(pool, cached);
  673. return true;
  674. }
  675. static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
  676. {
  677. return netmem_is_net_iov(netmem) ||
  678. (page_ref_count(netmem_to_page(netmem)) == 1 &&
  679. !page_is_pfmemalloc(netmem_to_page(netmem)));
  680. }
  681. /* If the page refcnt == 1, this will try to recycle the page.
  682. * If pool->dma_sync is set, we'll try to sync the DMA area for
  683. * the configured size min(dma_sync_size, pool->max_len).
  684. * If the page refcnt != 1, then the page will be returned to memory
  685. * subsystem.
  686. */
  687. static __always_inline netmem_ref
  688. __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
  689. unsigned int dma_sync_size, bool allow_direct)
  690. {
  691. lockdep_assert_no_hardirq();
  692. /* This allocator is optimized for the XDP mode that uses
  693. * one-frame-per-page, but have fallbacks that act like the
  694. * regular page allocator APIs.
  695. *
  696. * refcnt == 1 means page_pool owns page, and can recycle it.
  697. *
  698. * page is NOT reusable when allocated when system is under
  699. * some pressure. (page_is_pfmemalloc)
  700. */
  701. if (likely(__page_pool_page_can_be_recycled(netmem))) {
  702. /* Read barrier done in page_ref_count / READ_ONCE */
  703. page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
  704. if (allow_direct && page_pool_recycle_in_cache(netmem, pool))
  705. return 0;
  706. /* Page found as candidate for recycling */
  707. return netmem;
  708. }
  709. /* Fallback/non-XDP mode: API user have elevated refcnt.
  710. *
  711. * Many drivers split up the page into fragments, and some
  712. * want to keep doing this to save memory and do refcnt based
  713. * recycling. Support this use case too, to ease drivers
  714. * switching between XDP/non-XDP.
  715. *
  716. * In-case page_pool maintains the DMA mapping, API user must
  717. * call page_pool_put_page once. In this elevated refcnt
  718. * case, the DMA is unmapped/released, as driver is likely
  719. * doing refcnt based recycle tricks, meaning another process
  720. * will be invoking put_page.
  721. */
  722. recycle_stat_inc(pool, released_refcnt);
  723. page_pool_return_netmem(pool, netmem);
  724. return 0;
  725. }
  726. static bool page_pool_napi_local(const struct page_pool *pool)
  727. {
  728. const struct napi_struct *napi;
  729. u32 cpuid;
  730. /* On PREEMPT_RT the softirq can be preempted by the consumer */
  731. if (IS_ENABLED(CONFIG_PREEMPT_RT))
  732. return false;
  733. if (unlikely(!in_softirq()))
  734. return false;
  735. /* Allow direct recycle if we have reasons to believe that we are
  736. * in the same context as the consumer would run, so there's
  737. * no possible race.
  738. * __page_pool_put_page() makes sure we're not in hardirq context
  739. * and interrupts are enabled prior to accessing the cache.
  740. */
  741. cpuid = smp_processor_id();
  742. if (READ_ONCE(pool->cpuid) == cpuid)
  743. return true;
  744. napi = READ_ONCE(pool->p.napi);
  745. return napi && READ_ONCE(napi->list_owner) == cpuid;
  746. }
  747. void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
  748. unsigned int dma_sync_size, bool allow_direct)
  749. {
  750. if (!allow_direct)
  751. allow_direct = page_pool_napi_local(pool);
  752. netmem = __page_pool_put_page(pool, netmem, dma_sync_size,
  753. allow_direct);
  754. if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
  755. /* Cache full, fallback to free pages */
  756. recycle_stat_inc(pool, ring_full);
  757. page_pool_return_netmem(pool, netmem);
  758. }
  759. }
  760. EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
  761. void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
  762. unsigned int dma_sync_size, bool allow_direct)
  763. {
  764. page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size,
  765. allow_direct);
  766. }
  767. EXPORT_SYMBOL(page_pool_put_unrefed_page);
  768. static void page_pool_recycle_ring_bulk(struct page_pool *pool,
  769. netmem_ref *bulk,
  770. u32 bulk_len)
  771. {
  772. bool in_softirq;
  773. u32 i;
  774. /* Bulk produce into ptr_ring page_pool cache */
  775. in_softirq = page_pool_producer_lock(pool);
  776. for (i = 0; i < bulk_len; i++) {
  777. if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) {
  778. /* ring full */
  779. recycle_stat_inc(pool, ring_full);
  780. break;
  781. }
  782. }
  783. page_pool_producer_unlock(pool, in_softirq);
  784. recycle_stat_add(pool, ring, i);
  785. /* Hopefully all pages were returned into ptr_ring */
  786. if (likely(i == bulk_len))
  787. return;
  788. /*
  789. * ptr_ring cache is full, free remaining pages outside producer lock
  790. * since put_page() with refcnt == 1 can be an expensive operation.
  791. */
  792. for (; i < bulk_len; i++)
  793. page_pool_return_netmem(pool, bulk[i]);
  794. }
  795. /**
  796. * page_pool_put_netmem_bulk() - release references on multiple netmems
  797. * @data: array holding netmem references
  798. * @count: number of entries in @data
  799. *
  800. * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring
  801. * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk()
  802. * will release leftover netmems to the memory provider.
  803. * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx
  804. * completion loop for the XDP_REDIRECT use case.
  805. *
  806. * Please note the caller must not use data area after running
  807. * page_pool_put_netmem_bulk(), as this function overwrites it.
  808. */
  809. void page_pool_put_netmem_bulk(netmem_ref *data, u32 count)
  810. {
  811. u32 bulk_len = 0;
  812. for (u32 i = 0; i < count; i++) {
  813. netmem_ref netmem = netmem_compound_head(data[i]);
  814. if (page_pool_unref_and_test(netmem))
  815. data[bulk_len++] = netmem;
  816. }
  817. count = bulk_len;
  818. while (count) {
  819. netmem_ref bulk[XDP_BULK_QUEUE_SIZE];
  820. struct page_pool *pool = NULL;
  821. bool allow_direct;
  822. u32 foreign = 0;
  823. bulk_len = 0;
  824. for (u32 i = 0; i < count; i++) {
  825. struct page_pool *netmem_pp;
  826. netmem_ref netmem = data[i];
  827. netmem_pp = netmem_get_pp(netmem);
  828. if (unlikely(!pool)) {
  829. pool = netmem_pp;
  830. allow_direct = page_pool_napi_local(pool);
  831. } else if (netmem_pp != pool) {
  832. /*
  833. * If the netmem belongs to a different
  834. * page_pool, save it for another round.
  835. */
  836. data[foreign++] = netmem;
  837. continue;
  838. }
  839. netmem = __page_pool_put_page(pool, netmem, -1,
  840. allow_direct);
  841. /* Approved for bulk recycling in ptr_ring cache */
  842. if (netmem)
  843. bulk[bulk_len++] = netmem;
  844. }
  845. if (bulk_len)
  846. page_pool_recycle_ring_bulk(pool, bulk, bulk_len);
  847. count = foreign;
  848. }
  849. }
  850. EXPORT_SYMBOL(page_pool_put_netmem_bulk);
  851. static netmem_ref page_pool_drain_frag(struct page_pool *pool,
  852. netmem_ref netmem)
  853. {
  854. long drain_count = BIAS_MAX - pool->frag_users;
  855. /* Some user is still using the page frag */
  856. if (likely(page_pool_unref_netmem(netmem, drain_count)))
  857. return 0;
  858. if (__page_pool_page_can_be_recycled(netmem)) {
  859. page_pool_dma_sync_for_device(pool, netmem, -1);
  860. return netmem;
  861. }
  862. page_pool_return_netmem(pool, netmem);
  863. return 0;
  864. }
  865. static void page_pool_free_frag(struct page_pool *pool)
  866. {
  867. long drain_count = BIAS_MAX - pool->frag_users;
  868. netmem_ref netmem = pool->frag_page;
  869. pool->frag_page = 0;
  870. if (!netmem || page_pool_unref_netmem(netmem, drain_count))
  871. return;
  872. page_pool_return_netmem(pool, netmem);
  873. }
  874. netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
  875. unsigned int *offset, unsigned int size,
  876. gfp_t gfp)
  877. {
  878. unsigned int max_size = PAGE_SIZE << pool->p.order;
  879. netmem_ref netmem = pool->frag_page;
  880. if (WARN_ON(size > max_size))
  881. return 0;
  882. size = ALIGN(size, dma_get_cache_alignment());
  883. *offset = pool->frag_offset;
  884. if (netmem && *offset + size > max_size) {
  885. netmem = page_pool_drain_frag(pool, netmem);
  886. if (netmem) {
  887. recycle_stat_inc(pool, cached);
  888. alloc_stat_inc(pool, fast);
  889. goto frag_reset;
  890. }
  891. }
  892. if (!netmem) {
  893. netmem = page_pool_alloc_netmems(pool, gfp);
  894. if (unlikely(!netmem)) {
  895. pool->frag_page = 0;
  896. return 0;
  897. }
  898. pool->frag_page = netmem;
  899. frag_reset:
  900. pool->frag_users = 1;
  901. *offset = 0;
  902. pool->frag_offset = size;
  903. page_pool_fragment_netmem(netmem, BIAS_MAX);
  904. return netmem;
  905. }
  906. pool->frag_users++;
  907. pool->frag_offset = *offset + size;
  908. return netmem;
  909. }
  910. EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
  911. struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
  912. unsigned int size, gfp_t gfp)
  913. {
  914. return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size,
  915. gfp));
  916. }
  917. EXPORT_SYMBOL(page_pool_alloc_frag);
  918. static void page_pool_empty_ring(struct page_pool *pool)
  919. {
  920. netmem_ref netmem;
  921. /* Empty recycle ring */
  922. while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
  923. /* Verify the refcnt invariant of cached pages */
  924. if (!(netmem_ref_count(netmem) == 1))
  925. pr_crit("%s() page_pool refcnt %d violation\n",
  926. __func__, netmem_ref_count(netmem));
  927. page_pool_return_netmem(pool, netmem);
  928. }
  929. }
  930. static void __page_pool_destroy(struct page_pool *pool)
  931. {
  932. if (pool->disconnect)
  933. pool->disconnect(pool);
  934. page_pool_unlist(pool);
  935. page_pool_uninit(pool);
  936. if (pool->mp_ops) {
  937. pool->mp_ops->destroy(pool);
  938. static_branch_dec(&page_pool_mem_providers);
  939. }
  940. kfree(pool);
  941. }
  942. static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
  943. {
  944. netmem_ref netmem;
  945. if (pool->destroy_cnt)
  946. return;
  947. /* Empty alloc cache, assume caller made sure this is
  948. * no-longer in use, and page_pool_alloc_pages() cannot be
  949. * call concurrently.
  950. */
  951. while (pool->alloc.count) {
  952. netmem = pool->alloc.cache[--pool->alloc.count];
  953. page_pool_return_netmem(pool, netmem);
  954. }
  955. }
  956. static void page_pool_scrub(struct page_pool *pool)
  957. {
  958. unsigned long id;
  959. void *ptr;
  960. page_pool_empty_alloc_cache_once(pool);
  961. if (!pool->destroy_cnt++ && pool->dma_map) {
  962. if (pool->dma_sync) {
  963. /* Disable page_pool_dma_sync_for_device() */
  964. pool->dma_sync = false;
  965. /* Make sure all concurrent returns that may see the old
  966. * value of dma_sync (and thus perform a sync) have
  967. * finished before doing the unmapping below. Skip the
  968. * wait if the device doesn't actually need syncing, or
  969. * if there are no outstanding mapped pages.
  970. */
  971. if (dma_dev_need_sync(pool->p.dev) &&
  972. !xa_empty(&pool->dma_mapped))
  973. synchronize_net();
  974. }
  975. xa_for_each(&pool->dma_mapped, id, ptr)
  976. __page_pool_release_netmem_dma(pool, page_to_netmem((struct page *)ptr));
  977. }
  978. /* No more consumers should exist, but producers could still
  979. * be in-flight.
  980. */
  981. page_pool_empty_ring(pool);
  982. }
  983. static int page_pool_release(struct page_pool *pool)
  984. {
  985. bool in_softirq;
  986. int inflight;
  987. page_pool_scrub(pool);
  988. inflight = page_pool_inflight(pool, true);
  989. /* Acquire producer lock to make sure producers have exited. */
  990. in_softirq = page_pool_producer_lock(pool);
  991. page_pool_producer_unlock(pool, in_softirq);
  992. if (!inflight)
  993. __page_pool_destroy(pool);
  994. return inflight;
  995. }
  996. static void page_pool_release_retry(struct work_struct *wq)
  997. {
  998. struct delayed_work *dwq = to_delayed_work(wq);
  999. struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
  1000. void *netdev;
  1001. int inflight;
  1002. inflight = page_pool_release(pool);
  1003. /* In rare cases, a driver bug may cause inflight to go negative.
  1004. * Don't reschedule release if inflight is 0 or negative.
  1005. * - If 0, the page_pool has been destroyed
  1006. * - if negative, we will never recover
  1007. * in both cases no reschedule is necessary.
  1008. */
  1009. if (inflight <= 0)
  1010. return;
  1011. /* Periodic warning for page pools the user can't see */
  1012. netdev = READ_ONCE(pool->slow.netdev);
  1013. if (time_after_eq(jiffies, pool->defer_warn) &&
  1014. (!netdev || netdev == NET_PTR_POISON)) {
  1015. int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
  1016. pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
  1017. __func__, pool->user.id, inflight, sec);
  1018. pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
  1019. }
  1020. /* Still not ready to be disconnected, retry later */
  1021. schedule_delayed_work(&pool->release_dw, DEFER_TIME);
  1022. }
  1023. void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
  1024. const struct xdp_mem_info *mem)
  1025. {
  1026. refcount_inc(&pool->user_cnt);
  1027. pool->disconnect = disconnect;
  1028. pool->xdp_mem_id = mem->id;
  1029. }
  1030. /**
  1031. * page_pool_enable_direct_recycling() - mark page pool as owned by NAPI
  1032. * @pool: page pool to modify
  1033. * @napi: NAPI instance to associate the page pool with
  1034. *
  1035. * Associate a page pool with a NAPI instance for lockless page recycling.
  1036. * This is useful when a new page pool has to be added to a NAPI instance
  1037. * without disabling that NAPI instance, to mark the point at which control
  1038. * path "hands over" the page pool to the NAPI instance. In most cases driver
  1039. * can simply set the @napi field in struct page_pool_params, and does not
  1040. * have to call this helper.
  1041. *
  1042. * The function is idempotent, but does not implement any refcounting.
  1043. * Single page_pool_disable_direct_recycling() will disable recycling,
  1044. * no matter how many times enable was called.
  1045. */
  1046. void page_pool_enable_direct_recycling(struct page_pool *pool,
  1047. struct napi_struct *napi)
  1048. {
  1049. if (READ_ONCE(pool->p.napi) == napi)
  1050. return;
  1051. WARN_ON(!napi || pool->p.napi);
  1052. mutex_lock(&page_pools_lock);
  1053. WRITE_ONCE(pool->p.napi, napi);
  1054. mutex_unlock(&page_pools_lock);
  1055. }
  1056. EXPORT_SYMBOL(page_pool_enable_direct_recycling);
  1057. void page_pool_disable_direct_recycling(struct page_pool *pool)
  1058. {
  1059. /* Disable direct recycling based on pool->cpuid.
  1060. * Paired with READ_ONCE() in page_pool_napi_local().
  1061. */
  1062. WRITE_ONCE(pool->cpuid, -1);
  1063. if (!pool->p.napi)
  1064. return;
  1065. napi_assert_will_not_race(pool->p.napi);
  1066. mutex_lock(&page_pools_lock);
  1067. WRITE_ONCE(pool->p.napi, NULL);
  1068. mutex_unlock(&page_pools_lock);
  1069. }
  1070. EXPORT_SYMBOL(page_pool_disable_direct_recycling);
  1071. void page_pool_destroy(struct page_pool *pool)
  1072. {
  1073. if (!pool)
  1074. return;
  1075. if (!page_pool_put(pool))
  1076. return;
  1077. page_pool_disable_direct_recycling(pool);
  1078. page_pool_free_frag(pool);
  1079. if (!page_pool_release(pool))
  1080. return;
  1081. page_pool_detached(pool);
  1082. pool->defer_start = jiffies;
  1083. pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
  1084. INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry);
  1085. schedule_delayed_work(&pool->release_dw, DEFER_TIME);
  1086. }
  1087. EXPORT_SYMBOL(page_pool_destroy);
  1088. /* Caller must provide appropriate safe context, e.g. NAPI. */
  1089. void page_pool_update_nid(struct page_pool *pool, int new_nid)
  1090. {
  1091. netmem_ref netmem;
  1092. trace_page_pool_update_nid(pool, new_nid);
  1093. pool->p.nid = new_nid;
  1094. /* Flush pool alloc cache, as refill will check NUMA node */
  1095. while (pool->alloc.count) {
  1096. netmem = pool->alloc.cache[--pool->alloc.count];
  1097. page_pool_return_netmem(pool, netmem);
  1098. }
  1099. }
  1100. EXPORT_SYMBOL(page_pool_update_nid);
  1101. bool net_mp_niov_set_dma_addr(struct net_iov *niov, dma_addr_t addr)
  1102. {
  1103. return page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), addr);
  1104. }
  1105. /* Associate a niov with a page pool. Should follow with a matching
  1106. * net_mp_niov_clear_page_pool()
  1107. */
  1108. void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov)
  1109. {
  1110. netmem_ref netmem = net_iov_to_netmem(niov);
  1111. page_pool_set_pp_info(pool, netmem);
  1112. pool->pages_state_hold_cnt++;
  1113. trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
  1114. }
  1115. /* Disassociate a niov from a page pool. Should only be used in the
  1116. * ->release_netmem() path.
  1117. */
  1118. void net_mp_niov_clear_page_pool(struct net_iov *niov)
  1119. {
  1120. netmem_ref netmem = net_iov_to_netmem(niov);
  1121. page_pool_clear_pp_info(netmem);
  1122. }