nexthop.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Generic nexthop implementation
  4. *
  5. * Copyright (c) 2017-19 Cumulus Networks
  6. * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
  7. */
  8. #ifndef __LINUX_NEXTHOP_H
  9. #define __LINUX_NEXTHOP_H
  10. #include <linux/netdevice.h>
  11. #include <linux/notifier.h>
  12. #include <linux/route.h>
  13. #include <linux/types.h>
  14. #include <net/ip_fib.h>
  15. #include <net/ip6_fib.h>
  16. #include <net/netlink.h>
  /* User-flag mask (per its name); currently expands to RTNH_F_ONLINK only. */
  #define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK

  /* Forward declaration so the structures below can hold nexthop pointers. */
  struct nexthop;
  19. struct nh_config {
  20. u32 nh_id;
  21. u8 nh_family;
  22. u8 nh_protocol;
  23. u8 nh_blackhole;
  24. u8 nh_fdb;
  25. u32 nh_flags;
  26. int nh_ifindex;
  27. struct net_device *dev;
  28. union {
  29. __be32 ipv4;
  30. struct in6_addr ipv6;
  31. } gw;
  32. struct nlattr *nh_grp;
  33. u16 nh_grp_type;
  34. u16 nh_grp_res_num_buckets;
  35. unsigned long nh_grp_res_idle_timer;
  36. unsigned long nh_grp_res_unbalanced_timer;
  37. bool nh_grp_res_has_num_buckets;
  38. bool nh_grp_res_has_idle_timer;
  39. bool nh_grp_res_has_unbalanced_timer;
  40. bool nh_hw_stats;
  41. struct nlattr *nh_encap;
  42. u16 nh_encap_type;
  43. u32 nlflags;
  44. struct nl_info nlinfo;
  45. };
  46. struct nh_info {
  47. struct hlist_node dev_hash; /* entry on netns devhash */
  48. struct nexthop *nh_parent;
  49. u8 family;
  50. bool reject_nh;
  51. bool fdb_nh;
  52. union {
  53. struct fib_nh_common fib_nhc;
  54. struct fib_nh fib_nh;
  55. struct fib6_nh fib6_nh;
  56. };
  57. };
  58. struct nh_res_bucket {
  59. struct nh_grp_entry __rcu *nh_entry;
  60. atomic_long_t used_time;
  61. unsigned long migrated_time;
  62. bool occupied;
  63. u8 nh_flags;
  64. };
  65. struct nh_res_table {
  66. struct net *net;
  67. u32 nhg_id;
  68. struct delayed_work upkeep_dw;
  69. /* List of NHGEs that have too few buckets ("uw" for underweight).
  70. * Reclaimed buckets will be given to entries in this list.
  71. */
  72. struct list_head uw_nh_entries;
  73. unsigned long unbalanced_since;
  74. u32 idle_timer;
  75. u32 unbalanced_timer;
  76. u16 num_nh_buckets;
  77. struct nh_res_bucket nh_buckets[] __counted_by(num_nh_buckets);
  78. };
  79. struct nh_grp_entry_stats {
  80. u64_stats_t packets;
  81. struct u64_stats_sync syncp;
  82. };
  83. struct nh_grp_entry {
  84. struct nexthop *nh;
  85. struct nh_grp_entry_stats __percpu *stats;
  86. u16 weight;
  87. union {
  88. struct {
  89. atomic_t upper_bound;
  90. } hthr;
  91. struct {
  92. /* Member on uw_nh_entries. */
  93. struct list_head uw_nh_entry;
  94. u16 count_buckets;
  95. u16 wants_buckets;
  96. } res;
  97. };
  98. struct list_head nh_list;
  99. struct nexthop *nh_parent; /* nexthop of group with this entry */
  100. u64 packets_hw;
  101. };
  102. struct nh_group {
  103. struct nh_group *spare; /* spare group for removals */
  104. u16 num_nh;
  105. bool is_multipath;
  106. bool hash_threshold;
  107. bool resilient;
  108. bool fdb_nh;
  109. bool has_v4;
  110. bool hw_stats;
  111. struct nh_res_table __rcu *res_table;
  112. struct nh_grp_entry nh_entries[] __counted_by(num_nh);
  113. };
  114. struct nexthop {
  115. struct rb_node rb_node; /* entry on netns rbtree */
  116. struct list_head fi_list; /* v4 entries using nh */
  117. struct list_head f6i_list; /* v6 entries using nh */
  118. struct list_head fdb_list; /* fdb entries using this nh */
  119. struct list_head grp_list; /* nh group entries using this nh */
  120. struct net *net;
  121. u32 id;
  122. u8 protocol; /* app managing this nh */
  123. u8 nh_flags;
  124. bool is_group;
  125. bool dead;
  126. spinlock_t lock; /* protect dead and f6i_list */
  127. refcount_t refcnt;
  128. struct rcu_head rcu;
  129. union {
  130. struct nh_info __rcu *nh_info;
  131. struct nh_group __rcu *nh_grp;
  132. };
  133. };
  /*
   * Event identifiers for the nexthop notifier chain.
   * Enumerator order determines the numeric values; do not reorder.
   */
  enum nexthop_event_type {
          NEXTHOP_EVENT_DEL,
          NEXTHOP_EVENT_REPLACE,
          NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE,
          NEXTHOP_EVENT_BUCKET_REPLACE,
          NEXTHOP_EVENT_HW_STATS_REPORT_DELTA,
  };
  /*
   * Discriminator stored in nh_notifier_info::type. Each value has a
   * like-named pointer member in that structure's union (nh, nh_grp,
   * nh_res_table, nh_res_bucket, nh_grp_hw_stats).
   * Enumerator order determines the numeric values; do not reorder.
   */
  enum nh_notifier_info_type {
          NH_NOTIFIER_INFO_TYPE_SINGLE,
          NH_NOTIFIER_INFO_TYPE_GRP,
          NH_NOTIFIER_INFO_TYPE_RES_TABLE,
          NH_NOTIFIER_INFO_TYPE_RES_BUCKET,
          NH_NOTIFIER_INFO_TYPE_GRP_HW_STATS,
  };
  148. struct nh_notifier_single_info {
  149. struct net_device *dev;
  150. u8 gw_family;
  151. union {
  152. __be32 ipv4;
  153. struct in6_addr ipv6;
  154. };
  155. u32 id;
  156. u8 is_reject:1,
  157. is_fdb:1,
  158. has_encap:1;
  159. };
  160. struct nh_notifier_grp_entry_info {
  161. u16 weight;
  162. struct nh_notifier_single_info nh;
  163. };
  164. struct nh_notifier_grp_info {
  165. u16 num_nh;
  166. bool is_fdb;
  167. bool hw_stats;
  168. struct nh_notifier_grp_entry_info nh_entries[] __counted_by(num_nh);
  169. };
  170. struct nh_notifier_res_bucket_info {
  171. u16 bucket_index;
  172. unsigned int idle_timer_ms;
  173. bool force;
  174. struct nh_notifier_single_info old_nh;
  175. struct nh_notifier_single_info new_nh;
  176. };
  177. struct nh_notifier_res_table_info {
  178. u16 num_nh_buckets;
  179. bool hw_stats;
  180. struct nh_notifier_single_info nhs[] __counted_by(num_nh_buckets);
  181. };
  182. struct nh_notifier_grp_hw_stats_entry_info {
  183. u32 id;
  184. u64 packets;
  185. };
  186. struct nh_notifier_grp_hw_stats_info {
  187. u16 num_nh;
  188. bool hw_stats_used;
  189. struct nh_notifier_grp_hw_stats_entry_info stats[] __counted_by(num_nh);
  190. };
  191. struct nh_notifier_info {
  192. struct net *net;
  193. struct netlink_ext_ack *extack;
  194. u32 id;
  195. enum nh_notifier_info_type type;
  196. union {
  197. struct nh_notifier_single_info *nh;
  198. struct nh_notifier_grp_info *nh_grp;
  199. struct nh_notifier_res_table_info *nh_res_table;
  200. struct nh_notifier_res_bucket_info *nh_res_bucket;
  201. struct nh_notifier_grp_hw_stats_info *nh_grp_hw_stats;
  202. };
  203. };
  204. int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
  205. struct netlink_ext_ack *extack);
  206. int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
  207. int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
  208. void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
  209. void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
  210. bool offload, bool trap);
  211. void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
  212. unsigned long *activity);
  213. void nh_grp_hw_stats_report_delta(struct nh_notifier_grp_hw_stats_info *info,
  214. unsigned int nh_idx,
  215. u64 delta_packets);
  216. /* caller is holding rcu or rtnl; no reference taken to nexthop */
  217. struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
  218. void nexthop_free_rcu(struct rcu_head *head);
  219. static inline bool nexthop_get(struct nexthop *nh)
  220. {
  221. return refcount_inc_not_zero(&nh->refcnt);
  222. }
  223. static inline void nexthop_put(struct nexthop *nh)
  224. {
  225. if (refcount_dec_and_test(&nh->refcnt))
  226. call_rcu_hurry(&nh->rcu, nexthop_free_rcu);
  227. }
  228. static inline bool nexthop_cmp(const struct nexthop *nh1,
  229. const struct nexthop *nh2)
  230. {
  231. return nh1 == nh2;
  232. }
  233. static inline bool nexthop_is_fdb(const struct nexthop *nh)
  234. {
  235. if (nh->is_group) {
  236. const struct nh_group *nh_grp;
  237. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  238. return nh_grp->fdb_nh;
  239. } else {
  240. const struct nh_info *nhi;
  241. nhi = rcu_dereference_rtnl(nh->nh_info);
  242. return nhi->fdb_nh;
  243. }
  244. }
  245. static inline bool nexthop_has_v4(const struct nexthop *nh)
  246. {
  247. if (nh->is_group) {
  248. struct nh_group *nh_grp;
  249. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  250. return nh_grp->has_v4;
  251. }
  252. return false;
  253. }
  254. static inline bool nexthop_is_multipath(const struct nexthop *nh)
  255. {
  256. if (nh->is_group) {
  257. struct nh_group *nh_grp;
  258. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  259. return nh_grp->is_multipath;
  260. }
  261. return false;
  262. }
  /*
   * Implemented outside this header. One caller in this file,
   * nexthop_path_fdb_result(), checks the result for NULL.
   */
  struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
  264. static inline unsigned int nexthop_num_path(const struct nexthop *nh)
  265. {
  266. unsigned int rc = 1;
  267. if (nh->is_group) {
  268. struct nh_group *nh_grp;
  269. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  270. if (nh_grp->is_multipath)
  271. rc = nh_grp->num_nh;
  272. }
  273. return rc;
  274. }
  275. static inline
  276. struct nexthop *nexthop_mpath_select(const struct nh_group *nhg, int nhsel)
  277. {
  278. /* for_nexthops macros in fib_semantics.c grabs a pointer to
  279. * the nexthop before checking nhsel
  280. */
  281. if (nhsel >= nhg->num_nh)
  282. return NULL;
  283. return nhg->nh_entries[nhsel].nh;
  284. }
  285. static inline
  286. int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
  287. u8 rt_family)
  288. {
  289. struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
  290. int i;
  291. for (i = 0; i < nhg->num_nh; i++) {
  292. struct nexthop *nhe = nhg->nh_entries[i].nh;
  293. struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
  294. struct fib_nh_common *nhc = &nhi->fib_nhc;
  295. int weight = nhg->nh_entries[i].weight;
  296. if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0)
  297. return -EMSGSIZE;
  298. }
  299. return 0;
  300. }
  301. /* called with rcu lock */
  302. static inline bool nexthop_is_blackhole(const struct nexthop *nh)
  303. {
  304. const struct nh_info *nhi;
  305. if (nh->is_group) {
  306. struct nh_group *nh_grp;
  307. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  308. if (nh_grp->num_nh > 1)
  309. return false;
  310. nh = nh_grp->nh_entries[0].nh;
  311. }
  312. nhi = rcu_dereference_rtnl(nh->nh_info);
  313. return nhi->reject_nh;
  314. }
  315. static inline void nexthop_path_fib_result(struct fib_result *res, int hash)
  316. {
  317. struct nh_info *nhi;
  318. struct nexthop *nh;
  319. nh = nexthop_select_path(res->fi->nh, hash);
  320. nhi = rcu_dereference(nh->nh_info);
  321. res->nhc = &nhi->fib_nhc;
  322. }
  323. /* called with rcu read lock or rtnl held */
  324. static inline
  325. struct fib_nh_common *nexthop_fib_nhc(struct nexthop *nh, int nhsel)
  326. {
  327. struct nh_info *nhi;
  328. BUILD_BUG_ON(offsetof(struct fib_nh, nh_common) != 0);
  329. BUILD_BUG_ON(offsetof(struct fib6_nh, nh_common) != 0);
  330. if (nh->is_group) {
  331. struct nh_group *nh_grp;
  332. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  333. if (nh_grp->is_multipath) {
  334. nh = nexthop_mpath_select(nh_grp, nhsel);
  335. if (!nh)
  336. return NULL;
  337. }
  338. }
  339. nhi = rcu_dereference_rtnl(nh->nh_info);
  340. return &nhi->fib_nhc;
  341. }
  342. /* called from fib_table_lookup with rcu_lock */
  343. static inline
  344. struct fib_nh_common *nexthop_get_nhc_lookup(const struct nexthop *nh,
  345. int fib_flags,
  346. const struct flowi4 *flp,
  347. int *nhsel)
  348. {
  349. struct nh_info *nhi;
  350. if (nh->is_group) {
  351. struct nh_group *nhg = rcu_dereference(nh->nh_grp);
  352. int i;
  353. for (i = 0; i < nhg->num_nh; i++) {
  354. struct nexthop *nhe = nhg->nh_entries[i].nh;
  355. nhi = rcu_dereference(nhe->nh_info);
  356. if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
  357. *nhsel = i;
  358. return &nhi->fib_nhc;
  359. }
  360. }
  361. } else {
  362. nhi = rcu_dereference(nh->nh_info);
  363. if (fib_lookup_good_nhc(&nhi->fib_nhc, fib_flags, flp)) {
  364. *nhsel = 0;
  365. return &nhi->fib_nhc;
  366. }
  367. }
  368. return NULL;
  369. }
  370. static inline bool nexthop_uses_dev(const struct nexthop *nh,
  371. const struct net_device *dev)
  372. {
  373. struct nh_info *nhi;
  374. if (nh->is_group) {
  375. struct nh_group *nhg = rcu_dereference(nh->nh_grp);
  376. int i;
  377. for (i = 0; i < nhg->num_nh; i++) {
  378. struct nexthop *nhe = nhg->nh_entries[i].nh;
  379. nhi = rcu_dereference(nhe->nh_info);
  380. if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
  381. return true;
  382. }
  383. } else {
  384. nhi = rcu_dereference(nh->nh_info);
  385. if (nhc_l3mdev_matches_dev(&nhi->fib_nhc, dev))
  386. return true;
  387. }
  388. return false;
  389. }
  390. static inline unsigned int fib_info_num_path(const struct fib_info *fi)
  391. {
  392. if (unlikely(fi->nh))
  393. return nexthop_num_path(fi->nh);
  394. return fi->fib_nhs;
  395. }
  396. int fib_check_nexthop(struct nexthop *nh, u8 scope,
  397. struct netlink_ext_ack *extack);
  398. static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
  399. {
  400. if (unlikely(fi->nh))
  401. return nexthop_fib_nhc(fi->nh, nhsel);
  402. return &fi->fib_nh[nhsel].nh_common;
  403. }
  404. /* only used when fib_nh is built into fib_info */
  405. static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
  406. {
  407. WARN_ON(fi->nh);
  408. return &fi->fib_nh[nhsel];
  409. }
  410. /*
  411. * IPv6 variants
  412. */
  /* Implemented outside this header; semantics are not visible here. */
  int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
                         struct netlink_ext_ack *extack);
  415. /* Caller should either hold rcu_read_lock(), or RTNL. */
  416. static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
  417. {
  418. struct nh_info *nhi;
  419. if (nh->is_group) {
  420. struct nh_group *nh_grp;
  421. nh_grp = rcu_dereference_rtnl(nh->nh_grp);
  422. nh = nexthop_mpath_select(nh_grp, 0);
  423. if (!nh)
  424. return NULL;
  425. }
  426. nhi = rcu_dereference_rtnl(nh->nh_info);
  427. if (nhi->family == AF_INET6)
  428. return &nhi->fib6_nh;
  429. return NULL;
  430. }
  431. static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
  432. {
  433. struct fib6_nh *fib6_nh;
  434. fib6_nh = f6i->nh ? nexthop_fib6_nh(f6i->nh) : f6i->fib6_nh;
  435. return fib6_nh->fib_nh_dev;
  436. }
  437. static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
  438. {
  439. struct nexthop *nh = res->f6i->nh;
  440. struct nh_info *nhi;
  441. nh = nexthop_select_path(nh, hash);
  442. nhi = rcu_dereference_rtnl(nh->nh_info);
  443. if (nhi->reject_nh) {
  444. res->fib6_type = RTN_BLACKHOLE;
  445. res->fib6_flags |= RTF_REJECT;
  446. res->nh = nexthop_fib6_nh(nh);
  447. } else {
  448. res->nh = &nhi->fib6_nh;
  449. }
  450. }
  /*
   * Implemented outside this header. Takes a callback @cb receiving a
   * fib6_nh and the opaque @arg; iteration and return-value semantics are
   * not visible here.
   */
  int nexthop_for_each_fib6_nh(struct nexthop *nh,
                               int (*cb)(struct fib6_nh *nh, void *arg),
                               void *arg);
  454. static inline int nexthop_get_family(struct nexthop *nh)
  455. {
  456. struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
  457. return nhi->family;
  458. }
  459. static inline
  460. struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
  461. {
  462. struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
  463. return &nhi->fib_nhc;
  464. }
  465. static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
  466. int hash)
  467. {
  468. struct nh_info *nhi;
  469. struct nexthop *nhp;
  470. nhp = nexthop_select_path(nh, hash);
  471. if (unlikely(!nhp))
  472. return NULL;
  473. nhi = rcu_dereference(nhp->nh_info);
  474. return &nhi->fib_nhc;
  475. }
  476. #endif