lwtunnel.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * lwtunnel Infrastructure for light weight tunnels like mpls
  4. *
  5. * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
  6. */
  7. #include <linux/capability.h>
  8. #include <linux/module.h>
  9. #include <linux/types.h>
  10. #include <linux/kernel.h>
  11. #include <linux/slab.h>
  12. #include <linux/uaccess.h>
  13. #include <linux/skbuff.h>
  14. #include <linux/netdevice.h>
  15. #include <linux/lwtunnel.h>
  16. #include <linux/in.h>
  17. #include <linux/init.h>
  18. #include <linux/err.h>
  19. #include <net/lwtunnel.h>
  20. #include <net/rtnetlink.h>
  21. #include <net/ip6_fib.h>
  22. #include <net/rtnh.h>
  23. #include "dev.h"
  /* Static key, defined in the "false" state and exported to GPL modules.
   * Nothing in this part of the file reads or flips it; judging by the name
   * it presumably gates netfilter hooks for lwtunnel traffic -- confirm
   * against its users.
   */
  24. DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
  25. EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
  26. #ifdef CONFIG_MODULES
  27. static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
  28. {
  29. /* Only lwt encaps implemented without using an interface for
  30. * the encap need to return a string here.
  31. */
  32. switch (encap_type) {
  33. case LWTUNNEL_ENCAP_MPLS:
  34. return "MPLS";
  35. case LWTUNNEL_ENCAP_ILA:
  36. return "ILA";
  37. case LWTUNNEL_ENCAP_SEG6:
  38. return "SEG6";
  39. case LWTUNNEL_ENCAP_BPF:
  40. return "BPF";
  41. case LWTUNNEL_ENCAP_SEG6_LOCAL:
  42. return "SEG6LOCAL";
  43. case LWTUNNEL_ENCAP_RPL:
  44. return "RPL";
  45. case LWTUNNEL_ENCAP_IOAM6:
  46. return "IOAM6";
  47. case LWTUNNEL_ENCAP_XFRM:
  48. /* module autoload not supported for encap type */
  49. return NULL;
  50. case LWTUNNEL_ENCAP_IP6:
  51. case LWTUNNEL_ENCAP_IP:
  52. case LWTUNNEL_ENCAP_NONE:
  53. case __LWTUNNEL_ENCAP_MAX:
  54. /* should not have got here */
  55. WARN_ON(1);
  56. break;
  57. }
  58. return NULL;
  59. }
  60. #endif /* CONFIG_MODULES */
  61. struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
  62. {
  63. struct lwtunnel_state *lws;
  64. lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC);
  65. return lws;
  66. }
  67. EXPORT_SYMBOL_GPL(lwtunnel_state_alloc);
  /* Table of encap handlers, indexed by encap type
   * (LWTUNNEL_ENCAP_MAX + 1 slots). Slots are filled and cleared with
   * cmpxchg() by lwtunnel_encap_add_ops() / lwtunnel_encap_del_ops();
   * most readers in this file fetch an entry with rcu_dereference()
   * inside an rcu_read_lock() section.
   */
  68. static const struct lwtunnel_encap_ops __rcu *
  69. lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
  70. int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
  71. unsigned int num)
  72. {
  73. if (num > LWTUNNEL_ENCAP_MAX)
  74. return -ERANGE;
  75. return !cmpxchg((const struct lwtunnel_encap_ops **)
  76. &lwtun_encaps[num],
  77. NULL, ops) ? 0 : -1;
  78. }
  79. EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops);
  80. int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
  81. unsigned int encap_type)
  82. {
  83. int ret;
  84. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  85. encap_type > LWTUNNEL_ENCAP_MAX)
  86. return -ERANGE;
  87. ret = (cmpxchg((const struct lwtunnel_encap_ops **)
  88. &lwtun_encaps[encap_type],
  89. ops, NULL) == ops) ? 0 : -1;
  90. synchronize_net();
  91. return ret;
  92. }
  93. EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops);
  94. int lwtunnel_build_state(struct net *net, u16 encap_type,
  95. struct nlattr *encap, unsigned int family,
  96. const void *cfg, struct lwtunnel_state **lws,
  97. struct netlink_ext_ack *extack)
  98. {
  99. const struct lwtunnel_encap_ops *ops;
  100. bool found = false;
  101. int ret = -EINVAL;
  102. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  103. encap_type > LWTUNNEL_ENCAP_MAX) {
  104. NL_SET_ERR_MSG_ATTR(extack, encap,
  105. "Unknown LWT encapsulation type");
  106. return ret;
  107. }
  108. ret = -EOPNOTSUPP;
  109. rcu_read_lock();
  110. ops = rcu_dereference(lwtun_encaps[encap_type]);
  111. if (likely(ops && ops->build_state && try_module_get(ops->owner)))
  112. found = true;
  113. rcu_read_unlock();
  114. if (found) {
  115. ret = ops->build_state(net, encap, family, cfg, lws, extack);
  116. if (ret)
  117. module_put(ops->owner);
  118. } else {
  119. /* don't rely on -EOPNOTSUPP to detect match as build_state
  120. * handlers could return it
  121. */
  122. NL_SET_ERR_MSG_ATTR(extack, encap,
  123. "LWT encapsulation type not supported");
  124. }
  125. return ret;
  126. }
  127. EXPORT_SYMBOL_GPL(lwtunnel_build_state);
  128. int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack)
  129. {
  130. const struct lwtunnel_encap_ops *ops;
  131. int ret = -EINVAL;
  132. if (encap_type == LWTUNNEL_ENCAP_NONE ||
  133. encap_type > LWTUNNEL_ENCAP_MAX) {
  134. NL_SET_ERR_MSG(extack, "Unknown lwt encapsulation type");
  135. return ret;
  136. }
  137. ops = rcu_access_pointer(lwtun_encaps[encap_type]);
  138. #ifdef CONFIG_MODULES
  139. if (!ops) {
  140. const char *encap_type_str = lwtunnel_encap_str(encap_type);
  141. if (encap_type_str) {
  142. request_module("rtnl-lwt-%s", encap_type_str);
  143. ops = rcu_access_pointer(lwtun_encaps[encap_type]);
  144. }
  145. }
  146. #endif
  147. ret = ops ? 0 : -EOPNOTSUPP;
  148. if (ret < 0)
  149. NL_SET_ERR_MSG(extack, "lwt encapsulation type not supported");
  150. return ret;
  151. }
  152. EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type);
  153. int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining,
  154. struct netlink_ext_ack *extack)
  155. {
  156. struct rtnexthop *rtnh = (struct rtnexthop *)attr;
  157. struct nlattr *nla_entype;
  158. struct nlattr *attrs;
  159. u16 encap_type;
  160. int attrlen;
  161. while (rtnh_ok(rtnh, remaining)) {
  162. attrlen = rtnh_attrlen(rtnh);
  163. if (attrlen > 0) {
  164. attrs = rtnh_attrs(rtnh);
  165. nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
  166. if (nla_entype) {
  167. if (nla_len(nla_entype) < sizeof(u16)) {
  168. NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE");
  169. return -EINVAL;
  170. }
  171. encap_type = nla_get_u16(nla_entype);
  172. if (lwtunnel_valid_encap_type(encap_type, extack))
  173. return -EOPNOTSUPP;
  174. }
  175. }
  176. rtnh = rtnh_next(rtnh, &remaining);
  177. }
  178. return 0;
  179. }
  180. EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr);
  181. void lwtstate_free(struct lwtunnel_state *lws)
  182. {
  183. const struct lwtunnel_encap_ops *ops = lwtun_encaps[lws->type];
  184. if (ops->destroy_state) {
  185. ops->destroy_state(lws);
  186. kfree_rcu(lws, rcu);
  187. } else {
  188. kfree(lws);
  189. }
  190. module_put(ops->owner);
  191. }
  192. EXPORT_SYMBOL_GPL(lwtstate_free);
  193. int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
  194. int encap_attr, int encap_type_attr)
  195. {
  196. const struct lwtunnel_encap_ops *ops;
  197. struct nlattr *nest;
  198. int ret;
  199. if (!lwtstate)
  200. return 0;
  201. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  202. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  203. return 0;
  204. nest = nla_nest_start_noflag(skb, encap_attr);
  205. if (!nest)
  206. return -EMSGSIZE;
  207. ret = -EOPNOTSUPP;
  208. rcu_read_lock();
  209. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  210. if (likely(ops && ops->fill_encap))
  211. ret = ops->fill_encap(skb, lwtstate);
  212. rcu_read_unlock();
  213. if (ret)
  214. goto nla_put_failure;
  215. nla_nest_end(skb, nest);
  216. ret = nla_put_u16(skb, encap_type_attr, lwtstate->type);
  217. if (ret)
  218. goto nla_put_failure;
  219. return 0;
  220. nla_put_failure:
  221. nla_nest_cancel(skb, nest);
  222. return (ret == -EOPNOTSUPP ? 0 : ret);
  223. }
  224. EXPORT_SYMBOL_GPL(lwtunnel_fill_encap);
  225. int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
  226. {
  227. const struct lwtunnel_encap_ops *ops;
  228. int ret = 0;
  229. if (!lwtstate)
  230. return 0;
  231. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  232. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  233. return 0;
  234. rcu_read_lock();
  235. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  236. if (likely(ops && ops->get_encap_size))
  237. ret = nla_total_size(ops->get_encap_size(lwtstate));
  238. rcu_read_unlock();
  239. return ret;
  240. }
  241. EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size);
  242. int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
  243. {
  244. const struct lwtunnel_encap_ops *ops;
  245. int ret = 0;
  246. if (!a && !b)
  247. return 0;
  248. if (!a || !b)
  249. return 1;
  250. if (a->type != b->type)
  251. return 1;
  252. if (a->type == LWTUNNEL_ENCAP_NONE ||
  253. a->type > LWTUNNEL_ENCAP_MAX)
  254. return 0;
  255. rcu_read_lock();
  256. ops = rcu_dereference(lwtun_encaps[a->type]);
  257. if (likely(ops && ops->cmp_encap))
  258. ret = ops->cmp_encap(a, b);
  259. rcu_read_unlock();
  260. return ret;
  261. }
  262. EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap);
  263. int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  264. {
  265. const struct lwtunnel_encap_ops *ops;
  266. struct lwtunnel_state *lwtstate;
  267. struct dst_entry *dst;
  268. int ret;
  269. local_bh_disable();
  270. if (dev_xmit_recursion()) {
  271. net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
  272. __func__);
  273. ret = -ENETDOWN;
  274. goto drop;
  275. }
  276. dst = skb_dst(skb);
  277. if (!dst) {
  278. ret = -EINVAL;
  279. goto drop;
  280. }
  281. lwtstate = dst->lwtstate;
  282. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  283. lwtstate->type > LWTUNNEL_ENCAP_MAX) {
  284. ret = 0;
  285. goto out;
  286. }
  287. ret = -EOPNOTSUPP;
  288. rcu_read_lock();
  289. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  290. if (likely(ops && ops->output)) {
  291. dev_xmit_recursion_inc();
  292. ret = ops->output(net, sk, skb);
  293. dev_xmit_recursion_dec();
  294. }
  295. rcu_read_unlock();
  296. if (ret == -EOPNOTSUPP)
  297. goto drop;
  298. goto out;
  299. drop:
  300. kfree_skb(skb);
  301. out:
  302. local_bh_enable();
  303. return ret;
  304. }
  305. EXPORT_SYMBOL_GPL(lwtunnel_output);
  306. int lwtunnel_xmit(struct sk_buff *skb)
  307. {
  308. const struct lwtunnel_encap_ops *ops;
  309. struct lwtunnel_state *lwtstate;
  310. struct dst_entry *dst;
  311. int ret;
  312. local_bh_disable();
  313. if (dev_xmit_recursion()) {
  314. net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
  315. __func__);
  316. ret = -ENETDOWN;
  317. goto drop;
  318. }
  319. dst = skb_dst(skb);
  320. if (!dst) {
  321. ret = -EINVAL;
  322. goto drop;
  323. }
  324. lwtstate = dst->lwtstate;
  325. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  326. lwtstate->type > LWTUNNEL_ENCAP_MAX) {
  327. ret = 0;
  328. goto out;
  329. }
  330. ret = -EOPNOTSUPP;
  331. rcu_read_lock();
  332. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  333. if (likely(ops && ops->xmit)) {
  334. dev_xmit_recursion_inc();
  335. ret = ops->xmit(skb);
  336. dev_xmit_recursion_dec();
  337. }
  338. rcu_read_unlock();
  339. if (ret == -EOPNOTSUPP)
  340. goto drop;
  341. goto out;
  342. drop:
  343. kfree_skb(skb);
  344. out:
  345. local_bh_enable();
  346. return ret;
  347. }
  348. EXPORT_SYMBOL_GPL(lwtunnel_xmit);
  349. int lwtunnel_input(struct sk_buff *skb)
  350. {
  351. const struct lwtunnel_encap_ops *ops;
  352. struct lwtunnel_state *lwtstate;
  353. struct dst_entry *dst;
  354. int ret;
  355. DEBUG_NET_WARN_ON_ONCE(!in_softirq());
  356. if (dev_xmit_recursion()) {
  357. net_crit_ratelimited("%s(): recursion limit reached on datapath\n",
  358. __func__);
  359. ret = -ENETDOWN;
  360. goto drop;
  361. }
  362. dst = skb_dst(skb);
  363. if (!dst) {
  364. ret = -EINVAL;
  365. goto drop;
  366. }
  367. lwtstate = dst->lwtstate;
  368. if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
  369. lwtstate->type > LWTUNNEL_ENCAP_MAX)
  370. return 0;
  371. ret = -EOPNOTSUPP;
  372. rcu_read_lock();
  373. ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
  374. if (likely(ops && ops->input)) {
  375. dev_xmit_recursion_inc();
  376. ret = ops->input(skb);
  377. dev_xmit_recursion_dec();
  378. }
  379. rcu_read_unlock();
  380. if (ret == -EOPNOTSUPP)
  381. goto drop;
  382. return ret;
  383. drop:
  384. kfree_skb(skb);
  385. return ret;
  386. }
  387. EXPORT_SYMBOL_GPL(lwtunnel_input);