/* seg6_iptunnel.c */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * SR-IPv6 implementation
  4. *
  5. * Author:
  6. * David Lebrun <david.lebrun@uclouvain.be>
  7. */
  8. #include <linux/types.h>
  9. #include <linux/skbuff.h>
  10. #include <linux/net.h>
  11. #include <linux/module.h>
  12. #include <net/ip.h>
  13. #include <net/ip_tunnels.h>
  14. #include <net/lwtunnel.h>
  15. #include <net/netevent.h>
  16. #include <net/netns/generic.h>
  17. #include <net/ip6_fib.h>
  18. #include <net/route.h>
  19. #include <net/seg6.h>
  20. #include <linux/seg6.h>
  21. #include <linux/seg6_iptunnel.h>
  22. #include <net/addrconf.h>
  23. #include <net/ip6_route.h>
  24. #include <net/dst_cache.h>
  25. #ifdef CONFIG_IPV6_SEG6_HMAC
  26. #include <net/seg6_hmac.h>
  27. #endif
  28. #include <linux/netfilter.h>
  29. static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
  30. {
  31. int head = 0;
  32. switch (tuninfo->mode) {
  33. case SEG6_IPTUN_MODE_INLINE:
  34. break;
  35. case SEG6_IPTUN_MODE_ENCAP:
  36. case SEG6_IPTUN_MODE_ENCAP_RED:
  37. head = sizeof(struct ipv6hdr);
  38. break;
  39. case SEG6_IPTUN_MODE_L2ENCAP:
  40. case SEG6_IPTUN_MODE_L2ENCAP_RED:
  41. return 0;
  42. }
  43. return ((tuninfo->srh->hdrlen + 1) << 3) + head;
  44. }
/* seg6 private state, stored in the data area of a lwtunnel_state
 * (see seg6_lwt_lwtunnel()).
 */
struct seg6_lwt {
	/* dst cache consulted and filled by seg6_input_core() */
	struct dst_cache cache_input;
	/* dst cache consulted and filled by seg6_output_core() */
	struct dst_cache cache_output;
	/* tunnel configuration (mode + SRH) copied in by seg6_build_state() */
	struct seg6_iptunnel_encap tuninfo[];
};
  50. static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
  51. {
  52. return (struct seg6_lwt *)lwt->data;
  53. }
  54. static inline struct seg6_iptunnel_encap *
  55. seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
  56. {
  57. return seg6_lwt_lwtunnel(lwt)->tuninfo;
  58. }
/* Netlink policy for the nested seg6 encap attributes: the only attribute
 * listed is SEG6_IPTUNNEL_SRH, carried as an opaque binary blob.  Its
 * contents are checked later in seg6_build_state().
 */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH]	= { .type = NLA_BINARY },
};
  62. static int nla_put_srh(struct sk_buff *skb, int attrtype,
  63. struct seg6_iptunnel_encap *tuninfo)
  64. {
  65. struct seg6_iptunnel_encap *data;
  66. struct nlattr *nla;
  67. int len;
  68. len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
  69. nla = nla_reserve(skb, attrtype, len);
  70. if (!nla)
  71. return -EMSGSIZE;
  72. data = nla_data(nla);
  73. memcpy(data, tuninfo, len);
  74. return 0;
  75. }
  76. static void set_tun_src(struct net *net, struct net_device *dev,
  77. struct in6_addr *daddr, struct in6_addr *saddr)
  78. {
  79. struct seg6_pernet_data *sdata = seg6_pernet(net);
  80. struct in6_addr *tun_src;
  81. rcu_read_lock();
  82. tun_src = rcu_dereference(sdata->tun_src);
  83. if (!ipv6_addr_any(tun_src)) {
  84. memcpy(saddr, tun_src, sizeof(struct in6_addr));
  85. } else {
  86. ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
  87. saddr);
  88. }
  89. rcu_read_unlock();
  90. }
  91. /* Compute flowlabel for outer IPv6 header */
  92. static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
  93. struct ipv6hdr *inner_hdr)
  94. {
  95. int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
  96. __be32 flowlabel = 0;
  97. u32 hash;
  98. if (do_flowlabel > 0) {
  99. hash = skb_get_hash(skb);
  100. hash = rol32(hash, 16);
  101. flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
  102. } else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
  103. flowlabel = ip6_flowlabel(inner_hdr);
  104. }
  105. return flowlabel;
  106. }
/* Prepend an outer IPv6 header and a full copy of @osrh to @skb.
 *
 * @skb:       packet to encapsulate; its dst supplies the device and netns
 * @osrh:      SRH template, copied verbatim right after the new IPv6 header
 * @proto:     value written to the next-header field of the pushed SRH
 * @cache_dst: handed to dst_dev_overhead() when sizing the headroom;
 *             seg6_do_srh_encap() passes NULL here
 *
 * Returns 0 on success, or the error reported by skb_cow_head() or
 * seg6_push_hmac().
 */
static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
			       int proto, struct dst_entry *cache_dst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct net *net = dev_net(dev);
	struct ipv6hdr *hdr, *inner_hdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, tot_len, err;
	__be32 flowlabel;

	/* SRH length in bytes: (hdrlen + 1) * 8 */
	hdrlen = (osrh->hdrlen + 1) << 3;
	tot_len = hdrlen + sizeof(*hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* read the inner header and compute the flow label before the
	 * network header is moved by the push below
	 */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* inherit tc, flowlabel and hlim
	 * hlim will be decremented in ip6_forward() afterwards and
	 * decapsulation will overwrite inner hlim with outer hlim
	 */

	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		/* non-IPv6 payload: traffic class 0, hop limit from the dst */
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));

		/* the control block has been erased, so we have to set the
		 * iif once again.
		 * We read the receiving interface index directly from the
		 * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
		 * ip_rcv_core(...)).
		 */
		IP6CB(skb)->iif = skb->skb_iif;
	}

	hdr->nexthdr = NEXTHDR_ROUTING;

	/* the SRH sits immediately after the outer IPv6 header */
	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	isrh->nexthdr = proto;

	/* outer destination is the segment selected by first_segment */
	hdr->daddr = isrh->segments[isrh->first_segment];
	set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	/* account for the pushed bytes in the skb checksum */
	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
  165. /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
  166. int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
  167. {
  168. return __seg6_do_srh_encap(skb, osrh, proto, NULL);
  169. }
  170. EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH
 *
 * The segment at index first_segment is placed in the outer destination
 * address and left out of the pushed SRH.  When there is a single segment
 * and no HMAC, the SRH is omitted altogether.
 *
 * @skb:       packet to encapsulate; its dst supplies the device and netns
 * @osrh:      full SRH template
 * @proto:     next-header value for the innermost pushed header
 * @cache_dst: handed to dst_dev_overhead() when sizing the headroom
 *
 * Returns 0 on success, or the error reported by skb_cow_head() or
 * seg6_push_hmac().
 */
static int seg6_do_srh_encap_red(struct sk_buff *skb,
				 struct ipv6_sr_hdr *osrh, int proto,
				 struct dst_entry *cache_dst)
{
	__u8 first_seg = osrh->first_segment;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct net *net = dev_net(dev);
	struct ipv6hdr *hdr, *inner_hdr;
	int hdrlen = ipv6_optlen(osrh);
	int red_tlv_offset, tlv_offset;
	struct ipv6_sr_hdr *isrh;
	bool skip_srh = false;
	__be32 flowlabel;
	int tot_len, err;
	int red_hdrlen;
	int tlvs_len;

	if (first_seg > 0) {
		/* more than one segment: the reduced SRH is one segment
		 * (one in6_addr) shorter than the original
		 */
		red_hdrlen = hdrlen - sizeof(struct in6_addr);
	} else {
		/* NOTE: if tag/flags and/or other TLVs are introduced in the
		 * seg6_iptunnel infrastructure, they should be considered when
		 * deciding to skip the SRH.
		 */
		skip_srh = !sr_has_hmac(osrh);

		red_hdrlen = skip_srh ? 0 : hdrlen;
	}

	tot_len = red_hdrlen + sizeof(struct ipv6hdr);

	err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	/* read the inner header and compute the flow label before the
	 * network header is moved by the push below
	 */
	inner_hdr = ipv6_hdr(skb);
	flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);

	skb_push(skb, tot_len);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	hdr = ipv6_hdr(skb);

	/* based on seg6_do_srh_encap() */
	if (skb->protocol == htons(ETH_P_IPV6)) {
		ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
			     flowlabel);
		hdr->hop_limit = inner_hdr->hop_limit;
	} else {
		ip6_flow_hdr(hdr, 0, flowlabel);
		hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));

		/* wipe the control block, then restore the input interface */
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
		IP6CB(skb)->iif = skb->skb_iif;
	}

	/* no matter if we have to skip the SRH or not, the first segment
	 * always comes in the pushed IPv6 header.
	 */
	hdr->daddr = osrh->segments[first_seg];

	if (skip_srh) {
		/* no SRH: the outer header points straight at the payload */
		hdr->nexthdr = proto;

		set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
		goto out;
	}

	/* we cannot skip the SRH, slow path */

	hdr->nexthdr = NEXTHDR_ROUTING;
	isrh = (void *)hdr + sizeof(struct ipv6hdr);

	if (unlikely(!first_seg)) {
		/* this is a very rare case; we have only one SID but
		 * we cannot skip the SRH since we are carrying some
		 * other info.
		 */
		memcpy(isrh, osrh, hdrlen);
		goto srcaddr;
	}

	/* tlv_offset: end of the segment list in the original SRH;
	 * red_tlv_offset: the same point once one segment is dropped
	 */
	tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
	red_tlv_offset = tlv_offset - sizeof(struct in6_addr);

	/* copy the fixed SRH part plus segments[0 .. first_seg - 1] */
	memcpy(isrh, osrh, red_tlv_offset);

	/* whatever follows the segment list is appended unchanged */
	tlvs_len = hdrlen - tlv_offset;
	if (unlikely(tlvs_len > 0)) {
		const void *s = (const void *)osrh + tlv_offset;
		void *d = (void *)isrh + red_tlv_offset;

		memcpy(d, s, tlvs_len);
	}

	/* one segment fewer: 16 bytes, i.e. two 8-byte hdrlen units */
	--isrh->first_segment;
	isrh->hdrlen -= 2;

srcaddr:
	isrh->nexthdr = proto;
	set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

out:
	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	/* account for the pushed bytes in the skb checksum */
	skb_postpush_rcsum(skb, hdr, tot_len);

	return 0;
}
/* Insert a copy of @osrh between the IPv6 header of @skb and what follows.
 *
 * The existing IPv6 header is moved to the new front of the packet, the
 * SRH is written right behind it, the original destination address is
 * stored in segments[0] and the header's destination becomes
 * segments[first_segment].
 *
 * @cache_dst: handed to dst_dev_overhead() when sizing the headroom;
 *             seg6_do_srh_inline() passes NULL here
 *
 * Returns 0 on success, or the error reported by skb_cow_head() or
 * seg6_push_hmac().
 */
static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
				struct dst_entry *cache_dst)
{
	struct ipv6hdr *hdr, *oldhdr;
	struct ipv6_sr_hdr *isrh;
	int hdrlen, err;

	/* SRH length in bytes: (hdrlen + 1) * 8 */
	hdrlen = (osrh->hdrlen + 1) << 3;

	err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
	if (unlikely(err))
		return err;

	oldhdr = ipv6_hdr(skb);

	/* strip the IPv6 header (fixing up the checksum), then re-open
	 * room for that header plus the SRH
	 */
	skb_pull(skb, sizeof(struct ipv6hdr));
	skb_postpull_rcsum(skb, skb_network_header(skb),
			   sizeof(struct ipv6hdr));

	skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);

	hdr = ipv6_hdr(skb);

	/* old and new header locations may overlap, hence memmove */
	memmove(hdr, oldhdr, sizeof(*hdr));

	isrh = (void *)hdr + sizeof(*hdr);
	memcpy(isrh, osrh, hdrlen);

	/* chain the SRH in: it takes over the old next-header value */
	isrh->nexthdr = hdr->nexthdr;
	hdr->nexthdr = NEXTHDR_ROUTING;

	/* keep the original destination in segments[0] and steer the
	 * packet to segments[first_segment]
	 */
	isrh->segments[0] = hdr->daddr;
	hdr->daddr = isrh->segments[isrh->first_segment];

#ifdef CONFIG_IPV6_SEG6_HMAC
	if (sr_has_hmac(isrh)) {
		struct net *net = skb_dst_dev_net(skb);

		err = seg6_push_hmac(net, &hdr->saddr, isrh);
		if (unlikely(err))
			return err;
	}
#endif

	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	/* account for the re-pushed header and the SRH in the checksum */
	skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);

	return 0;
}
/* Apply the SRH configured on the lwtunnel state of @skb's dst, according
 * to the configured tunnel mode.
 *
 * @cache_dst: forwarded to the encap/inline helpers for headroom sizing
 *
 * Returns 0 on success or a negative errno; the skb is not freed here.
 */
static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
	struct dst_entry *dst = skb_dst(skb);
	struct seg6_iptunnel_encap *tinfo;
	int proto, err = 0;

	tinfo = seg6_encap_lwtunnel(dst->lwtstate);

	switch (tinfo->mode) {
	case SEG6_IPTUN_MODE_INLINE:
		/* inline insertion only makes sense for IPv6 packets */
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EINVAL;

		err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
		if (err)
			return err;
		break;
	case SEG6_IPTUN_MODE_ENCAP:
	case SEG6_IPTUN_MODE_ENCAP_RED:
		err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
		if (err)
			return err;

		/* inner protocol number depends on the payload family */
		if (skb->protocol == htons(ETH_P_IPV6))
			proto = IPPROTO_IPV6;
		else if (skb->protocol == htons(ETH_P_IP))
			proto = IPPROTO_IPIP;
		else
			return -EINVAL;

		if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh,
						  proto, cache_dst);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh,
						    proto, cache_dst);
		if (err)
			return err;

		/* record the inner headers, then mark the skb as IPv6 */
		skb_set_inner_transport_header(skb, skb_transport_offset(skb));
		skb_set_inner_protocol(skb, skb->protocol);
		skb->protocol = htons(ETH_P_IPV6);
		break;
	case SEG6_IPTUN_MODE_L2ENCAP:
	case SEG6_IPTUN_MODE_L2ENCAP_RED:
		if (!skb_mac_header_was_set(skb))
			return -EINVAL;

		/* grow the headroom by mac_len bytes */
		if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
			return -ENOMEM;

		/* extend the data area backwards by mac_len bytes so that
		 * those bytes become part of the IPPROTO_ETHERNET payload
		 */
		skb_mac_header_rebuild(skb);
		skb_push(skb, skb->mac_len);

		if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
			err = __seg6_do_srh_encap(skb, tinfo->srh,
						  IPPROTO_ETHERNET,
						  cache_dst);
		else
			err = seg6_do_srh_encap_red(skb, tinfo->srh,
						    IPPROTO_ETHERNET,
						    cache_dst);
		if (err)
			return err;

		skb->protocol = htons(ETH_P_IPV6);
		break;
	}

	/* the transport header now starts right after the IPv6 header */
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));
	nf_reset_ct(skb);

	return 0;
}
  365. /* insert an SRH within an IPv6 packet, just after the IPv6 header */
  366. int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
  367. {
  368. return __seg6_do_srh_inline(skb, osrh, NULL);
  369. }
  370. EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
/* Final step of the input path: hand @skb to dst_input().
 *
 * @net and @sk are unused; the signature matches the callback that
 * seg6_input_core() passes to NF_HOOK().
 */
static int seg6_input_finish(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	return dst_input(skb);
}
/* Input-path worker: add the SRH to @skb, attach a route for the new
 * destination and continue input processing.
 *
 * A dst is first looked up in the per-state input cache.  On a miss the
 * packet is routed with ip6_route_input() and the result is cached unless
 * that would create a dst reference loop.
 *
 * Returns the result of the netfilter hook / seg6_input_finish(), or a
 * negative errno after freeing @skb.
 */
static int seg6_input_core(struct net *net, struct sock *sk,
			   struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct lwtunnel_state *lwtst;
	struct seg6_lwt *slwt;
	int err;

	/* We cannot dereference "orig_dst" once ip6_route_input() or
	 * skb_dst_drop() is called. However, in order to detect a dst loop, we
	 * need the address of its lwtstate. So, save the address of lwtstate
	 * now and use it later as a comparison.
	 */
	lwtst = orig_dst->lwtstate;

	slwt = seg6_lwt_lwtunnel(lwtst);

	/* the dst cache is accessed with bottom halves disabled */
	local_bh_disable();
	dst = dst_cache_get(&slwt->cache_input);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err)) {
		/* give back the reference obtained from the cache, if any */
		dst_release(dst);
		goto drop;
	}

	if (!dst) {
		/* cache miss: route the packet on its new destination */
		ip6_route_input(skb);
		dst = skb_dst(skb);

		/* cache only if we don't create a dst reference loop */
		if (!dst->error && lwtst != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache_input, dst,
					  &ipv6_hdr(skb)->saddr);
			local_bh_enable();
		}

		/* make room for the link-layer header of the new device */
		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
		if (unlikely(err))
			goto drop;
	} else {
		/* cache hit: swap the skb's dst for the cached one */
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);
	}

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       dev_net(skb->dev), NULL, skb, NULL,
			       skb_dst_dev(skb), seg6_input_finish);

	return seg6_input_finish(dev_net(skb->dev), NULL, skb);
drop:
	kfree_skb(skb);
	return err;
}
  425. static int seg6_input_nf(struct sk_buff *skb)
  426. {
  427. struct net_device *dev = skb_dst_dev(skb);
  428. struct net *net = dev_net(skb->dev);
  429. switch (skb->protocol) {
  430. case htons(ETH_P_IP):
  431. return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
  432. skb, NULL, dev, seg6_input_core);
  433. case htons(ETH_P_IPV6):
  434. return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
  435. skb, NULL, dev, seg6_input_core);
  436. }
  437. return -EINVAL;
  438. }
  439. static int seg6_input(struct sk_buff *skb)
  440. {
  441. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  442. return seg6_input_nf(skb);
  443. return seg6_input_core(dev_net(skb->dev), NULL, skb);
  444. }
/* Output-path worker: add the SRH to @skb, attach a route for the new
 * destination and continue with dst_output().
 *
 * A dst is first looked up in the per-state output cache.  On a miss a
 * route lookup is done on the outer header fields and the result is cached
 * unless that would create a dst reference loop.
 *
 * Returns the result of the netfilter hook / dst_output(), or a negative
 * errno after freeing @skb.
 */
static int seg6_output_core(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct dst_entry *orig_dst = skb_dst(skb);
	struct dst_entry *dst = NULL;
	struct seg6_lwt *slwt;
	int err;

	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);

	/* the dst cache is accessed with bottom halves disabled */
	local_bh_disable();
	dst = dst_cache_get(&slwt->cache_output);
	local_bh_enable();

	err = seg6_do_srh(skb, dst);
	if (unlikely(err))
		goto drop;

	if (unlikely(!dst)) {
		/* cache miss: build a flow from the (new) outer header */
		struct ipv6hdr *hdr = ipv6_hdr(skb);
		struct flowi6 fl6;

		memset(&fl6, 0, sizeof(fl6));
		fl6.daddr = hdr->daddr;
		fl6.saddr = hdr->saddr;
		fl6.flowlabel = ip6_flowinfo(hdr);
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_proto = hdr->nexthdr;

		dst = ip6_route_output(net, NULL, &fl6);
		if (dst->error) {
			err = dst->error;
			goto drop;
		}

		/* cache only if we don't create a dst reference loop */
		if (orig_dst->lwtstate != dst->lwtstate) {
			local_bh_disable();
			dst_cache_set_ip6(&slwt->cache_output, dst, &fl6.saddr);
			local_bh_enable();
		}

		/* make room for the link-layer header of the new device */
		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
		if (unlikely(err))
			goto drop;
	}

	/* replace the skb's dst with the cached or freshly looked-up one */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
			       NULL, dst_dev(dst), dst_output);

	return dst_output(net, sk, skb);
drop:
	/* dst may still be NULL here (cache miss followed by a seg6_do_srh()
	 * failure); this relies on dst_release() accepting NULL
	 */
	dst_release(dst);
	kfree_skb(skb);
	return err;
}
  494. static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
  495. {
  496. struct net_device *dev = skb_dst_dev(skb);
  497. switch (skb->protocol) {
  498. case htons(ETH_P_IP):
  499. return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
  500. NULL, dev, seg6_output_core);
  501. case htons(ETH_P_IPV6):
  502. return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
  503. NULL, dev, seg6_output_core);
  504. }
  505. return -EINVAL;
  506. }
  507. static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
  508. {
  509. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  510. return seg6_output_nf(net, sk, skb);
  511. return seg6_output_core(net, sk, skb);
  512. }
  513. static int seg6_build_state(struct net *net, struct nlattr *nla,
  514. unsigned int family, const void *cfg,
  515. struct lwtunnel_state **ts,
  516. struct netlink_ext_ack *extack)
  517. {
  518. struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
  519. struct seg6_iptunnel_encap *tuninfo;
  520. struct lwtunnel_state *newts;
  521. int tuninfo_len, min_size;
  522. struct seg6_lwt *slwt;
  523. int err;
  524. if (family != AF_INET && family != AF_INET6)
  525. return -EINVAL;
  526. err = nla_parse_nested_deprecated(tb, SEG6_IPTUNNEL_MAX, nla,
  527. seg6_iptunnel_policy, extack);
  528. if (err < 0)
  529. return err;
  530. if (!tb[SEG6_IPTUNNEL_SRH])
  531. return -EINVAL;
  532. tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
  533. tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
  534. /* tuninfo must contain at least the iptunnel encap structure,
  535. * the SRH and one segment
  536. */
  537. min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
  538. sizeof(struct in6_addr);
  539. if (tuninfo_len < min_size)
  540. return -EINVAL;
  541. switch (tuninfo->mode) {
  542. case SEG6_IPTUN_MODE_INLINE:
  543. if (family != AF_INET6)
  544. return -EINVAL;
  545. break;
  546. case SEG6_IPTUN_MODE_ENCAP:
  547. break;
  548. case SEG6_IPTUN_MODE_L2ENCAP:
  549. break;
  550. case SEG6_IPTUN_MODE_ENCAP_RED:
  551. break;
  552. case SEG6_IPTUN_MODE_L2ENCAP_RED:
  553. break;
  554. default:
  555. return -EINVAL;
  556. }
  557. /* verify that SRH is consistent */
  558. if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
  559. return -EINVAL;
  560. newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
  561. if (!newts)
  562. return -ENOMEM;
  563. slwt = seg6_lwt_lwtunnel(newts);
  564. err = dst_cache_init(&slwt->cache_input, GFP_ATOMIC);
  565. if (err)
  566. goto err_free_newts;
  567. err = dst_cache_init(&slwt->cache_output, GFP_ATOMIC);
  568. if (err)
  569. goto err_destroy_input;
  570. memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
  571. newts->type = LWTUNNEL_ENCAP_SEG6;
  572. newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
  573. if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
  574. newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
  575. newts->headroom = seg6_lwt_headroom(tuninfo);
  576. *ts = newts;
  577. return 0;
  578. err_destroy_input:
  579. dst_cache_destroy(&slwt->cache_input);
  580. err_free_newts:
  581. kfree(newts);
  582. return err;
  583. }
  584. static void seg6_destroy_state(struct lwtunnel_state *lwt)
  585. {
  586. struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwt);
  587. dst_cache_destroy(&slwt->cache_input);
  588. dst_cache_destroy(&slwt->cache_output);
  589. }
  590. static int seg6_fill_encap_info(struct sk_buff *skb,
  591. struct lwtunnel_state *lwtstate)
  592. {
  593. struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
  594. if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo))
  595. return -EMSGSIZE;
  596. return 0;
  597. }
  598. static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
  599. {
  600. struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
  601. return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
  602. }
  603. static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
  604. {
  605. struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a);
  606. struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b);
  607. int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr);
  608. if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr))
  609. return 1;
  610. return memcmp(a_hdr, b_hdr, len);
  611. }
/* lwtunnel operations for the seg6 encap type; registered and unregistered
 * under LWTUNNEL_ENCAP_SEG6 by seg6_iptunnel_init() / seg6_iptunnel_exit().
 */
static const struct lwtunnel_encap_ops seg6_iptun_ops = {
	.build_state = seg6_build_state,	/* parse config, allocate state */
	.destroy_state = seg6_destroy_state,	/* release the dst caches */
	.output = seg6_output,
	.input = seg6_input,
	.fill_encap = seg6_fill_encap_info,	/* dump config to netlink */
	.get_encap_size = seg6_encap_nlsize,
	.cmp_encap = seg6_encap_cmp,
	.owner = THIS_MODULE,
};
  622. int __init seg6_iptunnel_init(void)
  623. {
  624. return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
  625. }
/* Unregister the seg6 lwtunnel operations added by seg6_iptunnel_init().
 * The return value of lwtunnel_encap_del_ops() is not checked.
 */
void seg6_iptunnel_exit(void)
{
	lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
}