ip_tunnels.h 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #ifndef __NET_IP_TUNNELS_H
  3. #define __NET_IP_TUNNELS_H 1
  4. #include <linux/if_tunnel.h>
  5. #include <linux/netdevice.h>
  6. #include <linux/skbuff.h>
  7. #include <linux/socket.h>
  8. #include <linux/types.h>
  9. #include <linux/u64_stats_sync.h>
  10. #include <linux/bitops.h>
  11. #include <net/dsfield.h>
  12. #include <net/flow.h>
  13. #include <net/gro_cells.h>
  14. #include <net/inet_dscp.h>
  15. #include <net/inet_ecn.h>
  16. #include <net/netns/generic.h>
  17. #include <net/rtnetlink.h>
  18. #include <net/lwtunnel.h>
  19. #include <net/dst_cache.h>
  20. #include <net/netdev_lock.h>
  21. #if IS_ENABLED(CONFIG_IPV6)
  22. #include <net/ipv6.h>
  23. #include <net/ip6_fib.h>
  24. #include <net/ip6_route.h>
  25. #endif
  /*
   * Recursion limit for tunnel xmit to detect routing loops.
   * Unlike XMIT_RECURSION_LIMIT (8) used in the no-qdisc path, tunnel
   * recursion involves route lookups and full IP output, consuming much
   * more stack per level, so a lower limit is needed.
   */
  #define IP_TUNNEL_RECURSION_LIMIT 5

  /* Keep error state on tunnel for 30 sec */
  #define IPTUNNEL_ERR_TIMEO (30 * HZ)

  /*
   * Offset of the first byte past tp_dst in struct ip_tunnel_key.
   * ip_tunnel_key_init() zeroes everything from this offset to the end
   * of the structure.
   */
  #define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst)

  /*
   * Offset of the first byte past u.ipv4.dst, i.e. where the part of the
   * address union not covered by the IPv4 pair begins.
   */
  #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst)

  /* Size of the address union minus the size of its IPv4 member. */
  #define IP_TUNNEL_KEY_IPV4_PAD_LEN \
      (sizeof_field(struct ip_tunnel_key, u) - \
       sizeof_field(struct ip_tunnel_key, u.ipv4))
  /*
   * Expand to @op(<caller arguments>, __IP_TUNNEL_FLAG_NUM): every wrapper
   * below forwards its arguments to a bitmap helper and appends the number
   * of tunnel flag bits as the trailing argument.
   */
  #define __ipt_flag_op(op, ...) \
      op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM)

  /* Declare a bitmap with room for __IP_TUNNEL_FLAG_NUM flag bits. */
  #define IP_TUNNEL_DECLARE_FLAGS(...) \
      __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__)

  /* bitmap_zero() / bitmap_copy() / bitmap_and() / bitmap_or() on tunnel flags */
  #define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__)
  #define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__)
  #define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__)
  #define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__)

  /* bitmap_empty() / bitmap_intersects() / bitmap_subset() on tunnel flags */
  #define ip_tunnel_flags_empty(...) \
      __ipt_flag_op(bitmap_empty, __VA_ARGS__)
  #define ip_tunnel_flags_intersect(...) \
      __ipt_flag_op(bitmap_intersects, __VA_ARGS__)
  #define ip_tunnel_flags_subset(...) \
      __ipt_flag_op(bitmap_subset, __VA_ARGS__)
  55. struct ip_tunnel_key {
  56. __be64 tun_id;
  57. union {
  58. struct {
  59. __be32 src;
  60. __be32 dst;
  61. } ipv4;
  62. struct {
  63. struct in6_addr src;
  64. struct in6_addr dst;
  65. } ipv6;
  66. } u;
  67. IP_TUNNEL_DECLARE_FLAGS(tun_flags);
  68. __be32 label; /* Flow Label for IPv6 */
  69. u32 nhid;
  70. u8 tos; /* TOS for IPv4, TC for IPv6 */
  71. u8 ttl; /* TTL for IPv4, HL for IPv6 */
  72. __be16 tp_src;
  73. __be16 tp_dst;
  74. __u8 flow_flags;
  75. };
  76. struct ip_tunnel_encap {
  77. u16 type;
  78. u16 flags;
  79. __be16 sport;
  80. __be16 dport;
  81. };
  /* Flags for ip_tunnel_info mode. */
  #define IP_TUNNEL_INFO_TX 0x01      /* represents tx tunnel parameters */
  #define IP_TUNNEL_INFO_IPV6 0x02    /* key contains IPv6 addresses */
  #define IP_TUNNEL_INFO_BRIDGE 0x04  /* represents a bridged tunnel id */

  /*
   * Maximum tunnel options length: a mask with every bit of the
   * ip_tunnel_info::options_len field set, i.e. the largest value that
   * field can hold.
   */
  #define IP_TUNNEL_OPTS_MAX \
      GENMASK((sizeof_field(struct ip_tunnel_info, \
                            options_len) * BITS_PER_BYTE) - 1, 0)

  /*
   * Pointer to the options area trailing @info. The result is
   * "const void *" for a const-qualified @info and "void *" otherwise.
   */
  #define ip_tunnel_info_opts(info) \
      _Generic(info, \
          const struct ip_tunnel_info * : ((const void *)(info)->options),\
          struct ip_tunnel_info * : ((void *)(info)->options)\
      )
  95. struct ip_tunnel_info {
  96. struct ip_tunnel_key key;
  97. struct ip_tunnel_encap encap;
  98. #ifdef CONFIG_DST_CACHE
  99. struct dst_cache dst_cache;
  100. #endif
  101. u8 options_len;
  102. u8 mode;
  103. u8 options[] __aligned_largest __counted_by(options_len);
  104. };
  /* 6rd prefix/relay information (only with CONFIG_IPV6_SIT_6RD) */
  #ifdef CONFIG_IPV6_SIT_6RD
  struct ip_tunnel_6rd_parm {
      struct in6_addr prefix;
      __be32 relay_prefix;
      u16 prefixlen;
      u16 relay_prefixlen;
  };
  #endif
  114. struct ip_tunnel_prl_entry {
  115. struct ip_tunnel_prl_entry __rcu *next;
  116. __be32 addr;
  117. u16 flags;
  118. struct rcu_head rcu_head;
  119. };
  120. struct metadata_dst;
  121. /* Kernel-side variant of ip_tunnel_parm */
  122. struct ip_tunnel_parm_kern {
  123. char name[IFNAMSIZ];
  124. IP_TUNNEL_DECLARE_FLAGS(i_flags);
  125. IP_TUNNEL_DECLARE_FLAGS(o_flags);
  126. __be32 i_key;
  127. __be32 o_key;
  128. int link;
  129. struct iphdr iph;
  130. };
  131. struct ip_tunnel {
  132. struct ip_tunnel __rcu *next;
  133. struct hlist_node hash_node;
  134. struct net_device *dev;
  135. netdevice_tracker dev_tracker;
  136. struct net *net; /* netns for packet i/o */
  137. unsigned long err_time; /* Time when the last ICMP error
  138. * arrived */
  139. int err_count; /* Number of arrived ICMP errors */
  140. /* These four fields used only by GRE */
  141. u32 i_seqno; /* The last seen seqno */
  142. atomic_t o_seqno; /* The last output seqno */
  143. int tun_hlen; /* Precalculated header length */
  144. /* These four fields used only by ERSPAN */
  145. u32 index; /* ERSPAN type II index */
  146. u8 erspan_ver; /* ERSPAN version */
  147. u8 dir; /* ERSPAN direction */
  148. u16 hwid; /* ERSPAN hardware ID */
  149. struct dst_cache dst_cache;
  150. struct ip_tunnel_parm_kern parms;
  151. int mlink;
  152. int encap_hlen; /* Encap header length (FOU,GUE) */
  153. int hlen; /* tun_hlen + encap_hlen */
  154. struct ip_tunnel_encap encap;
  155. /* for SIT */
  156. #ifdef CONFIG_IPV6_SIT_6RD
  157. struct ip_tunnel_6rd_parm ip6rd;
  158. #endif
  159. struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
  160. unsigned int prl_count; /* # of entries in PRL */
  161. unsigned int ip_tnl_net_id;
  162. struct gro_cells gro_cells;
  163. __u32 fwmark;
  164. bool collect_md;
  165. bool ignore_df;
  166. };
  167. struct tnl_ptk_info {
  168. IP_TUNNEL_DECLARE_FLAGS(flags);
  169. __be16 proto;
  170. __be32 key;
  171. __be32 seq;
  172. int hdr_len;
  173. };
  /* Packet disposition codes */
  #define PACKET_RCVD 0
  #define PACKET_REJECT 1
  #define PACKET_NEXT 2

  /* Tunnel hash table: 2^IP_TNL_HASH_BITS buckets */
  #define IP_TNL_HASH_BITS 7
  #define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS)
  179. struct ip_tunnel_net {
  180. struct net_device *fb_tunnel_dev;
  181. struct rtnl_link_ops *rtnl_link_ops;
  182. struct hlist_head tunnels[IP_TNL_HASH_SIZE];
  183. struct ip_tunnel __rcu *collect_md_tun;
  184. int type;
  185. };
  186. static inline void ip_tunnel_set_options_present(unsigned long *flags)
  187. {
  188. IP_TUNNEL_DECLARE_FLAGS(present) = { };
  189. __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
  190. __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
  191. __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
  192. __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
  193. __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
  194. ip_tunnel_flags_or(flags, flags, present);
  195. }
  196. static inline void ip_tunnel_clear_options_present(unsigned long *flags)
  197. {
  198. IP_TUNNEL_DECLARE_FLAGS(present) = { };
  199. __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
  200. __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
  201. __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
  202. __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
  203. __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
  204. __ipt_flag_op(bitmap_andnot, flags, flags, present);
  205. }
  206. static inline bool ip_tunnel_is_options_present(const unsigned long *flags)
  207. {
  208. IP_TUNNEL_DECLARE_FLAGS(present) = { };
  209. __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present);
  210. __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present);
  211. __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present);
  212. __set_bit(IP_TUNNEL_GTP_OPT_BIT, present);
  213. __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present);
  214. return ip_tunnel_flags_intersect(flags, present);
  215. }
  216. static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags)
  217. {
  218. IP_TUNNEL_DECLARE_FLAGS(supp) = { };
  219. bitmap_set(supp, 0, BITS_PER_TYPE(__be16));
  220. __set_bit(IP_TUNNEL_VTI_BIT, supp);
  221. return ip_tunnel_flags_subset(flags, supp);
  222. }
  223. static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags)
  224. {
  225. ip_tunnel_flags_zero(dst);
  226. bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16));
  227. __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI);
  228. }
  229. static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags)
  230. {
  231. __be16 ret;
  232. ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16)));
  233. if (test_bit(IP_TUNNEL_VTI_BIT, flags))
  234. ret |= VTI_ISVTI;
  235. return ret;
  236. }
  237. static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
  238. __be32 saddr, __be32 daddr,
  239. u8 tos, u8 ttl, __be32 label,
  240. __be16 tp_src, __be16 tp_dst,
  241. __be64 tun_id,
  242. const unsigned long *tun_flags)
  243. {
  244. key->tun_id = tun_id;
  245. key->u.ipv4.src = saddr;
  246. key->u.ipv4.dst = daddr;
  247. memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
  248. 0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
  249. key->tos = tos;
  250. key->ttl = ttl;
  251. key->label = label;
  252. ip_tunnel_flags_copy(key->tun_flags, tun_flags);
  253. /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
  254. * the upper tunnel are used.
  255. * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
  256. */
  257. key->tp_src = tp_src;
  258. key->tp_dst = tp_dst;
  259. /* Clear struct padding. */
  260. if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
  261. memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
  262. 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
  263. }
  264. static inline bool
  265. ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
  266. const struct ip_tunnel_info *info)
  267. {
  268. if (skb->mark)
  269. return false;
  270. return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags);
  271. }
  272. static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
  273. *tun_info)
  274. {
  275. return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
  276. }
  277. static inline __be64 key32_to_tunnel_id(__be32 key)
  278. {
  279. #ifdef __BIG_ENDIAN
  280. return (__force __be64)key;
  281. #else
  282. return (__force __be64)((__force u64)key << 32);
  283. #endif
  284. }
  285. /* Returns the least-significant 32 bits of a __be64. */
  286. static inline __be32 tunnel_id_to_key32(__be64 tun_id)
  287. {
  288. #ifdef __BIG_ENDIAN
  289. return (__force __be32)tun_id;
  290. #else
  291. return (__force __be32)((__force u64)tun_id >> 32);
  292. #endif
  293. }
  294. #ifdef CONFIG_INET
  295. static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
  296. int proto,
  297. __be32 daddr, __be32 saddr,
  298. __be32 key, __u8 tos,
  299. struct net *net, int oif,
  300. __u32 mark, __u32 tun_inner_hash,
  301. __u8 flow_flags)
  302. {
  303. memset(fl4, 0, sizeof(*fl4));
  304. if (oif) {
  305. fl4->flowi4_l3mdev = l3mdev_master_upper_ifindex_by_index(net, oif);
  306. /* Legacy VRF/l3mdev use case */
  307. fl4->flowi4_oif = fl4->flowi4_l3mdev ? 0 : oif;
  308. }
  309. fl4->daddr = daddr;
  310. fl4->saddr = saddr;
  311. fl4->flowi4_dscp = inet_dsfield_to_dscp(tos);
  312. fl4->flowi4_proto = proto;
  313. fl4->fl4_gre_key = key;
  314. fl4->flowi4_mark = mark;
  315. fl4->flowi4_multipath_hash = tun_inner_hash;
  316. fl4->flowi4_flags = flow_flags;
  317. }
  int __ip_tunnel_init(struct net_device *dev);

  /*
   * Wrapper around __ip_tunnel_init(): evaluates DEV once, and calls
   * netdev_lockdep_set_classes() on the device when __ip_tunnel_init()
   * returned 0. Evaluates to the return value of __ip_tunnel_init().
   */
  #define ip_tunnel_init(DEV) \
  ({ \
      struct net_device *__tnl_dev = (DEV); \
      int __tnl_err = __ip_tunnel_init(__tnl_dev); \
      \
      if (!__tnl_err) \
          netdev_lockdep_set_classes(__tnl_dev); \
      __tnl_err; \
  })
  328. void ip_tunnel_uninit(struct net_device *dev);
  329. void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
  330. struct net *ip_tunnel_get_link_net(const struct net_device *dev);
  331. int ip_tunnel_get_iflink(const struct net_device *dev);
  332. int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
  333. struct rtnl_link_ops *ops, char *devname);
  334. void ip_tunnel_delete_net(struct net *net, unsigned int id,
  335. struct rtnl_link_ops *ops,
  336. struct list_head *dev_to_kill);
  337. void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
  338. const struct iphdr *tnl_params, const u8 protocol);
  339. void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
  340. const u8 proto, int tunnel_hlen);
  341. int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
  342. int cmd);
  343. bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
  344. const void __user *data);
  345. bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp);
  346. int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
  347. void __user *data, int cmd);
  348. int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
  349. int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
  350. struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
  351. int link, const unsigned long *flags,
  352. __be32 remote, __be32 local,
  353. __be32 key);
  354. void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info);
  355. int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
  356. const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
  357. bool log_ecn_error);
  358. int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
  359. struct ip_tunnel_parm_kern *p, __u32 fwmark);
  360. int ip_tunnel_newlink(struct net *net, struct net_device *dev,
  361. struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
  362. __u32 fwmark);
  363. void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
  364. bool ip_tunnel_netlink_encap_parms(struct nlattr *data[],
  365. struct ip_tunnel_encap *encap);
  366. void ip_tunnel_netlink_parms(struct nlattr *data[],
  367. struct ip_tunnel_parm_kern *parms);
  368. extern const struct header_ops ip_tunnel_header_ops;
  369. __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
  370. struct ip_tunnel_encap_ops {
  371. size_t (*encap_hlen)(struct ip_tunnel_encap *e);
  372. int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
  373. u8 *protocol, struct flowi4 *fl4);
  374. int (*err_handler)(struct sk_buff *skb, u32 info);
  375. };
  376. #define MAX_IPTUN_ENCAP_OPS 8
  377. extern const struct ip_tunnel_encap_ops __rcu *
  378. iptun_encaps[MAX_IPTUN_ENCAP_OPS];
  379. int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
  380. unsigned int num);
  381. int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
  382. unsigned int num);
  383. int ip_tunnel_encap_setup(struct ip_tunnel *t,
  384. struct ip_tunnel_encap *ipencap);
  385. static inline enum skb_drop_reason
  386. pskb_inet_may_pull_reason(struct sk_buff *skb)
  387. {
  388. int nhlen;
  389. switch (skb->protocol) {
  390. #if IS_ENABLED(CONFIG_IPV6)
  391. case htons(ETH_P_IPV6):
  392. nhlen = sizeof(struct ipv6hdr);
  393. break;
  394. #endif
  395. case htons(ETH_P_IP):
  396. nhlen = sizeof(struct iphdr);
  397. break;
  398. default:
  399. nhlen = 0;
  400. }
  401. return pskb_network_may_pull_reason(skb, nhlen);
  402. }
  403. static inline bool pskb_inet_may_pull(struct sk_buff *skb)
  404. {
  405. return pskb_inet_may_pull_reason(skb) == SKB_NOT_DROPPED_YET;
  406. }
  407. /* Variant of pskb_inet_may_pull().
  408. */
  409. static inline enum skb_drop_reason
  410. skb_vlan_inet_prepare(struct sk_buff *skb, bool inner_proto_inherit)
  411. {
  412. int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN;
  413. __be16 type = skb->protocol;
  414. enum skb_drop_reason reason;
  415. /* Essentially this is skb_protocol(skb, true)
  416. * And we get MAC len.
  417. */
  418. if (eth_type_vlan(type))
  419. type = __vlan_get_protocol(skb, type, &maclen);
  420. switch (type) {
  421. #if IS_ENABLED(CONFIG_IPV6)
  422. case htons(ETH_P_IPV6):
  423. nhlen = sizeof(struct ipv6hdr);
  424. break;
  425. #endif
  426. case htons(ETH_P_IP):
  427. nhlen = sizeof(struct iphdr);
  428. break;
  429. }
  430. /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
  431. * a base network header in skb->head.
  432. */
  433. reason = pskb_may_pull_reason(skb, maclen + nhlen);
  434. if (reason)
  435. return reason;
  436. skb_set_network_header(skb, maclen);
  437. return SKB_NOT_DROPPED_YET;
  438. }
  439. static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
  440. {
  441. const struct ip_tunnel_encap_ops *ops;
  442. int hlen = -EINVAL;
  443. if (e->type == TUNNEL_ENCAP_NONE)
  444. return 0;
  445. if (e->type >= MAX_IPTUN_ENCAP_OPS)
  446. return -EINVAL;
  447. rcu_read_lock();
  448. ops = rcu_dereference(iptun_encaps[e->type]);
  449. if (likely(ops && ops->encap_hlen))
  450. hlen = ops->encap_hlen(e);
  451. rcu_read_unlock();
  452. return hlen;
  453. }
  454. static inline int ip_tunnel_encap(struct sk_buff *skb,
  455. struct ip_tunnel_encap *e,
  456. u8 *protocol, struct flowi4 *fl4)
  457. {
  458. const struct ip_tunnel_encap_ops *ops;
  459. int ret = -EINVAL;
  460. if (e->type == TUNNEL_ENCAP_NONE)
  461. return 0;
  462. if (e->type >= MAX_IPTUN_ENCAP_OPS)
  463. return -EINVAL;
  464. rcu_read_lock();
  465. ops = rcu_dereference(iptun_encaps[e->type]);
  466. if (likely(ops && ops->build_header))
  467. ret = ops->build_header(skb, e, protocol, fl4);
  468. rcu_read_unlock();
  469. return ret;
  470. }
  471. /* Extract dsfield from inner protocol */
  472. static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
  473. const struct sk_buff *skb)
  474. {
  475. __be16 payload_protocol = skb_protocol(skb, true);
  476. if (payload_protocol == htons(ETH_P_IP))
  477. return iph->tos;
  478. else if (payload_protocol == htons(ETH_P_IPV6))
  479. return ipv6_get_dsfield((const struct ipv6hdr *)iph);
  480. else
  481. return 0;
  482. }
  483. static inline __be32 ip_tunnel_get_flowlabel(const struct iphdr *iph,
  484. const struct sk_buff *skb)
  485. {
  486. __be16 payload_protocol = skb_protocol(skb, true);
  487. if (payload_protocol == htons(ETH_P_IPV6))
  488. return ip6_flowlabel((const struct ipv6hdr *)iph);
  489. else
  490. return 0;
  491. }
  492. static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
  493. const struct sk_buff *skb)
  494. {
  495. __be16 payload_protocol = skb_protocol(skb, true);
  496. if (payload_protocol == htons(ETH_P_IP))
  497. return iph->ttl;
  498. else if (payload_protocol == htons(ETH_P_IPV6))
  499. return ((const struct ipv6hdr *)iph)->hop_limit;
  500. else
  501. return 0;
  502. }
  503. /* Propagate ECN bits out */
  504. static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
  505. const struct sk_buff *skb)
  506. {
  507. u8 inner = ip_tunnel_get_dsfield(iph, skb);
  508. return INET_ECN_encapsulate(tos, inner);
  509. }
  510. int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
  511. __be16 inner_proto, bool raw_proto, bool xnet);
  512. static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
  513. __be16 inner_proto, bool xnet)
  514. {
  515. return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
  516. }
  517. void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
  518. __be32 src, __be32 dst, u8 proto,
  519. u8 tos, u8 ttl, __be16 df, bool xnet, u16 ipcb_flags);
  520. struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
  521. gfp_t flags);
  522. int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
  523. int headroom, bool reply);
  524. static inline void ip_tunnel_adj_headroom(struct net_device *dev,
  525. unsigned int headroom)
  526. {
  527. /* we must cap headroom to some upperlimit, else pskb_expand_head
  528. * will overflow header offsets in skb_headers_offset_update().
  529. */
  530. const unsigned int max_allowed = 512;
  531. if (headroom > max_allowed)
  532. headroom = max_allowed;
  533. if (headroom > READ_ONCE(dev->needed_headroom))
  534. WRITE_ONCE(dev->needed_headroom, headroom);
  535. }
  536. int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
  537. static inline int iptunnel_pull_offloads(struct sk_buff *skb)
  538. {
  539. if (skb_is_gso(skb)) {
  540. int err;
  541. err = skb_unclone(skb, GFP_ATOMIC);
  542. if (unlikely(err))
  543. return err;
  544. skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
  545. NETIF_F_GSO_SHIFT);
  546. }
  547. skb->encapsulation = 0;
  548. return 0;
  549. }
  550. static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
  551. {
  552. if (pkt_len > 0) {
  553. if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_DSTATS) {
  554. struct pcpu_dstats *dstats = get_cpu_ptr(dev->dstats);
  555. u64_stats_update_begin(&dstats->syncp);
  556. u64_stats_add(&dstats->tx_bytes, pkt_len);
  557. u64_stats_inc(&dstats->tx_packets);
  558. u64_stats_update_end(&dstats->syncp);
  559. put_cpu_ptr(dstats);
  560. return;
  561. }
  562. if (dev->pcpu_stat_type == NETDEV_PCPU_STAT_TSTATS) {
  563. struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
  564. u64_stats_update_begin(&tstats->syncp);
  565. u64_stats_add(&tstats->tx_bytes, pkt_len);
  566. u64_stats_inc(&tstats->tx_packets);
  567. u64_stats_update_end(&tstats->syncp);
  568. put_cpu_ptr(tstats);
  569. return;
  570. }
  571. pr_err_once("iptunnel_xmit_stats pcpu_stat_type=%d\n",
  572. dev->pcpu_stat_type);
  573. WARN_ON_ONCE(1);
  574. return;
  575. }
  576. if (pkt_len < 0) {
  577. DEV_STATS_INC(dev, tx_errors);
  578. DEV_STATS_INC(dev, tx_aborted_errors);
  579. } else {
  580. DEV_STATS_INC(dev, tx_dropped);
  581. }
  582. }
  583. static inline void ip_tunnel_info_opts_get(void *to,
  584. const struct ip_tunnel_info *info)
  585. {
  586. memcpy(to, ip_tunnel_info_opts(info), info->options_len);
  587. }
  588. static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
  589. const void *from, int len,
  590. const unsigned long *flags)
  591. {
  592. info->options_len = len;
  593. if (len > 0) {
  594. memcpy(ip_tunnel_info_opts(info), from, len);
  595. ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags,
  596. flags);
  597. }
  598. }
  599. static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
  600. {
  601. return (struct ip_tunnel_info *)lwtstate->data;
  602. }
  603. DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
  604. /* Returns > 0 if metadata should be collected */
  605. static inline int ip_tunnel_collect_metadata(void)
  606. {
  607. return static_branch_unlikely(&ip_tunnel_metadata_cnt);
  608. }
  609. void __init ip_tunnel_core_init(void);
  610. void ip_tunnel_need_metadata(void);
  611. void ip_tunnel_unneed_metadata(void);
  612. #else /* CONFIG_INET */
  613. static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
  614. {
  615. return NULL;
  616. }
  /* !CONFIG_INET stub: nothing to do. */
  static inline void ip_tunnel_need_metadata(void)
  {
  }

  /* !CONFIG_INET stub: nothing to do. */
  static inline void ip_tunnel_unneed_metadata(void)
  {
  }
  /* !CONFIG_INET stub: copies nothing, @to is left untouched. */
  static inline void ip_tunnel_info_opts_get(void *to,
                                             const struct ip_tunnel_info *info)
  {
  }
  627. static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
  628. const void *from, int len,
  629. const unsigned long *flags)
  630. {
  631. info->options_len = 0;
  632. }
  633. #endif /* CONFIG_INET */
  634. #endif /* __NET_IP_TUNNELS_H */