geneve.c 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * GENEVE: Generic Network Virtualization Encapsulation
  4. *
  5. * Copyright (c) 2015 Red Hat, Inc.
  6. */
  7. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  8. #include <linux/ethtool.h>
  9. #include <linux/kernel.h>
  10. #include <linux/module.h>
  11. #include <linux/etherdevice.h>
  12. #include <linux/hash.h>
  13. #include <net/ipv6_stubs.h>
  14. #include <net/dst_metadata.h>
  15. #include <net/gro_cells.h>
  16. #include <net/rtnetlink.h>
  17. #include <net/geneve.h>
  18. #include <net/gro.h>
  19. #include <net/netdev_lock.h>
  20. #include <net/protocol.h>
  21. #define GENEVE_NETDEV_VER "0.6"
  22. #define GENEVE_N_VID (1u << 24)
  23. #define GENEVE_VID_MASK (GENEVE_N_VID - 1)
  24. #define VNI_HASH_BITS 10
  25. #define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
  26. static bool log_ecn_error = true;
  27. module_param(log_ecn_error, bool, 0644);
  28. MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  29. #define GENEVE_VER 0
  30. #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
  31. #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
  32. #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
  33. #define GENEVE_OPT_NETDEV_CLASS 0x100
  34. #define GENEVE_OPT_GRO_HINT_SIZE 8
  35. #define GENEVE_OPT_GRO_HINT_TYPE 1
  36. #define GENEVE_OPT_GRO_HINT_LEN 1
  37. struct geneve_opt_gro_hint {
  38. u8 inner_proto_id:2,
  39. nested_is_v6:1;
  40. u8 nested_nh_offset;
  41. u8 nested_tp_offset;
  42. u8 nested_hdr_len;
  43. };
  44. struct geneve_skb_cb {
  45. unsigned int gro_hint_len;
  46. struct geneve_opt_gro_hint gro_hint;
  47. };
  48. #define GENEVE_SKB_CB(__skb) ((struct geneve_skb_cb *)&((__skb)->cb[0]))
  49. /* per-network namespace private data for this module */
  50. struct geneve_net {
  51. struct list_head geneve_list;
  52. /* sock_list is protected by rtnl lock */
  53. struct list_head sock_list;
  54. };
  55. static unsigned int geneve_net_id;
  56. struct geneve_dev_node {
  57. struct hlist_node hlist;
  58. struct geneve_dev *geneve;
  59. };
  60. struct geneve_config {
  61. bool collect_md;
  62. bool use_udp6_rx_checksums;
  63. bool ttl_inherit;
  64. bool gro_hint;
  65. enum ifla_geneve_df df;
  66. bool inner_proto_inherit;
  67. u16 port_min;
  68. u16 port_max;
  69. /* Must be last --ends in a flexible-array member. */
  70. struct ip_tunnel_info info;
  71. };
  72. /* Pseudo network device */
  73. struct geneve_dev {
  74. struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
  75. #if IS_ENABLED(CONFIG_IPV6)
  76. struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */
  77. #endif
  78. struct net *net; /* netns for packet i/o */
  79. struct net_device *dev; /* netdev for geneve tunnel */
  80. struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
  81. #if IS_ENABLED(CONFIG_IPV6)
  82. struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
  83. #endif
  84. struct list_head next; /* geneve's per namespace list */
  85. struct gro_cells gro_cells;
  86. struct geneve_config cfg;
  87. };
  88. struct geneve_sock {
  89. bool collect_md;
  90. bool gro_hint;
  91. struct list_head list;
  92. struct socket *sock;
  93. struct rcu_head rcu;
  94. int refcnt;
  95. struct hlist_head vni_list[VNI_HASH_SIZE];
  96. };
  97. static const __be16 proto_id_map[] = { htons(ETH_P_TEB),
  98. htons(ETH_P_IPV6),
  99. htons(ETH_P_IP) };
  100. static int proto_to_id(__be16 proto)
  101. {
  102. int i;
  103. for (i = 0; i < ARRAY_SIZE(proto_id_map); i++)
  104. if (proto_id_map[i] == proto)
  105. return i;
  106. return -1;
  107. }
  108. static inline __u32 geneve_net_vni_hash(u8 vni[3])
  109. {
  110. __u32 vnid;
  111. vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
  112. return hash_32(vnid, VNI_HASH_BITS);
  113. }
  114. static __be64 vni_to_tunnel_id(const __u8 *vni)
  115. {
  116. #ifdef __BIG_ENDIAN
  117. return (vni[0] << 16) | (vni[1] << 8) | vni[2];
  118. #else
  119. return (__force __be64)(((__force u64)vni[0] << 40) |
  120. ((__force u64)vni[1] << 48) |
  121. ((__force u64)vni[2] << 56));
  122. #endif
  123. }
  124. /* Convert 64 bit tunnel ID to 24 bit VNI. */
  125. static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
  126. {
  127. #ifdef __BIG_ENDIAN
  128. vni[0] = (__force __u8)(tun_id >> 16);
  129. vni[1] = (__force __u8)(tun_id >> 8);
  130. vni[2] = (__force __u8)tun_id;
  131. #else
  132. vni[0] = (__force __u8)((__force u64)tun_id >> 40);
  133. vni[1] = (__force __u8)((__force u64)tun_id >> 48);
  134. vni[2] = (__force __u8)((__force u64)tun_id >> 56);
  135. #endif
  136. }
  137. static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
  138. {
  139. return !memcmp(vni, &tun_id[5], 3);
  140. }
  141. static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
  142. {
  143. return gs->sock->sk->sk_family;
  144. }
  145. static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
  146. __be32 addr, u8 vni[])
  147. {
  148. struct hlist_head *vni_list_head;
  149. struct geneve_dev_node *node;
  150. __u32 hash;
  151. /* Find the device for this VNI */
  152. hash = geneve_net_vni_hash(vni);
  153. vni_list_head = &gs->vni_list[hash];
  154. hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
  155. if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
  156. addr == node->geneve->cfg.info.key.u.ipv4.dst)
  157. return node->geneve;
  158. }
  159. return NULL;
  160. }
  161. #if IS_ENABLED(CONFIG_IPV6)
  162. static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
  163. struct in6_addr addr6, u8 vni[])
  164. {
  165. struct hlist_head *vni_list_head;
  166. struct geneve_dev_node *node;
  167. __u32 hash;
  168. /* Find the device for this VNI */
  169. hash = geneve_net_vni_hash(vni);
  170. vni_list_head = &gs->vni_list[hash];
  171. hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
  172. if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
  173. ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
  174. return node->geneve;
  175. }
  176. return NULL;
  177. }
  178. #endif
  179. static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
  180. {
  181. return (struct genevehdr *)(udp_hdr(skb) + 1);
  182. }
  183. static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
  184. struct sk_buff *skb)
  185. {
  186. static u8 zero_vni[3];
  187. u8 *vni;
  188. if (geneve_get_sk_family(gs) == AF_INET) {
  189. struct iphdr *iph;
  190. __be32 addr;
  191. iph = ip_hdr(skb); /* outer IP header... */
  192. if (gs->collect_md) {
  193. vni = zero_vni;
  194. addr = 0;
  195. } else {
  196. vni = geneve_hdr(skb)->vni;
  197. addr = iph->saddr;
  198. }
  199. return geneve_lookup(gs, addr, vni);
  200. #if IS_ENABLED(CONFIG_IPV6)
  201. } else if (geneve_get_sk_family(gs) == AF_INET6) {
  202. static struct in6_addr zero_addr6;
  203. struct ipv6hdr *ip6h;
  204. struct in6_addr addr6;
  205. ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
  206. if (gs->collect_md) {
  207. vni = zero_vni;
  208. addr6 = zero_addr6;
  209. } else {
  210. vni = geneve_hdr(skb)->vni;
  211. addr6 = ip6h->saddr;
  212. }
  213. return geneve6_lookup(gs, addr6, vni);
  214. #endif
  215. }
  216. return NULL;
  217. }
  218. /* geneve receive/decap routine */
  219. static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
  220. struct sk_buff *skb, const struct genevehdr *gnvh)
  221. {
  222. struct metadata_dst *tun_dst = NULL;
  223. unsigned int len;
  224. int nh, err = 0;
  225. void *oiph;
  226. if (ip_tunnel_collect_metadata() || gs->collect_md) {
  227. IP_TUNNEL_DECLARE_FLAGS(flags) = { };
  228. __set_bit(IP_TUNNEL_KEY_BIT, flags);
  229. __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
  230. __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);
  231. tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
  232. vni_to_tunnel_id(gnvh->vni),
  233. gnvh->opt_len * 4);
  234. if (!tun_dst) {
  235. dev_dstats_rx_dropped(geneve->dev);
  236. goto drop;
  237. }
  238. /* Update tunnel dst according to Geneve options. */
  239. ip_tunnel_flags_zero(flags);
  240. __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
  241. ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
  242. gnvh->options, gnvh->opt_len * 4,
  243. flags);
  244. } else {
  245. /* Drop packets w/ critical options,
  246. * since we don't support any...
  247. */
  248. if (gnvh->critical) {
  249. DEV_STATS_INC(geneve->dev, rx_frame_errors);
  250. DEV_STATS_INC(geneve->dev, rx_errors);
  251. goto drop;
  252. }
  253. }
  254. if (tun_dst)
  255. skb_dst_set(skb, &tun_dst->dst);
  256. if (gnvh->proto_type == htons(ETH_P_TEB)) {
  257. skb_reset_mac_header(skb);
  258. skb->protocol = eth_type_trans(skb, geneve->dev);
  259. skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
  260. /* Ignore packet loops (and multicast echo) */
  261. if (ether_addr_equal(eth_hdr(skb)->h_source,
  262. geneve->dev->dev_addr)) {
  263. DEV_STATS_INC(geneve->dev, rx_errors);
  264. goto drop;
  265. }
  266. } else {
  267. skb_reset_mac_header(skb);
  268. skb->dev = geneve->dev;
  269. skb->pkt_type = PACKET_HOST;
  270. }
  271. /* Save offset of outer header relative to skb->head,
  272. * because we are going to reset the network header to the inner header
  273. * and might change skb->head.
  274. */
  275. nh = skb_network_header(skb) - skb->head;
  276. skb_reset_network_header(skb);
  277. if (!pskb_inet_may_pull(skb)) {
  278. DEV_STATS_INC(geneve->dev, rx_length_errors);
  279. DEV_STATS_INC(geneve->dev, rx_errors);
  280. goto drop;
  281. }
  282. /* Get the outer header. */
  283. oiph = skb->head + nh;
  284. if (geneve_get_sk_family(gs) == AF_INET)
  285. err = IP_ECN_decapsulate(oiph, skb);
  286. #if IS_ENABLED(CONFIG_IPV6)
  287. else
  288. err = IP6_ECN_decapsulate(oiph, skb);
  289. #endif
  290. if (unlikely(err)) {
  291. if (log_ecn_error) {
  292. if (geneve_get_sk_family(gs) == AF_INET)
  293. net_info_ratelimited("non-ECT from %pI4 "
  294. "with TOS=%#x\n",
  295. &((struct iphdr *)oiph)->saddr,
  296. ((struct iphdr *)oiph)->tos);
  297. #if IS_ENABLED(CONFIG_IPV6)
  298. else
  299. net_info_ratelimited("non-ECT from %pI6\n",
  300. &((struct ipv6hdr *)oiph)->saddr);
  301. #endif
  302. }
  303. if (err > 1) {
  304. DEV_STATS_INC(geneve->dev, rx_frame_errors);
  305. DEV_STATS_INC(geneve->dev, rx_errors);
  306. goto drop;
  307. }
  308. }
  309. /* Skip the additional GRO stage when hints are in use. */
  310. len = skb->len;
  311. if (skb->encapsulation)
  312. err = netif_rx(skb);
  313. else
  314. err = gro_cells_receive(&geneve->gro_cells, skb);
  315. if (likely(err == NET_RX_SUCCESS))
  316. dev_dstats_rx_add(geneve->dev, len);
  317. return;
  318. drop:
  319. /* Consume bad packet */
  320. kfree_skb(skb);
  321. }
  322. /* Setup stats when device is created */
  323. static int geneve_init(struct net_device *dev)
  324. {
  325. struct geneve_dev *geneve = netdev_priv(dev);
  326. int err;
  327. err = gro_cells_init(&geneve->gro_cells, dev);
  328. if (err)
  329. return err;
  330. err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
  331. if (err) {
  332. gro_cells_destroy(&geneve->gro_cells);
  333. return err;
  334. }
  335. netdev_lockdep_set_classes(dev);
  336. return 0;
  337. }
  338. static void geneve_uninit(struct net_device *dev)
  339. {
  340. struct geneve_dev *geneve = netdev_priv(dev);
  341. dst_cache_destroy(&geneve->cfg.info.dst_cache);
  342. gro_cells_destroy(&geneve->gro_cells);
  343. }
  344. static int geneve_hlen(const struct genevehdr *gh)
  345. {
  346. return sizeof(*gh) + gh->opt_len * 4;
  347. }
  348. /*
  349. * Look for GRO hint in the genenve options; if not found or does not pass basic
  350. * sanitization return 0, otherwise the offset WRT the geneve hdr start.
  351. */
  352. static unsigned int
  353. geneve_opt_gro_hint_off(const struct genevehdr *gh, __be16 *type,
  354. unsigned int *gh_len)
  355. {
  356. struct geneve_opt *opt = (void *)(gh + 1);
  357. unsigned int id, opt_len = gh->opt_len;
  358. struct geneve_opt_gro_hint *gro_hint;
  359. while (opt_len >= (GENEVE_OPT_GRO_HINT_SIZE >> 2)) {
  360. if (opt->opt_class == htons(GENEVE_OPT_NETDEV_CLASS) &&
  361. opt->type == GENEVE_OPT_GRO_HINT_TYPE &&
  362. opt->length == GENEVE_OPT_GRO_HINT_LEN)
  363. goto found;
  364. /* check for bad opt len */
  365. if (opt->length + 1 >= opt_len)
  366. return 0;
  367. /* next opt */
  368. opt_len -= opt->length + 1;
  369. opt = ((void *)opt) + ((opt->length + 1) << 2);
  370. }
  371. return 0;
  372. found:
  373. gro_hint = (struct geneve_opt_gro_hint *)opt->opt_data;
  374. /*
  375. * Sanitize the hinted hdrs: the nested transport is UDP and must fit
  376. * the overall hinted hdr size.
  377. */
  378. if (gro_hint->nested_tp_offset + sizeof(struct udphdr) >
  379. gro_hint->nested_hdr_len)
  380. return 0;
  381. if (gro_hint->nested_nh_offset +
  382. (gro_hint->nested_is_v6 ? sizeof(struct ipv6hdr) :
  383. sizeof(struct iphdr)) >
  384. gro_hint->nested_tp_offset)
  385. return 0;
  386. /* Allow only supported L2. */
  387. id = gro_hint->inner_proto_id;
  388. if (id >= ARRAY_SIZE(proto_id_map))
  389. return 0;
  390. *type = proto_id_map[id];
  391. *gh_len += gro_hint->nested_hdr_len;
  392. return (void *)gro_hint - (void *)gh;
  393. }
  /* Return the GRO hint located @hint_off bytes after the start of @gh. */
  static const struct geneve_opt_gro_hint *
  geneve_opt_gro_hint(const struct genevehdr *gh, unsigned int hint_off)
  {
  	void *base = (void *)gh;

  	return (const struct geneve_opt_gro_hint *)(base + hint_off);
  }
  399. static unsigned int
  400. geneve_sk_gro_hint_off(const struct sock *sk, const struct genevehdr *gh,
  401. __be16 *type, unsigned int *gh_len)
  402. {
  403. const struct geneve_sock *gs = rcu_dereference_sk_user_data(sk);
  404. if (!gs || !gs->gro_hint)
  405. return 0;
  406. return geneve_opt_gro_hint_off(gh, type, gh_len);
  407. }
  408. /* Validate the packet headers pointed by data WRT the provided hint */
  409. static bool
  410. geneve_opt_gro_hint_validate(void *data,
  411. const struct geneve_opt_gro_hint *gro_hint)
  412. {
  413. void *nested_nh = data + gro_hint->nested_nh_offset;
  414. struct iphdr *iph;
  415. if (gro_hint->nested_is_v6) {
  416. struct ipv6hdr *ipv6h = nested_nh;
  417. struct ipv6_opt_hdr *opth;
  418. int offset, len;
  419. if (ipv6h->nexthdr == IPPROTO_UDP)
  420. return true;
  421. offset = sizeof(*ipv6h) + gro_hint->nested_nh_offset;
  422. while (offset + sizeof(*opth) <= gro_hint->nested_tp_offset) {
  423. opth = data + offset;
  424. len = ipv6_optlen(opth);
  425. if (len + offset > gro_hint->nested_tp_offset)
  426. return false;
  427. if (opth->nexthdr == IPPROTO_UDP)
  428. return true;
  429. offset += len;
  430. }
  431. return false;
  432. }
  433. iph = nested_nh;
  434. if (*(u8 *)iph != 0x45 || ip_is_fragment(iph) ||
  435. iph->protocol != IPPROTO_UDP || ip_fast_csum((u8 *)iph, 5))
  436. return false;
  437. return true;
  438. }
  439. /*
  440. * Validate the skb headers following the specified geneve hdr vs the
  441. * provided hint, including nested L4 checksum.
  442. * The caller already ensured that the relevant amount of data is available
  443. * in the linear part.
  444. */
  445. static bool
  446. geneve_opt_gro_hint_validate_csum(const struct sk_buff *skb,
  447. const struct genevehdr *gh,
  448. const struct geneve_opt_gro_hint *gro_hint)
  449. {
  450. unsigned int plen, gh_len = geneve_hlen(gh);
  451. void *nested = (void *)gh + gh_len;
  452. struct udphdr *nested_uh;
  453. unsigned int nested_len;
  454. struct ipv6hdr *ipv6h;
  455. struct iphdr *iph;
  456. __wsum csum, psum;
  457. if (!geneve_opt_gro_hint_validate(nested, gro_hint))
  458. return false;
  459. /* Use GRO hints with nested csum only if the outer header has csum. */
  460. nested_uh = nested + gro_hint->nested_tp_offset;
  461. if (!nested_uh->check || skb->ip_summed == CHECKSUM_PARTIAL)
  462. return true;
  463. if (!NAPI_GRO_CB(skb)->csum_valid)
  464. return false;
  465. /* Compute the complete checksum up to the nested transport. */
  466. plen = gh_len + gro_hint->nested_tp_offset;
  467. csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(gh, plen, 0));
  468. nested_len = skb_gro_len(skb) - plen;
  469. /* Compute the nested pseudo header csum. */
  470. ipv6h = nested + gro_hint->nested_nh_offset;
  471. iph = (struct iphdr *)ipv6h;
  472. psum = gro_hint->nested_is_v6 ?
  473. ~csum_unfold(csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
  474. nested_len, IPPROTO_UDP, 0)) :
  475. csum_tcpudp_nofold(iph->saddr, iph->daddr,
  476. nested_len, IPPROTO_UDP, 0);
  477. return !csum_fold(csum_add(psum, csum));
  478. }
  479. static int geneve_post_decap_hint(const struct sock *sk, struct sk_buff *skb,
  480. unsigned int gh_len,
  481. struct genevehdr **geneveh)
  482. {
  483. const struct geneve_opt_gro_hint *gro_hint;
  484. unsigned int len, total_len, hint_off;
  485. struct ipv6hdr *ipv6h;
  486. struct iphdr *iph;
  487. struct udphdr *uh;
  488. __be16 p;
  489. hint_off = geneve_sk_gro_hint_off(sk, *geneveh, &p, &len);
  490. if (!hint_off)
  491. return 0;
  492. if (!skb_is_gso(skb))
  493. return 0;
  494. gro_hint = geneve_opt_gro_hint(*geneveh, hint_off);
  495. if (unlikely(!pskb_may_pull(skb, gro_hint->nested_hdr_len)))
  496. return -ENOMEM;
  497. *geneveh = geneve_hdr(skb);
  498. gro_hint = geneve_opt_gro_hint(*geneveh, hint_off);
  499. /*
  500. * Validate hints from untrusted source before accessing
  501. * the headers; csum will be checked later by the nested
  502. * protocol rx path.
  503. */
  504. if (unlikely(skb_shinfo(skb)->gso_type & SKB_GSO_DODGY &&
  505. !geneve_opt_gro_hint_validate(skb->data, gro_hint)))
  506. return -EINVAL;
  507. ipv6h = (void *)skb->data + gro_hint->nested_nh_offset;
  508. iph = (struct iphdr *)ipv6h;
  509. total_len = skb->len - gro_hint->nested_nh_offset;
  510. if (total_len > GRO_LEGACY_MAX_SIZE)
  511. return -E2BIG;
  512. /*
  513. * After stripping the outer encap, the packet still carries a
  514. * tunnel encapsulation: the nested one.
  515. */
  516. skb->encapsulation = 1;
  517. /* GSO expect a valid transpor header, move it to the current one. */
  518. skb_set_transport_header(skb, gro_hint->nested_tp_offset);
  519. /* Adjust the nested IP{6} hdr to actual GSO len. */
  520. if (gro_hint->nested_is_v6) {
  521. ipv6h->payload_len = htons(total_len - sizeof(*ipv6h));
  522. } else {
  523. __be16 old_len = iph->tot_len;
  524. iph->tot_len = htons(total_len);
  525. /* For IPv4 additionally adjust the nested csum. */
  526. csum_replace2(&iph->check, old_len, iph->tot_len);
  527. ip_send_check(iph);
  528. }
  529. /* Adjust the nested UDP header len and checksum. */
  530. uh = udp_hdr(skb);
  531. uh->len = htons(skb->len - gro_hint->nested_tp_offset);
  532. if (uh->check) {
  533. len = skb->len - gro_hint->nested_nh_offset;
  534. skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
  535. if (gro_hint->nested_is_v6)
  536. uh->check = ~udp_v6_check(len, &ipv6h->saddr,
  537. &ipv6h->daddr, 0);
  538. else
  539. uh->check = ~udp_v4_check(len, iph->saddr,
  540. iph->daddr, 0);
  541. } else {
  542. skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
  543. }
  544. return 0;
  545. }
  546. /* Callback from net/ipv4/udp.c to receive packets */
  547. static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
  548. {
  549. struct genevehdr *geneveh;
  550. struct geneve_dev *geneve;
  551. struct geneve_sock *gs;
  552. __be16 inner_proto;
  553. int opts_len;
  554. /* Need UDP and Geneve header to be present */
  555. if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
  556. goto drop;
  557. /* Return packets with reserved bits set */
  558. geneveh = geneve_hdr(skb);
  559. if (unlikely(geneveh->ver != GENEVE_VER))
  560. goto drop;
  561. gs = rcu_dereference_sk_user_data(sk);
  562. if (!gs)
  563. goto drop;
  564. geneve = geneve_lookup_skb(gs, skb);
  565. if (!geneve)
  566. goto drop;
  567. inner_proto = geneveh->proto_type;
  568. if (unlikely((!geneve->cfg.inner_proto_inherit &&
  569. inner_proto != htons(ETH_P_TEB)))) {
  570. dev_dstats_rx_dropped(geneve->dev);
  571. goto drop;
  572. }
  573. opts_len = geneveh->opt_len * 4;
  574. if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
  575. !net_eq(geneve->net, dev_net(geneve->dev)))) {
  576. dev_dstats_rx_dropped(geneve->dev);
  577. goto drop;
  578. }
  579. /*
  580. * After hint processing, the transport header points to the inner one
  581. * and we can't use anymore on geneve_hdr().
  582. */
  583. geneveh = geneve_hdr(skb);
  584. if (geneve_post_decap_hint(sk, skb, sizeof(struct genevehdr) +
  585. opts_len, &geneveh)) {
  586. DEV_STATS_INC(geneve->dev, rx_errors);
  587. goto drop;
  588. }
  589. geneve_rx(geneve, gs, skb, geneveh);
  590. return 0;
  591. drop:
  592. /* Consume bad packet */
  593. kfree_skb(skb);
  594. return 0;
  595. }
  596. /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
  597. static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
  598. {
  599. struct genevehdr *geneveh;
  600. struct geneve_sock *gs;
  601. u8 zero_vni[3] = { 0 };
  602. u8 *vni = zero_vni;
  603. if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
  604. return -EINVAL;
  605. geneveh = geneve_hdr(skb);
  606. if (geneveh->ver != GENEVE_VER)
  607. return -EINVAL;
  608. if (geneveh->proto_type != htons(ETH_P_TEB))
  609. return -EINVAL;
  610. gs = rcu_dereference_sk_user_data(sk);
  611. if (!gs)
  612. return -ENOENT;
  613. if (geneve_get_sk_family(gs) == AF_INET) {
  614. struct iphdr *iph = ip_hdr(skb);
  615. __be32 addr4 = 0;
  616. if (!gs->collect_md) {
  617. vni = geneve_hdr(skb)->vni;
  618. addr4 = iph->daddr;
  619. }
  620. return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
  621. }
  622. #if IS_ENABLED(CONFIG_IPV6)
  623. if (geneve_get_sk_family(gs) == AF_INET6) {
  624. struct ipv6hdr *ip6h = ipv6_hdr(skb);
  625. struct in6_addr addr6;
  626. memset(&addr6, 0, sizeof(struct in6_addr));
  627. if (!gs->collect_md) {
  628. vni = geneve_hdr(skb)->vni;
  629. addr6 = ip6h->daddr;
  630. }
  631. return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
  632. }
  633. #endif
  634. return -EPFNOSUPPORT;
  635. }
  636. static struct socket *geneve_create_sock(struct net *net, bool ipv6,
  637. __be16 port, bool ipv6_rx_csum)
  638. {
  639. struct socket *sock;
  640. struct udp_port_cfg udp_conf;
  641. int err;
  642. memset(&udp_conf, 0, sizeof(udp_conf));
  643. if (ipv6) {
  644. udp_conf.family = AF_INET6;
  645. udp_conf.ipv6_v6only = 1;
  646. udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
  647. } else {
  648. udp_conf.family = AF_INET;
  649. udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
  650. }
  651. udp_conf.local_udp_port = port;
  652. /* Open UDP socket */
  653. err = udp_sock_create(net, &udp_conf, &sock);
  654. if (err < 0)
  655. return ERR_PTR(err);
  656. udp_allow_gso(sock->sk);
  657. return sock;
  658. }
  659. static bool geneve_hdr_match(struct sk_buff *skb,
  660. const struct genevehdr *gh,
  661. const struct genevehdr *gh2,
  662. unsigned int hint_off)
  663. {
  664. const struct geneve_opt_gro_hint *gro_hint;
  665. void *nested, *nested2, *nh, *nh2;
  666. struct udphdr *udp, *udp2;
  667. unsigned int gh_len;
  668. /* Match the geneve hdr and options */
  669. if (gh->opt_len != gh2->opt_len)
  670. return false;
  671. gh_len = geneve_hlen(gh);
  672. if (memcmp(gh, gh2, gh_len))
  673. return false;
  674. if (!hint_off)
  675. return true;
  676. /*
  677. * When gro is present consider the nested headers as part
  678. * of the geneve options
  679. */
  680. nested = (void *)gh + gh_len;
  681. nested2 = (void *)gh2 + gh_len;
  682. gro_hint = geneve_opt_gro_hint(gh, hint_off);
  683. if (!memcmp(nested, nested2, gro_hint->nested_hdr_len))
  684. return true;
  685. /*
  686. * The nested headers differ; the packets can still belong to
  687. * the same flow when IPs/proto/ports match; if so flushing is
  688. * required.
  689. */
  690. nh = nested + gro_hint->nested_nh_offset;
  691. nh2 = nested2 + gro_hint->nested_nh_offset;
  692. if (gro_hint->nested_is_v6) {
  693. struct ipv6hdr *iph = nh, *iph2 = nh2;
  694. unsigned int nested_nlen;
  695. __be32 first_word;
  696. first_word = *(__be32 *)iph ^ *(__be32 *)iph2;
  697. if ((first_word & htonl(0xF00FFFFF)) ||
  698. !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
  699. !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
  700. iph->nexthdr != iph2->nexthdr)
  701. return false;
  702. nested_nlen = gro_hint->nested_tp_offset -
  703. gro_hint->nested_nh_offset;
  704. if (nested_nlen > sizeof(struct ipv6hdr) &&
  705. (memcmp(iph + 1, iph2 + 1,
  706. nested_nlen - sizeof(struct ipv6hdr))))
  707. return false;
  708. } else {
  709. struct iphdr *iph = nh, *iph2 = nh2;
  710. if ((iph->protocol ^ iph2->protocol) |
  711. ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
  712. ((__force u32)iph->daddr ^ (__force u32)iph2->daddr))
  713. return false;
  714. }
  715. udp = nested + gro_hint->nested_tp_offset;
  716. udp2 = nested2 + gro_hint->nested_tp_offset;
  717. if (udp->source != udp2->source || udp->dest != udp2->dest ||
  718. udp->check != udp2->check)
  719. return false;
  720. NAPI_GRO_CB(skb)->flush = 1;
  721. return true;
  722. }
  723. static struct sk_buff *geneve_gro_receive(struct sock *sk,
  724. struct list_head *head,
  725. struct sk_buff *skb)
  726. {
  727. unsigned int hlen, gh_len, off_gnv, hint_off;
  728. const struct geneve_opt_gro_hint *gro_hint;
  729. const struct packet_offload *ptype;
  730. struct genevehdr *gh, *gh2;
  731. struct sk_buff *pp = NULL;
  732. struct sk_buff *p;
  733. int flush = 1;
  734. __be16 type;
  735. off_gnv = skb_gro_offset(skb);
  736. hlen = off_gnv + sizeof(*gh);
  737. gh = skb_gro_header(skb, hlen, off_gnv);
  738. if (unlikely(!gh))
  739. goto out;
  740. if (gh->ver != GENEVE_VER || gh->oam)
  741. goto out;
  742. gh_len = geneve_hlen(gh);
  743. type = gh->proto_type;
  744. hlen = off_gnv + gh_len;
  745. if (!skb_gro_may_pull(skb, hlen)) {
  746. gh = skb_gro_header_slow(skb, hlen, off_gnv);
  747. if (unlikely(!gh))
  748. goto out;
  749. }
  750. /* The GRO hint/nested hdr could use a different ethernet type. */
  751. hint_off = geneve_sk_gro_hint_off(sk, gh, &type, &gh_len);
  752. if (hint_off) {
  753. /*
  754. * If the hint is present, and nested hdr validation fails, do
  755. * not attempt plain GRO: it will ignore inner hdrs and cause
  756. * OoO.
  757. */
  758. gh = skb_gro_header(skb, off_gnv + gh_len, off_gnv);
  759. if (unlikely(!gh))
  760. goto out;
  761. gro_hint = geneve_opt_gro_hint(gh, hint_off);
  762. if (!geneve_opt_gro_hint_validate_csum(skb, gh, gro_hint))
  763. goto out;
  764. }
  765. list_for_each_entry(p, head, list) {
  766. if (!NAPI_GRO_CB(p)->same_flow)
  767. continue;
  768. gh2 = (struct genevehdr *)(p->data + off_gnv);
  769. if (!geneve_hdr_match(skb, gh, gh2, hint_off)) {
  770. NAPI_GRO_CB(p)->same_flow = 0;
  771. continue;
  772. }
  773. }
  774. skb_gro_pull(skb, gh_len);
  775. skb_gro_postpull_rcsum(skb, gh, gh_len);
  776. if (likely(type == htons(ETH_P_TEB)))
  777. return call_gro_receive(eth_gro_receive, head, skb);
  778. ptype = gro_find_receive_by_type(type);
  779. if (!ptype)
  780. goto out;
  781. pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
  782. flush = 0;
  783. out:
  784. skb_gro_flush_final(skb, pp, flush);
  785. return pp;
  786. }
  787. static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
  788. int nhoff)
  789. {
  790. struct genevehdr *gh;
  791. struct packet_offload *ptype;
  792. __be16 type;
  793. int gh_len;
  794. int err = -ENOSYS;
  795. gh = (struct genevehdr *)(skb->data + nhoff);
  796. gh_len = geneve_hlen(gh);
  797. type = gh->proto_type;
  798. geneve_opt_gro_hint_off(gh, &type, &gh_len);
  799. /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
  800. if (likely(type == htons(ETH_P_TEB)))
  801. return eth_gro_complete(skb, nhoff + gh_len);
  802. ptype = gro_find_complete_by_type(type);
  803. if (ptype)
  804. err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
  805. skb_set_inner_mac_header(skb, nhoff + gh_len);
  806. return err;
  807. }
  808. /* Create new listen socket if needed */
  809. static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
  810. bool ipv6, bool ipv6_rx_csum)
  811. {
  812. struct geneve_net *gn = net_generic(net, geneve_net_id);
  813. struct geneve_sock *gs;
  814. struct socket *sock;
  815. struct udp_tunnel_sock_cfg tunnel_cfg;
  816. int h;
  817. gs = kzalloc_obj(*gs);
  818. if (!gs)
  819. return ERR_PTR(-ENOMEM);
  820. sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
  821. if (IS_ERR(sock)) {
  822. kfree(gs);
  823. return ERR_CAST(sock);
  824. }
  825. gs->sock = sock;
  826. gs->refcnt = 1;
  827. for (h = 0; h < VNI_HASH_SIZE; ++h)
  828. INIT_HLIST_HEAD(&gs->vni_list[h]);
  829. /* Initialize the geneve udp offloads structure */
  830. udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
  831. /* Mark socket as an encapsulation socket */
  832. memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
  833. tunnel_cfg.sk_user_data = gs;
  834. tunnel_cfg.encap_type = 1;
  835. tunnel_cfg.gro_receive = geneve_gro_receive;
  836. tunnel_cfg.gro_complete = geneve_gro_complete;
  837. tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
  838. tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
  839. tunnel_cfg.encap_destroy = NULL;
  840. setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
  841. list_add(&gs->list, &gn->sock_list);
  842. return gs;
  843. }
  844. static void __geneve_sock_release(struct geneve_sock *gs)
  845. {
  846. if (!gs || --gs->refcnt)
  847. return;
  848. list_del(&gs->list);
  849. udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
  850. udp_tunnel_sock_release(gs->sock);
  851. kfree_rcu(gs, rcu);
  852. }
  853. static void geneve_sock_release(struct geneve_dev *geneve)
  854. {
  855. struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
  856. #if IS_ENABLED(CONFIG_IPV6)
  857. struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
  858. rcu_assign_pointer(geneve->sock6, NULL);
  859. #endif
  860. rcu_assign_pointer(geneve->sock4, NULL);
  861. synchronize_net();
  862. __geneve_sock_release(gs4);
  863. #if IS_ENABLED(CONFIG_IPV6)
  864. __geneve_sock_release(gs6);
  865. #endif
  866. }
  867. static struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
  868. sa_family_t family,
  869. __be16 dst_port,
  870. bool gro_hint)
  871. {
  872. struct geneve_sock *gs;
  873. list_for_each_entry(gs, &gn->sock_list, list) {
  874. if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
  875. geneve_get_sk_family(gs) == family &&
  876. gs->gro_hint == gro_hint) {
  877. return gs;
  878. }
  879. }
  880. return NULL;
  881. }
  882. static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
  883. {
  884. struct net *net = geneve->net;
  885. struct geneve_net *gn = net_generic(net, geneve_net_id);
  886. bool gro_hint = geneve->cfg.gro_hint;
  887. struct geneve_dev_node *node;
  888. struct geneve_sock *gs;
  889. __u8 vni[3];
  890. __u32 hash;
  891. gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET,
  892. geneve->cfg.info.key.tp_dst, gro_hint);
  893. if (gs) {
  894. gs->refcnt++;
  895. goto out;
  896. }
  897. gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
  898. geneve->cfg.use_udp6_rx_checksums);
  899. if (IS_ERR(gs))
  900. return PTR_ERR(gs);
  901. out:
  902. gs->collect_md = geneve->cfg.collect_md;
  903. gs->gro_hint = gro_hint;
  904. #if IS_ENABLED(CONFIG_IPV6)
  905. if (ipv6) {
  906. rcu_assign_pointer(geneve->sock6, gs);
  907. node = &geneve->hlist6;
  908. } else
  909. #endif
  910. {
  911. rcu_assign_pointer(geneve->sock4, gs);
  912. node = &geneve->hlist4;
  913. }
  914. node->geneve = geneve;
  915. tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
  916. hash = geneve_net_vni_hash(vni);
  917. hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
  918. return 0;
  919. }
  920. static int geneve_open(struct net_device *dev)
  921. {
  922. struct geneve_dev *geneve = netdev_priv(dev);
  923. bool metadata = geneve->cfg.collect_md;
  924. bool ipv4, ipv6;
  925. int ret = 0;
  926. ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
  927. ipv4 = !ipv6 || metadata;
  928. #if IS_ENABLED(CONFIG_IPV6)
  929. if (ipv6) {
  930. ret = geneve_sock_add(geneve, true);
  931. if (ret < 0 && ret != -EAFNOSUPPORT)
  932. ipv4 = false;
  933. }
  934. #endif
  935. if (ipv4)
  936. ret = geneve_sock_add(geneve, false);
  937. if (ret < 0)
  938. geneve_sock_release(geneve);
  939. return ret;
  940. }
  941. static int geneve_stop(struct net_device *dev)
  942. {
  943. struct geneve_dev *geneve = netdev_priv(dev);
  944. hlist_del_init_rcu(&geneve->hlist4.hlist);
  945. #if IS_ENABLED(CONFIG_IPV6)
  946. hlist_del_init_rcu(&geneve->hlist6.hlist);
  947. #endif
  948. geneve_sock_release(geneve);
  949. return 0;
  950. }
  951. static void geneve_build_header(struct genevehdr *geneveh,
  952. const struct ip_tunnel_info *info,
  953. __be16 inner_proto)
  954. {
  955. geneveh->ver = GENEVE_VER;
  956. geneveh->opt_len = info->options_len / 4;
  957. geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags);
  958. geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT,
  959. info->key.tun_flags);
  960. geneveh->rsvd1 = 0;
  961. tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
  962. geneveh->proto_type = inner_proto;
  963. geneveh->rsvd2 = 0;
  964. if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags))
  965. ip_tunnel_info_opts_get(geneveh->options, info);
  966. }
  967. static int geneve_build_gro_hint_opt(const struct geneve_dev *geneve,
  968. struct sk_buff *skb)
  969. {
  970. struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb);
  971. struct geneve_opt_gro_hint *hint;
  972. unsigned int nhlen;
  973. bool nested_is_v6;
  974. int id;
  975. BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct geneve_skb_cb));
  976. cb->gro_hint_len = 0;
  977. /* Try to add the GRO hint only in case of double encap. */
  978. if (!geneve->cfg.gro_hint || !skb->encapsulation)
  979. return 0;
  980. /*
  981. * The nested headers must fit the geneve opt len fields and the
  982. * nested encap must carry a nested transport (UDP) header.
  983. */
  984. nhlen = skb_inner_mac_header(skb) - skb->data;
  985. if (nhlen > 255 || !skb_transport_header_was_set(skb) ||
  986. skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
  987. (skb_transport_offset(skb) + sizeof(struct udphdr) > nhlen))
  988. return 0;
  989. id = proto_to_id(skb->inner_protocol);
  990. if (id < 0)
  991. return 0;
  992. nested_is_v6 = skb->protocol == htons(ETH_P_IPV6);
  993. if (nested_is_v6) {
  994. int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
  995. u8 proto = ipv6_hdr(skb)->nexthdr;
  996. __be16 foff;
  997. if (ipv6_skip_exthdr(skb, start, &proto, &foff) < 0 ||
  998. proto != IPPROTO_UDP)
  999. return 0;
  1000. } else {
  1001. if (ip_hdr(skb)->protocol != IPPROTO_UDP)
  1002. return 0;
  1003. }
  1004. hint = &cb->gro_hint;
  1005. memset(hint, 0, sizeof(*hint));
  1006. hint->inner_proto_id = id;
  1007. hint->nested_is_v6 = skb->protocol == htons(ETH_P_IPV6);
  1008. hint->nested_nh_offset = skb_network_offset(skb);
  1009. hint->nested_tp_offset = skb_transport_offset(skb);
  1010. hint->nested_hdr_len = nhlen;
  1011. cb->gro_hint_len = GENEVE_OPT_GRO_HINT_SIZE;
  1012. return GENEVE_OPT_GRO_HINT_SIZE;
  1013. }
  1014. static void geneve_put_gro_hint_opt(struct genevehdr *gnvh, int opt_size,
  1015. const struct geneve_opt_gro_hint *hint)
  1016. {
  1017. struct geneve_opt *gro_opt;
  1018. /* geneve_build_header() did not took in account the GRO hint. */
  1019. gnvh->opt_len = (opt_size + GENEVE_OPT_GRO_HINT_SIZE) >> 2;
  1020. gro_opt = (void *)(gnvh + 1) + opt_size;
  1021. memset(gro_opt, 0, sizeof(*gro_opt));
  1022. gro_opt->opt_class = htons(GENEVE_OPT_NETDEV_CLASS);
  1023. gro_opt->type = GENEVE_OPT_GRO_HINT_TYPE;
  1024. gro_opt->length = GENEVE_OPT_GRO_HINT_LEN;
  1025. memcpy(gro_opt + 1, hint, sizeof(*hint));
  1026. }
  1027. static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
  1028. const struct ip_tunnel_info *info,
  1029. const struct geneve_dev *geneve, int ip_hdr_len)
  1030. {
  1031. bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
  1032. bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
  1033. bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
  1034. struct geneve_skb_cb *cb = GENEVE_SKB_CB(skb);
  1035. struct genevehdr *gnvh;
  1036. __be16 inner_proto;
  1037. bool double_encap;
  1038. int min_headroom;
  1039. int opt_size;
  1040. int err;
  1041. skb_reset_mac_header(skb);
  1042. skb_scrub_packet(skb, xnet);
  1043. opt_size = info->options_len + cb->gro_hint_len;
  1044. min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
  1045. GENEVE_BASE_HLEN + opt_size + ip_hdr_len;
  1046. err = skb_cow_head(skb, min_headroom);
  1047. if (unlikely(err))
  1048. goto free_dst;
  1049. double_encap = udp_tunnel_handle_partial(skb);
  1050. err = udp_tunnel_handle_offloads(skb, udp_sum);
  1051. if (err)
  1052. goto free_dst;
  1053. gnvh = __skb_push(skb, sizeof(*gnvh) + opt_size);
  1054. inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
  1055. geneve_build_header(gnvh, info, inner_proto);
  1056. if (cb->gro_hint_len)
  1057. geneve_put_gro_hint_opt(gnvh, info->options_len, &cb->gro_hint);
  1058. udp_tunnel_set_inner_protocol(skb, double_encap, inner_proto);
  1059. return 0;
  1060. free_dst:
  1061. dst_release(dst);
  1062. return err;
  1063. }
  1064. static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
  1065. const struct ip_tunnel_info *info,
  1066. bool *use_cache)
  1067. {
  1068. struct geneve_dev *geneve = netdev_priv(dev);
  1069. u8 dsfield;
  1070. dsfield = info->key.tos;
  1071. if (dsfield == 1 && !geneve->cfg.collect_md) {
  1072. dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
  1073. *use_cache = false;
  1074. }
  1075. return dsfield;
  1076. }
  1077. static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
  1078. struct geneve_dev *geneve,
  1079. const struct ip_tunnel_info *info)
  1080. {
  1081. struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
  1082. const struct ip_tunnel_key *key = &info->key;
  1083. struct rtable *rt;
  1084. bool use_cache;
  1085. __u8 tos, ttl;
  1086. __be16 df = 0;
  1087. __be32 saddr;
  1088. __be16 sport;
  1089. int err;
  1090. if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit))
  1091. return -EINVAL;
  1092. if (!gs4)
  1093. return -EIO;
  1094. use_cache = ip_tunnel_dst_cache_usable(skb, info);
  1095. tos = geneve_get_dsfield(skb, dev, info, &use_cache);
  1096. sport = udp_flow_src_port(geneve->net, skb,
  1097. geneve->cfg.port_min,
  1098. geneve->cfg.port_max, true);
  1099. rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
  1100. &info->key,
  1101. sport, geneve->cfg.info.key.tp_dst, tos,
  1102. use_cache ?
  1103. (struct dst_cache *)&info->dst_cache : NULL);
  1104. if (IS_ERR(rt))
  1105. return PTR_ERR(rt);
  1106. err = skb_tunnel_check_pmtu(skb, &rt->dst,
  1107. GENEVE_IPV4_HLEN + info->options_len +
  1108. geneve_build_gro_hint_opt(geneve, skb),
  1109. netif_is_any_bridge_port(dev));
  1110. if (err < 0) {
  1111. dst_release(&rt->dst);
  1112. return err;
  1113. } else if (err) {
  1114. struct ip_tunnel_info *info;
  1115. info = skb_tunnel_info(skb);
  1116. if (info) {
  1117. struct ip_tunnel_info *unclone;
  1118. unclone = skb_tunnel_info_unclone(skb);
  1119. if (unlikely(!unclone)) {
  1120. dst_release(&rt->dst);
  1121. return -ENOMEM;
  1122. }
  1123. unclone->key.u.ipv4.dst = saddr;
  1124. unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
  1125. }
  1126. if (!pskb_may_pull(skb, ETH_HLEN)) {
  1127. dst_release(&rt->dst);
  1128. return -EINVAL;
  1129. }
  1130. skb->protocol = eth_type_trans(skb, geneve->dev);
  1131. __netif_rx(skb);
  1132. dst_release(&rt->dst);
  1133. return -EMSGSIZE;
  1134. }
  1135. tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
  1136. if (geneve->cfg.collect_md) {
  1137. ttl = key->ttl;
  1138. df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
  1139. htons(IP_DF) : 0;
  1140. } else {
  1141. if (geneve->cfg.ttl_inherit)
  1142. ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
  1143. else
  1144. ttl = key->ttl;
  1145. ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
  1146. if (geneve->cfg.df == GENEVE_DF_SET) {
  1147. df = htons(IP_DF);
  1148. } else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
  1149. struct ethhdr *eth = skb_eth_hdr(skb);
  1150. if (ntohs(eth->h_proto) == ETH_P_IPV6) {
  1151. df = htons(IP_DF);
  1152. } else if (ntohs(eth->h_proto) == ETH_P_IP) {
  1153. struct iphdr *iph = ip_hdr(skb);
  1154. if (iph->frag_off & htons(IP_DF))
  1155. df = htons(IP_DF);
  1156. }
  1157. }
  1158. }
  1159. err = geneve_build_skb(&rt->dst, skb, info, geneve,
  1160. sizeof(struct iphdr));
  1161. if (unlikely(err))
  1162. return err;
  1163. udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
  1164. tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
  1165. !net_eq(geneve->net, dev_net(geneve->dev)),
  1166. !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
  1167. 0);
  1168. return 0;
  1169. }
  1170. #if IS_ENABLED(CONFIG_IPV6)
  1171. static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
  1172. struct geneve_dev *geneve,
  1173. const struct ip_tunnel_info *info)
  1174. {
  1175. struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
  1176. const struct ip_tunnel_key *key = &info->key;
  1177. struct dst_entry *dst = NULL;
  1178. struct in6_addr saddr;
  1179. bool use_cache;
  1180. __u8 prio, ttl;
  1181. __be16 sport;
  1182. int err;
  1183. if (skb_vlan_inet_prepare(skb, geneve->cfg.inner_proto_inherit))
  1184. return -EINVAL;
  1185. if (!gs6)
  1186. return -EIO;
  1187. use_cache = ip_tunnel_dst_cache_usable(skb, info);
  1188. prio = geneve_get_dsfield(skb, dev, info, &use_cache);
  1189. sport = udp_flow_src_port(geneve->net, skb,
  1190. geneve->cfg.port_min,
  1191. geneve->cfg.port_max, true);
  1192. dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
  1193. &saddr, key, sport,
  1194. geneve->cfg.info.key.tp_dst, prio,
  1195. use_cache ?
  1196. (struct dst_cache *)&info->dst_cache : NULL);
  1197. if (IS_ERR(dst))
  1198. return PTR_ERR(dst);
  1199. err = skb_tunnel_check_pmtu(skb, dst,
  1200. GENEVE_IPV6_HLEN + info->options_len +
  1201. geneve_build_gro_hint_opt(geneve, skb),
  1202. netif_is_any_bridge_port(dev));
  1203. if (err < 0) {
  1204. dst_release(dst);
  1205. return err;
  1206. } else if (err) {
  1207. struct ip_tunnel_info *info = skb_tunnel_info(skb);
  1208. if (info) {
  1209. struct ip_tunnel_info *unclone;
  1210. unclone = skb_tunnel_info_unclone(skb);
  1211. if (unlikely(!unclone)) {
  1212. dst_release(dst);
  1213. return -ENOMEM;
  1214. }
  1215. unclone->key.u.ipv6.dst = saddr;
  1216. unclone->key.u.ipv6.src = info->key.u.ipv6.dst;
  1217. }
  1218. if (!pskb_may_pull(skb, ETH_HLEN)) {
  1219. dst_release(dst);
  1220. return -EINVAL;
  1221. }
  1222. skb->protocol = eth_type_trans(skb, geneve->dev);
  1223. __netif_rx(skb);
  1224. dst_release(dst);
  1225. return -EMSGSIZE;
  1226. }
  1227. prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb);
  1228. if (geneve->cfg.collect_md) {
  1229. ttl = key->ttl;
  1230. } else {
  1231. if (geneve->cfg.ttl_inherit)
  1232. ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
  1233. else
  1234. ttl = key->ttl;
  1235. ttl = ttl ? : ip6_dst_hoplimit(dst);
  1236. }
  1237. err = geneve_build_skb(dst, skb, info, geneve, sizeof(struct ipv6hdr));
  1238. if (unlikely(err))
  1239. return err;
  1240. udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
  1241. &saddr, &key->u.ipv6.dst, prio, ttl,
  1242. info->key.label, sport, geneve->cfg.info.key.tp_dst,
  1243. !test_bit(IP_TUNNEL_CSUM_BIT,
  1244. info->key.tun_flags),
  1245. 0);
  1246. return 0;
  1247. }
  1248. #endif
  1249. static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
  1250. {
  1251. struct geneve_dev *geneve = netdev_priv(dev);
  1252. struct ip_tunnel_info *info = NULL;
  1253. int err;
  1254. if (geneve->cfg.collect_md) {
  1255. info = skb_tunnel_info(skb);
  1256. if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
  1257. netdev_dbg(dev, "no tunnel metadata\n");
  1258. dev_kfree_skb(skb);
  1259. dev_dstats_tx_dropped(dev);
  1260. return NETDEV_TX_OK;
  1261. }
  1262. } else {
  1263. info = &geneve->cfg.info;
  1264. }
  1265. rcu_read_lock();
  1266. #if IS_ENABLED(CONFIG_IPV6)
  1267. if (info->mode & IP_TUNNEL_INFO_IPV6)
  1268. err = geneve6_xmit_skb(skb, dev, geneve, info);
  1269. else
  1270. #endif
  1271. err = geneve_xmit_skb(skb, dev, geneve, info);
  1272. rcu_read_unlock();
  1273. if (likely(!err))
  1274. return NETDEV_TX_OK;
  1275. if (err != -EMSGSIZE)
  1276. dev_kfree_skb(skb);
  1277. if (err == -ELOOP)
  1278. DEV_STATS_INC(dev, collisions);
  1279. else if (err == -ENETUNREACH)
  1280. DEV_STATS_INC(dev, tx_carrier_errors);
  1281. DEV_STATS_INC(dev, tx_errors);
  1282. return NETDEV_TX_OK;
  1283. }
  1284. static int geneve_change_mtu(struct net_device *dev, int new_mtu)
  1285. {
  1286. if (new_mtu > dev->max_mtu)
  1287. new_mtu = dev->max_mtu;
  1288. else if (new_mtu < dev->min_mtu)
  1289. new_mtu = dev->min_mtu;
  1290. WRITE_ONCE(dev->mtu, new_mtu);
  1291. return 0;
  1292. }
  1293. static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
  1294. {
  1295. struct ip_tunnel_info *info = skb_tunnel_info(skb);
  1296. struct geneve_dev *geneve = netdev_priv(dev);
  1297. __be16 sport;
  1298. if (ip_tunnel_info_af(info) == AF_INET) {
  1299. struct rtable *rt;
  1300. struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
  1301. bool use_cache;
  1302. __be32 saddr;
  1303. u8 tos;
  1304. if (!gs4)
  1305. return -EIO;
  1306. use_cache = ip_tunnel_dst_cache_usable(skb, info);
  1307. tos = geneve_get_dsfield(skb, dev, info, &use_cache);
  1308. sport = udp_flow_src_port(geneve->net, skb,
  1309. geneve->cfg.port_min,
  1310. geneve->cfg.port_max, true);
  1311. rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
  1312. &info->key,
  1313. sport, geneve->cfg.info.key.tp_dst,
  1314. tos,
  1315. use_cache ? &info->dst_cache : NULL);
  1316. if (IS_ERR(rt))
  1317. return PTR_ERR(rt);
  1318. ip_rt_put(rt);
  1319. info->key.u.ipv4.src = saddr;
  1320. #if IS_ENABLED(CONFIG_IPV6)
  1321. } else if (ip_tunnel_info_af(info) == AF_INET6) {
  1322. struct dst_entry *dst;
  1323. struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
  1324. struct in6_addr saddr;
  1325. bool use_cache;
  1326. u8 prio;
  1327. if (!gs6)
  1328. return -EIO;
  1329. use_cache = ip_tunnel_dst_cache_usable(skb, info);
  1330. prio = geneve_get_dsfield(skb, dev, info, &use_cache);
  1331. sport = udp_flow_src_port(geneve->net, skb,
  1332. geneve->cfg.port_min,
  1333. geneve->cfg.port_max, true);
  1334. dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
  1335. &saddr, &info->key, sport,
  1336. geneve->cfg.info.key.tp_dst, prio,
  1337. use_cache ? &info->dst_cache : NULL);
  1338. if (IS_ERR(dst))
  1339. return PTR_ERR(dst);
  1340. dst_release(dst);
  1341. info->key.u.ipv6.src = saddr;
  1342. #endif
  1343. } else {
  1344. return -EINVAL;
  1345. }
  1346. info->key.tp_src = sport;
  1347. info->key.tp_dst = geneve->cfg.info.key.tp_dst;
  1348. return 0;
  1349. }
  1350. static const struct net_device_ops geneve_netdev_ops = {
  1351. .ndo_init = geneve_init,
  1352. .ndo_uninit = geneve_uninit,
  1353. .ndo_open = geneve_open,
  1354. .ndo_stop = geneve_stop,
  1355. .ndo_start_xmit = geneve_xmit,
  1356. .ndo_change_mtu = geneve_change_mtu,
  1357. .ndo_validate_addr = eth_validate_addr,
  1358. .ndo_set_mac_address = eth_mac_addr,
  1359. .ndo_fill_metadata_dst = geneve_fill_metadata_dst,
  1360. };
  1361. static void geneve_get_drvinfo(struct net_device *dev,
  1362. struct ethtool_drvinfo *drvinfo)
  1363. {
  1364. strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
  1365. strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
  1366. }
  1367. static const struct ethtool_ops geneve_ethtool_ops = {
  1368. .get_drvinfo = geneve_get_drvinfo,
  1369. .get_link = ethtool_op_get_link,
  1370. };
  1371. /* Info for udev, that this is a virtual tunnel endpoint */
  1372. static const struct device_type geneve_type = {
  1373. .name = "geneve",
  1374. };
  1375. /* Calls the ndo_udp_tunnel_add of the caller in order to
  1376. * supply the listening GENEVE udp ports. Callers are expected
  1377. * to implement the ndo_udp_tunnel_add.
  1378. */
  1379. static void geneve_offload_rx_ports(struct net_device *dev, bool push)
  1380. {
  1381. struct net *net = dev_net(dev);
  1382. struct geneve_net *gn = net_generic(net, geneve_net_id);
  1383. struct geneve_sock *gs;
  1384. ASSERT_RTNL();
  1385. list_for_each_entry(gs, &gn->sock_list, list) {
  1386. if (push) {
  1387. udp_tunnel_push_rx_port(dev, gs->sock,
  1388. UDP_TUNNEL_TYPE_GENEVE);
  1389. } else {
  1390. udp_tunnel_drop_rx_port(dev, gs->sock,
  1391. UDP_TUNNEL_TYPE_GENEVE);
  1392. }
  1393. }
  1394. }
  1395. /* Initialize the device structure. */
  1396. static void geneve_setup(struct net_device *dev)
  1397. {
  1398. ether_setup(dev);
  1399. dev->netdev_ops = &geneve_netdev_ops;
  1400. dev->ethtool_ops = &geneve_ethtool_ops;
  1401. dev->needs_free_netdev = true;
  1402. SET_NETDEV_DEVTYPE(dev, &geneve_type);
  1403. dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
  1404. dev->features |= NETIF_F_RXCSUM;
  1405. dev->features |= NETIF_F_GSO_SOFTWARE;
  1406. /* Partial features are disabled by default. */
  1407. dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
  1408. dev->hw_features |= NETIF_F_RXCSUM;
  1409. dev->hw_features |= NETIF_F_GSO_SOFTWARE;
  1410. dev->hw_features |= UDP_TUNNEL_PARTIAL_FEATURES;
  1411. dev->hw_features |= NETIF_F_GSO_PARTIAL;
  1412. dev->hw_enc_features = dev->hw_features;
  1413. dev->gso_partial_features = UDP_TUNNEL_PARTIAL_FEATURES;
  1414. dev->mangleid_features = NETIF_F_GSO_PARTIAL;
  1415. dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
  1416. /* MTU range: 68 - (something less than 65535) */
  1417. dev->min_mtu = ETH_MIN_MTU;
  1418. /* The max_mtu calculation does not take account of GENEVE
  1419. * options, to avoid excluding potentially valid
  1420. * configurations. This will be further reduced by IPvX hdr size.
  1421. */
  1422. dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
  1423. netif_keep_dst(dev);
  1424. dev->priv_flags &= ~IFF_TX_SKB_SHARING;
  1425. dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
  1426. dev->lltx = true;
  1427. eth_hw_addr_random(dev);
  1428. }
  1429. static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
  1430. [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
  1431. [IFLA_GENEVE_ID] = { .type = NLA_U32 },
  1432. [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
  1433. [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
  1434. [IFLA_GENEVE_TTL] = { .type = NLA_U8 },
  1435. [IFLA_GENEVE_TOS] = { .type = NLA_U8 },
  1436. [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
  1437. [IFLA_GENEVE_PORT] = { .type = NLA_U16 },
  1438. [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
  1439. [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
  1440. [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
  1441. [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
  1442. [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
  1443. [IFLA_GENEVE_DF] = { .type = NLA_U8 },
  1444. [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
  1445. [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
  1446. [IFLA_GENEVE_GRO_HINT] = { .type = NLA_FLAG },
  1447. };
  1448. static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
  1449. struct netlink_ext_ack *extack)
  1450. {
  1451. if (tb[IFLA_ADDRESS]) {
  1452. if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
  1453. NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
  1454. "Provided link layer address is not Ethernet");
  1455. return -EINVAL;
  1456. }
  1457. if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
  1458. NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
  1459. "Provided Ethernet address is not unicast");
  1460. return -EADDRNOTAVAIL;
  1461. }
  1462. }
  1463. if (!data) {
  1464. NL_SET_ERR_MSG(extack,
  1465. "Not enough attributes provided to perform the operation");
  1466. return -EINVAL;
  1467. }
  1468. if (data[IFLA_GENEVE_ID]) {
  1469. __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]);
  1470. if (vni >= GENEVE_N_VID) {
  1471. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
  1472. "Geneve ID must be lower than 16777216");
  1473. return -ERANGE;
  1474. }
  1475. }
  1476. if (data[IFLA_GENEVE_DF]) {
  1477. enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
  1478. if (df < 0 || df > GENEVE_DF_MAX) {
  1479. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
  1480. "Invalid DF attribute");
  1481. return -EINVAL;
  1482. }
  1483. }
  1484. if (data[IFLA_GENEVE_PORT_RANGE]) {
  1485. const struct ifla_geneve_port_range *p;
  1486. p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
  1487. if (ntohs(p->high) < ntohs(p->low)) {
  1488. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE],
  1489. "Invalid source port range");
  1490. return -EINVAL;
  1491. }
  1492. }
  1493. return 0;
  1494. }
  1495. static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
  1496. const struct ip_tunnel_info *info,
  1497. bool *tun_on_same_port,
  1498. bool *tun_collect_md)
  1499. {
  1500. struct geneve_dev *geneve, *t = NULL;
  1501. *tun_on_same_port = false;
  1502. *tun_collect_md = false;
  1503. list_for_each_entry(geneve, &gn->geneve_list, next) {
  1504. if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
  1505. *tun_collect_md = geneve->cfg.collect_md;
  1506. *tun_on_same_port = true;
  1507. }
  1508. if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
  1509. info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
  1510. !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
  1511. t = geneve;
  1512. }
  1513. return t;
  1514. }
  1515. static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
  1516. {
  1517. return !(info->key.tun_id || info->key.tos ||
  1518. !ip_tunnel_flags_empty(info->key.tun_flags) ||
  1519. info->key.ttl || info->key.label || info->key.tp_src ||
  1520. memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
  1521. }
  1522. static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
  1523. struct ip_tunnel_info *b)
  1524. {
  1525. if (ip_tunnel_info_af(a) == AF_INET)
  1526. return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
  1527. else
  1528. return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
  1529. }
  1530. static int geneve_configure(struct net *net, struct net_device *dev,
  1531. struct netlink_ext_ack *extack,
  1532. const struct geneve_config *cfg)
  1533. {
  1534. struct geneve_net *gn = net_generic(net, geneve_net_id);
  1535. struct geneve_dev *t, *geneve = netdev_priv(dev);
  1536. const struct ip_tunnel_info *info = &cfg->info;
  1537. bool tun_collect_md, tun_on_same_port;
  1538. int err, encap_len;
  1539. if (cfg->collect_md && !is_tnl_info_zero(info)) {
  1540. NL_SET_ERR_MSG(extack,
  1541. "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
  1542. return -EINVAL;
  1543. }
  1544. geneve->net = net;
  1545. geneve->dev = dev;
  1546. t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
  1547. if (t)
  1548. return -EBUSY;
  1549. /* make enough headroom for basic scenario */
  1550. encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
  1551. if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
  1552. encap_len += sizeof(struct iphdr);
  1553. dev->max_mtu -= sizeof(struct iphdr);
  1554. } else {
  1555. encap_len += sizeof(struct ipv6hdr);
  1556. dev->max_mtu -= sizeof(struct ipv6hdr);
  1557. }
  1558. dev->needed_headroom = encap_len + ETH_HLEN;
  1559. if (cfg->collect_md) {
  1560. if (tun_on_same_port) {
  1561. NL_SET_ERR_MSG(extack,
  1562. "There can be only one externally controlled device on a destination port");
  1563. return -EPERM;
  1564. }
  1565. } else {
  1566. if (tun_collect_md) {
  1567. NL_SET_ERR_MSG(extack,
  1568. "There already exists an externally controlled device on this destination port");
  1569. return -EPERM;
  1570. }
  1571. }
  1572. dst_cache_reset(&geneve->cfg.info.dst_cache);
  1573. memcpy(&geneve->cfg, cfg, sizeof(*cfg));
  1574. if (geneve->cfg.inner_proto_inherit) {
  1575. dev->header_ops = NULL;
  1576. dev->type = ARPHRD_NONE;
  1577. dev->hard_header_len = 0;
  1578. dev->addr_len = 0;
  1579. dev->flags = IFF_POINTOPOINT | IFF_NOARP;
  1580. }
  1581. err = register_netdevice(dev);
  1582. if (err)
  1583. return err;
  1584. list_add(&geneve->next, &gn->geneve_list);
  1585. return 0;
  1586. }
  1587. static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
  1588. {
  1589. memset(info, 0, sizeof(*info));
  1590. info->key.tp_dst = htons(dst_port);
  1591. }
  1592. static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
  1593. struct netlink_ext_ack *extack,
  1594. struct geneve_config *cfg, bool changelink)
  1595. {
  1596. struct ip_tunnel_info *info = &cfg->info;
  1597. int attrtype;
  1598. if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
  1599. NL_SET_ERR_MSG(extack,
  1600. "Cannot specify both IPv4 and IPv6 Remote addresses");
  1601. return -EINVAL;
  1602. }
  1603. if (data[IFLA_GENEVE_REMOTE]) {
  1604. if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
  1605. attrtype = IFLA_GENEVE_REMOTE;
  1606. goto change_notsup;
  1607. }
  1608. info->key.u.ipv4.dst =
  1609. nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
  1610. if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
  1611. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
  1612. "Remote IPv4 address cannot be Multicast");
  1613. return -EINVAL;
  1614. }
  1615. }
  1616. if (data[IFLA_GENEVE_REMOTE6]) {
  1617. #if IS_ENABLED(CONFIG_IPV6)
  1618. if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
  1619. attrtype = IFLA_GENEVE_REMOTE6;
  1620. goto change_notsup;
  1621. }
  1622. info->mode = IP_TUNNEL_INFO_IPV6;
  1623. info->key.u.ipv6.dst =
  1624. nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
  1625. if (ipv6_addr_type(&info->key.u.ipv6.dst) &
  1626. IPV6_ADDR_LINKLOCAL) {
  1627. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
  1628. "Remote IPv6 address cannot be link-local");
  1629. return -EINVAL;
  1630. }
  1631. if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
  1632. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
  1633. "Remote IPv6 address cannot be Multicast");
  1634. return -EINVAL;
  1635. }
  1636. __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
  1637. cfg->use_udp6_rx_checksums = true;
  1638. #else
  1639. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
  1640. "IPv6 support not enabled in the kernel");
  1641. return -EPFNOSUPPORT;
  1642. #endif
  1643. }
  1644. if (data[IFLA_GENEVE_ID]) {
  1645. __u32 vni;
  1646. __u8 tvni[3];
  1647. __be64 tunid;
  1648. vni = nla_get_u32(data[IFLA_GENEVE_ID]);
  1649. tvni[0] = (vni & 0x00ff0000) >> 16;
  1650. tvni[1] = (vni & 0x0000ff00) >> 8;
  1651. tvni[2] = vni & 0x000000ff;
  1652. tunid = vni_to_tunnel_id(tvni);
  1653. if (changelink && (tunid != info->key.tun_id)) {
  1654. attrtype = IFLA_GENEVE_ID;
  1655. goto change_notsup;
  1656. }
  1657. info->key.tun_id = tunid;
  1658. }
  1659. if (data[IFLA_GENEVE_TTL_INHERIT]) {
  1660. if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
  1661. cfg->ttl_inherit = true;
  1662. else
  1663. cfg->ttl_inherit = false;
  1664. } else if (data[IFLA_GENEVE_TTL]) {
  1665. info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
  1666. cfg->ttl_inherit = false;
  1667. }
  1668. if (data[IFLA_GENEVE_TOS])
  1669. info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
  1670. if (data[IFLA_GENEVE_DF])
  1671. cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
  1672. if (data[IFLA_GENEVE_LABEL]) {
  1673. info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
  1674. IPV6_FLOWLABEL_MASK;
  1675. if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
  1676. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
  1677. "Label attribute only applies for IPv6 Geneve devices");
  1678. return -EINVAL;
  1679. }
  1680. }
  1681. if (data[IFLA_GENEVE_PORT]) {
  1682. if (changelink) {
  1683. attrtype = IFLA_GENEVE_PORT;
  1684. goto change_notsup;
  1685. }
  1686. info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
  1687. }
  1688. if (data[IFLA_GENEVE_PORT_RANGE]) {
  1689. const struct ifla_geneve_port_range *p;
  1690. if (changelink) {
  1691. attrtype = IFLA_GENEVE_PORT_RANGE;
  1692. goto change_notsup;
  1693. }
  1694. p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
  1695. cfg->port_min = ntohs(p->low);
  1696. cfg->port_max = ntohs(p->high);
  1697. }
  1698. if (data[IFLA_GENEVE_COLLECT_METADATA]) {
  1699. if (changelink) {
  1700. attrtype = IFLA_GENEVE_COLLECT_METADATA;
  1701. goto change_notsup;
  1702. }
  1703. cfg->collect_md = true;
  1704. }
  1705. if (data[IFLA_GENEVE_UDP_CSUM]) {
  1706. if (changelink) {
  1707. attrtype = IFLA_GENEVE_UDP_CSUM;
  1708. goto change_notsup;
  1709. }
  1710. if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
  1711. __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
  1712. }
  1713. if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
  1714. #if IS_ENABLED(CONFIG_IPV6)
  1715. if (changelink) {
  1716. attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
  1717. goto change_notsup;
  1718. }
  1719. if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
  1720. __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
  1721. #else
  1722. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
  1723. "IPv6 support not enabled in the kernel");
  1724. return -EPFNOSUPPORT;
  1725. #endif
  1726. }
  1727. if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
  1728. #if IS_ENABLED(CONFIG_IPV6)
  1729. if (changelink) {
  1730. attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
  1731. goto change_notsup;
  1732. }
  1733. if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
  1734. cfg->use_udp6_rx_checksums = false;
  1735. #else
  1736. NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
  1737. "IPv6 support not enabled in the kernel");
  1738. return -EPFNOSUPPORT;
  1739. #endif
  1740. }
  1741. if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
  1742. if (changelink) {
  1743. attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
  1744. goto change_notsup;
  1745. }
  1746. cfg->inner_proto_inherit = true;
  1747. }
  1748. if (data[IFLA_GENEVE_GRO_HINT]) {
  1749. if (changelink) {
  1750. attrtype = IFLA_GENEVE_GRO_HINT;
  1751. goto change_notsup;
  1752. }
  1753. cfg->gro_hint = true;
  1754. }
  1755. return 0;
  1756. change_notsup:
  1757. NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
  1758. "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, gro_hint and UDP checksum attributes are not supported");
  1759. return -EOPNOTSUPP;
  1760. }
  1761. static void geneve_link_config(struct net_device *dev,
  1762. struct ip_tunnel_info *info, struct nlattr *tb[])
  1763. {
  1764. struct geneve_dev *geneve = netdev_priv(dev);
  1765. int ldev_mtu = 0;
  1766. if (tb[IFLA_MTU]) {
  1767. geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
  1768. return;
  1769. }
  1770. switch (ip_tunnel_info_af(info)) {
  1771. case AF_INET: {
  1772. struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
  1773. struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
  1774. if (!IS_ERR(rt) && rt->dst.dev) {
  1775. ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
  1776. ip_rt_put(rt);
  1777. }
  1778. break;
  1779. }
  1780. #if IS_ENABLED(CONFIG_IPV6)
  1781. case AF_INET6: {
  1782. struct rt6_info *rt;
  1783. if (!__in6_dev_get(dev))
  1784. break;
  1785. rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
  1786. NULL, 0);
  1787. if (rt && rt->dst.dev)
  1788. ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
  1789. ip6_rt_put(rt);
  1790. break;
  1791. }
  1792. #endif
  1793. }
  1794. if (ldev_mtu <= 0)
  1795. return;
  1796. geneve_change_mtu(dev, ldev_mtu - info->options_len);
  1797. }
  1798. static int geneve_newlink(struct net_device *dev,
  1799. struct rtnl_newlink_params *params,
  1800. struct netlink_ext_ack *extack)
  1801. {
  1802. struct net *link_net = rtnl_newlink_link_net(params);
  1803. struct nlattr **data = params->data;
  1804. struct nlattr **tb = params->tb;
  1805. struct geneve_config cfg = {
  1806. .df = GENEVE_DF_UNSET,
  1807. .use_udp6_rx_checksums = false,
  1808. .ttl_inherit = false,
  1809. .collect_md = false,
  1810. .port_min = 1,
  1811. .port_max = USHRT_MAX,
  1812. };
  1813. int err;
  1814. init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
  1815. err = geneve_nl2info(tb, data, extack, &cfg, false);
  1816. if (err)
  1817. return err;
  1818. err = geneve_configure(link_net, dev, extack, &cfg);
  1819. if (err)
  1820. return err;
  1821. geneve_link_config(dev, &cfg.info, tb);
  1822. return 0;
  1823. }
  1824. /* Quiesces the geneve device data path for both TX and RX.
  1825. *
  1826. * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
  1827. * So, if we set that socket to NULL under RCU and wait for synchronize_net()
  1828. * to complete for the existing set of in-flight packets to be transmitted,
  1829. * then we would have quiesced the transmit data path. All the future packets
  1830. * will get dropped until we unquiesce the data path.
  1831. *
  1832. * On receive geneve dereference the geneve_sock stashed in the socket. So,
  1833. * if we set that to NULL under RCU and wait for synchronize_net() to
  1834. * complete, then we would have quiesced the receive data path.
  1835. */
  1836. static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
  1837. struct geneve_sock **gs6)
  1838. {
  1839. *gs4 = rtnl_dereference(geneve->sock4);
  1840. rcu_assign_pointer(geneve->sock4, NULL);
  1841. if (*gs4)
  1842. rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
  1843. #if IS_ENABLED(CONFIG_IPV6)
  1844. *gs6 = rtnl_dereference(geneve->sock6);
  1845. rcu_assign_pointer(geneve->sock6, NULL);
  1846. if (*gs6)
  1847. rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
  1848. #else
  1849. *gs6 = NULL;
  1850. #endif
  1851. synchronize_net();
  1852. }
  1853. /* Resumes the geneve device data path for both TX and RX. */
  1854. static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
  1855. struct geneve_sock __maybe_unused *gs6)
  1856. {
  1857. rcu_assign_pointer(geneve->sock4, gs4);
  1858. if (gs4)
  1859. rcu_assign_sk_user_data(gs4->sock->sk, gs4);
  1860. #if IS_ENABLED(CONFIG_IPV6)
  1861. rcu_assign_pointer(geneve->sock6, gs6);
  1862. if (gs6)
  1863. rcu_assign_sk_user_data(gs6->sock->sk, gs6);
  1864. #endif
  1865. synchronize_net();
  1866. }
  1867. static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
  1868. struct nlattr *data[],
  1869. struct netlink_ext_ack *extack)
  1870. {
  1871. struct geneve_dev *geneve = netdev_priv(dev);
  1872. struct geneve_sock *gs4, *gs6;
  1873. struct geneve_config cfg;
  1874. int err;
  1875. /* If the geneve device is configured for metadata (or externally
  1876. * controlled, for example, OVS), then nothing can be changed.
  1877. */
  1878. if (geneve->cfg.collect_md)
  1879. return -EOPNOTSUPP;
  1880. /* Start with the existing info. */
  1881. memcpy(&cfg, &geneve->cfg, sizeof(cfg));
  1882. err = geneve_nl2info(tb, data, extack, &cfg, true);
  1883. if (err)
  1884. return err;
  1885. if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
  1886. dst_cache_reset(&cfg.info.dst_cache);
  1887. geneve_link_config(dev, &cfg.info, tb);
  1888. }
  1889. geneve_quiesce(geneve, &gs4, &gs6);
  1890. memcpy(&geneve->cfg, &cfg, sizeof(cfg));
  1891. geneve_unquiesce(geneve, gs4, gs6);
  1892. return 0;
  1893. }
  1894. static void geneve_dellink(struct net_device *dev, struct list_head *head)
  1895. {
  1896. struct geneve_dev *geneve = netdev_priv(dev);
  1897. list_del(&geneve->next);
  1898. unregister_netdevice_queue(dev, head);
  1899. }
  1900. static size_t geneve_get_size(const struct net_device *dev)
  1901. {
  1902. return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */
  1903. nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
  1904. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
  1905. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
  1906. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
  1907. nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
  1908. nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
  1909. nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
  1910. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
  1911. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
  1912. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
  1913. nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
  1914. nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
  1915. nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
  1916. nla_total_size(0) + /* IFLA_GENEVE_GRO_HINT */
  1917. 0;
  1918. }
  1919. static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
  1920. {
  1921. struct geneve_dev *geneve = netdev_priv(dev);
  1922. struct ip_tunnel_info *info = &geneve->cfg.info;
  1923. bool ttl_inherit = geneve->cfg.ttl_inherit;
  1924. bool metadata = geneve->cfg.collect_md;
  1925. struct ifla_geneve_port_range ports = {
  1926. .low = htons(geneve->cfg.port_min),
  1927. .high = htons(geneve->cfg.port_max),
  1928. };
  1929. __u8 tmp_vni[3];
  1930. __u32 vni;
  1931. tunnel_id_to_vni(info->key.tun_id, tmp_vni);
  1932. vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
  1933. if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
  1934. goto nla_put_failure;
  1935. if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
  1936. if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
  1937. info->key.u.ipv4.dst))
  1938. goto nla_put_failure;
  1939. if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
  1940. test_bit(IP_TUNNEL_CSUM_BIT,
  1941. info->key.tun_flags)))
  1942. goto nla_put_failure;
  1943. #if IS_ENABLED(CONFIG_IPV6)
  1944. } else if (!metadata) {
  1945. if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
  1946. &info->key.u.ipv6.dst))
  1947. goto nla_put_failure;
  1948. if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
  1949. !test_bit(IP_TUNNEL_CSUM_BIT,
  1950. info->key.tun_flags)))
  1951. goto nla_put_failure;
  1952. #endif
  1953. }
  1954. if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
  1955. nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
  1956. nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
  1957. goto nla_put_failure;
  1958. if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
  1959. goto nla_put_failure;
  1960. if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
  1961. goto nla_put_failure;
  1962. if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
  1963. goto nla_put_failure;
  1964. #if IS_ENABLED(CONFIG_IPV6)
  1965. if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
  1966. !geneve->cfg.use_udp6_rx_checksums))
  1967. goto nla_put_failure;
  1968. #endif
  1969. if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
  1970. goto nla_put_failure;
  1971. if (geneve->cfg.inner_proto_inherit &&
  1972. nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
  1973. goto nla_put_failure;
  1974. if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports))
  1975. goto nla_put_failure;
  1976. if (geneve->cfg.gro_hint &&
  1977. nla_put_flag(skb, IFLA_GENEVE_GRO_HINT))
  1978. goto nla_put_failure;
  1979. return 0;
  1980. nla_put_failure:
  1981. return -EMSGSIZE;
  1982. }
  1983. static struct rtnl_link_ops geneve_link_ops __read_mostly = {
  1984. .kind = "geneve",
  1985. .maxtype = IFLA_GENEVE_MAX,
  1986. .policy = geneve_policy,
  1987. .priv_size = sizeof(struct geneve_dev),
  1988. .setup = geneve_setup,
  1989. .validate = geneve_validate,
  1990. .newlink = geneve_newlink,
  1991. .changelink = geneve_changelink,
  1992. .dellink = geneve_dellink,
  1993. .get_size = geneve_get_size,
  1994. .fill_info = geneve_fill_info,
  1995. };
  1996. struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
  1997. u8 name_assign_type, u16 dst_port)
  1998. {
  1999. struct nlattr *tb[IFLA_MAX + 1];
  2000. struct net_device *dev;
  2001. LIST_HEAD(list_kill);
  2002. int err;
  2003. struct geneve_config cfg = {
  2004. .df = GENEVE_DF_UNSET,
  2005. .use_udp6_rx_checksums = true,
  2006. .ttl_inherit = false,
  2007. .collect_md = true,
  2008. .port_min = 1,
  2009. .port_max = USHRT_MAX,
  2010. };
  2011. memset(tb, 0, sizeof(tb));
  2012. dev = rtnl_create_link(net, name, name_assign_type,
  2013. &geneve_link_ops, tb, NULL);
  2014. if (IS_ERR(dev))
  2015. return dev;
  2016. init_tnl_info(&cfg.info, dst_port);
  2017. err = geneve_configure(net, dev, NULL, &cfg);
  2018. if (err) {
  2019. free_netdev(dev);
  2020. return ERR_PTR(err);
  2021. }
  2022. /* openvswitch users expect packet sizes to be unrestricted,
  2023. * so set the largest MTU we can.
  2024. */
  2025. err = geneve_change_mtu(dev, IP_MAX_MTU);
  2026. if (err)
  2027. goto err;
  2028. err = rtnl_configure_link(dev, NULL, 0, NULL);
  2029. if (err < 0)
  2030. goto err;
  2031. return dev;
  2032. err:
  2033. geneve_dellink(dev, &list_kill);
  2034. unregister_netdevice_many(&list_kill);
  2035. return ERR_PTR(err);
  2036. }
  2037. EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
  2038. static int geneve_netdevice_event(struct notifier_block *unused,
  2039. unsigned long event, void *ptr)
  2040. {
  2041. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  2042. if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
  2043. geneve_offload_rx_ports(dev, true);
  2044. else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
  2045. geneve_offload_rx_ports(dev, false);
  2046. return NOTIFY_DONE;
  2047. }
  2048. static struct notifier_block geneve_notifier_block __read_mostly = {
  2049. .notifier_call = geneve_netdevice_event,
  2050. };
  2051. static __net_init int geneve_init_net(struct net *net)
  2052. {
  2053. struct geneve_net *gn = net_generic(net, geneve_net_id);
  2054. INIT_LIST_HEAD(&gn->geneve_list);
  2055. INIT_LIST_HEAD(&gn->sock_list);
  2056. return 0;
  2057. }
  2058. static void __net_exit geneve_exit_rtnl_net(struct net *net,
  2059. struct list_head *dev_to_kill)
  2060. {
  2061. struct geneve_net *gn = net_generic(net, geneve_net_id);
  2062. struct geneve_dev *geneve, *next;
  2063. list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
  2064. geneve_dellink(geneve->dev, dev_to_kill);
  2065. }
  2066. static void __net_exit geneve_exit_net(struct net *net)
  2067. {
  2068. const struct geneve_net *gn = net_generic(net, geneve_net_id);
  2069. WARN_ON_ONCE(!list_empty(&gn->sock_list));
  2070. }
  2071. static struct pernet_operations geneve_net_ops = {
  2072. .init = geneve_init_net,
  2073. .exit_rtnl = geneve_exit_rtnl_net,
  2074. .exit = geneve_exit_net,
  2075. .id = &geneve_net_id,
  2076. .size = sizeof(struct geneve_net),
  2077. };
  2078. static int __init geneve_init_module(void)
  2079. {
  2080. int rc;
  2081. rc = register_pernet_subsys(&geneve_net_ops);
  2082. if (rc)
  2083. goto out1;
  2084. rc = register_netdevice_notifier(&geneve_notifier_block);
  2085. if (rc)
  2086. goto out2;
  2087. rc = rtnl_link_register(&geneve_link_ops);
  2088. if (rc)
  2089. goto out3;
  2090. return 0;
  2091. out3:
  2092. unregister_netdevice_notifier(&geneve_notifier_block);
  2093. out2:
  2094. unregister_pernet_subsys(&geneve_net_ops);
  2095. out1:
  2096. return rc;
  2097. }
  2098. late_initcall(geneve_init_module);
  2099. static void __exit geneve_cleanup_module(void)
  2100. {
  2101. rtnl_link_unregister(&geneve_link_ops);
  2102. unregister_netdevice_notifier(&geneve_notifier_block);
  2103. unregister_pernet_subsys(&geneve_net_ops);
  2104. }
  2105. module_exit(geneve_cleanup_module);
  2106. MODULE_LICENSE("GPL");
  2107. MODULE_VERSION(GENEVE_NETDEV_VER);
  2108. MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
  2109. MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
  2110. MODULE_ALIAS_RTNL_LINK("geneve");