seg6_local.c 69 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * SR-IPv6 implementation
  4. *
  5. * Authors:
  6. * David Lebrun <david.lebrun@uclouvain.be>
  7. * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
  8. */
  9. #include <linux/filter.h>
  10. #include <linux/types.h>
  11. #include <linux/skbuff.h>
  12. #include <linux/net.h>
  13. #include <linux/module.h>
  14. #include <net/ip.h>
  15. #include <net/lwtunnel.h>
  16. #include <net/netevent.h>
  17. #include <net/netns/generic.h>
  18. #include <net/ip6_fib.h>
  19. #include <net/route.h>
  20. #include <net/seg6.h>
  21. #include <linux/seg6.h>
  22. #include <linux/seg6_local.h>
  23. #include <net/addrconf.h>
  24. #include <net/ip6_route.h>
  25. #include <net/dst_cache.h>
  26. #include <net/ip_tunnels.h>
  27. #ifdef CONFIG_IPV6_SEG6_HMAC
  28. #include <net/seg6_hmac.h>
  29. #endif
  30. #include <net/seg6_local.h>
  31. #include <linux/etherdevice.h>
  32. #include <linux/bpf.h>
  33. #include <linux/netfilter.h>
/* Turn an attribute index into its single-bit flag, so that sets of
 * attributes can be represented as bitmasks.
 */
#define SEG6_F_ATTR(i) BIT(i)

/* forward declaration: only pointers to this type are needed at this point */
struct seg6_local_lwt;
/* callbacks used for customizing the creation and destruction of a behavior */
struct seg6_local_lwtunnel_ops {
	/* creation hook: receives the behavior instance, an opaque
	 * configuration pointer and the netlink extended ack.
	 * NOTE(review): presumably returns 0 on success and a negative errno
	 * on failure - confirm against the callers.
	 */
	int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
			   struct netlink_ext_ack *extack);
	/* destruction hook for the given behavior instance */
	void (*destroy_state)(struct seg6_local_lwt *slwt);
};
/* Descriptor of an SRv6 local behavior: its identifier, the attributes it
 * accepts, its packet handler and its creation/destruction callbacks.
 */
struct seg6_action_desc {
	/* behavior identifier */
	int action;
	/* attribute bitmask.
	 * NOTE(review): looks like the mask of *required* attributes, as
	 * opposed to @optattrs below - confirm.
	 */
	unsigned long attrs;

	/* The optattrs field is used for specifying all the optional
	 * attributes supported by a specific behavior.
	 * It means that if one of these attributes is not provided in the
	 * netlink message during the behavior creation, no error will be
	 * returned to userspace.
	 *
	 * Each attribute can be of only one of two (mutually exclusive) types:
	 * 1) required or 2) optional.
	 * Every user MUST obey this rule! If you set an attribute as
	 * required, the same attribute CANNOT be set as optional and vice
	 * versa.
	 */
	unsigned long optattrs;

	/* handler invoked with the packet and the behavior instance */
	int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
	/* NOTE(review): presumably the headroom (in bytes) the behavior
	 * always needs - confirm where it is consumed.
	 */
	int static_headroom;

	/* behavior-specific build/destroy callbacks */
	struct seg6_local_lwtunnel_ops slwt_ops;
};
/* Pairs a BPF program pointer with a name string (presumably the name of the
 * program - confirm where it is filled in).
 */
struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};
/* default length values (expressed in bits) for both Locator-Block and
 * Locator-Node Function.
 *
 * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be:
 *    i) greater than 0;
 *   ii) evenly divisible by 8. In other terms, the lengths of the
 *	 Locator-Block and Locator-Node Function must be byte-aligned (we can
 *	 relax this constraint in the future if really needed).
 *
 * Moreover, a third condition must hold:
 *  iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128.
 *
 * The correctness of the SEG6_LOCAL_LCBLOCK_DBITS and
 * SEG6_LOCAL_LCNODE_FN_DBITS values is checked during the kernel compilation.
 * If the compilation stops, check the value of these parameters to see if
 * they meet conditions (i), (ii) and (iii).
 */
#define SEG6_LOCAL_LCBLOCK_DBITS 32
#define SEG6_LOCAL_LCNODE_FN_DBITS 16
/* The following next_csid_chk_{cntr,lcblock,lcnode_fn}_bits macros can be
 * used directly to check whether the lengths (in bits) of Locator-Block and
 * Locator-Node Function are valid according to (i), (ii), (iii).
 *
 * NB: each macro evaluates to true when the checked length is INVALID:
 *  - next_csid_chk_cntr_bits() is true if the two lengths sum to more than
 *    128 bits;
 *  - next_csid_chk_lcblock_bits() is true if the length is zero, greater than
 *    120 or not a multiple of 8 (the argument is evaluated only once);
 *  - next_csid_chk_lcnode_fn_bits() applies the very same test as
 *    next_csid_chk_lcblock_bits().
 */
#define next_csid_chk_cntr_bits(blen, flen) \
	((blen) + (flen) > 128)

#define next_csid_chk_lcblock_bits(blen) \
({ \
	typeof(blen) __tmp = blen; \
	(!__tmp || __tmp > 120 || (__tmp & 0x07)); \
})

#define next_csid_chk_lcnode_fn_bits(flen) \
	next_csid_chk_lcblock_bits(flen)
/* flag indicating that flavors are set up for a given End* behavior */
#define SEG6_F_LOCAL_FLAVORS SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)

/* single-bit flag of the flavor operation SEG6_LOCAL_FLV_OP_<flvname> */
#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
#define SEG6_F_LOCAL_FLV_NEXT_CSID SEG6_F_LOCAL_FLV_OP(NEXT_CSID)
#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)

/* Supported RFC8986 Flavor operations are reported in this bitmask */
#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP

/* flavor operations supported by the End behavior: NEXT-C-SID plus the
 * supported RFC8986 flavors
 */
#define SEG6_LOCAL_END_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_NEXT_CSID | \
				     SEG6_LOCAL_FLV8986_SUPP_OPS)
/* flavor operations supported by the End.X behavior: NEXT-C-SID only */
#define SEG6_LOCAL_END_X_FLV_SUPP_OPS SEG6_F_LOCAL_FLV_NEXT_CSID
/* Flavors configuration of a behavior instance */
struct seg6_flavors_info {
	/* Flavor operations (bitmask of SEG6_F_LOCAL_FLV_* flags) */
	__u32 flv_ops;
	/* Locator-Block length, expressed in bits */
	__u8 lcblock_bits;
	/* Locator-Node Function length, expressed in bits */
	__u8 lcnode_func_bits;
};
/* Operating mode of the End.DT* behaviors; the invalid mode shares its value
 * with -EINVAL.
 */
enum seg6_end_dt_mode {
	DT_INVALID_MODE = -EINVAL,
	DT_LEGACY_MODE = 0,
	DT_VRF_MODE = 1,
};
/* State kept for the End.DT* behaviors */
struct seg6_end_dt_info {
	enum seg6_end_dt_mode mode;

	struct net *net;
	/* VRF device associated to the routing table used by the SRv6
	 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
	 */
	int vrf_ifindex;
	int vrf_table;

	/* tunneled packet family (IPv4 or IPv6).
	 * Protocol and header length are inferred from family.
	 */
	u16 family;
};
/* Per-CPU counters of a behavior instance: packets, bytes and errors, plus
 * the u64_stats synchronization object (@syncp) that goes with them.
 */
struct pcpu_seg6_local_counters {
	u64_stats_t packets;
	u64_stats_t bytes;
	u64_stats_t errors;

	struct u64_stats_sync syncp;
};
/* This struct groups all the SRv6 Behavior counters supported so far.
 *
 * put_nla_counters() makes use of this data structure to collect all counter
 * values after the per-CPU counter evaluation has been performed.
 * Finally, each counter value (in seg6_local_counters) is stored in the
 * corresponding netlink attribute and sent to user space.
 *
 * NB: we don't want to expose this structure to user space!
 */
struct seg6_local_counters {
	__u64 packets;
	__u64 bytes;
	__u64 errors;
};
/* Allocate the per-CPU counters through __netdev_alloc_pcpu_stats(); the
 * __GFP_ZERO flag is always OR-ed into the caller-provided @__gfp flags.
 */
#define seg6_local_alloc_pcpu_counters(__gfp) \
	__netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
				  ((__gfp) | __GFP_ZERO))

/* attribute flag for the counters attribute (SEG6_LOCAL_COUNTERS) */
#define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
/* State of a configured SRv6 local behavior instance. It is stored in the
 * data area of the lwtunnel state (see seg6_local_lwtunnel()).
 */
struct seg6_local_lwt {
	int action;
	struct ipv6_sr_hdr *srh;
	int table;
	struct in_addr nh4;
	/* IPv6 nexthop; used as lookup address by the End.X processing */
	struct in6_addr nh6;
	int iif;
	/* output interface; passed to the route lookup by the End.X
	 * processing
	 */
	int oif;
	struct bpf_lwt_prog bpf;
#ifdef CONFIG_NET_L3_MASTER_DEV
	struct seg6_end_dt_info dt_info;
#endif
	/* flavors configuration (operations and NEXT-C-SID lengths) */
	struct seg6_flavors_info flv_info;

	struct pcpu_seg6_local_counters __percpu *pcpu_counters;

	int headroom;
	struct seg6_action_desc *desc;
	/* unlike the required attrs, we have to track the optional attributes
	 * that have been effectively parsed.
	 */
	unsigned long parsed_optattrs;
};
  179. static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
  180. {
  181. return (struct seg6_local_lwt *)lwt->data;
  182. }
  183. static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
  184. {
  185. struct ipv6_sr_hdr *srh;
  186. srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH);
  187. if (!srh)
  188. return NULL;
  189. #ifdef CONFIG_IPV6_SEG6_HMAC
  190. if (!seg6_hmac_validate_skb(skb))
  191. return NULL;
  192. #endif
  193. return srh;
  194. }
  195. static bool decap_and_validate(struct sk_buff *skb, int proto)
  196. {
  197. struct ipv6_sr_hdr *srh;
  198. unsigned int off = 0;
  199. srh = seg6_get_srh(skb, 0);
  200. if (srh && srh->segments_left > 0)
  201. return false;
  202. #ifdef CONFIG_IPV6_SEG6_HMAC
  203. if (srh && !seg6_hmac_validate_skb(skb))
  204. return false;
  205. #endif
  206. if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
  207. return false;
  208. if (!pskb_pull(skb, off))
  209. return false;
  210. skb_postpull_rcsum(skb, skb_network_header(skb), off);
  211. skb_reset_network_header(skb);
  212. skb_reset_transport_header(skb);
  213. if (iptunnel_pull_offloads(skb))
  214. return false;
  215. return true;
  216. }
  217. static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
  218. {
  219. struct in6_addr *addr;
  220. srh->segments_left--;
  221. addr = srh->segments + srh->segments_left;
  222. *daddr = *addr;
  223. }
/* Perform an IPv6 route lookup for @skb and attach the result to the packet.
 *
 * @skb: packet being processed; its current dst is dropped and replaced
 * @nhaddr: lookup address; if NULL, the IPv6 destination address of the
 *	    packet is used instead
 * @tbl_id: routing table to look into; 0 means no specific table
 * @local_delivery: if false, a successful lookup that resolves to a loopback
 *		    device (IFF_LOOPBACK) is discarded
 * @oif: output interface index set in the flow; 0 means none
 *
 * The lookup is chosen as follows:
 *  - no @tbl_id and no @oif: ip6_route_input_lookup();
 *  - @tbl_id set: ip6_pol_route() on that table (if the table does not exist,
 *    the blackhole entry is used);
 *  - only @oif set: ip6_route_output().
 *
 * Whenever no usable dst is available, the per-netns blackhole entry is
 * attached to the packet.
 *
 * Returns the error field of the dst finally attached to @skb.
 */
static int
seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id, bool local_delivery, int oif)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int dev_flags = 0;

	/* build the flow key from the packet header and the arguments */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_iif = skb->dev->ifindex;
	fl6.flowi6_oif = oif;
	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
	fl6.saddr = hdr->saddr;
	fl6.flowlabel = ip6_flowinfo(hdr);
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = hdr->nexthdr;

	/* an explicit nexthop address has been provided by the caller */
	if (nhaddr)
		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;

	if (!tbl_id && !oif) {
		dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
	} else if (tbl_id) {
		struct fib6_table *table;

		table = fib6_get_table(net, tbl_id);
		if (!table)
			goto out;

		rt = ip6_pol_route(net, table, oif, &fl6, skb, flags);
		dst = &rt->dst;
	} else {
		dst = ip6_route_output(net, NULL, &fl6);
	}

	/* we want to discard traffic destined for local packet processing,
	 * if @local_delivery is set to false.
	 */
	if (!local_delivery)
		dev_flags |= IFF_LOOPBACK;

	/* only error-free results are filtered here; a dst carrying an error
	 * is kept so that its error code is returned to the caller
	 */
	if (dst && (dst_dev(dst)->flags & dev_flags) && !dst->error) {
		dst_release(dst);
		dst = NULL;
	}

out:
	/* fall back to the blackhole entry, taking a reference on it */
	if (!dst) {
		rt = net->ipv6.ip6_blk_hole_entry;
		dst = &rt->dst;
		dst_hold(dst);
	}

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	return dst->error;
}
  276. int seg6_lookup_nexthop(struct sk_buff *skb,
  277. struct in6_addr *nhaddr, u32 tbl_id)
  278. {
  279. return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false, 0);
  280. }
  281. static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo)
  282. {
  283. return finfo->lcblock_bits >> 3;
  284. }
  285. static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo)
  286. {
  287. return finfo->lcnode_func_bits >> 3;
  288. }
  289. static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr,
  290. const struct seg6_flavors_info *finfo)
  291. {
  292. __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
  293. __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
  294. __u8 arg_octects;
  295. int i;
  296. arg_octects = 16 - blk_octects - fnc_octects;
  297. for (i = 0; i < arg_octects; ++i) {
  298. if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00)
  299. return false;
  300. }
  301. return true;
  302. }
  303. /* assume that DA.Argument length > 0 */
  304. static void seg6_next_csid_advance_arg(struct in6_addr *addr,
  305. const struct seg6_flavors_info *finfo)
  306. {
  307. __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo);
  308. __u8 blk_octects = seg6_flv_lcblock_octects(finfo);
  309. /* advance DA.Argument */
  310. memmove(&addr->s6_addr[blk_octects],
  311. &addr->s6_addr[blk_octects + fnc_octects],
  312. 16 - blk_octects - fnc_octects);
  313. memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
  314. }
  315. static int input_action_end_finish(struct sk_buff *skb,
  316. struct seg6_local_lwt *slwt)
  317. {
  318. seg6_lookup_nexthop(skb, NULL, 0);
  319. return dst_input(skb);
  320. }
  321. static int input_action_end_core(struct sk_buff *skb,
  322. struct seg6_local_lwt *slwt)
  323. {
  324. struct ipv6_sr_hdr *srh;
  325. srh = get_and_validate_srh(skb);
  326. if (!srh)
  327. goto drop;
  328. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  329. return input_action_end_finish(skb, slwt);
  330. drop:
  331. kfree_skb(skb);
  332. return -EINVAL;
  333. }
  334. static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  335. {
  336. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  337. struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
  338. if (seg6_next_csid_is_arg_zero(daddr, finfo))
  339. return input_action_end_core(skb, slwt);
  340. /* update DA */
  341. seg6_next_csid_advance_arg(daddr, finfo);
  342. return input_action_end_finish(skb, slwt);
  343. }
  344. static int input_action_end_x_finish(struct sk_buff *skb,
  345. struct seg6_local_lwt *slwt)
  346. {
  347. seg6_lookup_any_nexthop(skb, &slwt->nh6, 0, false, slwt->oif);
  348. return dst_input(skb);
  349. }
  350. static int input_action_end_x_core(struct sk_buff *skb,
  351. struct seg6_local_lwt *slwt)
  352. {
  353. struct ipv6_sr_hdr *srh;
  354. srh = get_and_validate_srh(skb);
  355. if (!srh)
  356. goto drop;
  357. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  358. return input_action_end_x_finish(skb, slwt);
  359. drop:
  360. kfree_skb(skb);
  361. return -EINVAL;
  362. }
  363. static int end_x_next_csid_core(struct sk_buff *skb,
  364. struct seg6_local_lwt *slwt)
  365. {
  366. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  367. struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
  368. if (seg6_next_csid_is_arg_zero(daddr, finfo))
  369. return input_action_end_x_core(skb, slwt);
  370. /* update DA */
  371. seg6_next_csid_advance_arg(daddr, finfo);
  372. return input_action_end_x_finish(skb, slwt);
  373. }
  374. static bool seg6_next_csid_enabled(__u32 fops)
  375. {
  376. return fops & SEG6_F_LOCAL_FLV_NEXT_CSID;
  377. }
  378. /* Processing of SRv6 End, End.X, and End.T behaviors can be extended through
  379. * the flavors framework. These behaviors must report the subset of (flavor)
  380. * operations they currently implement. In this way, if a user specifies a
  381. * flavor combination that is not supported by a given End* behavior, the
  382. * kernel refuses to instantiate the tunnel reporting the error.
  383. */
  384. static int seg6_flv_supp_ops_by_action(int action, __u32 *fops)
  385. {
  386. switch (action) {
  387. case SEG6_LOCAL_ACTION_END:
  388. *fops = SEG6_LOCAL_END_FLV_SUPP_OPS;
  389. break;
  390. case SEG6_LOCAL_ACTION_END_X:
  391. *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS;
  392. break;
  393. default:
  394. return -EOPNOTSUPP;
  395. }
  396. return 0;
  397. }
/* We describe the packet state in relation to the absence/presence of the SRH
 * and the Segments Left (SL) field.
 * For our purposes, it is not necessary to record the exact value of the SL
 * when the SID List consists of two or more segments.
 */
enum seg6_local_pktinfo {
	/* the order really matters! (seg6_get_srh_pktinfo() relies on SL_ZERO
	 * and SL_ONE being consecutive values)
	 */
	SEG6_LOCAL_PKTINFO_NOHDR = 0,
	SEG6_LOCAL_PKTINFO_SL_ZERO,
	SEG6_LOCAL_PKTINFO_SL_ONE,
	SEG6_LOCAL_PKTINFO_SL_MORE,
	__SEG6_LOCAL_PKTINFO_MAX,
};

/* highest valid pktinfo value */
#define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1)
  412. static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh)
  413. {
  414. __u8 sgl;
  415. if (!srh)
  416. return SEG6_LOCAL_PKTINFO_NOHDR;
  417. sgl = srh->segments_left;
  418. if (sgl < 2)
  419. return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl;
  420. return SEG6_LOCAL_PKTINFO_SL_MORE;
  421. }
/* Actions that can be stored in the RFC8986 flavors action table. UNSPEC is
 * zero, hence it is the value of every table entry that is not explicitly
 * initialized.
 */
enum seg6_local_flv_action {
	SEG6_LOCAL_FLV_ACT_UNSPEC = 0,
	SEG6_LOCAL_FLV_ACT_END,
	SEG6_LOCAL_FLV_ACT_PSP,
	SEG6_LOCAL_FLV_ACT_USP,
	SEG6_LOCAL_FLV_ACT_USD,
	__SEG6_LOCAL_FLV_ACT_MAX
};

/* highest valid action value */
#define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1)
/* The action table for RFC8986 flavors (see the flv8986_act_tbl below)
 * contains the actions (i.e. processing operations) to be applied on packets
 * when flavors are configured for an End* behavior.
 * By combining the pktinfo data and the flavors mask, the macro computes the
 * index used to access the elements (actions) stored in the action table.
 * The index is structured as follows:
 *
 *                     index
 *       _______________/\________________
 *      /                                 \
 *      +----------------+----------------+
 *      |       pf       |      afm       |
 *      +----------------+----------------+
 *        ph-1 ... p1 p0   fk-1 ... f1 f0
 *     MSB                               LSB
 *
 * where:
 *  - 'afm' (adjusted flavor mask) is the mask containing a combination of the
 *    RFC8986 flavors currently supported. 'afm' corresponds to the @fm
 *    argument of the macro whose value is right-shifted by 1 bit. By doing so,
 *    we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is
 *    never used here;
 *  - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of
 *    the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as
 *    argument to the macro.
 */
#define flv8986_act_tbl_idx(pf, fm) \
	((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) | \
	  ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP)
/* We compute the size of the action table by considering the RFC8986 flavors
 * actually supported by the kernel. In this way, the size is automatically
 * adjusted when new flavors are supported.
 *
 * The size is the index obtained from the highest pktinfo value combined with
 * the full mask of supported flavors, rounded up to a power of two.
 */
#define FLV8986_ACT_TBL_SIZE \
	roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX, \
					       SEG6_LOCAL_FLV8986_SUPP_OPS))
/* tbl_cfg(act, pf, fm) macro is used to easily configure the action
 * table; it accepts 3 arguments:
 *     i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing
 *        the action that should be applied on the packet;
 *    ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet
 *        info about the lack/presence of SRH, SRH with SL = 0, etc;
 *   iii) @fm, the mask of flavors.
 *
 * It expands to a designated initializer for the table entry selected by
 * (@pf, @fm).
 */
#define tbl_cfg(act, pf, fm) \
	[flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf, \
			     (fm))] = SEG6_LOCAL_FLV_ACT_##act

/* shorthand for improving readability */
#define F_PSP SEG6_F_LOCAL_FLV_PSP

/* The table contains, for each combination of the pktinfo data and
 * flavors, the action that should be taken on a packet (e.g.
 * "standard" Endpoint processing, Penultimate Segment Pop, etc).
 *
 * By default, table entries not explicitly configured are initialized with the
 * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of
 * discarding the processed packet.
 */
static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = {
	/* PSP variant for packets carrying an SRH with SL = 1 */
	tbl_cfg(PSP, SL_ONE, F_PSP),
	/* End for packets carrying an SRH with SL > 1 */
	tbl_cfg(END, SL_MORE, F_PSP),
};

/* the helper macros are needed only for initializing the table */
#undef F_PSP
#undef tbl_cfg
  496. /* For each flavor defined in RFC8986 (or a combination of them) an action is
  497. * performed on the packet. The specific action depends on:
  498. * - info extracted from the packet (i.e. pktinfo data) regarding the
  499. * lack/presence of the SRH, and if the SRH is available, on the value of
  500. * Segment Left field;
  501. * - the mask of flavors configured for the specific SRv6 End* behavior.
  502. *
  503. * The function combines both the pkinfo and the flavors mask to evaluate the
  504. * corresponding action to be taken on the packet.
  505. */
  506. static enum seg6_local_flv_action
  507. seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask)
  508. {
  509. unsigned long index;
  510. /* check if the provided mask of flavors is supported */
  511. if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS))
  512. return SEG6_LOCAL_FLV_ACT_UNSPEC;
  513. index = flv8986_act_tbl_idx(pinfo, flvmask);
  514. if (unlikely(index >= FLV8986_ACT_TBL_SIZE))
  515. return SEG6_LOCAL_FLV_ACT_UNSPEC;
  516. return flv8986_act_tbl[index];
  517. }
/* Remove the SRH located at offset @srhoff from the packet.
 *
 * @skb: packet to be mangled; skb->data must be aligned with
 *	 skb->network_header
 * @srhoff: offset of the SRH, relative to skb->data
 *
 * The IPv6 header and any extension header preceding the SRH are moved
 * forward over the SRH; the nexthdr field of the header that pointed to the
 * SRH, the IPv6 payload length, the network/mac/transport header offsets and
 * the receive checksum are updated accordingly.
 *
 * Returns true on success, false otherwise. Note that some failures are
 * detected after the packet has already been modified.
 */
static bool seg6_pop_srh(struct sk_buff *skb, int srhoff)
{
	struct ipv6_sr_hdr *srh;
	struct ipv6hdr *iph;
	__u8 srh_nexthdr;
	int thoff = -1;
	int srhlen;
	int nhlen;

	/* NOTE(review): @srhoff is an int compared against a size_t, so the
	 * comparison is carried out on unsigned values and a negative offset
	 * would not be rejected by this test - confirm that callers never
	 * pass one.
	 */
	if (unlikely(srhoff < sizeof(*iph) ||
		     !pskb_may_pull(skb, srhoff + sizeof(*srh))))
		return false;

	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	srhlen = ipv6_optlen(srh);

	/* we are about to mangle the pkt, let's check if we can write on it */
	if (unlikely(skb_ensure_writable(skb, srhoff + srhlen)))
		return false;

	/* skb_ensure_writable() may change skb pointers; evaluate srh again */
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	/* save the SRH nexthdr: the SRH is overwritten by the memmove below */
	srh_nexthdr = srh->nexthdr;

	/* no transport header to fix up: thoff stays at -1 */
	if (unlikely(!skb_transport_header_was_set(skb)))
		goto pull;

	nhlen = skb_network_header_len(skb);
	/* we have to deal with the transport header: it could be set before
	 * the SRH, after the SRH, or within it (which is considered wrong,
	 * however).
	 */
	if (likely(nhlen <= srhoff))
		thoff = nhlen;
	else if (nhlen >= srhoff + srhlen)
		/* transport_header is set after the SRH */
		thoff = nhlen - srhlen;
	else
		/* transport_header falls inside the SRH; hence, we can't
		 * restore the transport_header pointer properly after
		 * SRH removing operation.
		 */
		return false;

pull:
	/* we need to pop the SRH:
	 *  1) first of all, we pull out everything from IPv6 header up to SRH
	 *     (included) evaluating also the rcsum;
	 *  2) we overwrite (and then remove) the SRH by properly moving the
	 *     IPv6 along with any extension header that precedes the SRH;
	 *  3) At the end, we push back the pulled headers (except for SRH,
	 *     obviously).
	 */
	skb_pull_rcsum(skb, srhoff + srhlen);
	memmove(skb_network_header(skb) + srhlen, skb_network_header(skb),
		srhoff);
	skb_push(skb, srhoff);

	skb_reset_network_header(skb);
	skb_mac_header_rebuild(skb);
	if (likely(thoff >= 0))
		skb_set_transport_header(skb, thoff);

	iph = ipv6_hdr(skb);
	if (iph->nexthdr == NEXTHDR_ROUTING) {
		/* the SRH immediately followed the IPv6 header */
		iph->nexthdr = srh_nexthdr;
	} else {
		/* we must look for the extension header (EXTH, for short) that
		 * immediately precedes the SRH we have just removed.
		 * Then, we update the value of the EXTH nexthdr with the one
		 * contained in the SRH nexthdr.
		 */
		unsigned int off = sizeof(*iph);
		struct ipv6_opt_hdr *hp, _hdr;
		__u8 nexthdr = iph->nexthdr;

		for (;;) {
			if (unlikely(!ipv6_ext_hdr(nexthdr) ||
				     nexthdr == NEXTHDR_NONE))
				return false;

			hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr);
			if (unlikely(!hp))
				return false;

			if (hp->nexthdr == NEXTHDR_ROUTING) {
				hp->nexthdr = srh_nexthdr;
				break;
			}

			switch (nexthdr) {
			case NEXTHDR_FRAGMENT:
				fallthrough;
			case NEXTHDR_AUTH:
				/* we expect SRH before FRAG and AUTH */
				return false;
			default:
				off += ipv6_optlen(hp);
				break;
			}

			nexthdr = hp->nexthdr;
		}
	}

	/* the packet is now srhlen bytes shorter */
	iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	/* account for the headers pushed back in front of the payload */
	skb_postpush_rcsum(skb, iph, srhoff);

	return true;
}
  613. /* process the packet on the basis of the RFC8986 flavors set for the given
  614. * SRv6 End behavior instance.
  615. */
  616. static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  617. {
  618. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  619. enum seg6_local_flv_action action;
  620. enum seg6_local_pktinfo pinfo;
  621. struct ipv6_sr_hdr *srh;
  622. __u32 flvmask;
  623. int srhoff;
  624. srh = seg6_get_srh(skb, 0);
  625. srhoff = srh ? ((unsigned char *)srh - skb->data) : 0;
  626. pinfo = seg6_get_srh_pktinfo(srh);
  627. #ifdef CONFIG_IPV6_SEG6_HMAC
  628. if (srh && !seg6_hmac_validate_skb(skb))
  629. goto drop;
  630. #endif
  631. flvmask = finfo->flv_ops;
  632. if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) {
  633. pr_warn_once("seg6local: invalid RFC8986 flavors\n");
  634. goto drop;
  635. }
  636. /* retrieve the action triggered by the combination of pktinfo data and
  637. * the flavors mask.
  638. */
  639. action = seg6_local_flv8986_act_lookup(pinfo, flvmask);
  640. switch (action) {
  641. case SEG6_LOCAL_FLV_ACT_END:
  642. /* process the packet as the "standard" End behavior */
  643. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  644. break;
  645. case SEG6_LOCAL_FLV_ACT_PSP:
  646. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  647. if (unlikely(!seg6_pop_srh(skb, srhoff)))
  648. goto drop;
  649. break;
  650. case SEG6_LOCAL_FLV_ACT_UNSPEC:
  651. fallthrough;
  652. default:
  653. /* by default, we drop the packet since we could not find a
  654. * suitable action.
  655. */
  656. goto drop;
  657. }
  658. return input_action_end_finish(skb, slwt);
  659. drop:
  660. kfree_skb(skb);
  661. return -EINVAL;
  662. }
  663. /* regular endpoint function */
  664. static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  665. {
  666. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  667. __u32 fops = finfo->flv_ops;
  668. if (!fops)
  669. return input_action_end_core(skb, slwt);
  670. /* check for the presence of NEXT-C-SID since it applies first */
  671. if (seg6_next_csid_enabled(fops))
  672. return end_next_csid_core(skb, slwt);
  673. /* the specific processing function to be performed on the packet
  674. * depends on the combination of flavors defined in RFC8986 and some
  675. * information extracted from the packet, e.g. presence/absence of SRH,
  676. * Segment Left = 0, etc.
  677. */
  678. return end_flv8986_core(skb, slwt);
  679. }
  680. /* regular endpoint, and forward to specified nexthop */
  681. static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  682. {
  683. const struct seg6_flavors_info *finfo = &slwt->flv_info;
  684. __u32 fops = finfo->flv_ops;
  685. /* check for the presence of NEXT-C-SID since it applies first */
  686. if (seg6_next_csid_enabled(fops))
  687. return end_x_next_csid_core(skb, slwt);
  688. return input_action_end_x_core(skb, slwt);
  689. }
  690. static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  691. {
  692. struct ipv6_sr_hdr *srh;
  693. srh = get_and_validate_srh(skb);
  694. if (!srh)
  695. goto drop;
  696. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  697. seg6_lookup_nexthop(skb, NULL, slwt->table);
  698. return dst_input(skb);
  699. drop:
  700. kfree_skb(skb);
  701. return -EINVAL;
  702. }
  703. /* decapsulate and forward inner L2 frame on specified interface */
  704. static int input_action_end_dx2(struct sk_buff *skb,
  705. struct seg6_local_lwt *slwt)
  706. {
  707. struct net *net = dev_net(skb->dev);
  708. struct net_device *odev;
  709. struct ethhdr *eth;
  710. if (!decap_and_validate(skb, IPPROTO_ETHERNET))
  711. goto drop;
  712. if (!pskb_may_pull(skb, ETH_HLEN))
  713. goto drop;
  714. skb_reset_mac_header(skb);
  715. eth = (struct ethhdr *)skb->data;
  716. /* To determine the frame's protocol, we assume it is 802.3. This avoids
  717. * a call to eth_type_trans(), which is not really relevant for our
  718. * use case.
  719. */
  720. if (!eth_proto_is_802_3(eth->h_proto))
  721. goto drop;
  722. odev = dev_get_by_index_rcu(net, slwt->oif);
  723. if (!odev)
  724. goto drop;
  725. /* As we accept Ethernet frames, make sure the egress device is of
  726. * the correct type.
  727. */
  728. if (odev->type != ARPHRD_ETHER)
  729. goto drop;
  730. if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
  731. goto drop;
  732. skb_orphan(skb);
  733. if (skb_warn_if_lro(skb))
  734. goto drop;
  735. skb_forward_csum(skb);
  736. if (skb->len - ETH_HLEN > odev->mtu)
  737. goto drop;
  738. skb->dev = odev;
  739. skb->protocol = eth->h_proto;
  740. return dev_queue_xmit(skb);
  741. drop:
  742. kfree_skb(skb);
  743. return -EINVAL;
  744. }
  745. static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
  746. struct sk_buff *skb)
  747. {
  748. struct dst_entry *orig_dst = skb_dst(skb);
  749. struct in6_addr *nhaddr = NULL;
  750. struct seg6_local_lwt *slwt;
  751. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  752. /* The inner packet is not associated to any local interface,
  753. * so we do not call netif_rx().
  754. *
  755. * If slwt->nh6 is set to ::, then lookup the nexthop for the
  756. * inner packet's DA. Otherwise, use the specified nexthop.
  757. */
  758. if (!ipv6_addr_any(&slwt->nh6))
  759. nhaddr = &slwt->nh6;
  760. seg6_lookup_nexthop(skb, nhaddr, 0);
  761. return dst_input(skb);
  762. }
  763. /* decapsulate and forward to specified nexthop */
  764. static int input_action_end_dx6(struct sk_buff *skb,
  765. struct seg6_local_lwt *slwt)
  766. {
  767. /* this function accepts IPv6 encapsulated packets, with either
  768. * an SRH with SL=0, or no SRH.
  769. */
  770. if (!decap_and_validate(skb, IPPROTO_IPV6))
  771. goto drop;
  772. if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
  773. goto drop;
  774. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  775. nf_reset_ct(skb);
  776. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  777. return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
  778. dev_net(skb->dev), NULL, skb, skb->dev,
  779. NULL, input_action_end_dx6_finish);
  780. return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
  781. drop:
  782. kfree_skb(skb);
  783. return -EINVAL;
  784. }
  785. static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
  786. struct sk_buff *skb)
  787. {
  788. struct dst_entry *orig_dst = skb_dst(skb);
  789. enum skb_drop_reason reason;
  790. struct seg6_local_lwt *slwt;
  791. struct iphdr *iph;
  792. __be32 nhaddr;
  793. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  794. iph = ip_hdr(skb);
  795. nhaddr = slwt->nh4.s_addr ?: iph->daddr;
  796. skb_dst_drop(skb);
  797. reason = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
  798. if (reason) {
  799. kfree_skb_reason(skb, reason);
  800. return -EINVAL;
  801. }
  802. return dst_input(skb);
  803. }
  804. static int input_action_end_dx4(struct sk_buff *skb,
  805. struct seg6_local_lwt *slwt)
  806. {
  807. if (!decap_and_validate(skb, IPPROTO_IPIP))
  808. goto drop;
  809. if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  810. goto drop;
  811. skb->protocol = htons(ETH_P_IP);
  812. skb_set_transport_header(skb, sizeof(struct iphdr));
  813. nf_reset_ct(skb);
  814. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  815. return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
  816. dev_net(skb->dev), NULL, skb, skb->dev,
  817. NULL, input_action_end_dx4_finish);
  818. return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
  819. drop:
  820. kfree_skb(skb);
  821. return -EINVAL;
  822. }
  823. #ifdef CONFIG_NET_L3_MASTER_DEV
  824. static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
  825. {
  826. const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
  827. return nli->nl_net;
  828. }
  829. static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
  830. u16 family, struct netlink_ext_ack *extack)
  831. {
  832. struct seg6_end_dt_info *info = &slwt->dt_info;
  833. int vrf_ifindex;
  834. struct net *net;
  835. net = fib6_config_get_net(cfg);
  836. /* note that vrf_table was already set by parse_nla_vrftable() */
  837. vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
  838. info->vrf_table);
  839. if (vrf_ifindex < 0) {
  840. if (vrf_ifindex == -EPERM) {
  841. NL_SET_ERR_MSG(extack,
  842. "Strict mode for VRF is disabled");
  843. } else if (vrf_ifindex == -ENODEV) {
  844. NL_SET_ERR_MSG(extack,
  845. "Table has no associated VRF device");
  846. } else {
  847. pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
  848. vrf_ifindex);
  849. }
  850. return vrf_ifindex;
  851. }
  852. info->net = net;
  853. info->vrf_ifindex = vrf_ifindex;
  854. info->family = family;
  855. info->mode = DT_VRF_MODE;
  856. return 0;
  857. }
  858. /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
  859. * routes the IPv4/IPv6 packet by looking at the configured routing table.
  860. *
  861. * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
  862. * Routing Header packets) from several interfaces and the outer IPv6
  863. * destination address (DA) is used for retrieving the specific instance of the
  864. * End.DT4/DT6 behavior that should process the packets.
  865. *
  866. * However, the inner IPv4/IPv6 packet is not really bound to any receiving
  867. * interface and thus the End.DT4/DT6 sets the VRF (associated with the
  868. * corresponding routing table) as the *receiving* interface.
  869. * In other words, the End.DT4/DT6 processes a packet as if it has been received
  870. * directly by the VRF (and not by one of its slave devices, if any).
  871. * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
  872. * according to the routing table configured by the End.DT4/DT6 instance.
  873. *
  874. * This design allows you to get some interesting features like:
  875. * 1) the statistics on rx packets;
  876. * 2) the possibility to install a packet sniffer on the receiving interface
  877. * (the VRF one) for looking at the incoming packets;
  878. * 3) the possibility to leverage the netfilter prerouting hook for the inner
  879. * IPv4 packet.
  880. *
  881. * This function returns:
  882. * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
  883. * - NULL when the skb is consumed by the VRF rcv handler;
  884. * - a pointer which encodes a negative error number in case of error.
  885. * Note that in this case, the function takes care of freeing the skb.
  886. */
  887. static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
  888. struct net_device *dev)
  889. {
  890. /* based on l3mdev_ip_rcv; we are only interested in the master */
  891. if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
  892. goto drop;
  893. if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
  894. goto drop;
  895. /* the decap packet IPv4/IPv6 does not come with any mac header info.
  896. * We must unset the mac header to allow the VRF device to rebuild it,
  897. * just in case there is a sniffer attached on the device.
  898. */
  899. skb_unset_mac_header(skb);
  900. skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
  901. if (!skb)
  902. /* the skb buffer was consumed by the handler */
  903. return NULL;
  904. /* when a packet is received by a VRF or by one of its slaves, the
  905. * master device reference is set into the skb.
  906. */
  907. if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
  908. goto drop;
  909. return skb;
  910. drop:
  911. kfree_skb(skb);
  912. return ERR_PTR(-EINVAL);
  913. }
  914. static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
  915. struct seg6_end_dt_info *info)
  916. {
  917. int vrf_ifindex = info->vrf_ifindex;
  918. struct net *net = info->net;
  919. if (unlikely(vrf_ifindex < 0))
  920. goto error;
  921. if (unlikely(!net_eq(dev_net(skb->dev), net)))
  922. goto error;
  923. return dev_get_by_index_rcu(net, vrf_ifindex);
  924. error:
  925. return NULL;
  926. }
  927. static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
  928. struct seg6_local_lwt *slwt, u16 family)
  929. {
  930. struct seg6_end_dt_info *info = &slwt->dt_info;
  931. struct net_device *vrf;
  932. __be16 protocol;
  933. int hdrlen;
  934. vrf = end_dt_get_vrf_rcu(skb, info);
  935. if (unlikely(!vrf))
  936. goto drop;
  937. switch (family) {
  938. case AF_INET:
  939. protocol = htons(ETH_P_IP);
  940. hdrlen = sizeof(struct iphdr);
  941. break;
  942. case AF_INET6:
  943. protocol = htons(ETH_P_IPV6);
  944. hdrlen = sizeof(struct ipv6hdr);
  945. break;
  946. case AF_UNSPEC:
  947. fallthrough;
  948. default:
  949. goto drop;
  950. }
  951. if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
  952. pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
  953. goto drop;
  954. }
  955. skb->protocol = protocol;
  956. skb_dst_drop(skb);
  957. skb_set_transport_header(skb, hdrlen);
  958. nf_reset_ct(skb);
  959. return end_dt_vrf_rcv(skb, family, vrf);
  960. drop:
  961. kfree_skb(skb);
  962. return ERR_PTR(-EINVAL);
  963. }
  964. static int input_action_end_dt4(struct sk_buff *skb,
  965. struct seg6_local_lwt *slwt)
  966. {
  967. enum skb_drop_reason reason;
  968. struct iphdr *iph;
  969. if (!decap_and_validate(skb, IPPROTO_IPIP))
  970. goto drop;
  971. if (!pskb_may_pull(skb, sizeof(struct iphdr)))
  972. goto drop;
  973. skb = end_dt_vrf_core(skb, slwt, AF_INET);
  974. if (!skb)
  975. /* packet has been processed and consumed by the VRF */
  976. return 0;
  977. if (IS_ERR(skb))
  978. return PTR_ERR(skb);
  979. iph = ip_hdr(skb);
  980. reason = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
  981. if (unlikely(reason))
  982. goto drop;
  983. return dst_input(skb);
  984. drop:
  985. kfree_skb(skb);
  986. return -EINVAL;
  987. }
  988. static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
  989. struct netlink_ext_ack *extack)
  990. {
  991. return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
  992. }
  993. static enum
  994. seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
  995. {
  996. unsigned long parsed_optattrs = slwt->parsed_optattrs;
  997. bool legacy, vrfmode;
  998. legacy = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE));
  999. vrfmode = !!(parsed_optattrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE));
  1000. if (!(legacy ^ vrfmode))
  1001. /* both are absent or present: invalid DT6 mode */
  1002. return DT_INVALID_MODE;
  1003. return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
  1004. }
  1005. static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
  1006. {
  1007. struct seg6_end_dt_info *info = &slwt->dt_info;
  1008. return info->mode;
  1009. }
  1010. static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
  1011. struct netlink_ext_ack *extack)
  1012. {
  1013. enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
  1014. struct seg6_end_dt_info *info = &slwt->dt_info;
  1015. switch (mode) {
  1016. case DT_LEGACY_MODE:
  1017. info->mode = DT_LEGACY_MODE;
  1018. return 0;
  1019. case DT_VRF_MODE:
  1020. return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
  1021. default:
  1022. NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
  1023. return -EINVAL;
  1024. }
  1025. }
  1026. #endif
  1027. static int input_action_end_dt6(struct sk_buff *skb,
  1028. struct seg6_local_lwt *slwt)
  1029. {
  1030. if (!decap_and_validate(skb, IPPROTO_IPV6))
  1031. goto drop;
  1032. if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
  1033. goto drop;
  1034. #ifdef CONFIG_NET_L3_MASTER_DEV
  1035. if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
  1036. goto legacy_mode;
  1037. /* DT6_VRF_MODE */
  1038. skb = end_dt_vrf_core(skb, slwt, AF_INET6);
  1039. if (!skb)
  1040. /* packet has been processed and consumed by the VRF */
  1041. return 0;
  1042. if (IS_ERR(skb))
  1043. return PTR_ERR(skb);
  1044. /* note: this time we do not need to specify the table because the VRF
  1045. * takes care of selecting the correct table.
  1046. */
  1047. seg6_lookup_any_nexthop(skb, NULL, 0, true, 0);
  1048. return dst_input(skb);
  1049. legacy_mode:
  1050. #endif
  1051. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  1052. seg6_lookup_any_nexthop(skb, NULL, slwt->table, true, 0);
  1053. return dst_input(skb);
  1054. drop:
  1055. kfree_skb(skb);
  1056. return -EINVAL;
  1057. }
  1058. #ifdef CONFIG_NET_L3_MASTER_DEV
  1059. static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
  1060. struct netlink_ext_ack *extack)
  1061. {
  1062. return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
  1063. }
  1064. static int input_action_end_dt46(struct sk_buff *skb,
  1065. struct seg6_local_lwt *slwt)
  1066. {
  1067. unsigned int off = 0;
  1068. int nexthdr;
  1069. nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
  1070. if (unlikely(nexthdr < 0))
  1071. goto drop;
  1072. switch (nexthdr) {
  1073. case IPPROTO_IPIP:
  1074. return input_action_end_dt4(skb, slwt);
  1075. case IPPROTO_IPV6:
  1076. return input_action_end_dt6(skb, slwt);
  1077. }
  1078. drop:
  1079. kfree_skb(skb);
  1080. return -EINVAL;
  1081. }
  1082. #endif
  1083. /* push an SRH on top of the current one */
  1084. static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1085. {
  1086. struct ipv6_sr_hdr *srh;
  1087. int err = -EINVAL;
  1088. srh = get_and_validate_srh(skb);
  1089. if (!srh)
  1090. goto drop;
  1091. err = seg6_do_srh_inline(skb, slwt->srh);
  1092. if (err)
  1093. goto drop;
  1094. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  1095. seg6_lookup_nexthop(skb, NULL, 0);
  1096. return dst_input(skb);
  1097. drop:
  1098. kfree_skb(skb);
  1099. return err;
  1100. }
  1101. /* encapsulate within an outer IPv6 header and a specified SRH */
  1102. static int input_action_end_b6_encap(struct sk_buff *skb,
  1103. struct seg6_local_lwt *slwt)
  1104. {
  1105. struct ipv6_sr_hdr *srh;
  1106. int err = -EINVAL;
  1107. srh = get_and_validate_srh(skb);
  1108. if (!srh)
  1109. goto drop;
  1110. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  1111. skb_reset_inner_headers(skb);
  1112. skb->encapsulation = 1;
  1113. err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
  1114. if (err)
  1115. goto drop;
  1116. skb_set_transport_header(skb, sizeof(struct ipv6hdr));
  1117. seg6_lookup_nexthop(skb, NULL, 0);
  1118. return dst_input(skb);
  1119. drop:
  1120. kfree_skb(skb);
  1121. return err;
  1122. }
/* Per-CPU SRH state used by the End.BPF behavior: input_action_end_bpf()
 * fills it in under bh_lock before running the BPF program, and
 * seg6_bpf_has_valid_srh() reads/updates it with the same lock held.
 */
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states) = {
	.bh_lock	= INIT_LOCAL_LOCK(bh_lock),
};
  1126. bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
  1127. {
  1128. struct seg6_bpf_srh_state *srh_state =
  1129. this_cpu_ptr(&seg6_bpf_srh_states);
  1130. struct ipv6_sr_hdr *srh = srh_state->srh;
  1131. lockdep_assert_held(&srh_state->bh_lock);
  1132. if (unlikely(srh == NULL))
  1133. return false;
  1134. if (unlikely(!srh_state->valid)) {
  1135. if ((srh_state->hdrlen & 7) != 0)
  1136. return false;
  1137. srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
  1138. if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
  1139. return false;
  1140. srh_state->valid = true;
  1141. }
  1142. return true;
  1143. }
  1144. static int input_action_end_bpf(struct sk_buff *skb,
  1145. struct seg6_local_lwt *slwt)
  1146. {
  1147. struct seg6_bpf_srh_state *srh_state;
  1148. struct ipv6_sr_hdr *srh;
  1149. int ret;
  1150. srh = get_and_validate_srh(skb);
  1151. if (!srh) {
  1152. kfree_skb(skb);
  1153. return -EINVAL;
  1154. }
  1155. advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
  1156. /* The access to the per-CPU buffer srh_state is protected by running
  1157. * always in softirq context (with disabled BH). On PREEMPT_RT the
  1158. * required locking is provided by the following local_lock_nested_bh()
  1159. * statement. It is also accessed by the bpf_lwt_seg6_* helpers via
  1160. * bpf_prog_run_save_cb().
  1161. */
  1162. local_lock_nested_bh(&seg6_bpf_srh_states.bh_lock);
  1163. srh_state = this_cpu_ptr(&seg6_bpf_srh_states);
  1164. srh_state->srh = srh;
  1165. srh_state->hdrlen = srh->hdrlen << 3;
  1166. srh_state->valid = true;
  1167. rcu_read_lock();
  1168. bpf_compute_data_pointers(skb);
  1169. ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
  1170. rcu_read_unlock();
  1171. switch (ret) {
  1172. case BPF_OK:
  1173. case BPF_REDIRECT:
  1174. break;
  1175. case BPF_DROP:
  1176. goto drop;
  1177. default:
  1178. pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
  1179. goto drop;
  1180. }
  1181. if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
  1182. goto drop;
  1183. local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
  1184. if (ret != BPF_REDIRECT)
  1185. seg6_lookup_nexthop(skb, NULL, 0);
  1186. return dst_input(skb);
  1187. drop:
  1188. local_unlock_nested_bh(&seg6_bpf_srh_states.bh_lock);
  1189. kfree_skb(skb);
  1190. return -EINVAL;
  1191. }
/* Table of the supported seg6local behaviors, searched by
 * __get_action_desc(). Each entry gives the action id, the .attrs and
 * .optattrs attribute masks (presumably required vs. optional netlink
 * attributes -- confirm against the parsing code), the input handler and,
 * where needed, extra ops or headroom.
 */
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_LOCAL_FLAVORS,
		.input		= input_action_end,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_X,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS |
				  SEG6_F_LOCAL_FLAVORS |
				  SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.input		= input_action_end_x,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_T,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_t,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX2,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_OIF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx2,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH6),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx6,
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_DX4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_NH4),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_dx4,
	},
	{
		/* End.DT4: the input handler and the build_state hook are
		 * only provided with CONFIG_NET_L3_MASTER_DEV
		 */
		.action		= SEG6_LOCAL_ACTION_END_DT4,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt4,
		.slwt_ops	= {
					.build_state = seg6_end_dt4_build,
				  },
#endif
	},
	{
		/* End.DT6: with CONFIG_NET_L3_MASTER_DEV both table and
		 * vrftable are listed in .optattrs and seg6_end_dt6_build()
		 * checks that exactly one of them was given; otherwise only
		 * table is listed, in .attrs
		 */
		.action		= SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.attrs		= 0,
		.optattrs	= SEG6_F_LOCAL_COUNTERS		|
				  SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
				  SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.slwt_ops	= {
					.build_state = seg6_end_dt6_build,
				  },
#else
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_TABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#endif
		.input		= input_action_end_dt6,
	},
	{
		/* End.DT46: the input handler and the build_state hook are
		 * only provided with CONFIG_NET_L3_MASTER_DEV
		 */
		.action		= SEG6_LOCAL_ACTION_END_DT46,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
		.input		= input_action_end_dt46,
		.slwt_ops	= {
					.build_state = seg6_end_dt46_build,
				  },
#endif
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_B6,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6,
	},
	{
		/* End.B6.Encaps: the only entry declaring a static headroom,
		 * sized as one IPv6 header
		 */
		.action		= SEG6_LOCAL_ACTION_END_B6_ENCAP,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_SRH),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_b6_encap,
		.static_headroom	= sizeof(struct ipv6hdr),
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= SEG6_F_ATTR(SEG6_LOCAL_BPF),
		.optattrs	= SEG6_F_LOCAL_COUNTERS,
		.input		= input_action_end_bpf,
	},

};
  1290. static struct seg6_action_desc *__get_action_desc(int action)
  1291. {
  1292. struct seg6_action_desc *desc;
  1293. int i, count;
  1294. count = ARRAY_SIZE(seg6_action_table);
  1295. for (i = 0; i < count; i++) {
  1296. desc = &seg6_action_table[i];
  1297. if (desc->action == action)
  1298. return desc;
  1299. }
  1300. return NULL;
  1301. }
  1302. static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
  1303. {
  1304. return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
  1305. }
  1306. static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
  1307. unsigned int len, int err)
  1308. {
  1309. struct pcpu_seg6_local_counters *pcounters;
  1310. pcounters = this_cpu_ptr(slwt->pcpu_counters);
  1311. u64_stats_update_begin(&pcounters->syncp);
  1312. if (likely(!err)) {
  1313. u64_stats_inc(&pcounters->packets);
  1314. u64_stats_add(&pcounters->bytes, len);
  1315. } else {
  1316. u64_stats_inc(&pcounters->errors);
  1317. }
  1318. u64_stats_update_end(&pcounters->syncp);
  1319. }
  1320. static int seg6_local_input_core(struct net *net, struct sock *sk,
  1321. struct sk_buff *skb)
  1322. {
  1323. struct dst_entry *orig_dst = skb_dst(skb);
  1324. struct seg6_action_desc *desc;
  1325. struct seg6_local_lwt *slwt;
  1326. unsigned int len = skb->len;
  1327. int rc;
  1328. slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
  1329. desc = slwt->desc;
  1330. rc = desc->input(skb, slwt);
  1331. if (!seg6_lwtunnel_counters_enabled(slwt))
  1332. return rc;
  1333. seg6_local_update_counters(slwt, len, rc);
  1334. return rc;
  1335. }
  1336. static int seg6_local_input(struct sk_buff *skb)
  1337. {
  1338. if (skb->protocol != htons(ETH_P_IPV6)) {
  1339. kfree_skb(skb);
  1340. return -EINVAL;
  1341. }
  1342. if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
  1343. return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
  1344. dev_net(skb->dev), NULL, skb, skb->dev, NULL,
  1345. seg6_local_input_core);
  1346. return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
  1347. }
/* Netlink attribute policy for the SEG6_LOCAL_* attributes: scalar ids are
 * u32, NH4/NH6 must be exactly one in_addr/in6_addr long, SRH is a binary
 * blob, and BPF, COUNTERS and FLAVORS are nested attributes.
 */
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_ACTION]	= { .type = NLA_U32 },
	[SEG6_LOCAL_SRH]	= { .type = NLA_BINARY },
	[SEG6_LOCAL_TABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_VRFTABLE]	= { .type = NLA_U32 },
	[SEG6_LOCAL_NH4]	= NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
	[SEG6_LOCAL_NH6]	= NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
	[SEG6_LOCAL_COUNTERS]	= { .type = NLA_NESTED },
	[SEG6_LOCAL_FLAVORS]	= { .type = NLA_NESTED },
};
  1361. static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1362. struct netlink_ext_ack *extack)
  1363. {
  1364. struct ipv6_sr_hdr *srh;
  1365. int len;
  1366. srh = nla_data(attrs[SEG6_LOCAL_SRH]);
  1367. len = nla_len(attrs[SEG6_LOCAL_SRH]);
  1368. /* SRH must contain at least one segment */
  1369. if (len < sizeof(*srh) + sizeof(struct in6_addr))
  1370. return -EINVAL;
  1371. if (!seg6_validate_srh(srh, len, false))
  1372. return -EINVAL;
  1373. slwt->srh = kmemdup(srh, len, GFP_KERNEL);
  1374. if (!slwt->srh)
  1375. return -ENOMEM;
  1376. slwt->headroom += len;
  1377. return 0;
  1378. }
  1379. static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1380. {
  1381. struct ipv6_sr_hdr *srh;
  1382. struct nlattr *nla;
  1383. int len;
  1384. srh = slwt->srh;
  1385. len = (srh->hdrlen + 1) << 3;
  1386. nla = nla_reserve(skb, SEG6_LOCAL_SRH, len);
  1387. if (!nla)
  1388. return -EMSGSIZE;
  1389. memcpy(nla_data(nla), srh, len);
  1390. return 0;
  1391. }
  1392. static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1393. {
  1394. int len = (a->srh->hdrlen + 1) << 3;
  1395. if (len != ((b->srh->hdrlen + 1) << 3))
  1396. return 1;
  1397. return memcmp(a->srh, b->srh, len);
  1398. }
  1399. static void destroy_attr_srh(struct seg6_local_lwt *slwt)
  1400. {
  1401. kfree(slwt->srh);
  1402. }
  1403. static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1404. struct netlink_ext_ack *extack)
  1405. {
  1406. slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
  1407. return 0;
  1408. }
  1409. static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1410. {
  1411. if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table))
  1412. return -EMSGSIZE;
  1413. return 0;
  1414. }
  1415. static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1416. {
  1417. if (a->table != b->table)
  1418. return 1;
  1419. return 0;
  1420. }
  1421. static struct
  1422. seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
  1423. {
  1424. #ifdef CONFIG_NET_L3_MASTER_DEV
  1425. return &slwt->dt_info;
  1426. #else
  1427. return ERR_PTR(-EOPNOTSUPP);
  1428. #endif
  1429. }
  1430. static int parse_nla_vrftable(struct nlattr **attrs,
  1431. struct seg6_local_lwt *slwt,
  1432. struct netlink_ext_ack *extack)
  1433. {
  1434. struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
  1435. if (IS_ERR(info))
  1436. return PTR_ERR(info);
  1437. info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
  1438. return 0;
  1439. }
  1440. static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1441. {
  1442. struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
  1443. if (IS_ERR(info))
  1444. return PTR_ERR(info);
  1445. if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
  1446. return -EMSGSIZE;
  1447. return 0;
  1448. }
  1449. static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1450. {
  1451. struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
  1452. struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
  1453. if (info_a->vrf_table != info_b->vrf_table)
  1454. return 1;
  1455. return 0;
  1456. }
  1457. static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1458. struct netlink_ext_ack *extack)
  1459. {
  1460. memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
  1461. sizeof(struct in_addr));
  1462. return 0;
  1463. }
  1464. static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1465. {
  1466. struct nlattr *nla;
  1467. nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr));
  1468. if (!nla)
  1469. return -EMSGSIZE;
  1470. memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr));
  1471. return 0;
  1472. }
  1473. static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1474. {
  1475. return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr));
  1476. }
  1477. static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1478. struct netlink_ext_ack *extack)
  1479. {
  1480. memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]),
  1481. sizeof(struct in6_addr));
  1482. return 0;
  1483. }
  1484. static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1485. {
  1486. struct nlattr *nla;
  1487. nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr));
  1488. if (!nla)
  1489. return -EMSGSIZE;
  1490. memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr));
  1491. return 0;
  1492. }
  1493. static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1494. {
  1495. return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr));
  1496. }
  1497. static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1498. struct netlink_ext_ack *extack)
  1499. {
  1500. slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]);
  1501. return 0;
  1502. }
  1503. static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1504. {
  1505. if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif))
  1506. return -EMSGSIZE;
  1507. return 0;
  1508. }
  1509. static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1510. {
  1511. if (a->iif != b->iif)
  1512. return 1;
  1513. return 0;
  1514. }
  1515. static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1516. struct netlink_ext_ack *extack)
  1517. {
  1518. slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]);
  1519. return 0;
  1520. }
  1521. static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1522. {
  1523. if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif))
  1524. return -EMSGSIZE;
  1525. return 0;
  1526. }
  1527. static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1528. {
  1529. if (a->oif != b->oif)
  1530. return 1;
  1531. return 0;
  1532. }
  1533. #define MAX_PROG_NAME 256
  1534. static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
  1535. [SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
  1536. [SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
  1537. .len = MAX_PROG_NAME },
  1538. };
  1539. static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1540. struct netlink_ext_ack *extack)
  1541. {
  1542. struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
  1543. struct bpf_prog *p;
  1544. int ret;
  1545. u32 fd;
  1546. ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_BPF_PROG_MAX,
  1547. attrs[SEG6_LOCAL_BPF],
  1548. bpf_prog_policy, NULL);
  1549. if (ret < 0)
  1550. return ret;
  1551. if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
  1552. return -EINVAL;
  1553. slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
  1554. if (!slwt->bpf.name)
  1555. return -ENOMEM;
  1556. fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
  1557. p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
  1558. if (IS_ERR(p)) {
  1559. kfree(slwt->bpf.name);
  1560. return PTR_ERR(p);
  1561. }
  1562. slwt->bpf.prog = p;
  1563. return 0;
  1564. }
  1565. static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1566. {
  1567. struct nlattr *nest;
  1568. if (!slwt->bpf.prog)
  1569. return 0;
  1570. nest = nla_nest_start_noflag(skb, SEG6_LOCAL_BPF);
  1571. if (!nest)
  1572. return -EMSGSIZE;
  1573. if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
  1574. return -EMSGSIZE;
  1575. if (slwt->bpf.name &&
  1576. nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
  1577. return -EMSGSIZE;
  1578. return nla_nest_end(skb, nest);
  1579. }
  1580. static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1581. {
  1582. if (!a->bpf.name && !b->bpf.name)
  1583. return 0;
  1584. if (!a->bpf.name || !b->bpf.name)
  1585. return 1;
  1586. return strcmp(a->bpf.name, b->bpf.name);
  1587. }
  1588. static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
  1589. {
  1590. kfree(slwt->bpf.name);
  1591. if (slwt->bpf.prog)
  1592. bpf_prog_put(slwt->bpf.prog);
  1593. }
  1594. static const struct
  1595. nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
  1596. [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 },
  1597. [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 },
  1598. [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 },
  1599. };
  1600. static int parse_nla_counters(struct nlattr **attrs,
  1601. struct seg6_local_lwt *slwt,
  1602. struct netlink_ext_ack *extack)
  1603. {
  1604. struct pcpu_seg6_local_counters __percpu *pcounters;
  1605. struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
  1606. int ret;
  1607. ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
  1608. attrs[SEG6_LOCAL_COUNTERS],
  1609. seg6_local_counters_policy, NULL);
  1610. if (ret < 0)
  1611. return ret;
  1612. /* basic support for SRv6 Behavior counters requires at least:
  1613. * packets, bytes and errors.
  1614. */
  1615. if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
  1616. !tb[SEG6_LOCAL_CNT_ERRORS])
  1617. return -EINVAL;
  1618. /* counters are always zero initialized */
  1619. pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
  1620. if (!pcounters)
  1621. return -ENOMEM;
  1622. slwt->pcpu_counters = pcounters;
  1623. return 0;
  1624. }
  1625. static int seg6_local_fill_nla_counters(struct sk_buff *skb,
  1626. struct seg6_local_counters *counters)
  1627. {
  1628. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
  1629. SEG6_LOCAL_CNT_PAD))
  1630. return -EMSGSIZE;
  1631. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
  1632. SEG6_LOCAL_CNT_PAD))
  1633. return -EMSGSIZE;
  1634. if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
  1635. SEG6_LOCAL_CNT_PAD))
  1636. return -EMSGSIZE;
  1637. return 0;
  1638. }
  1639. static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1640. {
  1641. struct seg6_local_counters counters = { 0, 0, 0 };
  1642. struct nlattr *nest;
  1643. int rc, i;
  1644. nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
  1645. if (!nest)
  1646. return -EMSGSIZE;
  1647. for_each_possible_cpu(i) {
  1648. struct pcpu_seg6_local_counters *pcounters;
  1649. u64 packets, bytes, errors;
  1650. unsigned int start;
  1651. pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
  1652. do {
  1653. start = u64_stats_fetch_begin(&pcounters->syncp);
  1654. packets = u64_stats_read(&pcounters->packets);
  1655. bytes = u64_stats_read(&pcounters->bytes);
  1656. errors = u64_stats_read(&pcounters->errors);
  1657. } while (u64_stats_fetch_retry(&pcounters->syncp, start));
  1658. counters.packets += packets;
  1659. counters.bytes += bytes;
  1660. counters.errors += errors;
  1661. }
  1662. rc = seg6_local_fill_nla_counters(skb, &counters);
  1663. if (rc < 0) {
  1664. nla_nest_cancel(skb, nest);
  1665. return rc;
  1666. }
  1667. return nla_nest_end(skb, nest);
  1668. }
  1669. static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1670. {
  1671. /* a and b are equal if both have pcpu_counters set or not */
  1672. return (!!((unsigned long)a->pcpu_counters)) ^
  1673. (!!((unsigned long)b->pcpu_counters));
  1674. }
  1675. static void destroy_attr_counters(struct seg6_local_lwt *slwt)
  1676. {
  1677. free_percpu(slwt->pcpu_counters);
  1678. }
  1679. static const
  1680. struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = {
  1681. [SEG6_LOCAL_FLV_OPERATION] = { .type = NLA_U32 },
  1682. [SEG6_LOCAL_FLV_LCBLOCK_BITS] = { .type = NLA_U8 },
  1683. [SEG6_LOCAL_FLV_LCNODE_FN_BITS] = { .type = NLA_U8 },
  1684. };
  1685. /* check whether the lengths of the Locator-Block and Locator-Node Function
  1686. * are compatible with the dimension of a C-SID container.
  1687. */
  1688. static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len)
  1689. {
  1690. /* Locator-Block and Locator-Node Function cannot exceed 128 bits
  1691. * (i.e. C-SID container length).
  1692. */
  1693. if (next_csid_chk_cntr_bits(block_len, func_len))
  1694. return -EINVAL;
  1695. /* Locator-Block length must be greater than zero and evenly divisible
  1696. * by 8. There must be room for a Locator-Node Function, at least.
  1697. */
  1698. if (next_csid_chk_lcblock_bits(block_len))
  1699. return -EINVAL;
  1700. /* Locator-Node Function length must be greater than zero and evenly
  1701. * divisible by 8. There must be room for the Locator-Block.
  1702. */
  1703. if (next_csid_chk_lcnode_fn_bits(func_len))
  1704. return -EINVAL;
  1705. return 0;
  1706. }
  1707. static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb,
  1708. struct seg6_flavors_info *finfo,
  1709. struct netlink_ext_ack *extack)
  1710. {
  1711. __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS;
  1712. __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS;
  1713. int rc;
  1714. if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS])
  1715. block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]);
  1716. if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS])
  1717. func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]);
  1718. rc = seg6_chk_next_csid_cfg(block_len, func_len);
  1719. if (rc < 0) {
  1720. NL_SET_ERR_MSG(extack,
  1721. "Invalid Locator Block/Node Function lengths");
  1722. return rc;
  1723. }
  1724. finfo->lcblock_bits = block_len;
  1725. finfo->lcnode_func_bits = func_len;
  1726. return 0;
  1727. }
  1728. static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1729. struct netlink_ext_ack *extack)
  1730. {
  1731. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1732. struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1];
  1733. int action = slwt->action;
  1734. __u32 fops, supp_fops;
  1735. int rc;
  1736. rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX,
  1737. attrs[SEG6_LOCAL_FLAVORS],
  1738. seg6_local_flavors_policy, NULL);
  1739. if (rc < 0)
  1740. return rc;
  1741. /* this attribute MUST always be present since it represents the Flavor
  1742. * operation(s) to be carried out.
  1743. */
  1744. if (!tb[SEG6_LOCAL_FLV_OPERATION])
  1745. return -EINVAL;
  1746. fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]);
  1747. rc = seg6_flv_supp_ops_by_action(action, &supp_fops);
  1748. if (rc < 0 || (fops & ~supp_fops)) {
  1749. NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)");
  1750. return -EOPNOTSUPP;
  1751. }
  1752. finfo->flv_ops = fops;
  1753. if (seg6_next_csid_enabled(fops)) {
  1754. /* Locator-Block and Locator-Node Function lengths can be
  1755. * provided by the user space. Otherwise, default values are
  1756. * applied.
  1757. */
  1758. rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack);
  1759. if (rc < 0)
  1760. return rc;
  1761. }
  1762. return 0;
  1763. }
  1764. static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb,
  1765. struct seg6_flavors_info *finfo)
  1766. {
  1767. if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits))
  1768. return -EMSGSIZE;
  1769. if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS,
  1770. finfo->lcnode_func_bits))
  1771. return -EMSGSIZE;
  1772. return 0;
  1773. }
  1774. static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt)
  1775. {
  1776. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1777. __u32 fops = finfo->flv_ops;
  1778. struct nlattr *nest;
  1779. int rc;
  1780. nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS);
  1781. if (!nest)
  1782. return -EMSGSIZE;
  1783. if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) {
  1784. rc = -EMSGSIZE;
  1785. goto err;
  1786. }
  1787. if (seg6_next_csid_enabled(fops)) {
  1788. rc = seg6_fill_nla_next_csid_cfg(skb, finfo);
  1789. if (rc < 0)
  1790. goto err;
  1791. }
  1792. return nla_nest_end(skb, nest);
  1793. err:
  1794. nla_nest_cancel(skb, nest);
  1795. return rc;
  1796. }
  1797. static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a,
  1798. struct seg6_flavors_info *finfo_b)
  1799. {
  1800. if (finfo_a->lcblock_bits != finfo_b->lcblock_bits)
  1801. return 1;
  1802. if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits)
  1803. return 1;
  1804. return 0;
  1805. }
  1806. static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
  1807. {
  1808. struct seg6_flavors_info *finfo_a = &a->flv_info;
  1809. struct seg6_flavors_info *finfo_b = &b->flv_info;
  1810. if (finfo_a->flv_ops != finfo_b->flv_ops)
  1811. return 1;
  1812. if (seg6_next_csid_enabled(finfo_a->flv_ops)) {
  1813. if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b))
  1814. return 1;
  1815. }
  1816. return 0;
  1817. }
  1818. static int encap_size_flavors(struct seg6_local_lwt *slwt)
  1819. {
  1820. struct seg6_flavors_info *finfo = &slwt->flv_info;
  1821. int nlsize;
  1822. nlsize = nla_total_size(0) + /* nest SEG6_LOCAL_FLAVORS */
  1823. nla_total_size(4); /* SEG6_LOCAL_FLV_OPERATION */
  1824. if (seg6_next_csid_enabled(finfo->flv_ops))
  1825. nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */
  1826. nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */
  1827. return nlsize;
  1828. }
  /* Per-attribute callbacks used to handle a seg6local netlink attribute. */
  struct seg6_action_param {
      /* read the attribute from @attrs into the tunnel state */
      int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt,
                   struct netlink_ext_ack *extack);

      /* write the attribute held by the tunnel state into @skb */
      int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);

      /* compare the attribute of two tunnel states; 0 means equal */
      int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);

      /* optional destroy() callback useful for releasing resources which
       * have been previously acquired in the corresponding parse()
       * function.
       */
      void (*destroy)(struct seg6_local_lwt *slwt);
  };
  1840. static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
  1841. [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
  1842. .put = put_nla_srh,
  1843. .cmp = cmp_nla_srh,
  1844. .destroy = destroy_attr_srh },
  1845. [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
  1846. .put = put_nla_table,
  1847. .cmp = cmp_nla_table },
  1848. [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4,
  1849. .put = put_nla_nh4,
  1850. .cmp = cmp_nla_nh4 },
  1851. [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6,
  1852. .put = put_nla_nh6,
  1853. .cmp = cmp_nla_nh6 },
  1854. [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif,
  1855. .put = put_nla_iif,
  1856. .cmp = cmp_nla_iif },
  1857. [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
  1858. .put = put_nla_oif,
  1859. .cmp = cmp_nla_oif },
  1860. [SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
  1861. .put = put_nla_bpf,
  1862. .cmp = cmp_nla_bpf,
  1863. .destroy = destroy_attr_bpf },
  1864. [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
  1865. .put = put_nla_vrftable,
  1866. .cmp = cmp_nla_vrftable },
  1867. [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters,
  1868. .put = put_nla_counters,
  1869. .cmp = cmp_nla_counters,
  1870. .destroy = destroy_attr_counters },
  1871. [SEG6_LOCAL_FLAVORS] = { .parse = parse_nla_flavors,
  1872. .put = put_nla_flavors,
  1873. .cmp = cmp_nla_flavors },
  1874. };
  1875. /* call the destroy() callback (if available) for each set attribute in
  1876. * @parsed_attrs, starting from the first attribute up to the @max_parsed
  1877. * (excluded) attribute.
  1878. */
  1879. static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
  1880. struct seg6_local_lwt *slwt)
  1881. {
  1882. struct seg6_action_param *param;
  1883. int i;
  1884. /* Every required seg6local attribute is identified by an ID which is
  1885. * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
  1886. *
  1887. * We scan the 'parsed_attrs' bitmask, starting from the first attribute
  1888. * up to the @max_parsed (excluded) attribute.
  1889. * For each set attribute, we retrieve the corresponding destroy()
  1890. * callback. If the callback is not available, then we skip to the next
  1891. * attribute; otherwise, we call the destroy() callback.
  1892. */
  1893. for (i = SEG6_LOCAL_SRH; i < max_parsed; ++i) {
  1894. if (!(parsed_attrs & SEG6_F_ATTR(i)))
  1895. continue;
  1896. param = &seg6_action_params[i];
  1897. if (param->destroy)
  1898. param->destroy(slwt);
  1899. }
  1900. }
  1901. /* release all the resources that may have been acquired during parsing
  1902. * operations.
  1903. */
  1904. static void destroy_attrs(struct seg6_local_lwt *slwt)
  1905. {
  1906. unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  1907. __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
  1908. }
  1909. static int parse_nla_optional_attrs(struct nlattr **attrs,
  1910. struct seg6_local_lwt *slwt,
  1911. struct netlink_ext_ack *extack)
  1912. {
  1913. struct seg6_action_desc *desc = slwt->desc;
  1914. unsigned long parsed_optattrs = 0;
  1915. struct seg6_action_param *param;
  1916. int err, i;
  1917. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; ++i) {
  1918. if (!(desc->optattrs & SEG6_F_ATTR(i)) || !attrs[i])
  1919. continue;
  1920. /* once here, the i-th attribute is provided by the
  1921. * userspace AND it is identified optional as well.
  1922. */
  1923. param = &seg6_action_params[i];
  1924. err = param->parse(attrs, slwt, extack);
  1925. if (err < 0)
  1926. goto parse_optattrs_err;
  1927. /* current attribute has been correctly parsed */
  1928. parsed_optattrs |= SEG6_F_ATTR(i);
  1929. }
  1930. /* store in the tunnel state all the optional attributed successfully
  1931. * parsed.
  1932. */
  1933. slwt->parsed_optattrs = parsed_optattrs;
  1934. return 0;
  1935. parse_optattrs_err:
  1936. __destroy_attrs(parsed_optattrs, i, slwt);
  1937. return err;
  1938. }
  1939. /* call the custom constructor of the behavior during its initialization phase
  1940. * and after that all its attributes have been parsed successfully.
  1941. */
  1942. static int
  1943. seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
  1944. struct netlink_ext_ack *extack)
  1945. {
  1946. struct seg6_action_desc *desc = slwt->desc;
  1947. struct seg6_local_lwtunnel_ops *ops;
  1948. ops = &desc->slwt_ops;
  1949. if (!ops->build_state)
  1950. return 0;
  1951. return ops->build_state(slwt, cfg, extack);
  1952. }
  1953. /* call the custom destructor of the behavior which is invoked before the
  1954. * tunnel is going to be destroyed.
  1955. */
  1956. static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
  1957. {
  1958. struct seg6_action_desc *desc = slwt->desc;
  1959. struct seg6_local_lwtunnel_ops *ops;
  1960. ops = &desc->slwt_ops;
  1961. if (!ops->destroy_state)
  1962. return;
  1963. ops->destroy_state(slwt);
  1964. }
  1965. static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt,
  1966. struct netlink_ext_ack *extack)
  1967. {
  1968. struct seg6_action_param *param;
  1969. struct seg6_action_desc *desc;
  1970. unsigned long invalid_attrs;
  1971. int i, err;
  1972. desc = __get_action_desc(slwt->action);
  1973. if (!desc)
  1974. return -EINVAL;
  1975. if (!desc->input)
  1976. return -EOPNOTSUPP;
  1977. slwt->desc = desc;
  1978. slwt->headroom += desc->static_headroom;
  1979. /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
  1980. * disjoined, this allow us to release acquired resources by optional
  1981. * attributes and by required attributes independently from each other
  1982. * without any interference.
  1983. * In other terms, we are sure that we do not release some the acquired
  1984. * resources twice.
  1985. *
  1986. * Note that if an attribute is configured both as required and as
  1987. * optional, it means that the user has messed something up in the
  1988. * seg6_action_table. Therefore, this check is required for SRv6
  1989. * behaviors to work properly.
  1990. */
  1991. invalid_attrs = desc->attrs & desc->optattrs;
  1992. if (invalid_attrs) {
  1993. WARN_ONCE(1,
  1994. "An attribute cannot be both required AND optional");
  1995. return -EINVAL;
  1996. }
  1997. /* parse the required attributes */
  1998. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  1999. if (desc->attrs & SEG6_F_ATTR(i)) {
  2000. if (!attrs[i])
  2001. return -EINVAL;
  2002. param = &seg6_action_params[i];
  2003. err = param->parse(attrs, slwt, extack);
  2004. if (err < 0)
  2005. goto parse_attrs_err;
  2006. }
  2007. }
  2008. /* parse the optional attributes, if any */
  2009. err = parse_nla_optional_attrs(attrs, slwt, extack);
  2010. if (err < 0)
  2011. goto parse_attrs_err;
  2012. return 0;
  2013. parse_attrs_err:
  2014. /* release any resource that may have been acquired during the i-1
  2015. * parse() operations.
  2016. */
  2017. __destroy_attrs(desc->attrs, i, slwt);
  2018. return err;
  2019. }
  2020. static int seg6_local_build_state(struct net *net, struct nlattr *nla,
  2021. unsigned int family, const void *cfg,
  2022. struct lwtunnel_state **ts,
  2023. struct netlink_ext_ack *extack)
  2024. {
  2025. struct nlattr *tb[SEG6_LOCAL_MAX + 1];
  2026. struct lwtunnel_state *newts;
  2027. struct seg6_local_lwt *slwt;
  2028. int err;
  2029. if (family != AF_INET6)
  2030. return -EINVAL;
  2031. err = nla_parse_nested_deprecated(tb, SEG6_LOCAL_MAX, nla,
  2032. seg6_local_policy, extack);
  2033. if (err < 0)
  2034. return err;
  2035. if (!tb[SEG6_LOCAL_ACTION])
  2036. return -EINVAL;
  2037. newts = lwtunnel_state_alloc(sizeof(*slwt));
  2038. if (!newts)
  2039. return -ENOMEM;
  2040. slwt = seg6_local_lwtunnel(newts);
  2041. slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]);
  2042. err = parse_nla_action(tb, slwt, extack);
  2043. if (err < 0)
  2044. goto out_free;
  2045. err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
  2046. if (err < 0)
  2047. goto out_destroy_attrs;
  2048. newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
  2049. newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
  2050. newts->headroom = slwt->headroom;
  2051. *ts = newts;
  2052. return 0;
  2053. out_destroy_attrs:
  2054. destroy_attrs(slwt);
  2055. out_free:
  2056. kfree(newts);
  2057. return err;
  2058. }
  /* lwtunnel destroy_state callback: run the behavior destructor first, then
   * release the resources held by the parsed attributes.
   */
  static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
  {
      struct seg6_local_lwt *slwt;

      slwt = seg6_local_lwtunnel(lwt);

      seg6_local_lwtunnel_destroy_state(slwt);
      destroy_attrs(slwt);
  }
  2066. static int seg6_local_fill_encap(struct sk_buff *skb,
  2067. struct lwtunnel_state *lwt)
  2068. {
  2069. struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
  2070. struct seg6_action_param *param;
  2071. unsigned long attrs;
  2072. int i, err;
  2073. if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
  2074. return -EMSGSIZE;
  2075. attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  2076. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  2077. if (attrs & SEG6_F_ATTR(i)) {
  2078. param = &seg6_action_params[i];
  2079. err = param->put(skb, slwt);
  2080. if (err < 0)
  2081. return err;
  2082. }
  2083. }
  2084. return 0;
  2085. }
  2086. static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
  2087. {
  2088. struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
  2089. unsigned long attrs;
  2090. int nlsize;
  2091. nlsize = nla_total_size(4); /* action */
  2092. attrs = slwt->desc->attrs | slwt->parsed_optattrs;
  2093. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_SRH))
  2094. nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
  2095. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_TABLE))
  2096. nlsize += nla_total_size(4);
  2097. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH4))
  2098. nlsize += nla_total_size(4);
  2099. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_NH6))
  2100. nlsize += nla_total_size(16);
  2101. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_IIF))
  2102. nlsize += nla_total_size(4);
  2103. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_OIF))
  2104. nlsize += nla_total_size(4);
  2105. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_BPF))
  2106. nlsize += nla_total_size(sizeof(struct nlattr)) +
  2107. nla_total_size(MAX_PROG_NAME) +
  2108. nla_total_size(4);
  2109. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
  2110. nlsize += nla_total_size(4);
  2111. if (attrs & SEG6_F_LOCAL_COUNTERS)
  2112. nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
  2113. /* SEG6_LOCAL_CNT_PACKETS */
  2114. nla_total_size_64bit(sizeof(__u64)) +
  2115. /* SEG6_LOCAL_CNT_BYTES */
  2116. nla_total_size_64bit(sizeof(__u64)) +
  2117. /* SEG6_LOCAL_CNT_ERRORS */
  2118. nla_total_size_64bit(sizeof(__u64));
  2119. if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS))
  2120. nlsize += encap_size_flavors(slwt);
  2121. return nlsize;
  2122. }
  2123. static int seg6_local_cmp_encap(struct lwtunnel_state *a,
  2124. struct lwtunnel_state *b)
  2125. {
  2126. struct seg6_local_lwt *slwt_a, *slwt_b;
  2127. struct seg6_action_param *param;
  2128. unsigned long attrs_a, attrs_b;
  2129. int i;
  2130. slwt_a = seg6_local_lwtunnel(a);
  2131. slwt_b = seg6_local_lwtunnel(b);
  2132. if (slwt_a->action != slwt_b->action)
  2133. return 1;
  2134. attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
  2135. attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
  2136. if (attrs_a != attrs_b)
  2137. return 1;
  2138. for (i = SEG6_LOCAL_SRH; i < SEG6_LOCAL_MAX + 1; i++) {
  2139. if (attrs_a & SEG6_F_ATTR(i)) {
  2140. param = &seg6_action_params[i];
  2141. if (param->cmp(slwt_a, slwt_b))
  2142. return 1;
  2143. }
  2144. }
  2145. return 0;
  2146. }
  2147. static const struct lwtunnel_encap_ops seg6_local_ops = {
  2148. .build_state = seg6_local_build_state,
  2149. .destroy_state = seg6_local_destroy_state,
  2150. .input = seg6_local_input,
  2151. .fill_encap = seg6_local_fill_encap,
  2152. .get_encap_size = seg6_local_get_encap_size,
  2153. .cmp_encap = seg6_local_cmp_encap,
  2154. .owner = THIS_MODULE,
  2155. };
  2156. int __init seg6_local_init(void)
  2157. {
  2158. /* If the max total number of defined attributes is reached, then your
  2159. * kernel build stops here.
  2160. *
  2161. * This check is required to avoid arithmetic overflows when processing
  2162. * behavior attributes and the maximum number of defined attributes
  2163. * exceeds the allowed value.
  2164. */
  2165. BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long));
  2166. /* Check whether the number of defined flavors exceeds the maximum
  2167. * allowed value.
  2168. */
  2169. BUILD_BUG_ON(SEG6_LOCAL_FLV_OP_MAX + 1 > BITS_PER_TYPE(__u32));
  2170. /* If the default NEXT-C-SID Locator-Block/Node Function lengths (in
  2171. * bits) have been changed with invalid values, kernel build stops
  2172. * here.
  2173. */
  2174. BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS,
  2175. SEG6_LOCAL_LCNODE_FN_DBITS));
  2176. BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
  2177. BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
  2178. /* To be memory efficient, we use 'u8' to represent the different
  2179. * actions related to RFC8986 flavors. If the kernel build stops here,
  2180. * it means that it is not possible to correctly encode these actions
  2181. * with the data type chosen for the action table.
  2182. */
  2183. BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U);
  2184. return lwtunnel_encap_add_ops(&seg6_local_ops,
  2185. LWTUNNEL_ENCAP_SEG6_LOCAL);
  2186. }
  2187. void seg6_local_exit(void)
  2188. {
  2189. lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL);
  2190. }