datapath.c 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (c) 2007-2014 Nicira, Inc.
  4. */
  5. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  6. #include <linux/init.h>
  7. #include <linux/module.h>
  8. #include <linux/if_arp.h>
  9. #include <linux/if_vlan.h>
  10. #include <linux/in.h>
  11. #include <linux/ip.h>
  12. #include <linux/jhash.h>
  13. #include <linux/delay.h>
  14. #include <linux/time.h>
  15. #include <linux/etherdevice.h>
  16. #include <linux/kernel.h>
  17. #include <linux/kthread.h>
  18. #include <linux/mutex.h>
  19. #include <linux/percpu.h>
  20. #include <linux/rcupdate.h>
  21. #include <linux/tcp.h>
  22. #include <linux/udp.h>
  23. #include <linux/ethtool.h>
  24. #include <linux/wait.h>
  25. #include <asm/div64.h>
  26. #include <linux/highmem.h>
  27. #include <linux/netfilter_bridge.h>
  28. #include <linux/netfilter_ipv4.h>
  29. #include <linux/inetdevice.h>
  30. #include <linux/list.h>
  31. #include <linux/openvswitch.h>
  32. #include <linux/rculist.h>
  33. #include <linux/dmi.h>
  34. #include <net/genetlink.h>
  35. #include <net/gso.h>
  36. #include <net/net_namespace.h>
  37. #include <net/netns/generic.h>
  38. #include <net/pkt_cls.h>
  39. #include "datapath.h"
  40. #include "drop.h"
  41. #include "flow.h"
  42. #include "flow_table.h"
  43. #include "flow_netlink.h"
  44. #include "meter.h"
  45. #include "openvswitch_trace.h"
  46. #include "vport-internal_dev.h"
  47. #include "vport-netdev.h"
  48. unsigned int ovs_net_id __read_mostly;
  49. static struct genl_family dp_packet_genl_family;
  50. static struct genl_family dp_flow_genl_family;
  51. static struct genl_family dp_datapath_genl_family;
  52. static const struct nla_policy flow_policy[];
  53. static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
  54. .name = OVS_FLOW_MCGROUP,
  55. };
  56. static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
  57. .name = OVS_DATAPATH_MCGROUP,
  58. };
  59. static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
  60. .name = OVS_VPORT_MCGROUP,
  61. };
  62. /* Check if need to build a reply message.
  63. * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
  64. static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
  65. unsigned int group)
  66. {
  67. return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
  68. genl_has_listeners(family, genl_info_net(info), group);
  69. }
  70. static void ovs_notify(struct genl_family *family,
  71. struct sk_buff *skb, struct genl_info *info)
  72. {
  73. genl_notify(family, skb, info, 0, GFP_KERNEL);
  74. }
  /**
   * DOC: Locking:
   *
   * All writes, e.g. writes to device state (add/remove datapath, port, set
   * operations on vports, etc.) and writes to other state (flow table
   * modifications, set miscellaneous datapath parameters, etc.), are protected
   * by ovs_lock.
   *
   * Reads are protected by RCU.
   *
   * There are a few special cases (mostly stats) that have their own
   * synchronization, but they nest under all of the above and don't interact
   * with each other.
   *
   * The RTNL lock nests inside ovs_mutex.
   */
  91. static DEFINE_MUTEX(ovs_mutex);
  92. void ovs_lock(void)
  93. {
  94. mutex_lock(&ovs_mutex);
  95. }
  96. void ovs_unlock(void)
  97. {
  98. mutex_unlock(&ovs_mutex);
  99. }
  #ifdef CONFIG_LOCKDEP
  /* Lockdep query for ovs_mutex.
   *
   * Returns lockdep_is_held(&ovs_mutex) while debug_locks is non-zero and
   * unconditionally 1 once debug_locks is zero.
   */
  int lockdep_ovsl_is_held(void)
  {
      return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
  }
  #endif
  109. static struct vport *new_vport(const struct vport_parms *);
  110. static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
  111. const struct sw_flow_key *,
  112. const struct dp_upcall_info *,
  113. uint32_t cutlen);
  114. static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
  115. const struct sw_flow_key *,
  116. const struct dp_upcall_info *,
  117. uint32_t cutlen);
  118. static void ovs_dp_masks_rebalance(struct work_struct *work);
  119. static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
  120. /* Must be called with rcu_read_lock or ovs_mutex. */
  121. const char *ovs_dp_name(const struct datapath *dp)
  122. {
  123. struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
  124. return ovs_vport_name(vport);
  125. }
  126. static int get_dpifindex(const struct datapath *dp)
  127. {
  128. struct vport *local;
  129. int ifindex;
  130. rcu_read_lock();
  131. local = ovs_vport_rcu(dp, OVSP_LOCAL);
  132. if (local)
  133. ifindex = local->dev->ifindex;
  134. else
  135. ifindex = 0;
  136. rcu_read_unlock();
  137. return ifindex;
  138. }
  139. static void destroy_dp_rcu(struct rcu_head *rcu)
  140. {
  141. struct datapath *dp = container_of(rcu, struct datapath, rcu);
  142. ovs_flow_tbl_destroy(&dp->table);
  143. free_percpu(dp->stats_percpu);
  144. kfree(dp->ports);
  145. ovs_meters_exit(dp);
  146. kfree(rcu_dereference_raw(dp->upcall_portids));
  147. kfree(dp);
  148. }
  149. static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
  150. u16 port_no)
  151. {
  152. return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
  153. }
  154. /* Called with ovs_mutex or RCU read lock. */
  155. struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
  156. {
  157. struct vport *vport;
  158. struct hlist_head *head;
  159. head = vport_hash_bucket(dp, port_no);
  160. hlist_for_each_entry_rcu(vport, head, dp_hash_node,
  161. lockdep_ovsl_is_held()) {
  162. if (vport->port_no == port_no)
  163. return vport;
  164. }
  165. return NULL;
  166. }
  167. /* Called with ovs_mutex. */
  168. static struct vport *new_vport(const struct vport_parms *parms)
  169. {
  170. struct vport *vport;
  171. vport = ovs_vport_add(parms);
  172. if (!IS_ERR(vport)) {
  173. struct datapath *dp = parms->dp;
  174. struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
  175. hlist_add_head_rcu(&vport->dp_hash_node, head);
  176. }
  177. return vport;
  178. }
  179. static void ovs_vport_update_upcall_stats(struct sk_buff *skb,
  180. const struct dp_upcall_info *upcall_info,
  181. bool upcall_result)
  182. {
  183. struct vport *p = OVS_CB(skb)->input_vport;
  184. struct vport_upcall_stats_percpu *stats;
  185. if (upcall_info->cmd != OVS_PACKET_CMD_MISS &&
  186. upcall_info->cmd != OVS_PACKET_CMD_ACTION)
  187. return;
  188. stats = this_cpu_ptr(p->upcall_stats);
  189. u64_stats_update_begin(&stats->syncp);
  190. if (upcall_result)
  191. u64_stats_inc(&stats->n_success);
  192. else
  193. u64_stats_inc(&stats->n_fail);
  194. u64_stats_update_end(&stats->syncp);
  195. }
  196. void ovs_dp_detach_port(struct vport *p)
  197. {
  198. ASSERT_OVSL();
  199. /* First drop references to device. */
  200. hlist_del_rcu(&p->dp_hash_node);
  201. /* Then destroy it. */
  202. ovs_vport_del(p);
  203. }
  204. /* Must be called with rcu_read_lock. */
  205. void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
  206. {
  207. struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
  208. const struct vport *p = OVS_CB(skb)->input_vport;
  209. struct datapath *dp = p->dp;
  210. struct sw_flow *flow;
  211. struct sw_flow_actions *sf_acts;
  212. struct dp_stats_percpu *stats;
  213. bool ovs_pcpu_locked = false;
  214. u64 *stats_counter;
  215. u32 n_mask_hit;
  216. u32 n_cache_hit;
  217. int error;
  218. stats = this_cpu_ptr(dp->stats_percpu);
  219. /* Look up flow. */
  220. flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
  221. &n_mask_hit, &n_cache_hit);
  222. if (unlikely(!flow)) {
  223. struct dp_upcall_info upcall;
  224. memset(&upcall, 0, sizeof(upcall));
  225. upcall.cmd = OVS_PACKET_CMD_MISS;
  226. if (OVS_CB(skb)->upcall_pid)
  227. upcall.portid = OVS_CB(skb)->upcall_pid;
  228. else if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
  229. upcall.portid =
  230. ovs_dp_get_upcall_portid(dp, smp_processor_id());
  231. else
  232. upcall.portid = ovs_vport_find_upcall_portid(p, skb);
  233. upcall.mru = OVS_CB(skb)->mru;
  234. error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
  235. switch (error) {
  236. case 0:
  237. case -EAGAIN:
  238. case -ERESTARTSYS:
  239. case -EINTR:
  240. consume_skb(skb);
  241. break;
  242. default:
  243. kfree_skb(skb);
  244. break;
  245. }
  246. stats_counter = &stats->n_missed;
  247. goto out;
  248. }
  249. ovs_flow_stats_update(flow, key->tp.flags, skb);
  250. sf_acts = rcu_dereference(flow->sf_acts);
  251. /* This path can be invoked recursively: Use the current task to
  252. * identify recursive invocation - the lock must be acquired only once.
  253. * Even with disabled bottom halves this can be preempted on PREEMPT_RT.
  254. * Limit the locking to RT to avoid assigning `owner' if it can be
  255. * avoided.
  256. */
  257. if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
  258. local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
  259. ovs_pcpu->owner = current;
  260. ovs_pcpu_locked = true;
  261. }
  262. error = ovs_execute_actions(dp, skb, sf_acts, key);
  263. if (unlikely(error))
  264. net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
  265. ovs_dp_name(dp), error);
  266. if (ovs_pcpu_locked) {
  267. ovs_pcpu->owner = NULL;
  268. local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
  269. }
  270. stats_counter = &stats->n_hit;
  271. out:
  272. /* Update datapath statistics. */
  273. u64_stats_update_begin(&stats->syncp);
  274. (*stats_counter)++;
  275. stats->n_mask_hit += n_mask_hit;
  276. stats->n_cache_hit += n_cache_hit;
  277. u64_stats_update_end(&stats->syncp);
  278. }
  279. int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
  280. const struct sw_flow_key *key,
  281. const struct dp_upcall_info *upcall_info,
  282. uint32_t cutlen)
  283. {
  284. struct dp_stats_percpu *stats;
  285. int err;
  286. if (trace_ovs_dp_upcall_enabled())
  287. trace_ovs_dp_upcall(dp, skb, key, upcall_info);
  288. if (upcall_info->portid == 0) {
  289. err = -ENOTCONN;
  290. goto err;
  291. }
  292. if (!skb_is_gso(skb))
  293. err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
  294. else
  295. err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
  296. ovs_vport_update_upcall_stats(skb, upcall_info, !err);
  297. if (err)
  298. goto err;
  299. return 0;
  300. err:
  301. stats = this_cpu_ptr(dp->stats_percpu);
  302. u64_stats_update_begin(&stats->syncp);
  303. stats->n_lost++;
  304. u64_stats_update_end(&stats->syncp);
  305. return err;
  306. }
  307. static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
  308. const struct sw_flow_key *key,
  309. const struct dp_upcall_info *upcall_info,
  310. uint32_t cutlen)
  311. {
  312. unsigned int gso_type = skb_shinfo(skb)->gso_type;
  313. struct sw_flow_key later_key;
  314. struct sk_buff *segs, *nskb;
  315. int err;
  316. BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
  317. segs = __skb_gso_segment(skb, NETIF_F_SG, false);
  318. if (IS_ERR(segs))
  319. return PTR_ERR(segs);
  320. if (segs == NULL)
  321. return -EINVAL;
  322. if (gso_type & SKB_GSO_UDP) {
  323. /* The initial flow key extracted by ovs_flow_key_extract()
  324. * in this case is for a first fragment, so we need to
  325. * properly mark later fragments.
  326. */
  327. later_key = *key;
  328. later_key.ip.frag = OVS_FRAG_TYPE_LATER;
  329. }
  330. /* Queue all of the segments. */
  331. skb_list_walk_safe(segs, skb, nskb) {
  332. if (gso_type & SKB_GSO_UDP && skb != segs)
  333. key = &later_key;
  334. err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
  335. if (err)
  336. break;
  337. }
  338. /* Free all of the segments. */
  339. skb_list_walk_safe(segs, skb, nskb) {
  340. if (err)
  341. kfree_skb(skb);
  342. else
  343. consume_skb(skb);
  344. }
  345. return err;
  346. }
  347. static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
  348. unsigned int hdrlen, int actions_attrlen)
  349. {
  350. size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
  351. + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
  352. + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
  353. + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
  354. + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
  355. /* OVS_PACKET_ATTR_USERDATA */
  356. if (upcall_info->userdata)
  357. size += NLA_ALIGN(upcall_info->userdata->nla_len);
  358. /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
  359. if (upcall_info->egress_tun_info)
  360. size += nla_total_size(ovs_tun_key_attr_size());
  361. /* OVS_PACKET_ATTR_ACTIONS */
  362. if (upcall_info->actions_len)
  363. size += nla_total_size(actions_attrlen);
  364. /* OVS_PACKET_ATTR_MRU */
  365. if (upcall_info->mru)
  366. size += nla_total_size(sizeof(upcall_info->mru));
  367. return size;
  368. }
  369. static void pad_packet(struct datapath *dp, struct sk_buff *skb)
  370. {
  371. if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
  372. size_t plen = NLA_ALIGN(skb->len) - skb->len;
  373. if (plen > 0)
  374. skb_put_zero(skb, plen);
  375. }
  376. }
  377. static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
  378. const struct sw_flow_key *key,
  379. const struct dp_upcall_info *upcall_info,
  380. uint32_t cutlen)
  381. {
  382. struct ovs_header *upcall;
  383. struct sk_buff *nskb = NULL;
  384. struct sk_buff *user_skb = NULL; /* to be queued to userspace */
  385. struct nlattr *nla;
  386. size_t len;
  387. unsigned int hlen;
  388. int err, dp_ifindex;
  389. u64 hash;
  390. dp_ifindex = get_dpifindex(dp);
  391. if (!dp_ifindex)
  392. return -ENODEV;
  393. if (skb_vlan_tag_present(skb)) {
  394. nskb = skb_clone(skb, GFP_ATOMIC);
  395. if (!nskb)
  396. return -ENOMEM;
  397. nskb = __vlan_hwaccel_push_inside(nskb);
  398. if (!nskb)
  399. return -ENOMEM;
  400. skb = nskb;
  401. }
  402. if (nla_attr_size(skb->len) > USHRT_MAX) {
  403. err = -EFBIG;
  404. goto out;
  405. }
  406. /* Complete checksum if needed */
  407. if (skb->ip_summed == CHECKSUM_PARTIAL &&
  408. (err = skb_csum_hwoffload_help(skb, 0)))
  409. goto out;
  410. /* Older versions of OVS user space enforce alignment of the last
  411. * Netlink attribute to NLA_ALIGNTO which would require extensive
  412. * padding logic. Only perform zerocopy if padding is not required.
  413. */
  414. if (dp->user_features & OVS_DP_F_UNALIGNED)
  415. hlen = skb_zerocopy_headlen(skb);
  416. else
  417. hlen = skb->len;
  418. len = upcall_msg_size(upcall_info, hlen - cutlen,
  419. OVS_CB(skb)->acts_origlen);
  420. user_skb = genlmsg_new(len, GFP_ATOMIC);
  421. if (!user_skb) {
  422. err = -ENOMEM;
  423. goto out;
  424. }
  425. upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
  426. 0, upcall_info->cmd);
  427. if (!upcall) {
  428. err = -EINVAL;
  429. goto out;
  430. }
  431. upcall->dp_ifindex = dp_ifindex;
  432. err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
  433. if (err)
  434. goto out;
  435. if (upcall_info->userdata)
  436. __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
  437. nla_len(upcall_info->userdata),
  438. nla_data(upcall_info->userdata));
  439. if (upcall_info->egress_tun_info) {
  440. nla = nla_nest_start_noflag(user_skb,
  441. OVS_PACKET_ATTR_EGRESS_TUN_KEY);
  442. if (!nla) {
  443. err = -EMSGSIZE;
  444. goto out;
  445. }
  446. err = ovs_nla_put_tunnel_info(user_skb,
  447. upcall_info->egress_tun_info);
  448. if (err)
  449. goto out;
  450. nla_nest_end(user_skb, nla);
  451. }
  452. if (upcall_info->actions_len) {
  453. nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
  454. if (!nla) {
  455. err = -EMSGSIZE;
  456. goto out;
  457. }
  458. err = ovs_nla_put_actions(upcall_info->actions,
  459. upcall_info->actions_len,
  460. user_skb);
  461. if (!err)
  462. nla_nest_end(user_skb, nla);
  463. else
  464. nla_nest_cancel(user_skb, nla);
  465. }
  466. /* Add OVS_PACKET_ATTR_MRU */
  467. if (upcall_info->mru &&
  468. nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
  469. err = -ENOBUFS;
  470. goto out;
  471. }
  472. /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
  473. if (cutlen > 0 &&
  474. nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
  475. err = -ENOBUFS;
  476. goto out;
  477. }
  478. /* Add OVS_PACKET_ATTR_HASH */
  479. hash = skb_get_hash_raw(skb);
  480. if (skb->sw_hash)
  481. hash |= OVS_PACKET_HASH_SW_BIT;
  482. if (skb->l4_hash)
  483. hash |= OVS_PACKET_HASH_L4_BIT;
  484. if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
  485. err = -ENOBUFS;
  486. goto out;
  487. }
  488. /* Only reserve room for attribute header, packet data is added
  489. * in skb_zerocopy() */
  490. if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
  491. err = -ENOBUFS;
  492. goto out;
  493. }
  494. nla->nla_len = nla_attr_size(skb->len - cutlen);
  495. err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
  496. if (err)
  497. goto out;
  498. /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
  499. pad_packet(dp, user_skb);
  500. ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
  501. err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
  502. user_skb = NULL;
  503. out:
  504. if (err)
  505. skb_tx_error(skb);
  506. consume_skb(user_skb);
  507. consume_skb(nskb);
  508. return err;
  509. }
  510. static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
  511. {
  512. struct ovs_header *ovs_header = genl_info_userhdr(info);
  513. struct net *net = sock_net(skb->sk);
  514. struct nlattr **a = info->attrs;
  515. struct sw_flow_actions *acts;
  516. struct sk_buff *packet;
  517. struct sw_flow *flow;
  518. struct sw_flow_actions *sf_acts;
  519. struct datapath *dp;
  520. struct vport *input_vport;
  521. u16 mru = 0;
  522. u64 hash;
  523. int len;
  524. int err;
  525. bool log = !a[OVS_PACKET_ATTR_PROBE];
  526. err = -EINVAL;
  527. if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
  528. !a[OVS_PACKET_ATTR_ACTIONS])
  529. goto err;
  530. len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
  531. packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
  532. err = -ENOMEM;
  533. if (!packet)
  534. goto err;
  535. skb_reserve(packet, NET_IP_ALIGN);
  536. nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
  537. /* Set packet's mru */
  538. if (a[OVS_PACKET_ATTR_MRU]) {
  539. mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
  540. packet->ignore_df = 1;
  541. }
  542. OVS_CB(packet)->mru = mru;
  543. if (a[OVS_PACKET_ATTR_HASH]) {
  544. hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
  545. __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
  546. !!(hash & OVS_PACKET_HASH_SW_BIT),
  547. !!(hash & OVS_PACKET_HASH_L4_BIT));
  548. }
  549. OVS_CB(packet)->upcall_pid =
  550. nla_get_u32_default(a[OVS_PACKET_ATTR_UPCALL_PID], 0);
  551. /* Build an sw_flow for sending this packet. */
  552. flow = ovs_flow_alloc();
  553. err = PTR_ERR(flow);
  554. if (IS_ERR(flow))
  555. goto err_kfree_skb;
  556. err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
  557. packet, &flow->key, log);
  558. if (err)
  559. goto err_flow_free;
  560. err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
  561. &flow->key, &acts, log);
  562. if (err)
  563. goto err_flow_free;
  564. rcu_assign_pointer(flow->sf_acts, acts);
  565. packet->priority = flow->key.phy.priority;
  566. packet->mark = flow->key.phy.skb_mark;
  567. rcu_read_lock();
  568. dp = get_dp_rcu(net, ovs_header->dp_ifindex);
  569. err = -ENODEV;
  570. if (!dp)
  571. goto err_unlock;
  572. input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
  573. if (!input_vport)
  574. input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
  575. if (!input_vport)
  576. goto err_unlock;
  577. packet->dev = input_vport->dev;
  578. OVS_CB(packet)->input_vport = input_vport;
  579. sf_acts = rcu_dereference(flow->sf_acts);
  580. local_bh_disable();
  581. local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
  582. if (IS_ENABLED(CONFIG_PREEMPT_RT))
  583. this_cpu_write(ovs_pcpu_storage->owner, current);
  584. err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
  585. if (IS_ENABLED(CONFIG_PREEMPT_RT))
  586. this_cpu_write(ovs_pcpu_storage->owner, NULL);
  587. local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
  588. local_bh_enable();
  589. rcu_read_unlock();
  590. ovs_flow_free(flow, false);
  591. return err;
  592. err_unlock:
  593. rcu_read_unlock();
  594. err_flow_free:
  595. ovs_flow_free(flow, false);
  596. err_kfree_skb:
  597. kfree_skb(packet);
  598. err:
  599. return err;
  600. }
  601. static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
  602. [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
  603. [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
  604. [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
  605. [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
  606. [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
  607. [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
  608. [OVS_PACKET_ATTR_UPCALL_PID] = { .type = NLA_U32 },
  609. };
  610. static const struct genl_small_ops dp_packet_genl_ops[] = {
  611. { .cmd = OVS_PACKET_CMD_EXECUTE,
  612. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  613. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  614. .doit = ovs_packet_cmd_execute
  615. }
  616. };
  617. static struct genl_family dp_packet_genl_family __ro_after_init = {
  618. .hdrsize = sizeof(struct ovs_header),
  619. .name = OVS_PACKET_FAMILY,
  620. .version = OVS_PACKET_VERSION,
  621. .maxattr = OVS_PACKET_ATTR_MAX,
  622. .policy = packet_policy,
  623. .netnsok = true,
  624. .parallel_ops = true,
  625. .small_ops = dp_packet_genl_ops,
  626. .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
  627. .resv_start_op = OVS_PACKET_CMD_EXECUTE + 1,
  628. .module = THIS_MODULE,
  629. };
  630. static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
  631. struct ovs_dp_megaflow_stats *mega_stats)
  632. {
  633. int i;
  634. memset(mega_stats, 0, sizeof(*mega_stats));
  635. stats->n_flows = ovs_flow_tbl_count(&dp->table);
  636. mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
  637. stats->n_hit = stats->n_missed = stats->n_lost = 0;
  638. for_each_possible_cpu(i) {
  639. const struct dp_stats_percpu *percpu_stats;
  640. struct dp_stats_percpu local_stats;
  641. unsigned int start;
  642. percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
  643. do {
  644. start = u64_stats_fetch_begin(&percpu_stats->syncp);
  645. local_stats = *percpu_stats;
  646. } while (u64_stats_fetch_retry(&percpu_stats->syncp, start));
  647. stats->n_hit += local_stats.n_hit;
  648. stats->n_missed += local_stats.n_missed;
  649. stats->n_lost += local_stats.n_lost;
  650. mega_stats->n_mask_hit += local_stats.n_mask_hit;
  651. mega_stats->n_cache_hit += local_stats.n_cache_hit;
  652. }
  653. }
  654. static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
  655. {
  656. return ovs_identifier_is_ufid(sfid) &&
  657. !(ufid_flags & OVS_UFID_F_OMIT_KEY);
  658. }
  659. static bool should_fill_mask(uint32_t ufid_flags)
  660. {
  661. return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
  662. }
  663. static bool should_fill_actions(uint32_t ufid_flags)
  664. {
  665. return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
  666. }
  667. static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
  668. const struct sw_flow_id *sfid,
  669. uint32_t ufid_flags)
  670. {
  671. size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
  672. /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
  673. * see ovs_nla_put_identifier()
  674. */
  675. if (sfid && ovs_identifier_is_ufid(sfid))
  676. len += nla_total_size(sfid->ufid_len);
  677. else
  678. len += nla_total_size(ovs_key_attr_size());
  679. /* OVS_FLOW_ATTR_KEY */
  680. if (!sfid || should_fill_key(sfid, ufid_flags))
  681. len += nla_total_size(ovs_key_attr_size());
  682. /* OVS_FLOW_ATTR_MASK */
  683. if (should_fill_mask(ufid_flags))
  684. len += nla_total_size(ovs_key_attr_size());
  685. /* OVS_FLOW_ATTR_ACTIONS */
  686. if (should_fill_actions(ufid_flags))
  687. len += nla_total_size(acts->orig_len);
  688. return len
  689. + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
  690. + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
  691. + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
  692. }
  693. /* Called with ovs_mutex or RCU read lock. */
  694. static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
  695. struct sk_buff *skb)
  696. {
  697. struct ovs_flow_stats stats;
  698. __be16 tcp_flags;
  699. unsigned long used;
  700. ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
  701. if (used &&
  702. nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
  703. OVS_FLOW_ATTR_PAD))
  704. return -EMSGSIZE;
  705. if (stats.n_packets &&
  706. nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
  707. sizeof(struct ovs_flow_stats), &stats,
  708. OVS_FLOW_ATTR_PAD))
  709. return -EMSGSIZE;
  710. if ((u8)ntohs(tcp_flags) &&
  711. nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
  712. return -EMSGSIZE;
  713. return 0;
  714. }
  715. /* Called with ovs_mutex or RCU read lock. */
  716. static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
  717. struct sk_buff *skb, int skb_orig_len)
  718. {
  719. struct nlattr *start;
  720. int err;
  721. /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
  722. * this is the first flow to be dumped into 'skb'. This is unusual for
  723. * Netlink but individual action lists can be longer than
  724. * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
  725. * The userspace caller can always fetch the actions separately if it
  726. * really wants them. (Most userspace callers in fact don't care.)
  727. *
  728. * This can only fail for dump operations because the skb is always
  729. * properly sized for single flows.
  730. */
  731. start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
  732. if (start) {
  733. const struct sw_flow_actions *sf_acts;
  734. sf_acts = rcu_dereference_ovsl(flow->sf_acts);
  735. err = ovs_nla_put_actions(sf_acts->actions,
  736. sf_acts->actions_len, skb);
  737. if (!err)
  738. nla_nest_end(skb, start);
  739. else {
  740. if (skb_orig_len)
  741. return err;
  742. nla_nest_cancel(skb, start);
  743. }
  744. } else if (skb_orig_len) {
  745. return -EMSGSIZE;
  746. }
  747. return 0;
  748. }
  749. /* Called with ovs_mutex or RCU read lock. */
  750. static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
  751. struct sk_buff *skb, u32 portid,
  752. u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
  753. {
  754. const int skb_orig_len = skb->len;
  755. struct ovs_header *ovs_header;
  756. int err;
  757. ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
  758. flags, cmd);
  759. if (!ovs_header)
  760. return -EMSGSIZE;
  761. ovs_header->dp_ifindex = dp_ifindex;
  762. err = ovs_nla_put_identifier(flow, skb);
  763. if (err)
  764. goto error;
  765. if (should_fill_key(&flow->id, ufid_flags)) {
  766. err = ovs_nla_put_masked_key(flow, skb);
  767. if (err)
  768. goto error;
  769. }
  770. if (should_fill_mask(ufid_flags)) {
  771. err = ovs_nla_put_mask(flow, skb);
  772. if (err)
  773. goto error;
  774. }
  775. err = ovs_flow_cmd_fill_stats(flow, skb);
  776. if (err)
  777. goto error;
  778. if (should_fill_actions(ufid_flags)) {
  779. err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
  780. if (err)
  781. goto error;
  782. }
  783. genlmsg_end(skb, ovs_header);
  784. return 0;
  785. error:
  786. genlmsg_cancel(skb, ovs_header);
  787. return err;
  788. }
  789. /* May not be called with RCU read lock. */
  790. static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
  791. const struct sw_flow_id *sfid,
  792. struct genl_info *info,
  793. bool always,
  794. uint32_t ufid_flags)
  795. {
  796. struct sk_buff *skb;
  797. size_t len;
  798. if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
  799. return NULL;
  800. len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
  801. skb = genlmsg_new(len, GFP_KERNEL);
  802. if (!skb)
  803. return ERR_PTR(-ENOMEM);
  804. return skb;
  805. }
  806. /* Called with ovs_mutex. */
  807. static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
  808. int dp_ifindex,
  809. struct genl_info *info, u8 cmd,
  810. bool always, u32 ufid_flags)
  811. {
  812. struct sk_buff *skb;
  813. int retval;
  814. skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
  815. &flow->id, info, always, ufid_flags);
  816. if (IS_ERR_OR_NULL(skb))
  817. return skb;
  818. retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
  819. info->snd_portid, info->snd_seq, 0,
  820. cmd, ufid_flags);
  821. if (WARN_ON_ONCE(retval < 0)) {
  822. kfree_skb(skb);
  823. skb = ERR_PTR(retval);
  824. }
  825. return skb;
  826. }
  827. static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
  828. {
  829. struct net *net = sock_net(skb->sk);
  830. struct nlattr **a = info->attrs;
  831. struct ovs_header *ovs_header = genl_info_userhdr(info);
  832. struct sw_flow *flow = NULL, *new_flow;
  833. struct sw_flow_mask mask;
  834. struct sk_buff *reply;
  835. struct datapath *dp;
  836. struct sw_flow_key *key;
  837. struct sw_flow_actions *acts;
  838. struct sw_flow_match match;
  839. u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
  840. int error;
  841. bool log = !a[OVS_FLOW_ATTR_PROBE];
  842. /* Must have key and actions. */
  843. error = -EINVAL;
  844. if (!a[OVS_FLOW_ATTR_KEY]) {
  845. OVS_NLERR(log, "Flow key attr not present in new flow.");
  846. goto error;
  847. }
  848. if (!a[OVS_FLOW_ATTR_ACTIONS]) {
  849. OVS_NLERR(log, "Flow actions attr not present in new flow.");
  850. goto error;
  851. }
  852. /* Most of the time we need to allocate a new flow, do it before
  853. * locking.
  854. */
  855. new_flow = ovs_flow_alloc();
  856. if (IS_ERR(new_flow)) {
  857. error = PTR_ERR(new_flow);
  858. goto error;
  859. }
  860. /* Extract key. */
  861. key = kzalloc_obj(*key);
  862. if (!key) {
  863. error = -ENOMEM;
  864. goto err_kfree_flow;
  865. }
  866. ovs_match_init(&match, key, false, &mask);
  867. error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
  868. a[OVS_FLOW_ATTR_MASK], log);
  869. if (error)
  870. goto err_kfree_key;
  871. ovs_flow_mask_key(&new_flow->key, key, true, &mask);
  872. /* Extract flow identifier. */
  873. error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
  874. key, log);
  875. if (error)
  876. goto err_kfree_key;
  877. /* Validate actions. */
  878. error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
  879. &new_flow->key, &acts, log);
  880. if (error) {
  881. OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
  882. goto err_kfree_key;
  883. }
  884. reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
  885. ufid_flags);
  886. if (IS_ERR(reply)) {
  887. error = PTR_ERR(reply);
  888. goto err_kfree_acts;
  889. }
  890. ovs_lock();
  891. dp = get_dp(net, ovs_header->dp_ifindex);
  892. if (unlikely(!dp)) {
  893. error = -ENODEV;
  894. goto err_unlock_ovs;
  895. }
  896. /* Check if this is a duplicate flow */
  897. if (ovs_identifier_is_ufid(&new_flow->id))
  898. flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
  899. if (!flow)
  900. flow = ovs_flow_tbl_lookup(&dp->table, key);
  901. if (likely(!flow)) {
  902. rcu_assign_pointer(new_flow->sf_acts, acts);
  903. /* Put flow in bucket. */
  904. error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
  905. if (unlikely(error)) {
  906. acts = NULL;
  907. goto err_unlock_ovs;
  908. }
  909. if (unlikely(reply)) {
  910. error = ovs_flow_cmd_fill_info(new_flow,
  911. ovs_header->dp_ifindex,
  912. reply, info->snd_portid,
  913. info->snd_seq, 0,
  914. OVS_FLOW_CMD_NEW,
  915. ufid_flags);
  916. BUG_ON(error < 0);
  917. }
  918. ovs_unlock();
  919. } else {
  920. struct sw_flow_actions *old_acts;
  921. /* Bail out if we're not allowed to modify an existing flow.
  922. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
  923. * because Generic Netlink treats the latter as a dump
  924. * request. We also accept NLM_F_EXCL in case that bug ever
  925. * gets fixed.
  926. */
  927. if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
  928. | NLM_F_EXCL))) {
  929. error = -EEXIST;
  930. goto err_unlock_ovs;
  931. }
  932. /* The flow identifier has to be the same for flow updates.
  933. * Look for any overlapping flow.
  934. */
  935. if (unlikely(!ovs_flow_cmp(flow, &match))) {
  936. if (ovs_identifier_is_key(&flow->id))
  937. flow = ovs_flow_tbl_lookup_exact(&dp->table,
  938. &match);
  939. else /* UFID matches but key is different */
  940. flow = NULL;
  941. if (!flow) {
  942. error = -ENOENT;
  943. goto err_unlock_ovs;
  944. }
  945. }
  946. /* Update actions. */
  947. old_acts = ovsl_dereference(flow->sf_acts);
  948. rcu_assign_pointer(flow->sf_acts, acts);
  949. if (unlikely(reply)) {
  950. error = ovs_flow_cmd_fill_info(flow,
  951. ovs_header->dp_ifindex,
  952. reply, info->snd_portid,
  953. info->snd_seq, 0,
  954. OVS_FLOW_CMD_NEW,
  955. ufid_flags);
  956. BUG_ON(error < 0);
  957. }
  958. ovs_unlock();
  959. ovs_nla_free_flow_actions_rcu(old_acts);
  960. ovs_flow_free(new_flow, false);
  961. }
  962. if (reply)
  963. ovs_notify(&dp_flow_genl_family, reply, info);
  964. kfree(key);
  965. return 0;
  966. err_unlock_ovs:
  967. ovs_unlock();
  968. kfree_skb(reply);
  969. err_kfree_acts:
  970. ovs_nla_free_flow_actions(acts);
  971. err_kfree_key:
  972. kfree(key);
  973. err_kfree_flow:
  974. ovs_flow_free(new_flow, false);
  975. error:
  976. return error;
  977. }
  978. /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
  979. static noinline_for_stack
  980. struct sw_flow_actions *get_flow_actions(struct net *net,
  981. const struct nlattr *a,
  982. const struct sw_flow_key *key,
  983. const struct sw_flow_mask *mask,
  984. bool log)
  985. {
  986. struct sw_flow_actions *acts;
  987. struct sw_flow_key masked_key;
  988. int error;
  989. ovs_flow_mask_key(&masked_key, key, true, mask);
  990. error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
  991. if (error) {
  992. OVS_NLERR(log,
  993. "Actions may not be safe on all matching packets");
  994. return ERR_PTR(error);
  995. }
  996. return acts;
  997. }
  998. /* Factor out match-init and action-copy to avoid
  999. * "Wframe-larger-than=1024" warning. Because mask is only
  1000. * used to get actions, we new a function to save some
  1001. * stack space.
  1002. *
  1003. * If there are not key and action attrs, we return 0
  1004. * directly. In the case, the caller will also not use the
  1005. * match as before. If there is action attr, we try to get
  1006. * actions and save them to *acts. Before returning from
  1007. * the function, we reset the match->mask pointer. Because
  1008. * we should not to return match object with dangling reference
  1009. * to mask.
  1010. * */
  1011. static noinline_for_stack int
  1012. ovs_nla_init_match_and_action(struct net *net,
  1013. struct sw_flow_match *match,
  1014. struct sw_flow_key *key,
  1015. struct nlattr **a,
  1016. struct sw_flow_actions **acts,
  1017. bool log)
  1018. {
  1019. struct sw_flow_mask mask;
  1020. int error = 0;
  1021. if (a[OVS_FLOW_ATTR_KEY]) {
  1022. ovs_match_init(match, key, true, &mask);
  1023. error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
  1024. a[OVS_FLOW_ATTR_MASK], log);
  1025. if (error)
  1026. goto error;
  1027. }
  1028. if (a[OVS_FLOW_ATTR_ACTIONS]) {
  1029. if (!a[OVS_FLOW_ATTR_KEY]) {
  1030. OVS_NLERR(log,
  1031. "Flow key attribute not present in set flow.");
  1032. error = -EINVAL;
  1033. goto error;
  1034. }
  1035. *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
  1036. &mask, log);
  1037. if (IS_ERR(*acts)) {
  1038. error = PTR_ERR(*acts);
  1039. goto error;
  1040. }
  1041. }
  1042. /* On success, error is 0. */
  1043. error:
  1044. match->mask = NULL;
  1045. return error;
  1046. }
  1047. static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
  1048. {
  1049. struct net *net = sock_net(skb->sk);
  1050. struct nlattr **a = info->attrs;
  1051. struct ovs_header *ovs_header = genl_info_userhdr(info);
  1052. struct sw_flow_key key;
  1053. struct sw_flow *flow;
  1054. struct sk_buff *reply = NULL;
  1055. struct datapath *dp;
  1056. struct sw_flow_actions *old_acts = NULL, *acts = NULL;
  1057. struct sw_flow_match match;
  1058. struct sw_flow_id sfid;
  1059. u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
  1060. int error = 0;
  1061. bool log = !a[OVS_FLOW_ATTR_PROBE];
  1062. bool ufid_present;
  1063. ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
  1064. if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
  1065. OVS_NLERR(log,
  1066. "Flow set message rejected, Key attribute missing.");
  1067. return -EINVAL;
  1068. }
  1069. error = ovs_nla_init_match_and_action(net, &match, &key, a,
  1070. &acts, log);
  1071. if (error)
  1072. goto error;
  1073. if (acts) {
  1074. /* Can allocate before locking if have acts. */
  1075. reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
  1076. ufid_flags);
  1077. if (IS_ERR(reply)) {
  1078. error = PTR_ERR(reply);
  1079. goto err_kfree_acts;
  1080. }
  1081. }
  1082. ovs_lock();
  1083. dp = get_dp(net, ovs_header->dp_ifindex);
  1084. if (unlikely(!dp)) {
  1085. error = -ENODEV;
  1086. goto err_unlock_ovs;
  1087. }
  1088. /* Check that the flow exists. */
  1089. if (ufid_present)
  1090. flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
  1091. else
  1092. flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
  1093. if (unlikely(!flow)) {
  1094. error = -ENOENT;
  1095. goto err_unlock_ovs;
  1096. }
  1097. /* Update actions, if present. */
  1098. if (likely(acts)) {
  1099. old_acts = ovsl_dereference(flow->sf_acts);
  1100. rcu_assign_pointer(flow->sf_acts, acts);
  1101. if (unlikely(reply)) {
  1102. error = ovs_flow_cmd_fill_info(flow,
  1103. ovs_header->dp_ifindex,
  1104. reply, info->snd_portid,
  1105. info->snd_seq, 0,
  1106. OVS_FLOW_CMD_SET,
  1107. ufid_flags);
  1108. BUG_ON(error < 0);
  1109. }
  1110. } else {
  1111. /* Could not alloc without acts before locking. */
  1112. reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
  1113. info, OVS_FLOW_CMD_SET, false,
  1114. ufid_flags);
  1115. if (IS_ERR(reply)) {
  1116. error = PTR_ERR(reply);
  1117. goto err_unlock_ovs;
  1118. }
  1119. }
  1120. /* Clear stats. */
  1121. if (a[OVS_FLOW_ATTR_CLEAR])
  1122. ovs_flow_stats_clear(flow);
  1123. ovs_unlock();
  1124. if (reply)
  1125. ovs_notify(&dp_flow_genl_family, reply, info);
  1126. if (old_acts)
  1127. ovs_nla_free_flow_actions_rcu(old_acts);
  1128. return 0;
  1129. err_unlock_ovs:
  1130. ovs_unlock();
  1131. kfree_skb(reply);
  1132. err_kfree_acts:
  1133. ovs_nla_free_flow_actions(acts);
  1134. error:
  1135. return error;
  1136. }
  1137. static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
  1138. {
  1139. struct nlattr **a = info->attrs;
  1140. struct ovs_header *ovs_header = genl_info_userhdr(info);
  1141. struct net *net = sock_net(skb->sk);
  1142. struct sw_flow_key key;
  1143. struct sk_buff *reply;
  1144. struct sw_flow *flow;
  1145. struct datapath *dp;
  1146. struct sw_flow_match match;
  1147. struct sw_flow_id ufid;
  1148. u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
  1149. int err = 0;
  1150. bool log = !a[OVS_FLOW_ATTR_PROBE];
  1151. bool ufid_present;
  1152. ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
  1153. if (a[OVS_FLOW_ATTR_KEY]) {
  1154. ovs_match_init(&match, &key, true, NULL);
  1155. err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
  1156. log);
  1157. } else if (!ufid_present) {
  1158. OVS_NLERR(log,
  1159. "Flow get message rejected, Key attribute missing.");
  1160. err = -EINVAL;
  1161. }
  1162. if (err)
  1163. return err;
  1164. ovs_lock();
  1165. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  1166. if (!dp) {
  1167. err = -ENODEV;
  1168. goto unlock;
  1169. }
  1170. if (ufid_present)
  1171. flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
  1172. else
  1173. flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
  1174. if (!flow) {
  1175. err = -ENOENT;
  1176. goto unlock;
  1177. }
  1178. reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
  1179. OVS_FLOW_CMD_GET, true, ufid_flags);
  1180. if (IS_ERR(reply)) {
  1181. err = PTR_ERR(reply);
  1182. goto unlock;
  1183. }
  1184. ovs_unlock();
  1185. return genlmsg_reply(reply, info);
  1186. unlock:
  1187. ovs_unlock();
  1188. return err;
  1189. }
  1190. static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
  1191. {
  1192. struct nlattr **a = info->attrs;
  1193. struct ovs_header *ovs_header = genl_info_userhdr(info);
  1194. struct net *net = sock_net(skb->sk);
  1195. struct sw_flow_key key;
  1196. struct sk_buff *reply;
  1197. struct sw_flow *flow = NULL;
  1198. struct datapath *dp;
  1199. struct sw_flow_match match;
  1200. struct sw_flow_id ufid;
  1201. u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
  1202. int err;
  1203. bool log = !a[OVS_FLOW_ATTR_PROBE];
  1204. bool ufid_present;
  1205. ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
  1206. if (a[OVS_FLOW_ATTR_KEY]) {
  1207. ovs_match_init(&match, &key, true, NULL);
  1208. err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
  1209. NULL, log);
  1210. if (unlikely(err))
  1211. return err;
  1212. }
  1213. ovs_lock();
  1214. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  1215. if (unlikely(!dp)) {
  1216. err = -ENODEV;
  1217. goto unlock;
  1218. }
  1219. if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
  1220. err = ovs_flow_tbl_flush(&dp->table);
  1221. goto unlock;
  1222. }
  1223. if (ufid_present)
  1224. flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
  1225. else
  1226. flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
  1227. if (unlikely(!flow)) {
  1228. err = -ENOENT;
  1229. goto unlock;
  1230. }
  1231. ovs_flow_tbl_remove(&dp->table, flow);
  1232. ovs_unlock();
  1233. reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
  1234. &flow->id, info, false, ufid_flags);
  1235. if (likely(reply)) {
  1236. if (!IS_ERR(reply)) {
  1237. rcu_read_lock(); /*To keep RCU checker happy. */
  1238. err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
  1239. reply, info->snd_portid,
  1240. info->snd_seq, 0,
  1241. OVS_FLOW_CMD_DEL,
  1242. ufid_flags);
  1243. rcu_read_unlock();
  1244. if (WARN_ON_ONCE(err < 0)) {
  1245. kfree_skb(reply);
  1246. goto out_free;
  1247. }
  1248. ovs_notify(&dp_flow_genl_family, reply, info);
  1249. } else {
  1250. netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
  1251. PTR_ERR(reply));
  1252. }
  1253. }
  1254. out_free:
  1255. ovs_flow_free(flow, true);
  1256. return 0;
  1257. unlock:
  1258. ovs_unlock();
  1259. return err;
  1260. }
  1261. static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
  1262. {
  1263. struct nlattr *a[__OVS_FLOW_ATTR_MAX];
  1264. struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
  1265. struct table_instance *ti;
  1266. struct datapath *dp;
  1267. u32 ufid_flags;
  1268. int err;
  1269. err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
  1270. OVS_FLOW_ATTR_MAX, flow_policy, NULL);
  1271. if (err)
  1272. return err;
  1273. ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
  1274. rcu_read_lock();
  1275. dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
  1276. if (!dp) {
  1277. rcu_read_unlock();
  1278. return -ENODEV;
  1279. }
  1280. ti = rcu_dereference(dp->table.ti);
  1281. for (;;) {
  1282. struct sw_flow *flow;
  1283. u32 bucket, obj;
  1284. bucket = cb->args[0];
  1285. obj = cb->args[1];
  1286. flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
  1287. if (!flow)
  1288. break;
  1289. if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
  1290. NETLINK_CB(cb->skb).portid,
  1291. cb->nlh->nlmsg_seq, NLM_F_MULTI,
  1292. OVS_FLOW_CMD_GET, ufid_flags) < 0)
  1293. break;
  1294. cb->args[0] = bucket;
  1295. cb->args[1] = obj;
  1296. }
  1297. rcu_read_unlock();
  1298. return skb->len;
  1299. }
  1300. static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
  1301. [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
  1302. [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
  1303. [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
  1304. [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
  1305. [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
  1306. [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
  1307. [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
  1308. };
  1309. static const struct genl_small_ops dp_flow_genl_ops[] = {
  1310. { .cmd = OVS_FLOW_CMD_NEW,
  1311. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1312. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1313. .doit = ovs_flow_cmd_new
  1314. },
  1315. { .cmd = OVS_FLOW_CMD_DEL,
  1316. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1317. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1318. .doit = ovs_flow_cmd_del
  1319. },
  1320. { .cmd = OVS_FLOW_CMD_GET,
  1321. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1322. .flags = 0, /* OK for unprivileged users. */
  1323. .doit = ovs_flow_cmd_get,
  1324. .dumpit = ovs_flow_cmd_dump
  1325. },
  1326. { .cmd = OVS_FLOW_CMD_SET,
  1327. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1328. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1329. .doit = ovs_flow_cmd_set,
  1330. },
  1331. };
  1332. static struct genl_family dp_flow_genl_family __ro_after_init = {
  1333. .hdrsize = sizeof(struct ovs_header),
  1334. .name = OVS_FLOW_FAMILY,
  1335. .version = OVS_FLOW_VERSION,
  1336. .maxattr = OVS_FLOW_ATTR_MAX,
  1337. .policy = flow_policy,
  1338. .netnsok = true,
  1339. .parallel_ops = true,
  1340. .small_ops = dp_flow_genl_ops,
  1341. .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
  1342. .resv_start_op = OVS_FLOW_CMD_SET + 1,
  1343. .mcgrps = &ovs_dp_flow_multicast_group,
  1344. .n_mcgrps = 1,
  1345. .module = THIS_MODULE,
  1346. };
  1347. static size_t ovs_dp_cmd_msg_size(void)
  1348. {
  1349. size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
  1350. msgsize += nla_total_size(IFNAMSIZ);
  1351. msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
  1352. msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
  1353. msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
  1354. msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
  1355. msgsize += nla_total_size(sizeof(u32) * nr_cpu_ids); /* OVS_DP_ATTR_PER_CPU_PIDS */
  1356. return msgsize;
  1357. }
  1358. /* Called with ovs_mutex. */
  1359. static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
  1360. u32 portid, u32 seq, u32 flags, u8 cmd)
  1361. {
  1362. struct ovs_header *ovs_header;
  1363. struct ovs_dp_stats dp_stats;
  1364. struct ovs_dp_megaflow_stats dp_megaflow_stats;
  1365. struct dp_nlsk_pids *pids = ovsl_dereference(dp->upcall_portids);
  1366. int err, pids_len;
  1367. ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
  1368. flags, cmd);
  1369. if (!ovs_header)
  1370. goto error;
  1371. ovs_header->dp_ifindex = get_dpifindex(dp);
  1372. err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
  1373. if (err)
  1374. goto nla_put_failure;
  1375. get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
  1376. if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
  1377. &dp_stats, OVS_DP_ATTR_PAD))
  1378. goto nla_put_failure;
  1379. if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
  1380. sizeof(struct ovs_dp_megaflow_stats),
  1381. &dp_megaflow_stats, OVS_DP_ATTR_PAD))
  1382. goto nla_put_failure;
  1383. if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
  1384. goto nla_put_failure;
  1385. if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
  1386. ovs_flow_tbl_masks_cache_size(&dp->table)))
  1387. goto nla_put_failure;
  1388. if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {
  1389. pids_len = min(pids->n_pids, nr_cpu_ids) * sizeof(u32);
  1390. if (nla_put(skb, OVS_DP_ATTR_PER_CPU_PIDS, pids_len, &pids->pids))
  1391. goto nla_put_failure;
  1392. }
  1393. genlmsg_end(skb, ovs_header);
  1394. return 0;
  1395. nla_put_failure:
  1396. genlmsg_cancel(skb, ovs_header);
  1397. error:
  1398. return -EMSGSIZE;
  1399. }
  1400. static struct sk_buff *ovs_dp_cmd_alloc_info(void)
  1401. {
  1402. return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
  1403. }
  1404. /* Called with rcu_read_lock or ovs_mutex. */
  1405. static struct datapath *lookup_datapath(struct net *net,
  1406. const struct ovs_header *ovs_header,
  1407. struct nlattr *a[OVS_DP_ATTR_MAX + 1])
  1408. {
  1409. struct datapath *dp;
  1410. if (!a[OVS_DP_ATTR_NAME])
  1411. dp = get_dp(net, ovs_header->dp_ifindex);
  1412. else {
  1413. struct vport *vport;
  1414. vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
  1415. dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
  1416. }
  1417. return dp ? dp : ERR_PTR(-ENODEV);
  1418. }
  1419. static void ovs_dp_reset_user_features(struct sk_buff *skb,
  1420. struct genl_info *info)
  1421. {
  1422. struct datapath *dp;
  1423. dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
  1424. info->attrs);
  1425. if (IS_ERR(dp))
  1426. return;
  1427. pr_warn("%s: Dropping previously announced user features\n",
  1428. ovs_dp_name(dp));
  1429. dp->user_features = 0;
  1430. }
  1431. static int ovs_dp_set_upcall_portids(struct datapath *dp,
  1432. const struct nlattr *ids)
  1433. {
  1434. struct dp_nlsk_pids *old, *dp_nlsk_pids;
  1435. if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
  1436. return -EINVAL;
  1437. old = ovsl_dereference(dp->upcall_portids);
  1438. dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
  1439. GFP_KERNEL);
  1440. if (!dp_nlsk_pids)
  1441. return -ENOMEM;
  1442. dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
  1443. nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
  1444. rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
  1445. kfree_rcu(old, rcu);
  1446. return 0;
  1447. }
  1448. u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
  1449. {
  1450. struct dp_nlsk_pids *dp_nlsk_pids;
  1451. dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
  1452. if (dp_nlsk_pids) {
  1453. if (cpu_id < dp_nlsk_pids->n_pids) {
  1454. return dp_nlsk_pids->pids[cpu_id];
  1455. } else if (dp_nlsk_pids->n_pids > 0 &&
  1456. cpu_id >= dp_nlsk_pids->n_pids) {
  1457. /* If the number of netlink PIDs is mismatched with
  1458. * the number of CPUs as seen by the kernel, log this
  1459. * and send the upcall to an arbitrary socket (0) in
  1460. * order to not drop packets
  1461. */
  1462. pr_info_ratelimited("cpu_id mismatch with handler threads");
  1463. return dp_nlsk_pids->pids[cpu_id %
  1464. dp_nlsk_pids->n_pids];
  1465. } else {
  1466. return 0;
  1467. }
  1468. } else {
  1469. return 0;
  1470. }
  1471. }
  1472. static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
  1473. {
  1474. u32 user_features = 0, old_features = dp->user_features;
  1475. int err;
  1476. if (a[OVS_DP_ATTR_USER_FEATURES]) {
  1477. user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
  1478. if (user_features & ~(OVS_DP_F_VPORT_PIDS |
  1479. OVS_DP_F_UNALIGNED |
  1480. OVS_DP_F_TC_RECIRC_SHARING |
  1481. OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
  1482. return -EOPNOTSUPP;
  1483. #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
  1484. if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
  1485. return -EOPNOTSUPP;
  1486. #endif
  1487. }
  1488. if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
  1489. int err;
  1490. u32 cache_size;
  1491. cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
  1492. err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
  1493. if (err)
  1494. return err;
  1495. }
  1496. dp->user_features = user_features;
  1497. if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
  1498. a[OVS_DP_ATTR_PER_CPU_PIDS]) {
  1499. /* Upcall Netlink Port IDs have been updated */
  1500. err = ovs_dp_set_upcall_portids(dp,
  1501. a[OVS_DP_ATTR_PER_CPU_PIDS]);
  1502. if (err)
  1503. return err;
  1504. }
  1505. if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
  1506. !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
  1507. tc_skb_ext_tc_enable();
  1508. else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
  1509. (old_features & OVS_DP_F_TC_RECIRC_SHARING))
  1510. tc_skb_ext_tc_disable();
  1511. return 0;
  1512. }
  1513. static int ovs_dp_stats_init(struct datapath *dp)
  1514. {
  1515. dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
  1516. if (!dp->stats_percpu)
  1517. return -ENOMEM;
  1518. return 0;
  1519. }
  1520. static int ovs_dp_vport_init(struct datapath *dp)
  1521. {
  1522. int i;
  1523. dp->ports = kmalloc_objs(struct hlist_head, DP_VPORT_HASH_BUCKETS);
  1524. if (!dp->ports)
  1525. return -ENOMEM;
  1526. for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
  1527. INIT_HLIST_HEAD(&dp->ports[i]);
  1528. return 0;
  1529. }
  1530. static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
  1531. {
  1532. struct nlattr **a = info->attrs;
  1533. struct vport_parms parms;
  1534. struct sk_buff *reply;
  1535. struct datapath *dp;
  1536. struct vport *vport;
  1537. struct ovs_net *ovs_net;
  1538. int err;
  1539. err = -EINVAL;
  1540. if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
  1541. goto err;
  1542. reply = ovs_dp_cmd_alloc_info();
  1543. if (!reply)
  1544. return -ENOMEM;
  1545. err = -ENOMEM;
  1546. dp = kzalloc_obj(*dp);
  1547. if (dp == NULL)
  1548. goto err_destroy_reply;
  1549. ovs_dp_set_net(dp, sock_net(skb->sk));
  1550. /* Allocate table. */
  1551. err = ovs_flow_tbl_init(&dp->table);
  1552. if (err)
  1553. goto err_destroy_dp;
  1554. err = ovs_dp_stats_init(dp);
  1555. if (err)
  1556. goto err_destroy_table;
  1557. err = ovs_dp_vport_init(dp);
  1558. if (err)
  1559. goto err_destroy_stats;
  1560. err = ovs_meters_init(dp);
  1561. if (err)
  1562. goto err_destroy_ports;
  1563. /* Set up our datapath device. */
  1564. parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
  1565. parms.type = OVS_VPORT_TYPE_INTERNAL;
  1566. parms.options = NULL;
  1567. parms.dp = dp;
  1568. parms.port_no = OVSP_LOCAL;
  1569. parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
  1570. parms.desired_ifindex = nla_get_s32_default(a[OVS_DP_ATTR_IFINDEX], 0);
  1571. /* So far only local changes have been made, now need the lock. */
  1572. ovs_lock();
  1573. err = ovs_dp_change(dp, a);
  1574. if (err)
  1575. goto err_unlock_and_destroy_meters;
  1576. vport = new_vport(&parms);
  1577. if (IS_ERR(vport)) {
  1578. err = PTR_ERR(vport);
  1579. if (err == -EBUSY)
  1580. err = -EEXIST;
  1581. if (err == -EEXIST) {
  1582. /* An outdated user space instance that does not understand
  1583. * the concept of user_features has attempted to create a new
  1584. * datapath and is likely to reuse it. Drop all user features.
  1585. */
  1586. if (info->genlhdr->version < OVS_DP_VER_FEATURES)
  1587. ovs_dp_reset_user_features(skb, info);
  1588. }
  1589. goto err_destroy_portids;
  1590. }
  1591. err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
  1592. info->snd_seq, 0, OVS_DP_CMD_NEW);
  1593. BUG_ON(err < 0);
  1594. ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
  1595. list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
  1596. ovs_unlock();
  1597. ovs_notify(&dp_datapath_genl_family, reply, info);
  1598. return 0;
  1599. err_destroy_portids:
  1600. kfree(rcu_dereference_raw(dp->upcall_portids));
  1601. err_unlock_and_destroy_meters:
  1602. ovs_unlock();
  1603. ovs_meters_exit(dp);
  1604. err_destroy_ports:
  1605. kfree(dp->ports);
  1606. err_destroy_stats:
  1607. free_percpu(dp->stats_percpu);
  1608. err_destroy_table:
  1609. ovs_flow_tbl_destroy(&dp->table);
  1610. err_destroy_dp:
  1611. kfree(dp);
  1612. err_destroy_reply:
  1613. kfree_skb(reply);
  1614. err:
  1615. return err;
  1616. }
  1617. /* Called with ovs_mutex. */
  1618. static void __dp_destroy(struct datapath *dp)
  1619. {
  1620. struct flow_table *table = &dp->table;
  1621. int i;
  1622. if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
  1623. tc_skb_ext_tc_disable();
  1624. for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
  1625. struct vport *vport;
  1626. struct hlist_node *n;
  1627. hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
  1628. if (vport->port_no != OVSP_LOCAL)
  1629. ovs_dp_detach_port(vport);
  1630. }
  1631. list_del_rcu(&dp->list_node);
  1632. /* OVSP_LOCAL is datapath internal port. We need to make sure that
  1633. * all ports in datapath are destroyed first before freeing datapath.
  1634. */
  1635. ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
  1636. /* Flush sw_flow in the tables. RCU cb only releases resource
  1637. * such as dp, ports and tables. That may avoid some issues
  1638. * such as RCU usage warning.
  1639. */
  1640. table_instance_flow_flush(table, ovsl_dereference(table->ti),
  1641. ovsl_dereference(table->ufid_ti));
  1642. /* RCU destroy the ports, meters and flow tables. */
  1643. call_rcu(&dp->rcu, destroy_dp_rcu);
  1644. }
  1645. static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
  1646. {
  1647. struct sk_buff *reply;
  1648. struct datapath *dp;
  1649. int err;
  1650. reply = ovs_dp_cmd_alloc_info();
  1651. if (!reply)
  1652. return -ENOMEM;
  1653. ovs_lock();
  1654. dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
  1655. info->attrs);
  1656. err = PTR_ERR(dp);
  1657. if (IS_ERR(dp))
  1658. goto err_unlock_free;
  1659. err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
  1660. info->snd_seq, 0, OVS_DP_CMD_DEL);
  1661. BUG_ON(err < 0);
  1662. __dp_destroy(dp);
  1663. ovs_unlock();
  1664. ovs_notify(&dp_datapath_genl_family, reply, info);
  1665. return 0;
  1666. err_unlock_free:
  1667. ovs_unlock();
  1668. kfree_skb(reply);
  1669. return err;
  1670. }
  1671. static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
  1672. {
  1673. struct sk_buff *reply;
  1674. struct datapath *dp;
  1675. int err;
  1676. reply = ovs_dp_cmd_alloc_info();
  1677. if (!reply)
  1678. return -ENOMEM;
  1679. ovs_lock();
  1680. dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
  1681. info->attrs);
  1682. err = PTR_ERR(dp);
  1683. if (IS_ERR(dp))
  1684. goto err_unlock_free;
  1685. err = ovs_dp_change(dp, info->attrs);
  1686. if (err)
  1687. goto err_unlock_free;
  1688. err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
  1689. info->snd_seq, 0, OVS_DP_CMD_SET);
  1690. BUG_ON(err < 0);
  1691. ovs_unlock();
  1692. ovs_notify(&dp_datapath_genl_family, reply, info);
  1693. return 0;
  1694. err_unlock_free:
  1695. ovs_unlock();
  1696. kfree_skb(reply);
  1697. return err;
  1698. }
  1699. static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
  1700. {
  1701. struct sk_buff *reply;
  1702. struct datapath *dp;
  1703. int err;
  1704. reply = ovs_dp_cmd_alloc_info();
  1705. if (!reply)
  1706. return -ENOMEM;
  1707. ovs_lock();
  1708. dp = lookup_datapath(sock_net(skb->sk), genl_info_userhdr(info),
  1709. info->attrs);
  1710. if (IS_ERR(dp)) {
  1711. err = PTR_ERR(dp);
  1712. goto err_unlock_free;
  1713. }
  1714. err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
  1715. info->snd_seq, 0, OVS_DP_CMD_GET);
  1716. BUG_ON(err < 0);
  1717. ovs_unlock();
  1718. return genlmsg_reply(reply, info);
  1719. err_unlock_free:
  1720. ovs_unlock();
  1721. kfree_skb(reply);
  1722. return err;
  1723. }
  1724. static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
  1725. {
  1726. struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
  1727. struct datapath *dp;
  1728. int skip = cb->args[0];
  1729. int i = 0;
  1730. ovs_lock();
  1731. list_for_each_entry(dp, &ovs_net->dps, list_node) {
  1732. if (i >= skip &&
  1733. ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
  1734. cb->nlh->nlmsg_seq, NLM_F_MULTI,
  1735. OVS_DP_CMD_GET) < 0)
  1736. break;
  1737. i++;
  1738. }
  1739. ovs_unlock();
  1740. cb->args[0] = i;
  1741. return skb->len;
  1742. }
  1743. static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
  1744. [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
  1745. [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
  1746. [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
  1747. [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
  1748. PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
  1749. [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
  1750. };
  1751. static const struct genl_small_ops dp_datapath_genl_ops[] = {
  1752. { .cmd = OVS_DP_CMD_NEW,
  1753. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1754. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1755. .doit = ovs_dp_cmd_new
  1756. },
  1757. { .cmd = OVS_DP_CMD_DEL,
  1758. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1759. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1760. .doit = ovs_dp_cmd_del
  1761. },
  1762. { .cmd = OVS_DP_CMD_GET,
  1763. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1764. .flags = 0, /* OK for unprivileged users. */
  1765. .doit = ovs_dp_cmd_get,
  1766. .dumpit = ovs_dp_cmd_dump
  1767. },
  1768. { .cmd = OVS_DP_CMD_SET,
  1769. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  1770. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  1771. .doit = ovs_dp_cmd_set,
  1772. },
  1773. };
  1774. static struct genl_family dp_datapath_genl_family __ro_after_init = {
  1775. .hdrsize = sizeof(struct ovs_header),
  1776. .name = OVS_DATAPATH_FAMILY,
  1777. .version = OVS_DATAPATH_VERSION,
  1778. .maxattr = OVS_DP_ATTR_MAX,
  1779. .policy = datapath_policy,
  1780. .netnsok = true,
  1781. .parallel_ops = true,
  1782. .small_ops = dp_datapath_genl_ops,
  1783. .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
  1784. .resv_start_op = OVS_DP_CMD_SET + 1,
  1785. .mcgrps = &ovs_dp_datapath_multicast_group,
  1786. .n_mcgrps = 1,
  1787. .module = THIS_MODULE,
  1788. };
  1789. /* Called with ovs_mutex or RCU read lock. */
  1790. static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
  1791. struct net *net, u32 portid, u32 seq,
  1792. u32 flags, u8 cmd, gfp_t gfp)
  1793. {
  1794. struct ovs_header *ovs_header;
  1795. struct ovs_vport_stats vport_stats;
  1796. struct net *net_vport;
  1797. int err;
  1798. ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
  1799. flags, cmd);
  1800. if (!ovs_header)
  1801. return -EMSGSIZE;
  1802. ovs_header->dp_ifindex = get_dpifindex(vport->dp);
  1803. if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
  1804. nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
  1805. nla_put_string(skb, OVS_VPORT_ATTR_NAME,
  1806. ovs_vport_name(vport)) ||
  1807. nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
  1808. goto nla_put_failure;
  1809. rcu_read_lock();
  1810. net_vport = dev_net_rcu(vport->dev);
  1811. if (!net_eq(net, net_vport)) {
  1812. int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC);
  1813. if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
  1814. goto nla_put_failure_unlock;
  1815. }
  1816. rcu_read_unlock();
  1817. ovs_vport_get_stats(vport, &vport_stats);
  1818. if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
  1819. sizeof(struct ovs_vport_stats), &vport_stats,
  1820. OVS_VPORT_ATTR_PAD))
  1821. goto nla_put_failure;
  1822. if (ovs_vport_get_upcall_stats(vport, skb))
  1823. goto nla_put_failure;
  1824. if (ovs_vport_get_upcall_portids(vport, skb))
  1825. goto nla_put_failure;
  1826. err = ovs_vport_get_options(vport, skb);
  1827. if (err == -EMSGSIZE)
  1828. goto error;
  1829. genlmsg_end(skb, ovs_header);
  1830. return 0;
  1831. nla_put_failure_unlock:
  1832. rcu_read_unlock();
  1833. nla_put_failure:
  1834. err = -EMSGSIZE;
  1835. error:
  1836. genlmsg_cancel(skb, ovs_header);
  1837. return err;
  1838. }
  1839. static struct sk_buff *ovs_vport_cmd_alloc_info(void)
  1840. {
  1841. return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
  1842. }
  1843. /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
  1844. struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
  1845. u32 portid, u32 seq, u8 cmd)
  1846. {
  1847. struct sk_buff *skb;
  1848. int retval;
  1849. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
  1850. if (!skb)
  1851. return ERR_PTR(-ENOMEM);
  1852. retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
  1853. GFP_KERNEL);
  1854. BUG_ON(retval < 0);
  1855. return skb;
  1856. }
  1857. /* Called with ovs_mutex or RCU read lock. */
  1858. static struct vport *lookup_vport(struct net *net,
  1859. const struct ovs_header *ovs_header,
  1860. struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
  1861. {
  1862. struct datapath *dp;
  1863. struct vport *vport;
  1864. if (a[OVS_VPORT_ATTR_IFINDEX])
  1865. return ERR_PTR(-EOPNOTSUPP);
  1866. if (a[OVS_VPORT_ATTR_NAME]) {
  1867. vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
  1868. if (!vport)
  1869. return ERR_PTR(-ENODEV);
  1870. if (ovs_header->dp_ifindex &&
  1871. ovs_header->dp_ifindex != get_dpifindex(vport->dp))
  1872. return ERR_PTR(-ENODEV);
  1873. return vport;
  1874. } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
  1875. u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
  1876. if (port_no >= DP_MAX_PORTS)
  1877. return ERR_PTR(-EFBIG);
  1878. dp = get_dp(net, ovs_header->dp_ifindex);
  1879. if (!dp)
  1880. return ERR_PTR(-ENODEV);
  1881. vport = ovs_vport_ovsl_rcu(dp, port_no);
  1882. if (!vport)
  1883. return ERR_PTR(-ENODEV);
  1884. return vport;
  1885. } else
  1886. return ERR_PTR(-EINVAL);
  1887. }
  1888. static unsigned int ovs_get_max_headroom(struct datapath *dp)
  1889. {
  1890. unsigned int dev_headroom, max_headroom = 0;
  1891. struct net_device *dev;
  1892. struct vport *vport;
  1893. int i;
  1894. for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
  1895. hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
  1896. lockdep_ovsl_is_held()) {
  1897. dev = vport->dev;
  1898. dev_headroom = netdev_get_fwd_headroom(dev);
  1899. if (dev_headroom > max_headroom)
  1900. max_headroom = dev_headroom;
  1901. }
  1902. }
  1903. return max_headroom;
  1904. }
  1905. /* Called with ovs_mutex */
  1906. static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
  1907. {
  1908. struct vport *vport;
  1909. int i;
  1910. dp->max_headroom = new_headroom;
  1911. for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
  1912. hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
  1913. lockdep_ovsl_is_held())
  1914. netdev_set_rx_headroom(vport->dev, new_headroom);
  1915. }
  1916. }
  1917. static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
  1918. {
  1919. struct nlattr **a = info->attrs;
  1920. struct ovs_header *ovs_header = genl_info_userhdr(info);
  1921. struct vport_parms parms;
  1922. struct sk_buff *reply;
  1923. struct vport *vport;
  1924. struct datapath *dp;
  1925. unsigned int new_headroom;
  1926. u32 port_no;
  1927. int err;
  1928. if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
  1929. !a[OVS_VPORT_ATTR_UPCALL_PID])
  1930. return -EINVAL;
  1931. parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
  1932. if (a[OVS_VPORT_ATTR_IFINDEX] && parms.type != OVS_VPORT_TYPE_INTERNAL)
  1933. return -EOPNOTSUPP;
  1934. port_no = nla_get_u32_default(a[OVS_VPORT_ATTR_PORT_NO], 0);
  1935. if (port_no >= DP_MAX_PORTS)
  1936. return -EFBIG;
  1937. reply = ovs_vport_cmd_alloc_info();
  1938. if (!reply)
  1939. return -ENOMEM;
  1940. ovs_lock();
  1941. restart:
  1942. dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
  1943. err = -ENODEV;
  1944. if (!dp)
  1945. goto exit_unlock_free;
  1946. if (port_no) {
  1947. vport = ovs_vport_ovsl(dp, port_no);
  1948. err = -EBUSY;
  1949. if (vport)
  1950. goto exit_unlock_free;
  1951. } else {
  1952. for (port_no = 1; ; port_no++) {
  1953. if (port_no >= DP_MAX_PORTS) {
  1954. err = -EFBIG;
  1955. goto exit_unlock_free;
  1956. }
  1957. vport = ovs_vport_ovsl(dp, port_no);
  1958. if (!vport)
  1959. break;
  1960. }
  1961. }
  1962. parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
  1963. parms.options = a[OVS_VPORT_ATTR_OPTIONS];
  1964. parms.dp = dp;
  1965. parms.port_no = port_no;
  1966. parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
  1967. parms.desired_ifindex = nla_get_s32_default(a[OVS_VPORT_ATTR_IFINDEX],
  1968. 0);
  1969. vport = new_vport(&parms);
  1970. err = PTR_ERR(vport);
  1971. if (IS_ERR(vport)) {
  1972. if (err == -EAGAIN)
  1973. goto restart;
  1974. goto exit_unlock_free;
  1975. }
  1976. err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
  1977. info->snd_portid, info->snd_seq, 0,
  1978. OVS_VPORT_CMD_NEW, GFP_KERNEL);
  1979. new_headroom = netdev_get_fwd_headroom(vport->dev);
  1980. if (new_headroom > dp->max_headroom)
  1981. ovs_update_headroom(dp, new_headroom);
  1982. else
  1983. netdev_set_rx_headroom(vport->dev, dp->max_headroom);
  1984. BUG_ON(err < 0);
  1985. ovs_unlock();
  1986. ovs_notify(&dp_vport_genl_family, reply, info);
  1987. return 0;
  1988. exit_unlock_free:
  1989. ovs_unlock();
  1990. kfree_skb(reply);
  1991. return err;
  1992. }
  1993. static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
  1994. {
  1995. struct nlattr **a = info->attrs;
  1996. struct sk_buff *reply;
  1997. struct vport *vport;
  1998. int err;
  1999. reply = ovs_vport_cmd_alloc_info();
  2000. if (!reply)
  2001. return -ENOMEM;
  2002. ovs_lock();
  2003. vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
  2004. err = PTR_ERR(vport);
  2005. if (IS_ERR(vport))
  2006. goto exit_unlock_free;
  2007. if (a[OVS_VPORT_ATTR_TYPE] &&
  2008. nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
  2009. err = -EINVAL;
  2010. goto exit_unlock_free;
  2011. }
  2012. if (a[OVS_VPORT_ATTR_OPTIONS]) {
  2013. err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
  2014. if (err)
  2015. goto exit_unlock_free;
  2016. }
  2017. if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
  2018. struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
  2019. err = ovs_vport_set_upcall_portids(vport, ids);
  2020. if (err)
  2021. goto exit_unlock_free;
  2022. }
  2023. err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
  2024. info->snd_portid, info->snd_seq, 0,
  2025. OVS_VPORT_CMD_SET, GFP_KERNEL);
  2026. BUG_ON(err < 0);
  2027. ovs_unlock();
  2028. ovs_notify(&dp_vport_genl_family, reply, info);
  2029. return 0;
  2030. exit_unlock_free:
  2031. ovs_unlock();
  2032. kfree_skb(reply);
  2033. return err;
  2034. }
  2035. static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
  2036. {
  2037. bool update_headroom = false;
  2038. struct nlattr **a = info->attrs;
  2039. struct sk_buff *reply;
  2040. struct datapath *dp;
  2041. struct vport *vport;
  2042. unsigned int new_headroom;
  2043. int err;
  2044. reply = ovs_vport_cmd_alloc_info();
  2045. if (!reply)
  2046. return -ENOMEM;
  2047. ovs_lock();
  2048. vport = lookup_vport(sock_net(skb->sk), genl_info_userhdr(info), a);
  2049. err = PTR_ERR(vport);
  2050. if (IS_ERR(vport))
  2051. goto exit_unlock_free;
  2052. if (vport->port_no == OVSP_LOCAL) {
  2053. err = -EINVAL;
  2054. goto exit_unlock_free;
  2055. }
  2056. err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
  2057. info->snd_portid, info->snd_seq, 0,
  2058. OVS_VPORT_CMD_DEL, GFP_KERNEL);
  2059. BUG_ON(err < 0);
  2060. /* the vport deletion may trigger dp headroom update */
  2061. dp = vport->dp;
  2062. if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
  2063. update_headroom = true;
  2064. netdev_reset_rx_headroom(vport->dev);
  2065. ovs_dp_detach_port(vport);
  2066. if (update_headroom) {
  2067. new_headroom = ovs_get_max_headroom(dp);
  2068. if (new_headroom < dp->max_headroom)
  2069. ovs_update_headroom(dp, new_headroom);
  2070. }
  2071. ovs_unlock();
  2072. ovs_notify(&dp_vport_genl_family, reply, info);
  2073. return 0;
  2074. exit_unlock_free:
  2075. ovs_unlock();
  2076. kfree_skb(reply);
  2077. return err;
  2078. }
  2079. static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
  2080. {
  2081. struct nlattr **a = info->attrs;
  2082. struct ovs_header *ovs_header = genl_info_userhdr(info);
  2083. struct sk_buff *reply;
  2084. struct vport *vport;
  2085. int err;
  2086. reply = ovs_vport_cmd_alloc_info();
  2087. if (!reply)
  2088. return -ENOMEM;
  2089. rcu_read_lock();
  2090. vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
  2091. err = PTR_ERR(vport);
  2092. if (IS_ERR(vport))
  2093. goto exit_unlock_free;
  2094. err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
  2095. info->snd_portid, info->snd_seq, 0,
  2096. OVS_VPORT_CMD_GET, GFP_ATOMIC);
  2097. BUG_ON(err < 0);
  2098. rcu_read_unlock();
  2099. return genlmsg_reply(reply, info);
  2100. exit_unlock_free:
  2101. rcu_read_unlock();
  2102. kfree_skb(reply);
  2103. return err;
  2104. }
  2105. static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
  2106. {
  2107. struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
  2108. struct datapath *dp;
  2109. int bucket = cb->args[0], skip = cb->args[1];
  2110. int i, j = 0;
  2111. rcu_read_lock();
  2112. dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
  2113. if (!dp) {
  2114. rcu_read_unlock();
  2115. return -ENODEV;
  2116. }
  2117. for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
  2118. struct vport *vport;
  2119. j = 0;
  2120. hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
  2121. if (j >= skip &&
  2122. ovs_vport_cmd_fill_info(vport, skb,
  2123. sock_net(skb->sk),
  2124. NETLINK_CB(cb->skb).portid,
  2125. cb->nlh->nlmsg_seq,
  2126. NLM_F_MULTI,
  2127. OVS_VPORT_CMD_GET,
  2128. GFP_ATOMIC) < 0)
  2129. goto out;
  2130. j++;
  2131. }
  2132. skip = 0;
  2133. }
  2134. out:
  2135. rcu_read_unlock();
  2136. cb->args[0] = i;
  2137. cb->args[1] = j;
  2138. return skb->len;
  2139. }
  2140. static void ovs_dp_masks_rebalance(struct work_struct *work)
  2141. {
  2142. struct ovs_net *ovs_net = container_of(work, struct ovs_net,
  2143. masks_rebalance.work);
  2144. struct datapath *dp;
  2145. ovs_lock();
  2146. list_for_each_entry(dp, &ovs_net->dps, list_node)
  2147. ovs_flow_masks_rebalance(&dp->table);
  2148. ovs_unlock();
  2149. schedule_delayed_work(&ovs_net->masks_rebalance,
  2150. msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
  2151. }
  2152. static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
  2153. [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
  2154. [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
  2155. [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
  2156. [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
  2157. [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
  2158. [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
  2159. [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
  2160. [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
  2161. [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
  2162. };
  2163. static const struct genl_small_ops dp_vport_genl_ops[] = {
  2164. { .cmd = OVS_VPORT_CMD_NEW,
  2165. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  2166. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  2167. .doit = ovs_vport_cmd_new
  2168. },
  2169. { .cmd = OVS_VPORT_CMD_DEL,
  2170. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  2171. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  2172. .doit = ovs_vport_cmd_del
  2173. },
  2174. { .cmd = OVS_VPORT_CMD_GET,
  2175. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  2176. .flags = 0, /* OK for unprivileged users. */
  2177. .doit = ovs_vport_cmd_get,
  2178. .dumpit = ovs_vport_cmd_dump
  2179. },
  2180. { .cmd = OVS_VPORT_CMD_SET,
  2181. .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  2182. .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
  2183. .doit = ovs_vport_cmd_set,
  2184. },
  2185. };
  2186. struct genl_family dp_vport_genl_family __ro_after_init = {
  2187. .hdrsize = sizeof(struct ovs_header),
  2188. .name = OVS_VPORT_FAMILY,
  2189. .version = OVS_VPORT_VERSION,
  2190. .maxattr = OVS_VPORT_ATTR_MAX,
  2191. .policy = vport_policy,
  2192. .netnsok = true,
  2193. .parallel_ops = true,
  2194. .small_ops = dp_vport_genl_ops,
  2195. .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
  2196. .resv_start_op = OVS_VPORT_CMD_SET + 1,
  2197. .mcgrps = &ovs_dp_vport_multicast_group,
  2198. .n_mcgrps = 1,
  2199. .module = THIS_MODULE,
  2200. };
  2201. static struct genl_family * const dp_genl_families[] = {
  2202. &dp_datapath_genl_family,
  2203. &dp_vport_genl_family,
  2204. &dp_flow_genl_family,
  2205. &dp_packet_genl_family,
  2206. &dp_meter_genl_family,
  2207. #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
  2208. &dp_ct_limit_genl_family,
  2209. #endif
  2210. };
  2211. static void dp_unregister_genl(int n_families)
  2212. {
  2213. int i;
  2214. for (i = 0; i < n_families; i++)
  2215. genl_unregister_family(dp_genl_families[i]);
  2216. }
  2217. static int __init dp_register_genl(void)
  2218. {
  2219. int err;
  2220. int i;
  2221. for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
  2222. err = genl_register_family(dp_genl_families[i]);
  2223. if (err)
  2224. goto error;
  2225. }
  2226. return 0;
  2227. error:
  2228. dp_unregister_genl(i);
  2229. return err;
  2230. }
  2231. static int __net_init ovs_init_net(struct net *net)
  2232. {
  2233. struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
  2234. int err;
  2235. INIT_LIST_HEAD(&ovs_net->dps);
  2236. INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
  2237. INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
  2238. err = ovs_ct_init(net);
  2239. if (err)
  2240. return err;
  2241. schedule_delayed_work(&ovs_net->masks_rebalance,
  2242. msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
  2243. return 0;
  2244. }
  2245. static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
  2246. struct list_head *head)
  2247. {
  2248. struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
  2249. struct datapath *dp;
  2250. list_for_each_entry(dp, &ovs_net->dps, list_node) {
  2251. int i;
  2252. for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
  2253. struct vport *vport;
  2254. hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
  2255. if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
  2256. continue;
  2257. if (dev_net(vport->dev) == dnet)
  2258. list_add(&vport->detach_list, head);
  2259. }
  2260. }
  2261. }
  2262. }
  2263. static void __net_exit ovs_exit_net(struct net *dnet)
  2264. {
  2265. struct datapath *dp, *dp_next;
  2266. struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
  2267. struct vport *vport, *vport_next;
  2268. struct net *net;
  2269. LIST_HEAD(head);
  2270. ovs_lock();
  2271. ovs_ct_exit(dnet);
  2272. list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
  2273. __dp_destroy(dp);
  2274. down_read(&net_rwsem);
  2275. for_each_net(net)
  2276. list_vports_from_net(net, dnet, &head);
  2277. up_read(&net_rwsem);
  2278. /* Detach all vports from given namespace. */
  2279. list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
  2280. list_del(&vport->detach_list);
  2281. ovs_dp_detach_port(vport);
  2282. }
  2283. ovs_unlock();
  2284. cancel_delayed_work_sync(&ovs_net->masks_rebalance);
  2285. cancel_work_sync(&ovs_net->dp_notify_work);
  2286. }
  2287. static struct pernet_operations ovs_net_ops = {
  2288. .init = ovs_init_net,
  2289. .exit = ovs_exit_net,
  2290. .id = &ovs_net_id,
  2291. .size = sizeof(struct ovs_net),
  2292. };
  2293. static const char * const ovs_drop_reasons[] = {
  2294. #define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x),
  2295. OVS_DROP_REASONS(S)
  2296. #undef S
  2297. };
  2298. static struct drop_reason_list drop_reason_list_ovs = {
  2299. .reasons = ovs_drop_reasons,
  2300. .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
  2301. };
  2302. static int __init ovs_alloc_percpu_storage(void)
  2303. {
  2304. unsigned int cpu;
  2305. ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
  2306. if (!ovs_pcpu_storage)
  2307. return -ENOMEM;
  2308. for_each_possible_cpu(cpu) {
  2309. struct ovs_pcpu_storage *ovs_pcpu;
  2310. ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
  2311. local_lock_init(&ovs_pcpu->bh_lock);
  2312. }
  2313. return 0;
  2314. }
  2315. static void ovs_free_percpu_storage(void)
  2316. {
  2317. free_percpu(ovs_pcpu_storage);
  2318. }
  2319. static int __init dp_init(void)
  2320. {
  2321. int err;
  2322. BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
  2323. sizeof_field(struct sk_buff, cb));
  2324. pr_info("Open vSwitch switching datapath\n");
  2325. err = ovs_alloc_percpu_storage();
  2326. if (err)
  2327. goto error;
  2328. err = ovs_internal_dev_rtnl_link_register();
  2329. if (err)
  2330. goto error;
  2331. err = ovs_flow_init();
  2332. if (err)
  2333. goto error_unreg_rtnl_link;
  2334. err = ovs_vport_init();
  2335. if (err)
  2336. goto error_flow_exit;
  2337. err = register_pernet_device(&ovs_net_ops);
  2338. if (err)
  2339. goto error_vport_exit;
  2340. err = register_netdevice_notifier(&ovs_dp_device_notifier);
  2341. if (err)
  2342. goto error_netns_exit;
  2343. err = ovs_netdev_init();
  2344. if (err)
  2345. goto error_unreg_notifier;
  2346. err = dp_register_genl();
  2347. if (err < 0)
  2348. goto error_unreg_netdev;
  2349. drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH,
  2350. &drop_reason_list_ovs);
  2351. return 0;
  2352. error_unreg_netdev:
  2353. ovs_netdev_exit();
  2354. error_unreg_notifier:
  2355. unregister_netdevice_notifier(&ovs_dp_device_notifier);
  2356. error_netns_exit:
  2357. unregister_pernet_device(&ovs_net_ops);
  2358. error_vport_exit:
  2359. ovs_vport_exit();
  2360. error_flow_exit:
  2361. ovs_flow_exit();
  2362. error_unreg_rtnl_link:
  2363. ovs_internal_dev_rtnl_link_unregister();
  2364. error:
  2365. ovs_free_percpu_storage();
  2366. return err;
  2367. }
  2368. static void dp_cleanup(void)
  2369. {
  2370. dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
  2371. ovs_netdev_exit();
  2372. unregister_netdevice_notifier(&ovs_dp_device_notifier);
  2373. unregister_pernet_device(&ovs_net_ops);
  2374. drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_OPENVSWITCH);
  2375. rcu_barrier();
  2376. ovs_vport_exit();
  2377. ovs_flow_exit();
  2378. ovs_internal_dev_rtnl_link_unregister();
  2379. ovs_free_percpu_storage();
  2380. }
  2381. module_init(dp_init);
  2382. module_exit(dp_cleanup);
  2383. MODULE_DESCRIPTION("Open vSwitch switching datapath");
  2384. MODULE_LICENSE("GPL");
  2385. MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
  2386. MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
  2387. MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
  2388. MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
  2389. MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
  2390. MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);