tun.c 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * TUN - Universal TUN/TAP device driver.
  4. * Copyright (C) 1999-2002 Maxim Krasnyansky <maxk@qualcomm.com>
  5. *
  6. * $Id: tun.c,v 1.15 2002/03/01 02:44:24 maxk Exp $
  7. */
  8. /*
  9. * Changes:
  10. *
  11. * Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  12. * Add TUNSETLINK ioctl to set the link encapsulation
  13. *
  14. * Mark Smith <markzzzsmith@yahoo.com.au>
  15. * Use eth_random_addr() for tap MAC address.
  16. *
  17. * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20
  18. * Fixes in packet dropping, queue length setting and queue wakeup.
  19. * Increased default tx queue length.
  20. * Added ethtool API.
  21. * Minor cleanups
  22. *
  23. * Daniel Podlejski <underley@underley.eu.org>
  24. * Modifications for 2.3.99-pre5 kernel.
  25. */
/* Log format for this file: module name, then ": ", then the caller's format. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/* Driver identification strings. */
#define DRV_NAME "tun"
#define DRV_VERSION "1.6"
#define DRV_DESCRIPTION "Universal TUN/TAP device driver"
#define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
  31. #include <linux/module.h>
  32. #include <linux/errno.h>
  33. #include <linux/kernel.h>
  34. #include <linux/sched/signal.h>
  35. #include <linux/major.h>
  36. #include <linux/slab.h>
  37. #include <linux/poll.h>
  38. #include <linux/fcntl.h>
  39. #include <linux/init.h>
  40. #include <linux/skbuff.h>
  41. #include <linux/netdevice.h>
  42. #include <linux/etherdevice.h>
  43. #include <linux/miscdevice.h>
  44. #include <linux/ethtool.h>
  45. #include <linux/rtnetlink.h>
  46. #include <linux/compat.h>
  47. #include <linux/if.h>
  48. #include <linux/if_arp.h>
  49. #include <linux/if_ether.h>
  50. #include <linux/if_tun.h>
  51. #include <linux/if_vlan.h>
  52. #include <linux/crc32.h>
  53. #include <linux/math.h>
  54. #include <linux/nsproxy.h>
  55. #include <linux/virtio_net.h>
  56. #include <linux/rcupdate.h>
  57. #include <net/net_namespace.h>
  58. #include <net/netns/generic.h>
  59. #include <net/rtnetlink.h>
  60. #include <net/sock.h>
  61. #include <net/xdp.h>
  62. #include <net/ip_tunnels.h>
  63. #include <linux/seq_file.h>
  64. #include <linux/uio.h>
  65. #include <linux/skb_array.h>
  66. #include <linux/bpf.h>
  67. #include <linux/bpf_trace.h>
  68. #include <linux/mutex.h>
  69. #include <linux/ieee802154.h>
  70. #include <uapi/linux/if_ltalk.h>
  71. #include <uapi/linux/if_fddi.h>
  72. #include <uapi/linux/if_hippi.h>
  73. #include <uapi/linux/if_fc.h>
  74. #include <net/ax25.h>
  75. #include <net/rose.h>
  76. #include <net/6lowpan.h>
  77. #include <net/rps.h>
  78. #include <linux/uaccess.h>
  79. #include <linux/proc_fs.h>
  80. #include "tun_vnet.h"
/* Forward declaration; the function is defined elsewhere in this file. */
static void tun_default_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *cmd);
/* Sum of NET_IP_ALIGN and NET_SKB_PAD. */
#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
/* TUN device flags */
/* IFF_ATTACH_QUEUE is never stored in device flags,
* overload it to mean fasync when stored there.
*/
#define TUN_FASYNC IFF_ATTACH_QUEUE
/* Set of IFF_* feature bits grouped under one mask. */
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
/* NOTE(review): users of GOODCOPY_LEN are not visible here - confirm meaning. */
#define GOODCOPY_LEN 128
/* Number of exact addresses held in struct tap_filter. */
#define FLT_EXACT_COUNT 8
/* Address filter state; struct tun_struct embeds one as its txflt member. */
struct tap_filter {
	unsigned int count; /* Number of addrs. Zero means disabled */
	u32 mask[2]; /* Mask of the hashed addrs */
	/* Exact-match table: FLT_EXACT_COUNT entries of ETH_ALEN bytes each */
	unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
};
/* MAX_TAP_QUEUES 256 is chosen to allow rx/tx queues to be equal
* to max number of VCPUs in guest. */
#define MAX_TAP_QUEUES 256
/* Upper bound on tun->flow_count; tun_flow_update() stops creating flows at it. */
#define MAX_TAP_FLOWS 4096
/* Three seconds in jiffies; presumably the default flow ageing time - confirm. */
#define TUN_FLOW_EXPIRE (3 * HZ)
  103. /* A tun_file connects an open character device to a tuntap netdevice. It
  104. * also contains all socket related structures (except sock_fprog and tap_filter)
  105. * to serve as one transmit queue for tuntap device. The sock_fprog and
  106. * tap_filter were kept in tun_struct since they were used for filtering for the
  107. * netdevice not for a specific queue (at least I didn't see the requirement for
  108. * this).
  109. *
  110. * RCU usage:
  111. * The tun_file and tun_struct are loosely coupled, the pointer from one to the
  112. * other can only be read while rcu_read_lock or rtnl_lock is held.
  113. */
/* One open file / one queue of a tuntap device. */
struct tun_file {
	struct sock sk;
	struct socket socket;
	/* Device this file belongs to; cleared when the file is fully detached */
	struct tun_struct __rcu *tun;
	struct fasync_struct *fasync;
	/* only used for fasync */
	unsigned int flags;
	union {
		/* Slot of this file in tun->tfiles[] while it is an enabled queue */
		u16 queue_index;
		unsigned int ifindex;
	};
	struct napi_struct napi;
	/* Both flags are set once by tun_napi_init() */
	bool napi_enabled;
	bool napi_frags_enabled;
	struct mutex napi_mutex; /* Protects access to the above napi */
	/* Link in tun->disabled while the queue is disabled */
	struct list_head next;
	/* Set by tun_disable_queue(), reset to NULL by tun_enable_queue() */
	struct tun_struct *detached;
	/* Ring of pending entries; each is released through tun_ptr_free() */
	struct ptr_ring tx_ring;
	struct xdp_rxq_info xdp_rxq;
};
/* A page pointer paired with a counter.
 * NOTE(review): users are outside the visible part of the file - confirm
 * what @count tracks.
 */
struct tun_page {
	struct page *page;
	int count;
};
/* One entry of the per-device flow table (tun->flows[]), keyed by rxhash. */
struct tun_flow_entry {
	/* Chain link inside one tun->flows[] bucket */
	struct hlist_node hash_link;
	/* Used by kfree_rcu() in tun_flow_delete() */
	struct rcu_head rcu;
	struct tun_struct *tun;
	/* Lookup key compared by tun_flow_find() */
	u32 rxhash;
	/* Hash stored by tun_flow_save_rps_rxhash() and handed to
	 * sock_rps_record_flow_hash() in tun_flow_update()
	 */
	u32 rps_rxhash;
	/* Queue this flow was last seen on */
	int queue_index;
	/* jiffies of the last update; tun_flow_cleanup() expires on it */
	unsigned long updated ____cacheline_aligned_in_smp;
};
/* Number of buckets in tun->flows[] */
#define TUN_NUM_FLOW_ENTRIES 1024
/* Bucket index mask applied by tun_hashfn() */
#define TUN_MASK_FLOW_ENTRIES (TUN_NUM_FLOW_ENTRIES - 1)
/* A BPF program pointer plus an rcu_head; struct tun_struct points at these
 * through its __rcu steering_prog and filter_prog members.
 */
struct tun_prog {
	struct rcu_head rcu;
	struct bpf_prog *prog;
};
/* Since the socket was moved to tun_file, the socket filter, sndbuf and vnet
 * header size are kept here and restored when a file is attached to a
 * persistent device, so that the behavior of persistent devices is preserved.
 */
/* Per-device state of a tuntap net_device (obtained via netdev_priv()). */
struct tun_struct {
	/* Enabled queues; a file's slot is its queue_index */
	struct tun_file __rcu *tfiles[MAX_TAP_QUEUES];
	/* Number of slots of tfiles[] in use */
	unsigned int numqueues;
	/* Device flags; IFF_PERSIST is tested when the last queue detaches */
	unsigned int flags;
	/* Owner uid/gid; compared by tun_not_capable() when valid */
	kuid_t owner;
	kgid_t group;
	struct net_device *dev;
	netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \
NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)
	int align;
	int vnet_hdr_sz;
	int sndbuf;
	struct tap_filter txflt;
	struct sock_fprog fprog;
	/* protected by rtnl lock */
	bool filter_attached;
	u32 msg_enable;
	/* Taken around every modification of the flow table below */
	spinlock_t lock;
	/* Flow table buckets, indexed by tun_hashfn(rxhash) */
	struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
	/* Timer whose callback is tun_flow_cleanup() */
	struct timer_list flow_gc_timer;
	/* Flow lifetime in jiffies, added to an entry's 'updated' stamp */
	unsigned long ageing_time;
	/* Length of, and head of, the list of disabled queues */
	unsigned int numdisabled;
	struct list_head disabled;
	void *security;
	/* Number of entries in flows[]; creation stops at MAX_TAP_FLOWS */
	u32 flow_count;
	u32 rx_batched;
	atomic_long_t rx_frame_errors;
	struct bpf_prog __rcu *xdp_prog;
	/* When set, tun_select_queue() uses it instead of the flow table */
	struct tun_prog __rcu *steering_prog;
	struct tun_prog __rcu *filter_prog;
	struct ethtool_link_ksettings link_ksettings;
	/* init args */
	struct file *file;
	struct ifreq *ifr;
};
/* Two big-endian 16-bit fields, named for a VLAN protocol and a VLAN TCI.
 * NOTE(review): presumably a VLAN tag header - confirm against its users,
 * which are outside the visible part of the file.
 */
struct veth {
	__be16 h_vlan_proto;
	__be16 h_vlan_TCI;
};
/* Forward declarations; both functions are defined elsewhere in this file. */
static void tun_flow_init(struct tun_struct *tun);
static void tun_flow_uninit(struct tun_struct *tun);
  200. static int tun_napi_receive(struct napi_struct *napi, int budget)
  201. {
  202. struct tun_file *tfile = container_of(napi, struct tun_file, napi);
  203. struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
  204. struct sk_buff_head process_queue;
  205. struct sk_buff *skb;
  206. int received = 0;
  207. __skb_queue_head_init(&process_queue);
  208. spin_lock(&queue->lock);
  209. skb_queue_splice_tail_init(queue, &process_queue);
  210. spin_unlock(&queue->lock);
  211. while (received < budget && (skb = __skb_dequeue(&process_queue))) {
  212. napi_gro_receive(napi, skb);
  213. ++received;
  214. }
  215. if (!skb_queue_empty(&process_queue)) {
  216. spin_lock(&queue->lock);
  217. skb_queue_splice(&process_queue, queue);
  218. spin_unlock(&queue->lock);
  219. }
  220. return received;
  221. }
  222. static int tun_napi_poll(struct napi_struct *napi, int budget)
  223. {
  224. unsigned int received;
  225. received = tun_napi_receive(napi, budget);
  226. if (received < budget)
  227. napi_complete_done(napi, received);
  228. return received;
  229. }
  230. static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
  231. bool napi_en, bool napi_frags)
  232. {
  233. tfile->napi_enabled = napi_en;
  234. tfile->napi_frags_enabled = napi_en && napi_frags;
  235. if (napi_en) {
  236. netif_napi_add_tx(tun->dev, &tfile->napi, tun_napi_poll);
  237. napi_enable(&tfile->napi);
  238. }
  239. }
  240. static void tun_napi_enable(struct tun_file *tfile)
  241. {
  242. if (tfile->napi_enabled)
  243. napi_enable(&tfile->napi);
  244. }
  245. static void tun_napi_disable(struct tun_file *tfile)
  246. {
  247. if (tfile->napi_enabled)
  248. napi_disable(&tfile->napi);
  249. }
  250. static void tun_napi_del(struct tun_file *tfile)
  251. {
  252. if (tfile->napi_enabled)
  253. netif_napi_del(&tfile->napi);
  254. }
  255. static bool tun_napi_frags_enabled(const struct tun_file *tfile)
  256. {
  257. return tfile->napi_frags_enabled;
  258. }
  259. static inline u32 tun_hashfn(u32 rxhash)
  260. {
  261. return rxhash & TUN_MASK_FLOW_ENTRIES;
  262. }
  263. static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
  264. {
  265. struct tun_flow_entry *e;
  266. hlist_for_each_entry_rcu(e, head, hash_link) {
  267. if (e->rxhash == rxhash)
  268. return e;
  269. }
  270. return NULL;
  271. }
  272. static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
  273. struct hlist_head *head,
  274. u32 rxhash, u16 queue_index)
  275. {
  276. struct tun_flow_entry *e = kmalloc_obj(*e, GFP_ATOMIC);
  277. if (e) {
  278. netif_info(tun, tx_queued, tun->dev,
  279. "create flow: hash %u index %u\n",
  280. rxhash, queue_index);
  281. e->updated = jiffies;
  282. e->rxhash = rxhash;
  283. e->rps_rxhash = 0;
  284. e->queue_index = queue_index;
  285. e->tun = tun;
  286. hlist_add_head_rcu(&e->hash_link, head);
  287. ++tun->flow_count;
  288. }
  289. return e;
  290. }
  291. static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
  292. {
  293. netif_info(tun, tx_queued, tun->dev, "delete flow: hash %u index %u\n",
  294. e->rxhash, e->queue_index);
  295. hlist_del_rcu(&e->hash_link);
  296. kfree_rcu(e, rcu);
  297. --tun->flow_count;
  298. }
  299. static void tun_flow_flush(struct tun_struct *tun)
  300. {
  301. int i;
  302. spin_lock_bh(&tun->lock);
  303. for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
  304. struct tun_flow_entry *e;
  305. struct hlist_node *n;
  306. hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
  307. tun_flow_delete(tun, e);
  308. }
  309. spin_unlock_bh(&tun->lock);
  310. }
  311. static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
  312. {
  313. int i;
  314. spin_lock_bh(&tun->lock);
  315. for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
  316. struct tun_flow_entry *e;
  317. struct hlist_node *n;
  318. hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
  319. if (e->queue_index == queue_index)
  320. tun_flow_delete(tun, e);
  321. }
  322. }
  323. spin_unlock_bh(&tun->lock);
  324. }
  325. static void tun_flow_cleanup(struct timer_list *t)
  326. {
  327. struct tun_struct *tun = timer_container_of(tun, t, flow_gc_timer);
  328. unsigned long delay = tun->ageing_time;
  329. unsigned long next_timer = jiffies + delay;
  330. unsigned long count = 0;
  331. int i;
  332. spin_lock(&tun->lock);
  333. for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
  334. struct tun_flow_entry *e;
  335. struct hlist_node *n;
  336. hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
  337. unsigned long this_timer;
  338. this_timer = e->updated + delay;
  339. if (time_before_eq(this_timer, jiffies)) {
  340. tun_flow_delete(tun, e);
  341. continue;
  342. }
  343. count++;
  344. if (time_before(this_timer, next_timer))
  345. next_timer = this_timer;
  346. }
  347. }
  348. if (count)
  349. mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
  350. spin_unlock(&tun->lock);
  351. }
  352. static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
  353. struct tun_file *tfile)
  354. {
  355. struct hlist_head *head;
  356. struct tun_flow_entry *e;
  357. unsigned long delay = tun->ageing_time;
  358. u16 queue_index = tfile->queue_index;
  359. head = &tun->flows[tun_hashfn(rxhash)];
  360. rcu_read_lock();
  361. e = tun_flow_find(head, rxhash);
  362. if (likely(e)) {
  363. /* TODO: keep queueing to old queue until it's empty? */
  364. if (READ_ONCE(e->queue_index) != queue_index)
  365. WRITE_ONCE(e->queue_index, queue_index);
  366. if (e->updated != jiffies)
  367. e->updated = jiffies;
  368. sock_rps_record_flow_hash(e->rps_rxhash);
  369. } else {
  370. spin_lock_bh(&tun->lock);
  371. if (!tun_flow_find(head, rxhash) &&
  372. tun->flow_count < MAX_TAP_FLOWS)
  373. tun_flow_create(tun, head, rxhash, queue_index);
  374. if (!timer_pending(&tun->flow_gc_timer))
  375. mod_timer(&tun->flow_gc_timer,
  376. round_jiffies_up(jiffies + delay));
  377. spin_unlock_bh(&tun->lock);
  378. }
  379. rcu_read_unlock();
  380. }
  381. /* Save the hash received in the stack receive path and update the
  382. * flow_hash table accordingly.
  383. */
  384. static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
  385. {
  386. if (unlikely(e->rps_rxhash != hash))
  387. e->rps_rxhash = hash;
  388. }
  389. /* We try to identify a flow through its rxhash. The reason that
  390. * we do not check rxq no. is because some cards(e.g 82599), chooses
  391. * the rxq based on the txq where the last packet of the flow comes. As
  392. * the userspace application move between processors, we may get a
  393. * different rxq no. here.
  394. */
  395. static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
  396. {
  397. struct tun_flow_entry *e;
  398. u32 txq, numqueues;
  399. numqueues = READ_ONCE(tun->numqueues);
  400. txq = __skb_get_hash_symmetric(skb);
  401. e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
  402. if (e) {
  403. tun_flow_save_rps_rxhash(e, txq);
  404. txq = e->queue_index;
  405. } else {
  406. txq = reciprocal_scale(txq, numqueues);
  407. }
  408. return txq;
  409. }
  410. static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
  411. {
  412. struct tun_prog *prog;
  413. u32 numqueues;
  414. u16 ret = 0;
  415. numqueues = READ_ONCE(tun->numqueues);
  416. if (!numqueues)
  417. return 0;
  418. prog = rcu_dereference(tun->steering_prog);
  419. if (prog)
  420. ret = bpf_prog_run_clear_cb(prog->prog, skb);
  421. return ret % numqueues;
  422. }
  423. static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
  424. struct net_device *sb_dev)
  425. {
  426. struct tun_struct *tun = netdev_priv(dev);
  427. u16 ret;
  428. rcu_read_lock();
  429. if (rcu_dereference(tun->steering_prog))
  430. ret = tun_ebpf_select_queue(tun, skb);
  431. else
  432. ret = tun_automq_select_queue(tun, skb);
  433. rcu_read_unlock();
  434. return ret;
  435. }
  436. static inline bool tun_not_capable(struct tun_struct *tun)
  437. {
  438. const struct cred *cred = current_cred();
  439. struct net *net = dev_net(tun->dev);
  440. return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
  441. (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
  442. !ns_capable(net->user_ns, CAP_NET_ADMIN);
  443. }
  444. static void tun_set_real_num_queues(struct tun_struct *tun)
  445. {
  446. netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
  447. netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
  448. }
  449. static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
  450. {
  451. tfile->detached = tun;
  452. list_add_tail(&tfile->next, &tun->disabled);
  453. ++tun->numdisabled;
  454. }
  455. static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
  456. {
  457. struct tun_struct *tun = tfile->detached;
  458. tfile->detached = NULL;
  459. list_del_init(&tfile->next);
  460. --tun->numdisabled;
  461. return tun;
  462. }
  463. void tun_ptr_free(void *ptr)
  464. {
  465. if (!ptr)
  466. return;
  467. if (tun_is_xdp_frame(ptr)) {
  468. struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
  469. xdp_return_frame(xdpf);
  470. } else {
  471. __skb_array_destroy_skb(ptr);
  472. }
  473. }
  474. EXPORT_SYMBOL_GPL(tun_ptr_free);
  475. static void tun_queue_purge(struct tun_file *tfile)
  476. {
  477. void *ptr;
  478. while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
  479. tun_ptr_free(ptr);
  480. skb_queue_purge(&tfile->sk.sk_write_queue);
  481. skb_queue_purge(&tfile->sk.sk_error_queue);
  482. }
/*
 * Detach @tfile from the tun device it is linked to.
 *
 * The device pointer is read with rtnl_dereference(); the visible caller,
 * tun_detach(), holds the RTNL lock around this call.
 *
 * @clean: true performs a full teardown (NAPI deleted, tfile->tun cleared,
 *         sock_put() called, xdp_rxq info unregistered, tx_ring cleaned up,
 *         and the netdevice possibly unregistered when no queue is left);
 *         false only moves an enabled queue onto the device's disabled list.
 */
static void __tun_detach(struct tun_file *tfile, bool clean)
{
	struct tun_file *ntfile;
	struct tun_struct *tun;

	tun = rtnl_dereference(tfile->tun);

	/* Full teardown: stop NAPI if the queue is still enabled, then
	 * delete the NAPI instance.
	 */
	if (tun && clean) {
		if (!tfile->detached)
			tun_napi_disable(tfile);
		tun_napi_del(tfile);
	}

	if (tun && !tfile->detached) {
		u16 index = tfile->queue_index;
		BUG_ON(index >= tun->numqueues);

		/* Move the last enabled queue into the vacated slot and
		 * renumber it, so the used part of tfiles[] stays dense.
		 */
		rcu_assign_pointer(tun->tfiles[index],
				   tun->tfiles[tun->numqueues - 1]);
		ntfile = rtnl_dereference(tun->tfiles[index]);
		ntfile->queue_index = index;
		ntfile->xdp_rxq.queue_index = index;
		rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
				   NULL);

		--tun->numqueues;
		if (clean) {
			RCU_INIT_POINTER(tfile->tun, NULL);
			sock_put(&tfile->sk);
		} else {
			/* Keep the file tied to the device, parked as disabled */
			tun_disable_queue(tun, tfile);
			tun_napi_disable(tfile);
		}

		synchronize_net();
		/* NOTE(review): numqueues was decremented above, so the value
		 * passed here is the previous queue count rather than @index
		 * or the old last index - confirm this is the intended queue.
		 */
		tun_flow_delete_by_queue(tun, tun->numqueues + 1);
		/* Drop read queue */
		tun_queue_purge(tfile);
		tun_set_real_num_queues(tun);
	} else if (tfile->detached && clean) {
		/* Closing a file that was parked as disabled: unpark it */
		tun = tun_enable_queue(tfile);
		sock_put(&tfile->sk);
	}

	if (clean) {
		/* No enabled or disabled queue is left on the device */
		if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
			netif_carrier_off(tun->dev);

			/* Non-persistent, still registered devices go away */
			if (!(tun->flags & IFF_PERSIST) &&
			    tun->dev->reg_state == NETREG_REGISTERED)
				unregister_netdevice(tun->dev);
		}
		if (tun)
			xdp_rxq_info_unreg(&tfile->xdp_rxq);
		ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
	}
}
  532. static void tun_detach(struct tun_file *tfile, bool clean)
  533. {
  534. struct tun_struct *tun;
  535. struct net_device *dev;
  536. rtnl_lock();
  537. tun = rtnl_dereference(tfile->tun);
  538. dev = tun ? tun->dev : NULL;
  539. __tun_detach(tfile, clean);
  540. if (dev)
  541. netdev_state_change(dev);
  542. rtnl_unlock();
  543. if (clean)
  544. sock_put(&tfile->sk);
  545. }
  546. static void tun_detach_all(struct net_device *dev)
  547. {
  548. struct tun_struct *tun = netdev_priv(dev);
  549. struct tun_file *tfile, *tmp;
  550. int i, n = tun->numqueues;
  551. for (i = 0; i < n; i++) {
  552. tfile = rtnl_dereference(tun->tfiles[i]);
  553. BUG_ON(!tfile);
  554. tun_napi_disable(tfile);
  555. tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
  556. tfile->socket.sk->sk_data_ready(tfile->socket.sk);
  557. RCU_INIT_POINTER(tfile->tun, NULL);
  558. --tun->numqueues;
  559. }
  560. list_for_each_entry(tfile, &tun->disabled, next) {
  561. tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
  562. tfile->socket.sk->sk_data_ready(tfile->socket.sk);
  563. RCU_INIT_POINTER(tfile->tun, NULL);
  564. }
  565. BUG_ON(tun->numqueues != 0);
  566. synchronize_net();
  567. for (i = 0; i < n; i++) {
  568. tfile = rtnl_dereference(tun->tfiles[i]);
  569. tun_napi_del(tfile);
  570. /* Drop read queue */
  571. tun_queue_purge(tfile);
  572. xdp_rxq_info_unreg(&tfile->xdp_rxq);
  573. sock_put(&tfile->sk);
  574. }
  575. list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
  576. tun_napi_del(tfile);
  577. tun_enable_queue(tfile);
  578. tun_queue_purge(tfile);
  579. xdp_rxq_info_unreg(&tfile->xdp_rxq);
  580. sock_put(&tfile->sk);
  581. }
  582. BUG_ON(tun->numdisabled != 0);
  583. if (tun->flags & IFF_PERSIST)
  584. module_put(THIS_MODULE);
  585. }
  586. static int tun_attach(struct tun_struct *tun, struct file *file,
  587. bool skip_filter, bool napi, bool napi_frags,
  588. bool publish_tun)
  589. {
  590. struct tun_file *tfile = file->private_data;
  591. struct net_device *dev = tun->dev;
  592. int err;
  593. err = security_tun_dev_attach(tfile->socket.sk, tun->security);
  594. if (err < 0)
  595. goto out;
  596. err = -EINVAL;
  597. if (rtnl_dereference(tfile->tun) && !tfile->detached)
  598. goto out;
  599. err = -EBUSY;
  600. if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1)
  601. goto out;
  602. err = -E2BIG;
  603. if (!tfile->detached &&
  604. tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
  605. goto out;
  606. err = 0;
  607. /* Re-attach the filter to persist device */
  608. if (!skip_filter && (tun->filter_attached == true)) {
  609. lock_sock(tfile->socket.sk);
  610. err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
  611. release_sock(tfile->socket.sk);
  612. if (!err)
  613. goto out;
  614. }
  615. if (!tfile->detached &&
  616. ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
  617. GFP_KERNEL, tun_ptr_free)) {
  618. err = -ENOMEM;
  619. goto out;
  620. }
  621. tfile->queue_index = tun->numqueues;
  622. tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
  623. if (tfile->detached) {
  624. /* Re-attach detached tfile, updating XDP queue_index */
  625. WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
  626. if (tfile->xdp_rxq.queue_index != tfile->queue_index)
  627. tfile->xdp_rxq.queue_index = tfile->queue_index;
  628. } else {
  629. /* Setup XDP RX-queue info, for new tfile getting attached */
  630. err = xdp_rxq_info_reg(&tfile->xdp_rxq,
  631. tun->dev, tfile->queue_index, 0);
  632. if (err < 0)
  633. goto out;
  634. err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
  635. MEM_TYPE_PAGE_SHARED, NULL);
  636. if (err < 0) {
  637. xdp_rxq_info_unreg(&tfile->xdp_rxq);
  638. goto out;
  639. }
  640. err = 0;
  641. }
  642. if (tfile->detached) {
  643. tun_enable_queue(tfile);
  644. tun_napi_enable(tfile);
  645. } else {
  646. sock_hold(&tfile->sk);
  647. tun_napi_init(tun, tfile, napi, napi_frags);
  648. }
  649. if (rtnl_dereference(tun->xdp_prog))
  650. sock_set_flag(&tfile->sk, SOCK_XDP);
  651. /* device is allowed to go away first, so no need to hold extra
  652. * refcnt.
  653. */
  654. /* Publish tfile->tun and tun->tfiles only after we've fully
  655. * initialized tfile; otherwise we risk using half-initialized
  656. * object.
  657. */
  658. if (publish_tun)
  659. rcu_assign_pointer(tfile->tun, tun);
  660. rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
  661. tun->numqueues++;
  662. tun_set_real_num_queues(tun);
  663. out:
  664. return err;
  665. }
  666. static struct tun_struct *tun_get(struct tun_file *tfile)
  667. {
  668. struct tun_struct *tun;
  669. rcu_read_lock();
  670. tun = rcu_dereference(tfile->tun);
  671. if (tun)
  672. dev_hold(tun->dev);
  673. rcu_read_unlock();
  674. return tun;
  675. }
  676. static void tun_put(struct tun_struct *tun)
  677. {
  678. dev_put(tun->dev);
  679. }
  680. /* TAP filtering */
  681. static void addr_hash_set(u32 *mask, const u8 *addr)
  682. {
  683. int n = ether_crc(ETH_ALEN, addr) >> 26;
  684. mask[n >> 5] |= (1 << (n & 31));
  685. }
  686. static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
  687. {
  688. int n = ether_crc(ETH_ALEN, addr) >> 26;
  689. return mask[n >> 5] & (1 << (n & 31));
  690. }
  691. static int update_filter(struct tap_filter *filter, void __user *arg)
  692. {
  693. struct { u8 u[ETH_ALEN]; } *addr;
  694. struct tun_filter uf;
  695. int err, alen, n, nexact;
  696. if (copy_from_user(&uf, arg, sizeof(uf)))
  697. return -EFAULT;
  698. if (!uf.count) {
  699. /* Disabled */
  700. filter->count = 0;
  701. return 0;
  702. }
  703. alen = ETH_ALEN * uf.count;
  704. addr = memdup_user(arg + sizeof(uf), alen);
  705. if (IS_ERR(addr))
  706. return PTR_ERR(addr);
  707. /* The filter is updated without holding any locks. Which is
  708. * perfectly safe. We disable it first and in the worst
  709. * case we'll accept a few undesired packets. */
  710. filter->count = 0;
  711. wmb();
  712. /* Use first set of addresses as an exact filter */
  713. for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
  714. memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
  715. nexact = n;
  716. /* Remaining multicast addresses are hashed,
  717. * unicast will leave the filter disabled. */
  718. memset(filter->mask, 0, sizeof(filter->mask));
  719. for (; n < uf.count; n++) {
  720. if (!is_multicast_ether_addr(addr[n].u)) {
  721. err = 0; /* no filter */
  722. goto free_addr;
  723. }
  724. addr_hash_set(filter->mask, addr[n].u);
  725. }
  726. /* For ALLMULTI just set the mask to all ones.
  727. * This overrides the mask populated above. */
  728. if ((uf.flags & TUN_FLT_ALLMULTI))
  729. memset(filter->mask, ~0, sizeof(filter->mask));
  730. /* Now enable the filter */
  731. wmb();
  732. filter->count = nexact;
  733. /* Return the number of exact filters */
  734. err = nexact;
  735. free_addr:
  736. kfree(addr);
  737. return err;
  738. }
  739. /* Returns: 0 - drop, !=0 - accept */
  740. static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
  741. {
  742. /* Cannot use eth_hdr(skb) here because skb_mac_hdr() is incorrect
  743. * at this point. */
  744. struct ethhdr *eh = (struct ethhdr *) skb->data;
  745. int i;
  746. /* Exact match */
  747. for (i = 0; i < filter->count; i++)
  748. if (ether_addr_equal(eh->h_dest, filter->addr[i]))
  749. return 1;
  750. /* Inexact match (multicast only) */
  751. if (is_multicast_ether_addr(eh->h_dest))
  752. return addr_hash_test(filter->mask, eh->h_dest);
  753. return 0;
  754. }
  755. /*
  756. * Checks whether the packet is accepted or not.
  757. * Returns: 0 - drop, !=0 - accept
  758. */
  759. static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
  760. {
  761. if (!filter->count)
  762. return 1;
  763. return run_filter(filter, skb);
  764. }
  765. /* Network device part of the driver */
  766. static const struct ethtool_ops tun_ethtool_ops;
  767. static int tun_net_init(struct net_device *dev)
  768. {
  769. struct tun_struct *tun = netdev_priv(dev);
  770. struct ifreq *ifr = tun->ifr;
  771. int err;
  772. spin_lock_init(&tun->lock);
  773. err = security_tun_dev_alloc_security(&tun->security);
  774. if (err < 0)
  775. return err;
  776. tun_flow_init(tun);
  777. dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
  778. dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
  779. TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
  780. NETIF_F_HW_VLAN_STAG_TX;
  781. dev->hw_enc_features = dev->hw_features;
  782. dev->features = dev->hw_features;
  783. dev->vlan_features = dev->features &
  784. ~(NETIF_F_HW_VLAN_CTAG_TX |
  785. NETIF_F_HW_VLAN_STAG_TX);
  786. dev->lltx = true;
  787. tun->flags = (tun->flags & ~TUN_FEATURES) |
  788. (ifr->ifr_flags & TUN_FEATURES);
  789. INIT_LIST_HEAD(&tun->disabled);
  790. err = tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI,
  791. ifr->ifr_flags & IFF_NAPI_FRAGS, false);
  792. if (err < 0) {
  793. tun_flow_uninit(tun);
  794. security_tun_dev_free_security(tun->security);
  795. return err;
  796. }
  797. return 0;
  798. }
  799. /* Net device detach from fd. */
  800. static void tun_net_uninit(struct net_device *dev)
  801. {
  802. tun_detach_all(dev);
  803. }
  804. /* Net device open. */
  805. static int tun_net_open(struct net_device *dev)
  806. {
  807. netif_tx_start_all_queues(dev);
  808. return 0;
  809. }
  810. /* Net device close. */
  811. static int tun_net_close(struct net_device *dev)
  812. {
  813. netif_tx_stop_all_queues(dev);
  814. return 0;
  815. }
  816. /* Net device start xmit */
  817. static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
  818. {
  819. #ifdef CONFIG_RPS
  820. if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) {
  821. /* Select queue was not called for the skbuff, so we extract the
  822. * RPS hash and save it into the flow_table here.
  823. */
  824. struct tun_flow_entry *e;
  825. __u32 rxhash;
  826. rxhash = __skb_get_hash_symmetric(skb);
  827. e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash);
  828. if (e)
  829. tun_flow_save_rps_rxhash(e, rxhash);
  830. }
  831. #endif
  832. }
  833. static unsigned int run_ebpf_filter(struct tun_struct *tun,
  834. struct sk_buff *skb,
  835. int len)
  836. {
  837. struct tun_prog *prog = rcu_dereference(tun->filter_prog);
  838. if (prog)
  839. len = bpf_prog_run_clear_cb(prog->prog, skb);
  840. return len;
  841. }
  842. /* Net device start xmit */
  843. static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
  844. {
  845. enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
  846. struct tun_struct *tun = netdev_priv(dev);
  847. int txq = skb->queue_mapping;
  848. struct netdev_queue *queue;
  849. struct tun_file *tfile;
  850. int len = skb->len;
  851. rcu_read_lock();
  852. tfile = rcu_dereference(tun->tfiles[txq]);
  853. /* Drop packet if interface is not attached */
  854. if (!tfile) {
  855. drop_reason = SKB_DROP_REASON_DEV_READY;
  856. goto drop;
  857. }
  858. if (!rcu_dereference(tun->steering_prog))
  859. tun_automq_xmit(tun, skb);
  860. netif_info(tun, tx_queued, tun->dev, "%s %d\n", __func__, skb->len);
  861. /* Drop if the filter does not like it.
  862. * This is a noop if the filter is disabled.
  863. * Filter can be enabled only for the TAP devices. */
  864. if (!check_filter(&tun->txflt, skb)) {
  865. drop_reason = SKB_DROP_REASON_TAP_TXFILTER;
  866. goto drop;
  867. }
  868. if (tfile->socket.sk->sk_filter &&
  869. sk_filter_reason(tfile->socket.sk, skb, &drop_reason))
  870. goto drop;
  871. len = run_ebpf_filter(tun, skb, len);
  872. if (len == 0) {
  873. drop_reason = SKB_DROP_REASON_TAP_FILTER;
  874. goto drop;
  875. }
  876. if (pskb_trim(skb, len)) {
  877. drop_reason = SKB_DROP_REASON_NOMEM;
  878. goto drop;
  879. }
  880. if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
  881. drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
  882. goto drop;
  883. }
  884. skb_tx_timestamp(skb);
  885. /* Orphan the skb - required as we might hang on to it
  886. * for indefinite time.
  887. */
  888. skb_orphan(skb);
  889. nf_reset_ct(skb);
  890. if (ptr_ring_produce(&tfile->tx_ring, skb)) {
  891. drop_reason = SKB_DROP_REASON_FULL_RING;
  892. goto drop;
  893. }
  894. /* dev->lltx requires to do our own update of trans_start */
  895. queue = netdev_get_tx_queue(dev, txq);
  896. txq_trans_cond_update(queue);
  897. /* Notify and wake up reader process */
  898. if (tfile->flags & TUN_FASYNC)
  899. kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
  900. tfile->socket.sk->sk_data_ready(tfile->socket.sk);
  901. rcu_read_unlock();
  902. return NETDEV_TX_OK;
  903. drop:
  904. dev_core_stats_tx_dropped_inc(dev);
  905. skb_tx_error(skb);
  906. kfree_skb_reason(skb, drop_reason);
  907. rcu_read_unlock();
  908. return NET_XMIT_DROP;
  909. }
  910. static void tun_net_mclist(struct net_device *dev)
  911. {
  912. /*
  913. * This callback is supposed to deal with mc filter in
  914. * _rx_ path and has nothing to do with the _tx_ path.
  915. * In rx path we always accept everything userspace gives us.
  916. */
  917. }
  918. static netdev_features_t tun_net_fix_features(struct net_device *dev,
  919. netdev_features_t features)
  920. {
  921. struct tun_struct *tun = netdev_priv(dev);
  922. return (features & tun->set_features) | (features & ~TUN_USER_FEATURES);
  923. }
  924. static void tun_set_headroom(struct net_device *dev, int new_hr)
  925. {
  926. struct tun_struct *tun = netdev_priv(dev);
  927. if (new_hr < NET_SKB_PAD)
  928. new_hr = NET_SKB_PAD;
  929. tun->align = new_hr;
  930. }
  931. static void
  932. tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
  933. {
  934. struct tun_struct *tun = netdev_priv(dev);
  935. dev_get_tstats64(dev, stats);
  936. stats->rx_frame_errors +=
  937. (unsigned long)atomic_long_read(&tun->rx_frame_errors);
  938. }
  939. static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
  940. struct netlink_ext_ack *extack)
  941. {
  942. struct tun_struct *tun = netdev_priv(dev);
  943. struct tun_file *tfile;
  944. struct bpf_prog *old_prog;
  945. int i;
  946. old_prog = rtnl_dereference(tun->xdp_prog);
  947. rcu_assign_pointer(tun->xdp_prog, prog);
  948. if (old_prog)
  949. bpf_prog_put(old_prog);
  950. for (i = 0; i < tun->numqueues; i++) {
  951. tfile = rtnl_dereference(tun->tfiles[i]);
  952. if (prog)
  953. sock_set_flag(&tfile->sk, SOCK_XDP);
  954. else
  955. sock_reset_flag(&tfile->sk, SOCK_XDP);
  956. }
  957. list_for_each_entry(tfile, &tun->disabled, next) {
  958. if (prog)
  959. sock_set_flag(&tfile->sk, SOCK_XDP);
  960. else
  961. sock_reset_flag(&tfile->sk, SOCK_XDP);
  962. }
  963. return 0;
  964. }
  965. static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
  966. {
  967. switch (xdp->command) {
  968. case XDP_SETUP_PROG:
  969. return tun_xdp_set(dev, xdp->prog, xdp->extack);
  970. default:
  971. return -EINVAL;
  972. }
  973. }
  974. static int tun_net_change_carrier(struct net_device *dev, bool new_carrier)
  975. {
  976. if (new_carrier) {
  977. struct tun_struct *tun = netdev_priv(dev);
  978. if (!tun->numqueues)
  979. return -EPERM;
  980. netif_carrier_on(dev);
  981. } else {
  982. netif_carrier_off(dev);
  983. }
  984. return 0;
  985. }
  986. static const struct net_device_ops tun_netdev_ops = {
  987. .ndo_init = tun_net_init,
  988. .ndo_uninit = tun_net_uninit,
  989. .ndo_open = tun_net_open,
  990. .ndo_stop = tun_net_close,
  991. .ndo_start_xmit = tun_net_xmit,
  992. .ndo_fix_features = tun_net_fix_features,
  993. .ndo_select_queue = tun_select_queue,
  994. .ndo_set_rx_headroom = tun_set_headroom,
  995. .ndo_get_stats64 = tun_net_get_stats64,
  996. .ndo_change_carrier = tun_net_change_carrier,
  997. };
  998. static void __tun_xdp_flush_tfile(struct tun_file *tfile)
  999. {
  1000. /* Notify and wake up reader process */
  1001. if (tfile->flags & TUN_FASYNC)
  1002. kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
  1003. tfile->socket.sk->sk_data_ready(tfile->socket.sk);
  1004. }
  1005. static int tun_xdp_xmit(struct net_device *dev, int n,
  1006. struct xdp_frame **frames, u32 flags)
  1007. {
  1008. struct tun_struct *tun = netdev_priv(dev);
  1009. struct tun_file *tfile;
  1010. u32 numqueues;
  1011. int nxmit = 0;
  1012. int i;
  1013. if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
  1014. return -EINVAL;
  1015. rcu_read_lock();
  1016. resample:
  1017. numqueues = READ_ONCE(tun->numqueues);
  1018. if (!numqueues) {
  1019. rcu_read_unlock();
  1020. return -ENXIO; /* Caller will free/return all frames */
  1021. }
  1022. tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
  1023. numqueues]);
  1024. if (unlikely(!tfile))
  1025. goto resample;
  1026. spin_lock(&tfile->tx_ring.producer_lock);
  1027. for (i = 0; i < n; i++) {
  1028. struct xdp_frame *xdp = frames[i];
  1029. /* Encode the XDP flag into lowest bit for consumer to differ
  1030. * XDP buffer from sk_buff.
  1031. */
  1032. void *frame = tun_xdp_to_ptr(xdp);
  1033. if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
  1034. dev_core_stats_tx_dropped_inc(dev);
  1035. break;
  1036. }
  1037. nxmit++;
  1038. }
  1039. spin_unlock(&tfile->tx_ring.producer_lock);
  1040. if (flags & XDP_XMIT_FLUSH)
  1041. __tun_xdp_flush_tfile(tfile);
  1042. rcu_read_unlock();
  1043. return nxmit;
  1044. }
  1045. static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
  1046. {
  1047. struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
  1048. int nxmit;
  1049. if (unlikely(!frame))
  1050. return -EOVERFLOW;
  1051. nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
  1052. if (!nxmit)
  1053. xdp_return_frame_rx_napi(frame);
  1054. return nxmit;
  1055. }
  1056. static const struct net_device_ops tap_netdev_ops = {
  1057. .ndo_init = tun_net_init,
  1058. .ndo_uninit = tun_net_uninit,
  1059. .ndo_open = tun_net_open,
  1060. .ndo_stop = tun_net_close,
  1061. .ndo_start_xmit = tun_net_xmit,
  1062. .ndo_fix_features = tun_net_fix_features,
  1063. .ndo_set_rx_mode = tun_net_mclist,
  1064. .ndo_set_mac_address = eth_mac_addr,
  1065. .ndo_validate_addr = eth_validate_addr,
  1066. .ndo_select_queue = tun_select_queue,
  1067. .ndo_features_check = passthru_features_check,
  1068. .ndo_set_rx_headroom = tun_set_headroom,
  1069. .ndo_bpf = tun_xdp,
  1070. .ndo_xdp_xmit = tun_xdp_xmit,
  1071. .ndo_change_carrier = tun_net_change_carrier,
  1072. };
  1073. static void tun_flow_init(struct tun_struct *tun)
  1074. {
  1075. int i;
  1076. for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
  1077. INIT_HLIST_HEAD(&tun->flows[i]);
  1078. tun->ageing_time = TUN_FLOW_EXPIRE;
  1079. timer_setup(&tun->flow_gc_timer, tun_flow_cleanup, 0);
  1080. mod_timer(&tun->flow_gc_timer,
  1081. round_jiffies_up(jiffies + tun->ageing_time));
  1082. }
  1083. static void tun_flow_uninit(struct tun_struct *tun)
  1084. {
  1085. timer_delete_sync(&tun->flow_gc_timer);
  1086. tun_flow_flush(tun);
  1087. }
  1088. #define MIN_MTU 68
  1089. #define MAX_MTU 65535
  1090. /* Initialize net device. */
  1091. static void tun_net_initialize(struct net_device *dev)
  1092. {
  1093. struct tun_struct *tun = netdev_priv(dev);
  1094. switch (tun->flags & TUN_TYPE_MASK) {
  1095. case IFF_TUN:
  1096. dev->netdev_ops = &tun_netdev_ops;
  1097. dev->header_ops = &ip_tunnel_header_ops;
  1098. /* Point-to-Point TUN Device */
  1099. dev->hard_header_len = 0;
  1100. dev->addr_len = 0;
  1101. dev->mtu = 1500;
  1102. /* Zero header length */
  1103. dev->type = ARPHRD_NONE;
  1104. dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
  1105. break;
  1106. case IFF_TAP:
  1107. dev->netdev_ops = &tap_netdev_ops;
  1108. /* Ethernet TAP Device */
  1109. ether_setup(dev);
  1110. dev->priv_flags &= ~IFF_TX_SKB_SHARING;
  1111. dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
  1112. eth_hw_addr_random(dev);
  1113. /* Currently tun does not support XDP, only tap does. */
  1114. dev->xdp_features = NETDEV_XDP_ACT_BASIC |
  1115. NETDEV_XDP_ACT_REDIRECT |
  1116. NETDEV_XDP_ACT_NDO_XMIT;
  1117. break;
  1118. }
  1119. dev->min_mtu = MIN_MTU;
  1120. dev->max_mtu = MAX_MTU - dev->hard_header_len;
  1121. }
  1122. static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile)
  1123. {
  1124. struct sock *sk = tfile->socket.sk;
  1125. return (tun->dev->flags & IFF_UP) && sock_writeable(sk);
  1126. }
  1127. /* Character device part */
  1128. /* Poll */
  1129. static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
  1130. {
  1131. struct tun_file *tfile = file->private_data;
  1132. struct tun_struct *tun = tun_get(tfile);
  1133. struct sock *sk;
  1134. __poll_t mask = 0;
  1135. if (!tun)
  1136. return EPOLLERR;
  1137. sk = tfile->socket.sk;
  1138. poll_wait(file, sk_sleep(sk), wait);
  1139. if (!ptr_ring_empty(&tfile->tx_ring))
  1140. mask |= EPOLLIN | EPOLLRDNORM;
  1141. /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to
  1142. * guarantee EPOLLOUT to be raised by either here or
  1143. * tun_sock_write_space(). Then process could get notification
  1144. * after it writes to a down device and meets -EIO.
  1145. */
  1146. if (tun_sock_writeable(tun, tfile) ||
  1147. (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
  1148. tun_sock_writeable(tun, tfile)))
  1149. mask |= EPOLLOUT | EPOLLWRNORM;
  1150. if (tun->dev->reg_state != NETREG_REGISTERED)
  1151. mask = EPOLLERR;
  1152. tun_put(tun);
  1153. return mask;
  1154. }
  1155. static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
  1156. size_t len,
  1157. const struct iov_iter *it)
  1158. {
  1159. struct sk_buff *skb;
  1160. size_t linear;
  1161. int err;
  1162. int i;
  1163. if (it->nr_segs > MAX_SKB_FRAGS + 1 ||
  1164. len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN))
  1165. return ERR_PTR(-EMSGSIZE);
  1166. local_bh_disable();
  1167. skb = napi_get_frags(&tfile->napi);
  1168. local_bh_enable();
  1169. if (!skb)
  1170. return ERR_PTR(-ENOMEM);
  1171. linear = iov_iter_single_seg_count(it);
  1172. err = __skb_grow(skb, linear);
  1173. if (err)
  1174. goto free;
  1175. skb->len = len;
  1176. skb->data_len = len - linear;
  1177. skb->truesize += skb->data_len;
  1178. for (i = 1; i < it->nr_segs; i++) {
  1179. const struct iovec *iov = iter_iov(it) + i;
  1180. size_t fragsz = iov->iov_len;
  1181. struct page *page;
  1182. void *frag;
  1183. if (fragsz == 0 || fragsz > PAGE_SIZE) {
  1184. err = -EINVAL;
  1185. goto free;
  1186. }
  1187. frag = netdev_alloc_frag(fragsz);
  1188. if (!frag) {
  1189. err = -ENOMEM;
  1190. goto free;
  1191. }
  1192. page = virt_to_head_page(frag);
  1193. skb_fill_page_desc(skb, i - 1, page,
  1194. frag - page_address(page), fragsz);
  1195. }
  1196. return skb;
  1197. free:
  1198. /* frees skb and all frags allocated with napi_alloc_frag() */
  1199. napi_free_frags(&tfile->napi);
  1200. return ERR_PTR(err);
  1201. }
  1202. /* prepad is the amount to reserve at front. len is length after that.
  1203. * linear is a hint as to how much to copy (usually headers). */
  1204. static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
  1205. size_t prepad, size_t len,
  1206. size_t linear, int noblock)
  1207. {
  1208. struct sock *sk = tfile->socket.sk;
  1209. struct sk_buff *skb;
  1210. int err;
  1211. /* Under a page? Don't bother with paged skb. */
  1212. if (prepad + len < PAGE_SIZE)
  1213. linear = len;
  1214. if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
  1215. linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
  1216. skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
  1217. &err, PAGE_ALLOC_COSTLY_ORDER);
  1218. if (!skb)
  1219. return ERR_PTR(err);
  1220. skb_reserve(skb, prepad);
  1221. skb_put(skb, linear);
  1222. skb->data_len = len - linear;
  1223. skb->len += len - linear;
  1224. return skb;
  1225. }
  1226. static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
  1227. struct sk_buff *skb, int more)
  1228. {
  1229. struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
  1230. struct sk_buff_head process_queue;
  1231. u32 rx_batched = tun->rx_batched;
  1232. bool rcv = false;
  1233. if (!rx_batched || (!more && skb_queue_empty(queue))) {
  1234. local_bh_disable();
  1235. skb_record_rx_queue(skb, tfile->queue_index);
  1236. netif_receive_skb(skb);
  1237. local_bh_enable();
  1238. return;
  1239. }
  1240. spin_lock(&queue->lock);
  1241. if (!more || skb_queue_len(queue) == rx_batched) {
  1242. __skb_queue_head_init(&process_queue);
  1243. skb_queue_splice_tail_init(queue, &process_queue);
  1244. rcv = true;
  1245. } else {
  1246. __skb_queue_tail(queue, skb);
  1247. }
  1248. spin_unlock(&queue->lock);
  1249. if (rcv) {
  1250. struct sk_buff *nskb;
  1251. local_bh_disable();
  1252. while ((nskb = __skb_dequeue(&process_queue))) {
  1253. skb_record_rx_queue(nskb, tfile->queue_index);
  1254. netif_receive_skb(nskb);
  1255. }
  1256. skb_record_rx_queue(skb, tfile->queue_index);
  1257. netif_receive_skb(skb);
  1258. local_bh_enable();
  1259. }
  1260. }
  1261. static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
  1262. int len, int noblock, bool zerocopy)
  1263. {
  1264. if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
  1265. return false;
  1266. if (tfile->socket.sk->sk_sndbuf != INT_MAX)
  1267. return false;
  1268. if (!noblock)
  1269. return false;
  1270. if (zerocopy)
  1271. return false;
  1272. if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
  1273. SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
  1274. return false;
  1275. return true;
  1276. }
  1277. static struct sk_buff *__tun_build_skb(struct tun_file *tfile,
  1278. struct page_frag *alloc_frag, char *buf,
  1279. int buflen, int len, int pad,
  1280. int metasize)
  1281. {
  1282. struct sk_buff *skb = build_skb(buf, buflen);
  1283. if (!skb)
  1284. return ERR_PTR(-ENOMEM);
  1285. skb_reserve(skb, pad);
  1286. skb_put(skb, len);
  1287. if (metasize)
  1288. skb_metadata_set(skb, metasize);
  1289. skb_set_owner_w(skb, tfile->socket.sk);
  1290. get_page(alloc_frag->page);
  1291. alloc_frag->offset += buflen;
  1292. return skb;
  1293. }
  1294. static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
  1295. struct xdp_buff *xdp, u32 act)
  1296. {
  1297. int err;
  1298. switch (act) {
  1299. case XDP_REDIRECT:
  1300. err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
  1301. if (err) {
  1302. dev_core_stats_rx_dropped_inc(tun->dev);
  1303. return err;
  1304. }
  1305. dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
  1306. break;
  1307. case XDP_TX:
  1308. err = tun_xdp_tx(tun->dev, xdp);
  1309. if (err < 0) {
  1310. dev_core_stats_rx_dropped_inc(tun->dev);
  1311. return err;
  1312. }
  1313. dev_sw_netstats_rx_add(tun->dev, xdp->data_end - xdp->data);
  1314. break;
  1315. case XDP_PASS:
  1316. break;
  1317. default:
  1318. bpf_warn_invalid_xdp_action(tun->dev, xdp_prog, act);
  1319. fallthrough;
  1320. case XDP_ABORTED:
  1321. trace_xdp_exception(tun->dev, xdp_prog, act);
  1322. fallthrough;
  1323. case XDP_DROP:
  1324. dev_core_stats_rx_dropped_inc(tun->dev);
  1325. break;
  1326. }
  1327. return act;
  1328. }
  1329. static struct sk_buff *tun_build_skb(struct tun_struct *tun,
  1330. struct tun_file *tfile,
  1331. struct iov_iter *from,
  1332. struct virtio_net_hdr *hdr,
  1333. int len, int *skb_xdp)
  1334. {
  1335. struct page_frag *alloc_frag = &current->task_frag;
  1336. struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
  1337. struct bpf_prog *xdp_prog;
  1338. int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
  1339. char *buf;
  1340. size_t copied;
  1341. int pad = TUN_RX_PAD;
  1342. int metasize = 0;
  1343. int err = 0;
  1344. rcu_read_lock();
  1345. xdp_prog = rcu_dereference(tun->xdp_prog);
  1346. if (xdp_prog)
  1347. pad += XDP_PACKET_HEADROOM;
  1348. buflen += SKB_DATA_ALIGN(len + pad);
  1349. rcu_read_unlock();
  1350. alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
  1351. if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
  1352. return ERR_PTR(-ENOMEM);
  1353. buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
  1354. copied = copy_page_from_iter(alloc_frag->page,
  1355. alloc_frag->offset + pad,
  1356. len, from);
  1357. if (copied != len)
  1358. return ERR_PTR(-EFAULT);
  1359. /* There's a small window that XDP may be set after the check
  1360. * of xdp_prog above, this should be rare and for simplicity
  1361. * we do XDP on skb in case the headroom is not enough.
  1362. */
  1363. if (hdr->gso_type || !xdp_prog) {
  1364. *skb_xdp = 1;
  1365. return __tun_build_skb(tfile, alloc_frag, buf, buflen, len,
  1366. pad, metasize);
  1367. }
  1368. *skb_xdp = 0;
  1369. local_bh_disable();
  1370. rcu_read_lock();
  1371. bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
  1372. xdp_prog = rcu_dereference(tun->xdp_prog);
  1373. if (xdp_prog) {
  1374. struct xdp_buff xdp;
  1375. u32 act;
  1376. xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq);
  1377. xdp_prepare_buff(&xdp, buf, pad, len, true);
  1378. act = bpf_prog_run_xdp(xdp_prog, &xdp);
  1379. if (act == XDP_REDIRECT || act == XDP_TX) {
  1380. get_page(alloc_frag->page);
  1381. alloc_frag->offset += buflen;
  1382. }
  1383. err = tun_xdp_act(tun, xdp_prog, &xdp, act);
  1384. if (err < 0) {
  1385. if (act == XDP_REDIRECT || act == XDP_TX)
  1386. put_page(alloc_frag->page);
  1387. goto out;
  1388. }
  1389. if (err == XDP_REDIRECT)
  1390. xdp_do_flush();
  1391. if (err != XDP_PASS)
  1392. goto out;
  1393. pad = xdp.data - xdp.data_hard_start;
  1394. len = xdp.data_end - xdp.data;
  1395. /* It is known that the xdp_buff was prepared with metadata
  1396. * support, so the metasize will never be negative.
  1397. */
  1398. metasize = xdp.data - xdp.data_meta;
  1399. }
  1400. bpf_net_ctx_clear(bpf_net_ctx);
  1401. rcu_read_unlock();
  1402. local_bh_enable();
  1403. return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad,
  1404. metasize);
  1405. out:
  1406. bpf_net_ctx_clear(bpf_net_ctx);
  1407. rcu_read_unlock();
  1408. local_bh_enable();
  1409. return NULL;
  1410. }
  1411. /* Get packet from user space buffer */
  1412. static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
  1413. void *msg_control, struct iov_iter *from,
  1414. int noblock, bool more)
  1415. {
  1416. struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
  1417. struct sk_buff *skb;
  1418. size_t total_len = iov_iter_count(from);
  1419. size_t len = total_len, align = tun->align, linear;
  1420. struct virtio_net_hdr_v1_hash_tunnel hdr;
  1421. struct virtio_net_hdr *gso;
  1422. int good_linear;
  1423. int copylen;
  1424. int hdr_len = 0;
  1425. bool zerocopy = false;
  1426. int err;
  1427. u32 rxhash = 0;
  1428. int skb_xdp = 1;
  1429. bool frags = tun_napi_frags_enabled(tfile);
  1430. enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
  1431. netdev_features_t features = 0;
  1432. /*
  1433. * Keep it easy and always zero the whole buffer, even if the
  1434. * tunnel-related field will be touched only when the feature
  1435. * is enabled and the hdr size id compatible.
  1436. */
  1437. memset(&hdr, 0, sizeof(hdr));
  1438. gso = (struct virtio_net_hdr *)&hdr;
  1439. if (!(tun->flags & IFF_NO_PI)) {
  1440. if (len < sizeof(pi))
  1441. return -EINVAL;
  1442. len -= sizeof(pi);
  1443. if (!copy_from_iter_full(&pi, sizeof(pi), from))
  1444. return -EFAULT;
  1445. }
  1446. if (tun->flags & IFF_VNET_HDR) {
  1447. int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
  1448. features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
  1449. hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
  1450. features, from, gso);
  1451. if (hdr_len < 0)
  1452. return hdr_len;
  1453. len -= vnet_hdr_sz;
  1454. }
  1455. if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
  1456. align += NET_IP_ALIGN;
  1457. if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN)))
  1458. return -EINVAL;
  1459. }
  1460. good_linear = SKB_MAX_HEAD(align);
  1461. if (msg_control) {
  1462. struct iov_iter i = *from;
  1463. /* There are 256 bytes to be copied in skb, so there is
  1464. * enough room for skb expand head in case it is used.
  1465. * The rest of the buffer is mapped from userspace.
  1466. */
  1467. copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear);
  1468. linear = copylen;
  1469. iov_iter_advance(&i, copylen);
  1470. if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
  1471. zerocopy = true;
  1472. }
  1473. if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
  1474. /* For the packet that is not easy to be processed
  1475. * (e.g gso or jumbo packet), we will do it at after
  1476. * skb was created with generic XDP routine.
  1477. */
  1478. skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
  1479. err = PTR_ERR_OR_ZERO(skb);
  1480. if (err)
  1481. goto drop;
  1482. if (!skb)
  1483. return total_len;
  1484. } else {
  1485. if (!zerocopy) {
  1486. copylen = len;
  1487. linear = min(hdr_len, good_linear);
  1488. }
  1489. if (frags) {
  1490. mutex_lock(&tfile->napi_mutex);
  1491. skb = tun_napi_alloc_frags(tfile, copylen, from);
  1492. /* tun_napi_alloc_frags() enforces a layout for the skb.
  1493. * If zerocopy is enabled, then this layout will be
  1494. * overwritten by zerocopy_sg_from_iter().
  1495. */
  1496. zerocopy = false;
  1497. } else {
  1498. if (!linear)
  1499. linear = min_t(size_t, good_linear, copylen);
  1500. skb = tun_alloc_skb(tfile, align, copylen, linear,
  1501. noblock);
  1502. }
  1503. err = PTR_ERR_OR_ZERO(skb);
  1504. if (err)
  1505. goto drop;
  1506. if (zerocopy)
  1507. err = zerocopy_sg_from_iter(skb, from);
  1508. else
  1509. err = skb_copy_datagram_from_iter(skb, 0, from, len);
  1510. if (err) {
  1511. err = -EFAULT;
  1512. drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT;
  1513. goto drop;
  1514. }
  1515. }
  1516. if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) {
  1517. atomic_long_inc(&tun->rx_frame_errors);
  1518. err = -EINVAL;
  1519. goto free_skb;
  1520. }
  1521. switch (tun->flags & TUN_TYPE_MASK) {
  1522. case IFF_TUN:
  1523. if (tun->flags & IFF_NO_PI) {
  1524. u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
  1525. switch (ip_version) {
  1526. case 4:
  1527. pi.proto = htons(ETH_P_IP);
  1528. break;
  1529. case 6:
  1530. pi.proto = htons(ETH_P_IPV6);
  1531. break;
  1532. default:
  1533. err = -EINVAL;
  1534. goto drop;
  1535. }
  1536. }
  1537. skb_reset_mac_header(skb);
  1538. skb->protocol = pi.proto;
  1539. skb->dev = tun->dev;
  1540. break;
  1541. case IFF_TAP:
  1542. if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
  1543. err = -ENOMEM;
  1544. drop_reason = SKB_DROP_REASON_HDR_TRUNC;
  1545. goto drop;
  1546. }
  1547. skb->protocol = eth_type_trans(skb, tun->dev);
  1548. break;
  1549. }
  1550. /* copy skb_ubuf_info for callback when skb has no error */
  1551. if (zerocopy) {
  1552. skb_zcopy_init(skb, msg_control);
  1553. } else if (msg_control) {
  1554. struct ubuf_info *uarg = msg_control;
  1555. uarg->ops->complete(NULL, uarg, false);
  1556. }
  1557. skb_reset_network_header(skb);
  1558. skb_probe_transport_header(skb);
  1559. skb_record_rx_queue(skb, tfile->queue_index);
  1560. if (skb_xdp) {
  1561. struct bpf_prog *xdp_prog;
  1562. int ret;
  1563. local_bh_disable();
  1564. rcu_read_lock();
  1565. xdp_prog = rcu_dereference(tun->xdp_prog);
  1566. if (xdp_prog) {
  1567. ret = do_xdp_generic(xdp_prog, &skb);
  1568. if (ret != XDP_PASS) {
  1569. rcu_read_unlock();
  1570. local_bh_enable();
  1571. goto unlock_frags;
  1572. }
  1573. if (frags && skb != tfile->napi.skb)
  1574. tfile->napi.skb = skb;
  1575. }
  1576. rcu_read_unlock();
  1577. local_bh_enable();
  1578. }
  1579. /* Compute the costly rx hash only if needed for flow updates.
  1580. * We may get a very small possibility of OOO during switching, not
  1581. * worth to optimize.
  1582. */
  1583. if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
  1584. !tfile->detached)
  1585. rxhash = __skb_get_hash_symmetric(skb);
  1586. rcu_read_lock();
  1587. if (unlikely(!(tun->dev->flags & IFF_UP))) {
  1588. err = -EIO;
  1589. rcu_read_unlock();
  1590. drop_reason = SKB_DROP_REASON_DEV_READY;
  1591. goto drop;
  1592. }
  1593. if (frags) {
  1594. u32 headlen;
  1595. /* Exercise flow dissector code path. */
  1596. skb_push(skb, ETH_HLEN);
  1597. headlen = eth_get_headlen(tun->dev, skb->data,
  1598. skb_headlen(skb));
  1599. if (unlikely(headlen > skb_headlen(skb))) {
  1600. WARN_ON_ONCE(1);
  1601. err = -ENOMEM;
  1602. dev_core_stats_rx_dropped_inc(tun->dev);
  1603. napi_busy:
  1604. napi_free_frags(&tfile->napi);
  1605. rcu_read_unlock();
  1606. mutex_unlock(&tfile->napi_mutex);
  1607. return err;
  1608. }
  1609. if (likely(napi_schedule_prep(&tfile->napi))) {
  1610. local_bh_disable();
  1611. napi_gro_frags(&tfile->napi);
  1612. napi_complete(&tfile->napi);
  1613. local_bh_enable();
  1614. } else {
  1615. err = -EBUSY;
  1616. goto napi_busy;
  1617. }
  1618. mutex_unlock(&tfile->napi_mutex);
  1619. } else if (tfile->napi_enabled) {
  1620. struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
  1621. int queue_len;
  1622. spin_lock_bh(&queue->lock);
  1623. if (unlikely(tfile->detached)) {
  1624. spin_unlock_bh(&queue->lock);
  1625. rcu_read_unlock();
  1626. err = -EBUSY;
  1627. goto free_skb;
  1628. }
  1629. __skb_queue_tail(queue, skb);
  1630. queue_len = skb_queue_len(queue);
  1631. spin_unlock(&queue->lock);
  1632. if (!more || queue_len > NAPI_POLL_WEIGHT)
  1633. napi_schedule(&tfile->napi);
  1634. local_bh_enable();
  1635. } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
  1636. tun_rx_batched(tun, tfile, skb, more);
  1637. } else {
  1638. netif_rx(skb);
  1639. }
  1640. rcu_read_unlock();
  1641. preempt_disable();
  1642. dev_sw_netstats_rx_add(tun->dev, len);
  1643. preempt_enable();
  1644. if (rxhash)
  1645. tun_flow_update(tun, rxhash, tfile);
  1646. return total_len;
  1647. drop:
  1648. if (err != -EAGAIN)
  1649. dev_core_stats_rx_dropped_inc(tun->dev);
  1650. free_skb:
  1651. if (!IS_ERR_OR_NULL(skb))
  1652. kfree_skb_reason(skb, drop_reason);
  1653. unlock_frags:
  1654. if (frags) {
  1655. tfile->napi.skb = NULL;
  1656. mutex_unlock(&tfile->napi_mutex);
  1657. }
  1658. return err ?: total_len;
  1659. }
  1660. static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
  1661. {
  1662. struct file *file = iocb->ki_filp;
  1663. struct tun_file *tfile = file->private_data;
  1664. struct tun_struct *tun = tun_get(tfile);
  1665. ssize_t result;
  1666. int noblock = 0;
  1667. if (!tun)
  1668. return -EBADFD;
  1669. if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
  1670. noblock = 1;
  1671. result = tun_get_user(tun, tfile, NULL, from, noblock, false);
  1672. tun_put(tun);
  1673. return result;
  1674. }
  1675. static ssize_t tun_put_user_xdp(struct tun_struct *tun,
  1676. struct tun_file *tfile,
  1677. struct xdp_frame *xdp_frame,
  1678. struct iov_iter *iter)
  1679. {
  1680. int vnet_hdr_sz = 0;
  1681. size_t size = xdp_frame->len;
  1682. ssize_t ret;
  1683. if (tun->flags & IFF_VNET_HDR) {
  1684. struct virtio_net_hdr gso = { 0 };
  1685. vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
  1686. ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
  1687. if (ret)
  1688. return ret;
  1689. }
  1690. ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
  1691. preempt_disable();
  1692. dev_sw_netstats_tx_add(tun->dev, 1, ret);
  1693. preempt_enable();
  1694. return ret;
  1695. }
  1696. /* Put packet to the user space buffer */
  1697. static ssize_t tun_put_user(struct tun_struct *tun,
  1698. struct tun_file *tfile,
  1699. struct sk_buff *skb,
  1700. struct iov_iter *iter)
  1701. {
  1702. struct tun_pi pi = { 0, skb->protocol };
  1703. ssize_t total;
  1704. int vlan_offset = 0;
  1705. int vlan_hlen = 0;
  1706. int vnet_hdr_sz = 0;
  1707. int ret;
  1708. if (skb_vlan_tag_present(skb))
  1709. vlan_hlen = VLAN_HLEN;
  1710. if (tun->flags & IFF_VNET_HDR)
  1711. vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
  1712. total = skb->len + vlan_hlen + vnet_hdr_sz;
  1713. if (!(tun->flags & IFF_NO_PI)) {
  1714. if (iov_iter_count(iter) < sizeof(pi))
  1715. return -EINVAL;
  1716. total += sizeof(pi);
  1717. if (iov_iter_count(iter) < total) {
  1718. /* Packet will be striped */
  1719. pi.flags |= TUN_PKT_STRIP;
  1720. }
  1721. if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi))
  1722. return -EFAULT;
  1723. }
  1724. if (vnet_hdr_sz) {
  1725. struct virtio_net_hdr_v1_hash_tunnel hdr;
  1726. struct virtio_net_hdr *gso;
  1727. ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
  1728. &hdr);
  1729. if (ret)
  1730. return ret;
  1731. /*
  1732. * Drop the packet if the configured header size is too small
  1733. * WRT the enabled offloads.
  1734. */
  1735. gso = (struct virtio_net_hdr *)&hdr;
  1736. ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features,
  1737. iter, gso);
  1738. if (ret)
  1739. return ret;
  1740. }
  1741. if (vlan_hlen) {
  1742. int ret;
  1743. struct veth veth;
  1744. veth.h_vlan_proto = skb->vlan_proto;
  1745. veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
  1746. vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
  1747. ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset);
  1748. if (ret || !iov_iter_count(iter))
  1749. goto done;
  1750. ret = copy_to_iter(&veth, sizeof(veth), iter);
  1751. if (ret != sizeof(veth) || !iov_iter_count(iter))
  1752. goto done;
  1753. }
  1754. skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset);
  1755. done:
  1756. /* caller is in process context, */
  1757. preempt_disable();
  1758. dev_sw_netstats_tx_add(tun->dev, 1, skb->len + vlan_hlen);
  1759. preempt_enable();
  1760. return total;
  1761. }
  1762. static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
  1763. {
  1764. DECLARE_WAITQUEUE(wait, current);
  1765. void *ptr = NULL;
  1766. int error = 0;
  1767. ptr = ptr_ring_consume(&tfile->tx_ring);
  1768. if (ptr)
  1769. goto out;
  1770. if (noblock) {
  1771. error = -EAGAIN;
  1772. goto out;
  1773. }
  1774. add_wait_queue(&tfile->socket.wq.wait, &wait);
  1775. while (1) {
  1776. set_current_state(TASK_INTERRUPTIBLE);
  1777. ptr = ptr_ring_consume(&tfile->tx_ring);
  1778. if (ptr)
  1779. break;
  1780. if (signal_pending(current)) {
  1781. error = -ERESTARTSYS;
  1782. break;
  1783. }
  1784. if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
  1785. error = -EFAULT;
  1786. break;
  1787. }
  1788. schedule();
  1789. }
  1790. __set_current_state(TASK_RUNNING);
  1791. remove_wait_queue(&tfile->socket.wq.wait, &wait);
  1792. out:
  1793. *err = error;
  1794. return ptr;
  1795. }
  1796. static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
  1797. struct iov_iter *to,
  1798. int noblock, void *ptr)
  1799. {
  1800. ssize_t ret;
  1801. int err;
  1802. if (!iov_iter_count(to)) {
  1803. tun_ptr_free(ptr);
  1804. return 0;
  1805. }
  1806. if (!ptr) {
  1807. /* Read frames from ring */
  1808. ptr = tun_ring_recv(tfile, noblock, &err);
  1809. if (!ptr)
  1810. return err;
  1811. }
  1812. if (tun_is_xdp_frame(ptr)) {
  1813. struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
  1814. ret = tun_put_user_xdp(tun, tfile, xdpf, to);
  1815. xdp_return_frame(xdpf);
  1816. } else {
  1817. struct sk_buff *skb = ptr;
  1818. ret = tun_put_user(tun, tfile, skb, to);
  1819. if (unlikely(ret < 0))
  1820. kfree_skb(skb);
  1821. else
  1822. consume_skb(skb);
  1823. }
  1824. return ret;
  1825. }
  1826. static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
  1827. {
  1828. struct file *file = iocb->ki_filp;
  1829. struct tun_file *tfile = file->private_data;
  1830. struct tun_struct *tun = tun_get(tfile);
  1831. ssize_t len = iov_iter_count(to), ret;
  1832. int noblock = 0;
  1833. if (!tun)
  1834. return -EBADFD;
  1835. if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT))
  1836. noblock = 1;
  1837. ret = tun_do_read(tun, tfile, to, noblock, NULL);
  1838. ret = min_t(ssize_t, ret, len);
  1839. if (ret > 0)
  1840. iocb->ki_pos = ret;
  1841. tun_put(tun);
  1842. return ret;
  1843. }
  1844. static void tun_prog_free(struct rcu_head *rcu)
  1845. {
  1846. struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
  1847. bpf_prog_destroy(prog->prog);
  1848. kfree(prog);
  1849. }
  1850. static int __tun_set_ebpf(struct tun_struct *tun,
  1851. struct tun_prog __rcu **prog_p,
  1852. struct bpf_prog *prog)
  1853. {
  1854. struct tun_prog *old, *new = NULL;
  1855. if (prog) {
  1856. new = kmalloc_obj(*new);
  1857. if (!new)
  1858. return -ENOMEM;
  1859. new->prog = prog;
  1860. }
  1861. spin_lock_bh(&tun->lock);
  1862. old = rcu_dereference_protected(*prog_p,
  1863. lockdep_is_held(&tun->lock));
  1864. rcu_assign_pointer(*prog_p, new);
  1865. spin_unlock_bh(&tun->lock);
  1866. if (old)
  1867. call_rcu(&old->rcu, tun_prog_free);
  1868. return 0;
  1869. }
  1870. static void tun_free_netdev(struct net_device *dev)
  1871. {
  1872. struct tun_struct *tun = netdev_priv(dev);
  1873. BUG_ON(!(list_empty(&tun->disabled)));
  1874. tun_flow_uninit(tun);
  1875. security_tun_dev_free_security(tun->security);
  1876. __tun_set_ebpf(tun, &tun->steering_prog, NULL);
  1877. __tun_set_ebpf(tun, &tun->filter_prog, NULL);
  1878. }
  1879. static void tun_setup(struct net_device *dev)
  1880. {
  1881. struct tun_struct *tun = netdev_priv(dev);
  1882. tun->owner = INVALID_UID;
  1883. tun->group = INVALID_GID;
  1884. tun_default_link_ksettings(dev, &tun->link_ksettings);
  1885. dev->ethtool_ops = &tun_ethtool_ops;
  1886. dev->needs_free_netdev = true;
  1887. dev->priv_destructor = tun_free_netdev;
  1888. /* We prefer our own queue length */
  1889. dev->tx_queue_len = TUN_READQ_SIZE;
  1890. }
  1891. /* Trivial set of netlink ops to allow deleting tun or tap
  1892. * device with netlink.
  1893. */
  1894. static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
  1895. struct netlink_ext_ack *extack)
  1896. {
  1897. NL_SET_ERR_MSG(extack,
  1898. "tun/tap creation via rtnetlink is not supported.");
  1899. return -EOPNOTSUPP;
  1900. }
  1901. static size_t tun_get_size(const struct net_device *dev)
  1902. {
  1903. BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
  1904. BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
  1905. return nla_total_size(sizeof(uid_t)) + /* OWNER */
  1906. nla_total_size(sizeof(gid_t)) + /* GROUP */
  1907. nla_total_size(sizeof(u8)) + /* TYPE */
  1908. nla_total_size(sizeof(u8)) + /* PI */
  1909. nla_total_size(sizeof(u8)) + /* VNET_HDR */
  1910. nla_total_size(sizeof(u8)) + /* PERSIST */
  1911. nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */
  1912. nla_total_size(sizeof(u32)) + /* NUM_QUEUES */
  1913. nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */
  1914. 0;
  1915. }
  1916. static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
  1917. {
  1918. struct tun_struct *tun = netdev_priv(dev);
  1919. if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
  1920. goto nla_put_failure;
  1921. if (uid_valid(tun->owner) &&
  1922. nla_put_u32(skb, IFLA_TUN_OWNER,
  1923. from_kuid_munged(current_user_ns(), tun->owner)))
  1924. goto nla_put_failure;
  1925. if (gid_valid(tun->group) &&
  1926. nla_put_u32(skb, IFLA_TUN_GROUP,
  1927. from_kgid_munged(current_user_ns(), tun->group)))
  1928. goto nla_put_failure;
  1929. if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
  1930. goto nla_put_failure;
  1931. if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
  1932. goto nla_put_failure;
  1933. if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
  1934. goto nla_put_failure;
  1935. if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
  1936. !!(tun->flags & IFF_MULTI_QUEUE)))
  1937. goto nla_put_failure;
  1938. if (tun->flags & IFF_MULTI_QUEUE) {
  1939. if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
  1940. goto nla_put_failure;
  1941. if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
  1942. tun->numdisabled))
  1943. goto nla_put_failure;
  1944. }
  1945. return 0;
  1946. nla_put_failure:
  1947. return -EMSGSIZE;
  1948. }
  1949. static struct rtnl_link_ops tun_link_ops __read_mostly = {
  1950. .kind = DRV_NAME,
  1951. .priv_size = sizeof(struct tun_struct),
  1952. .setup = tun_setup,
  1953. .validate = tun_validate,
  1954. .get_size = tun_get_size,
  1955. .fill_info = tun_fill_info,
  1956. };
  1957. static void tun_sock_write_space(struct sock *sk)
  1958. {
  1959. struct tun_file *tfile;
  1960. wait_queue_head_t *wqueue;
  1961. if (!sock_writeable(sk))
  1962. return;
  1963. if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
  1964. return;
  1965. wqueue = sk_sleep(sk);
  1966. if (wqueue && waitqueue_active(wqueue))
  1967. wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
  1968. EPOLLWRNORM | EPOLLWRBAND);
  1969. tfile = container_of(sk, struct tun_file, sk);
  1970. kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
  1971. }
  1972. static void tun_put_page(struct tun_page *tpage)
  1973. {
  1974. if (tpage->page)
  1975. __page_frag_cache_drain(tpage->page, tpage->count);
  1976. }
  1977. static int tun_xdp_one(struct tun_struct *tun,
  1978. struct tun_file *tfile,
  1979. struct xdp_buff *xdp, int *flush,
  1980. struct tun_page *tpage)
  1981. {
  1982. unsigned int datasize = xdp->data_end - xdp->data;
  1983. struct virtio_net_hdr *gso = xdp->data_hard_start;
  1984. struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
  1985. struct bpf_prog *xdp_prog;
  1986. struct sk_buff *skb = NULL;
  1987. struct sk_buff_head *queue;
  1988. netdev_features_t features;
  1989. u32 rxhash = 0, act;
  1990. int buflen = xdp->frame_sz;
  1991. int metasize = 0;
  1992. int ret = 0;
  1993. bool skb_xdp = false;
  1994. struct page *page;
  1995. if (unlikely(datasize < ETH_HLEN))
  1996. return -EINVAL;
  1997. xdp_prog = rcu_dereference(tun->xdp_prog);
  1998. if (xdp_prog) {
  1999. if (gso->gso_type) {
  2000. skb_xdp = true;
  2001. goto build;
  2002. }
  2003. xdp_init_buff(xdp, buflen, &tfile->xdp_rxq);
  2004. act = bpf_prog_run_xdp(xdp_prog, xdp);
  2005. ret = tun_xdp_act(tun, xdp_prog, xdp, act);
  2006. if (ret < 0) {
  2007. put_page(virt_to_head_page(xdp->data));
  2008. return ret;
  2009. }
  2010. switch (ret) {
  2011. case XDP_REDIRECT:
  2012. *flush = true;
  2013. fallthrough;
  2014. case XDP_TX:
  2015. return 0;
  2016. case XDP_PASS:
  2017. break;
  2018. default:
  2019. page = virt_to_head_page(xdp->data);
  2020. if (tpage->page == page) {
  2021. ++tpage->count;
  2022. } else {
  2023. tun_put_page(tpage);
  2024. tpage->page = page;
  2025. tpage->count = 1;
  2026. }
  2027. return 0;
  2028. }
  2029. }
  2030. build:
  2031. skb = build_skb(xdp->data_hard_start, buflen);
  2032. if (!skb) {
  2033. ret = -ENOMEM;
  2034. goto out;
  2035. }
  2036. skb_reserve(skb, xdp->data - xdp->data_hard_start);
  2037. skb_put(skb, xdp->data_end - xdp->data);
  2038. /* The externally provided xdp_buff may have no metadata support, which
  2039. * is marked by xdp->data_meta being xdp->data + 1. This will lead to a
  2040. * metasize of -1 and is the reason why the condition checks for > 0.
  2041. */
  2042. metasize = xdp->data - xdp->data_meta;
  2043. if (metasize > 0)
  2044. skb_metadata_set(skb, metasize);
  2045. features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz));
  2046. tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso;
  2047. if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) {
  2048. atomic_long_inc(&tun->rx_frame_errors);
  2049. kfree_skb(skb);
  2050. ret = -EINVAL;
  2051. goto out;
  2052. }
  2053. skb->protocol = eth_type_trans(skb, tun->dev);
  2054. skb_reset_network_header(skb);
  2055. skb_probe_transport_header(skb);
  2056. skb_record_rx_queue(skb, tfile->queue_index);
  2057. if (skb_xdp) {
  2058. ret = do_xdp_generic(xdp_prog, &skb);
  2059. if (ret != XDP_PASS) {
  2060. ret = 0;
  2061. goto out;
  2062. }
  2063. }
  2064. if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
  2065. !tfile->detached)
  2066. rxhash = __skb_get_hash_symmetric(skb);
  2067. if (tfile->napi_enabled) {
  2068. queue = &tfile->sk.sk_write_queue;
  2069. spin_lock(&queue->lock);
  2070. if (unlikely(tfile->detached)) {
  2071. spin_unlock(&queue->lock);
  2072. kfree_skb(skb);
  2073. return -EBUSY;
  2074. }
  2075. __skb_queue_tail(queue, skb);
  2076. spin_unlock(&queue->lock);
  2077. ret = 1;
  2078. } else {
  2079. netif_receive_skb(skb);
  2080. ret = 0;
  2081. }
  2082. /* No need to disable preemption here since this function is
  2083. * always called with bh disabled
  2084. */
  2085. dev_sw_netstats_rx_add(tun->dev, datasize);
  2086. if (rxhash)
  2087. tun_flow_update(tun, rxhash, tfile);
  2088. out:
  2089. return ret;
  2090. }
  2091. static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
  2092. {
  2093. int ret, i;
  2094. struct tun_file *tfile = container_of(sock, struct tun_file, socket);
  2095. struct tun_struct *tun = tun_get(tfile);
  2096. struct tun_msg_ctl *ctl = m->msg_control;
  2097. struct xdp_buff *xdp;
  2098. if (!tun)
  2099. return -EBADFD;
  2100. if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
  2101. ctl && ctl->type == TUN_MSG_PTR) {
  2102. struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
  2103. struct tun_page tpage;
  2104. int n = ctl->num;
  2105. int flush = 0, queued = 0;
  2106. memset(&tpage, 0, sizeof(tpage));
  2107. local_bh_disable();
  2108. rcu_read_lock();
  2109. bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
  2110. for (i = 0; i < n; i++) {
  2111. xdp = &((struct xdp_buff *)ctl->ptr)[i];
  2112. ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
  2113. if (ret > 0)
  2114. queued += ret;
  2115. }
  2116. if (flush)
  2117. xdp_do_flush();
  2118. if (tfile->napi_enabled && queued > 0)
  2119. napi_schedule(&tfile->napi);
  2120. bpf_net_ctx_clear(bpf_net_ctx);
  2121. rcu_read_unlock();
  2122. local_bh_enable();
  2123. tun_put_page(&tpage);
  2124. ret = total_len;
  2125. goto out;
  2126. }
  2127. ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
  2128. m->msg_flags & MSG_DONTWAIT,
  2129. m->msg_flags & MSG_MORE);
  2130. out:
  2131. tun_put(tun);
  2132. return ret;
  2133. }
  2134. static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
  2135. int flags)
  2136. {
  2137. struct tun_file *tfile = container_of(sock, struct tun_file, socket);
  2138. struct tun_struct *tun = tun_get(tfile);
  2139. void *ptr = m->msg_control;
  2140. int ret;
  2141. if (!tun) {
  2142. ret = -EBADFD;
  2143. goto out_free;
  2144. }
  2145. if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
  2146. ret = -EINVAL;
  2147. goto out_put_tun;
  2148. }
  2149. if (flags & MSG_ERRQUEUE) {
  2150. ret = sock_recv_errqueue(sock->sk, m, total_len,
  2151. SOL_PACKET, TUN_TX_TIMESTAMP);
  2152. goto out;
  2153. }
  2154. ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
  2155. if (ret > (ssize_t)total_len) {
  2156. m->msg_flags |= MSG_TRUNC;
  2157. ret = flags & MSG_TRUNC ? ret : total_len;
  2158. }
  2159. out:
  2160. tun_put(tun);
  2161. return ret;
  2162. out_put_tun:
  2163. tun_put(tun);
  2164. out_free:
  2165. tun_ptr_free(ptr);
  2166. return ret;
  2167. }
  2168. static int tun_ptr_peek_len(void *ptr)
  2169. {
  2170. if (likely(ptr)) {
  2171. if (tun_is_xdp_frame(ptr)) {
  2172. struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
  2173. return xdpf->len;
  2174. }
  2175. return __skb_array_len_with_tag(ptr);
  2176. } else {
  2177. return 0;
  2178. }
  2179. }
  2180. static int tun_peek_len(struct socket *sock)
  2181. {
  2182. struct tun_file *tfile = container_of(sock, struct tun_file, socket);
  2183. struct tun_struct *tun;
  2184. int ret = 0;
  2185. tun = tun_get(tfile);
  2186. if (!tun)
  2187. return 0;
  2188. ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
  2189. tun_put(tun);
  2190. return ret;
  2191. }
  2192. /* Ops structure to mimic raw sockets with tun */
  2193. static const struct proto_ops tun_socket_ops = {
  2194. .peek_len = tun_peek_len,
  2195. .sendmsg = tun_sendmsg,
  2196. .recvmsg = tun_recvmsg,
  2197. };
  2198. static struct proto tun_proto = {
  2199. .name = "tun",
  2200. .owner = THIS_MODULE,
  2201. .obj_size = sizeof(struct tun_file),
  2202. };
  2203. static int tun_flags(struct tun_struct *tun)
  2204. {
  2205. return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
  2206. }
  2207. static ssize_t tun_flags_show(struct device *dev, struct device_attribute *attr,
  2208. char *buf)
  2209. {
  2210. struct tun_struct *tun = netdev_priv(to_net_dev(dev));
  2211. return sysfs_emit(buf, "0x%x\n", tun_flags(tun));
  2212. }
  2213. static ssize_t owner_show(struct device *dev, struct device_attribute *attr,
  2214. char *buf)
  2215. {
  2216. struct tun_struct *tun = netdev_priv(to_net_dev(dev));
  2217. return uid_valid(tun->owner)?
  2218. sysfs_emit(buf, "%u\n",
  2219. from_kuid_munged(current_user_ns(), tun->owner)) :
  2220. sysfs_emit(buf, "-1\n");
  2221. }
  2222. static ssize_t group_show(struct device *dev, struct device_attribute *attr,
  2223. char *buf)
  2224. {
  2225. struct tun_struct *tun = netdev_priv(to_net_dev(dev));
  2226. return gid_valid(tun->group) ?
  2227. sysfs_emit(buf, "%u\n",
  2228. from_kgid_munged(current_user_ns(), tun->group)) :
  2229. sysfs_emit(buf, "-1\n");
  2230. }
  2231. static DEVICE_ATTR_RO(tun_flags);
  2232. static DEVICE_ATTR_RO(owner);
  2233. static DEVICE_ATTR_RO(group);
  2234. static struct attribute *tun_dev_attrs[] = {
  2235. &dev_attr_tun_flags.attr,
  2236. &dev_attr_owner.attr,
  2237. &dev_attr_group.attr,
  2238. NULL
  2239. };
  2240. static const struct attribute_group tun_attr_group = {
  2241. .attrs = tun_dev_attrs
  2242. };
  2243. static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
  2244. {
  2245. struct tun_struct *tun;
  2246. struct tun_file *tfile = file->private_data;
  2247. struct net_device *dev;
  2248. int err;
  2249. if (tfile->detached)
  2250. return -EINVAL;
  2251. if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
  2252. if (!capable(CAP_NET_ADMIN))
  2253. return -EPERM;
  2254. if (!(ifr->ifr_flags & IFF_NAPI) ||
  2255. (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
  2256. return -EINVAL;
  2257. }
  2258. dev = __dev_get_by_name(net, ifr->ifr_name);
  2259. if (dev) {
  2260. if (ifr->ifr_flags & IFF_TUN_EXCL)
  2261. return -EBUSY;
  2262. if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
  2263. tun = netdev_priv(dev);
  2264. else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
  2265. tun = netdev_priv(dev);
  2266. else
  2267. return -EINVAL;
  2268. if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) !=
  2269. !!(tun->flags & IFF_MULTI_QUEUE))
  2270. return -EINVAL;
  2271. if (tun_not_capable(tun))
  2272. return -EPERM;
  2273. err = security_tun_dev_open(tun->security);
  2274. if (err < 0)
  2275. return err;
  2276. err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
  2277. ifr->ifr_flags & IFF_NAPI,
  2278. ifr->ifr_flags & IFF_NAPI_FRAGS, true);
  2279. if (err < 0)
  2280. return err;
  2281. if (tun->flags & IFF_MULTI_QUEUE &&
  2282. (tun->numqueues + tun->numdisabled > 1)) {
  2283. /* One or more queue has already been attached, no need
  2284. * to initialize the device again.
  2285. */
  2286. netdev_state_change(dev);
  2287. return 0;
  2288. }
  2289. tun->flags = (tun->flags & ~TUN_FEATURES) |
  2290. (ifr->ifr_flags & TUN_FEATURES);
  2291. netdev_state_change(dev);
  2292. } else {
  2293. char *name;
  2294. unsigned long flags = 0;
  2295. int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
  2296. MAX_TAP_QUEUES : 1;
  2297. if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  2298. return -EPERM;
  2299. err = security_tun_dev_create();
  2300. if (err < 0)
  2301. return err;
  2302. /* Set dev type */
  2303. if (ifr->ifr_flags & IFF_TUN) {
  2304. /* TUN device */
  2305. flags |= IFF_TUN;
  2306. name = "tun%d";
  2307. } else if (ifr->ifr_flags & IFF_TAP) {
  2308. /* TAP device */
  2309. flags |= IFF_TAP;
  2310. name = "tap%d";
  2311. } else
  2312. return -EINVAL;
  2313. if (*ifr->ifr_name)
  2314. name = ifr->ifr_name;
  2315. dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
  2316. NET_NAME_UNKNOWN, tun_setup, queues,
  2317. queues);
  2318. if (!dev)
  2319. return -ENOMEM;
  2320. dev_net_set(dev, net);
  2321. dev->rtnl_link_ops = &tun_link_ops;
  2322. dev->ifindex = tfile->ifindex;
  2323. dev->sysfs_groups[0] = &tun_attr_group;
  2324. tun = netdev_priv(dev);
  2325. tun->dev = dev;
  2326. tun->flags = flags;
  2327. tun->txflt.count = 0;
  2328. tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
  2329. tun->align = NET_SKB_PAD;
  2330. tun->filter_attached = false;
  2331. tun->sndbuf = tfile->socket.sk->sk_sndbuf;
  2332. tun->rx_batched = 0;
  2333. RCU_INIT_POINTER(tun->steering_prog, NULL);
  2334. tun->ifr = ifr;
  2335. tun->file = file;
  2336. tun_net_initialize(dev);
  2337. err = register_netdevice(tun->dev);
  2338. if (err < 0) {
  2339. free_netdev(dev);
  2340. return err;
  2341. }
  2342. /* free_netdev() won't check refcnt, to avoid race
  2343. * with dev_put() we need publish tun after registration.
  2344. */
  2345. rcu_assign_pointer(tfile->tun, tun);
  2346. }
  2347. if (ifr->ifr_flags & IFF_NO_CARRIER)
  2348. netif_carrier_off(tun->dev);
  2349. else
  2350. netif_carrier_on(tun->dev);
  2351. /* Make sure persistent devices do not get stuck in
  2352. * xoff state.
  2353. */
  2354. if (netif_running(tun->dev))
  2355. netif_tx_wake_all_queues(tun->dev);
  2356. strscpy(ifr->ifr_name, tun->dev->name);
  2357. return 0;
  2358. }
  2359. static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
  2360. {
  2361. strscpy(ifr->ifr_name, tun->dev->name);
  2362. ifr->ifr_flags = tun_flags(tun);
  2363. }
  2364. #define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6)
  2365. /* This is like a cut-down ethtool ops, except done via tun fd so no
  2366. * privs required. */
  2367. static int set_offload(struct tun_struct *tun, unsigned long arg)
  2368. {
  2369. netdev_features_t features = 0;
  2370. if (arg & TUN_F_CSUM) {
  2371. features |= NETIF_F_HW_CSUM;
  2372. arg &= ~TUN_F_CSUM;
  2373. if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
  2374. if (arg & TUN_F_TSO_ECN) {
  2375. features |= NETIF_F_TSO_ECN;
  2376. arg &= ~TUN_F_TSO_ECN;
  2377. }
  2378. if (arg & TUN_F_TSO4)
  2379. features |= NETIF_F_TSO;
  2380. if (arg & TUN_F_TSO6)
  2381. features |= NETIF_F_TSO6;
  2382. arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
  2383. }
  2384. arg &= ~TUN_F_UFO;
  2385. /* TODO: for now USO4 and USO6 should work simultaneously */
  2386. if (arg & TUN_F_USO4 && arg & TUN_F_USO6) {
  2387. features |= NETIF_F_GSO_UDP_L4;
  2388. arg &= ~(TUN_F_USO4 | TUN_F_USO6);
  2389. }
  2390. /*
  2391. * Tunnel offload is allowed only if some plain offload is
  2392. * available, too.
  2393. */
  2394. if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) {
  2395. features |= NETIF_F_GSO_UDP_TUNNEL;
  2396. if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM)
  2397. features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
  2398. arg &= ~(TUN_F_UDP_TUNNEL_GSO |
  2399. TUN_F_UDP_TUNNEL_GSO_CSUM);
  2400. }
  2401. }
  2402. /* This gives the user a way to test for new features in future by
  2403. * trying to set them. */
  2404. if (arg)
  2405. return -EINVAL;
  2406. tun->set_features = features;
  2407. tun->dev->wanted_features &= ~TUN_USER_FEATURES;
  2408. tun->dev->wanted_features |= features;
  2409. netdev_update_features(tun->dev);
  2410. return 0;
  2411. }
  2412. static void tun_detach_filter(struct tun_struct *tun, int n)
  2413. {
  2414. int i;
  2415. struct tun_file *tfile;
  2416. for (i = 0; i < n; i++) {
  2417. tfile = rtnl_dereference(tun->tfiles[i]);
  2418. lock_sock(tfile->socket.sk);
  2419. sk_detach_filter(tfile->socket.sk);
  2420. release_sock(tfile->socket.sk);
  2421. }
  2422. tun->filter_attached = false;
  2423. }
  2424. static int tun_attach_filter(struct tun_struct *tun)
  2425. {
  2426. int i, ret = 0;
  2427. struct tun_file *tfile;
  2428. for (i = 0; i < tun->numqueues; i++) {
  2429. tfile = rtnl_dereference(tun->tfiles[i]);
  2430. lock_sock(tfile->socket.sk);
  2431. ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
  2432. release_sock(tfile->socket.sk);
  2433. if (ret) {
  2434. tun_detach_filter(tun, i);
  2435. return ret;
  2436. }
  2437. }
  2438. tun->filter_attached = true;
  2439. return ret;
  2440. }
  2441. static void tun_set_sndbuf(struct tun_struct *tun)
  2442. {
  2443. struct tun_file *tfile;
  2444. int i;
  2445. for (i = 0; i < tun->numqueues; i++) {
  2446. tfile = rtnl_dereference(tun->tfiles[i]);
  2447. tfile->socket.sk->sk_sndbuf = tun->sndbuf;
  2448. }
  2449. }
  2450. static int tun_set_queue(struct file *file, struct ifreq *ifr)
  2451. {
  2452. struct tun_file *tfile = file->private_data;
  2453. struct tun_struct *tun;
  2454. int ret = 0;
  2455. rtnl_lock();
  2456. if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
  2457. tun = tfile->detached;
  2458. if (!tun) {
  2459. ret = -EINVAL;
  2460. goto unlock;
  2461. }
  2462. ret = security_tun_dev_attach_queue(tun->security);
  2463. if (ret < 0)
  2464. goto unlock;
  2465. ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
  2466. tun->flags & IFF_NAPI_FRAGS, true);
  2467. } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
  2468. tun = rtnl_dereference(tfile->tun);
  2469. if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
  2470. ret = -EINVAL;
  2471. else
  2472. __tun_detach(tfile, false);
  2473. } else
  2474. ret = -EINVAL;
  2475. if (ret >= 0)
  2476. netdev_state_change(tun->dev);
  2477. unlock:
  2478. rtnl_unlock();
  2479. return ret;
  2480. }
  2481. static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
  2482. void __user *data)
  2483. {
  2484. struct bpf_prog *prog;
  2485. int fd;
  2486. if (copy_from_user(&fd, data, sizeof(fd)))
  2487. return -EFAULT;
  2488. if (fd == -1) {
  2489. prog = NULL;
  2490. } else {
  2491. prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
  2492. if (IS_ERR(prog))
  2493. return PTR_ERR(prog);
  2494. }
  2495. return __tun_set_ebpf(tun, prog_p, prog);
  2496. }
  2497. /* Return correct value for tun->dev->addr_len based on tun->dev->type. */
  2498. static unsigned char tun_get_addr_len(unsigned short type)
  2499. {
  2500. switch (type) {
  2501. case ARPHRD_IP6GRE:
  2502. case ARPHRD_TUNNEL6:
  2503. return sizeof(struct in6_addr);
  2504. case ARPHRD_IPGRE:
  2505. case ARPHRD_TUNNEL:
  2506. case ARPHRD_SIT:
  2507. return 4;
  2508. case ARPHRD_ETHER:
  2509. return ETH_ALEN;
  2510. case ARPHRD_IEEE802154:
  2511. case ARPHRD_IEEE802154_MONITOR:
  2512. return IEEE802154_EXTENDED_ADDR_LEN;
  2513. case ARPHRD_PHONET_PIPE:
  2514. case ARPHRD_PPP:
  2515. case ARPHRD_NONE:
  2516. return 0;
  2517. case ARPHRD_6LOWPAN:
  2518. return EUI64_ADDR_LEN;
  2519. case ARPHRD_FDDI:
  2520. return FDDI_K_ALEN;
  2521. case ARPHRD_HIPPI:
  2522. return HIPPI_ALEN;
  2523. case ARPHRD_IEEE802:
  2524. return FC_ALEN;
  2525. case ARPHRD_ROSE:
  2526. return ROSE_ADDR_LEN;
  2527. case ARPHRD_NETROM:
  2528. return AX25_ADDR_LEN;
  2529. case ARPHRD_LOCALTLK:
  2530. return LTALK_ALEN;
  2531. default:
  2532. return 0;
  2533. }
  2534. }
/*
 * Common ioctl implementation shared by the native and compat entry
 * points.
 *
 * @file:      tun character device file the ioctl was issued on
 * @cmd:       ioctl command
 * @arg:       raw ioctl argument; used both as a value and, via argp, as a
 *             user pointer depending on @cmd
 * @ifreq_len: number of bytes of struct ifreq to copy from/to user space
 *
 * TUNGETFEATURES, TUNSETQUEUE and SIOCGSKNS are handled before the rtnl
 * lock is taken.  Everything else runs under rtnl: TUNSETIFF and
 * TUNSETIFINDEX require the file NOT to be attached to a device yet, all
 * remaining commands require an attached device (-EBADFD otherwise).
 *
 * Returns 0 or a command-specific value on success, negative errno on
 * failure.
 */
static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg, int ifreq_len)
{
	struct tun_file *tfile = file->private_data;
	struct net *net = sock_net(&tfile->sk);
	struct tun_struct *tun;
	void __user* argp = (void __user*)arg;
	unsigned int carrier;
	struct ifreq ifr;
	kuid_t owner;
	kgid_t group;
	int ifindex;
	int sndbuf;
	int ret;
	bool do_notify = false;

	/* Only commands that take a struct ifreq get it copied in; for all
	 * others ifr starts out zeroed.
	 */
	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
		if (copy_from_user(&ifr, argp, ifreq_len))
			return -EFAULT;
	} else {
		memset(&ifr, 0, sizeof(ifr));
	}
	if (cmd == TUNGETFEATURES) {
		/* Currently this just means: "what IFF flags are valid?".
		 * This is needed because we never checked for invalid flags on
		 * TUNSETIFF.
		 */
		return put_user(IFF_TUN | IFF_TAP | IFF_NO_CARRIER |
				TUN_FEATURES, (unsigned int __user*)argp);
	} else if (cmd == TUNSETQUEUE) {
		/* tun_set_queue() takes the rtnl lock itself. */
		return tun_set_queue(file, &ifr);
	} else if (cmd == SIOCGSKNS) {
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		return open_related_ns(&net->ns, get_net_ns);
	}

	rtnl_lock();

	/* NULL when the file is not attached to a device; balanced by the
	 * tun_put() at the end of the function.
	 */
	tun = tun_get(tfile);
	if (cmd == TUNSETIFF) {
		/* The file may be bound to a device only once. */
		ret = -EEXIST;
		if (tun)
			goto unlock;

		/* The name comes from user space: force termination. */
		ifr.ifr_name[IFNAMSIZ-1] = '\0';

		ret = tun_set_iff(net, file, &ifr);

		if (ret)
			goto unlock;

		/* Hand the (possibly updated) ifreq back to the caller. */
		if (copy_to_user(argp, &ifr, ifreq_len))
			ret = -EFAULT;
		goto unlock;
	}
	if (cmd == TUNSETIFINDEX) {
		/* Only allowed before the file is attached to a device. */
		ret = -EPERM;
		if (tun)
			goto unlock;

		ret = -EFAULT;
		if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
			goto unlock;
		ret = -EINVAL;
		if (ifindex < 0)
			goto unlock;
		ret = 0;
		tfile->ifindex = ifindex;
		goto unlock;
	}

	/* Everything below operates on an attached device. */
	ret = -EBADFD;
	if (!tun)
		goto unlock;

	netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);

	/* From here on use the namespace of the device, not of the file. */
	net = dev_net(tun->dev);
	ret = 0;
	switch (cmd) {
	case TUNGETIFF:
		tun_get_iff(tun, &ifr);

		/* Add per-file state on top of the device flags. */
		if (tfile->detached)
			ifr.ifr_flags |= IFF_DETACH_QUEUE;
		if (!tfile->socket.sk->sk_filter)
			ifr.ifr_flags |= IFF_NOFILTER;

		if (copy_to_user(argp, &ifr, ifreq_len))
			ret = -EFAULT;
		break;

	case TUNSETNOCSUM:
		/* Disable/Enable checksum */

		/* [unimplemented] */
		netif_info(tun, drv, tun->dev, "ignored: set checksum %s\n",
			   arg ? "disabled" : "enabled");
		break;

	case TUNSETPERSIST:
		/* Disable/Enable persist mode. Keep an extra reference to the
		 * module to prevent the module being unprobed.
		 */
		if (arg && !(tun->flags & IFF_PERSIST)) {
			tun->flags |= IFF_PERSIST;
			__module_get(THIS_MODULE);
			do_notify = true;
		}
		if (!arg && (tun->flags & IFF_PERSIST)) {
			tun->flags &= ~IFF_PERSIST;
			module_put(THIS_MODULE);
			do_notify = true;
		}

		netif_info(tun, drv, tun->dev, "persist %s\n",
			   arg ? "enabled" : "disabled");
		break;

	case TUNSETOWNER:
		/* Set owner of the device */
		owner = make_kuid(current_user_ns(), arg);
		if (!uid_valid(owner)) {
			ret = -EINVAL;
			break;
		}
		tun->owner = owner;
		do_notify = true;
		netif_info(tun, drv, tun->dev, "owner set to %u\n",
			   from_kuid(&init_user_ns, tun->owner));
		break;

	case TUNSETGROUP:
		/* Set group of the device */
		group = make_kgid(current_user_ns(), arg);
		if (!gid_valid(group)) {
			ret = -EINVAL;
			break;
		}
		tun->group = group;
		do_notify = true;
		netif_info(tun, drv, tun->dev, "group set to %u\n",
			   from_kgid(&init_user_ns, tun->group));
		break;

	case TUNSETLINK:
		/* Only allow setting the type when the interface is down */
		if (tun->dev->flags & IFF_UP) {
			netif_info(tun, drv, tun->dev,
				   "Linktype set failed because interface is up\n");
			ret = -EBUSY;
		} else {
			/* Notifier listeners may veto the type change. */
			ret = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
						       tun->dev);
			ret = notifier_to_errno(ret);
			if (ret) {
				netif_info(tun, drv, tun->dev,
					   "Refused to change device type\n");
				break;
			}
			tun->dev->type = (int) arg;
			/* Keep addr_len consistent with the new link type. */
			tun->dev->addr_len = tun_get_addr_len(tun->dev->type);
			netif_info(tun, drv, tun->dev, "linktype set to %d\n",
				   tun->dev->type);
			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
						 tun->dev);
		}
		break;

	case TUNSETDEBUG:
		tun->msg_enable = (u32)arg;
		break;

	case TUNSETOFFLOAD:
		ret = set_offload(tun, arg);
		break;

	case TUNSETTXFILTER:
		/* Can be set only for TAPs */
		ret = -EINVAL;
		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
			break;
		ret = update_filter(&tun->txflt, (void __user *)arg);
		break;

	case SIOCGIFHWADDR:
		/* Get hw address */
		netif_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name);
		if (copy_to_user(argp, &ifr, ifreq_len))
			ret = -EFAULT;
		break;

	case SIOCSIFHWADDR:
		/* Set hw address */
		/* Reject link types whose address does not fit ifr_hwaddr. */
		if (tun->dev->addr_len > sizeof(ifr.ifr_hwaddr)) {
			ret = -EINVAL;
			break;
		}
		ret = dev_set_mac_address_user(tun->dev,
					       (struct sockaddr_storage *)&ifr.ifr_hwaddr,
					       NULL);
		break;

	case TUNGETSNDBUF:
		sndbuf = tfile->socket.sk->sk_sndbuf;
		if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
			ret = -EFAULT;
		break;

	case TUNSETSNDBUF:
		if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
			ret = -EFAULT;
			break;
		}
		if (sndbuf <= 0) {
			ret = -EINVAL;
			break;
		}

		/* Remember the value and apply it to every active queue. */
		tun->sndbuf = sndbuf;
		tun_set_sndbuf(tun);
		break;

	case TUNATTACHFILTER:
		/* Can be set only for TAPs */
		ret = -EINVAL;
		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
			break;
		ret = -EFAULT;
		if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
			break;

		ret = tun_attach_filter(tun);
		break;

	case TUNDETACHFILTER:
		/* Can be set only for TAPs */
		ret = -EINVAL;
		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
			break;
		ret = 0;
		tun_detach_filter(tun, tun->numqueues);
		break;

	case TUNGETFILTER:
		/* Like the other filter commands, TAP only. */
		ret = -EINVAL;
		if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
			break;
		ret = -EFAULT;
		if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog)))
			break;
		ret = 0;
		break;

	case TUNSETSTEERINGEBPF:
		ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
		break;

	case TUNSETFILTEREBPF:
		ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
		break;

	case TUNSETCARRIER:
		ret = -EFAULT;
		if (copy_from_user(&carrier, argp, sizeof(carrier)))
			goto unlock;

		ret = tun_net_change_carrier(tun->dev, (bool)carrier);
		break;

	case TUNGETDEVNETNS:
		/* Checked against the device's namespace (net was switched
		 * to dev_net() above).
		 */
		ret = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto unlock;
		ret = open_related_ns(&net->ns, get_net_ns);
		break;

	default:
		/* Anything not handled above is passed to the vnet header
		 * ioctl helper.
		 */
		ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
		break;
	}

	/* Commands that changed persist/owner/group announce it. */
	if (do_notify)
		netdev_state_change(tun->dev);

unlock:
	rtnl_unlock();
	if (tun)
		tun_put(tun);
	return ret;
}
  2788. static long tun_chr_ioctl(struct file *file,
  2789. unsigned int cmd, unsigned long arg)
  2790. {
  2791. return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq));
  2792. }
  2793. #ifdef CONFIG_COMPAT
  2794. static long tun_chr_compat_ioctl(struct file *file,
  2795. unsigned int cmd, unsigned long arg)
  2796. {
  2797. switch (cmd) {
  2798. case TUNSETIFF:
  2799. case TUNGETIFF:
  2800. case TUNSETTXFILTER:
  2801. case TUNGETSNDBUF:
  2802. case TUNSETSNDBUF:
  2803. case SIOCGIFHWADDR:
  2804. case SIOCSIFHWADDR:
  2805. arg = (unsigned long)compat_ptr(arg);
  2806. break;
  2807. default:
  2808. arg = (compat_ulong_t)arg;
  2809. break;
  2810. }
  2811. /*
  2812. * compat_ifreq is shorter than ifreq, so we must not access beyond
  2813. * the end of that structure. All fields that are used in this
  2814. * driver are compatible though, we don't need to convert the
  2815. * contents.
  2816. */
  2817. return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
  2818. }
  2819. #endif /* CONFIG_COMPAT */
  2820. static int tun_chr_fasync(int fd, struct file *file, int on)
  2821. {
  2822. struct tun_file *tfile = file->private_data;
  2823. int ret;
  2824. if (on) {
  2825. ret = file_f_owner_allocate(file);
  2826. if (ret)
  2827. goto out;
  2828. }
  2829. if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
  2830. goto out;
  2831. if (on) {
  2832. __f_setown(file, task_pid(current), PIDTYPE_TGID, 0);
  2833. tfile->flags |= TUN_FASYNC;
  2834. } else
  2835. tfile->flags &= ~TUN_FASYNC;
  2836. ret = 0;
  2837. out:
  2838. return ret;
  2839. }
  2840. static int tun_chr_open(struct inode *inode, struct file * file)
  2841. {
  2842. struct net *net = current->nsproxy->net_ns;
  2843. struct tun_file *tfile;
  2844. tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
  2845. &tun_proto, 0);
  2846. if (!tfile)
  2847. return -ENOMEM;
  2848. if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
  2849. sk_free(&tfile->sk);
  2850. return -ENOMEM;
  2851. }
  2852. mutex_init(&tfile->napi_mutex);
  2853. RCU_INIT_POINTER(tfile->tun, NULL);
  2854. tfile->flags = 0;
  2855. tfile->ifindex = 0;
  2856. init_waitqueue_head(&tfile->socket.wq.wait);
  2857. tfile->socket.file = file;
  2858. tfile->socket.ops = &tun_socket_ops;
  2859. sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
  2860. tfile->sk.sk_write_space = tun_sock_write_space;
  2861. tfile->sk.sk_sndbuf = INT_MAX;
  2862. file->private_data = tfile;
  2863. INIT_LIST_HEAD(&tfile->next);
  2864. sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
  2865. /* tun groks IOCB_NOWAIT just fine, mark it as such */
  2866. file->f_mode |= FMODE_NOWAIT;
  2867. return 0;
  2868. }
  2869. static int tun_chr_close(struct inode *inode, struct file *file)
  2870. {
  2871. struct tun_file *tfile = file->private_data;
  2872. tun_detach(tfile, true);
  2873. return 0;
  2874. }
  2875. #ifdef CONFIG_PROC_FS
  2876. static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
  2877. {
  2878. struct tun_file *tfile = file->private_data;
  2879. struct tun_struct *tun;
  2880. struct ifreq ifr;
  2881. memset(&ifr, 0, sizeof(ifr));
  2882. rtnl_lock();
  2883. tun = tun_get(tfile);
  2884. if (tun)
  2885. tun_get_iff(tun, &ifr);
  2886. rtnl_unlock();
  2887. if (tun)
  2888. tun_put(tun);
  2889. seq_printf(m, "iff:\t%s\n", ifr.ifr_name);
  2890. }
  2891. #endif
  2892. static const struct file_operations tun_fops = {
  2893. .owner = THIS_MODULE,
  2894. .read_iter = tun_chr_read_iter,
  2895. .write_iter = tun_chr_write_iter,
  2896. .poll = tun_chr_poll,
  2897. .unlocked_ioctl = tun_chr_ioctl,
  2898. #ifdef CONFIG_COMPAT
  2899. .compat_ioctl = tun_chr_compat_ioctl,
  2900. #endif
  2901. .open = tun_chr_open,
  2902. .release = tun_chr_close,
  2903. .fasync = tun_chr_fasync,
  2904. #ifdef CONFIG_PROC_FS
  2905. .show_fdinfo = tun_chr_show_fdinfo,
  2906. #endif
  2907. };
  2908. static struct miscdevice tun_miscdev = {
  2909. .minor = TUN_MINOR,
  2910. .name = "tun",
  2911. .nodename = "net/tun",
  2912. .fops = &tun_fops,
  2913. };
  2914. /* ethtool interface */
  2915. static void tun_default_link_ksettings(struct net_device *dev,
  2916. struct ethtool_link_ksettings *cmd)
  2917. {
  2918. ethtool_link_ksettings_zero_link_mode(cmd, supported);
  2919. ethtool_link_ksettings_zero_link_mode(cmd, advertising);
  2920. cmd->base.speed = SPEED_10000;
  2921. cmd->base.duplex = DUPLEX_FULL;
  2922. cmd->base.port = PORT_TP;
  2923. cmd->base.phy_address = 0;
  2924. cmd->base.autoneg = AUTONEG_DISABLE;
  2925. }
  2926. static int tun_get_link_ksettings(struct net_device *dev,
  2927. struct ethtool_link_ksettings *cmd)
  2928. {
  2929. struct tun_struct *tun = netdev_priv(dev);
  2930. memcpy(cmd, &tun->link_ksettings, sizeof(*cmd));
  2931. return 0;
  2932. }
  2933. static int tun_set_link_ksettings(struct net_device *dev,
  2934. const struct ethtool_link_ksettings *cmd)
  2935. {
  2936. struct tun_struct *tun = netdev_priv(dev);
  2937. memcpy(&tun->link_ksettings, cmd, sizeof(*cmd));
  2938. return 0;
  2939. }
  2940. static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
  2941. {
  2942. struct tun_struct *tun = netdev_priv(dev);
  2943. strscpy(info->driver, DRV_NAME, sizeof(info->driver));
  2944. strscpy(info->version, DRV_VERSION, sizeof(info->version));
  2945. switch (tun->flags & TUN_TYPE_MASK) {
  2946. case IFF_TUN:
  2947. strscpy(info->bus_info, "tun", sizeof(info->bus_info));
  2948. break;
  2949. case IFF_TAP:
  2950. strscpy(info->bus_info, "tap", sizeof(info->bus_info));
  2951. break;
  2952. }
  2953. }
  2954. static u32 tun_get_msglevel(struct net_device *dev)
  2955. {
  2956. struct tun_struct *tun = netdev_priv(dev);
  2957. return tun->msg_enable;
  2958. }
  2959. static void tun_set_msglevel(struct net_device *dev, u32 value)
  2960. {
  2961. struct tun_struct *tun = netdev_priv(dev);
  2962. tun->msg_enable = value;
  2963. }
  2964. static int tun_get_coalesce(struct net_device *dev,
  2965. struct ethtool_coalesce *ec,
  2966. struct kernel_ethtool_coalesce *kernel_coal,
  2967. struct netlink_ext_ack *extack)
  2968. {
  2969. struct tun_struct *tun = netdev_priv(dev);
  2970. ec->rx_max_coalesced_frames = tun->rx_batched;
  2971. return 0;
  2972. }
  2973. static int tun_set_coalesce(struct net_device *dev,
  2974. struct ethtool_coalesce *ec,
  2975. struct kernel_ethtool_coalesce *kernel_coal,
  2976. struct netlink_ext_ack *extack)
  2977. {
  2978. struct tun_struct *tun = netdev_priv(dev);
  2979. if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT)
  2980. tun->rx_batched = NAPI_POLL_WEIGHT;
  2981. else
  2982. tun->rx_batched = ec->rx_max_coalesced_frames;
  2983. return 0;
  2984. }
  2985. static void tun_get_channels(struct net_device *dev,
  2986. struct ethtool_channels *channels)
  2987. {
  2988. struct tun_struct *tun = netdev_priv(dev);
  2989. channels->combined_count = tun->numqueues;
  2990. channels->max_combined = tun->flags & IFF_MULTI_QUEUE ? MAX_TAP_QUEUES : 1;
  2991. }
  2992. static const struct ethtool_ops tun_ethtool_ops = {
  2993. .supported_coalesce_params = ETHTOOL_COALESCE_RX_MAX_FRAMES,
  2994. .get_drvinfo = tun_get_drvinfo,
  2995. .get_msglevel = tun_get_msglevel,
  2996. .set_msglevel = tun_set_msglevel,
  2997. .get_link = ethtool_op_get_link,
  2998. .get_channels = tun_get_channels,
  2999. .get_ts_info = ethtool_op_get_ts_info,
  3000. .get_coalesce = tun_get_coalesce,
  3001. .set_coalesce = tun_set_coalesce,
  3002. .get_link_ksettings = tun_get_link_ksettings,
  3003. .set_link_ksettings = tun_set_link_ksettings,
  3004. };
  3005. static int tun_queue_resize(struct tun_struct *tun)
  3006. {
  3007. struct net_device *dev = tun->dev;
  3008. struct tun_file *tfile;
  3009. struct ptr_ring **rings;
  3010. int n = tun->numqueues + tun->numdisabled;
  3011. int ret, i;
  3012. rings = kmalloc_objs(*rings, n);
  3013. if (!rings)
  3014. return -ENOMEM;
  3015. for (i = 0; i < tun->numqueues; i++) {
  3016. tfile = rtnl_dereference(tun->tfiles[i]);
  3017. rings[i] = &tfile->tx_ring;
  3018. }
  3019. list_for_each_entry(tfile, &tun->disabled, next)
  3020. rings[i++] = &tfile->tx_ring;
  3021. ret = ptr_ring_resize_multiple_bh(rings, n,
  3022. dev->tx_queue_len, GFP_KERNEL,
  3023. tun_ptr_free);
  3024. kfree(rings);
  3025. return ret;
  3026. }
  3027. static int tun_device_event(struct notifier_block *unused,
  3028. unsigned long event, void *ptr)
  3029. {
  3030. struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  3031. struct tun_struct *tun = netdev_priv(dev);
  3032. int i;
  3033. if (dev->rtnl_link_ops != &tun_link_ops)
  3034. return NOTIFY_DONE;
  3035. switch (event) {
  3036. case NETDEV_CHANGE_TX_QUEUE_LEN:
  3037. if (tun_queue_resize(tun))
  3038. return NOTIFY_BAD;
  3039. break;
  3040. case NETDEV_UP:
  3041. for (i = 0; i < tun->numqueues; i++) {
  3042. struct tun_file *tfile;
  3043. tfile = rtnl_dereference(tun->tfiles[i]);
  3044. tfile->socket.sk->sk_write_space(tfile->socket.sk);
  3045. }
  3046. break;
  3047. default:
  3048. break;
  3049. }
  3050. return NOTIFY_DONE;
  3051. }
  3052. static struct notifier_block tun_notifier_block __read_mostly = {
  3053. .notifier_call = tun_device_event,
  3054. };
  3055. static int __init tun_init(void)
  3056. {
  3057. int ret = 0;
  3058. pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
  3059. ret = rtnl_link_register(&tun_link_ops);
  3060. if (ret) {
  3061. pr_err("Can't register link_ops\n");
  3062. goto err_linkops;
  3063. }
  3064. ret = misc_register(&tun_miscdev);
  3065. if (ret) {
  3066. pr_err("Can't register misc device %d\n", TUN_MINOR);
  3067. goto err_misc;
  3068. }
  3069. ret = register_netdevice_notifier(&tun_notifier_block);
  3070. if (ret) {
  3071. pr_err("Can't register netdevice notifier\n");
  3072. goto err_notifier;
  3073. }
  3074. return 0;
  3075. err_notifier:
  3076. misc_deregister(&tun_miscdev);
  3077. err_misc:
  3078. rtnl_link_unregister(&tun_link_ops);
  3079. err_linkops:
  3080. return ret;
  3081. }
/*
 * Module exit: deregister the misc device, the rtnl link ops and the
 * netdevice notifier that tun_init() registered.
 */
static void __exit tun_cleanup(void)
{
	misc_deregister(&tun_miscdev);
	rtnl_link_unregister(&tun_link_ops);
	unregister_netdevice_notifier(&tun_notifier_block);
}
  3088. /* Get an underlying socket object from tun file. Returns error unless file is
  3089. * attached to a device. The returned object works like a packet socket, it
  3090. * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
  3091. * holding a reference to the file for as long as the socket is in use. */
  3092. struct socket *tun_get_socket(struct file *file)
  3093. {
  3094. struct tun_file *tfile;
  3095. if (file->f_op != &tun_fops)
  3096. return ERR_PTR(-EINVAL);
  3097. tfile = file->private_data;
  3098. if (!tfile)
  3099. return ERR_PTR(-EBADFD);
  3100. return &tfile->socket;
  3101. }
  3102. EXPORT_SYMBOL_GPL(tun_get_socket);
  3103. struct ptr_ring *tun_get_tx_ring(struct file *file)
  3104. {
  3105. struct tun_file *tfile;
  3106. if (file->f_op != &tun_fops)
  3107. return ERR_PTR(-EINVAL);
  3108. tfile = file->private_data;
  3109. if (!tfile)
  3110. return ERR_PTR(-EBADFD);
  3111. return &tfile->tx_ring;
  3112. }
  3113. EXPORT_SYMBOL_GPL(tun_get_tx_ring);
/* Module entry and exit points. */
module_init(tun_init);
module_exit(tun_cleanup);
/* Module metadata. */
MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR(DRV_COPYRIGHT);
MODULE_LICENSE("GPL");
/* Aliases: by misc device minor and by device node name. */
MODULE_ALIAS_MISCDEV(TUN_MINOR);
MODULE_ALIAS("devname:net/tun");
/* Import the NETDEV_INTERNAL symbol namespace. */
MODULE_IMPORT_NS("NETDEV_INTERNAL");
  3121. MODULE_IMPORT_NS("NETDEV_INTERNAL");